TitleCrawler : Use scala-scraper.
This commit is contained in:
parent
add30216b5
commit
57c4031377
|
@ -1,11 +1,24 @@
|
|||
package com.voronind.doublegis.test
|
||||
package model.crawler
|
||||
|
||||
import model.crawler.TitleCrawler.TITLE_UNKNOWN
|
||||
|
||||
import net.ruippeixotog.scalascraper.browser.JsoupBrowser
|
||||
|
||||
/**
 * A [[Crawler]] that loads a page with scala-scraper's `JsoupBrowser` and
 * reports the title of the resulting document.
 */
class TitleCrawler extends Crawler {
  /**
   * Fetches the document at `url` and returns its title.
   *
   * @param url address of the page to load.
   * @return the document title, or `TITLE_UNKNOWN` when loading or parsing fails.
   */
  override def crawl(url: String): String = {
    import scala.util.control.NonFatal

    try {
      JsoupBrowser().get(url).title
    } catch {
      // Recover only from ordinary failures (bad URL, network or parse errors);
      // fatal JVM errors and thread interruption are left to propagate.
      case NonFatal(_) => TITLE_UNKNOWN
    }
  }
}
|
||||
|
||||
/** Companion object holding the fallback text used by the `TitleCrawler` class. */
object TitleCrawler {
  /** Text reported in place of a title when none could be extracted. */
  private val TITLE_UNKNOWN: String = "Unable to extract the title."
}
|
||||
|
|
|
@ -1,12 +1,12 @@
|
|||
package com.voronind.doublegis.test
|
||||
package model.handler
|
||||
|
||||
import model.crawler.TitleCrawler
|
||||
import model.lib.{HttpUtil, StreamUtil}
|
||||
|
||||
import com.sun.net.httpserver.{HttpExchange, HttpHandler}
|
||||
import com.voronind.doublegis.test.model.crawler.TitleCrawler
|
||||
|
||||
import java.io.{BufferedReader, ByteArrayInputStream, InputStream, InputStreamReader}
|
||||
import java.io.{BufferedReader, ByteArrayInputStream, InputStreamReader}
|
||||
import scala.language.postfixOps
|
||||
|
||||
/**
|
||||
|
@ -28,20 +28,20 @@ class TitleCrawlerHandler extends HttpHandler, Handler {
|
|||
// I don't know if this one is ugly, but I wanted to show off a bit.
|
||||
/**
 * Reads `request` as newline-separated text, runs the crawler for every line
 * accepted by `HttpUtil.isUrl`, and answers with status 200 and one
 * `url => result` line per accepted URL.
 *
 * @param request raw bytes of the request body.
 */
extension (exchange: HttpExchange) private def sendResponse(request: Array[Byte]): Unit = {
  import java.nio.charset.StandardCharsets.UTF_8

  // Decode and encode with an explicit charset so the result does not depend
  // on the platform default.
  val reader = new BufferedReader(new InputStreamReader(new ByteArrayInputStream(request), UTF_8))

  val response = Iterator
    .continually(reader.readLine())
    .takeWhile(_ != null) // readLine returns null at end of input
    .filter(HttpUtil.isUrl)
    .map(url => s"$url => ${runCrawler(url)}")
    .mkString("\n")
    .getBytes(UTF_8)

  // The declared length must be the encoded byte count, not the character count.
  exchange.sendResponseHeaders(200, response.length.toLong)

  val output = exchange.getResponseBody
  try StreamUtil.copyStream(new ByteArrayInputStream(response), output)
  finally output.close() // always release the exchange, even if the copy fails
}
|
||||
|
||||
private def runCrawler(url: String): String = {
|
||||
|
|
Reference in a new issue