diff --git a/src/main/scala/model/crawler/TitleCrawler.scala b/src/main/scala/model/crawler/TitleCrawler.scala
index efcdd56..d6fce1a 100644
--- a/src/main/scala/model/crawler/TitleCrawler.scala
+++ b/src/main/scala/model/crawler/TitleCrawler.scala
@@ -1,11 +1,35 @@
 package com.voronind.doublegis.test
 package model.crawler
 
+import model.crawler.TitleCrawler.TITLE_UNKNOWN
+
+import net.ruippeixotog.scalascraper.browser.JsoupBrowser
+
+import scala.util.control.NonFatal
+
 /**
  * A crawler that extracts Http Head Title.
  */
 class TitleCrawler extends Crawler {
+  /**
+   * Fetches `url` with a `JsoupBrowser` and returns the title of the resulting document.
+   *
+   * @param url address of the page to fetch.
+   * @return the document title, or `TITLE_UNKNOWN` when fetching or parsing throws.
+   */
   override def crawl(url: String): String = {
-    "Mock title."
+    try {
+      val browser = JsoupBrowser()
+      val html = browser.get(url)
+      html.title
+    } catch {
+      // Best-effort fallback; NonFatal lets interrupts and fatal JVM errors propagate.
+      case NonFatal(_) => TITLE_UNKNOWN
+    }
   }
 }
+
+object TitleCrawler {
+  /** Placeholder returned by `crawl` when the title cannot be obtained. */
+  private val TITLE_UNKNOWN = "Unable to extract the title."
+}
diff --git a/src/main/scala/model/handler/TitleCrawlerHandler.scala b/src/main/scala/model/handler/TitleCrawlerHandler.scala
index 4bf0258..7610a61 100644
--- a/src/main/scala/model/handler/TitleCrawlerHandler.scala
+++ b/src/main/scala/model/handler/TitleCrawlerHandler.scala
@@ -1,12 +1,13 @@
 package com.voronind.doublegis.test
 package model.handler
 
+import model.crawler.TitleCrawler
 import model.lib.{HttpUtil, StreamUtil}
 
 import com.sun.net.httpserver.{HttpExchange, HttpHandler}
-import com.voronind.doublegis.test.model.crawler.TitleCrawler
 
-import java.io.{BufferedReader, ByteArrayInputStream, InputStream, InputStreamReader}
+import java.io.{BufferedReader, ByteArrayInputStream, InputStreamReader}
+import java.nio.charset.StandardCharsets
 import scala.language.postfixOps
 
 /**
@@ -28,20 +29,29 @@ class TitleCrawlerHandler extends HttpHandler, Handler {
   // I don't know if this one is ugly, but I wanted to show off a bit.
+  /**
+   * Reads `request` line by line, keeps the lines accepted by `HttpUtil.isUrl`, passes each
+   * to `runCrawler` and replies with status 200 and one `url => result` line per URL.
+   *
+   * @param request raw request body; decoded as UTF-8.
+   */
   extension (exchange: HttpExchange) private def sendResponse(request: Array[Byte]): Unit = {
-    val reader = new BufferedReader(new InputStreamReader(ByteArrayInputStream(request)))
-
-    val result = Iterator
+    // Explicit charset: decoding must not depend on the JVM's platform default.
+    val decoded = new InputStreamReader(ByteArrayInputStream(request), StandardCharsets.UTF_8)
+    val reader = new BufferedReader(decoded)
+    val response = Iterator
       .continually(reader.readLine)
-      .takeWhile(null !=)
+      .takeWhile(_ != null)
       .filter(HttpUtil.isUrl)
-      .map({ url => f"$url => ${runCrawler(url)}" })
-      .toList
+      .map({ url => s"$url => ${runCrawler(url)}" })
+      .mkString("\n")
+      .getBytes(StandardCharsets.UTF_8)
 
-    val response = result.mkString("\n")
-    exchange.sendResponseHeaders(200, response.length())
+    // The declared length is the encoded byte count, not the character count.
+    exchange.sendResponseHeaders(200, response.length)
 
-    val out = exchange.getResponseBody
-    out.write(response.getBytes)
-    out.close()
+    val output = exchange.getResponseBody
+    // Close the body even when the copy throws, so the stream is not leaked.
+    try StreamUtil.copyStream(new ByteArrayInputStream(response), output)
+    finally output.close()
   }
 
   private def runCrawler(url: String): String = {