@@ -13,25 +13,40 @@ object HtmlParsing {
13
13
file : File ,
14
14
entity : Entity ,
15
15
allEntities : Set [Entity ],
16
+ linkSolver : String => String ,
16
17
): Either [Error , (String , String )] = {
17
18
val document = JsoupBrowser ().parseFile(file.toJava)
18
19
val docQuery = s " ${entity.entityId.map(" div#" + _ + " " ).getOrElse(" " )}div.cover > div.doc "
19
- extractTag(file, document, docQuery, allEntities).map((entity.name, _))
20
+ extractTag(file, document, docQuery, allEntities, linkSolver ).map((entity.name, _))
20
21
}
21
22
22
/** Extracts the element selected by `tag` from `doc` and renders its child nodes as Markdown.
  *
  * @param file       source file, used only to build the `ParseError` when nothing is extracted
  * @param doc        parsed document to query
  * @param tag        query passed to `element(...)`
  * @param all        known entities, forwarded to `toMarkdown` for link resolution
  * @param linkSolver link rewriting function, forwarded to `toMarkdown`
  * @return the rendered Markdown, `ParseError(file, tag)` when the query extracts nothing,
  *         or whatever error `toMarkdown` reports
  */
private def extractTag(
  file: File,
  doc: Browser#DocumentType,
  tag: String,
  all: Set[Entity],
  linkSolver: String => String,
): Either[Error, String] =
  for {
    matched  <- doc.tryExtract(element(tag)).toRight(ParseError(file, tag))
    markdown <- toMarkdown(matched.childNodes, all, linkSolver)
  } yield markdown
24
35
25
/** Renders a sequence of HTML nodes as Markdown text.
  *
  * Text nodes are appended verbatim, `<a>` elements become `[text](link)` Markdown links,
  * and any other element is flattened by recursing into its child nodes.
  *
  * @param elems       nodes to render, processed in iteration order
  * @param allEntities known entities; a link target is resolved by matching an entity's `name`
  * @param linkSolver  applied to the matched entity's `link` to produce the final link target
  * @return the concatenated Markdown, or the first error encountered (a `MissingLink` when
  *         the name derived from a link's href matches no entity)
  */
private def toMarkdown(
  elems: Iterable[Node],
  allEntities: Set[Entity],
  linkSolver: String => String,
): Either[Error, String] = {
  def isLink(elem: Element) = elem.tagName === "a"

  def toMarkdownLink(elem: Element) =
    lookupLinkFor(extractName(elem)).map(linkSolver).map(l => s"[${elem.text}]($l)")

  def lookupLinkFor(name: String) =
    allEntities.find(_.name === name).map(_.link).toRight(MissingLink(name))

  // Derives the entity name from the href: drop ".html", keep what follows the last '$',
  // then keep the last '/'-separated segment.
  // `split` drops trailing empty strings, so an href such as "/" or "$" yields an empty
  // array; `lastOption` turns that into "" (reported as MissingLink("") by the lookup)
  // instead of letting `.last` throw outside the Either error channel.
  // NOTE(review): behaviour of `attr("href")` on an anchor without an href is not visible
  // here; confirm it cannot throw for name-only anchors.
  def extractName(elem: Element) = {
    val afterDollar = elem.attr("href").replace(".html", "").split('$').lastOption.getOrElse("")
    afterDollar.split("/").lastOption.getOrElse("")
  }

  elems.foldLeft("".asRight[Error]) { (acc, elem) =>
    elem match {
      case TextNode(s)                 => acc.map(_ + s)
      case ElementNode(e) if isLink(e) => acc.flatMap(a => toMarkdownLink(e).map(a + _))
      case ElementNode(e)              => acc.flatMap(a => toMarkdown(e.childNodes, allEntities, linkSolver).map(a + _))
    }
  }
}
0 commit comments