@@ -7,23 +7,28 @@ import net.ruippeixotog.scalascraper.dsl.DSL.*
7
7
8
8
/** Helpers that pull one configured column out of a parsed HTML document and
  * render its contents as a Markdown-flavoured string.
  */
object HtmlParsing {

  /** Extracts the content of the element selected by `columnConfig.htmlTag`.
    *
    * @param document     the parsed document to search
    * @param fileName     passed through to link rendering, where it becomes the link label
    * @param columnConfig supplies the selector (`htmlTag`) and the column `name` used in the error
    * @return `Right` with the rendered child nodes of the matched element, or
    *         `Left("Cannot extract column <name>")` when the lookup yields no element
    */
  def extractColumn(
      document: Browser#DocumentType,
      fileName: String,
      columnConfig: ColumnConfig,
  ): Either[String, String] =
    (document >?> element(columnConfig.htmlTag))
      .map(matched => toMarkdown(matched.childNodes, fileName))
      .toRight(s"Cannot extract column ${columnConfig.name}")

  /** Renders a sequence of nodes by concatenating, in order, the rendering of each node.
    *
    * Text nodes contribute their text unchanged, `<a>` elements are rendered through
    * [[toMarkdownLink]], and any other element is flattened by rendering its children.
    */
  private def toMarkdown(es: Iterable[Node], fileName: String): String =
    // mkString appends into a single builder instead of re-copying an accumulator per node.
    es.iterator.map {
      case TextNode(s)                 => s
      case ElementNode(e) if isLink(e) => toMarkdownLink(e, fileName)
      case ElementNode(e)              => toMarkdown(e.childNodes, fileName)
    }.mkString

  /** True when the element's tag name is exactly `a`. */
  private def isLink(e: Element): Boolean = e.tagName == "a"

  /** Renders a link element as `[fileName](text)`.
    *
    * NOTE(review): the element's `href` attribute is not read here; the element text is
    * used as the link target and `fileName` as the label. Confirm this is intended.
    */
  private def toMarkdownLink(e: Element, fileName: String): String =
    s"[$fileName](${e.text})"

}
0 commit comments