Skip to content

Commit 26f4ed7

Browse files
vitlinda authored and nicolasfara committed
feat: correctly resolve documentation links
1 parent a91d47a commit 26f4ed7

File tree

2 files changed

+36
-12
lines changed

2 files changed

+36
-12
lines changed

src/main/scala/dev/atedeg/Errors.scala

+4
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,10 @@ final case class EntityNotFound(baseEntity: BaseEntity) extends Error {
1313
override def toString: String = s"Could not find entity '$baseEntity'"
1414
}
1515

16+
final case class MissingLink(name: String) extends Error {
17+
override def toString: String = s"Could not find link for entity '$name'"
18+
}
19+
1620
final case class FileNotFound(lookupDir: File, path: String) extends Error {
1721
override def toString: String = s"Could not find file '$path' in directory '${lookupDir.pathAsString}'"
1822
}

src/main/scala/dev/atedeg/HtmlParsing.scala

+32 -12
Original file line number | Diff line number | Diff line change
@@ -12,27 +12,47 @@ object HtmlParsing {
1212
def extractTermAndDefinition(file: File, entity: Entity, allEntities: Set[Entity]): Either[Error, (String, String)] =
1313
for {
1414
document <- JsoupBrowser().parseFile(file.toJava).asRight
15-
doc <- extractDoc(file, document, entity)
15+
doc <- extractDoc(file, document, entity, allEntities)
1616
} yield (entity.name, doc)
1717

18-
def extractDoc(file: File, document: Browser#DocumentType, entity: Entity): Either[Error, String] = {
18+
def extractDoc(
19+
file: File,
20+
document: Browser#DocumentType,
21+
entity: Entity,
22+
allEntities: Set[Entity],
23+
): Either[Error, String] = {
1924
val searchQuery = s"#${entity.entityId.map(_ + " > ").getOrElse("")}div.cover > div.doc"
20-
extractTagFromDocument(file, document, searchQuery)
25+
extractTagFromDocument(file, document, searchQuery, allEntities)
2126
}
2227

23-
private def extractTagFromDocument(file: File, doc: Browser#DocumentType, tag: String): Either[Error, String] =
24-
doc.tryExtract(element(tag)).map(_.childNodes).map(toMarkdown).toRight(ParseError(file, tag))
28+
private def extractTagFromDocument(
29+
file: File,
30+
doc: Browser#DocumentType,
31+
tag: String,
32+
allEntities: Set[Entity],
33+
): Either[Error, String] =
34+
doc.tryExtract(element(tag)).map(_.childNodes).toRight(ParseError(file, tag)).flatMap(toMarkdown(_, allEntities))
2535

26-
private def toMarkdown(es: Iterable[Node]): String = {
36+
private def toMarkdown(es: Iterable[Node], allEntities: Set[Entity]): Either[Error, String] = {
2737
def isLink(e: Element): Boolean = e.tagName == "a"
28-
def toMarkdownLink(e: Element): String = s"[${extractName(e.text)}](${e.text})"
29-
def extractName(fullPath: String): String = fullPath.split('.').last
38+
def toMarkdownLink(e: Element) = lookupLinkFor(extractName(e)).map(l => s"[$l](${e.text})")
39+
def extractName(e: Element): String = e.attr("href").replace(".html", "")
40+
def lookupLinkFor(name: String): Either[Error, String] =
41+
allEntities.find(_.name == name).map(_.link.replace("/", ".")).toRight(MissingLink(name))
3042

31-
es.foldLeft("") { (acc, elem) =>
43+
es.foldLeft("".asRight[Error]) { (acc, elem) =>
3244
elem match {
33-
case TextNode(s) => acc + s
34-
case ElementNode(e) if isLink(e) => acc + toMarkdownLink(e)
35-
case ElementNode(e) => acc + toMarkdown(e.childNodes)
45+
case TextNode(s) => acc.map(_ + s)
46+
case ElementNode(e) if isLink(e) =>
47+
for {
48+
a <- acc
49+
l <- toMarkdownLink(e)
50+
} yield a + l
51+
case ElementNode(e) =>
52+
for {
53+
a <- acc
54+
m <- toMarkdown(e.childNodes, allEntities)
55+
} yield a + m
3656
}
3757
}
3858
}

0 commit comments

Comments
 (0)