Skip to content

Commit 1e12a7c

Browse files
authored
Merge pull request #36 from yellowtree/libxmlOptions
Possibility to omit Doctype for hyphenateHtml()
2 parents c8123b5 + df8e0c0 commit 1e12a7c

File tree

2 files changed

+20
-1
lines changed

2 files changed

+20
-1
lines changed

src/Syllable.php

+16-1
Original file line number | Diff line number | Diff line change
@@ -62,6 +62,11 @@ class Syllable
6262
private $excludes = array();
6363
private $includes = array();
6464

65+
/**
66+
* Bitmask of libxml option constants handed to DOMDocument::loadHTML()
* by hyphenateHtml(). Defaults to 0, i.e. no extra options.
*
* @var int
67+
*/
68+
private $libxmlOptions = 0;
69+
6570
/**
6671
* Create a new Syllable class, with defaults
6772
*
@@ -195,6 +200,16 @@ public function getMinWordLength()
195200
return $this->min_word_length;
196201
}
197202

203+
/**
204+
* Set the libxml options used when hyphenateHtml() parses its HTML input.
*
* The value is stored unchanged and later passed as the options argument
* of DOMDocument::loadHTML(). For example,
* LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD omits the implied
* html/body wrapper elements and the default doctype from the output.
*
205+
* @param int $libxmlOptions Bitmask of LIBXML_* constants (0 = none)
206+
* @see https://www.php.net/manual/en/libxml.constants.php
207+
*/
208+
public function setLibxmlOptions($libxmlOptions)
209+
{
210+
$this->libxmlOptions = $libxmlOptions;
211+
}
212+
198213
private static function initEncoding()
199214
{
200215
if (self::$encoding) {
@@ -436,7 +451,7 @@ public function hyphenateHtml($html)
436451
{
437452
$dom = new \DOMDocument();
438453
$dom->resolveExternals = true;
439-
$dom->loadHTML($html);
454+
$dom->loadHTML($html, $this->libxmlOptions);
440455

441456
// filter excludes
442457
$xpath = new \DOMXPath($dom);

tests/SyllableTest.php

+4
Original file line number | Diff line number | Diff line change
@@ -222,6 +222,10 @@ public function testHyphenateHtml()
222222
$this->assertEquals('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">'
223223
. "\n" . '<html><body><p>Ridicu-lous-ly <b class="unsplittable">com-pli-cat-ed</b> meta-text</p></body></html>'
224224
. "\n", $this->object->hyphenateHtml('Ridiculously <b class="unsplittable">complicated</b> metatext'));
225+
226+
$this->object->setLibxmlOptions(LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
227+
$this->assertEquals('<p>Ridicu-lous-ly <b class="unsplittable">com-pli-cat-ed</b> meta-text</p>'
228+
. "\n", $this->object->hyphenateHtml('Ridiculously <b class="unsplittable">complicated</b> metatext'));
225229
}
226230

227231
public function testCaseInsensitivity()

0 commit comments

Comments
 (0)