Skip to content

Commit 3febfae

Browse files
author
Martijn van der Lee
committed
Fix some longstanding minor issues
1 parent 5386371 commit 3febfae

9 files changed

+74
-36
lines changed

.gitignore

+4-3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
/.idea
2-
/nbproject/
3-
/debug/
4-
vendor
2+
/nbproject
3+
/debug
4+
/vendor
5+
/composer.lock

.travis.yml

+2
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ php:
1111
- 7.4
1212
- 8.0
1313
- 8.1
14+
- 8.2
15+
- 8.3
1416
- hhvm
1517

1618
matrix:

README.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@ Sentence
44
[![Build Status](https://travis-ci.org/vanderlee/php-sentence.svg?branch=master)](https://travis-ci.org/vanderlee/php-sentence)
55
[![Quality](https://scrutinizer-ci.com/g/vanderlee/php-sentence/badges/quality-score.png?b=master)](https://scrutinizer-ci.com/g/vanderlee/php-sentence)
66

7-
Version 1.0.7
7+
Version 1.0.8
88

9-
Copyright © 2016-2022 Martijn van der Lee (@vanderlee), parts copyright © 2017 @marktaw.
9+
Copyright © 2016-2024 Martijn van der Lee (@vanderlee), parts copyright © 2017 @marktaw.
1010

1111
MIT Open Source license applies.
1212

composer.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "vanderlee/php-sentence",
3-
"description": "Simple text sentence splitting and counting. Supports atleast english, german and dutch, possibly more.",
3+
"description": "Simple text sentence splitting and counting. Supports at least english, german and dutch, possibly more.",
44
"type": "library",
55
"keywords": [
66
"sentence",

index.html

+4-6
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11

22
<!DOCTYPE html>
3-
<html>
3+
<html lang="en-US">
44
<head>
55
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
66
<title>PHP Sentence</title>
@@ -125,11 +125,9 @@
125125
code[l] = tabsToSpaces(code[l], tabsize);
126126
}
127127

128-
129128
// determine minimum length
130-
var minlength = 2E53;
131-
var first = 2E53;
132-
var last = 0;
129+
var first = 2E53,
130+
last = 0;
133131
for (l in code) {
134132
if (/\S/.test(code[l])) {
135133
minlength = Math.min(minlength, /^\s*/.exec(code[l])[0].length);
@@ -197,7 +195,7 @@
197195
</div>
198196

199197
<div id="footer">
200-
Copyright &copy; 2016 Martijn van der Lee. MIT Open Source license applies.
198+
Copyright &copy; 2016-2024 Martijn van der Lee. MIT Open Source license applies.
201199
</div>
202200
</body>
203201
</html>

src/Multibyte.php

+1-1
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ public static function cleanUnicode($string)
5656
*/
5757
public static function trim($string)
5858
{
59-
return mb_ereg_replace('^\s*([\s\S]*?)\s*$', '\1', $string);
59+
return mb_ereg_replace('(^\s*)|(\s*$)', '', $string);
6060
}
6161

6262
/**

src/Sentence.php

+15-14
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77
* Clipping may not be perfect.
88
* Sentence count should be VERY close to the truth.
99
*
10-
* Multibyte.php safe (atleast for UTF-8), but rules based on germanic
11-
* language stucture (English, Dutch, German). Should work for most
10+
* Multibyte safe (at least for UTF-8), but rules based on Germanic
11+
* language structure (English, Dutch, German). Should work for most
1212
* latin-alphabet languages.
1313
*
1414
* @author Martijn van der Lee (@vanderlee)
@@ -50,7 +50,7 @@ class Sentence
5050
*
5151
* @return string
5252
*/
53-
private function getReplaceCode(int $index)
53+
private function getReplaceCode($index)
5454
{
5555
return 0x02 . $index . 0x03;
5656
}
@@ -62,8 +62,9 @@ private function getReplaceCode(int $index)
6262
*
6363
* @return string
6464
*/
65-
private function replaceFloatNumbers(string $text)
65+
private function replaceFloatNumbers($text)
6666
{
67+
$matches = array();
6768
preg_match_all('!\d+(?:\.\d+)?!', $text, $matches, PREG_OFFSET_CAPTURE);
6869

6970
$this->replacements = [];
@@ -133,7 +134,7 @@ private static function linebreakSplit($text)
133134
* Splits an array of lines by (consecutive sequences of)
134135
* terminals, keeping terminals.
135136
*
136-
* Multibyte.php safe (atleast for UTF-8)
137+
* Multibyte safe (at least for UTF-8)
137138
*
138139
* For example:
139140
* "There ... is. More!"
@@ -172,7 +173,7 @@ private function punctuationSplit($line)
172173
* Appends each terminal item after its preceding
173174
* non-terminals.
174175
*
175-
* Multibyte.php safe (atleast for UTF-8)
176+
* Multibyte safe (at least for UTF-8)
176177
*
177178
* For example:
178179
* [ "There ", "...", " is", ".", " More", "!" ]
@@ -238,7 +239,7 @@ private function abbreviationMerge($fragments)
238239
$previous_is_abbreviation = false;
239240
$i = 0;
240241
foreach ($fragments as $fragment) {
241-
$is_abbreviation = self::isAbreviation($fragment);
242+
$is_abbreviation = self::isAbbreviation($fragment);
242243

243244
// merge previous fragment with this
244245
if ($previous_is_abbreviation) {
@@ -265,7 +266,7 @@ private function abbreviationMerge($fragments)
265266
*
266267
* @return bool
267268
*/
268-
private static function isAbreviation($fragment)
269+
private static function isAbbreviation($fragment)
269270
{
270271
$words = mb_split('\s+', Multibyte::trim($fragment));
271272

@@ -290,17 +291,17 @@ private static function isAbreviation($fragment)
290291
*/
291292
private function parenthesesMerge($parts)
292293
{
293-
$subsentences = [];
294+
$subSentences = [];
294295

295296
foreach ($parts as $part) {
296-
if ($part[0] === ')' && !empty($subsentences)) {
297-
$subsentences[count($subsentences) - 1] .= $part;
297+
if ($part[0] === ')' && !empty($subSentences)) {
298+
$subSentences[count($subSentences) - 1] .= $part;
298299
} else {
299-
$subsentences[] = $part;
300+
$subSentences[] = $part;
300301
}
301302
}
302303

303-
return $subsentences;
304+
return $subSentences;
304305
}
305306

306307
/**
@@ -396,7 +397,7 @@ private function sentenceMerge($shorts)
396397
}
397398

398399
/**
399-
* Return the sentences sentences detected in the provided text.
400+
* Return the sentences detected in the provided text.
400401
* Set the Sentence::SPLIT_TRIM flag to trim whitespace.
401402
*
402403
* @param string $text

tests/MultibyteTest.php

+37-1
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,13 @@
22

33
namespace Vanderlee\Sentence\Tests;
44

5+
use PHPUnit_Framework_TestCase;
56
use Vanderlee\Sentence\Multibyte;
67

7-
class MultibyteTest extends \PHPUnit_Framework_TestCase
8+
/**
9+
* @coversDefaultClass \Vanderlee\Sentence\Multibyte
10+
*/
11+
class MultibyteTest extends PHPUnit_Framework_TestCase
812
{
913

1014
/**
@@ -16,6 +20,9 @@ public function testSplit($expected, $pattern, $subject, $limit = -1, $flags = 0
1620
$this->assertSame($expected, Multibyte::split($pattern, $subject, $limit, $flags));
1721
}
1822

23+
/**
24+
* @return array[]
25+
*/
1926
public function dataSplit()
2027
{
2128
return [
@@ -28,4 +35,33 @@ public function dataSplit()
2835
[['a', '-', 'b', '-', 'c'], '(-)', 'a-b-c', -1, PREG_SPLIT_DELIM_CAPTURE],
2936
];
3037
}
38+
39+
/**
40+
* @covers ::trim
41+
*
42+
* @dataProvider dataTrim
43+
* @param $subject
44+
* @param $expected
45+
* @return void
46+
*/
47+
public function testTrim($subject, $expected=null)
48+
{
49+
if ($expected === null) {
50+
$expected = $subject;
51+
}
52+
$this->assertSame($expected, Multibyte::trim($subject));
53+
}
54+
55+
/**
56+
* @return array[]
57+
*/
58+
public function dataTrim()
59+
{
60+
return [
61+
['Foo bar', 'Foo bar'],
62+
[' Foo bar', 'Foo bar'],
63+
[' Foo bar ', 'Foo bar'],
64+
['Foo bar ', 'Foo bar'],
65+
];
66+
}
3167
}

tests/SentenceTest.php

+8-8
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
use Vanderlee\Sentence\Sentence;
77

88
/**
9-
* @coversDefaultClass \Vanderlee\Sentence\Sentence
9+
* @coversDefaultClass Sentence
1010
*/
1111
class SentenceTest extends PHPUnit_Framework_TestCase
1212
{
@@ -22,7 +22,7 @@ class SentenceTest extends PHPUnit_Framework_TestCase
2222
*/
2323
protected function setUp()
2424
{
25-
$this->object = new \Vanderlee\Sentence\Sentence();
25+
$this->object = new Sentence();
2626
}
2727

2828
/**
@@ -90,7 +90,7 @@ public function testCountLinebreaks()
9090
/**
9191
* @covers ::count
9292
*/
93-
public function testCountAbreviations()
93+
public function testCountAbbreviations()
9494
{
9595
$this->assertSame(1, $this->object->count("Hello mr. Smith."));
9696
$this->assertSame(1, $this->object->count("Hello, OMG Kittens!"));
@@ -190,9 +190,8 @@ public function testSplitLinebreaks()
190190
/**
191191
* @covers ::split
192192
*/
193-
public function testSplitAbreviations()
193+
public function testSplitAbbreviations()
194194
{
195-
// $this->markTestIncomplete('This test has not been implemented yet.');
196195
$this->assertSame(['Hello mr. Smith.'], $this->object->split("Hello mr. Smith."));
197196
$this->assertSame(['Hello, OMG Kittens!'], $this->object->split("Hello, OMG Kittens!"));
198197
$this->assertSame(['Hello, abbrev. Kittens!'], $this->object->split("Hello, abbrev. Kittens!"));
@@ -237,11 +236,12 @@ public function testSentenceWithNumericValues()
237236
$this->assertSame(1, $this->object->count("The price is 25.50, including postage and packing."));
238237
$this->assertSame(1, $this->object->count("I went true to size at 10.5 cms."));
239238
$this->assertSame(2, $this->object->count("The prices are £25.50 or £27.50, including postage and packing. I went true to size at 10.5 cms."));
239+
$this->assertSame(1, $this->object->count("Prices will go up for 8.6% and because of that it is expensive."));
240240
}
241241

242242
/**
243-
* @covers ::floatNumberClean
244-
* @covers ::floatNumberRevert
243+
* @covers ::replaceFloatNumbers
244+
* @covers ::restoreReplacements
245245
*
246246
* @dataProvider dataSplit
247247
*
@@ -250,7 +250,7 @@ public function testSentenceWithNumericValues()
250250
*
251251
* @return void
252252
*/
253-
public function testSplit(array $expected, string $text)
253+
public function testSplit($expected, $text)
254254
{
255255
$this->assertSame($expected, $this->object->split($text));
256256
$this->assertSame(count($expected), $this->object->count($text));

0 commit comments

Comments
 (0)