Skip to content

Commit 3a6a4bf

Browse files
committed
Do not accept empty OCRmyPDF results and fix log #79
Signed-off-by: Robin Windey <[email protected]>
1 parent 05e2509 commit 3a6a4bf

File tree

3 files changed

+55
-13
lines changed

3 files changed

+55
-13
lines changed

.gitignore

+2 -1
Original file line number | Diff line number | Diff line change
@@ -5,4 +5,5 @@ coverage*.html
55
coverage_html
66
build
77
.phpunit.result.cache
8-
*.cov
8+
*.cov
9+
coverage

lib/OcrProcessors/PdfOcrProcessor.php

+16 -7
Original file line number | Diff line number | Diff line change
@@ -50,15 +50,24 @@ public function ocrFile(string $fileContent): string {
5050
$stdErr = $this->command->getStdErr();
5151
$exitCode = $this->command->getExitCode();
5252

53-
if ($success) {
54-
if ($stdErr !== '' || $errorOutput !== '') {
55-
// Log warning if ocrmypdf wrote a warning to the stderr
56-
$this->logger->warning('OCRmyPDF succeeded with warning(s): {stdErr}, {errorOutput}', [$stdErr, $errorOutput]);
57-
}
53+
if (!$success) {
54+
throw new OcrNotPossibleException('OCRmyPDF exited abnormally with exit-code ' . $exitCode . '. Message: ' . $errorOutput . ' ' . $stdErr);
55+
}
56+
57+
if ($stdErr !== '' || $errorOutput !== '') {
58+
// Log warning if ocrmypdf wrote a warning to the stderr
59+
$this->logger->warning('OCRmyPDF succeeded with warning(s): {stdErr}, {errorOutput}', [
60+
'stdErr' => $stdErr,
61+
'errorOutput' => $errorOutput
62+
]);
63+
}
64+
65+
$ocrOutput = $this->command->getOutput();
5866

59-
return $this->command->getOutput();
67+
if (!$ocrOutput) {
68+
throw new OcrNotPossibleException('OCRmyPDF did not produce any output');
6069
}
6170

62-
throw new OcrNotPossibleException('OCRmyPDF exited abnormally with exit-code ' . $exitCode . '. Message: ' . $errorOutput . ' ' . $stdErr);
71+
return $ocrOutput;
6372
}
6473
}

tests/Unit/OcrProcessors/PdfOcrProcessorTest.php

+37 -5
Original file line number | Diff line number | Diff line change
@@ -107,22 +107,54 @@ public function testLogsWarningIfOcrMyPdfSucceedsWithWarningOutput() {
107107
->willReturn(true);
108108
$this->command->expects($this->once())
109109
->method('getError')
110-
->willReturn('error');
110+
->willReturn('getErrorOutput');
111111
$this->command->expects($this->once())
112112
->method('getStdErr')
113-
->willReturn('stdErr');
113+
->willReturn('stdErrOutput');
114+
$this->command->expects($this->once())
115+
->method('getOutput')
116+
->willReturn('someOcrFileContent');
114117
$this->logger->expects($this->once())
115118
->method('warning')
116119
->with(
117-
$this->stringStartsWith('OCRmyPDF succeeded with warning(s):'),
120+
'OCRmyPDF succeeded with warning(s): {stdErr}, {errorOutput}',
118121
$this->callback(function ($paramsArray) {
119122
return is_array($paramsArray) &&
120123
count($paramsArray) === 2 &&
121-
$paramsArray[0] === 'stdErr' &&
122-
$paramsArray[1] === 'error';
124+
$paramsArray['stdErr'] === 'stdErrOutput' &&
125+
$paramsArray['errorOutput'] === 'getErrorOutput';
123126
}));
124127

125128
$processor = new PdfOcrProcessor($this->command, $this->logger);
126129
$processor->ocrFile('someContent');
127130
}
131+
132+
public function testThrowsErrorIfOcrFileWasEmpty() {
133+
$this->command->expects($this->once())
134+
->method('execute')
135+
->willReturn(true);
136+
$this->command->expects($this->once())
137+
->method('getError')
138+
->willReturn('error');
139+
$this->command->expects($this->once())
140+
->method('getStdErr')
141+
->willReturn('stdErr');
142+
$this->command->expects($this->once())
143+
->method('getOutput')
144+
->willReturn('');
145+
146+
147+
$thrown = false;
148+
$processor = new PdfOcrProcessor($this->command, $this->logger);
149+
150+
try {
151+
$processor->ocrFile('someContent');
152+
} catch (\Throwable $t) {
153+
$thrown = true;
154+
$this->assertInstanceOf(OcrNotPossibleException::class, $t);
155+
$this->assertEquals('OCRmyPDF did not produce any output', $t->getMessage());
156+
}
157+
158+
$this->assertTrue($thrown);
159+
}
128160
}

0 commit comments

Comments
 (0)