Skip to content

Commit 2808965

Browse files
authored
Add support for workflow_ocr_backend (#291)
* Add support for workflow_ocr_backend * Integration for https://github.com/R0Wi-DEV/workflow_ocr_backend * Implements #51 * Add full integration test for OcrBackend Service * Add full integrationtests to pipeline (#294) * Incorporate code review feedback * Line ending adjustments * Add heartbeat check in System Setup Check * Add additional tests * Add local CLI tests (#296)
1 parent 77a20f0 commit 2808965

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

58 files changed

+5049
-202
lines changed
+138
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
# Integration test pipeline for the workflow_ocr app.
#
# Checks out Nextcloud server plus this app, installs Nextcloud against a
# MariaDB service container and runs `make php-integrationtest` once per OCR
# backend flavour:
#   - local:  ocrmypdf is installed directly on the runner
#   - remote: workflow_ocr_backend is registered as an ExApp through AppApi,
#             using the docker-socket-proxy service container
name: PHPUnit-Integration

on:
  pull_request:
  push:
    branches:
      - master
      - stable*

env:
  APP_NAME: workflow_ocr
  NEXTCLOUD_PORT: 8080
  NEXTCLOUD_USER: "admin"
  NEXTCLOUD_PASS: "password"
  NC_HAPROXY_PASSWORD: "some_secure_password"
  NC_HAPROXY_PORT: 2375
  DB_PORT: 4444
  MYSQL_ROOT_PASSWORD: "rootpassword"

jobs:
  # Do not change this name, it is used in the integration tests
  github-php-integrationtests:
    runs-on: ubuntu-24.04
    services:
      docker-socket-proxy:
        image: ghcr.io/nextcloud/nextcloud-appapi-dsp:release
        env:
          # Same value as the workflow-level NC_HAPROXY_PASSWORD above
          NC_HAPROXY_PASSWORD: "some_secure_password"
        options: --privileged
        volumes:
          - /var/run/docker.sock:/var/run/docker.sock
        ports:
          # Host port matches NC_HAPROXY_PORT
          - "2375:2375"
      mysql:
        image: mariadb:10.5
        ports:
          # Host port matches DB_PORT
          - 4444:3306/tcp
        env:
          # Same value as the workflow-level MYSQL_ROOT_PASSWORD above
          MYSQL_ROOT_PASSWORD: rootpassword
        options: --health-cmd="mysqladmin ping" --health-interval 5s --health-timeout 2s --health-retries 5

    strategy:
      fail-fast: false
      matrix:
        php-versions: ['8.3']
        databases: ['mysql']
        server-versions: ['stable31']
        backend: ['remote', 'local']  # Do not change these names, they're used in the integration tests

    name: php-integrationtests-${{ matrix.backend }}-${{ matrix.php-versions }}-${{ matrix.databases }}

    steps:
      - name: Checkout server
        uses: actions/checkout@v4
        with:
          repository: nextcloud/server
          ref: ${{ matrix.server-versions }}

      - name: Checkout submodules
        shell: bash
        run: |
          auth_header="$(git config --local --get http.https://github.com/.extraheader)"
          git submodule sync --recursive
          git -c "http.extraheader=$auth_header" -c protocol.version=2 submodule update --init --force --recursive --depth=1

      - name: Checkout app
        uses: actions/checkout@v4
        with:
          path: apps/${{ env.APP_NAME }}

      - name: Set up php ${{ matrix.php-versions }}
        uses: shivammathur/setup-php@v2
        with:
          php-version: ${{ matrix.php-versions }}
          tools: phpunit
          extensions: mbstring, iconv, fileinfo, intl, sqlite, pdo_sqlite, gd, zip, imagick
          coverage: none

      # Only the "local" backend runs the ocrmypdf CLI on the runner itself
      - name: Install ocrmypdf
        if: matrix.backend == 'local'
        run: |
          sudo apt-get update && sudo apt-get install -y ocrmypdf
          ocrmypdf --version

      - name: Install composer dependencies
        working-directory: apps/${{ env.APP_NAME }}
        run: composer i

      # Note: ./occ maintenance:mimetype:update-db is required to avoid
      # issues with the application/pdf mimetype
      - name: Set up Nextcloud
        run: |
          mkdir data
          ./occ maintenance:install --verbose --database=${{ matrix.databases }} --database-name=nextcloud \
            --database-host=127.0.0.1 --database-port=${{ env.DB_PORT }} --database-user=root --database-pass=${{ env.MYSQL_ROOT_PASSWORD }} \
            --admin-user ${{ env.NEXTCLOUD_USER }} --admin-pass ${{ env.NEXTCLOUD_PASS }}
          ./occ app:enable ${{ env.APP_NAME }}
          ./occ maintenance:mimetype:update-db
          php -S localhost:${{ env.NEXTCLOUD_PORT }} &

      - name: Checkout AppApi
        uses: actions/checkout@v4
        if: matrix.backend == 'remote'
        with:
          repository: nextcloud/app_api
          ref: ${{ matrix.server-versions }}
          path: apps/app_api

      - name: Set up AppApi/ExApp infrastructure
        if: matrix.backend == 'remote'
        run: |
          ./occ app:enable app_api
          ./occ app_api:daemon:register local_docker "docker-socket-proxy" \
            "docker-install" "http" "localhost:${{ env.NC_HAPROXY_PORT }}" "http://localhost:${{ env.NEXTCLOUD_PORT }}" \
            --set-default --haproxy_password="${{ env.NC_HAPROXY_PASSWORD }}"
          ./occ app_api:app:register workflow_ocr_backend \
            --wait-finish \
            --info-xml https://raw.githubusercontent.com/R0Wi-DEV/workflow_ocr_backend/refs/heads/${{ matrix.server-versions }}/appinfo/info.xml

      - name: PHPUnit
        working-directory: apps/${{ env.APP_NAME }}
        env:
          # Exposes the current matrix backend to the test run
          GITHUB_MATRIX_BACKEND: ${{ matrix.backend }}
        run: make php-integrationtest

      - name: Write OCR Backend logs to file
        if: failure() && matrix.backend == 'remote'
        run: |
          # `docker logs` replays the container's stderr on its own stderr,
          # so merge both streams to get the complete log into the file.
          docker logs nc_app_workflow_ocr_backend > data/ocr_backend.log 2>&1

      - name: Upload logs
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          # upload-artifact@v4 does not allow two uploads under the same
          # artifact name within one workflow run; include the matrix backend
          # so both matrix jobs can publish their logs when both fail.
          name: logs-${{ matrix.backend }}
          path: data/*.log
138+

README.md

+49-31
Original file line numberDiff line numberDiff line change
@@ -12,37 +12,40 @@
1212
## Table of contents
1313

1414
- [Nextcloud Workflow OCR app](#nextcloud-workflow-ocr-app)
15-
- [Table of contents](#table-of-contents)
16-
- [Setup](#setup)
17-
- [App installation](#app-installation)
18-
- [Nextcloud background jobs](#nextcloud-background-jobs)
19-
- [Backend](#backend)
20-
- [Usage](#usage)
21-
- [Useful triggers](#useful-triggers)
22-
- [Trigger OCR if file was created or updated](#trigger-ocr-if-file-was-created-or-updated)
23-
- [Trigger OCR on tag assigning](#trigger-ocr-on-tag-assigning)
24-
- [Settings](#settings)
25-
- [Per workflow settings](#per-workflow-settings)
26-
- [Global settings](#global-settings)
27-
- [Testing your configuration](#testing-your-configuration)
28-
- [Get feedback via Notifications](#get-feedback-via-notifications)
29-
- [How it works](#how-it-works)
30-
- [General](#general)
31-
- [PDF](#pdf)
32-
- [Images](#images)
33-
- [Troubleshooting](#troubleshooting)
34-
- [Generic troubleshooting guide](#generic-troubleshooting-guide)
35-
- [The Nextcloud Workflowengine](#the-nextcloud-workflowengine)
36-
- [Development](#development)
37-
- [Dev setup](#dev-setup)
38-
- [Debugging](#debugging)
39-
- [`docker`-based setup](#docker-based-setup)
40-
- [Executing tests](#executing-tests)
41-
- [Adding a new `OcrProcessor`](#adding-a-new-ocrprocessor)
42-
- [Events emitted by the app](#events-emitted-by-the-app)
43-
- [`TextRecognizedEvent`](#textrecognizedevent)
44-
- [Limitations](#limitations)
45-
- [Used libraries \& components](#used-libraries--components)
15+
- [Table of contents](#table-of-contents)
16+
- [Setup](#setup)
17+
- [App installation](#app-installation)
18+
- [Nextcloud background jobs](#nextcloud-background-jobs)
19+
- [Backend](#backend)
20+
- [Local installation](#local-installation)
21+
- [`workflow_ocr_backend` installation](#workflow_ocr_backend-installation)
22+
- [Setup Checks](#setup-checks)
23+
- [Usage](#usage)
24+
- [Useful triggers](#useful-triggers)
25+
- [Trigger OCR if file was created or updated](#trigger-ocr-if-file-was-created-or-updated)
26+
- [Trigger OCR on tag assigning](#trigger-ocr-on-tag-assigning)
27+
- [Settings](#settings)
28+
- [Per workflow settings](#per-workflow-settings)
29+
- [Global settings](#global-settings)
30+
- [Testing your configuration](#testing-your-configuration)
31+
- [Get feedback via Notifications](#get-feedback-via-notifications)
32+
- [How it works](#how-it-works)
33+
- [General](#general)
34+
- [PDF](#pdf)
35+
- [Images](#images)
36+
- [Troubleshooting](#troubleshooting)
37+
- [Generic troubleshooting guide](#generic-troubleshooting-guide)
38+
- [The Nextcloud Workflowengine](#the-nextcloud-workflowengine)
39+
- [Development](#development)
40+
- [Dev setup](#dev-setup)
41+
- [Debugging](#debugging)
42+
- [`docker`-based setup](#docker-based-setup)
43+
- [Executing tests](#executing-tests)
44+
- [Adding a new `OcrProcessor`](#adding-a-new-ocrprocessor)
45+
- [Events emitted by the app](#events-emitted-by-the-app)
46+
- [`TextRecognizedEvent`](#textrecognizedevent)
47+
- [Limitations](#limitations)
48+
- [Used libraries \& components](#used-libraries--components)
4649

4750
## Setup
4851
### App installation
@@ -58,6 +61,11 @@ Since the actual processing of the files is done asynchronously via Nextcloud's
5861

5962

6063
### Backend
64+
65+
This app is based on `ocrmypdf`. You can either install the CLI directly on the server running Nextcloud or use the alternative backend setup via Docker.
66+
67+
#### Local installation
68+
6169
> :warning: Since `v1.20.1` you'll have to install `OCRmyPDF`.
6270
6371
In the backend [`OCRmyPDF`](https://github.com/jbarlow83/OCRmyPDF) is used for processing PDF files. Make sure you have this command-line tool installed. Make sure you have the appropriate version (see 'Used libraries' below).
@@ -81,6 +89,16 @@ apt-get install tesseract-ocr-deu
8189
apt-get install tesseract-ocr-chi-sim
8290
```
8391

92+
#### `workflow_ocr_backend` installation
93+
94+
Starting from version 30, Nextcloud added support for [AppApi](https://docs.nextcloud.com/server/latest/admin_manual/exapps_management/AppAPIAndExternalApps.html) apps. In essence this allows external container based applications to be integrated into the Nextcloud ecosystem. This app is using this feature to provide an alternative backend setup via Docker.
95+
96+
If everything is set up properly, you can just install the `workflow_ocr_backend` app from the [appstore](https://apps.nextcloud.com/apps/workflow_ocr_backend).
97+
98+
Please refer to **https://github.com/R0Wi-DEV/workflow_ocr_backend** for more information on how to set up the backend.
99+
100+
> :information_source: If the `workflow_ocr_backend` External App is installed, this "frontend" app will automatically use it as the backend even if you installed `ocrmypdf` locally.
101+
84102
### Setup Checks
85103

86104
The app will perform some [Setup Checks](https://docs.nextcloud.com/server/latest/admin_manual/configuration_server/security_setup_warnings.html) to verify your installation. If there is any problem with your backend setup, you'll see an error printed in Nextcloud under `Administration Settings` → `Overview` → `Security & setup warnings`.

lib/AppInfo/Application.php

+11
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,12 @@
3333
use OCA\WorkflowOcr\Helper\SidecarFileAccessor;
3434
use OCA\WorkflowOcr\Listener\RegisterFlowOperationsListener;
3535
use OCA\WorkflowOcr\Notification\Notifier;
36+
use OCA\WorkflowOcr\OcrProcessors\CommandLineUtils;
37+
use OCA\WorkflowOcr\OcrProcessors\ICommandLineUtils;
3638
use OCA\WorkflowOcr\OcrProcessors\IOcrProcessorFactory;
3739
use OCA\WorkflowOcr\OcrProcessors\OcrProcessorFactory;
40+
use OCA\WorkflowOcr\OcrProcessors\Remote\Client\ApiClient;
41+
use OCA\WorkflowOcr\OcrProcessors\Remote\Client\IApiClient;
3842
use OCA\WorkflowOcr\Service\EventService;
3943
use OCA\WorkflowOcr\Service\GlobalSettingsService;
4044
use OCA\WorkflowOcr\Service\IEventService;
@@ -46,8 +50,10 @@
4650
use OCA\WorkflowOcr\Service\OcrBackendInfoService;
4751
use OCA\WorkflowOcr\Service\OcrService;
4852
use OCA\WorkflowOcr\SetupChecks\OcrMyPdfCheck;
53+
use OCA\WorkflowOcr\Wrapper\AppApiWrapper;
4954
use OCA\WorkflowOcr\Wrapper\CommandWrapper;
5055
use OCA\WorkflowOcr\Wrapper\Filesystem;
56+
use OCA\WorkflowOcr\Wrapper\IAppApiWrapper;
5157
use OCA\WorkflowOcr\Wrapper\ICommand;
5258
use OCA\WorkflowOcr\Wrapper\IFilesystem;
5359
use OCA\WorkflowOcr\Wrapper\IViewFactory;
@@ -63,6 +69,8 @@
6369

6470
class Application extends App implements IBootstrap {
6571
public const APP_NAME = 'workflow_ocr';
72+
public const APP_BACKEND_NAME = 'workflow_ocr_backend';
73+
public const APP_API_APP_NAME = 'app_api';
6674

6775
/**
6876
* Application constructor.
@@ -83,6 +91,9 @@ public function register(IRegistrationContext $context): void {
8391
$context->registerServiceAlias(IEventService::class, EventService::class);
8492
$context->registerServiceAlias(IOcrBackendInfoService::class, OcrBackendInfoService::class);
8593
$context->registerServiceAlias(INotificationService::class, NotificationService::class);
94+
$context->registerServiceAlias(IApiClient::class, ApiClient::class);
95+
$context->registerServiceAlias(ICommandLineUtils::class, CommandLineUtils::class);
96+
$context->registerServiceAlias(IAppApiWrapper::class, AppApiWrapper::class);
8697

8798
// BUG #43
8899
$context->registerService(ICommand::class, function () {

lib/Exception/OcrProcessorNotFoundException.php

+2-2
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
use Exception;
2727

2828
/**
 * Thrown when no OCR processor is available for a given mime type.
 */
class OcrProcessorNotFoundException extends Exception {
	/**
	 * @param string $mimeType         Mime type for which no processor was found
	 * @param bool   $useRemoteBackend Backend flag to include in the message
	 */
	public function __construct(string $mimeType, bool $useRemoteBackend) {
		// Concatenating a bool directly yields '1' for true and '' for false,
		// so spell the flag out to keep the message readable in both cases.
		$backendFlag = $useRemoteBackend ? 'true' : 'false';
		parent::__construct('OCR processor for mime type ' . $mimeType . '(useRemoteBackend=' . $backendFlag . ') not found');
	}
}

lib/Helper/SidecarFileAccessor.php

+2
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@ public function getOrCreateSidecarFile() {
4949
$this->sidecarFilePath = $this->tempManager->getTemporaryFile('sidecar');
5050
if (!$this->sidecarFilePath) {
5151
$this->logger->warning('Could not create temporary sidecar file');
52+
} elseif (!is_writable($this->sidecarFilePath)) {
53+
$this->logger->warning('Temporary sidecar file is not writable');
5254
}
5355
}
5456
return $this->sidecarFilePath;

lib/Model/WorkflowSettings.php

+1-1
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ private function setJson(?string $json = null) {
168168
$this->setProperty($this->customCliArgs, $data, 'customCliArgs', fn ($value) => is_string($value));
169169
}
170170

171-
private function setProperty(& $property, array $jsonData, string $key, ?callable $dataCheck = null): void {
171+
private function setProperty(array|bool|int|string & $property, array $jsonData, string $key, ?callable $dataCheck = null): void {
172172
if (array_key_exists($key, $jsonData) && ($dataCheck === null || $dataCheck($jsonData[$key]))) {
173173
$property = $jsonData[$key];
174174
}

0 commit comments

Comments
 (0)