Skip to content

Commit 2394ac8

Browse files
shahbazaamir authored and ilayaperumalg committed
Added test cases to cover usage of ExtractedTextFormatter
Signed-off-by: shahbazaamir <[email protected]>
1 parent 608b29c commit 2394ac8

File tree

1 file changed

+26
-1
lines changed

1 file changed

+26
-1
lines changed

Diff for: document-readers/tika-reader/src/test/java/org/springframework/ai/reader/tika/TikaDocumentReaderTests.java

+26-1
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,14 @@
1818

1919
import org.junit.jupiter.params.ParameterizedTest;
2020
import org.junit.jupiter.params.provider.CsvSource;
21-
21+
import org.springframework.ai.reader.ExtractedTextFormatter;
2222
import static org.assertj.core.api.Assertions.assertThat;
23+
import static org.junit.jupiter.api.Assertions.assertFalse;
24+
import static org.junit.jupiter.api.Assertions.assertTrue;
2325

2426
/**
2527
* @author Christian Tzolov
28+
* @author Shahbaz Aamir
2629
*/
2730
public class TikaDocumentReaderTests {
2831

@@ -46,4 +49,26 @@ public void testDocx(String resourceUri, String resourceName, String contentSnip
4649
assertThat(doc.getText()).contains(contentSnipped);
4750
}
4851

52+
@ParameterizedTest
53+
@CsvSource({
54+
"classpath:/word-sample.docx,word-sample.docx,This document demonstrates the ability of the calibre DOCX Input plugin",
55+
"classpath:/sample2.pdf,sample2.pdf,Robert Maron", "classpath:/sample.ppt,sample.ppt,Sample FILE",
56+
"classpath:/sample.pptx,sample.pptx,Sample FILE" })
57+
public void testReaderWithFormatter(String resourceUri, String resourceName, String contentSnipped) {
58+
59+
ExtractedTextFormatter formatter = ExtractedTextFormatter.builder().withNumberOfTopTextLinesToDelete(5).build();
60+
var docs = new TikaDocumentReader(resourceUri, formatter).get();
61+
62+
assertThat(docs).hasSize(1);
63+
64+
var doc = docs.get(0);
65+
66+
assertThat(doc.getMetadata()).containsKeys(TikaDocumentReader.METADATA_SOURCE);
67+
assertThat(doc.getMetadata().get(TikaDocumentReader.METADATA_SOURCE)).isEqualTo(resourceName);
68+
assertFalse(doc.getText().contains(contentSnipped));
69+
docs = new TikaDocumentReader(resourceUri).get();
70+
doc = docs.get(0);
71+
assertThat(doc.getText()).contains(contentSnipped);
72+
}
73+
4974
}

0 commit comments

Comments
 (0)