Skip to content

Commit e48044d

Browse files
authored
feat: send all to megaparse_sdk (#3521)
# Description

- Send all to megaparse sdk

Co-authored-by: aminediro <[email protected]>
1 parent d6e0ed4 commit e48044d

File tree

3 files changed

+12
-7
lines changed

3 files changed

+12
-7
lines changed

core/pyproject.toml

+2 -4
Original file line number | Diff line number | Diff line change
@@ -2,9 +2,7 @@
22
name = "quivr-core"
33
version = "0.0.26"
44
description = "Quivr core RAG package"
5-
authors = [
6-
{ name = "Stan Girard", email = "[email protected]" }
7-
]
5+
authors = [{ name = "Stan Girard", email = "[email protected]" }]
86
dependencies = [
97
"pydantic>=2.8.2",
108
"langchain-core>=0.2.38",
@@ -23,7 +21,7 @@ dependencies = [
2321
"faiss-cpu>=1.8.0.post1",
2422
"rapidfuzz>=3.10.1",
2523
"markupsafe>=2.1.5",
26-
"megaparse-sdk==0.1.7"
24+
"megaparse-sdk>=0.1.9",
2725
]
2826
readme = "README.md"
2927
requires-python = ">= 3.11"

core/quivr_core/processor/implementations/megaparse_processor.py

+1 -2
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,7 @@ class MegaparseProcessor(ProcessorBase):
3131
"""
3232

3333
supported_extensions = [
34+
FileExtension.txt,
3435
FileExtension.pdf,
3536
FileExtension.docx,
3637
FileExtension.doc,
@@ -42,11 +43,9 @@ class MegaparseProcessor(ProcessorBase):
4243
FileExtension.bib,
4344
FileExtension.odt,
4445
FileExtension.html,
45-
FileExtension.py,
4646
FileExtension.markdown,
4747
FileExtension.md,
4848
FileExtension.mdx,
49-
FileExtension.ipynb,
5049
]
5150

5251
def __init__(

core/quivr_core/processor/registry.py

+9 -1
Original file line number | Diff line number | Diff line change
@@ -124,13 +124,21 @@ def defaults_to_proc_entries(
124124
_append_proc_mapping(
125125
mapping=base_processors,
126126
file_exts=[
127+
FileExtension.txt,
127128
FileExtension.pdf,
128-
FileExtension.xls,
129129
FileExtension.docx,
130+
FileExtension.doc,
130131
FileExtension.pptx,
132+
FileExtension.xls,
133+
FileExtension.xlsx,
134+
FileExtension.csv,
131135
FileExtension.epub,
136+
FileExtension.bib,
132137
FileExtension.odt,
133138
FileExtension.html,
139+
FileExtension.markdown,
140+
FileExtension.md,
141+
FileExtension.mdx,
134142
],
135143
cls_mod="quivr_core.processor.implementations.megaparse_processor.MegaparseProcessor",
136144
errtxt=f"can't import MegaparseProcessor. Please install quivr-core[{ext_str}] to access MegaparseProcessor",

0 commit comments

Comments
 (0)