Compare commits
1 Commit
main
...
fix/pdf-du
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2afcdf7b77 |
@@ -1162,6 +1162,11 @@ Examples:
|
|||||||
print(f"Warning: Could not process {file_path}: {e}")
|
print(f"Warning: Could not process {file_path}: {e}")
|
||||||
|
|
||||||
# Load other file types with default reader
|
# Load other file types with default reader
|
||||||
|
# Exclude PDFs from code_extensions if they were already processed separately
|
||||||
|
other_file_extensions = code_extensions
|
||||||
|
if should_process_pdfs and ".pdf" in code_extensions:
|
||||||
|
other_file_extensions = [ext for ext in code_extensions if ext != ".pdf"]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Create a custom file filter function using our PathSpec
|
# Create a custom file filter function using our PathSpec
|
||||||
def file_filter(
|
def file_filter(
|
||||||
@@ -1177,15 +1182,19 @@ Examples:
|
|||||||
except (ValueError, OSError):
|
except (ValueError, OSError):
|
||||||
return True # Include files that can't be processed
|
return True # Include files that can't be processed
|
||||||
|
|
||||||
other_docs = SimpleDirectoryReader(
|
# Only load other file types if there are extensions to process
|
||||||
docs_dir,
|
if other_file_extensions:
|
||||||
recursive=True,
|
other_docs = SimpleDirectoryReader(
|
||||||
encoding="utf-8",
|
docs_dir,
|
||||||
required_exts=code_extensions,
|
recursive=True,
|
||||||
file_extractor={}, # Use default extractors
|
encoding="utf-8",
|
||||||
exclude_hidden=not include_hidden,
|
required_exts=other_file_extensions,
|
||||||
filename_as_id=True,
|
file_extractor={}, # Use default extractors
|
||||||
).load_data(show_progress=True)
|
exclude_hidden=not include_hidden,
|
||||||
|
filename_as_id=True,
|
||||||
|
).load_data(show_progress=True)
|
||||||
|
else:
|
||||||
|
other_docs = []
|
||||||
|
|
||||||
# Filter documents after loading based on gitignore rules
|
# Filter documents after loading based on gitignore rules
|
||||||
filtered_docs = []
|
filtered_docs = []
|
||||||
|
|||||||
Reference in New Issue
Block a user