Initial commit

This commit is contained in:
yichuan520030910320
2025-06-30 11:01:12 +00:00
parent 30898814ae
commit ee507bfe7a
5 changed files with 1057 additions and 295 deletions

View File

@@ -21,7 +21,7 @@ file_extractor: dict[str, BaseReader] = {
".xlsx": reader,
}
node_parser = DoclingNodeParser(
-chunker=HybridChunker(tokenizer="Qwen/Qwen3-Embedding-4B", max_tokens=10240)
+chunker=HybridChunker(tokenizer="Qwen/Qwen3-Embedding-4B", max_tokens=256)
)
documents = SimpleDirectoryReader(