rm useless
@@ -23,7 +23,7 @@ file_extractor: dict[str, BaseReader] = {
     ".xlsx": reader,
 }
 node_parser = DoclingNodeParser(
-    chunker=HybridChunker(tokenizer="Qwen/Qwen3-Embedding-4B", max_tokens=64)
+    chunker=HybridChunker(tokenizer="Qwen/Qwen3-Embedding-4B", max_tokens=128)
 )
 print("Loading documents...")
 documents = SimpleDirectoryReader(
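For context, a minimal sketch of how the chunker configuration above is typically wired together. The import paths, the DoclingReader instance, and the "data" input directory are assumptions for illustration, not part of this commit:

# Sketch only: wiring HybridChunker into DoclingNodeParser and SimpleDirectoryReader.
from docling.chunking import HybridChunker
from llama_index.core import SimpleDirectoryReader
from llama_index.node_parser.docling import DoclingNodeParser
from llama_index.readers.docling import DoclingReader

reader = DoclingReader()
file_extractor = {".xlsx": reader}  # route spreadsheets through Docling

# max_tokens=128 doubles the chunk budget relative to the previous value of 64,
# measured with the Qwen/Qwen3-Embedding-4B tokenizer.
node_parser = DoclingNodeParser(
    chunker=HybridChunker(tokenizer="Qwen/Qwen3-Embedding-4B", max_tokens=128)
)

documents = SimpleDirectoryReader("data", file_extractor=file_extractor).load_data()
nodes = node_parser.get_nodes_from_documents(documents)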
@@ -32,6 +32,7 @@ def _compute_embeddings(chunks: List[str], model_name: str) -> np.ndarray:
     else:
         from sentence_transformers import SentenceTransformer
         model = SentenceTransformer(model_name)
+        model = model.half()
         print(f"INFO: Computing embeddings for {len(chunks)} chunks using SentenceTransformer model '{model_name}'...")
         embeddings = model.encode(chunks, show_progress_bar=True)
 
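A condensed, standalone sketch of the SentenceTransformer branch touched above. The helper name is hypothetical, the other branch of _compute_embeddings is omitted, and a CUDA-capable GPU is assumed (fp16 via model.half() is generally not worthwhile on CPU):

# Sketch only: fp16 embedding computation with sentence-transformers.
from typing import List

import numpy as np
from sentence_transformers import SentenceTransformer

def compute_st_embeddings(chunks: List[str], model_name: str) -> np.ndarray:
    # Hypothetical helper mirroring only the branch shown in the diff.
    model = SentenceTransformer(model_name)
    model = model.half()  # cast weights to fp16 to roughly halve GPU memory use
    print(f"INFO: Computing embeddings for {len(chunks)} chunks using SentenceTransformer model '{model_name}'...")
    return model.encode(chunks, show_progress_bar=True)  # returns a NumPy array by default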