Fix main CLI: build the index inside main() and add an --index-dir option

This commit is contained in:
yichuan520030910320
2025-07-15 21:19:16 -07:00
parent 326783f7f1
commit dec3ee85fd
2 changed files with 36 additions and 55 deletions

View File

@@ -14,7 +14,7 @@ dotenv.load_dotenv()
node_parser = SentenceSplitter(
chunk_size=256,
chunk_overlap=20,
chunk_overlap=64,
separator=" ",
paragraph_separator="\n\n"
)
@@ -32,41 +32,40 @@ for doc in documents:
for node in nodes:
all_texts.append(node.get_content())
INDEX_DIR = Path("./test_pdf_index_pangu_test")
INDEX_PATH = str(INDEX_DIR / "pdf_documents.leann")
if not INDEX_DIR.exists():
print(f"--- Index directory not found, building new index ---")
print(f"\n[PHASE 1] Building Leann index...")
# Use HNSW backend for better macOS compatibility
builder = LeannBuilder(
backend_name="hnsw",
embedding_model="facebook/contriever",
graph_degree=32,
complexity=64,
is_compact=True,
is_recompute=True,
num_threads=1 # Force single-threaded mode
)
print(f"Loaded {len(all_texts)} text chunks from documents.")
for chunk_text in all_texts:
builder.add_text(chunk_text)
builder.build_index(INDEX_PATH)
print(f"\nLeann index built at {INDEX_PATH}!")
else:
print(f"--- Using existing index at {INDEX_DIR} ---")
async def main(args):
INDEX_DIR = Path(args.index_dir)
INDEX_PATH = str(INDEX_DIR / "pdf_documents.leann")
if not INDEX_DIR.exists():
print(f"--- Index directory not found, building new index ---")
print(f"\n[PHASE 1] Building Leann index...")
# Use HNSW backend for better macOS compatibility
builder = LeannBuilder(
backend_name="hnsw",
embedding_model="facebook/contriever",
graph_degree=32,
complexity=64,
is_compact=True,
is_recompute=True,
num_threads=1 # Force single-threaded mode
)
print(f"Loaded {len(all_texts)} text chunks from documents.")
for chunk_text in all_texts:
builder.add_text(chunk_text)
builder.build_index(INDEX_PATH)
print(f"\nLeann index built at {INDEX_PATH}!")
else:
print(f"--- Using existing index at {INDEX_DIR} ---")
print(f"\n[PHASE 2] Starting Leann chat session...")
llm_config = {
"type": args.llm,
"model": args.model,
"host": args.host
"type": "ollama", "model": "Qwen/Qwen3-8B"
}
chat = LeannChat(index_path=INDEX_PATH, llm_config=llm_config)
@@ -82,8 +81,9 @@ async def main(args):
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Run Leann Chat with various LLM backends.")
parser.add_argument("--llm", type=str, default="hf", choices=["simulated", "ollama", "hf", "openai"], help="The LLM backend to use.")
parser.add_argument("--model", type=str, default='meta-llama/Llama-3.2-3B-Instruct', help="The model name to use (e.g., 'llama3:8b' for ollama, 'deepseek-ai/deepseek-llm-7b-chat' for hf, 'gpt-4o' for openai).")
parser.add_argument("--model", type=str, default='Qwen/Qwen3-0.6B', help="The model name to use (e.g., 'llama3:8b' for ollama, 'deepseek-ai/deepseek-llm-7b-chat' for hf, 'gpt-4o' for openai).")
parser.add_argument("--host", type=str, default="http://localhost:11434", help="The host for the Ollama API.")
parser.add_argument("--index-dir", type=str, default="./test_pdf_index_pangu_test", help="Directory where the Leann index will be stored.")
args = parser.parse_args()
asyncio.run(main(args))