[chat] update 30s example

This commit is contained in:
yichuan520030910320
2025-07-24 14:40:33 -07:00
parent 1614203786
commit de252fef31
4 changed files with 81 additions and 56 deletions

View File

@@ -44,8 +44,8 @@
" from .autonotebook import tqdm as notebook_tqdm\n", " from .autonotebook import tqdm as notebook_tqdm\n",
"INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: facebook/contriever\n", "INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: facebook/contriever\n",
"WARNING:sentence_transformers.SentenceTransformer:No sentence-transformers model found with name facebook/contriever. Creating a new one with mean pooling.\n", "WARNING:sentence_transformers.SentenceTransformer:No sentence-transformers model found with name facebook/contriever. Creating a new one with mean pooling.\n",
"Writing passages: 100%|██████████| 5/5 [00:00<00:00, 31254.13chunk/s]\n", "Writing passages: 100%|██████████| 5/5 [00:00<00:00, 27887.66chunk/s]\n",
"Batches: 100%|██████████| 1/1 [00:00<00:00, 12.19it/s]\n", "Batches: 100%|██████████| 1/1 [00:00<00:00, 13.51it/s]\n",
"WARNING:leann_backend_hnsw.hnsw_backend:Converting data to float32, shape: (5, 768)\n", "WARNING:leann_backend_hnsw.hnsw_backend:Converting data to float32, shape: (5, 768)\n",
"INFO:leann_backend_hnsw.hnsw_backend:INFO: Converting HNSW index to CSR-pruned format...\n" "INFO:leann_backend_hnsw.hnsw_backend:INFO: Converting HNSW index to CSR-pruned format...\n"
] ]
@@ -64,32 +64,32 @@
" Reading vector (dtype=<class 'numpy.int32'>, fmt='i')... Count=7, Bytes=28\n", " Reading vector (dtype=<class 'numpy.int32'>, fmt='i')... Count=7, Bytes=28\n",
"[0.11s] Read cum_nneighbor_per_level (7)\n", "[0.11s] Read cum_nneighbor_per_level (7)\n",
" Reading vector (dtype=<class 'numpy.int32'>, fmt='i')... Count=5, Bytes=20\n", " Reading vector (dtype=<class 'numpy.int32'>, fmt='i')... Count=5, Bytes=20\n",
"[0.23s] Read levels (5)\n", "[0.21s] Read levels (5)\n",
"[0.34s] Probing for compact storage flag...\n", "[0.30s] Probing for compact storage flag...\n",
"[0.34s] Found compact flag: False\n", "[0.30s] Found compact flag: False\n",
"[0.34s] Compact flag is False, reading original format...\n", "[0.30s] Compact flag is False, reading original format...\n",
"[0.34s] Probing for potential extra byte before non-compact offsets...\n", "[0.30s] Probing for potential extra byte before non-compact offsets...\n",
"[0.34s] Found and consumed an unexpected 0x00 byte.\n", "[0.30s] Found and consumed an unexpected 0x00 byte.\n",
" Reading vector (dtype=<class 'numpy.uint64'>, fmt='Q')... Count=6, Bytes=48\n", " Reading vector (dtype=<class 'numpy.uint64'>, fmt='Q')... Count=6, Bytes=48\n",
"[0.34s] Read offsets (6)\n", "[0.30s] Read offsets (6)\n",
"[0.44s] Attempting to read neighbors vector...\n", "[0.40s] Attempting to read neighbors vector...\n",
" Reading vector (dtype=<class 'numpy.int32'>, fmt='i')... Count=320, Bytes=1280\n", " Reading vector (dtype=<class 'numpy.int32'>, fmt='i')... Count=320, Bytes=1280\n",
"[0.44s] Read neighbors (320)\n", "[0.40s] Read neighbors (320)\n",
"[0.54s] Read scalar params (ep=4, max_lvl=0)\n", "[0.50s] Read scalar params (ep=4, max_lvl=0)\n",
"[0.54s] Checking for storage data...\n", "[0.50s] Checking for storage data...\n",
"[0.54s] Found storage fourcc: 49467849.\n", "[0.50s] Found storage fourcc: 49467849.\n",
"[0.54s] Converting to CSR format...\n", "[0.50s] Converting to CSR format...\n",
"[0.54s] Conversion loop finished. \n", "[0.50s] Conversion loop finished. \n",
"[0.54s] Running validation checks...\n", "[0.50s] Running validation checks...\n",
" Checking total valid neighbor count...\n", " Checking total valid neighbor count...\n",
" OK: Total valid neighbors = 20\n", " OK: Total valid neighbors = 20\n",
" Checking final pointer indices...\n", " Checking final pointer indices...\n",
" OK: Final pointers match data size.\n", " OK: Final pointers match data size.\n",
"[0.54s] Deleting original neighbors and offsets arrays...\n", "[0.50s] Deleting original neighbors and offsets arrays...\n",
" CSR Stats: |data|=20, |level_ptr|=10\n", " CSR Stats: |data|=20, |level_ptr|=10\n",
"[0.63s] Writing CSR HNSW graph data in FAISS-compatible order...\n", "[0.59s] Writing CSR HNSW graph data in FAISS-compatible order...\n",
" Pruning embeddings: Writing NULL storage marker.\n", " Pruning embeddings: Writing NULL storage marker.\n",
"[0.73s] Conversion complete.\n" "[0.69s] Conversion complete.\n"
] ]
}, },
{ {
@@ -105,8 +105,8 @@
"from leann.api import LeannBuilder\n", "from leann.api import LeannBuilder\n",
"\n", "\n",
"builder = LeannBuilder(backend_name=\"hnsw\")\n", "builder = LeannBuilder(backend_name=\"hnsw\")\n",
"builder.add_text(\"C# is a powerful programming language\")\n", "builder.add_text(\"C# is a powerful programming language and it is good at game development\")\n",
"builder.add_text(\"Python is a powerful programming language and it is very popular\")\n", "builder.add_text(\"Python is a powerful programming language and it is good at machine learning tasks\")\n",
"builder.add_text(\"Machine learning transforms industries\")\n", "builder.add_text(\"Machine learning transforms industries\")\n",
"builder.add_text(\"Neural networks process complex data\")\n", "builder.add_text(\"Neural networks process complex data\")\n",
"builder.add_text(\"Leann is a great storage saving engine for RAG on your MacBook\")\n", "builder.add_text(\"Leann is a great storage saving engine for RAG on your MacBook\")\n",
@@ -136,19 +136,14 @@
"INFO:leann.embedding_server_manager:Port 5557 has incompatible server, trying next port...\n", "INFO:leann.embedding_server_manager:Port 5557 has incompatible server, trying next port...\n",
"INFO:leann.embedding_server_manager:Port 5558 has incompatible server, trying next port...\n", "INFO:leann.embedding_server_manager:Port 5558 has incompatible server, trying next port...\n",
"INFO:leann.embedding_server_manager:Port 5559 has incompatible server, trying next port...\n", "INFO:leann.embedding_server_manager:Port 5559 has incompatible server, trying next port...\n",
"INFO:leann.embedding_server_manager:Found compatible server on port 5560\n", "INFO:leann.embedding_server_manager:Using port 5560 instead of 5557\n",
"INFO:leann.embedding_server_manager:Using existing compatible server on port 5560\n", "INFO:leann.embedding_server_manager:Starting embedding server on port 5560...\n",
"INFO:leann.api: Launching server time: 0.05758476257324219 seconds\n", "INFO:leann.embedding_server_manager:Command: /Users/yichuan/Desktop/code/LEANN/leann/.venv/bin/python -m leann_backend_hnsw.hnsw_embedding_server --zmq-port 5560 --model-name facebook/contriever --passages-file knowledge.leann.meta.json\n",
"INFO:leann.embedding_server_manager:Found compatible server on port 5560\n", "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
"INFO:leann.embedding_server_manager:Using existing compatible server on port 5560\n", "To disable this warning, you can either:\n",
"INFO:leann.api: Generated embedding shape: (1, 768)\n", "\t- Avoid using `tokenizers` before the fork if possible\n",
"INFO:leann.api: Embedding time: 0.05983591079711914 seconds\n", "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n",
"INFO:leann.api: Search time: 0.039762258529663086 seconds\n", "INFO:leann.embedding_server_manager:Server process started with PID: 4574\n"
"INFO:leann.api: Backend returned: labels=2 results\n",
"INFO:leann.api: Processing 2 passage IDs:\n",
"INFO:leann.api: 1. passage_id='0' -> SUCCESS: C# is a powerful programming language...\n",
"INFO:leann.api: 2. passage_id='1' -> SUCCESS: Python is a powerful programming language and it is very popular...\n",
"INFO:leann.api: Final enriched results: 2 passages\n"
] ]
}, },
{ {
@@ -167,14 +162,46 @@
"[read_HNSW NL v4] Read neighbors data, size: 20\n", "[read_HNSW NL v4] Read neighbors data, size: 20\n",
"[read_HNSW NL v4] Finished reading metadata and CSR indices.\n", "[read_HNSW NL v4] Finished reading metadata and CSR indices.\n",
"INFO: Skipping external storage loading, since is_recompute is true.\n", "INFO: Skipping external storage loading, since is_recompute is true.\n",
"INFO: Registering backend 'hnsw'\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:leann.embedding_server_manager:Embedding server is ready!\n",
"INFO:leann.api: Launching server time: 1.078078269958496 seconds\n",
"INFO:leann.embedding_server_manager:Existing server process (PID 4574) is compatible\n",
"INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: facebook/contriever\n",
"WARNING:sentence_transformers.SentenceTransformer:No sentence-transformers model found with name facebook/contriever. Creating a new one with mean pooling.\n",
"INFO:leann.api: Generated embedding shape: (1, 768)\n",
"INFO:leann.api: Embedding time: 2.9307072162628174 seconds\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"ZmqDistanceComputer initialized: d=768, metric=0\n" "ZmqDistanceComputer initialized: d=768, metric=0\n"
] ]
}, },
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:leann.api: Search time: 0.27327895164489746 seconds\n",
"INFO:leann.api: Backend returned: labels=2 results\n",
"INFO:leann.api: Processing 2 passage IDs:\n",
"INFO:leann.api: 1. passage_id='0' -> SUCCESS: C# is a powerful programming language and it is good at game development...\n",
"INFO:leann.api: 2. passage_id='1' -> SUCCESS: Python is a powerful programming language and it is good at machine learning tasks...\n",
"INFO:leann.api: Final enriched results: 2 passages\n"
]
},
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"[SearchResult(id='0', score=np.float32(0.9646692), text='C# is a powerful programming language', metadata={}),\n", "[SearchResult(id='0', score=np.float32(0.9874103), text='C# is a powerful programming language and it is good at game development', metadata={}),\n",
" SearchResult(id='1', score=np.float32(0.91955304), text='Python is a powerful programming language and it is very popular', metadata={})]" " SearchResult(id='1', score=np.float32(0.8922168), text='Python is a powerful programming language and it is good at machine learning tasks', metadata={})]"
] ]
}, },
"execution_count": 2, "execution_count": 2,
@@ -199,7 +226,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": null,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@@ -207,14 +234,14 @@
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"INFO:leann.chat:Attempting to create LLM of type='hf' with model='Qwen/Qwen3-0.6B'\n", "INFO:leann.chat:Attempting to create LLM of type='hf' with model='Qwen/Qwen3-0.6B'\n",
"INFO:leann.chat:Initializing HFChat with model='Qwen/Qwen3-0.6B'\n" "INFO:leann.chat:Initializing HFChat with model='Qwen/Qwen3-0.6B'\n",
"INFO:leann.chat:MPS is available. Using Apple Silicon GPU.\n"
] ]
}, },
{ {
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"INFO: Registering backend 'hnsw'\n",
"[read_HNSW - CSR NL v4] Reading metadata & CSR indices (manual offset)...\n", "[read_HNSW - CSR NL v4] Reading metadata & CSR indices (manual offset)...\n",
"[read_HNSW NL v4] Read levels vector, size: 5\n", "[read_HNSW NL v4] Read levels vector, size: 5\n",
"[read_HNSW NL v4] Reading Compact Storage format indices...\n", "[read_HNSW NL v4] Reading Compact Storage format indices...\n",
@@ -233,11 +260,8 @@
"name": "stderr", "name": "stderr",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"/Users/yichuan/Desktop/code/LEANN/leann/.venv/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n",
"INFO:leann.chat:MPS is available. Using Apple Silicon GPU.\n",
"INFO:leann.api:🔍 LeannSearcher.search() called:\n", "INFO:leann.api:🔍 LeannSearcher.search() called:\n",
"INFO:leann.api: Query: 'Compare the two retrieved programming languages and say which one is more popular today.'\n", "INFO:leann.api: Query: 'Compare the two retrieved programming languages and tell me their advantages.'\n",
"INFO:leann.api: Top_k: 2\n", "INFO:leann.api: Top_k: 2\n",
"INFO:leann.api: Additional kwargs: {}\n", "INFO:leann.api: Additional kwargs: {}\n",
"INFO:leann.embedding_server_manager:Port 5557 has incompatible server, trying next port...\n", "INFO:leann.embedding_server_manager:Port 5557 has incompatible server, trying next port...\n",
@@ -245,18 +269,18 @@
"INFO:leann.embedding_server_manager:Port 5559 has incompatible server, trying next port...\n", "INFO:leann.embedding_server_manager:Port 5559 has incompatible server, trying next port...\n",
"INFO:leann.embedding_server_manager:Found compatible server on port 5560\n", "INFO:leann.embedding_server_manager:Found compatible server on port 5560\n",
"INFO:leann.embedding_server_manager:Using existing compatible server on port 5560\n", "INFO:leann.embedding_server_manager:Using existing compatible server on port 5560\n",
"INFO:leann.api: Launching server time: 0.11421084403991699 seconds\n", "INFO:leann.api: Launching server time: 0.04932403564453125 seconds\n",
"INFO:leann.embedding_server_manager:Found compatible server on port 5560\n", "INFO:leann.embedding_server_manager:Found compatible server on port 5560\n",
"INFO:leann.embedding_server_manager:Using existing compatible server on port 5560\n", "INFO:leann.embedding_server_manager:Using existing compatible server on port 5560\n",
"INFO:leann.api: Generated embedding shape: (1, 768)\n", "INFO:leann.api: Generated embedding shape: (1, 768)\n",
"INFO:leann.api: Embedding time: 0.1147918701171875 seconds\n", "INFO:leann.api: Embedding time: 0.06902289390563965 seconds\n",
"INFO:leann.api: Search time: 0.05468583106994629 seconds\n", "INFO:leann.api: Search time: 0.026793241500854492 seconds\n",
"INFO:leann.api: Backend returned: labels=2 results\n", "INFO:leann.api: Backend returned: labels=2 results\n",
"INFO:leann.api: Processing 2 passage IDs:\n", "INFO:leann.api: Processing 2 passage IDs:\n",
"INFO:leann.api: 1. passage_id='1' -> SUCCESS: Python is a powerful programming language and it is very popular...\n", "INFO:leann.api: 1. passage_id='0' -> SUCCESS: C# is a powerful programming language and it is good at game development...\n",
"INFO:leann.api: 2. passage_id='0' -> SUCCESS: C# is a powerful programming language...\n", "INFO:leann.api: 2. passage_id='1' -> SUCCESS: Python is a powerful programming language and it is good at machine learning tasks...\n",
"INFO:leann.api: Final enriched results: 2 passages\n", "INFO:leann.api: Final enriched results: 2 passages\n",
"INFO:leann.chat:Generating with HuggingFace model, config: {'max_new_tokens': 512, 'temperature': 0.7, 'top_p': 0.9, 'do_sample': True, 'pad_token_id': 151645, 'eos_token_id': 151645}\n" "INFO:leann.chat:Generating with HuggingFace model, config: {'max_new_tokens': 128, 'temperature': 0.7, 'top_p': 0.9, 'do_sample': True, 'pad_token_id': 151645, 'eos_token_id': 151645}\n"
] ]
}, },
{ {
@@ -269,10 +293,10 @@
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"'<think>\\n\\n</think>\\n\\nBased on the context provided, both Python and C# are mentioned as powerful programming languages, but no specific information is given about their popularity today. However, generally, Python is more popular for data science, web development, and other tasks, while C# is widely used in enterprise applications and game development. Since the context does not explicitly state which is more popular, but Python is often considered more popular in many cases, the best answer would be:\\n\\n**Python is more popular today.**'" "\"<think>\\n\\n</think>\\n\\nBased on the context provided, here's a comparison of the two retrieved programming languages:\\n\\n**C#** is known for being a powerful programming language and is well-suited for game development. It is often used in game development and is popular among developers working on Windows applications.\\n\\n**Python**, on the other hand, is also a powerful language and is well-suited for machine learning tasks. It is widely used for data analysis, scientific computing, and other applications that require handling large datasets or performing complex calculations.\\n\\n**Advantages**:\\n- C#: Strong for game development and cross-platform compatibility.\\n- Python: Strong for\""
] ]
}, },
"execution_count": 1, "execution_count": 8,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@@ -282,13 +306,14 @@
"\n", "\n",
"llm_config = {\n", "llm_config = {\n",
" \"type\": \"hf\",\n", " \"type\": \"hf\",\n",
" \"model\": \"Qwen/Qwen3-0.6B\"\n", " \"model\": \"Qwen/Qwen3-0.6B\",\n",
"}\n", "}\n",
"\n", "\n",
"chat = LeannChat(index_path=\"knowledge.leann\", llm_config=llm_config)\n", "chat = LeannChat(index_path=\"knowledge.leann\", llm_config=llm_config)\n",
"response = chat.ask(\n", "response = chat.ask(\n",
" \"Compare the two retrieved programming languages and say which one is more popular today.\",\n", " \"Compare the two retrieved programming languages and tell me their advantages.\",\n",
" top_k=2,\n", " top_k=2,\n",
" llm_kwargs={\"max_tokens\": 128}\n",
")\n", ")\n",
"response" "response"
] ]

View File

@@ -222,7 +222,6 @@ async def query_leann_index(index_path: str, query: str):
recompute_beighbor_embeddings=True, recompute_beighbor_embeddings=True,
complexity=32, complexity=32,
beam_width=1, beam_width=1,
) )
end_time = time.time() end_time = time.time()
print(f"Time taken: {end_time - start_time} seconds") print(f"Time taken: {end_time - start_time} seconds")

View File

@@ -549,6 +549,7 @@ class HFChat(LLMInterface):
self.tokenizer.pad_token = self.tokenizer.eos_token self.tokenizer.pad_token = self.tokenizer.eos_token
def ask(self, prompt: str, **kwargs) -> str: def ask(self, prompt: str, **kwargs) -> str:
print('kwargs in HF: ', kwargs)
# Check if this is a Qwen model and add /no_think by default # Check if this is a Qwen model and add /no_think by default
is_qwen_model = "qwen" in self.model.config._name_or_path.lower() is_qwen_model = "qwen" in self.model.config._name_or_path.lower()