From 0db81c16cd890772c38d9c0746f67e58827bdf4d Mon Sep 17 00:00:00 2001 From: yichuan520030910320 Date: Thu, 17 Jul 2025 12:58:11 -0700 Subject: [PATCH] update readme and chrome example --- demo.ipynb | 182 ++++++++---------------- examples/google_history_reader_leann.py | 4 +- 2 files changed, 64 insertions(+), 122 deletions(-) diff --git a/demo.ipynb b/demo.ipynb index 92f8ea4..b39a56f 100644 --- a/demo.ipynb +++ b/demo.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 3, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -25,7 +25,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Batches: 100%|██████████| 1/1 [00:00<00:00, 13.92it/s]\n", + "Batches: 100%|██████████| 1/1 [00:00<00:00, 4.51it/s]\n", "INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: mps\n", "INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: facebook/contriever-msmarco\n", "WARNING:sentence_transformers.SentenceTransformer:No sentence-transformers model found with name facebook/contriever-msmarco. Creating a new one with mean pooling.\n" @@ -42,7 +42,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Batches: 100%|██████████| 1/1 [00:00<00:00, 50.18it/s]" + "Batches: 100%|██████████| 1/1 [00:00<00:00, 14.51it/s]" ] }, { @@ -56,10 +56,7 @@ "[0.00s] Header read: d=768, ntotal=5\n", "[0.00s] Reading HNSW struct vectors...\n", " Reading vector (dtype=, fmt='d')... Count=6, Bytes=48\n", - "[0.00s] Read assign_probas (6)\n", - " Reading vector (dtype=, fmt='i')... Count=7, Bytes=28\n", - "[0.13s] Read cum_nneighbor_per_level (7)\n", - " Reading vector (dtype=, fmt='i')... " + "[0.00s] Read assign_probas (6)\n" ] }, { @@ -73,8 +70,10 @@ "name": "stdout", "output_type": "stream", "text": [ - "Count=5, Bytes=20\n", - "[0.23s] Read levels (5)\n", + " Reading vector (dtype=, fmt='i')... Count=7, Bytes=28\n", + "[0.14s] Read cum_nneighbor_per_level (7)\n", + " Reading vector (dtype=, fmt='i')... Count=5, Bytes=20\n", + "[0.22s] Read levels (5)\n", "[0.31s] Probing for compact storage flag...\n", "[0.31s] Found compact flag: False\n", "[0.31s] Compact flag is False, reading original format...\n", @@ -82,24 +81,24 @@ "[0.31s] Found and consumed an unexpected 0x00 byte.\n", " Reading vector (dtype=, fmt='Q')... Count=6, Bytes=48\n", "[0.31s] Read offsets (6)\n", - "[0.40s] Attempting to read neighbors vector...\n", + "[0.39s] Attempting to read neighbors vector...\n", " Reading vector (dtype=, fmt='i')... Count=320, Bytes=1280\n", - "[0.40s] Read neighbors (320)\n", - "[0.49s] Read scalar params (ep=4, max_lvl=0)\n", - "[0.49s] Checking for storage data...\n", - "[0.49s] Found storage fourcc: 49467849.\n", - "[0.49s] Converting to CSR format...\n", - "[0.49s] Conversion loop finished. \n", - "[0.49s] Running validation checks...\n", + "[0.39s] Read neighbors (320)\n", + "[0.47s] Read scalar params (ep=4, max_lvl=0)\n", + "[0.47s] Checking for storage data...\n", + "[0.47s] Found storage fourcc: 49467849.\n", + "[0.47s] Converting to CSR format...\n", + "[0.47s] Conversion loop finished. \n", + "[0.47s] Running validation checks...\n", " Checking total valid neighbor count...\n", " OK: Total valid neighbors = 20\n", " Checking final pointer indices...\n", " OK: Final pointers match data size.\n", - "[0.49s] Deleting original neighbors and offsets arrays...\n", + "[0.47s] Deleting original neighbors and offsets arrays...\n", " CSR Stats: |data|=20, |level_ptr|=10\n", - "[0.57s] Writing CSR HNSW graph data in FAISS-compatible order...\n", + "[0.56s] Writing CSR HNSW graph data in FAISS-compatible order...\n", " Pruning embeddings: Writing NULL storage marker.\n", - "[0.66s] Conversion complete.\n" + "[0.64s] Conversion complete.\n" ] }, { @@ -118,7 +117,6 @@ "✅ CSR conversion successful.\n", "INFO: Replaced original index with CSR-pruned version at 'knowledge.index'\n", "[read_HNSW - CSR NL v4] Reading metadata & CSR indices (manual offset)...\n", - "INFO: Terminating session server process (PID: 70979)...\n", "[read_HNSW NL v4] Read levels vector, size: 5\n", "[read_HNSW NL v4] Reading Compact Storage format indices...\n", "[read_HNSW NL v4] Read compact_level_ptr, size: 10\n", @@ -130,6 +128,7 @@ "[read_HNSW NL v4] Read neighbors data, size: 20\n", "[read_HNSW NL v4] Finished reading metadata and CSR indices.\n", "INFO: Skipping external storage loading, since is_recompute is true.\n", + "INFO: Terminating session server process (PID: 77679)...\n", "INFO: Server process terminated.\n", "🔍 DEBUG LeannSearcher.search() called:\n", " Query: 'programming languages'\n", @@ -142,7 +141,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Batches: 100%|██████████| 1/1 [00:00<00:00, 12.04it/s]" + "Batches: 100%|██████████| 1/1 [00:00<00:00, 12.13it/s]" ] }, { @@ -150,11 +149,11 @@ "output_type": "stream", "text": [ " Generated embedding shape: (1, 768)\n", - " Embedding time: 1.2403802871704102 seconds\n", + " Embedding time: 1.2287070751190186 seconds\n", "INFO: Starting session-level embedding server for 'leann_backend_hnsw.hnsw_embedding_server'...\n", "INFO: Running command from project root: /Users/yichuan/Desktop/code/LEANN/leann\n", "INFO: Command: /Users/yichuan/Desktop/code/LEANN/leann/.venv/bin/python -m leann_backend_hnsw.hnsw_embedding_server --zmq-port 5557 --model-name facebook/contriever-msmarco --passages-file knowledge.leann.meta.json --disable-warmup\n", - "INFO: Server process started with PID: 71209\n" + "INFO: Server process started with PID: 77844\n" ] }, { @@ -197,8 +196,8 @@ "[leann_backend_hnsw.hnsw_embedding_server LOG]: Request for 1 node embeddings\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: Total batch size: 1, max_batch_size: 128\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: DEBUG zmq_server_thread: Final 'hidden' array | Shape: (1, 768) | Dtype: float32 | Has NaN/Inf: False\n", - "[leann_backend_hnsw.hnsw_embedding_server LOG]: Serialize time: 0.000154 seconds\n", - "[leann_backend_hnsw.hnsw_embedding_server LOG]: ZMQ E2E time: 0.131017 seconds\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: Serialize time: 0.000268 seconds\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: ZMQ E2E time: 0.171174 seconds\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: Received ZMQ request of size 3849 bytes\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: DEBUG: request_payload length: 2\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: DEBUG: request_payload[0]: - [0, 1, 2, 3]\n", @@ -208,7 +207,7 @@ "[leann_backend_hnsw.hnsw_embedding_server LOG]: Query vector dim: 768\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: Passages loaded: 5\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: DEBUG: Looking up passage ID 0\n", - "[leann_backend_hnsw.hnsw_embedding_server LOG]: DEBUG: Found text for ID 0, length: 37\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: DEBUG: Found text for ID 0, length: 64\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: DEBUG: Looking up passage ID 1\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: DEBUG: Found text for ID 1, length: 64\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: DEBUG: Looking up passage ID 2\n", @@ -216,8 +215,8 @@ "[leann_backend_hnsw.hnsw_embedding_server LOG]: DEBUG: Looking up passage ID 3\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: DEBUG: Found text for ID 3, length: 36\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: Sending distance response with 4 distances\n", - "[leann_backend_hnsw.hnsw_embedding_server LOG]: Distance calculation E2E time: 0.131733 seconds\n", - " Search time: 4.379124879837036 seconds\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: Distance calculation E2E time: 0.143306 seconds\n", + " Search time: 5.966892957687378 seconds\n", " Backend returned: labels=2 results\n", " Processing 2 passage IDs:\n" ] @@ -226,18 +225,20 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:leann.chat:Attempting to create LLM of type='hf' with model='Qwen/Qwen3-0.6B'\n", - "INFO:leann.chat:Initializing HFChat with model='Qwen/Qwen3-0.6B'\n" + "INFO:leann.chat:Attempting to create LLM of type='ollama' with model='qwen3:8b'\n", + "INFO:leann.chat:Initializing OllamaChat with model='qwen3:8b' and host='http://localhost:11434'\n", + "INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: mps\n", + "INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: facebook/contriever-msmarco\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - " 1. passage_id='8f6d8742-3659-4d2f-ac45-377fd69b031e' -> SUCCESS: C# is a powerful programming language...\n", - " 2. passage_id='837f1f70-3c8c-498f-867d-06a063aa2a6e' -> SUCCESS: Python is a powerful programming language and it is very popular...\n", + " 1. passage_id='90d76803-a8a0-4693-ab89-0190a4dbce09' -> SUCCESS: Python is a powerful programming language and it is very popular...\n", + " 2. passage_id='f831e915-3cca-4504-bda3-ee59c32fb151' -> SUCCESS: C# is a powerful programming language but it is not very popular...\n", " Final enriched results: 2 passages\n", - "[SearchResult(id='8f6d8742-3659-4d2f-ac45-377fd69b031e', score=np.float32(1.4450607), text='C# is a powerful programming language', metadata={}), SearchResult(id='837f1f70-3c8c-498f-867d-06a063aa2a6e', score=np.float32(1.394449), text='Python is a powerful programming language and it is very popular', metadata={})]\n", + "[SearchResult(id='90d76803-a8a0-4693-ab89-0190a4dbce09', score=np.float32(1.394449), text='Python is a powerful programming language and it is very popular', metadata={}), SearchResult(id='f831e915-3cca-4504-bda3-ee59c32fb151', score=np.float32(1.3833004), text='C# is a powerful programming language but it is not very popular', metadata={})]\n", "[read_HNSW - CSR NL v4] Reading metadata & CSR indices (manual offset)...\n", "[read_HNSW NL v4] Read levels vector, size: 5\n", "[read_HNSW NL v4] Reading Compact Storage format indices...\n", @@ -249,17 +250,17 @@ "[read_HNSW NL v4] Reading neighbors data into memory.\n", "[read_HNSW NL v4] Read neighbors data, size: 20\n", "[read_HNSW NL v4] Finished reading metadata and CSR indices.\n", - "INFO: Skipping external storage loading, since is_recompute is true.\n" + "INFO: Skipping external storage loading, since is_recompute is true.\n", + "🔍 DEBUG LeannSearcher.search() called:\n", + " Query: 'Compare the two retrieved programming languages and say which one is more popular today. Respond in a single well-formed sentence.'\n", + " Top_k: 2\n", + " Additional kwargs: {'recompute_beighbor_embeddings': True}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "INFO:leann.chat:MPS is available. Using Apple Silicon GPU.\n", - "Device set to use mps\n", - "INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: mps\n", - "INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: facebook/contriever-msmarco\n", "WARNING:sentence_transformers.SentenceTransformer:No sentence-transformers model found with name facebook/contriever-msmarco. Creating a new one with mean pooling.\n" ] }, @@ -267,10 +268,6 @@ "name": "stdout", "output_type": "stream", "text": [ - "🔍 DEBUG LeannSearcher.search() called:\n", - " Query: 'Compare the two retrieved programming languages and say which one is more popular today. Respond in a single well-formed sentence.'\n", - " Top_k: 2\n", - " Additional kwargs: {'recompute_beighbor_embeddings': True}\n", "INFO: Computing embeddings for 1 chunks using SentenceTransformer model 'facebook/contriever-msmarco'...\n" ] }, @@ -278,7 +275,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Batches: 100%|██████████| 1/1 [00:00<00:00, 7.66it/s]" + "Batches: 100%|██████████| 1/1 [00:00<00:00, 2.70it/s]" ] }, { @@ -286,7 +283,7 @@ "output_type": "stream", "text": [ " Generated embedding shape: (1, 768)\n", - " Embedding time: 1.5981061458587646 seconds\n", + " Embedding time: 2.0163228511810303 seconds\n", "INFO: Port 5557 is in use. Checking server compatibility...\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: Received ZMQ request of size 17 bytes\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: DEBUG: request_payload length: 1\n", @@ -302,23 +299,10 @@ "[leann_backend_hnsw.hnsw_embedding_server LOG]: DEBUG: request_payload length: 1\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: DEBUG: request_payload[0]: - [4]\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: Request for 1 node embeddings\n", - "[leann_backend_hnsw.hnsw_embedding_server LOG]: Total batch size: 1, max_batch_size: 128\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "[leann_backend_hnsw.hnsw_embedding_server LOG]: Total batch size: 1, max_batch_size: 128\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: DEBUG zmq_server_thread: Final 'hidden' array | Shape: (1, 768) | Dtype: float32 | Has NaN/Inf: False\n", - "[leann_backend_hnsw.hnsw_embedding_server LOG]: Serialize time: 0.000330 seconds\n", - "[leann_backend_hnsw.hnsw_embedding_server LOG]: ZMQ E2E time: 0.165497 seconds\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: Serialize time: 0.000132 seconds\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: ZMQ E2E time: 0.075267 seconds\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: Received ZMQ request of size 3849 bytes\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: DEBUG: request_payload length: 2\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: DEBUG: request_payload[0]: - [0, 1, 2, 3]\n", @@ -328,7 +312,7 @@ "[leann_backend_hnsw.hnsw_embedding_server LOG]: Query vector dim: 768\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: Passages loaded: 5\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: DEBUG: Looking up passage ID 0\n", - "[leann_backend_hnsw.hnsw_embedding_server LOG]: DEBUG: Found text for ID 0, length: 37\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: DEBUG: Found text for ID 0, length: 64\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: DEBUG: Looking up passage ID 1\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: DEBUG: Found text for ID 1, length: 64\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: DEBUG: Looking up passage ID 2\n", @@ -336,8 +320,8 @@ "[leann_backend_hnsw.hnsw_embedding_server LOG]: DEBUG: Looking up passage ID 3\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: DEBUG: Found text for ID 3, length: 36\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: Sending distance response with 4 distances\n", - "[leann_backend_hnsw.hnsw_embedding_server LOG]: Distance calculation E2E time: 0.082911 seconds\n", - " Search time: 0.2542300224304199 seconds\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: Distance calculation E2E time: 0.066100 seconds\n", + " Search time: 0.14907574653625488 seconds\n", " Backend returned: labels=2 results\n", " Processing 2 passage IDs:\n" ] @@ -346,72 +330,30 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:leann.chat:Generating text with Hugging Face model with params: {'max_length': 500, 'num_return_sequences': 1}\n", - "Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.\n", - "Both `max_new_tokens` (=256) and `max_length`(=500) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)\n" + "\n", + "INFO:leann.chat:Sending request to Ollama: {'model': 'qwen3:8b', 'prompt': 'Here is some retrieved context that might help answer your question:\\n\\nPython is a powerful programming language and it is very popular\\n\\nC# is a powerful programming language but it is not very popular\\n\\nQuestion: Compare the two retrieved programming languages and say which one is more popular today. Respond in a single well-formed sentence.\\n\\nPlease provide the best answer you can based on this context and your knowledge.', 'stream': False, 'options': {}}\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - " 1. passage_id='837f1f70-3c8c-498f-867d-06a063aa2a6e' -> SUCCESS: Python is a powerful programming language and it is very popular...\n", - " 2. passage_id='8f6d8742-3659-4d2f-ac45-377fd69b031e' -> SUCCESS: C# is a powerful programming language...\n", + " 1. passage_id='90d76803-a8a0-4693-ab89-0190a4dbce09' -> SUCCESS: Python is a powerful programming language and it is very popular...\n", + " 2. passage_id='f831e915-3cca-4504-bda3-ee59c32fb151' -> SUCCESS: C# is a powerful programming language but it is not very popular...\n", " Final enriched results: 2 passages\n", - "Also, make sure you answer in a single sentence.\n", - "Answer:\n", - "The retrieved context says that Python and C# are both powerful programming languages, but it does not provide any specific information about their popularity. However, based on general knowledge, Python is more popular than C# in current markets.\n", - "The answer should be in a single sentence.\n", - "Answer:\n", - "Python is a more popular programming language than C# today.\n", - "---\n", + "\n", + "Okay, let's tackle this question. The user wants to compare Python and C# in terms of popularity today based on the provided context and my knowledge. \n", "\n", - "So the final answer is:\n", + "First, looking at the retrieved context: it says Python is very popular, while C# is not very popular. That's a clear indicator from the given data. But I should also consider my existing knowledge to confirm.\n", "\n", - "Python is a more popular programming language than C# today.\n", - "---\n", + "From what I know, Python has a massive community and is widely used in areas like data science, machine learning, web development, automation, and scripting. Its simplicity and readability make it a favorite for beginners and experts alike. Major companies like Google, Facebook, and Microsoft use Python extensively. The Python Package Index (PyPI) has thousands of libraries, which boosts its popularity.\n", "\n", - "Answer:\n", - "Python is a more popular programming language than C# today.\n", - "---\n", + "C#, on the other hand, is developed by Microsoft and is primarily used for Windows applications, game development with Unity, and enterprise software. While it's powerful and has a good ecosystem, its usage is more niche compared to Python. The .NET framework and Visual Studio support C#, but the community isn't as large or diverse as Python's. Although C# is popular in specific domains, it doesn't have the same widespread adoption across different industries.\n", "\n", - "So the final answer is:\n", + "So combining the context provided and my knowledge, Python is definitely more popular today. The context already states that, and my understanding aligns with that. The answer should reflect that Python is more popular than C# based on both the given info and general trends.\n", + "\n", "\n", - "Python is a more popular programming language than C# today.\n", - "---\n", - "\n", - "Answer:\n", - "Python is a more popular programming language than C# today.\n", - "---\n", - "\n", - "So the final answer is:\n", - "\n", - "Python is a more popular programming language than C# today.\n", - "---\n", - "\n", - "Answer:\n", - "Python is a more popular programming language than C# today.\n", - "---\n", - "\n", - "So the final answer is:\n", - "\n", - "Python is a more popular programming language than C# today.\n", - "---\n", - "\n", - "Answer:\n", - "Python is a more popular programming language than C# today.\n", - "---\n", - "\n", - "So the final answer is:\n", - "\n", - "Python is a more popular programming language than C# today.\n", - "---\n", - "\n", - "Answer:\n", - "Python is a more popular programming language than C# today.\n", - "---\n", - "\n", - "So the final answer\n" + "Python is more popular today than C# due to its widespread use in diverse fields like data science, web development, and automation, whereas C# is primarily used in specific domains such as Windows applications and game development.\n" ] } ], @@ -419,7 +361,7 @@ "from leann.api import LeannBuilder, LeannSearcher, LeannChat\n", "# 1. Build index (no embeddings stored!)\n", "builder = LeannBuilder(backend_name=\"hnsw\")\n", - "builder.add_text(\"C# is a powerful programming language\")\n", + "builder.add_text(\"C# is a powerful programming language but it is not very popular\")\n", "builder.add_text(\"Python is a powerful programming language and it is very popular\")\n", "builder.add_text(\"Machine learning transforms industries\") \n", "builder.add_text(\"Neural networks process complex data\")\n", @@ -430,7 +372,7 @@ "results = searcher.search(\"programming languages\", top_k=2, recompute_beighbor_embeddings=True)\n", "print(results)\n", "\n", - "llm_config = {\"type\": \"hf\", \"model\": \"Qwen/Qwen3-0.6B\"}\n", + "llm_config = {\"type\": \"ollama\", \"model\": \"qwen3:8b\"}\n", "\n", "chat = LeannChat(index_path=\"knowledge.leann\", llm_config=llm_config)\n", "\n", diff --git a/examples/google_history_reader_leann.py b/examples/google_history_reader_leann.py index c07f2f0..fd97d98 100644 --- a/examples/google_history_reader_leann.py +++ b/examples/google_history_reader_leann.py @@ -204,9 +204,9 @@ async def query_leann_index(index_path: str, query: str): print(f"You: {query}") chat_response = chat.ask( query, - top_k=5, + top_k=10, recompute_beighbor_embeddings=True, - complexity=128, + complexity=32, beam_width=1, llm_config={ "type": "openai",