From 8a75829f3a8c33d9f3ca2e8433f54cacbde3d3f3 Mon Sep 17 00:00:00 2001 From: yichuan520030910320 Date: Mon, 21 Jul 2025 22:30:03 -0700 Subject: [PATCH] readme --- README.md | 22 +++-- demo.ipynb | 285 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 290 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index fef8813..a755689 100755 --- a/README.md +++ b/README.md @@ -69,7 +69,7 @@ uv sync uv sync --extra diskann ``` -**Ollama Setup (Optional for Local LLM):** +**Ollama Setup (Recommended for full privacy):** *We support both hf-transformers and Ollama for local LLMs. Ollama is recommended for faster performance.* @@ -101,21 +101,27 @@ You can also replace `llama3.2:1b` to `deepseek-r1:1.5b` or `qwen3:4b` for bette Just 3 lines of code. Our declarative API makes RAG as easy as writing a config file: ```python -from leann.api import LeannBuilder, LeannSearcher - +from leann.api import LeannBuilder, LeannSearcher, LeannChat # 1. Build index (no embeddings stored!) builder = LeannBuilder(backend_name="hnsw") -builder.add_text("C# is a powerful programming language") -builder.add_text("Python is a powerful programming language") +builder.add_text("C# is a powerful programming language but it is not very popular") +builder.add_text("Python is a powerful programming language and it is very popular") builder.add_text("Machine learning transforms industries") builder.add_text("Neural networks process complex data") builder.add_text("Leann is a great storage saving engine for RAG on your macbook") builder.build_index("knowledge.leann") - # 2. Search with real-time embeddings searcher = LeannSearcher("knowledge.leann") -results = searcher.search("C++ programming languages", top_k=2, recompute_beighbor_embeddings=True) -print(results) +results = searcher.search("programming languages", top_k=2, recompute_beighbor_embeddings=True,complexity=2) +print("LEANN Search results: ", results) +# 3. Chat with LEANN +chat = LeannChat(index_path="knowledge.leann", llm_config={"type": "ollama", "model": "llama3.2:1b"}) +response = chat.ask( + "Compare the two retrieved programming languages and say which one is more popular today. Respond in a single well-formed sentence.", + top_k=2, + recompute_beighbor_embeddings=True, +) +print("LEANN Chat response: ", response) ``` **That's it.** No cloud setup, no API keys, no "fine-tuning". Just your data, your questions, your laptop. diff --git a/demo.ipynb b/demo.ipynb index 8ade403..06ef0f0 100644 --- a/demo.ipynb +++ b/demo.ipynb @@ -4,7 +4,277 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO: Computing embeddings for 1 texts using SentenceTransformer, model: 'facebook/contriever-msmarco'\n", + "INFO: Using cached model: facebook/contriever-msmarco\n", + "INFO: Starting embedding computation...\n", + "INFO: Generated 1 embeddings, dimension: 768\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Writing passages: 100%|██████████| 5/5 [00:00<00:00, 11715.93chunk/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO: Computing embeddings for 5 texts using SentenceTransformer, model: 'facebook/contriever-msmarco'\n", + "INFO: Using cached model: facebook/contriever-msmarco\n", + "INFO: Starting embedding computation...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Batches: 100%|██████████| 1/1 [00:00<00:00, 76.09it/s]\n", + "WARNING:leann_backend_hnsw.hnsw_backend:Converting data to float32, shape: (5, 768)\n", + "INFO:leann_backend_hnsw.hnsw_backend:INFO: Converting HNSW index to CSR-pruned format...\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO: Generated 5 embeddings, dimension: 768\n", + "M: 64 for level: 0\n", + "Starting conversion: knowledge.index -> knowledge.csr.tmp\n", + "[0.00s] Reading Index HNSW header...\n", + "[0.00s] Header read: d=768, ntotal=5\n", + "[0.00s] Reading HNSW struct vectors...\n", + " Reading vector (dtype=, fmt='d')... Count=6, Bytes=48\n", + "[0.00s] Read assign_probas (6)\n", + " Reading vector (dtype=, fmt='i')... Count=7, Bytes=28\n", + "[0.18s] Read cum_nneighbor_per_level (7)\n", + " Reading vector (dtype=, fmt='i')... Count=5, Bytes=20\n", + "[0.29s] Read levels (5)\n", + "[0.40s] Probing for compact storage flag...\n", + "[0.40s] Found compact flag: False\n", + "[0.40s] Compact flag is False, reading original format...\n", + "[0.40s] Probing for potential extra byte before non-compact offsets...\n", + "[0.40s] Found and consumed an unexpected 0x00 byte.\n", + " Reading vector (dtype=, fmt='Q')... Count=6, Bytes=48\n", + "[0.40s] Read offsets (6)\n", + "[0.50s] Attempting to read neighbors vector...\n", + " Reading vector (dtype=, fmt='i')... Count=320, Bytes=1280\n", + "[0.50s] Read neighbors (320)\n", + "[0.60s] Read scalar params (ep=4, max_lvl=0)\n", + "[0.60s] Checking for storage data...\n", + "[0.60s] Found storage fourcc: 49467849.\n", + "[0.60s] Converting to CSR format...\n", + "[0.60s] Conversion loop finished. \n", + "[0.60s] Running validation checks...\n", + " Checking total valid neighbor count...\n", + " OK: Total valid neighbors = 20\n", + " Checking final pointer indices...\n", + " OK: Final pointers match data size.\n", + "[0.60s] Deleting original neighbors and offsets arrays...\n", + " CSR Stats: |data|=20, |level_ptr|=10\n", + "[0.69s] Writing CSR HNSW graph data in FAISS-compatible order...\n", + " Pruning embeddings: Writing NULL storage marker.\n", + "[0.79s] Conversion complete.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:leann_backend_hnsw.hnsw_backend:✅ CSR conversion successful.\n", + "INFO:leann_backend_hnsw.hnsw_backend:INFO: Replaced original index with CSR-pruned version at 'knowledge.index'\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[read_HNSW - CSR NL v4] Reading metadata & CSR indices (manual offset)...\n", + "[read_HNSW NL v4] Read levels vector, size: 5\n", + "INFO: Terminating server process (PID: 25224) for backend leann_backend_hnsw.hnsw_embedding_server...\n", + "[read_HNSW NL v4] Reading Compact Storage format indices...\n", + "[read_HNSW NL v4] Read compact_level_ptr, size: 10\n", + "[read_HNSW NL v4] Read compact_node_offsets, size: 6\n", + "[read_HNSW NL v4] Read entry_point: 4, max_level: 0\n", + "[read_HNSW NL v4] Read storage fourcc: 0x6c6c756e\n", + "[read_HNSW NL v4 FIX] Detected FileIOReader. Neighbors size field offset: 326\n", + "[read_HNSW NL v4] Reading neighbors data into memory.\n", + "[read_HNSW NL v4] Read neighbors data, size: 20\n", + "[read_HNSW NL v4] Finished reading metadata and CSR indices.\n", + "INFO: Skipping external storage loading, since is_recompute is true.\n", + "INFO: Server process 25224 terminated.\n", + "🔍 DEBUG LeannSearcher.search() called:\n", + " Query: 'programming languages'\n", + " Top_k: 2\n", + " Additional kwargs: {'recompute_beighbor_embeddings': True}\n", + "INFO: Starting embedding server on port 5557...\n", + "INFO: Command: /Users/yichuan/Desktop/code/LEANN/leann/.venv/bin/python -m leann_backend_hnsw.hnsw_embedding_server --zmq-port 5557 --model-name facebook/contriever-msmarco --passages-file knowledge.leann.meta.json\n", + "INFO: Server process started with PID: 25336\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", + "To disable this warning, you can either:\n", + "\t- Avoid using `tokenizers` before the fork if possible\n", + "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "✅ Embedding server is ready!\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO: Registering backend 'diskann'\n", + "DEBUG: Found process on port 5557: /Users/yichuan/Desktop/code/LEANN/leann/.venv/bin/python -m leann_backend_hnsw.hnsw_embedding_server --zmq-port 5557 --model-name facebook/contriever-msmarco --passages-file knowledge.leann.meta.json\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO: Registering backend 'hnsw'\n", + "DEBUG: model_matches: True, passages_matches: True, overall: True\n", + "✅ Existing server process (PID 25336) is compatible\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: Starting HNSW server on port 5557 with model facebook/contriever-msmarco\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: Using embedding mode: sentence-transformers\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: Successfully imported unified embedding computation module\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: Loaded PassageManager with 5 passages from metadata\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: HNSW ZMQ server listening on port 5557\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: Started HNSW ZMQ server thread on port 5557\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: Received ZMQ request of size 23 bytes\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO:__main__:Processing direct text embedding request for 1 texts in sentence-transformers mode\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO: Computing embeddings for 1 texts using SentenceTransformer, model: 'facebook/contriever-msmarco'\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO: Loading and caching SentenceTransformer model: facebook/contriever-msmarco\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO:datasets:PyTorch version 2.7.1 available.\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO: Using device: mps\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: facebook/contriever-msmarco\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: WARNING:sentence_transformers.SentenceTransformer:No sentence-transformers model found with name facebook/contriever-msmarco. Creating a new one with mean pooling.\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: ✅ Model loaded successfully! (local + optimized)\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: ✅ Using FP16 precision and compile optimization: facebook/contriever-msmarco\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: ✅ Model cached: sentence_transformers_facebook/contriever-msmarco_mps_True\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO: Starting embedding computation...\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO: Generated 1 embeddings, dimension: 768\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO:__main__:⏱️ Text embedding E2E time: 2.707787s\n", + " Generated embedding shape: (1, 768)\n", + " Embedding time: 3.7433197498321533 seconds\n", + "ZmqDistanceComputer initialized: d=768, metric=0\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: Received ZMQ request of size 3 bytes\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: Request for 1 node embeddings\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO: Computing embeddings for 1 texts using SentenceTransformer, model: 'facebook/contriever-msmarco'\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO: Using cached model: facebook/contriever-msmarco\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO: Starting embedding computation...\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO: Generated 1 embeddings, dimension: 768\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO: Computed embeddings for 1 texts, shape: (1, 768)\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO:__main__:⏱️ ZMQ E2E time: 0.048147s\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: Received ZMQ request of size 3849 bytes\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: Node IDs: [0, 1, 2, 3]\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: Query vector dim: 768\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO: Computing embeddings for 4 texts using SentenceTransformer, model: 'facebook/contriever-msmarco'\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO: Using cached model: facebook/contriever-msmarco\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO: Starting embedding computation...\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO: Generated 4 embeddings, dimension: 768\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO: Computed embeddings for 4 texts, shape: (4, 768)\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: Sending distance response with 4 distances\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: ⏱️ Distance calculation E2E time: 0.081424s\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:leann.chat:Attempting to create LLM of type='ollama' with model='llama3.2:1b'\n", + "INFO:leann.chat:Initializing OllamaChat with model='llama3.2:1b' and host='http://localhost:11434'\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Search time: 0.13153481483459473 seconds\n", + " Backend returned: labels=2 results\n", + " Processing 2 passage IDs:\n", + " 1. passage_id='1' -> SUCCESS: Python is a powerful programming language and it is very popular...\n", + " 2. passage_id='0' -> SUCCESS: C# is a powerful programming language but it is not very popular...\n", + " Final enriched results: 2 passages\n", + "LEANN Search results: [SearchResult(id='1', score=np.float32(1.394647), text='Python is a powerful programming language and it is very popular', metadata={}), SearchResult(id='0', score=np.float32(1.3831015), text='C# is a powerful programming language but it is not very popular', metadata={})]\n", + "[read_HNSW - CSR NL v4] Reading metadata & CSR indices (manual offset)...\n", + "[read_HNSW NL v4] Read levels vector, size: 5\n", + "[read_HNSW NL v4] Reading Compact Storage format indices...\n", + "[read_HNSW NL v4] Read compact_level_ptr, size: 10\n", + "[read_HNSW NL v4] Read compact_node_offsets, size: 6\n", + "[read_HNSW NL v4] Read entry_point: 4, max_level: 0\n", + "[read_HNSW NL v4] Read storage fourcc: 0x6c6c756e\n", + "[read_HNSW NL v4 FIX] Detected FileIOReader. Neighbors size field offset: 326\n", + "[read_HNSW NL v4] Reading neighbors data into memory.\n", + "[read_HNSW NL v4] Read neighbors data, size: 20\n", + "[read_HNSW NL v4] Finished reading metadata and CSR indices.\n", + "INFO: Skipping external storage loading, since is_recompute is true.\n", + "🔍 DEBUG LeannSearcher.search() called:\n", + " Query: 'Compare the two retrieved programming languages and say which one is more popular today. Respond in a single well-formed sentence.'\n", + " Top_k: 2\n", + " Additional kwargs: {'recompute_beighbor_embeddings': True}\n", + "DEBUG: Found process on port 5557: /Users/yichuan/Desktop/code/LEANN/leann/.venv/bin/python -m leann_backend_hnsw.hnsw_embedding_server --zmq-port 5557 --model-name facebook/contriever-msmarco --passages-file knowledge.leann.meta.json\n", + "DEBUG: model_matches: True, passages_matches: True, overall: True\n", + "✅ Found compatible server on port 5557\n", + "✅ Using existing compatible server on port 5557\n", + "DEBUG: Found process on port 5557: /Users/yichuan/Desktop/code/LEANN/leann/.venv/bin/python -m leann_backend_hnsw.hnsw_embedding_server --zmq-port 5557 --model-name facebook/contriever-msmarco --passages-file knowledge.leann.meta.json\n", + "DEBUG: model_matches: True, passages_matches: True, overall: True\n", + "✅ Found compatible server on port 5557\n", + "✅ Using existing compatible server on port 5557\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: Received ZMQ request of size 133 bytes\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO:__main__:Processing direct text embedding request for 1 texts in sentence-transformers mode\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO: Computing embeddings for 1 texts using SentenceTransformer, model: 'facebook/contriever-msmarco'\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO: Using cached model: facebook/contriever-msmarco\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO: Starting embedding computation...\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO: Generated 1 embeddings, dimension: 768\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO:__main__:⏱️ Text embedding E2E time: 0.049667s\n", + " Generated embedding shape: (1, 768)\n", + " Embedding time: 0.06644177436828613 seconds\n", + "ZmqDistanceComputer initialized: d=768, metric=0\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: Received ZMQ request of size 3 bytes\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: Request for 1 node embeddings\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO: Computing embeddings for 1 texts using SentenceTransformer, model: 'facebook/contriever-msmarco'\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO: Using cached model: facebook/contriever-msmarco\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO: Starting embedding computation...\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO: Generated 1 embeddings, dimension: 768\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO: Computed embeddings for 1 texts, shape: (1, 768)\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO:__main__:⏱️ ZMQ E2E time: 0.014071s\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: Received ZMQ request of size 3849 bytes\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: Node IDs: [0, 1, 2, 3]\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: Query vector dim: 768\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO: Computing embeddings for 4 texts using SentenceTransformer, model: 'facebook/contriever-msmarco'\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO: Using cached model: facebook/contriever-msmarco\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO: Starting embedding computation...\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO: Generated 4 embeddings, dimension: 768\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO: Computed embeddings for 4 texts, shape: (4, 768)\n", + " Search time: 0.041433095932006836 seconds\n", + " Backend returned: labels=2 results\n", + " Processing 2 passage IDs:\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: Sending distance response with 4 distances\n", + " 1. passage_id='1' -> SUCCESS: Python is a powerful programming language and it is very popular...[leann_backend_hnsw.hnsw_embedding_server LOG]: ⏱️ Distance calculation E2E time: 0.025329s\n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:leann.chat:Sending request to Ollama: {'model': 'llama3.2:1b', 'prompt': 'Here is some retrieved context that might help answer your question:\\n\\nPython is a powerful programming language and it is very popular\\n\\nC# is a powerful programming language but it is not very popular\\n\\nQuestion: Compare the two retrieved programming languages and say which one is more popular today. Respond in a single well-formed sentence.\\n\\nPlease provide the best answer you can based on this context and your knowledge.', 'stream': False, 'options': {}}\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 2. passage_id='0' -> SUCCESS: C# is a powerful programming language but it is not very popular...\n", + " Final enriched results: 2 passages\n", + "LEANN Chat response: Based on general trends and metrics, Python is currently more popular than C#, particularly among developers and in specific industries such as data science, artificial intelligence, and web development.\n" + ] + } + ], "source": [ "from leann.api import LeannBuilder, LeannSearcher, LeannChat\n", "# 1. Build index (no embeddings stored!)\n", @@ -17,19 +287,16 @@ "builder.build_index(\"knowledge.leann\")\n", "# 2. Search with real-time embeddings\n", "searcher = LeannSearcher(\"knowledge.leann\")\n", - "results = searcher.search(\"programming languages\", top_k=2, recompute_beighbor_embeddings=True)\n", - "print(results)\n", - "\n", - "llm_config = {\"type\": \"ollama\", \"model\": \"qwen3:8b\"}\n", - "\n", - "chat = LeannChat(index_path=\"knowledge.leann\", llm_config=llm_config)\n", - "\n", + "results = searcher.search(\"programming languages\", top_k=2, recompute_beighbor_embeddings=True,complexity=2)\n", + "print(\"LEANN Search results: \", results)\n", + "# 3. Chat with LEANN\n", + "chat = LeannChat(index_path=\"knowledge.leann\", llm_config={\"type\": \"ollama\", \"model\": \"llama3.2:1b\"})\n", "response = chat.ask(\n", " \"Compare the two retrieved programming languages and say which one is more popular today. Respond in a single well-formed sentence.\",\n", " top_k=2,\n", " recompute_beighbor_embeddings=True,\n", ")\n", - "print(response)" + "print(\"LEANN Chat response: \", response)" ] } ],