diff --git a/README.md b/README.md index a755689..fbcdbbe 100755 --- a/README.md +++ b/README.md @@ -104,7 +104,7 @@ Just 3 lines of code. Our declarative API makes RAG as easy as writing a config from leann.api import LeannBuilder, LeannSearcher, LeannChat # 1. Build index (no embeddings stored!) builder = LeannBuilder(backend_name="hnsw") -builder.add_text("C# is a powerful programming language but it is not very popular") +builder.add_text("C# is a powerful programming language ") builder.add_text("Python is a powerful programming language and it is very popular") builder.add_text("Machine learning transforms industries") builder.add_text("Neural networks process complex data") @@ -113,15 +113,13 @@ builder.build_index("knowledge.leann") # 2. Search with real-time embeddings searcher = LeannSearcher("knowledge.leann") results = searcher.search("programming languages", top_k=2, recompute_beighbor_embeddings=True,complexity=2) -print("LEANN Search results: ", results) # 3. Chat with LEANN chat = LeannChat(index_path="knowledge.leann", llm_config={"type": "ollama", "model": "llama3.2:1b"}) response = chat.ask( - "Compare the two retrieved programming languages and say which one is more popular today. Respond in a single well-formed sentence.", + "Compare the two retrieved programming languages and say which one is more popular today.", top_k=2, recompute_beighbor_embeddings=True, ) -print("LEANN Chat response: ", response) ``` **That's it.** No cloud setup, no API keys, no "fine-tuning". Just your data, your questions, your laptop. diff --git a/demo.ipynb b/demo.ipynb index 06ef0f0..e8c689e 100644 --- a/demo.ipynb +++ b/demo.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -19,7 +19,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Writing passages: 100%|██████████| 5/5 [00:00<00:00, 11715.93chunk/s]\n" + "Writing passages: 100%|██████████| 5/5 [00:00<00:00, 11008.67chunk/s]\n" ] }, { @@ -35,7 +35,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Batches: 100%|██████████| 1/1 [00:00<00:00, 76.09it/s]\n", + "Batches: 100%|██████████| 1/1 [00:00<00:00, 28.75it/s]\n", "WARNING:leann_backend_hnsw.hnsw_backend:Converting data to float32, shape: (5, 768)\n", "INFO:leann_backend_hnsw.hnsw_backend:INFO: Converting HNSW index to CSR-pruned format...\n" ] @@ -53,34 +53,34 @@ " Reading vector (dtype=, fmt='d')... Count=6, Bytes=48\n", "[0.00s] Read assign_probas (6)\n", " Reading vector (dtype=, fmt='i')... Count=7, Bytes=28\n", - "[0.18s] Read cum_nneighbor_per_level (7)\n", + "[0.14s] Read cum_nneighbor_per_level (7)\n", " Reading vector (dtype=, fmt='i')... Count=5, Bytes=20\n", - "[0.29s] Read levels (5)\n", - "[0.40s] Probing for compact storage flag...\n", - "[0.40s] Found compact flag: False\n", - "[0.40s] Compact flag is False, reading original format...\n", - "[0.40s] Probing for potential extra byte before non-compact offsets...\n", - "[0.40s] Found and consumed an unexpected 0x00 byte.\n", + "[0.25s] Read levels (5)\n", + "[0.35s] Probing for compact storage flag...\n", + "[0.35s] Found compact flag: False\n", + "[0.35s] Compact flag is False, reading original format...\n", + "[0.35s] Probing for potential extra byte before non-compact offsets...\n", + "[0.35s] Found and consumed an unexpected 0x00 byte.\n", " Reading vector (dtype=, fmt='Q')... Count=6, Bytes=48\n", - "[0.40s] Read offsets (6)\n", - "[0.50s] Attempting to read neighbors vector...\n", + "[0.35s] Read offsets (6)\n", + "[0.45s] Attempting to read neighbors vector...\n", " Reading vector (dtype=, fmt='i')... Count=320, Bytes=1280\n", - "[0.50s] Read neighbors (320)\n", - "[0.60s] Read scalar params (ep=4, max_lvl=0)\n", - "[0.60s] Checking for storage data...\n", - "[0.60s] Found storage fourcc: 49467849.\n", - "[0.60s] Converting to CSR format...\n", - "[0.60s] Conversion loop finished. \n", - "[0.60s] Running validation checks...\n", + "[0.45s] Read neighbors (320)\n", + "[0.55s] Read scalar params (ep=4, max_lvl=0)\n", + "[0.55s] Checking for storage data...\n", + "[0.55s] Found storage fourcc: 49467849.\n", + "[0.55s] Converting to CSR format...\n", + "[0.55s] Conversion loop finished. \n", + "[0.55s] Running validation checks...\n", " Checking total valid neighbor count...\n", " OK: Total valid neighbors = 20\n", " Checking final pointer indices...\n", " OK: Final pointers match data size.\n", - "[0.60s] Deleting original neighbors and offsets arrays...\n", + "[0.55s] Deleting original neighbors and offsets arrays...\n", " CSR Stats: |data|=20, |level_ptr|=10\n", - "[0.69s] Writing CSR HNSW graph data in FAISS-compatible order...\n", + "[0.65s] Writing CSR HNSW graph data in FAISS-compatible order...\n", " Pruning embeddings: Writing NULL storage marker.\n", - "[0.79s] Conversion complete.\n" + "[0.74s] Conversion complete.\n" ] }, { @@ -96,8 +96,8 @@ "output_type": "stream", "text": [ "[read_HNSW - CSR NL v4] Reading metadata & CSR indices (manual offset)...\n", + "INFO: Terminating server process (PID: 25336) for backend leann_backend_hnsw.hnsw_embedding_server...\n", "[read_HNSW NL v4] Read levels vector, size: 5\n", - "INFO: Terminating server process (PID: 25224) for backend leann_backend_hnsw.hnsw_embedding_server...\n", "[read_HNSW NL v4] Reading Compact Storage format indices...\n", "[read_HNSW NL v4] Read compact_level_ptr, size: 10\n", "[read_HNSW NL v4] Read compact_node_offsets, size: 6\n", @@ -108,14 +108,14 @@ "[read_HNSW NL v4] Read neighbors data, size: 20\n", "[read_HNSW NL v4] Finished reading metadata and CSR indices.\n", "INFO: Skipping external storage loading, since is_recompute is true.\n", - "INFO: Server process 25224 terminated.\n", + "INFO: Server process 25336 terminated.\n", "🔍 DEBUG LeannSearcher.search() called:\n", " Query: 'programming languages'\n", " Top_k: 2\n", " Additional kwargs: {'recompute_beighbor_embeddings': True}\n", "INFO: Starting embedding server on port 5557...\n", "INFO: Command: /Users/yichuan/Desktop/code/LEANN/leann/.venv/bin/python -m leann_backend_hnsw.hnsw_embedding_server --zmq-port 5557 --model-name facebook/contriever-msmarco --passages-file knowledge.leann.meta.json\n", - "INFO: Server process started with PID: 25336\n" + "INFO: Server process started with PID: 26499\n" ] }, { @@ -134,11 +134,11 @@ "text": [ "✅ Embedding server is ready!\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO: Registering backend 'diskann'\n", - "DEBUG: Found process on port 5557: /Users/yichuan/Desktop/code/LEANN/leann/.venv/bin/python -m leann_backend_hnsw.hnsw_embedding_server --zmq-port 5557 --model-name facebook/contriever-msmarco --passages-file knowledge.leann.meta.json\n", - "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO: Registering backend 'hnsw'\n", - "DEBUG: model_matches: True, passages_matches: True, overall: True\n", - "✅ Existing server process (PID 25336) is compatible\n", + "DEBUG: Found process on port 5557: /Users/yichuan/Desktop/code/LEANN/leann/.venv/bin/python -m leann_backend_hnsw.hnsw_embedding_server --zmq-port 5557 --model-name facebook/contriever-msmarco --passages-file knowledge.leann.meta.json[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO: Registering backend 'hnsw'\n", + "\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: Starting HNSW server on port 5557 with model facebook/contriever-msmarco\n", + "DEBUG: model_matches: True, passages_matches: True, overall: True\n", + "✅ Existing server process (PID 26499) is compatible\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: Using embedding mode: sentence-transformers\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: Successfully imported unified embedding computation module\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: Loaded PassageManager with 5 passages from metadata\n", @@ -157,9 +157,9 @@ "[leann_backend_hnsw.hnsw_embedding_server LOG]: ✅ Model cached: sentence_transformers_facebook/contriever-msmarco_mps_True\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO: Starting embedding computation...\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO: Generated 1 embeddings, dimension: 768\n", - "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO:__main__:⏱️ Text embedding E2E time: 2.707787s\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO:__main__:⏱️ Text embedding E2E time: 3.429944s\n", " Generated embedding shape: (1, 768)\n", - " Embedding time: 3.7433197498321533 seconds\n", + " Embedding time: 4.473694801330566 seconds\n", "ZmqDistanceComputer initialized: d=768, metric=0\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: Received ZMQ request of size 3 bytes\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: Request for 1 node embeddings\n", @@ -168,7 +168,7 @@ "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO: Starting embedding computation...\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO: Generated 1 embeddings, dimension: 768\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO: Computed embeddings for 1 texts, shape: (1, 768)\n", - "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO:__main__:⏱️ ZMQ E2E time: 0.048147s\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO:__main__:⏱️ ZMQ E2E time: 0.049782s\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: Received ZMQ request of size 3849 bytes\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: Node IDs: [0, 1, 2, 3]\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: Query vector dim: 768\n", @@ -178,7 +178,10 @@ "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO: Generated 4 embeddings, dimension: 768\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO: Computed embeddings for 4 texts, shape: (4, 768)\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: Sending distance response with 4 distances\n", - "[leann_backend_hnsw.hnsw_embedding_server LOG]: ⏱️ Distance calculation E2E time: 0.081424s\n" + " Search time: 0.21038389205932617 seconds\n", + " Backend returned: labels=2 results\n", + " Processing 2 passage IDs:\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: ⏱️ Distance calculation E2E time: 0.158250s\n" ] }, { @@ -193,13 +196,10 @@ "name": "stdout", "output_type": "stream", "text": [ - " Search time: 0.13153481483459473 seconds\n", - " Backend returned: labels=2 results\n", - " Processing 2 passage IDs:\n", - " 1. passage_id='1' -> SUCCESS: Python is a powerful programming language and it is very popular...\n", - " 2. passage_id='0' -> SUCCESS: C# is a powerful programming language but it is not very popular...\n", + " 1. passage_id='0' -> SUCCESS: C# is a powerful programming language...\n", + " 2. passage_id='1' -> SUCCESS: Python is a powerful programming language and it is very popular...\n", " Final enriched results: 2 passages\n", - "LEANN Search results: [SearchResult(id='1', score=np.float32(1.394647), text='Python is a powerful programming language and it is very popular', metadata={}), SearchResult(id='0', score=np.float32(1.3831015), text='C# is a powerful programming language but it is not very popular', metadata={})]\n", + "LEANN Search results: [SearchResult(id='0', score=np.float32(1.444752), text='C# is a powerful programming language', metadata={}), SearchResult(id='1', score=np.float32(1.394647), text='Python is a powerful programming language and it is very popular', metadata={})]\n", "[read_HNSW - CSR NL v4] Reading metadata & CSR indices (manual offset)...\n", "[read_HNSW NL v4] Read levels vector, size: 5\n", "[read_HNSW NL v4] Reading Compact Storage format indices...\n", @@ -230,9 +230,9 @@ "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO: Using cached model: facebook/contriever-msmarco\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO: Starting embedding computation...\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO: Generated 1 embeddings, dimension: 768\n", - "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO:__main__:⏱️ Text embedding E2E time: 0.049667s\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO:__main__:⏱️ Text embedding E2E time: 0.046458s\n", " Generated embedding shape: (1, 768)\n", - " Embedding time: 0.06644177436828613 seconds\n", + " Embedding time: 0.06156015396118164 seconds\n", "ZmqDistanceComputer initialized: d=768, metric=0\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: Received ZMQ request of size 3 bytes\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: Request for 1 node embeddings\n", @@ -241,7 +241,7 @@ "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO: Starting embedding computation...\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO: Generated 1 embeddings, dimension: 768\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO: Computed embeddings for 1 texts, shape: (1, 768)\n", - "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO:__main__:⏱️ ZMQ E2E time: 0.014071s\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO:__main__:⏱️ ZMQ E2E time: 0.011214s\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: Received ZMQ request of size 3849 bytes\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: Node IDs: [0, 1, 2, 3]\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: Query vector dim: 768\n", @@ -250,28 +250,28 @@ "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO: Starting embedding computation...\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO: Generated 4 embeddings, dimension: 768\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO: Computed embeddings for 4 texts, shape: (4, 768)\n", - " Search time: 0.041433095932006836 seconds\n", + " Search time: 0.027656078338623047 seconds\n", " Backend returned: labels=2 results\n", " Processing 2 passage IDs:\n", "[leann_backend_hnsw.hnsw_embedding_server LOG]: Sending distance response with 4 distances\n", - " 1. passage_id='1' -> SUCCESS: Python is a powerful programming language and it is very popular...[leann_backend_hnsw.hnsw_embedding_server LOG]: ⏱️ Distance calculation E2E time: 0.025329s\n", - "\n" + " 1. passage_id='1' -> SUCCESS: Python is a powerful programming language and it is very popular...\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: ⏱️ Distance calculation E2E time: 0.014145s\n", + " 2. passage_id='0' -> SUCCESS: C# is a powerful programming language...\n", + " Final enriched results: 2 passages\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "INFO:leann.chat:Sending request to Ollama: {'model': 'llama3.2:1b', 'prompt': 'Here is some retrieved context that might help answer your question:\\n\\nPython is a powerful programming language and it is very popular\\n\\nC# is a powerful programming language but it is not very popular\\n\\nQuestion: Compare the two retrieved programming languages and say which one is more popular today. Respond in a single well-formed sentence.\\n\\nPlease provide the best answer you can based on this context and your knowledge.', 'stream': False, 'options': {}}\n" + "INFO:leann.chat:Sending request to Ollama: {'model': 'llama3.2:1b', 'prompt': 'Here is some retrieved context that might help answer your question:\\n\\nPython is a powerful programming language and it is very popular\\n\\nC# is a powerful programming language\\n\\nQuestion: Compare the two retrieved programming languages and say which one is more popular today. Respond in a single well-formed sentence.\\n\\nPlease provide the best answer you can based on this context and your knowledge.', 'stream': False, 'options': {}}\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - " 2. passage_id='0' -> SUCCESS: C# is a powerful programming language but it is not very popular...\n", - " Final enriched results: 2 passages\n", - "LEANN Chat response: Based on general trends and metrics, Python is currently more popular than C#, particularly among developers and in specific industries such as data science, artificial intelligence, and web development.\n" + "LEANN Chat response: Python and C# are both highly popular programming languages, but Python's widespread adoption and influence in various industries such as data science, machine learning, and web development have earned it the title of more popular today among developers and users alike.\n" ] } ], @@ -279,7 +279,7 @@ "from leann.api import LeannBuilder, LeannSearcher, LeannChat\n", "# 1. Build index (no embeddings stored!)\n", "builder = LeannBuilder(backend_name=\"hnsw\")\n", - "builder.add_text(\"C# is a powerful programming language but it is not very popular\")\n", + "builder.add_text(\"C# is a powerful programming language\")\n", "builder.add_text(\"Python is a powerful programming language and it is very popular\")\n", "builder.add_text(\"Machine learning transforms industries\") \n", "builder.add_text(\"Neural networks process complex data\")\n",