From c1bc2603a24e4c9ef5ca55b6cf5853ec0ef2d0d3 Mon Sep 17 00:00:00 2001 From: yichuan520030910320 Date: Tue, 15 Jul 2025 23:18:01 -0700 Subject: [PATCH] update readme and 30 seconds example --- README.md | 15 +- demo.ipynb | 487 +++++++++++++++++++++-------------------------------- 2 files changed, 198 insertions(+), 304 deletions(-) diff --git a/README.md b/README.md index 003b469..08b9324 100755 --- a/README.md +++ b/README.md @@ -68,27 +68,24 @@ uv sync ```python from leann.api import LeannBuilder, LeannSearcher - # 1. Build index (no embeddings stored!) builder = LeannBuilder(backend_name="hnsw") +builder.add_text("C# is a powerful programming language") builder.add_text("Python is a powerful programming language") builder.add_text("Machine learning transforms industries") builder.add_text("Neural networks process complex data") builder.add_text("Leann is a great storage saving engine for RAG on your macbook") builder.build_index("knowledge.leann") - # 2. Search with real-time embeddings searcher = LeannSearcher("knowledge.leann") -results = searcher.search("programming languages", top_k=2) - -for result in results: - print(f"Score: {result['score']:.3f} - {result['text']}") +results = searcher.search("C++ programming languages", top_k=2, recompute_beighbor_embeddings=True) +print(results) ``` -### Run the Demo +### Run the Demo (supports .pdf, .txt, .docx, .pptx, .csv, .md, etc.) ```bash -uv run examples/document_search.py +uv run ./examples/main_cli_example.py ``` or you want to use python @@ -99,7 +96,7 @@ python ./examples/main_cli_example.py ``` **PDF RAG Demo (using LlamaIndex for document parsing and Leann for indexing/search)** -This demo showcases how to build a RAG system for PDF documents using Leann. +This demo showcases how to build a RAG system for PDF/Markdown documents using Leann. 1. Place your PDF files (and other supported formats like .docx, .pptx, .xlsx) into the `examples/data/` directory. 2. 
Ensure you have an `OPENAI_API_KEY` set in your environment variables or in a `.env` file for the LLM to function. diff --git a/demo.ipynb b/demo.ipynb index 0aacafe..c76f340 100644 --- a/demo.ipynb +++ b/demo.ipynb @@ -2,205 +2,64 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 6, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: mps\n", + "INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: facebook/contriever-msmarco\n", + "WARNING:sentence_transformers.SentenceTransformer:No sentence-transformers model found with name facebook/contriever-msmarco. Creating a new one with mean pooling.\n" + ] + }, { "name": "stdout", "output_type": "stream", "text": [ - "Initializing leann-backend-diskann...\n", - "INFO: Registering backend 'diskann'\n", - "INFO: DiskANN backend loaded successfully\n", - "INFO: LeannBuilder initialized with 'diskann' backend.\n" + "INFO: Computing embeddings for 1 chunks using SentenceTransformer model 'facebook/contriever-msmarco'...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "/home/ubuntu/LEANN_clean/leann/.venv/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n" + "Batches: 100%|██████████| 1/1 [00:00<00:00, 65.35it/s]\n", + "INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: mps\n", + "INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: facebook/contriever-msmarco\n", + "WARNING:sentence_transformers.SentenceTransformer:No sentence-transformers model found with name facebook/contriever-msmarco. 
Creating a new one with mean pooling.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "INFO: Computing embeddings for 6 chunks using 'sentence-transformers/all-mpnet-base-v2'...\n" + "INFO: Computing embeddings for 5 chunks using SentenceTransformer model 'facebook/contriever-msmarco'...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "Batches: 100%|██████████| 1/1 [00:00<00:00, 2.91it/s]\n" + "Batches: 100%|██████████| 1/1 [00:00<00:00, 50.38it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "INFO: Building DiskANN index for 6 vectors with metric Metric.INNER_PRODUCT...\n", - "Using Inner Product search, so need to pre-process base data into temp file. Please ensure there is additional (n*(d+1)*4) bytes for storing pre-processed base vectors, apart from the interim indices created by DiskANN and the final index.\n", - "Pre-processing base file by adding extra coordinate\n", - "✅ DiskANN index built successfully at 'knowledge'\n", - "Writing bin: knowledge_disk.index_max_base_norm.bin\n", - "bin: #pts = 1, #dims = 1, size = 12B\n", - "Finished writing bin.\n", - "Time for preprocessing data for inner product: 0.000172 seconds\n", - "Reading max_norm_of_base from knowledge_disk.index_max_base_norm.bin\n", - "Reading bin file knowledge_disk.index_max_base_norm.bin ...\n", - "Opening bin file knowledge_disk.index_max_base_norm.bin... \n", - "Metadata: #pts = 1, #dims = 1...\n", - "done.\n", - "max_norm_of_base: 1\n", - "! 
Using prepped_base file at knowledge_prepped_base.bin\n", - "Starting index build: R=32 L=64 Query RAM budget: 4.02653e+09 Indexing ram budget: 8 T: 8\n", - "getting bin metadata\n", - "Time for getting bin metadata: 0.000019 seconds\n", - "Compressing 769-dimensional data into 512 bytes per vector.\n", - "Opened: knowledge_prepped_base.bin, size: 18464, cache_size: 18464\n", - "Training data with 6 samples loaded.\n", - "Reading bin file knowledge_pq_pivots.bin ...\n", - "Opening bin file knowledge_pq_pivots.bin... \n", - "Metadata: #pts = 256, #dims = 769...\n", - "done.\n", - "PQ pivot file exists. Not generating again\n", - "Opened: knowledge_prepped_base.bin, size: 18464, cache_size: 18464\n", - "Reading bin file knowledge_pq_pivots.bin ...\n", - "Opening bin file knowledge_pq_pivots.bin... \n", - "Metadata: #pts = 4, #dims = 1...\n", - "done.\n", - "Reading bin file knowledge_pq_pivots.bin ...\n", - "Opening bin file knowledge_pq_pivots.bin... \n", - "Metadata: #pts = 256, #dims = 769...\n", - "done.\n", - "Reading bin file knowledge_pq_pivots.bin ...\n", - "Opening bin file knowledge_pq_pivots.bin... \n", - "Metadata: #pts = 769, #dims = 1...\n", - "done.\n", - "Reading bin file knowledge_pq_pivots.bin ...\n", - "Opening bin file knowledge_pq_pivots.bin... \n", - "Metadata: #pts = 513, #dims = 1...\n", - "done.\n", - "Loaded PQ pivot information\n", - "Processing points [0, 6)...done.\n", - "Time for generating quantized data: 0.055587 seconds\n", - "Full index fits in RAM budget, should consume at most 2.03973e-05GiBs, so building in one shot\n", - "L2: Using AVX2 distance computation DistanceL2Float\n", - "Passed, empty search_params while creating index config\n", - "Using only first 6 from file.. \n", - "Starting index build with 6 points... \n", - "0% of index build completed.Starting final cleanup..done. 
Link time: 0.00011s\n", - "Index built with degree: max:5 avg:5 min:5 count(deg<2):0\n", - "Not saving tags as they are not enabled.\n", - "Time taken for save: 0.000148s.\n", - "Time for building merged vamana index: 0.000836 seconds\n", - "Opened: knowledge_prepped_base.bin, size: 18464, cache_size: 18464\n", - "Vamana index file size=168\n", - "Opened: knowledge_disk.index, cache_size: 67108864\n", - "medoid: 0B\n", - "max_node_len: 3100B\n", - "nnodes_per_sector: 1B\n", - "# sectors: 6\n", - "Sector #0written\n", - "Finished writing 28672B\n", - "Writing bin: knowledge_disk.index\n", - "bin: #pts = 9, #dims = 1, size = 80B\n", - "Finished writing bin.\n", - "Output disk index file written to knowledge_disk.index\n", - "Finished writing 28672B\n", - "Time for generating disk layout: 0.040268 seconds\n", - "Opened: knowledge_prepped_base.bin, size: 18464, cache_size: 18464\n", - "Loading base knowledge_prepped_base.bin. #points: 6. #dim: 769.\n", - "Wrote 1 points to sample file: knowledge_sample_data.bin\n", - "Indexing time: 0.0970594\n", - "INFO: Leann metadata saved to knowledge.leann.meta.json\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Opened file : knowledge_disk.index\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "✅ DiskANN index loaded successfully.\n", - "INFO: LeannSearcher initialized with 'diskann' backend using index 'knowledge.leann'.\n", - "Since data is floating point, we assume that it has been appropriately pre-processed (normalization for cosine, and convert-to-l2 by adding extra dimension for MIPS). So we shall invoke an l2 distance function.\n", - "L2: Using AVX2 distance computation DistanceL2Float\n", - "L2: Using AVX2 distance computation DistanceL2Float\n", - "Before index load\n", - "Reading bin file knowledge_pq_compressed.bin ...\n", - "Opening bin file knowledge_pq_compressed.bin... 
\n", - "Metadata: #pts = 6, #dims = 512...\n", - "done.\n", - "Reading bin file knowledge_pq_pivots.bin ...\n", - "Opening bin file knowledge_pq_pivots.bin... \n", - "Metadata: #pts = 4, #dims = 1...\n", - "done.\n", - "Offsets: 4096 791560 794644 796704\n", - "Reading bin file knowledge_pq_pivots.bin ...\n", - "Opening bin file knowledge_pq_pivots.bin... \n", - "Metadata: #pts = 256, #dims = 769...\n", - "done.\n", - "Reading bin file knowledge_pq_pivots.bin ...\n", - "Opening bin file knowledge_pq_pivots.bin... \n", - "Metadata: #pts = 769, #dims = 1...\n", - "done.\n", - "Reading bin file knowledge_pq_pivots.bin ...\n", - "Opening bin file knowledge_pq_pivots.bin... \n", - "Metadata: #pts = 513, #dims = 1...\n", - "done.\n", - "Loaded PQ Pivots: #ctrs: 256, #dims: 769, #chunks: 512\n", - "Loaded PQ centroids and in-memory compressed vectors. #points: 6 #dim: 769 #aligned_dim: 776 #chunks: 512\n", - "Loading index metadata from knowledge_disk.index\n", - "Disk-Index File Meta-data: # nodes per sector: 1, max node len (bytes): 3100, max node degree: 5\n", - "Disk-Index Meta: nodes per sector: 1, max node len: 3100, max node degree: 5\n", - "Setting up thread-specific contexts for nthreads: 8\n", - "allocating ctx: 0x7a33f7204000 to thread-id:134367072315200\n", - "allocating ctx: 0x7a33f6805000 to thread-id:134355206802368\n", - "allocating ctx: 0x7a33f5e72000 to thread-id:134355217288000\n", - "allocating ctx: 0x7a33f5e61000 to thread-id:134355227773632\n", - "allocating ctx: 0x7a33f5e50000 to thread-id:134355196316736\n", - "allocating ctx: 0x7a33f5e3f000 to thread-id:134355164859840\n", - "allocating ctx: 0x7a33f5e2e000 to thread-id:134355175345472\n", - "allocating ctx: 0x7a33f5e1d000 to thread-id:134355185831104\n", - "Loading centroid data from medoids vector data of 1 medoid(s)\n", - "Reading bin file knowledge_disk.index_max_base_norm.bin ...\n", - "Opening bin file knowledge_disk.index_max_base_norm.bin... 
\n", - "Metadata: #pts = 1, #dims = 1...\n", - "done.\n", - "Setting re-scaling factor of base vectors to 1\n", - "load_from_separate_paths done.\n", - "Reading (with alignment) bin file knowledge_sample_data.bin ...Metadata: #pts = 1, #dims = 769, aligned_dim = 776... allocating aligned memory of 3104 bytes... done. Copying data to mem_aligned buffer... done.\n", - "reserve ratio: 1\n", - "Graph traversal completed, hops: 3\n", - "Loading the cache list into memory....done.\n", - "After index load\n", - "INFO: Computing embeddings for 1 chunks using 'sentence-transformers/all-mpnet-base-v2'...\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Batches: 100%|██████████| 1/1 [00:00<00:00, 60.54it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "INFO: DiskANN ZMQ mode enabled - ensuring embedding server is running\n", - "INFO: Starting session-level embedding server as a background process...\n", - "INFO: Running command from project root: /home/ubuntu/LEANN_clean/leann\n", - "INFO: Server process started with PID: 424761\n" + "M: 64 for level: 0\n", + "INFO: Converting HNSW index to CSR-pruned format...\n", + "Starting conversion: knowledge.index -> knowledge.csr.tmp\n", + "[0.00s] Reading Index HNSW header...\n", + "[0.00s] Header read: d=768, ntotal=5\n", + "[0.00s] Reading HNSW struct vectors...\n", + " Reading vector (dtype=, fmt='d')... Count=6, Bytes=48\n", + "[0.00s] Read assign_probas (6)\n", + " Reading vector (dtype=, fmt='i')... Count=7, Bytes=28\n", + "[0.12s] Read cum_nneighbor_per_level (7)\n", + " Reading vector (dtype=, fmt='i')... 
" ] }, { @@ -214,149 +73,187 @@ "name": "stdout", "output_type": "stream", "text": [ - "✅ Embedding server is up and ready for this session.\n", - "[EmbeddingServer LOG]: Initializing leann-backend-diskann...\n", - "[EmbeddingServer LOG]: WARNING: Could not import DiskANN backend: cannot import name '_diskannpy' from partially initialized module 'packages.leann-backend-diskann.leann_backend_diskann' (most likely due to a circular import) (/home/ubuntu/LEANN_clean/leann/packages/leann-backend-diskann/leann_backend_diskann/__init__.py)\n", - "[EmbeddingServer LOG]: INFO: Initializing embedding server thread on port 5555\n", - "[EmbeddingServer LOG]: INFO: Using CUDA device\n", - "[EmbeddingServer LOG]: INFO: Loading model sentence-transformers/all-mpnet-base-v2\n", - "[EmbeddingServer LOG]: INFO: Using FP16 precision with model: sentence-transformers/all-mpnet-base-v2\n", - "[EmbeddingServer LOG]: INFO: Loaded 6 demo documents\n", - "[EmbeddingServer LOG]: INFO: ZMQ ROUTER server listening on port 5555\n", - "[EmbeddingServer LOG]: INFO: Embedding server ready to serve requests\n", - "[EmbeddingServer LOG]: INFO: Received ZMQ request from client 006b8b45, size 3 bytes\n", - "[EmbeddingServer LOG]: INFO: Request for 1 node embeddings: [0]\n", - "[EmbeddingServer LOG]: DEBUG: Node ID range: 0 to 0\n", - "[EmbeddingServer LOG]: Time taken for text lookup: 0.000028 seconds\n", - "[EmbeddingServer LOG]: INFO: Total batch size: 1, max_batch_size: 128\n", - "[EmbeddingServer LOG]: INFO: Processing batch of size 1\n", - "[EmbeddingServer LOG]: Time taken for tokenization (batch): 0.019294 seconds\n", - "[EmbeddingServer LOG]: Batch size: 1, Sequence length: 256\n", - "[EmbeddingServer LOG]: Time taken for transfer to device (batch): 0.000210 seconds\n", - "[EmbeddingServer LOG]: Time taken for embedding (batch): 3.065444 seconds\n", - "[EmbeddingServer LOG]: Time taken for mean pooling (batch): 0.041810 seconds\n", - "[EmbeddingServer LOG]: INFO: Serialize time: 0.000194 
seconds\n", - "[EmbeddingServer LOG]: INFO: ZMQ E2E time: 3.128073 seconds\n", - "[EmbeddingServer LOG]: INFO: Received ZMQ request from client 006b8b45, size 7 bytes\n", - "[EmbeddingServer LOG]: INFO: Request for 5 node embeddings: [1, 2, 3, 4, 5]\n", - "[EmbeddingServer LOG]: DEBUG: Node ID range: 1 to 5\n", - "[EmbeddingServer LOG]: Time taken for text lookup: 0.000042 seconds\n", - "[EmbeddingServer LOG]: INFO: Total batch size: 5, max_batch_size: 128\n", - "[EmbeddingServer LOG]: INFO: Processing batch of size 5\n", - "[EmbeddingServer LOG]: Time taken for tokenization (batch): 0.001791 seconds\n", - "[EmbeddingServer LOG]: Batch size: 5, Sequence length: 256\n", - "[EmbeddingServer LOG]: Time taken for transfer to device (batch): 0.000112 seconds\n", - "[EmbeddingServer LOG]: Time taken for embedding (batch): 3.674183 seconds\n", - "[EmbeddingServer LOG]: Time taken for mean pooling (batch): 0.000372 seconds\n", - "[EmbeddingServer LOG]: INFO: Serialize time: 0.000177 seconds\n", - "[EmbeddingServer LOG]: INFO: ZMQ E2E time: 3.677425 seconds\n", - "[EmbeddingServer LOG]: INFO: Received ZMQ request from client 006b8b45, size 7 bytes\n", - "[EmbeddingServer LOG]: INFO: Request for 5 node embeddings: [3, 4, 2, 1, 0]\n", - "[EmbeddingServer LOG]: DEBUG: Node ID range: 0 to 4\n", - "[EmbeddingServer LOG]: Time taken for text lookup: 0.000030 seconds\n", - "[EmbeddingServer LOG]: INFO: Total batch size: 5, max_batch_size: 128\n", - "[EmbeddingServer LOG]: INFO: Processing batch of size 5\n", - "[EmbeddingServer LOG]: Time taken for tokenization (batch): 0.001550 seconds\n", - "[EmbeddingServer LOG]: Batch size: 5, Sequence length: 256\n", - "[EmbeddingServer LOG]: Time taken for transfer to device (batch): 0.000097 seconds\n", - "[EmbeddingServer LOG]: Time taken for embedding (batch): 0.009335 seconds\n", - "[EmbeddingServer LOG]: Time taken for mean pooling (batch): 0.000154 seconds\n", - "[EmbeddingServer LOG]: INFO: Serialize time: 0.000073 seconds\n", - 
"[EmbeddingServer LOG]: INFO: ZMQ E2E time: 0.011773 seconds\n", - "[EmbeddingServer LOG]: INFO: Received ZMQ request from client 006b8b45, size 7 bytes\n", - "[EmbeddingServer LOG]: INFO: Request for 5 node embeddings: [0, 1, 2, 4, 5]\n", - "[EmbeddingServer LOG]: DEBUG: Node ID range: 0 to 5\n", - "[EmbeddingServer LOG]: Time taken for text lookup: 0.000020 seconds\n", - "[EmbeddingServer LOG]: INFO: Total batch size: 5, max_batch_size: 128\n", - "[EmbeddingServer LOG]: INFO: Processing batch of size 5\n", - "[EmbeddingServer LOG]: Time taken for tokenization (batch): 0.001041 seconds\n", - "[EmbeddingServer LOG]: Batch size: 5, Sequence length: 256\n", - "[EmbeddingServer LOG]: Time taken for transfer to device (batch): 0.000125 seconds\n", - "[EmbeddingServer LOG]: Time taken for embedding (batch): 0.008972 seconds\n", - "[EmbeddingServer LOG]: Time taken for mean pooling (batch): 0.000151 seconds\n", - "[EmbeddingServer LOG]: INFO: Serialize time: 0.000048 seconds\n", - "[EmbeddingServer LOG]: INFO: ZMQ E2E time: 0.010853 seconds\n", - "[EmbeddingServer LOG]: INFO: Received ZMQ request from client 006b8b45, size 7 bytes\n", - "[EmbeddingServer LOG]: INFO: Request for 5 node embeddings: [3, 1, 0, 2, 5]\n", - "[EmbeddingServer LOG]: DEBUG: Node ID range: 0 to 5\n", - "[EmbeddingServer LOG]: Time taken for text lookup: 0.000020 seconds\n", - "[EmbeddingServer LOG]: INFO: Total batch size: 5, max_batch_size: 128\n", - "[EmbeddingServer LOG]: INFO: Processing batch of size 5\n", - "[EmbeddingServer LOG]: Time taken for tokenization (batch): 0.001350 seconds\n", - "[EmbeddingServer LOG]: Batch size: 5, Sequence length: 256\n", - "[EmbeddingServer LOG]: Time taken for transfer to device (batch): 0.000088 seconds\n", - "[EmbeddingServer LOG]: Time taken for embedding (batch): 0.008869 seconds\n", - "[EmbeddingServer LOG]: Time taken for mean pooling (batch): 0.000146 seconds\n", - "[EmbeddingServer LOG]: INFO: Serialize time: 0.000063 seconds\n", - "[EmbeddingServer 
LOG]: INFO: ZMQ E2E time: 0.011054 seconds\n", - "[EmbeddingServer LOG]: INFO: Received ZMQ request from client 006b8b45, size 7 bytes\n", - "[EmbeddingServer LOG]: INFO: Request for 5 node embeddings: [0, 2, 3, 4, 5]\n", - "[EmbeddingServer LOG]: DEBUG: Node ID range: 0 to 5\n", - "[EmbeddingServer LOG]: Time taken for text lookup: 0.000022 seconds\n", - "[EmbeddingServer LOG]: INFO: Total batch size: 5, max_batch_size: 128\n", - "[EmbeddingServer LOG]: INFO: Processing batch of size 5\n", - "[EmbeddingServer LOG]: Time taken for tokenization (batch): 0.001195 seconds\n", - "[EmbeddingServer LOG]: Batch size: 5, Sequence length: 256\n", - "[EmbeddingServer LOG]: Time taken for transfer to device (batch): 0.000087 seconds\n", - "[EmbeddingServer LOG]: Time taken for embedding (batch): 0.008903 seconds\n", - "[EmbeddingServer LOG]: Time taken for mean pooling (batch): 0.000145 seconds\n", - "[EmbeddingServer LOG]: INFO: Serialize time: 0.000060 seconds\n", - "[EmbeddingServer LOG]: INFO: ZMQ E2E time: 0.010921 seconds\n", - "[EmbeddingServer LOG]: INFO: Received ZMQ request from client 006b8b45, size 7 bytes\n", - "[EmbeddingServer LOG]: INFO: Request for 5 node embeddings: [1, 0, 3, 4, 5]\n", - "[EmbeddingServer LOG]: DEBUG: Node ID range: 0 to 5\n", - "[EmbeddingServer LOG]: Time taken for text lookup: 0.000020 seconds\n", - "[EmbeddingServer LOG]: INFO: Total batch size: 5, max_batch_size: 128\n", - "[EmbeddingServer LOG]: INFO: Processing batch of size 5\n", - "[EmbeddingServer LOG]: Time taken for tokenization (batch): 0.001188 seconds\n", - "[EmbeddingServer LOG]: Batch size: 5, Sequence length: 256\n", - "[EmbeddingServer LOG]: Time taken for transfer to device (batch): 0.000087 seconds\n", - "[EmbeddingServer LOG]: Time taken for embedding (batch): 0.008858 seconds\n", - "[EmbeddingServer LOG]: Time taken for mean pooling (batch): 0.000153 seconds\n", - "[EmbeddingServer LOG]: INFO: Serialize time: 0.000052 seconds\n", - "[EmbeddingServer LOG]: INFO: ZMQ E2E 
time: 0.010886 seconds\n", - "reserve ratio: Score: -0.481 - C++ is a powerful programming language1\n", - "Graph traversal completed, hops: 3\n", - "\n", - "Score: -1.049 - Java is a powerful programming language\n" + "Count=5, Bytes=20\n", + "[0.21s] Read levels (5)\n", + "[0.30s] Probing for compact storage flag...\n", + "[0.30s] Found compact flag: False\n", + "[0.30s] Compact flag is False, reading original format...\n", + "[0.30s] Probing for potential extra byte before non-compact offsets...\n", + "[0.30s] Found and consumed an unexpected 0x00 byte.\n", + " Reading vector (dtype=, fmt='Q')... Count=6, Bytes=48\n", + "[0.30s] Read offsets (6)\n", + "[0.39s] Attempting to read neighbors vector...\n", + " Reading vector (dtype=, fmt='i')... Count=320, Bytes=1280\n", + "[0.39s] Read neighbors (320)\n", + "[0.47s] Read scalar params (ep=4, max_lvl=0)\n", + "[0.47s] Checking for storage data...\n", + "[0.47s] Found storage fourcc: 49467849.\n", + "[0.47s] Converting to CSR format...\n", + "[0.47s] Conversion loop finished. \n", + "[0.47s] Running validation checks...\n", + " Checking total valid neighbor count...\n", + " OK: Total valid neighbors = 20\n", + " Checking final pointer indices...\n", + " OK: Final pointers match data size.\n", + "[0.47s] Deleting original neighbors and offsets arrays...\n", + " CSR Stats: |data|=20, |level_ptr|=10\n", + "[0.56s] Writing CSR HNSW graph data in FAISS-compatible order...\n", + " Pruning embeddings: Writing NULL storage marker.\n", + "[0.64s] Conversion complete." + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: mps\n", + "INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: facebook/contriever-msmarco\n", + "WARNING:sentence_transformers.SentenceTransformer:No sentence-transformers model found with name facebook/contriever-msmarco. 
Creating a new one with mean pooling.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "[EmbeddingServer LOG]: INFO: ZMQ socket timeout, continuing to listen\n", - "[EmbeddingServer LOG]: INFO: ZMQ socket timeout, continuing to listen\n", - "[EmbeddingServer LOG]: INFO: ZMQ socket timeout, continuing to listen\n", - "[EmbeddingServer LOG]: INFO: ZMQ socket timeout, continuing to listen\n", - "[EmbeddingServer LOG]: INFO: ZMQ socket timeout, continuing to listen\n", - "[EmbeddingServer LOG]: INFO: ZMQ socket timeout, continuing to listen\n", - "[EmbeddingServer LOG]: INFO: ZMQ socket timeout, continuing to listen\n", - "[EmbeddingServer LOG]: INFO: ZMQ socket timeout, continuing to listen\n", - "[EmbeddingServer LOG]: INFO: ZMQ socket timeout, continuing to listen\n", - "[EmbeddingServer LOG]: INFO: ZMQ socket timeout, continuing to listen\n" + "\n", + "✅ CSR conversion successful.\n", + "INFO: Replaced original index with CSR-pruned version at 'knowledge.index'\n", + "[read_HNSW - CSR NL v4] Reading metadata & CSR indices (manual offset)...\n", + "[read_HNSW NL v4] Read levels vector, size: 5\n", + "[read_HNSW NL v4] Reading Compact Storage format indices...\n", + "[read_HNSW NL v4] Read compact_level_ptr, size: 10\n", + "INFO: Terminating session server process (PID: 21439)...\n", + "[read_HNSW NL v4] Read compact_node_offsets, size: 6\n", + "[read_HNSW NL v4] Read entry_point: 4, max_level: 0\n", + "[read_HNSW NL v4] Read storage fourcc: 0x6c6c756e\n", + "[read_HNSW NL v4 FIX] Detected FileIOReader. 
Neighbors size field offset: 326\n", + "[read_HNSW NL v4] Reading neighbors data into memory.\n", + "[read_HNSW NL v4] Read neighbors data, size: 20\n", + "[read_HNSW NL v4] Finished reading metadata and CSR indices.\n", + "INFO: Skipping external storage loading, since is_recompute is true.\n", + "INFO: Server process terminated.\n", + "🔍 DEBUG LeannSearcher.search() called:\n", + " Query: 'C++ programming languages'\n", + " Top_k: 2\n", + " Search kwargs: {'recompute_beighbor_embeddings': True}\n", + "INFO: Computing embeddings for 1 chunks using SentenceTransformer model 'facebook/contriever-msmarco'...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Batches: 100%|██████████| 1/1 [00:00<00:00, 85.08it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Generated embedding shape: (1, 768)\n", + "🔍 DEBUG Query embedding first 10 values: [ 0.04288 -0.04135 0.0666 0.02197 -0.0881 -0.04367 -0.02835 -0.0408\n", + " -0.1254 -0.08594]\n", + "🔍 DEBUG Query embedding norm: 1.3876953125\n", + "INFO: Starting session-level embedding server for 'leann_backend_hnsw.hnsw_embedding_server'...\n", + "INFO: Running command from project root: /Users/yichuan/Desktop/code/LEANN/leann\n", + "INFO: Command: /Users/yichuan/Desktop/code/LEANN/leann/.venv/bin/python -m leann_backend_hnsw.hnsw_embedding_server --zmq-port 5557 --model-name facebook/contriever-msmarco --passages-file knowledge.leann.meta.json --disable-warmup\n", + "INFO: Server process started with PID: 21622\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. 
Disabling parallelism to avoid deadlocks...\n", + "To disable this warning, you can either:\n", + "\t- Avoid using `tokenizers` before the fork if possible\n", + "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "✅ Embedding server is up and ready for this session.\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO: Starting backend auto-discovery...\n", + "ZmqDistanceComputer initialized: d=768, metric=0\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO: Registering backend 'diskann'\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO: Backend auto-discovery finished.\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: INFO: Registering backend 'hnsw'\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: Loading tokenizer for facebook/contriever-msmarco...\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: Tokenizer loaded successfully!\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: MPS available: True\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: CUDA available: False\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: Using MPS device (Apple Silicon)\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: Starting HNSW server on port 5557 with model facebook/contriever-msmarco\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: Loading model facebook/contriever-msmarco... 
(this may take a while if downloading)\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: Model facebook/contriever-msmarco loaded successfully!\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: Loaded label map with 5 entries\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: Initialized lazy passage loading for 5 passages\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: Warmup disabled or no passages available (enable_warmup=False, passages=5)\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: HNSW ZMQ server listening on port 5557\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: Started HNSW ZMQ server thread on port 5557\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: Received ZMQ request of size 3 bytes\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: DEBUG: request_payload length: 1\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: DEBUG: request_payload[0]: - [4]\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: Request for 1 node embeddings\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: Total batch size: 1, max_batch_size: 128\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: DEBUG zmq_server_thread: Final 'hidden' array | Shape: (1, 768) | Dtype: float32 | Has NaN/Inf: False\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: Serialize time: 0.000150 seconds\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: ZMQ E2E time: 0.142946 seconds\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: Received ZMQ request of size 3849 bytes\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: DEBUG: request_payload length: 2\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: DEBUG: request_payload[0]: - [0, 1, 2, 3]\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: DEBUG: request_payload[1]: - [0.042877197265625, -0.041351318359375, 0.06658935546875, 0.02197265625, -0.08807373046875, -0.04367...\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: DEBUG: Distance calculation 
request received\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: Node IDs: [0, 1, 2, 3]\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: Query vector dim: 768\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: Passages loaded: 5\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: DEBUG: Looking up passage ID 0\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: DEBUG: Found text for ID 0, length: 37\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: DEBUG: Looking up passage ID 1\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: DEBUG: Found text for ID 1, length: 41\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: DEBUG: Looking up passage ID 2\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: DEBUG: Found text for ID 2, length: 38\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: DEBUG: Looking up passage ID 3\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: DEBUG: Found text for ID 3, length: 36\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: Sending distance response with 4 distances\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: Distance calculation E2E time: 0.173929 seconds\n", + " Backend returned: labels=2 results\n", + " Processing 2 passage IDs:\n", + " 1. passage_id='28b7b6b9-d0a4-408d-9e7f-9a7fcb7d8186' -> SUCCESS: C# is a powerful programming language...\n", + " 2. 
passage_id='1bccf691-a571-4e9d-aaed-424a30ba8604' -> SUCCESS: Python is a powerful programming language...\n", + " Final enriched results: 2 passages\n", + "[SearchResult(id='28b7b6b9-d0a4-408d-9e7f-9a7fcb7d8186', score=np.float32(1.5213046), text='C# is a powerful programming language', metadata={}), SearchResult(id='1bccf691-a571-4e9d-aaed-424a30ba8604', score=np.float32(1.2999034), text='Python is a powerful programming language', metadata={})]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[leann_backend_hnsw.hnsw_embedding_server LOG]: ZMQ socket timeout, continuing to listen\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: ZMQ socket timeout, continuing to listen\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: ZMQ socket timeout, continuing to listen\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: ZMQ socket timeout, continuing to listen\n", + "[leann_backend_hnsw.hnsw_embedding_server LOG]: ZMQ socket timeout, continuing to listen\n" ] } ], "source": [ "from leann.api import LeannBuilder, LeannSearcher\n", - "import leann_backend_diskann\n", "# 1. Build index (no embeddings stored!)\n", - "builder = LeannBuilder(backend_name=\"diskann\")\n", + "builder = LeannBuilder(backend_name=\"hnsw\")\n", + "builder.add_text(\"C# is a powerful programming language\")\n", "builder.add_text(\"Python is a powerful programming language\")\n", "builder.add_text(\"Machine learning transforms industries\") \n", "builder.add_text(\"Neural networks process complex data\")\n", - "builder.add_text(\"Java is a powerful programming language\")\n", - "builder.add_text(\"C++ is a powerful programming language\")\n", - "builder.add_text(\"C# is a powerful programming language\")\n", + "builder.add_text(\"Leann is a great storage saving engine for RAG on your macbook\")\n", "builder.build_index(\"knowledge.leann\")\n", - "\n", "# 2. 
Search with real-time embeddings\n", "searcher = LeannSearcher(\"knowledge.leann\")\n", - "results = searcher.search(\"C++ programming languages\", top_k=2,recompute_beighbor_embeddings=True)\n", - "\n", - "for result in results:\n", - " print(f\"Score: {result['score']:.3f} - {result['text']}\")" + "results = searcher.search(\"C++ programming languages\", top_k=2, recompute_beighbor_embeddings=True)\n", + "print(results)" ] } ], @@ -376,7 +273,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.11" + "version": "3.11.12" } }, "nbformat": 4,