{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Initializing leann-backend-diskann...\n", "INFO: Registering backend 'diskann'\n", "INFO: DiskANN backend loaded successfully\n", "INFO: LeannBuilder initialized with 'diskann' backend.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/home/ubuntu/LEANN_clean/leann/.venv/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "INFO: Computing embeddings for 6 chunks using 'sentence-transformers/all-mpnet-base-v2'...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Batches: 100%|██████████| 1/1 [00:00<00:00, 2.91it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "INFO: Building DiskANN index for 6 vectors with metric Metric.INNER_PRODUCT...\n", "Using Inner Product search, so need to pre-process base data into temp file. Please ensure there is additional (n*(d+1)*4) bytes for storing pre-processed base vectors, apart from the interim indices created by DiskANN and the final index.\n", "Pre-processing base file by adding extra coordinate\n", "✅ DiskANN index built successfully at 'knowledge'\n", "Writing bin: knowledge_disk.index_max_base_norm.bin\n", "bin: #pts = 1, #dims = 1, size = 12B\n", "Finished writing bin.\n", "Time for preprocessing data for inner product: 0.000172 seconds\n", "Reading max_norm_of_base from knowledge_disk.index_max_base_norm.bin\n", "Reading bin file knowledge_disk.index_max_base_norm.bin ...\n", "Opening bin file knowledge_disk.index_max_base_norm.bin... \n", "Metadata: #pts = 1, #dims = 1...\n", "done.\n", "max_norm_of_base: 1\n", "! Using prepped_base file at knowledge_prepped_base.bin\n", "Starting index build: R=32 L=64 Query RAM budget: 4.02653e+09 Indexing ram budget: 8 T: 8\n", "getting bin metadata\n", "Time for getting bin metadata: 0.000019 seconds\n", "Compressing 769-dimensional data into 512 bytes per vector.\n", "Opened: knowledge_prepped_base.bin, size: 18464, cache_size: 18464\n", "Training data with 6 samples loaded.\n", "Reading bin file knowledge_pq_pivots.bin ...\n", "Opening bin file knowledge_pq_pivots.bin... \n", "Metadata: #pts = 256, #dims = 769...\n", "done.\n", "PQ pivot file exists. Not generating again\n", "Opened: knowledge_prepped_base.bin, size: 18464, cache_size: 18464\n", "Reading bin file knowledge_pq_pivots.bin ...\n", "Opening bin file knowledge_pq_pivots.bin... \n", "Metadata: #pts = 4, #dims = 1...\n", "done.\n", "Reading bin file knowledge_pq_pivots.bin ...\n", "Opening bin file knowledge_pq_pivots.bin... \n", "Metadata: #pts = 256, #dims = 769...\n", "done.\n", "Reading bin file knowledge_pq_pivots.bin ...\n", "Opening bin file knowledge_pq_pivots.bin... \n", "Metadata: #pts = 769, #dims = 1...\n", "done.\n", "Reading bin file knowledge_pq_pivots.bin ...\n", "Opening bin file knowledge_pq_pivots.bin... 
\n", "Metadata: #pts = 513, #dims = 1...\n", "done.\n", "Loaded PQ pivot information\n", "Processing points [0, 6)...done.\n", "Time for generating quantized data: 0.055587 seconds\n", "Full index fits in RAM budget, should consume at most 2.03973e-05GiBs, so building in one shot\n", "L2: Using AVX2 distance computation DistanceL2Float\n", "Passed, empty search_params while creating index config\n", "Using only first 6 from file.. \n", "Starting index build with 6 points... \n", "0% of index build completed.Starting final cleanup..done. Link time: 0.00011s\n", "Index built with degree: max:5 avg:5 min:5 count(deg<2):0\n", "Not saving tags as they are not enabled.\n", "Time taken for save: 0.000148s.\n", "Time for building merged vamana index: 0.000836 seconds\n", "Opened: knowledge_prepped_base.bin, size: 18464, cache_size: 18464\n", "Vamana index file size=168\n", "Opened: knowledge_disk.index, cache_size: 67108864\n", "medoid: 0B\n", "max_node_len: 3100B\n", "nnodes_per_sector: 1B\n", "# sectors: 6\n", "Sector #0written\n", "Finished writing 28672B\n", "Writing bin: knowledge_disk.index\n", "bin: #pts = 9, #dims = 1, size = 80B\n", "Finished writing bin.\n", "Output disk index file written to knowledge_disk.index\n", "Finished writing 28672B\n", "Time for generating disk layout: 0.040268 seconds\n", "Opened: knowledge_prepped_base.bin, size: 18464, cache_size: 18464\n", "Loading base knowledge_prepped_base.bin. #points: 6. #dim: 769.\n", "Wrote 1 points to sample file: knowledge_sample_data.bin\n", "Indexing time: 0.0970594\n", "INFO: Leann metadata saved to knowledge.leann.meta.json\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Opened file : knowledge_disk.index\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "✅ DiskANN index loaded successfully.\n", "INFO: LeannSearcher initialized with 'diskann' backend using index 'knowledge.leann'.\n", "Since data is floating point, we assume that it has been appropriately pre-processed (normalization for cosine, and convert-to-l2 by adding extra dimension for MIPS). So we shall invoke an l2 distance function.\n", "L2: Using AVX2 distance computation DistanceL2Float\n", "L2: Using AVX2 distance computation DistanceL2Float\n", "Before index load\n", "Reading bin file knowledge_pq_compressed.bin ...\n", "Opening bin file knowledge_pq_compressed.bin... \n", "Metadata: #pts = 6, #dims = 512...\n", "done.\n", "Reading bin file knowledge_pq_pivots.bin ...\n", "Opening bin file knowledge_pq_pivots.bin... \n", "Metadata: #pts = 4, #dims = 1...\n", "done.\n", "Offsets: 4096 791560 794644 796704\n", "Reading bin file knowledge_pq_pivots.bin ...\n", "Opening bin file knowledge_pq_pivots.bin... \n", "Metadata: #pts = 256, #dims = 769...\n", "done.\n", "Reading bin file knowledge_pq_pivots.bin ...\n", "Opening bin file knowledge_pq_pivots.bin... \n", "Metadata: #pts = 769, #dims = 1...\n", "done.\n", "Reading bin file knowledge_pq_pivots.bin ...\n", "Opening bin file knowledge_pq_pivots.bin... \n", "Metadata: #pts = 513, #dims = 1...\n", "done.\n", "Loaded PQ Pivots: #ctrs: 256, #dims: 769, #chunks: 512\n", "Loaded PQ centroids and in-memory compressed vectors. 
#points: 6 #dim: 769 #aligned_dim: 776 #chunks: 512\n", "Loading index metadata from knowledge_disk.index\n", "Disk-Index File Meta-data: # nodes per sector: 1, max node len (bytes): 3100, max node degree: 5\n", "Disk-Index Meta: nodes per sector: 1, max node len: 3100, max node degree: 5\n", "Setting up thread-specific contexts for nthreads: 8\n", "allocating ctx: 0x7a33f7204000 to thread-id:134367072315200\n", "allocating ctx: 0x7a33f6805000 to thread-id:134355206802368\n", "allocating ctx: 0x7a33f5e72000 to thread-id:134355217288000\n", "allocating ctx: 0x7a33f5e61000 to thread-id:134355227773632\n", "allocating ctx: 0x7a33f5e50000 to thread-id:134355196316736\n", "allocating ctx: 0x7a33f5e3f000 to thread-id:134355164859840\n", "allocating ctx: 0x7a33f5e2e000 to thread-id:134355175345472\n", "allocating ctx: 0x7a33f5e1d000 to thread-id:134355185831104\n", "Loading centroid data from medoids vector data of 1 medoid(s)\n", "Reading bin file knowledge_disk.index_max_base_norm.bin ...\n", "Opening bin file knowledge_disk.index_max_base_norm.bin... \n", "Metadata: #pts = 1, #dims = 1...\n", "done.\n", "Setting re-scaling factor of base vectors to 1\n", "load_from_separate_paths done.\n", "Reading (with alignment) bin file knowledge_sample_data.bin ...Metadata: #pts = 1, #dims = 769, aligned_dim = 776... allocating aligned memory of 3104 bytes... done. Copying data to mem_aligned buffer... done.\n", "reserve ratio: 1\n", "Graph traversal completed, hops: 3\n", "Loading the cache list into memory....done.\n", "After index load\n", "INFO: Computing embeddings for 1 chunks using 'sentence-transformers/all-mpnet-base-v2'...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Batches: 100%|██████████| 1/1 [00:00<00:00, 60.54it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "INFO: DiskANN ZMQ mode enabled - ensuring embedding server is running\n", "INFO: Starting session-level embedding server as a background process...\n", "INFO: Running command from project root: /home/ubuntu/LEANN_clean/leann\n", "INFO: Server process started with PID: 424761\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "✅ Embedding server is up and ready for this session.\n", "[EmbeddingServer LOG]: Initializing leann-backend-diskann...\n", "[EmbeddingServer LOG]: WARNING: Could not import DiskANN backend: cannot import name '_diskannpy' from partially initialized module 'packages.leann-backend-diskann.leann_backend_diskann' (most likely due to a circular import) (/home/ubuntu/LEANN_clean/leann/packages/leann-backend-diskann/leann_backend_diskann/__init__.py)\n", "[EmbeddingServer LOG]: INFO: Initializing embedding server thread on port 5555\n", "[EmbeddingServer LOG]: INFO: Using CUDA device\n", "[EmbeddingServer LOG]: INFO: Loading model sentence-transformers/all-mpnet-base-v2\n", "[EmbeddingServer LOG]: INFO: Using FP16 precision with model: sentence-transformers/all-mpnet-base-v2\n", "[EmbeddingServer LOG]: INFO: Loaded 6 demo documents\n", "[EmbeddingServer LOG]: INFO: ZMQ ROUTER server listening on port 5555\n", "[EmbeddingServer LOG]: INFO: Embedding server ready to serve requests\n", "[EmbeddingServer LOG]: INFO: Received ZMQ request from client 006b8b45, size 3 bytes\n", "[EmbeddingServer LOG]: INFO: Request for 1 node embeddings: [0]\n", "[EmbeddingServer LOG]: DEBUG: Node ID range: 0 to 0\n", "[EmbeddingServer LOG]: Time taken for text lookup: 0.000028 seconds\n", "[EmbeddingServer LOG]: INFO: Total batch 
size: 1, max_batch_size: 128\n", "[EmbeddingServer LOG]: INFO: Processing batch of size 1\n", "[EmbeddingServer LOG]: Time taken for tokenization (batch): 0.019294 seconds\n", "[EmbeddingServer LOG]: Batch size: 1, Sequence length: 256\n", "[EmbeddingServer LOG]: Time taken for transfer to device (batch): 0.000210 seconds\n", "[EmbeddingServer LOG]: Time taken for embedding (batch): 3.065444 seconds\n", "[EmbeddingServer LOG]: Time taken for mean pooling (batch): 0.041810 seconds\n", "[EmbeddingServer LOG]: INFO: Serialize time: 0.000194 seconds\n", "[EmbeddingServer LOG]: INFO: ZMQ E2E time: 3.128073 seconds\n", "[EmbeddingServer LOG]: INFO: Received ZMQ request from client 006b8b45, size 7 bytes\n", "[EmbeddingServer LOG]: INFO: Request for 5 node embeddings: [1, 2, 3, 4, 5]\n", "[EmbeddingServer LOG]: DEBUG: Node ID range: 1 to 5\n", "[EmbeddingServer LOG]: Time taken for text lookup: 0.000042 seconds\n", "[EmbeddingServer LOG]: INFO: Total batch size: 5, max_batch_size: 128\n", "[EmbeddingServer LOG]: INFO: Processing batch of size 5\n", "[EmbeddingServer LOG]: Time taken for tokenization (batch): 0.001791 seconds\n", "[EmbeddingServer LOG]: Batch size: 5, Sequence length: 256\n", "[EmbeddingServer LOG]: Time taken for transfer to device (batch): 0.000112 seconds\n", "[EmbeddingServer LOG]: Time taken for embedding (batch): 3.674183 seconds\n", "[EmbeddingServer LOG]: Time taken for mean pooling (batch): 0.000372 seconds\n", "[EmbeddingServer LOG]: INFO: Serialize time: 0.000177 seconds\n", "[EmbeddingServer LOG]: INFO: ZMQ E2E time: 3.677425 seconds\n", "[EmbeddingServer LOG]: INFO: Received ZMQ request from client 006b8b45, size 7 bytes\n", "[EmbeddingServer LOG]: INFO: Request for 5 node embeddings: [3, 4, 2, 1, 0]\n", "[EmbeddingServer LOG]: DEBUG: Node ID range: 0 to 4\n", "[EmbeddingServer LOG]: Time taken for text lookup: 0.000030 seconds\n", "[EmbeddingServer LOG]: INFO: Total batch size: 5, max_batch_size: 128\n", "[EmbeddingServer LOG]: INFO: Processing batch of size 5\n", "[EmbeddingServer LOG]: Time taken for tokenization (batch): 0.001550 seconds\n", "[EmbeddingServer LOG]: Batch size: 5, Sequence length: 256\n", "[EmbeddingServer LOG]: Time taken for transfer to device (batch): 0.000097 seconds\n", "[EmbeddingServer LOG]: Time taken for embedding (batch): 0.009335 seconds\n", "[EmbeddingServer LOG]: Time taken for mean pooling (batch): 0.000154 seconds\n", "[EmbeddingServer LOG]: INFO: Serialize time: 0.000073 seconds\n", "[EmbeddingServer LOG]: INFO: ZMQ E2E time: 0.011773 seconds\n", "[EmbeddingServer LOG]: INFO: Received ZMQ request from client 006b8b45, size 7 bytes\n", "[EmbeddingServer LOG]: INFO: Request for 5 node embeddings: [0, 1, 2, 4, 5]\n", "[EmbeddingServer LOG]: DEBUG: Node ID range: 0 to 5\n", "[EmbeddingServer LOG]: Time taken for text lookup: 0.000020 seconds\n", "[EmbeddingServer LOG]: INFO: Total batch size: 5, max_batch_size: 128\n", "[EmbeddingServer LOG]: INFO: Processing batch of size 5\n", "[EmbeddingServer LOG]: Time taken for tokenization (batch): 0.001041 seconds\n", "[EmbeddingServer LOG]: Batch size: 5, Sequence length: 256\n", "[EmbeddingServer LOG]: Time taken for transfer to device (batch): 0.000125 seconds\n", "[EmbeddingServer LOG]: Time taken for embedding (batch): 0.008972 seconds\n", "[EmbeddingServer LOG]: Time taken for mean pooling (batch): 0.000151 seconds\n", "[EmbeddingServer LOG]: INFO: Serialize time: 0.000048 seconds\n", "[EmbeddingServer LOG]: INFO: ZMQ E2E time: 0.010853 seconds\n", "[EmbeddingServer LOG]: INFO: Received 
ZMQ request from client 006b8b45, size 7 bytes\n", "[EmbeddingServer LOG]: INFO: Request for 5 node embeddings: [3, 1, 0, 2, 5]\n", "[EmbeddingServer LOG]: DEBUG: Node ID range: 0 to 5\n", "[EmbeddingServer LOG]: Time taken for text lookup: 0.000020 seconds\n", "[EmbeddingServer LOG]: INFO: Total batch size: 5, max_batch_size: 128\n", "[EmbeddingServer LOG]: INFO: Processing batch of size 5\n", "[EmbeddingServer LOG]: Time taken for tokenization (batch): 0.001350 seconds\n", "[EmbeddingServer LOG]: Batch size: 5, Sequence length: 256\n", "[EmbeddingServer LOG]: Time taken for transfer to device (batch): 0.000088 seconds\n", "[EmbeddingServer LOG]: Time taken for embedding (batch): 0.008869 seconds\n", "[EmbeddingServer LOG]: Time taken for mean pooling (batch): 0.000146 seconds\n", "[EmbeddingServer LOG]: INFO: Serialize time: 0.000063 seconds\n", "[EmbeddingServer LOG]: INFO: ZMQ E2E time: 0.011054 seconds\n", "[EmbeddingServer LOG]: INFO: Received ZMQ request from client 006b8b45, size 7 bytes\n", "[EmbeddingServer LOG]: INFO: Request for 5 node embeddings: [0, 2, 3, 4, 5]\n", "[EmbeddingServer LOG]: DEBUG: Node ID range: 0 to 5\n", "[EmbeddingServer LOG]: Time taken for text lookup: 0.000022 seconds\n", "[EmbeddingServer LOG]: INFO: Total batch size: 5, max_batch_size: 128\n", "[EmbeddingServer LOG]: INFO: Processing batch of size 5\n", "[EmbeddingServer LOG]: Time taken for tokenization (batch): 0.001195 seconds\n", "[EmbeddingServer LOG]: Batch size: 5, Sequence length: 256\n", "[EmbeddingServer LOG]: Time taken for transfer to device (batch): 0.000087 seconds\n", "[EmbeddingServer LOG]: Time taken for embedding (batch): 0.008903 seconds\n", "[EmbeddingServer LOG]: Time taken for mean pooling (batch): 0.000145 seconds\n", "[EmbeddingServer LOG]: INFO: Serialize time: 0.000060 seconds\n", "[EmbeddingServer LOG]: INFO: ZMQ E2E time: 0.010921 seconds\n", "[EmbeddingServer LOG]: INFO: Received ZMQ request from client 006b8b45, size 7 bytes\n", "[EmbeddingServer LOG]: INFO: Request for 5 node embeddings: [1, 0, 3, 4, 5]\n", "[EmbeddingServer LOG]: DEBUG: Node ID range: 0 to 5\n", "[EmbeddingServer LOG]: Time taken for text lookup: 0.000020 seconds\n", "[EmbeddingServer LOG]: INFO: Total batch size: 5, max_batch_size: 128\n", "[EmbeddingServer LOG]: INFO: Processing batch of size 5\n", "[EmbeddingServer LOG]: Time taken for tokenization (batch): 0.001188 seconds\n", "[EmbeddingServer LOG]: Batch size: 5, Sequence length: 256\n", "[EmbeddingServer LOG]: Time taken for transfer to device (batch): 0.000087 seconds\n", "[EmbeddingServer LOG]: Time taken for embedding (batch): 0.008858 seconds\n", "[EmbeddingServer LOG]: Time taken for mean pooling (batch): 0.000153 seconds\n", "[EmbeddingServer LOG]: INFO: Serialize time: 0.000052 seconds\n", "[EmbeddingServer LOG]: INFO: ZMQ E2E time: 0.010886 seconds\n", "reserve ratio: Score: -0.481 - C++ is a powerful programming language1\n", "Graph traversal completed, hops: 3\n", "\n", "Score: -1.049 - Java is a powerful programming language\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "[EmbeddingServer LOG]: INFO: ZMQ socket timeout, continuing to listen\n", "[EmbeddingServer LOG]: INFO: ZMQ socket timeout, continuing to listen\n", "[EmbeddingServer LOG]: INFO: ZMQ socket timeout, continuing to listen\n", "[EmbeddingServer LOG]: INFO: ZMQ socket timeout, continuing to listen\n", "[EmbeddingServer LOG]: INFO: ZMQ socket timeout, continuing to listen\n", "[EmbeddingServer LOG]: INFO: ZMQ socket timeout, continuing to listen\n", 
"[EmbeddingServer LOG]: INFO: ZMQ socket timeout, continuing to listen\n", "[EmbeddingServer LOG]: INFO: ZMQ socket timeout, continuing to listen\n", "[EmbeddingServer LOG]: INFO: ZMQ socket timeout, continuing to listen\n", "[EmbeddingServer LOG]: INFO: ZMQ socket timeout, continuing to listen\n" ] } ], "source": [ "from leann.api import LeannBuilder, LeannSearcher\n", "import leann_backend_diskann\n", "# 1. Build index (no embeddings stored!)\n", "builder = LeannBuilder(backend_name=\"diskann\")\n", "builder.add_text(\"Python is a powerful programming language\")\n", "builder.add_text(\"Machine learning transforms industries\") \n", "builder.add_text(\"Neural networks process complex data\")\n", "builder.add_text(\"Java is a powerful programming language\")\n", "builder.add_text(\"C++ is a powerful programming language\")\n", "builder.add_text(\"C# is a powerful programming language\")\n", "builder.build_index(\"knowledge.leann\")\n", "\n", "# 2. Search with real-time embeddings\n", "searcher = LeannSearcher(\"knowledge.leann\")\n", "results = searcher.search(\"C++ programming languages\", top_k=2,recompute_beighbor_embeddings=True)\n", "\n", "for result in results:\n", " print(f\"Score: {result['score']:.3f} - {result['text']}\")" ] } ], "metadata": { "kernelspec": { "display_name": ".venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.11" } }, "nbformat": 4, "nbformat_minor": 2 }