commit 46f6cc100b31aace55bba1d94d168097e12947fa Author: yichuan520030910320 Date: Mon Jun 30 09:05:05 2025 +0000 Initial commit diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..898ed5c --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +paper_plot/data/big_graph_degree_data.npz filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore new file mode 100755 index 0000000..0ad5a54 --- /dev/null +++ b/.gitignore @@ -0,0 +1,72 @@ +raw_data/ +scaling_out/ +scaling_out_old/ +sanity_check/ +demo/indices/ +# .vscode/ +*.log +*pycache* +outputs/ +*.pkl +.history/ +scripts/ +lm_eval.egg-info/ +demo/experiment_results/**/*.json +*.jsonl +*.sh +*.txt +!CMakeLists.txt +latency_breakdown*.json +experiment_results/eval_results/diskann/*.json +aws/ +.venv/ +.cursor/rules/ +*.egg-info/ +skip_reorder_comparison/ +analysis_results/ +build/ +.cache/ +nprobe_logs/ +micro/results +micro/contriever-INT8 +*.qdstrm +benchmark_results/ +results/ +frac_*.png +final_in_*.png +embedding_comparison_results/ +*.ind +*.gz +*.fvecs +*.ivecs +*.index +*.bin + +read_graph +analyze_diskann_graph +degree_distribution.png +micro/degree_distribution.png + +policy_results_* +results_*/ +experiment_results/ +.DS_Store + +# The above are inherited from old Power RAG repo + +# Python-generated files +__pycache__/ +*.py[oc] +build/ +dist/ +wheels/ +*.egg-info + +# Virtual environments +.venv +.env + +test_indices*/ +test_*.py +!tests/** +packages/leann-backend-diskann/third_party/DiskANN/_deps/ diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..8c49b3e --- /dev/null +++ b/.gitmodules @@ -0,0 +1,6 @@ +[submodule "packages/leann-backend-diskann/third_party/DiskANN"] + path = packages/leann-backend-diskann/third_party/DiskANN + url = https://github.com/yichuan520030910320/DiskANN.git +[submodule "packages/leann-backend-hnsw/third_party/faiss"] + path = packages/leann-backend-hnsw/third_party/faiss + url = https://github.com/yichuan520030910320/faiss.git diff --git a/.python-version b/.python-version new file mode 100644 index 0000000..2c07333 --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.11 diff --git a/.vscode/extensions.json b/.vscode/extensions.json new file mode 100644 index 0000000..e793901 --- /dev/null +++ b/.vscode/extensions.json @@ -0,0 +1,9 @@ +{ + "recommendations": [ + "llvm-vs-code-extensions.vscode-clangd", + "ms-python.python", + "ms-vscode.cmake-tools", + "vadimcn.vscode-lldb", + "eamodio.gitlens", + ] +} \ No newline at end of file diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100755 index 0000000..5f96fb2 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,283 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. 
+ // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + // new emdedder + { + "name": "New Embedder", + "type": "debugpy", + "request": "launch", + "program": "demo/main.py", + "console": "integratedTerminal", + "args": [ + "--search", + "--use-original", + "--domain", + "dpr", + "--nprobe", + "5000", + "--load", + "flat", + "--embedder", + "intfloat/multilingual-e5-small" + ] + } + //python /home/ubuntu/Power-RAG/faiss/demo/simple_build.py + { + "name": "main.py", + "type": "debugpy", + "request": "launch", + "program": "demo/main.py", + "console": "integratedTerminal", + "cwd": "${workspaceFolder}", + "args": [ + "--query", + "1000", + "--load", + "bm25" + ] + }, + { + "name": "Simple Build", + "type": "lldb", + "request": "launch", + "program": "${workspaceFolder}/.venv/bin/python", + "console": "integratedTerminal", + "cwd": "${workspaceFolder}", + "args": [ + "faiss/demo/simple_build.py" + ], + "env": { + "LD_PRELOAD": "/lib/x86_64-linux-gnu/libmkl_core.so:/lib/x86_64-linux-gnu/libmkl_intel_thread.so:/lib/x86_64-linux-gnu/libmkl_intel_lp64.so:/lib/x86_64-linux-gnu/libiomp5.so" + } + }, + //# Fix for Intel MKL error + //export LD_PRELOAD=/lib/x86_64-linux-gnu/libmkl_core.so:/lib/x86_64-linux-gnu/libmkl_intel_thread.so:/lib/x86_64-linux-gnu/libmkl_intel_lp64.so:/lib/x86_64-linux-gnu/libiomp5.so + //python faiss/demo/build_demo.py + { + "name": "Build Demo", + "type": "lldb", + "request": "launch", + "program": "${workspaceFolder}/.venv/bin/python", + "console": "integratedTerminal", + "cwd": "${workspaceFolder}", + "args": [ + "faiss/demo/build_demo.py" + ], + "env": { + "LD_PRELOAD": "/lib/x86_64-linux-gnu/libmkl_core.so:/lib/x86_64-linux-gnu/libmkl_intel_thread.so:/lib/x86_64-linux-gnu/libmkl_intel_lp64.so:/lib/x86_64-linux-gnu/libiomp5.so" + } + }, + { + "name": "DiskANN Serve", + "type": "lldb", + "request": "launch", + "program": "${workspaceFolder}/.venv/bin/python", + "console": "integratedTerminal", + "cwd": "${workspaceFolder}", + "args": [ + "demo/main.py", + "--mode", + "serve", + "--engine", + "sglang", + "--load-indices", + "diskann", + "--domain", + "rpj_wiki", + "--lazy-load", + "--recompute-beighbor-embeddings", + "--port", + "8082", + "--diskann-search-memory-maximum", + "2", + "--diskann-graph", + "240", + "--search-only" + ], + "env": { + "PYTHONPATH": "${workspaceFolder}/faiss_repo/build/faiss/python:$PYTHONPATH" + }, + "preLaunchTask": "CMake: build", + }, + { + "name": "DiskANN Serve MAC", + "type": "lldb", + "request": "launch", + "program": "${workspaceFolder}/.venv/bin/python", + "console": "integratedTerminal", + "cwd": "${workspaceFolder}", + "args": [ + "demo/main.py", + "--mode", + "serve", + "--engine", + "ollama", + "--load-indices", + "diskann", + "--domain", + "rpj_wiki", + "--lazy-load", + "--recompute-beighbor-embeddings" + ], + "preLaunchTask": "CMake: build", + "env": { + "KMP_DUPLICATE_LIB_OK": "TRUE", + "OMP_NUM_THREADS": "1", + "MKL_NUM_THREADS": "1", + "DYLD_INSERT_LIBRARIES": "/Users/ec2-user/Power-RAG/.venv/lib/python3.10/site-packages/torch/lib/libomp.dylib", + "KMP_BLOCKTIME": "0" + } + }, + { + "name": "Python Debugger: Current File with Arguments", + "type": "debugpy", + "request": "launch", + "program": "ric/main_ric.py", + "console": "integratedTerminal", + "cwd": "${workspaceFolder}", + "args": [ + "--config-name", + "${input:configSelection}" + ], + "justMyCode": false + }, + //python ./demo/validate_equivalence.py sglang + { + "name": "Validate Equivalence", + 
"type": "debugpy", + "request": "launch", + "program": "demo/validate_equivalence.py", + "console": "integratedTerminal", + "args": [ + "sglang" + ], + }, + //python demo/retrieval_demo.py --engine sglang --skip-embeddings --domain dpr --load-indices flat ivf_flat + { + "name": "Retrieval Demo", + "type": "debugpy", + "request": "launch", + "program": "demo/retrieval_demo.py", + "console": "integratedTerminal", + "args": [ + "--engine", + "vllm", + "--skip-embeddings", + "--domain", + "dpr", + "--load-indices", + // "flat", + "ivf_flat" + ], + }, + //python demo/retrieval_demo.py --engine sglang --skip-embeddings --domain dpr --load-indices diskann --hnsw-M 64 --hnsw-efConstruction 150 --hnsw-efSearch 128 --hnsw-sq-bits 8 + { + "name": "Retrieval Demo DiskANN", + "type": "debugpy", + "request": "launch", + "program": "demo/retrieval_demo.py", + "console": "integratedTerminal", + "args": [ + "--engine", + "sglang", + "--skip-embeddings", + "--domain", + "dpr", + "--load-indices", + "diskann", + "--hnsw-M", + "64", + "--hnsw-efConstruction", + "150", + "--hnsw-efSearch", + "128", + "--hnsw-sq-bits", + "8" + ], + }, + { + "name": "Find Probe", + "type": "debugpy", + "request": "launch", + "program": "find_probe.py", + "console": "integratedTerminal", + "cwd": "${workspaceFolder}", + }, + { + "name": "Python: Attach", + "type": "debugpy", + "request": "attach", + "processId": "${command:pickProcess}", + "justMyCode": true + }, + { + "name": "Edge RAG", + "type": "lldb", + "request": "launch", + "program": "${workspaceFolder}/.venv/bin/python", + "console": "integratedTerminal", + "cwd": "${workspaceFolder}", + "args": [ + "edgerag_demo.py" + ], + "env": { + "LD_PRELOAD": "/lib/x86_64-linux-gnu/libiomp5.so /lib/x86_64-linux-gnu/libmkl_core.so /lib/x86_64-linux-gnu/libmkl_intel_lp64.so /lib/x86_64-linux-gnu/libmkl_intel_thread.so", + "MKL_NUM_THREADS": "1", + "OMP_NUM_THREADS": "1", + } + }, + { + "name": "Launch Embedding Server", + "type": "debugpy", + "request": "launch", + "program": "demo/embedding_server.py", + "console": "integratedTerminal", + "cwd": "${workspaceFolder}", + "args": [ + "--domain", + "rpj_wiki", + "--zmq-port", + "5556", + ] + }, + { + "name": "HNSW Serve", + "type": "lldb", + "request": "launch", + "program": "${workspaceFolder}/.venv/bin/python", + "console": "integratedTerminal", + "cwd": "${workspaceFolder}", + "args": [ + "demo/main.py", + "--domain", + "rpj_wiki", + "--load", + "hnsw", + "--mode", + "serve", + "--search", + "--skip-pa", + "--recompute", + "--hnsw-old" + ], + "env": { + "LD_PRELOAD": "/lib/x86_64-linux-gnu/libmkl_core.so:/lib/x86_64-linux-gnu/libmkl_intel_thread.so:/lib/x86_64-linux-gnu/libmkl_intel_lp64.so:/lib/x86_64-linux-gnu/libiomp5.so" + } + }, + ], + "inputs": [ + { + "id": "configSelection", + "type": "pickString", + "description": "Select a configuration", + "options": [ + "example_config", + "vllm_gritlm" + ], + "default": "example_config" + } + ], +} \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100755 index 0000000..e2b5794 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,43 @@ +{ + "python.analysis.extraPaths": [ + "./sglang_repo/python" + ], + "cmake.sourceDirectory": "${workspaceFolder}/DiskANN", + "cmake.configureArgs": [ + "-DPYBIND=True", + "-DUPDATE_EDITABLE_INSTALL=ON", + ], + "cmake.environment": { + "PATH": "/Users/ec2-user/Power-RAG/.venv/bin:${env:PATH}" + }, + "cmake.buildDirectory": "${workspaceFolder}/build", + "files.associations": { + "*.tcc": "cpp", + "deque": 
"cpp", + "string": "cpp", + "unordered_map": "cpp", + "vector": "cpp", + "map": "cpp", + "unordered_set": "cpp", + "atomic": "cpp", + "inplace_vector": "cpp", + "*.ipp": "cpp", + "forward_list": "cpp", + "list": "cpp", + "any": "cpp", + "system_error": "cpp", + "__hash_table": "cpp", + "__split_buffer": "cpp", + "__tree": "cpp", + "ios": "cpp", + "set": "cpp", + "__string": "cpp", + "string_view": "cpp", + "ranges": "cpp", + "iosfwd": "cpp" + }, + "lldb.displayFormat": "auto", + "lldb.showDisassembly": "auto", + "lldb.dereferencePointers": true, + "lldb.consoleMode": "commands", +} \ No newline at end of file diff --git a/.vscode/tasks.json b/.vscode/tasks.json new file mode 100644 index 0000000..4edc48b --- /dev/null +++ b/.vscode/tasks.json @@ -0,0 +1,16 @@ +{ + "version": "2.0.0", + "tasks": [ + { + "type": "cmake", + "label": "CMake: build", + "command": "build", + "targets": [ + "all" + ], + "group": "build", + "problemMatcher": [], + "detail": "CMake template build task" + } + ] +} \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100755 index 0000000..0419215 --- /dev/null +++ b/LICENSE @@ -0,0 +1,9 @@ +MIT License + +Copyright (c) 2024 Rulin Shao + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/README.md b/README.md new file mode 100755 index 0000000..5f85426 --- /dev/null +++ b/README.md @@ -0,0 +1,292 @@ +# 🚀 LEANN: A Low-Storage Vector Index + +

+ Python 3.9+ + MIT License + PRs Welcome + Platform +

+ +

+ ⚡ Real-time embedding computation for large-scale RAG on consumer hardware +

+ +

+ Quick Start • + Features • + Benchmarks • + Documentation • + Paper +

+ +--- + +## 🌟 What is Leann? + +**Leann** revolutionizes Retrieval-Augmented Generation (RAG) by eliminating the storage bottleneck of traditional vector databases. Instead of pre-computing and storing billions of embeddings, Leann dynamically computes embeddings at query time using highly optimized graph-based search algorithms. + +### 🎯 Why Leann? + +Traditional RAG systems face a fundamental trade-off: +- **💾 Storage**: Storing embeddings for millions of documents requires massive disk space +- **🔄 Freshness**: Pre-computed embeddings become stale when documents change +- **💰 Cost**: Vector databases are expensive to scale + +**Leann solves this by:** +- ✅ **Zero embedding storage** - Only graph structure is persisted +- ✅ **Real-time computation** - Embeddings computed on-demand with ms latency +- ✅ **Memory efficient** - Runs on consumer hardware (8GB RAM) +- ✅ **Always fresh** - No stale embeddings, ever + +## 🚀 Quick Start + +### Installation + +```bash +git clone https://github.com/yichuan520030910320/Power-RAG.git leann +cd leann +uv sync +``` + +### 30-Second Example + +```python +from leann.api import LeannBuilder, LeannSearcher + +# 1. Build index (no embeddings stored!) +builder = LeannBuilder(backend_name="diskann") +builder.add_text("Python is a powerful programming language") +builder.add_text("Machine learning transforms industries") +builder.add_text("Neural networks process complex data") +builder.build_index("knowledge.leann") + +# 2. Search with real-time embeddings +searcher = LeannSearcher("knowledge.leann") +results = searcher.search("programming languages", top_k=2) + +for result in results: + print(f"Score: {result['score']:.3f} - {result['text']}") +``` + +### Run the Demo + +```bash +uv run examples/document_search.py +``` + +**PDF RAG Demo (using LlamaIndex for document parsing and Leann for indexing/search)** + +This demo showcases how to build a RAG system for PDF documents using Leann. +1. Place your PDF files (and other supported formats like .docx, .pptx, .xlsx) into the `examples/data/` directory. +2. Ensure you have an `OPENAI_API_KEY` set in your environment variables or in a `.env` file for the LLM to function. 
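+
+Conceptually, the demo parses and chunks your documents with LlamaIndex, hands the chunk texts to `LeannBuilder`, and then feeds retrieved chunks to an LLM for answering (which is why `OPENAI_API_KEY` is needed). The sketch below covers the parsing/indexing/retrieval part; the reader and splitter settings are illustrative assumptions, not the exact code in `examples/main_cli_example.py`:
+
+```python
+from llama_index.core import SimpleDirectoryReader
+from llama_index.core.node_parser import SentenceSplitter
+
+from leann.api import LeannBuilder, LeannSearcher
+
+# Parse PDFs (and .docx/.pptx/.xlsx) placed under examples/data/
+documents = SimpleDirectoryReader("examples/data").load_data()
+
+# Chunk the parsed text; chunk_size/chunk_overlap are illustrative defaults
+splitter = SentenceSplitter(chunk_size=512, chunk_overlap=64)
+nodes = splitter.get_nodes_from_documents(documents)
+
+# Build a Leann index over the chunks (no embeddings are persisted)
+builder = LeannBuilder(backend_name="diskann")
+for node in nodes:
+    builder.add_text(node.get_content())
+builder.build_index("pdf_knowledge.leann")
+
+# Retrieve the chunks most relevant to a question
+searcher = LeannSearcher("pdf_knowledge.leann")
+for hit in searcher.search("What does the paper propose?", top_k=3):
+    print(f"{hit['score']:.3f}  {hit['text'][:80]}")
+```
+
+To run the packaged demo: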
+ +```bash +uv run examples/main_cli_example.py +``` + +## ✨ Features + +### 🔥 Core Features +- **📊 Multiple Distance Functions**: L2, Cosine, MIPS (Maximum Inner Product Search) +- **🏗️ Pluggable Backends**: DiskANN, HNSW/FAISS with unified API +- **🔄 Real-time Embeddings**: Dynamic computation using optimized ZMQ servers +- **📈 Scalable Architecture**: Handles millions of documents on consumer hardware +- **🎯 Graph Pruning**: Advanced techniques for memory-efficient search + +### 🛠️ Technical Highlights +- **Zero-copy operations** for maximum performance +- **SIMD-optimized** distance computations (AVX2/AVX512) +- **Async embedding pipeline** with batched processing +- **Memory-mapped indices** for fast startup +- **Recompute mode** for highest accuracy scenarios + +### 🎨 Developer Experience +- **Simple Python API** - Get started in minutes +- **Extensible backend system** - Easy to add new algorithms +- **Comprehensive examples** - From basic usage to production deployment +- **Rich debugging tools** - Built-in performance profiling + +## 📊 Benchmarks + +### Memory Usage Comparison + +| System | 1M Documents | 10M Documents | 100M Documents | +|--------|-------------|---------------|----------------| +| Traditional Vector DB | 3.1 GB | 31 GB | 310 GB | +| **Leann** | **180 MB** | **1.2 GB** | **8.4 GB** | +| **Reduction** | **94.2%** | **96.1%** | **97.3%** | + +### Query Performance + +| Backend | Index Size | Query Time | Recall@10 | +|---------|------------|------------|-----------| +| DiskANN | 1M docs | 12ms | 0.95 | +| DiskANN + Recompute | 1M docs | 145ms | 0.98 | +| HNSW | 1M docs | 8ms | 0.93 | + +*Benchmarks run on AMD Ryzen 7 with 32GB RAM* + +## 🏗️ Architecture + +``` +┌─────────────────┐ ┌──────────────────┐ ┌─────────────────┐ +│ Query Text │───▶│ Embedding │───▶│ Graph-based │ +│ │ │ Computation │ │ Search │ +└─────────────────┘ └──────────────────┘ └─────────────────┘ + │ │ + ▼ ▼ + ┌──────────────┐ ┌──────────────┐ + │ ZMQ Server │ │ Pruned Graph │ + │ (Cached) │ │ Index │ + └──────────────┘ └──────────────┘ +``` + +### Key Components + +1. **🧠 Embedding Engine**: Real-time transformer inference with caching +2. **📊 Graph Index**: Memory-efficient navigation structures +3. **🔄 Search Coordinator**: Orchestrates embedding + graph search +4. **⚡ Backend Adapters**: Pluggable algorithm implementations + +## 🎓 Supported Models & Backends + +### 🤖 Embedding Models +- **sentence-transformers/all-mpnet-base-v2** (default) +- **sentence-transformers/all-MiniLM-L6-v2** (lightweight) +- Any HuggingFace sentence-transformer model +- Custom model support via API + +### 🔧 Search Backends +- **DiskANN**: Microsoft's billion-scale ANN algorithm +- **HNSW**: Hierarchical Navigable Small World graphs +- **Coming soon**: ScaNN, Faiss-IVF, NGT + +### 📏 Distance Functions +- **L2**: Euclidean distance for precise similarity +- **Cosine**: Angular similarity for normalized vectors +- **MIPS**: Maximum Inner Product Search for recommendation systems + +## 🔬 Paper + +If you find Leann useful, please cite: + +**[LEANN: A Low-Storage Vector Index](https://arxiv.org/abs/2506.08276)** + +```bibtex +@misc{wang2025leannlowstoragevectorindex, + title={LEANN: A Low-Storage Vector Index}, + author={Yichuan Wang and Shu Liu and Zhifei Li and Yongji Wu and Ziming Mao and Yilong Zhao and Xiao Yan and Zhiying Xu and Yang Zhou and Ion Stoica and Sewon Min and Matei Zaharia and Joseph E. 
Gonzalez}, + year={2025}, + eprint={2506.08276}, + archivePrefix={arXiv}, + primaryClass={cs.DB}, + url={https://arxiv.org/abs/2506.08276}, +} +``` + +## 🌍 Use Cases + +### 💼 Enterprise RAG +```python +# Handle millions of documents with limited resources +builder = LeannBuilder( + backend_name="diskann", + distance_metric="cosine", + graph_degree=64, + memory_budget="4GB" +) +``` + +### 🔬 Research & Experimentation +```python +# Quick prototyping with different algorithms +for backend in ["diskann", "hnsw"]: + searcher = LeannSearcher(index_path, backend=backend) + evaluate_recall(searcher, queries, ground_truth) +``` + +### 🚀 Real-time Applications +```python +# Sub-second response times +chat = LeannChat("knowledge.leann") +response = chat.ask("What is quantum computing?") +# Returns in <100ms with recompute mode +``` + +## 🤝 Contributing + +We welcome contributions! Leann is built by the community, for the community. + +### Ways to Contribute +- 🐛 **Bug Reports**: Found an issue? Let us know! +- 💡 **Feature Requests**: Have an idea? We'd love to hear it! +- 🔧 **Code Contributions**: PRs welcome for all skill levels +- 📖 **Documentation**: Help make Leann more accessible +- 🧪 **Benchmarks**: Share your performance results + +### Development Setup +```bash +git clone https://github.com/yourname/leann +cd leann +uv sync --dev +uv run pytest tests/ +``` + +### Quick Tests +```bash +# Sanity check all distance functions +uv run python tests/sanity_checks/test_distance_functions.py + +# Verify L2 implementation +uv run python tests/sanity_checks/test_l2_verification.py +``` + +## 📈 Roadmap + +### 🎯 Q1 2024 +- [x] DiskANN backend with MIPS/L2/Cosine support +- [x] HNSW backend integration +- [x] Real-time embedding pipeline +- [x] Memory-efficient graph pruning + +### 🚀 Q2 2024 +- [ ] Distributed search across multiple nodes +- [ ] ScaNN backend support +- [ ] Advanced caching strategies +- [ ] Kubernetes deployment guides + +### 🌟 Q3 2024 +- [ ] GPU-accelerated embedding computation +- [ ] Approximate distance functions +- [ ] Integration with LangChain/LlamaIndex +- [ ] Visual similarity search + +## 💬 Community + +Join our growing community of researchers and engineers! + +- 🐦 **Twitter**: [@LeannAI](https://twitter.com/LeannAI) +- 💬 **Discord**: [Join our server](https://discord.gg/leann) +- 📧 **Email**: leann@yourcompany.com +- 🐙 **GitHub Discussions**: [Ask questions here](https://github.com/yourname/leann/discussions) + +## 📄 License + +MIT License - see [LICENSE](LICENSE) for details. + +## 🙏 Acknowledgments + +- **Microsoft Research** for the DiskANN algorithm +- **Meta AI** for FAISS and optimization insights +- **HuggingFace** for the transformer ecosystem +- **Our amazing contributors** who make this possible + +--- + +

+ ⭐ Star us on GitHub if Leann is useful for your research or applications! +

+ +

+ Made with ❤️ by the Leann team +

\ No newline at end of file diff --git a/demo.ipynb b/demo.ipynb new file mode 100644 index 0000000..a27c93b --- /dev/null +++ b/demo.ipynb @@ -0,0 +1,248 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO: LeannBuilder initialized with 'diskann' backend.\n", + "INFO: Computing embeddings for 6 chunks using 'sentence-transformers/all-mpnet-base-v2'...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Batches: 100%|██████████| 1/1 [00:00<00:00, 77.61it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO: Building DiskANN index for 6 vectors with metric Metric.INNER_PRODUCT...\n", + "Using Inner Product search, so need to pre-process base data into temp file. Please ensure there is additional (n*(d+1)*4) bytes for storing pre-processed base vectors, apart from the interim indices created by DiskANN and the final index.\n", + "Pre-processing base file by adding extra coordinate\n", + "✅ DiskANN index built successfully at 'knowledge'\n", + "Writing bin: knowledge_disk.index_max_base_norm.bin\n", + "bin: #pts = 1, #dims = 1, size = 12B\n", + "Finished writing bin.\n", + "Time for preprocessing data for inner product: 0.000165 seconds\n", + "Reading max_norm_of_base from knowledge_disk.index_max_base_norm.bin\n", + "Reading bin file knowledge_disk.index_max_base_norm.bin ...\n", + "Opening bin file knowledge_disk.index_max_base_norm.bin... \n", + "Metadata: #pts = 1, #dims = 1...\n", + "done.\n", + "max_norm_of_base: 1\n", + "! Using prepped_base file at knowledge_prepped_base.bin\n", + "Starting index build: R=32 L=64 Query RAM budget: 4.02653e+09 Indexing ram budget: 8 T: 8\n", + "getting bin metadata\n", + "Time for getting bin metadata: 0.000008 seconds\n", + "Compressing 769-dimensional data into 512 bytes per vector.\n", + "Opened: knowledge_prepped_base.bin, size: 18464, cache_size: 18464\n", + "Training data with 6 samples loaded.\n", + "Reading bin file knowledge_pq_pivots.bin ...\n", + "Opening bin file knowledge_pq_pivots.bin... \n", + "Metadata: #pts = 256, #dims = 769...\n", + "done.\n", + "PQ pivot file exists. Not generating again\n", + "Opened: knowledge_prepped_base.bin, size: 18464, cache_size: 18464\n", + "Reading bin file knowledge_pq_pivots.bin ...\n", + "Opening bin file knowledge_pq_pivots.bin... \n", + "Metadata: #pts = 4, #dims = 1...\n", + "done.\n", + "Reading bin file knowledge_pq_pivots.bin ...\n", + "Opening bin file knowledge_pq_pivots.bin... \n", + "Metadata: #pts = 256, #dims = 769...\n", + "done.\n", + "Reading bin file knowledge_pq_pivots.bin ...\n", + "Opening bin file knowledge_pq_pivots.bin... \n", + "Metadata: #pts = 769, #dims = 1...\n", + "done.\n", + "Reading bin file knowledge_pq_pivots.bin ...\n", + "Opening bin file knowledge_pq_pivots.bin... \n", + "Metadata: #pts = 513, #dims = 1...\n", + "done.\n", + "Loaded PQ pivot information\n", + "Processing points [0, 6)...done.\n", + "Time for generating quantized data: 0.023918 seconds\n", + "Full index fits in RAM budget, should consume at most 2.03973e-05GiBs, so building in one shot\n", + "L2: Using AVX2 distance computation DistanceL2Float\n", + "Passed, empty search_params while creating index config\n", + "Using only first 6 from file.. \n", + "Starting index build with 6 points... \n", + "0% of index build completed.Starting final cleanup..done. 
Link time: 9e-05s\n", + "Index built with degree: max:5 avg:5 min:5 count(deg<2):0\n", + "Not saving tags as they are not enabled.\n", + "Time taken for save: 0.000178s.\n", + "Time for building merged vamana index: 0.000579 seconds\n", + "Opened: knowledge_prepped_base.bin, size: 18464, cache_size: 18464\n", + "Vamana index file size=168\n", + "Opened: knowledge_disk.index, cache_size: 67108864\n", + "medoid: 0B\n", + "max_node_len: 3100B\n", + "nnodes_per_sector: 1B\n", + "# sectors: 6\n", + "Sector #0written\n", + "Finished writing 28672B\n", + "Writing bin: knowledge_disk.index\n", + "bin: #pts = 9, #dims = 1, size = 80B\n", + "Finished writing bin.\n", + "Output disk index file written to knowledge_disk.index\n", + "Finished writing 28672B\n", + "Time for generating disk layout: 0.043488 seconds\n", + "Opened: knowledge_prepped_base.bin, size: 18464, cache_size: 18464\n", + "Loading base knowledge_prepped_base.bin. #points: 6. #dim: 769.\n", + "Wrote 1 points to sample file: knowledge_sample_data.bin\n", + "Indexing time: 0.0684344\n", + "INFO: Leann metadata saved to knowledge.leann.meta.json\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Opened file : knowledge_disk.index\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Since data is floating point, we assume that it has been appropriately pre-processed (normalization for cosine, and convert-to-l2 by adding extra dimension for MIPS). So we shall invoke an l2 distance function.\n", + "L2: Using AVX2 distance computation DistanceL2Float\n", + "L2: Using AVX2 distance computation DistanceL2Float\n", + "Before index load\n", + "✅ DiskANN index loaded successfully.\n", + "INFO: LeannSearcher initialized with 'diskann' backend using index 'knowledge.leann'.\n", + "Reading bin file knowledge_pq_compressed.bin ...\n", + "Opening bin file knowledge_pq_compressed.bin... \n", + "Metadata: #pts = 6, #dims = 512...\n", + "done.\n", + "Reading bin file knowledge_pq_pivots.bin ...\n", + "Opening bin file knowledge_pq_pivots.bin... \n", + "Metadata: #pts = 4, #dims = 1...\n", + "done.\n", + "Offsets: 4096 791560 794644 796704\n", + "Reading bin file knowledge_pq_pivots.bin ...\n", + "Opening bin file knowledge_pq_pivots.bin... \n", + "Metadata: #pts = 256, #dims = 769...\n", + "done.\n", + "Reading bin file knowledge_pq_pivots.bin ...\n", + "Opening bin file knowledge_pq_pivots.bin... \n", + "Metadata: #pts = 769, #dims = 1...\n", + "done.\n", + "Reading bin file knowledge_pq_pivots.bin ...\n", + "Opening bin file knowledge_pq_pivots.bin... \n", + "Metadata: #pts = 513, #dims = 1...\n", + "done.\n", + "Loaded PQ Pivots: #ctrs: 256, #dims: 769, #chunks: 512\n", + "Loaded PQ centroids and in-memory compressed vectors. 
#points: 6 #dim: 769 #aligned_dim: 776 #chunks: 512\n", + "Loading index metadata from knowledge_disk.index\n", + "Disk-Index File Meta-data: # nodes per sector: 1, max node len (bytes): 3100, max node degree: 5\n", + "Disk-Index Meta: nodes per sector: 1, max node len: 3100, max node degree: 5\n", + "Setting up thread-specific contexts for nthreads: 8\n", + "allocating ctx: 0x78348f4de000 to thread-id:132170359560000\n", + "allocating ctx: 0x78348f4cd000 to thread-id:132158431693760\n", + "allocating ctx: 0x78348f4bc000 to thread-id:132158442179392\n", + "allocating ctx: 0x78348f4ab000 to thread-id:132158421208128\n", + "allocating ctx: 0x78348f49a000 to thread-id:132158452665024\n", + "allocating ctx: 0x78348f489000 to thread-id:132158389751232\n", + "allocating ctx: 0x78348f478000 to thread-id:132158410722496\n", + "allocating ctx: 0x78348f467000 to thread-id:132158400236864\n", + "Loading centroid data from medoids vector data of 1 medoid(s)\n", + "Reading bin file knowledge_disk.index_max_base_norm.bin ...\n", + "Opening bin file knowledge_disk.index_max_base_norm.bin... \n", + "Metadata: #pts = 1, #dims = 1...\n", + "done.\n", + "Setting re-scaling factor of base vectors to 1\n", + "load_from_separate_paths done.\n", + "Reading (with alignment) bin file knowledge_sample_data.bin ...Metadata: #pts = 1, #dims = 769, aligned_dim = 776... allocating aligned memory of 3104 bytes... done. Copying data to mem_aligned buffer... done.\n", + "reserve ratio: 1\n", + "Graph traversal completed, hops: 3\n", + "Loading the cache list into memory....done.\n", + "After index load\n", + "Clearing scratch\n", + "INFO: Computing embeddings for 1 chunks using 'sentence-transformers/all-mpnet-base-v2'...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Batches: 100%|██████████| 1/1 [00:00<00:00, 92.66it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Score: -0.481 - C++ is a powerful programming language\n", + "Score: -1.049 - Java is a powerful programming language\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "reserve ratio: 1\n", + "Graph traversal completed, hops: 3\n" + ] + } + ], + "source": [ + "from leann.api import LeannBuilder, LeannSearcher\n", + "import leann_backend_diskann\n", + "# 1. Build index (no embeddings stored!)\n", + "builder = LeannBuilder(backend_name=\"diskann\")\n", + "builder.add_text(\"Python is a powerful programming language\")\n", + "builder.add_text(\"Machine learning transforms industries\") \n", + "builder.add_text(\"Neural networks process complex data\")\n", + "builder.add_text(\"Java is a powerful programming language\")\n", + "builder.add_text(\"C++ is a powerful programming language\")\n", + "builder.add_text(\"C# is a powerful programming language\")\n", + "builder.build_index(\"knowledge.leann\")\n", + "\n", + "# 2. 
Search with real-time embeddings\n", + "searcher = LeannSearcher(\"knowledge.leann\")\n", + "results = searcher.search(\"C++ programming languages\", top_k=2)\n", + "\n", + "for result in results:\n", + " print(f\"Score: {result['score']:.3f} - {result['text']}\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/data/2506.08276v1.pdf b/examples/data/2506.08276v1.pdf new file mode 100644 index 0000000..2756eef Binary files /dev/null and b/examples/data/2506.08276v1.pdf differ
;;Eo#̨AwtsPC -ƪ]N+{]/f+߯jSMr9;qSavïVN@A k1Qh f %kBƤlNT[z@et!@a#({^޶c}ݞ+w}~JM,HiB@U-p< +z$=l=a Wy +b}hƱiK]QtϹ#_|ߴ-jJ*aªPNlt2|P;Z'vYp1!5DOx +sȐ|ov=:iEYXskC/Bb]?RdA-4`~'StɕG~3,di*;C/;ěng'$R;0J9DJq{ #oi|r}\{?(ܼX8ʷzC)/a_-c +̓OrdnL@ >6>P2 )7D|sX~ۻf3nq1.dL&yIfv |k_9 Il<ѷ:Y [ք'N)u&QM/eǟ{۵j~553yv▓j8#w? +^ 'Ն?nQqVje2)Og~k#2T2gpAh(LH 6d&O]`9z88llw'XPj >z}5R,riɸOfC؇Z*/ %EX{S`ձf-#Hx\?Nq>(0 +W;x冚Tc/]X5<4˂[`nY_ _*?]r>ƇXT9˻޿Qb`Z7GB59~%ӵd\`VoyUAw*B5ıbWdJs'jԠNx$\ctP^mt O2Prj {墨6 F&k(.X75L{ +(e2;}Y}mנX5rűpgj풬'# {Mw +!7P Z7p6[jlv-ۏ;_ +NC+(qIB?T|E|6!K?4ڡaV pq=n@Se > +z\%z?c>Уna[5(&HSܜ\M+c9>L*Ī:ϫ-J4U#GdGa쯣ֱMATZ"~֗} +Iki!g; t؎Lj5GABm1Y"{hU_jF7TM +n7u +45M\^ugºtSS" oIÎB+BjQ NL9An~ 6y~8O'TL \nTb~huEtTE&r&kLTsyy*O^n酆Gx;eю^|#SQo]@&DP@107 !s,S :ظg,ǃ-U-z/67Pд'>8@ ~#ѸõgtP(e&Cc"D𺢟bm(b=z,Tb`v8\ +r2 wq j , +feT9W  )0mXdJ>aQ|#'>9ҚYF-:MRo.x-Vo{E5Z\dJxNI1D>0F@8^rS# Ǘ!mšlXʠt~JPu ͥϿ. tmxN51E5L9N꽮39- '7'{bÍb 9JFy0*E)zE)(&,o" +#혊GORI0 $F-ڬt+,~^L}v,+< +endstream +endobj +153 0 obj +<< /ColorSpace 87 0 R /ExtGState 88 0 R /Font << /F198 89 0 R /F201 90 0 R /F204 91 0 R /F206 92 0 R /F209 93 0 R /F212 94 0 R /F293 579 0 R /F297 575 0 R /F301 576 0 R >> /Pattern 97 0 R /ProcSet [ /PDF /Text ] /XObject << /Im1 580 0 R >> >> +endobj +154 0 obj +<< /A << /D (algorithm.1) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 177.368 545.772 183.901 556.96 ] /Subtype /Link /Type /Annot >> +endobj +155 0 obj +<< /A << /D (ALG@line.9) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 227.264 509.907 233.796 521.095 ] /Subtype /Link /Type /Annot >> +endobj +156 0 obj +<< /A << /D (section.4) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 212.831 350.504 223.981 361.877 ] /Subtype /Link /Type /Annot >> +endobj +157 0 obj +<< /A << /D (section.5) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 280.367 326.594 291.518 337.966 ] /Subtype /Link /Type /Annot >> +endobj +158 0 obj +<< /A << /D (figure.caption.4) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 259.586 298.699 266.198 309.887 ] /Subtype /Link /Type /Annot >> +endobj +159 0 obj +<< /A << /D (subsection.8.1) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 120.789 226.968 138.951 238.156 ] /Subtype /Link /Type /Annot >> +endobj +160 0 obj +<< /A << /D (section.5) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 113.82 179.147 125.129 190.519 ] /Subtype /Link /Type /Annot >> +endobj +161 0 obj +<< /A << /D (subsection.4.1) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 179.83 107.416 198.17 118.788 ] /Subtype /Link /Type /Annot >> +endobj +162 0 obj +<< /A << /D (figure.caption.4) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 344.746 533.817 351.371 545.189 ] /Subtype /Link /Type /Annot >> +endobj +163 0 obj +<< /A << /D (subsection.4.2) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 447.21 497.951 465.372 509.324 ] /Subtype /Link /Type /Annot >> +endobj +164 0 obj +<< /A << /D (subsection.4.1) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 536.343 386.131 554.828 397.504 ] /Subtype /Link /Type /Annot >> +endobj +165 0 obj +<< /A << /D (subsection.4.2) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 460.082 362.221 478.244 373.593 ] /Subtype /Link /Type /Annot >> +endobj +166 0 obj +<< /A << /D (equation.5.1) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 391.263 324.125 397.981 335.497 ] /Subtype /Link /Type /Annot >> +endobj +167 0 obj +<< /A << /D (ALG@line.20) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 371.239 204.573 377.809 215.761 ] /Subtype /Link 
/Type /Annot >> +endobj +168 0 obj +<< /A << /D (ALG@line.12) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 544.04 180.663 555.344 192.035 ] /Subtype /Link /Type /Annot >> +endobj +169 0 obj +<< /A << /D (ALG@line.14) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 487.024 85.021 498.18 96.394 ] /Subtype /Link /Type /Annot >> +endobj +170 0 obj +<< /Filter /FlateDecode /Length 4583 >> +stream +xڭ;َȑ5(a9]{`k1}`,h2zGdD&Um heF̌Ȋv]]t+h'v"R.*TZ fȮc7n^.D&]B8 ~?<쳠 ?|/#>סj|iqZlۺ8 E"طOuW4rOCY}qհ?8Tw'_v1\@ںۢ!8CqCLe8hC&F;4&]c$~F@vi&jΜs>a#,s8fY`v:ÕE8,S'S[gq(⯅DRP 4]"xߜh8σ>}Y,Vi 3B~Re:"HHgx, vZe S0o[e(`F0pM S#,Y@PE 4'ByeHSwG*8 ^wzAGkM_t)X -R(װ06쾔p~~ klh$T+QJ^y],̩`G%p@K-ikZVϋCeWUtbj'o񆅕6G̣NU# 08D] G6Pquޑ w0ĩ4TX$y6ܴY4s\՝ L0N؎?U^߁lqGPfwAY=Wu((*-5$:(SL~ђӘgpL'AXeVU3~(tX$7<i &zAвFnzxoNw:Pd1ӱ4{y,lU-ޚZZՕHP=]p)ְA @k*(~&7BYBUyclV֕UIR~*0vEwH^I?Q'}CKZ8Ltv +M*ҵK++0Rωm"tBWa=eI~Ccx4 D +br&֦ &N)$ 66Khp87N1j7T4GIm}bgV2 +ZŌǖmXE~;6LLCBC,U -qxpsu!A}II3":|WfL"Wlu0 NK+x,72\e1KCcZ Xof*6U}W}]H +}&Ao}O8/%mWL–UWPH/JkUw~` < T$H'++&p:e=q1$h svj?boˍ?݆yF9 aN^=k ’#1 +왎co97p(i +ص.9@I4 +|rg[{`:4Bg w6T&I(2ƖO\5F+܃ȕ~Hs~:ME#EB 6;ʠ8u=x-ڄċHoST<ЈR҈-5:;4LMٚtR %+yytnPp~"Bg+жMr`cKS_1YzSQD$be!2T|f5FoeϓqtCs=w7㰗6_B0(ئR@:J$k)Q#<62:/tĹ/C)h2"\){|Rrpndte/줙2w-/2 i g:ɿ4{=Zrz Eݎ nHDs ukz*/}5ߗb3]>ӹ;- b4w ]kPgLǽµ9qt&q(0 +ˎ&cؤlơtA[ +1f#秽D|c\^q 0zOI#ֿ!*Zu{CXg GTR4! IF`+=PGj)MOEOՊ.=mѦ4TY-l82;gC8gC ,&R)Oəubu,={PkRYټS-Çdj]N飗Zw& ЩohB]>ai?nroܭ@Ƶb3W&.|?mK!ƀ ]slvlqthLu (ܴ$nFBlcd Аͧ͸"?]^%o:<%*3; gz5=C3`Lqp'~2T79W1)IM+- z܃-/>{קf!>V[2uE$J]){reDwVd&;0jX=d~pٔ5W3A2\nj?oAh^v{-}\X 2j/DL&Rdp[`@6mPqTe/I{oGM2|•%3[}Ow5̅ssU>]RD@U0 m79] if!zqD7T*L3ƌx̴^Y˕[_)8Nh}X[lD).22K@*ǐᮔmæf( +iٲSRaf@ݦɢ; 䛷6 ^:+̭GHkJqwxŝH=2SHݻ:G10Fץ)"!:Յ#\=8K?lH][,h$>OC[\.Aw^)ξ+Qf 72C Js~ AC"HeŌ)%EG/1'a037gpyNH0^.aJZm sN7:-i݋>|r>`ZY#^iWUբ72 Xɼ<( B$ O[1J*,@9<"+BࣃaA )pAJ*~^BY?wz8 q5:gBGoM\b=VN4a(a17@+D>LX/WF&/@d*4˛T:ԙs2Unj>E9 +gE 3@yֽTZm>g2Xr>5˦m"}]Rs96γ$Rmxf._VEX;Ec|ʢ|] e U%rpitYk &Ҹ/L'5١j?k$NvogWj# .i/N{7v]GߔR2Yw.d!SZՖ&ٳ5JMymijFl{QDN<ŗϻ6Mdm>_Յ +endstream +endobj +171 0 obj +<< /ColorSpace 87 0 R /ExtGState 88 0 R /Font << /F198 89 0 R /F201 90 0 R /F206 92 0 R /F209 93 0 R >> /Pattern 97 0 R /ProcSet [ /PDF /Text ] /XObject << /Im2 581 0 R >> >> +endobj +172 0 obj +<< /A << /D (cite.douze2018link) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 476.44 637.196 487.698 645.45 ] /Subtype /Link /Type /Annot >> +endobj +173 0 obj +<< /A << /D (ALG@line.16) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 543.385 515.407 554.828 526.78 ] /Subtype /Link /Type /Annot >> +endobj +174 0 obj +<< /A << /D (cite.cui2022dvabatch) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 448.993 422.002 460.25 430.257 ] /Subtype /Link /Type /Annot >> +endobj +175 0 obj +<< /A << /D (cite.zhu2024nanoflow) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 463.161 422.002 474.419 430.257 ] /Subtype /Link /Type /Annot >> +endobj +176 0 obj +<< /A << /D (algorithm.1) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 436.407 371.945 442.939 383.133 ] /Subtype /Link /Type /Annot >> +endobj +177 0 obj +<< /A << /D (subsection.4.1) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 485.163 217.166 503.648 227.716 ] /Subtype /Link /Type /Annot >> +endobj +178 0 obj +<< /A << /D (cite.diskann) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 434.556 158.989 445.813 167.243 ] /Subtype /Link /Type /Annot >> +endobj +179 0 obj +<< /Filter /FlateDecode /Length 6041 >> +stream +xڽ\[sD~?A.Ptaa0Nb[֞72T%-Ju˗9Z]eW_W~+qeU!4b꧟%*KzWRgi \o狛WUyUU.󫛻+\ 4Kf?z;K%r_?z+a"ͅ173]%2̊Wl6M} 
ys3SfR,Ҟ**VyyW:5YIkG #KJ +Q^fw?"IȲ796\ѽo;ZWW?8p dz7͋d(u2*8"O.ݻv⦺oV!s*aK9*n1^ߚɵ2q$'+W^6I :/٭TejQ43bߘ㡋{ uWy7 &&ϩD˪F %L\]pJZ3= s tˇ85K g W2UdYDfRIcHq>UMFtXorQXyі&L-Hu4['p?933*4 (d`쌘:bk:2:_]8`n(5u>HK\̂\́xrz ˃뫲bDqcGyl +xF oF`ZK}oEq]{@CUpP,>i#iyv}[4g}v--Hg +*Bh5md}I_83!ד 9mԀt?9.Zf['#]EZ  uxw/y,|w"!R +MIj`r"5}ȿO倄UDay1mJ +O^oy6ݞlC-ԴrFyA*mAJ?9 B< 1hh~0ea9%PR5w~^s`YyN7Wj8#e OKP` 'J~e +5sEq^wh3_^Mk2=bg|]|2UUca'`Vqal}J˩Q *L*NMREvQB%44۵==kæt4>ܕ4s,ܺboBtl^j~ s۴/..anԿTmt)5 t<l]&aU]/K@FpAwHPתhƅŴ004%LGxгnl.-b%ka"c~2VͿxvOw/+^R6n.INkZ h/-1-Т"׻ު ~K=;fcuM FТ8fRcT8ɻ5_˗dEv-5 Osò\_^tfy +'ZHsQ@ a}y\ЯK, Ssb%2'wOOHBֶvԮvtMm%8g{}{H(o.J,IuG'4K:pq 3_Q S,]5qk +t(Įo t !s^##{U$_/ZxFz/0K0]捵05[Rr2ͬ0v@N:z}aH[fV3'&_v!$S"M#|(@z~<j Z1F`0cM9"fҹ]N9چ1i +)7;xJMr 'P J2 zP@, +4ǪFjUbg_@rp ȑfJ^O$) n2U|?&SEkjlf=lg2u}jKQõ +UiD:zl6Ӻ%vc:|u{\rY6S Bgt*a.3$eWtw]@_=M֬߷q 'و '>WV33fބ8pM8). AX&dKm!];G.;=ǾO1:uZ.w'v0+$c]vjw!IbLIxHNQ4xφ/G1c(PJ}x.)n8y<}Kp'_usJ҂Y#gG.Ϸ.s.TBJSEә,|sl|60$hi%n"7S|N{WqޢFducyx:'tt ݖ +"Ը@ѳ OvLǖEO.wnw.80AR)nxI2D?uG>vi|Ԗ+ [TmqsA꾠+\zFbIK%*ø8: - T^E05e0~ +Fh?t*]p8@+1܀7F n I֖C- ve*SL1Wu82z14j]dv|iW_?)Hrn|L.eS|\8Ȇ-~7^Wcܪ?wc֤mbT,mbes.j G09JA%/ ^> 4ņXzjP "3ͯ/3E~zB73mX{tZZiLc3|#0@bfU3^ \rɁ$m;ΗzUUMG( ?Utҁrc t=գB֜謳v +]v)L '!ҷ=);9j:]?8t8OL/Li9*+ՑćfPT9%QKI/]Tc \TofJB'zxfe\Ї7i5p>P %Fr6[}6NO/ \>MOVTd8 ,.3eqP&*A&'jk^!`!ǫ1Z(w֧j]c6k."Uv lpXoFII&J}ZO Ž~4O*T=*;P> +;kFv*̷n<4E6t2YVFfU Z{3dmlJ{pdKbyfNJX~MC]?Eu鋄 `JܕS(95D88zU5g|}> פgNYyg5&>7.LO${9" ~ o6q^"Ntd`D ߲YD魧Uaci \$UiCpu-Gl]m]t@X_gP4C,t +dFPtUNϲ%V+OY{x㬠\X{a}wf +,EQ^hHVh.*;A.LcVxH6T]0YX+[7˸&_ 2O?d(asDQR9[7D +ddGzK#ޅ| ნ~nT\˦\vnl vuIEEĨR.WS*U%D Lj8;~v,XƗH#)Muڣ:-,Eq,aC؈~Xpje#NIQik<]abyP c`)(s΁+dz҉% vȜ[>4æ}CNF!7z;i|PGu'*B(}\$D"DxׅrW08vݝRQ1s!Da`cYhk!e|?ZVPP3>~2RzN}ԷvUĜ{W/xSۓD +(,'J,°VI)MGgxܭ2^TaQ؝>?߃aR爡L( 6?T`w={H,GxqPV*C2*\E4 +endstream +endobj +180 0 obj +<< /ColorSpace 87 0 R /ExtGState 88 0 R /Font << /F198 89 0 R /F201 90 0 R /F204 91 0 R /F206 92 0 R /F209 93 0 R /F293 579 0 R /F297 575 0 R /F301 576 0 R >> /Pattern 97 0 R /ProcSet [ /PDF /Text ] >> +endobj +181 0 obj +<< /A << /D (algorithm.1) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 288.645 595.519 295.27 606.892 ] /Subtype /Link /Type /Annot >> +endobj +182 0 obj +<< /A << /D (section.3) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 131.491 312.169 143.016 323.357 ] /Subtype /Link /Type /Annot >> +endobj +183 0 obj +<< /A << /D (cite.severo2025lossless) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 69.279 266.585 80.537 274.839 ] /Subtype /Link /Type /Annot >> +endobj +184 0 obj +<< /A << /D (Hfootnote.2) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 361.053 569.59 366.926 582.038 ] /Subtype /Link /Type /Annot >> +endobj +185 0 obj +<< /A << /D (algorithm.1) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 551.727 386.711 558.343 397.9 ] /Subtype /Link /Type /Annot >> +endobj +186 0 obj +<< /A << /D (Hfootnote.3) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 536.728 362.801 542.601 375.249 ] /Subtype /Link /Type /Annot >> +endobj +187 0 obj +<< /A << /D (figure.caption.15) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 526.941 326.936 538.384 338.124 ] /Subtype /Link /Type /Annot >> +endobj +188 0 obj +<< /A << /D (subsection.6.4) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 507.569 255.205 525.407 266.393 ] /Subtype /Link /Type /Annot >> +endobj +189 0 obj +<< /A << /D (figure.caption.6) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 499.567 219.339 506.211 230.527 ] /Subtype /Link /Type /Annot >> +endobj +190 0 obj +<< /A << /D 
(section*.5) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 443.925 159.563 450.643 170.751 ] /Subtype /Link /Type /Annot >> +endobj +191 0 obj +<< /Filter /FlateDecode /Length 5910 >> +stream +xڽ\[6~Pjk5# ]NvgoxN!GK"Rv&t(N=HFNx^d_eW~_jaxt~h"c4~!T2nտ<|x^Z"LKf/6_잻c3lw+gKy_?#(4W0yKwŲ[ix}>S]N-Ë_=LV,fj dJ? 3bK !Se,= K{8 Y +ҡ86M[+bl<+\ &E}&͗D1^ُ`Zm;Xu;TNv\5ύ'?hTiR̀Yg\q]m}u_pIKJpM]ņVqQ)EʂE` zi t3U*o,.2 .UғRR_2s3@I^b%e:u[%';W pJfQ>Y-W-):ǂvcF>~hW1ĵ@,4 e9w^ +O7,ˍciw%B\R?6#$W}g@`_]bdD$ЋaKٞB]˖oT PSx;O'V +~)o!ՅsHLjn,cxGznIC業`Yc5ep'#(%AHpRv6ޚL @vպ& iJ/p1^P[ȱK OHQΙ,QVΆ0˿7Ps"=?_OH\mp`شnT)&eɔ0q. զ*zA{ؽӳ{)WGfRaK&-8Ʊ2dW%Gll"@c^n冚c7)3>\a VnnװœQlzzdh Z&Ki/>[bwo֖ \& dcO=~:9ֻ L}nIh?R (JЀ`(,]8`wjOͦvM[wGnLB5if;4]6zZ8C)'c?TXė0pm]m5f?+<CCi\/ qm~}l;BSG/8О0y:USYL'oប :$yZ`uJPY]\~Oぜ{+= +4. wOO;Rp]{q#N^AN/˸OFR&3^G[d*9(A"6=T@>NN ԮR YjkJ{V鑷lk@u n+>AMO/n= {7P)2( ~ES;xKah~[ +W"@xg*ݟ:-z? Eq*$fc;Chܽկd>N*"@jƋ8A`´Z#اWG?D|bU + =i2"M(&Ml1a5)SU[n=d\8ӱZL4fd|sOLƃz 0h,ƲWAĈ\Y) |g%h\۴3"pvіri{ޯrv?DR:ձw lv6]?r| f O'a3ੳ J+;+ +ߪpF3SbX71+pd(Ϛ-Kȏs e/_v.+?*|Q*e:Pic!lBwAY;|xX=6g}i9YLU,>b0:"T~OJ.Nʅu0q+ƒc0ANYJ~n}i4 W LdDۆ` M;mȎ0vc \cԆY9eFv`O,!8~sXF۸m< Kus8T</gZ1PJ +-h}5lTLyV1^ƂqЄ ʜ@%hGKZ()Se-4z%WVi]!QW;WS:K ƲEiZr=:.ћ|%rЊ~F7 nO0Є >Bt#Pe?H}1o{MKh@)P}3Q"XP Ҋ7I0DH}J +ML=CE{ afCT0R"jᅹ"@+DF5NĬq&lDD3$ #"Wu8lnULrP`Tq+.y +λXǺF_ұ=K(_-V6 #l`NAЛ֛k4HR'0ރ%^%xpeKR>a5ҡI UVI0 3 y(-LiGS[E~B,m> nc4% +t%2ͤ^T=&46lcWl!a*?kI6֕=$5A4)ј1lM@TN"OE!=_8 D3 {;dJ z}>d'*5 ^'-y}"\?Ky߾晋|o4{ݯ.;vZEP(ي2ґ(T}\K 2>Tٰ1=Tc98=(Ԍ*ӑ^'P+U/=XHq= Jk0r{z愙NcwצAD_Dх'64A9VBz{*Ʃ=j,Bk"Rdu#ccqCǍbvܴPEqkt./}ETKD _LC|d ,FwZ! b]sq_(cl+[:T< yA;8MZ-b.0|wOϬ.GG;3yD4=-c:ޥt `k$6npYNעo_A/8B lgKxJMѕBze: 91&~2,gp)Mxi[ϾLYȚ0F(yq^ !#8w{aGZ(мÉ&Mo#~|Z- +}B͑K 6' F^̗P9ZhĹ +(OF{"mRPelSu1(;8<6nK2z0j"Zޣj֨p}Kca9}qԘ2HO…A=+HsUfOLc?e'=Lvb\E@u,"NdƏNBv64MN'Q/ /=ֺ)r4VGlT_!*eqcլ}ug%O,-IZLp*[ +endstream +endobj +192 0 obj +<< /ColorSpace 87 0 R /ExtGState 88 0 R /Font << /F198 89 0 R /F201 90 0 R /F204 91 0 R /F206 92 0 R /F209 93 0 R /F293 579 0 R /F297 575 0 R /F301 576 0 R /F311 578 0 R /F357 582 0 R /F70 583 0 R >> /Pattern 97 0 R /ProcSet [ /PDF /Text ] /XObject << /Im3 584 0 R >> >> +endobj +193 0 obj +<< /A << /D (ALG@line.10) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 154.261 670.824 165.334 682.197 ] /Subtype /Link /Type /Annot >> +endobj +194 0 obj +<< /A << /D (ALG@line.8) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 131.318 646.914 137.851 658.286 ] /Subtype /Link /Type /Annot >> +endobj +195 0 obj +<< /A << /D (cite.munyampirwa2024down) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 142.054 613.375 153.312 621.539 ] /Subtype /Link /Type /Annot >> +endobj +196 0 obj +<< /A << /D (cite.hmann) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 155.581 613.285 166.839 621.539 ] /Subtype /Link /Type /Annot >> +endobj +197 0 obj +<< /A << /D (ALG@line.4) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 284.985 599.093 291.518 610.466 ] /Subtype /Link /Type /Annot >> +endobj +198 0 obj +<< /A << /D (ALG@line.10) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 282.653 539.318 293.939 550.506 ] /Subtype /Link /Type /Annot >> +endobj +199 0 obj +<< /A << /D (ALG@line.13) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 80.732 503.452 91.805 514.824 ] /Subtype /Link /Type /Annot >> +endobj +200 0 obj +<< /A << /D (subsection.6.2) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 194.264 333.052 212.426 344.37 ] /Subtype /Link /Type /Annot >> +endobj +201 0 obj +<< /A << /D (subsection.6.3) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 
99.748 285.515 117.91 296.549 ] /Subtype /Link /Type /Annot >> +endobj +202 0 obj +<< /A << /D (subsection.6.4) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 237.511 261.321 255.673 272.639 ] /Subtype /Link /Type /Annot >> +endobj +203 0 obj +<< /A << /D (cite.together2023redpajama) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 229.036 199.836 240.294 208.09 ] /Subtype /Link /Type /Annot >> +endobj +204 0 obj +<< /A << /D (cite.izacard2021unsupervised) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 233.011 152.015 244.269 160.269 ] /Subtype /Link /Type /Annot >> +endobj +205 0 obj +<< /A << /D (cite.together2023redpajama) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 407.387 683.352 418.645 691.607 ] /Subtype /Link /Type /Annot >> +endobj +206 0 obj +<< /A << /D (table.caption.9) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 402.158 657.206 408.876 668.578 ] /Subtype /Link /Type /Annot >> +endobj +207 0 obj +<< /A << /D (cite.shao2024scaling) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 508.485 659.442 519.742 667.696 ] /Subtype /Link /Type /Annot >> +endobj +208 0 obj +<< /A << /D (cite.izacard2021unsupervised) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 504.942 635.532 516.2 643.786 ] /Subtype /Link /Type /Annot >> +endobj +209 0 obj +<< /A << /D (cite.dubey2024llama) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 531.888 563.801 543.145 572.055 ] /Subtype /Link /Type /Annot >> +endobj +210 0 obj +<< /A << /D (cite.kwiatkowski-etal-2019-natural) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 516.126 539.89 527.384 548.144 ] /Subtype /Link /Type /Annot >> +endobj +211 0 obj +<< /A << /D (cite.joshi2017triviaqa) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 341.178 527.935 352.436 536.189 ] /Subtype /Link /Type /Annot >> +endobj +212 0 obj +<< /A << /D (cite.rein2024gpqa) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 390.553 527.935 401.811 536.189 ] /Subtype /Link /Type /Annot >> +endobj +213 0 obj +<< /A << /D (cite.yang2018hotpotqa) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 474.05 527.935 485.307 536.189 ] /Subtype /Link /Type /Annot >> +endobj +214 0 obj +<< /A << /D (cite.g5.48xlarge) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 443.478 488.174 450.103 496.339 ] /Subtype /Link /Type /Annot >> +endobj +215 0 obj +<< /A << /D (cite.mac) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 505.583 452.219 512.208 460.473 ] /Subtype /Link /Type /Annot >> +endobj +216 0 obj +<< /A << /D (section.2) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 352.699 350.356 364.224 361.544 ] /Subtype /Link /Type /Annot >> +endobj +217 0 obj +<< /A << /D (cite.pq) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 395.757 328.683 407.015 336.937 ] /Subtype /Link /Type /Annot >> +endobj +218 0 obj +<< /A << /D (cite.schuhmann2021laion) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 410.419 328.683 421.677 336.937 ] /Subtype /Link /Type /Annot >> +endobj +219 0 obj +<< /A << /D (cite.zhu) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 425.081 328.683 436.339 336.937 ] /Subtype /Link /Type /Annot >> +endobj +220 0 obj +<< /A << /D (cite.asai2023self) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 533.022 304.862 539.647 313.026 ] /Subtype /Link /Type /Annot >> +endobj +221 0 obj +<< /A << /D (cite.shao2024scaling) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 542.981 304.772 554.239 313.026 ] /Subtype 
/Link /Type /Annot >> +endobj +222 0 obj +<< /A << /D (algorithm.1) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 488.225 123.208 494.901 134.58 ] /Subtype /Link /Type /Annot >> +endobj +223 0 obj +<< /Filter /FlateDecode /Length 5587 >> +stream +xڥ\ms6_1W4\djvgX|H\W y 9R pd@~JwdE¿/_L"s+v!Dd2YUPHDRju 6jj6EEqmo~O)YT]Ut$4̺s/p5fynf<16$4fnOu QݸK)c̙Z"UmWSʢjǧIS{epM&y6`fc!jb TIbpP㊫mPw7P *LҋUS#ٳhUM@owMDS)Y +emA7U{1mmx%<}TA实H=IKd5sRYtHCc m%ѱ Cg$[a-uDY&M~i;O(GcfX02I`IeRá*Aܻ@eZ`=^<:-:ѱWbdth5d0;~Fo0U+ﱻkIZ@\i :# OU L-/MTq[g wϩ8V1>AbvͶj7xS9-u85| 7YH6|SI9p3 `@]0^(.)[=# +#y " @u$`$TlɃbb;jWZ1)H{BhF =Fjlj9F9`i*)=:Uof VŒ73;3 nju +4ꮸnXC M~CܜIa~iq@e#?mYW,8Olm8Oy?4'9kPZ+wkG_"Pkwx^P)K°&zuzb+J\b:KpWF֫d 5$D-weo.y)'6c6%Qa$MdymY=:.ҹX{%@YV펚M]p&7V< + /2 9pw ;#D#$YAs1IWh` 3#~r3LXe:^/I`4SvI/{+h{_%yԛp'=J܆)XhYרN?^x: J o!+҃k[c`?Q.HD,(BA9GHm"UqMpvbIؠNlbs^[ڷii2j8wFy>VsLnonqRHQx`TDI\eQjߐt +}9ǍUH1Dz@aP[8M6aJԤ+r(cw({P'y}L :x< +19؉9 jQEom64@NTO\I +Spx)*q^aR= +R5*Cu_-YPq}O2kJdy/P5`yAcFX<*TXǟ'*cay8`xIrt"JJ {޴mWHd5UwKrN(d0߄z.Nm-:t-)0ω~of_~*D™^A{u޴3gVy,_@_I)WQՃ?`A:>{ݖ͉w>ҷz%<([kgԘN{hb ]Am!*a d]mDf>g i" 1ۇjv>qX&9/cc8ا7x|WP.bh\Wbv73EĀ?ӫo$ jFs䘱` '(#v\ ߽-2q*(%哪jϙ) NU*ۖ<TW>w!]S߾9էAi)%J]$W/SU=a'(F>C @h09W\{NzD,; Zn@5zI# T>huʹ]En2G}h"GM5UtI5>j#Dl +T!s\ƒxDr$a+=6Qs2agG8apN7Gτ:k+wD)?aksҥIP2K!c. +>Q:|^䳡rSn{ȧ|MH ^ln!?(B=K &:}Qm_2gAwm%%ҹ?7TnԨ2?@ Ղ~נ~,Uqz_:Nуs,O~3y9WƒJg}Jf1ہ>+0xbzduBn(ػ@SLi*AdZݿy:v6yY?+k]k0:wJ5Uv +pEVۭ48tņυmJ Lp &~I0Zy:!SwR>E;C2 ԡ>5jkTL}ǎD)^G.yu0TG:2@Eն1h| 폳5gf#Z|$㤐d'@c?1,=KgbSI'34Y튎_c$rF7jb8MTꝯ-T3Sj F|j%acz- @ 4A܌ lNB.@lWZ +I{rl\)>KP1UsޟtRX`C) ]gǑUE(Hm.\e:/(uq 2b9&7Iʾ1[zp還E17v@9[ +X( p`n|#&m{N| vVO..ٯ͐&|ô/ƲN@4l4Iy0RqeqeR<(a'5,,Z}s.>U?,-̯0EzLt_tɛsJ,{;^`6mFګbjn%ŊSm#>;|-;΀T#hL"s<SJsxa +#o%NU/񧧉#J|k{Ѣ,,F+Wm|4=.Y +endstream +endobj +224 0 obj +<< /ColorSpace 87 0 R /ExtGState 88 0 R /Font << /F198 89 0 R /F201 90 0 R /F204 91 0 R /F206 92 0 R /F209 93 0 R /F212 94 0 R /F297 575 0 R /F70 583 0 R >> /Pattern 97 0 R /ProcSet [ /PDF /Text ] >> +endobj +225 0 obj +<< /A << /D (table.caption.9) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 548.941 457.93 555.473 469.303 ] /Subtype /Link /Type /Annot >> +endobj +226 0 obj +<< /A << /D (figure.caption.8) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 355.91 422.065 362.627 433.253 ] /Subtype /Link /Type /Annot >> +endobj +227 0 obj +<< /A << /D (cite.hnsw) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 157.187 123.123 168.445 131.377 ] /Subtype /Link /Type /Annot >> +endobj +228 0 obj +<< /A << /D (cite.aumuller2020ann) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 155.807 111.258 162.432 119.422 ] /Subtype /Link /Type /Annot >> +endobj +229 0 obj +<< /A << /D (cite.pineconeHNSW) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 164.248 111.168 175.506 119.422 ] /Subtype /Link /Type /Annot >> +endobj +230 0 obj +<< /A << /D (cite.faissGuidelines) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 544.185 359.943 555.442 368.197 ] /Subtype /Link /Type /Annot >> +endobj +231 0 obj +<< /A << /D (cite.ivf_crtpt:2023/1438) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 396.283 347.988 407.541 356.242 ] /Subtype /Link /Type /Annot >> +endobj +232 0 obj +<< /A << /D (cite.diskann) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 374.836 311.33 386.094 319.584 ] /Subtype /Link /Type /Annot >> +endobj +233 0 obj +<< /A << /D (cite.diskann) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 492.239 263.509 503.497 271.763 ] /Subtype /Link /Type /Annot >> +endobj +234 0 obj +<< /A << /D 
(cite.seemakhupt2024edgerag) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 483.895 179.823 495.153 188.077 ] /Subtype /Link /Type /Annot >> +endobj +235 0 obj +<< /A << /D (cite.pq) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 406.159 132.002 417.417 140.257 ] /Subtype /Link /Type /Annot >> +endobj +236 0 obj +<< /A << /D (cite.msjimmy_bm25) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 356.321 96.137 367.579 104.391 ] /Subtype /Link /Type /Annot >> +endobj +237 0 obj +<< /A << /D (cite.rekabsaz2021tripclick) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 369.485 96.137 380.743 104.391 ] /Subtype /Link /Type /Annot >> +endobj +238 0 obj +<< /Filter /FlateDecode /Length 3720 >> +stream +xZmo6_ap8UUvnuAa7 -ג7CJ,'b"<3T2%Β/dfJt{19T_]}}lŹrvu39%3XK>ZG3h b<K/YbtnJ$xԣPDdLGvomi68T\$.ۢZ4gf~D(&}"Kf<(/?O@G8KhXgnKSat@EQݡ-eKmwi(k_#OU%kRZz{T\VF_KKj;+G:);{P/k]&mqfqx_iX$&E @ŬS{K?q Ǧ +윺X{.ŊZ|6XhrYU]{ڱUoZВwEYzMk[m&f/Ӹz-nj '1>W5vt4U^Y,fW# eAݟ:BpٖKui2+(-=q_nhƆvGu'Ncw^K8& |r{YZ'mZd<0T^#@9@6E"Dx-bv<{@}8uz8).p'a} *_쫦vSןEvՋIfuFӚEM*K78:(KD_i5.ͪrl s= 9vg<6<۰lڗ6:NAF-M`'f6"&{ɅK`0s9B,IŻuX"Q3;`>>4mi?<^Fl1r5q!9 Zy,VU^\Q}CؑU0ʓ?_P~erøuu{BC o+Ж`Q B%%MZ5 ],CcUݮʦ1pʆF:j<>q2h8lXyP.6Hw^MٷBR?G.ݖے'=oH/'N9gi #Wew3@)T<'ǡiƲeZƉ>b+'2=Y!2h/2i"בDw# vsÎ y@ZgєWN)R(O@3Nnat)?vni{tnY^K[:sdmWsFf4w'f Aǡ,!ahq(ajXАD""qv)7,|u.S2ֹ 2Y&F4D`Qdc]UM+0/b-b>}^ZPJ:R̡boT2LӳpQ~,N,B/V12!k2Vpk1*n3ܭm&C[w8yHi7gs[OT)i% rI f.7HC6WZIE>y{[o6hyƺ )K/>^L9E<5r<gʺ(7O2IX'tErJ,U4)w<# 0:\(W[q80Gts&$a*Y\gO#N.|٠A[\Ƣ0/rȅuB:4'dsrGf! a4e҅q|aB*H2x (,8,27&HF]4飑іT!dvgoz2$uk,ٱKNx9 4h*zy$p1B tGQYyzRT%:b#5虏IW QI<^V.Gu&bZH J*@2FQr#|"'\Sn2<\$0}e4)3칼( b4wH6͘ +L )t2%CIbwi!WԽ_U̵3RֺP\s LL@أ!u|rb:E@ӴD,So$Ms}JU e)@X1 #7bFG37J'c0̜Smv@W(f UJMmK*hkMKwQޮݴ~~yn=Yruw.!td$7Xy%>J M1|_tzjSb@*u.}Ynq?H=1A3}jN=S%bup N*.?)@i8GkP`&ÁjjȗWceܐӓآEUꅯ C &4d vh^ |lJ҈O>1&N6rXjJ1/dcKz *iMgap &No `n`shOq*CmU;7-ƶ0!^y_ltt} ٮ6nPkvS+`0adfH`RQe+9OL8_ooC +endstream +endobj +239 0 obj +<< /ColorSpace 87 0 R /ExtGState 88 0 R /Font << /F198 89 0 R /F201 90 0 R /F204 91 0 R /F206 92 0 R /F209 93 0 R /F212 94 0 R /F297 575 0 R /F70 583 0 R >> /Pattern 97 0 R /ProcSet [ /PDF /Text ] /XObject << /Im4 585 0 R /Im5 586 0 R /Im6 587 0 R >> >> +endobj +240 0 obj +<< /A << /D (figure.caption.7) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 69.469 691.746 76.001 702.934 ] /Subtype /Link /Type /Annot >> +endobj +241 0 obj +<< /A << /D (subsection.2.3) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 275.967 643.925 293.967 655.113 ] /Subtype /Link /Type /Annot >> +endobj +242 0 obj +<< /A << /D (figure.caption.7) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 287.622 608.06 294.34 619.432 ] /Subtype /Link /Type /Annot >> +endobj +243 0 obj +<< /A << /D (table.caption.9) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 236.761 177.903 243.479 188.07 ] /Subtype /Link /Type /Annot >> +endobj +244 0 obj +<< /A << /D (figure.caption.7) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 80.074 141.016 86.672 152.204 ] /Subtype /Link /Type /Annot >> +endobj +245 0 obj +<< /A << /D (cite.wang2021comprehensive_survey) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 376.11 696.971 387.367 705.225 ] /Subtype /Link /Type /Annot >> +endobj +246 0 obj +<< /A << /D (subsection.2.3) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 429.027 623.059 447.512 634.376 ] /Subtype /Link /Type /Annot >> +endobj +247 0 obj +<< /A << /D (cite.ktransformers2025) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 396.909 577.509 408.166 585.674 ] /Subtype 
/Link /Type /Annot >> +endobj +248 0 obj +<< /A << /D (cite.li2024svdqunat) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 411.914 577.42 423.172 585.674 ] /Subtype /Link /Type /Annot >> +endobj +249 0 obj +<< /A << /D (cite.appleM1Ultra) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 413.606 529.689 424.864 537.853 ] /Subtype /Link /Type /Annot >> +endobj +250 0 obj +<< /A << /D (cite.nvidiaA10) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 427.554 529.599 438.812 537.853 ] /Subtype /Link /Type /Annot >> +endobj +251 0 obj +<< /A << /D (figure.caption.8) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 420.899 463.601 427.441 474.79 ] /Subtype /Link /Type /Annot >> +endobj +252 0 obj +<< /A << /D (subsection.6.2) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 349.543 403.826 367.381 415.014 ] /Subtype /Link /Type /Annot >> +endobj +253 0 obj +<< /A << /D (figure.caption.8) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 411.666 367.96 418.291 379.148 ] /Subtype /Link /Type /Annot >> +endobj +254 0 obj +<< /A << /D (section.4) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 520.816 96.976 532.125 108.164 ] /Subtype /Link /Type /Annot >> +endobj +255 0 obj +<< /Filter /FlateDecode /Length 5942 >> +stream +xڥ\[s6~_᪭U񒷙l!-6T&ίnt)Ԕ F׍ӫݫܼ[YW(3]]YsT:^dB]eiʦɏu =a}M7P_o[hT)8fVZfcmvy +k=^=^MyuZ41Ԙ'*er8^I׭#Z 5=Th? M[v=t0&p+)Ҵ/ ;G`Mɓ =w;zz״5~# +z0!*#BE?/TaΓX0jjUl%q +*vĝE554:OM/dK3f;~]xQdٖgM聊csߴՎJ#ab|On@2P9siR=GU4vWS_A[&+OoxۦߜJE*2*s%LaPh +-=[OD +.oj K~II~Fa6$zP~תL^SSEqGE?=m/ #2T@aM :VTt;  2,(DcA&͉QhJ兦/M ofwo$W8<;AO8JD'ЏP"=z.Yڲ2?4Ia]zU2SQ[A̹_⧅MU/CŶ*\C e`"5q8Ώyl}VasdF,er‡ Bn~.HNn5,j V6K**4K؃̞& s&`D/7C Oa,wW`~ +du۞ToњoA?Ԧu+"cut}l6&E[t\f`Ֆtq+ +oqgeӥHzg%X/љ3 DtE?һQt/QTR>6XcU.rżv}J&a%zA)mn;צϞȬ;1 :||1=Ar\ +VZg,zȠP'ɫ{]T,RmkcwWAҩϱF%~1kЊxfQS"18?.9aE&NzAʰ%|\Uw0WTtu}80l86]3 ;ajŒ.3Z +U0o#-ʲAGMN/:oPh-o@xTaGmvQ* Cs$ȓ9 =}>N,PlMk0*sݿw ԒN!=-̠?dQB Ljx= * ~8~KԲT~7P6.VptiQʳaJn0b@i'g +4>>؉ +V? lemZ-/ls* +ULt9 Odm*n8qxpjiX/%ԐDF<v=(acgpap2uZ5 1plXuFq? ԃ?9[erͲ9v_ʧY`6Y_W1lls!w3;1`\N08[7$.DH4Ty = IKpܘF[n*=ԝmO厷k`asp.|,ErXp3\D^*<, +:~a3D,SBIfO35a-acw{n0N2~$djrv(缱}JR_!BW2>TEi3r\kG=P4M}>16sʎ>"bq{}ij9B_ +swN+^մF2:q7o1-#?$ u!sҟ(l 0 t 'U첚X`.4=GFu⏖=&ybzE+޷)/Y~˖\~ڳIon'o8ϪTYK?OS$>9fA}Ʒ,G=gZރ~/:D#IGE` AV1R^:x>jm|!D3?6G/hyFCXxs ܆|bo~鄴̂ +y-GgjH5h;w.Qbz;>Y9s7^^˔qЁ?P] lv0"Рg;IK.R]ZMy6ʂJ =肃(ݖ}Ÿ6P=. S/Jg3.\fbf^ʅ Z0`cȑS'QQWYfEO!HZP_*ǼUM=9^,NE7}yd4@$Ob93޿ +b6 `.ƠA %…}R% 4>6iŢD Ў[Kٍ{s킓 U h25>#_ĂA K#jE +Y>\4N:o^^ (e̳3[Ipft9QZ 6KeNw$8) ՜XmSWL&p踈(ЙR@3 +;O1Q2D;me48ePk~ftИVwL?S v >"f9e}j0)ا19sd'ecbR5t.C;Jϭ,/Vde G\bv<֡52OYUN17a5tKh~ ޏ;Ӥ-dFq y OI).s r@h~QJN#"BWsRCYS.)7=wPPܜB=fuP8SXrOQp7Bq<֥8Cd&'>J2_Z\Nt 63zAJ1]J;< Tĥ=YR_!Zj:s(GGZP5f\ +<:~%(X}Wںm;RPmMT$e.SںXs"[19|,( ?{^h8C 1MY|JEK yx_Sb'@.XaVx$/γg/,fSYR TY8s\nJa%,WPjA:ˡMIlnh0*S!+YH>Sǚ*8ZLY¾B:t]|_T,hGi7<̈́#:{LD]? 
uyCD3~3ZcИi8薠DT8;.C>6s9uLS:BpJh Sƈύ)7Qo1P#jm +ʩUr9-!kflh;{%>X̯<`4gxrE҅ }4/|jns\g5;[7xE`zf+^xxQI+rre+y Vf'L{+ ʆfƲ}r&3d'gm1Jwd7Gkt߳gK {fE'zNrz ]ƿp^ @gWZx S Az;P9u*" +i>ͥIl?MatKn?yy?Qb2Bi&0fts|o8tןQir Rp"e`􌌛 ͳO#XNoKs)OyF/Gz{$Uhsl ,h A9p~9(m }M(1#?1Cz+[ü>gYcd`- +B mosw>$[5))9f&q&@xj_6I/o^p{ +endstream +endobj +256 0 obj +<< /ColorSpace 87 0 R /ExtGState 88 0 R /Font << /F198 89 0 R /F201 90 0 R /F204 91 0 R /F206 92 0 R /F209 93 0 R >> /Pattern 97 0 R /ProcSet [ /PDF /Text ] >> +endobj +257 0 obj +<< /A << /D (section.4) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 213.123 580.237 224.648 591.425 ] /Subtype /Link /Type /Annot >> +endobj +258 0 obj +<< /A << /D (subsection.4.1) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 226.869 556.327 244.707 567.61 ] /Subtype /Link /Type /Annot >> +endobj +259 0 obj +<< /A << /D (subsection.4.2) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 115.698 543.136 133.86 555.56 ] /Subtype /Link /Type /Annot >> +endobj +260 0 obj +<< /A << /D (table.caption.9) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 164.221 352.353 170.897 363.541 ] /Subtype /Link /Type /Annot >> +endobj +261 0 obj +<< /A << /D (subsection.4.1) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 277.432 268.334 295.27 279.522 ] /Subtype /Link /Type /Annot >> +endobj +262 0 obj +<< /A << /D (figure.caption.11) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 175.522 85.021 182.184 96.209 ] /Subtype /Link /Type /Annot >> +endobj +263 0 obj +<< /A << /D (figure.caption.11) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 489.4 541.947 495.97 553.135 ] /Subtype /Link /Type /Annot >> +endobj +264 0 obj +<< /A << /D (table.caption.9) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 349.085 352.02 355.803 363.208 ] /Subtype /Link /Type /Annot >> +endobj +265 0 obj +<< /A << /D (figure.caption.11) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 390.24 256.379 396.958 267.567 ] /Subtype /Link /Type /Annot >> +endobj +266 0 obj +<< /A << /D (section.5) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 547.472 256.379 558.996 267.567 ] /Subtype /Link /Type /Annot >> +endobj +267 0 obj +<< /A << /D (section.5) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 529.801 144.797 540.951 155.985 ] /Subtype /Link /Type /Annot >> +endobj +268 0 obj +<< /A << /D (figure.caption.12) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 386.257 120.887 392.818 132.075 ] /Subtype /Link /Type /Annot >> +endobj +269 0 obj +<< /A << /D (cite.skewmanohar2024parlayann) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 471.409 75.303 482.667 83.557 ] /Subtype /Link /Type /Annot >> +endobj +270 0 obj +<< /A << /D (cite.munyampirwa2024down) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 485.357 75.392 496.615 83.557 ] /Subtype /Link /Type /Annot >> +endobj +271 0 obj +<< /A << /D (cite.hmann) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 499.305 75.303 510.563 83.557 ] /Subtype /Link /Type /Annot >> +endobj +272 0 obj +<< /Filter /FlateDecode /Length 4154 >> +stream +xڥ[K6WZj/>'gdrsH+ɯnt(JԖgutzZ%o_%J^yW?^ř"/}pW| ߈"_qtuJ"W"LabxWooΣjFEiLϷkiۆy-/ֿod"!2B97NvÉ-Zg{Zgzw2]d+'N冦iO}}u4z4c:U\?7"TƐcaVzzkGy'U n< e,,?qx(,[8[ڕ}U}LԷTk_BW+θ-{) @mh23u->-CC7^pz[zcQykK@.|bIfwTUڅ&i< +ڋP@2Ą vQΠùaԹln8Vt\gjx?Uðˢ +;Iơ: +:eKes!#i"LS +)]0{"qawR͔X,K/MUd.D^1n*e39ޑCF` +""Qelol@y|$tfJ¡2~%*IqLD_'c.& [6Od4_x{Pa|TuS*pU@CC݇j,Pݤ=E7_;ئ9,j=/Y)Dt4Q9(0q ^@/w gihQ뵐SCosSN;[L HT`M8HI!" 
0/ckwNShFiGeTqmIɷQ ,.:[v<*\mLmS4W_ނ'T8'hm*bfA;mn9z[wyf}^?p$'hT`+{\c0IŽXԱ2K%gaǘkcwT]SF 3J y(km)PZZp,kd8vYN#y3| ȝx}M}{c@KZȋR'C_\ЂE+m'9 E!P.KwB/id +KA"Idi'+4h +by)t|g3.gXE/6hƑAPax!S%; +[ڌ hG }S8t\|n`+P1vLQC=6nwO(oY1#-HA$zbzG.vXw5RG[ǵ,\6h}X4s{`h6TD &VIO*Eѻs +e_l :y'y_,_Ot,\b`Z/9Qn^$)RW cO@Г Ȫ{xc6I>cH{ zH+,=U5 2A*B9pw 1GeRm|/jШyk&'r~vqAHEٗvu_2c!-XxΖ9%<0S$H?$zPvG'7AWO(rB `9 99,T0gmJ;N +g'MW]6,&2 +=oܜ<78eƠMIxDܻ=dc>Ra[] @%bm2?icK3<|⭾QaU!~I{-_wxË6F>r_\2w,6:sQufRAѫ5ʈd`\c}bfYs1Kn +3Jb{ޏ?SO7M~}:P5OLU*G 9 &&u[o0) ź]W͕O?*y . VsHY?z2DA^po2AHCkHg̜PQC7%416cʧ +Ixa,+0 +S<)hNu4%i99/#dƴяvѠA0u3y#SA8ͅoq:ձa8Q&Ȃnds"_(0ql~~ڍ KIW=뙆8$>M'XogT7ǹ +$_׹X^ӧǓ0ƞA*Q;E/O1aDE +X;ʮk\]u t,1ykzte\΋ѱ *bpb#f?LƷP*%RB~-7S8%zɸ㖸+tiQp&Lj~VQH x̧}C̝:T( C}Md2vS%St3;$]OB78N]h7#XQ?xXnԎRWɼ]K޵JU֜b0m)6s['?~EA? +,#iUAinKokoS4{jQH)%=;f7_OȕP.̺w{`~vw1]H7ku7o!O.Lh6HFn@cC2 |} +endstream +endobj +273 0 obj +<< /ColorSpace 87 0 R /ExtGState 88 0 R /Font << /F198 89 0 R /F201 90 0 R /F204 91 0 R /F206 92 0 R /F209 93 0 R /F212 94 0 R >> /Pattern 97 0 R /ProcSet [ /PDF /Text ] /XObject << /Im7 588 0 R /Im8 589 0 R /Im9 590 0 R >> >> +endobj +274 0 obj +<< /A << /D (figure.caption.15) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 161.693 463.601 173.137 474.789 ] /Subtype /Link /Type /Annot >> +endobj +275 0 obj +<< /A << /D (subsection.6.2) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 191.011 427.736 209.172 439.108 ] /Subtype /Link /Type /Annot >> +endobj +276 0 obj +<< /A << /D (cite.li2023towardsgte) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 128.603 418.017 139.861 426.271 ] /Subtype /Link /Type /Annot >> +endobj +277 0 obj +<< /A << /D (figure.caption.13) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 287.895 391.87 294.427 403.058 ] /Subtype /Link /Type /Annot >> +endobj +278 0 obj +<< /A << /D (section.3) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 236.477 316.134 247.983 327.609 ] /Subtype /Link /Type /Annot >> +endobj +279 0 obj +<< /A << /D (figure.caption.15) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 254.783 168.707 266.041 180.162 ] /Subtype /Link /Type /Annot >> +endobj +280 0 obj +<< /A << /D (subsection.4.2) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 196.347 120.887 214.832 132.075 ] /Subtype /Link /Type /Annot >> +endobj +281 0 obj +<< /A << /D (cite.ivf) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 407.958 182.899 419.216 191.153 ] /Subtype /Link /Type /Annot >> +endobj +282 0 obj +<< /A << /D (cite.hnsw) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 523.634 182.899 534.892 191.153 ] /Subtype /Link /Type /Annot >> +endobj +283 0 obj +<< /A << /D (cite.hnsw) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 500.509 158.989 511.767 167.243 ] /Subtype /Link /Type /Annot >> +endobj +284 0 obj +<< /A << /D (cite.nsg) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 543.021 159.078 554.279 167.243 ] /Subtype /Link /Type /Annot >> +endobj +285 0 obj +<< /A << /D (cite.diskann) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 355.869 147.034 367.127 155.288 ] /Subtype /Link /Type /Annot >> +endobj +286 0 obj +<< /A << /D (cite.sptag) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 422.973 147.034 429.599 155.288 ] /Subtype /Link /Type /Annot >> +endobj +287 0 obj +<< /A << /D (cite.nssg) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 432.316 147.034 443.573 155.288 ] /Subtype /Link /Type /Annot >> +endobj +288 0 obj +<< /A << /D (cite.hcnng) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect 
[ 446.29 147.123 457.548 155.288 ] /Subtype /Link /Type /Annot >> +endobj +289 0 obj +<< /A << /D (cite.wang2021comprehensive_survey) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 495.712 111.168 506.97 119.422 ] /Subtype /Link /Type /Annot >> +endobj +290 0 obj +<< /A << /D (cite.baranchuk2019towards) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 357.883 87.258 364.508 95.512 ] /Subtype /Link /Type /Annot >> +endobj +291 0 obj +<< /A << /D (cite.zhang2020learning) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 366.805 87.258 378.063 95.512 ] /Subtype /Link /Type /Annot >> +endobj +292 0 obj +<< /Filter /FlateDecode /Length 4162 >> +stream +xڥZKW*le c\bK˱}tij=`A6)Պ<{zu7f-^=&batebau&_vxB+hw"[|3䥗yKQ"-ՋҦ +Tdf=yJȤq_y)hsZI;҃?rR);s%3g-LC56,a8s)K^fx,>[ ]8hL\ӪwDՊmI!ci:81IX2&mcl;.ՌX +B:9e!.ձp۽snĩ]G}0mEA;&^3=7]bS./'$4MJTRiUJrK?Jn|hJ<֫sp,+jEf.?/}2ojwf\^* ,ywZE6qɻF[{vr + +q{]p^V=kM!XQIk6 cC/ω*:"-JqC74Zӓŀ"w^F +#)U\M֡rsbUhӲds%:t +Ђ+8}C;1 Q9 `J $mϪ|,QPAR`8j~j)P nʱ~E Rh}b[:]mۖG꜋M4 6qWX"- +߼/ϟviv9 UkvUiZՃ{4\- 21FSNvO%xu +7砩i| XgjhE<(-ManӘA +!E*OmD$EN+*#-Q6u@i·g(ϕ n~.%ƽ8l zŦrP#{k!!0M;@C[v(:Rټ]SKn/ ձGXhA. +;l97|`+Le< G9?Lr:8z`n,5h5Ft=Ǻ ]<βTI1֜ 5uYt~uy H@1XG ;ҺfW|PE* j:Xdr">8R }f'7%pGQ[܋:G_cįܦZ"=.쒝̩f<DbLW#@^p7 +x+/Vp`;3tҒưjtii+77FR+,5c(x$ W.{r#xVk||w@E)N5e6AA jAjQlԗ9GUȧ4 +Y'*_aTɧ%iPƕϖ`cż&jc?%pk.3`lyCe`'51М.ZWwAzX50:O7vM[g:O8e1UY5?^8h\`>Gr P(-&U*M\V oN>GxQcz@%TgǏqCc_T-D0y-]0"i0XddRAIVv[m}]SpSGMsRWEf}~ /T$)rf6?T]7ru%o"7IWa?]94jJ.SLŸ96bp!q$츤3ڰAm!n>!U^z۔\33ۺxiy 3 +p0?E;iWV9!\8*2\By*p]櫺Cv ɯtnbBZ+ fVÊe+zx%5kU&Sm RI0&%H`ln6|xɯ/_6SwxxQRFѾԭLF1=:Ӏ'Rh bRDaqBTZ6&1pʛ\H=Xvs[g}&C}ҧR ?j_8Z(*׳U c%- mAr␿7jymd`d33s#pu}|Hy*Z;,)g0*Xb +~ECY-OSv)R[(wgA`?%$y~q- @2~A!{j<L5'\>uQV\DKF> /Pattern 97 0 R /ProcSet [ /PDF /Text ] /XObject << /Im10 591 0 R /Im11 592 0 R /Im12 593 0 R /Im13 594 0 R >> >> +endobj +294 0 obj +<< /A << /D (cite.diskann) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 280.166 685.016 291.423 693.27 ] /Subtype /Link /Type /Annot >> +endobj +295 0 obj +<< /A << /D (cite.wang2024starling_sigmod) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 75.624 649.151 86.881 657.405 ] /Subtype /Link /Type /Annot >> +endobj +296 0 obj +<< /A << /D (cite.fast25) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 133.353 637.196 144.611 645.45 ] /Subtype /Link /Type /Annot >> +endobj +297 0 obj +<< /A << /D (cite.tatsuno2024aisaq_gps_ref46) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 279.742 625.24 291 633.494 ] /Subtype /Link /Type /Annot >> +endobj +298 0 obj +<< /A << /D (cite.LM-DiskANN) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 115.668 613.285 126.926 621.539 ] /Subtype /Link /Type /Annot >> +endobj +299 0 obj +<< /A << /D (cite.seemakhupt2024edgerag) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 280.29 601.33 291.548 609.584 ] /Subtype /Link /Type /Annot >> +endobj +300 0 obj +<< /A << /D (cite.pq) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 243.921 517.644 255.179 525.898 ] /Subtype /Link /Type /Annot >> +endobj +301 0 obj +<< /A << /D (cite.gao2024rabitq_gps_ref15) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 170.627 505.689 181.885 513.943 ] /Subtype /Link /Type /Annot >> +endobj +302 0 obj +<< /A << /D (cite.work-in-progress) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 236.776 374.182 248.034 382.436 ] /Subtype /Link /Type /Annot >> +endobj +303 0 obj +<< /A << /D (cite.ryan2024enronqa) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 251.005 374.182 262.263 382.436 ] /Subtype 
/Link /Type /Annot >> +endobj +304 0 obj +<< /A << /D (cite.wang2024mememo) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 265.235 374.182 276.493 382.436 ] /Subtype /Link /Type /Annot >> +endobj +305 0 obj +<< /A << /D (cite.zerhoudi2024personarag) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 279.464 374.182 290.722 382.436 ] /Subtype /Link /Type /Annot >> +endobj +306 0 obj +<< /A << /D (cite.yin2024devicers) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 215.298 362.227 226.556 370.481 ] /Subtype /Link /Type /Annot >> +endobj +307 0 obj +<< /A << /D (cite.snap_cvpr2023_tutorial) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 69.987 314.406 81.244 322.66 ] /Subtype /Link /Type /Annot >> +endobj +308 0 obj +<< /A << /D (cite.nvidia2024blackwell) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 541.775 411.465 553.033 419.63 ] /Subtype /Link /Type /Annot >> +endobj +309 0 obj +<< /A << /D (cite.nvidiaA10) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 495.356 399.421 506.613 407.675 ] /Subtype /Link /Type /Annot >> +endobj +310 0 obj +<< /Filter /FlateDecode /Length 5314 >> +stream +xڥ[K㶱ϯ]s$ >c#}3;$`Kl)Jɞt~}Pܴ"dP +fw||7}yv<)E7&Q0nRH7w-?/Ŧ\pl\TM\r"ef H\řƿ}l/w݇8_U&D??ou>hMg"bzxp2}L* ]Rd~3}IfBw8؝U>lf)!c^Uwdsl]>v`Zq,6遢|~M$oBqOoϤI<&F0o4ޭ9RS-d-b-0rO݁y0Rcw.N<tVKvlY0sܞ$%/j6z9NaG`<4]K7hjWtձ4+γba"zP o4}u2#̌w+Zѱ~FHMU6+rM%ڝkWv{ W"Qhg2(` .} qJWrC*-=cvH {r5rAtv2_0^jkM Pҗ6dQp:HR9Aiےhce/Vrm_!I:>{G< +v gjTe$U>وWnZ!asފ-Ar$fN\hx=}?k@:T$3/}ygѷŮB۷`FZ:b݉Ms&IDYРjb# Ogɘjc.{n`GxW3 SɘEMuxbp SIpݕ X1N#挲I3aFVɽhh/_>>WAqyiŊV=vU]PLc?x"#P$¤9ע骢čd:ޘea%IuuEP.KfΩlJt[yu#n0|xn$ܩ:~x ne׬9 'Kѡh6@ӌmKL['[ۀMQ^ Nq!ݞZ=4݅O^H`y܍EЀ E NzvJc:'3ڝHh<_=D[?g9؎q~ij[BBt0zɲ90:uT4ay RMݞ6z*xZǕJФcu<eL8+~h-93h9I;tE5㴪Xm`RvZzhL=}mF٭[rݮ.glbXh/Ce+ XBXeA቎L(ha:)S̗]ٵfoz@v,i?' c"thȻ6k6&ygL6 -c2lxC!Yx>+adꬽ::r,%9gwvJq2Ҍ/kth\\ Bh7* L%Cq`x, E4qy- mc= ':VBJEeC|K&FrN5Zˠ{KM'SBm&c*~j~vNm]sks!.HӹzZ_nͭk] E|KSy.+x /ֵtObp ł.X2V0hGR.6:; +B.]Oz2ie͓y+cX #[;sг̰"5j6"F[C'$ m[E4Z߫R_UPx4 F07Rcp}w$o< M:|37rƓo 'y`%rYB!Y4^ͫ11qn$`SCm{6*|vt=+#rIa)OCV ֕u8+e>VޱXutV%4+*TdM7 +!!f(nSe |j)Q{}?KTh`GFk%:rVo+5JzhuHH<-ej.seD?i ss=2'r +?f`Na$]w$W,\48|)ޠ;FpHDc9T\s +&vzi9?e<ܬAb31w[ )C"<Iow>g_|*=H $] N(YB?^/P$zki c(_ȱj&cކobCŒC HEq>A5Sxxiy#D/$ Gb41D,0> ~ *HDeE$":s)m귾fE*6D[u4T#I25CQã *֔bB>K0 MO(a"YsW&`@lZ(#V?Ӊ-HRN]j}AM$htɋ3 sN60:%A-3 ^냘sPz~H9ΤHPY,Ք2xm8'[n_9PbXPÆ\Ϭoؼ4<!~ =i{ͭ.gZe 5u F(s?)qVBۗ_;ʐ͆ J"!'ͽEvv:HbK:4킚w2@E>}NAV#`Ɣau$ep-6Ou9bzEPU)KX&RgeJ%pR|Lkgw +&3^汆Zc4Bo䑻|"49aDSr ,a_V]2q.bD$&*J(e>iyl ^[_X:7*-Jh9*k\9wg9%nxمld61^RBߑ9 +DЉI4z[O #  +U!2 +s:7D=Ls̓B23C,6eXd+慠8[`cͻrNa~*E_fQR 4;`f.2.6T&qM'&Y(>+|-37m1 +ú=@apPyOFx৅̹gXM2|jtf_J Zl7]d*AW-⾋D>CC3Wc,Z\Y4v>˄ XӺJ|>hd?+_B4cze_+NF9u3Y-D5hnҝ2;nnՕ:O6jHd:o!'KECAHlRfw$qeF4f\-(,rQT>ӣ)[n-$R-gPIb.$/9 6CD r~#/F0(q@OymUMYef0P\Rpe㫻Ψ5c:J~IŞ-s'0褨F/g%Ϯ,x6H.lT:j¾ UqeO棡e#Ŗ*¡H:xFÐLsYř&iM~ a7-EB xLBG)}6/Vg.+mً iޞ?D/ %BY o.x\b?xoYh 2>N088ELK{ UᏳtDK +3w:k1xw3 䙇p 6-h]2֎mm0(6>蜴I>|WýK9NXcֲB q({臞`yùT_yvWaR%CX`-J]=8pG6Hs6sMYPs!gnup;%G.nNN@Ypz.dVP֎ukXJR|M@8IizR3e`L}^PߞO,.&*5vuGIMsv*faKCi7-\m2֮8py{Rr=k,t]᥶D%_bEWdJOD3_dVpfP623BlSPA#J#ޑƳC|A4?(#Ws ؾYphTЦ6RY哲 [Kz,brD yia`nCGg>YKoKT? 
_jTU2O=bJFnwbE'ruF[F_q=1KR:@t1B4u +x0> +H!0Io ЮX ߽ng +endstream +endobj +311 0 obj +<< /ColorSpace 87 0 R /ExtGState 88 0 R /Font << /F198 89 0 R /F201 90 0 R >> /Pattern 97 0 R /ProcSet [ /PDF /Text ] >> +endobj +312 0 obj +<< /A << /S /URI /Type /Action /URI (https://aws.amazon.com/ec2/instance-types/mac/) >> /Border [ 0 0 0 ] /C [ 0 1 1 ] /H /I /Rect [ 195.914 491.999 295.041 501.464 ] /Subtype /Link /Type /Annot >> +endobj +313 0 obj +<< /A << /S /URI /Type /Action /URI (https://aws.amazon.com/ec2/instance-types/mac/) >> /Border [ 0 0 0 ] /C [ 0 1 1 ] /H /I /Rect [ 70.076 481.997 137.108 491.501 ] /Subtype /Link /Type /Annot >> +endobj +314 0 obj +<< /A << /S /URI /Type /Action /URI (https://aws.amazon.com/ec2/instance-types/g5) >> /Border [ 0 0 0 ] /C [ 0 1 1 ] /H /I /Rect [ 195.914 472.074 295.041 481.538 ] /Subtype /Link /Type /Annot >> +endobj +315 0 obj +<< /A << /S /URI /Type /Action /URI (https://aws.amazon.com/ec2/instance-types/g5) >> /Border [ 0 0 0 ] /C [ 0 1 1 ] /H /I /Rect [ 70.076 462.071 128.532 471.576 ] /Subtype /Link /Type /Annot >> +endobj +316 0 obj +<< /A << /S /URI /Type /Action /URI (https://techcommunity.microsoft.com/blog/azure-ai-services-blog/announcing-cost-effective-rag-at-scale-with-azure-ai-search/4104961) >> /Border [ 0 0 0 ] /C [ 0 1 1 ] /H /I /Rect [ 108.617 392.333 295.041 401.69 ] /Subtype /Link /Type /Annot >> +endobj +317 0 obj +<< /A << /S /URI /Type /Action /URI (https://techcommunity.microsoft.com/blog/azure-ai-services-blog/announcing-cost-effective-rag-at-scale-with-azure-ai-search/4104961) >> /Border [ 0 0 0 ] /C [ 0 1 1 ] /H /I /Rect [ 70.076 382.37 295.041 391.715 ] /Subtype /Link /Type /Annot >> +endobj +318 0 obj +<< /A << /S /URI /Type /Action /URI (https://techcommunity.microsoft.com/blog/azure-ai-services-blog/announcing-cost-effective-rag-at-scale-with-azure-ai-search/4104961) >> /Border [ 0 0 0 ] /C [ 0 1 1 ] /H /I /Rect [ 70.076 373.101 130.868 381.752 ] /Subtype /Link /Type /Annot >> +endobj +319 0 obj +<< /A << /S /URI /Type /Action /URI (https://github.com/togethercomputer/RedPajama-Data) >> /Border [ 0 0 0 ] /C [ 0 1 1 ] /H /I /Rect [ 189.956 282.744 295.041 292.101 ] /Subtype /Link /Type /Annot >> +endobj +320 0 obj +<< /A << /S /URI /Type /Action /URI (https://github.com/togethercomputer/RedPajama-Data) >> /Border [ 0 0 0 ] /C [ 0 1 1 ] /H /I /Rect [ 70.076 272.821 148.601 282.138 ] /Subtype /Link /Type /Annot >> +endobj +321 0 obj +<< /A << /S /URI /Type /Action /URI (https://github.com/kvcache-ai/ktransformers) >> /Border [ 0 0 0 ] /C [ 0 1 1 ] /H /I /Rect [ 70.076 242.893 220.181 252.25 ] /Subtype /Link /Type /Annot >> +endobj +322 0 obj +<< /A << /S /URI /Type /Action /URI (https://www.cpu-monkey.com/en/igpu-apple_m1_ultra_64_core) >> /Border [ 0 0 0 ] /C [ 0 1 1 ] /H /I /Rect [ 236.137 232.938 295.041 242.435 ] /Subtype /Link /Type /Annot >> +endobj +323 0 obj +<< /A << /S /URI /Type /Action /URI (https://www.cpu-monkey.com/en/igpu-apple_m1_ultra_64_core) >> /Border [ 0 0 0 ] /C [ 0 1 1 ] /H /I /Rect [ 70.076 222.968 221.897 232.325 ] /Subtype /Link /Type /Annot >> +endobj +324 0 obj +<< /A << /S /URI /Type /Action /URI (https://github.com/facebookresearch/faiss/wiki/Indexing-1T-vectors) >> /Border [ 0 0 0 ] /C [ 0 1 1 ] /H /I /Rect [ 212.136 83.491 295.041 92.995 ] /Subtype /Link /Type /Annot >> +endobj +325 0 obj +<< /A << /S /URI /Type /Action /URI (https://github.com/facebookresearch/faiss/wiki/Indexing-1T-vectors) >> /Border [ 0 0 0 ] /C [ 0 1 1 ] /H /I /Rect [ 70.076 73.528 214.088 82.873 ] /Subtype /Link /Type 
bq[픜NhtRͼ'HAZϘLc7{VĽ<V>&"<ӯ6{~3p7>;YM"*Mm/)$q{ᚾQ#W0ٗc$Ę4;LwJ2V'™@Kx=ڂNxX 4>DEu?1[ȀjlM[7|G厡"P66([7_շ-cZV]#Q d +ӡi~R6'eѽAΪlSYJJMjkm]L$I]L^(hy` fO߰Nwk>Pn*ty ^4:gI:*x*zj>Xs$jl, +2Nx&UFQX^ZP:fxS!V0~j]VaTR`e-w@ 'M~)(xT9 ;&j& Hi!T,:~F0*|q~ O2)'p~yRPzaq7R`Gckl*@9f(8h*Y~+ֱ<X1{|[xY=:8M5"D+Q̄N\lxUĜGCe8.1 K +30Ƨ>LۜGw) 0s +9# b.c]0Mn~% F<[a.3+E2u>iPU"޵{t冉9m"d2p3~{$KcΫɼ""c{y\n0pDXntS[н; c7,|qxEXn>WWJվ3%ZodǬ.PKo.fp4- +l{`'uYѼlEEHc/ +Ki,uׂ 8SZ<6~6sN-5Y!Σ_z\Byrsbd gQrWrΟfFtŕ ϡ#Efؒ-b @8M/fVDk+6_k @J6"Q 1J;2'@ƛ"4vsoVxO6rF A{+.@a!4nUIw8yr4ŁqxyrG(\kR:g 6,ѡ%V*sWEin ̊ Jh1t簢tH8w=gLjB#'3ݨɆS;Ɖ(f!/eA?-[IÆpd!臡)ҳϟ`#Yc82O~UTN- #a"Y6/k󔵢'iDE$)(@vN2^RxxMݷC/rI b.= ~es|QX9prWLVzzBe9C/raMo\֗lG+bheMiQ۟,3ӭ1IRegzhY,Lh!FMkߡx<>ԊDRLr1EʁuON%7UP??=^V5ĖVG~ mSqʣIo}iMGGTx@ܵ~޷RsrED_ + % *Ȇ4i췳Ť,Wɭ&cuWlYP/}6A +}zжT"IvK]/7;<$'2W<.{CwbU.3:*jVU:~N1A +# <͛;,P&zW#~CtrRD641#HӶJЃ!UoѯPejTG?:''x,aȑVX| +H4n}*gh$:oH6X&%E 8|lʇ8%8̫}(A} a{e_}aHO5@NHc)qSěM'qiuط{ש7O7FCĖZCzr.M3aH D>{οl=H*C^2ز. ~3 fEӽ FZ/VG; 3/R;Du&~  eYi0*Vϋ[FB2+0K~^/ߚXF:|TmyZ8Z}bCr yb\&< p d#e +JD.0AG2vRm7m20 FW$LR&z`Pel‡ n;Q']&{xP"mT40,%!tDnD/T:{߶s V\t088ɓo:sk'ȋ:>"C +l]Tu>PYj}QDN$O/aOћxKdK7LT~]`iIɹ {fr' 3SAh1 +7 +ꐟ\yJϱgVXЋ` =G~Y!.%߄{GhY52D+U5T3*RvwpiP`9n-V{B!I 85aD-S:+/kP,|6ɾ +ZxRB&ÑoX9Wp˂w@rW牪m-60;PxE{(IjD*Wm⨁Yq4wfr5$*{eUp%糏 ɽxK«/yT?|6X 1Qwsj:wM?"7EJ)>xq,H @n\wB’ZA1%{o`CYk%rS oև(&4 *d)uyCnoWsp2 cJo\~# 7JEqLy"GxXf 7 2ruX2: | Hs'\ + A]e.["нk5W~k̪7QL1=6^Q bcJ4OiG7A <4'9Kr­= 븾Y*b95k) eI*zy7Ró~ؐ;}ysѷŏe ٬{IP(_etiO02[<ؕn1iJ퐆^5e}$αkVK65{372ZG8;} mLy|T10qP*puL쪌V!v<~1`eKZ38;;2` A@_*m^ǣ*iW- 06(6&Xai TdREқo-}@MY]9uU-$0`77Tȫۥ< Mvx_=I\8XGV5uEf#ňؤe6HW +ž7S4>t?r6ߩvs'XX%iO5m~ϝm IKco^d[?nA@_H- V~j^n Wcш+lPSjW;} +ȀA68 {GOeZK֋Qr0> +bR։fkp}Ÿ&Rkx L `^@~`me5!$vBFw; 컅npIG%z^6k%_)ĸۗZ;&S } +$QIÉS| +m!#qn?UO:|Au;iؼY@xm6U+G5hOeCs^ȟ'/WMk9 0M}>~YruNh iZ\&R8΅ý(V' &uaH"BLTHiN -t.jZM.لS/,4=Q0YHfs0CO8S#gC9OórCoڋ43J}6zS'AS~ \ήە)d3=MҚMky:;I9N o?{V[;M`6Ct"E.ePPDK:v@~HޛRiʧ+ K!rCoD&=\4 *خ+غlZ??720>̪Y=PrevTfe !o$*VY@z̷Q|k; 3~[%;.cPAD1s%NEtmV*!lXkjĝN!ei:()8IJȋ"٠Bϩ868T{:-CAq)J$ dhEICNA9?>iB!KN!F3W%cñ`2,5BjMᰨ#ڲ`P^L\~MiIepPKԍe+bYވDݮGY2UϮ"֖%KG+ L^H7@ap}fQ`9T%~s{oisN|5GRI?a |]5.Cmaw WE:+m>{o'6Iw=Nޜ/H5lY.Р˭V2ޟC[&ڹuVqa0NCNZX.Ʋu,|`$d>YV]QRK+YEkR|KZdю My{#SU/`8[.чڻ)Pͣei0Œoޮ!Fr޳P_vu V1+%TXs`#'auVMs#O ,,8-BdPd|XOjl +Y7hWfG[sr< ƣHGֵ +%iWp:Vp_";` ɍ|>E|YdWLa^ +K! >]49 +I;2*(SݞvA*A8H5Fo/y+vT#n e +-'vo|P@5xs-V Um_tߢ{)ݹ(AlFCײt'f=M$#!yNŏ/N(?ΩW ۶m۶mxm۶m۶m۶曫IUyגKEQ ?ǜ@VEˏ` Cw5(M._rq}!CB!ש|s@mb^XyBvC2[~̑ 7_;-馪xᤍ-maçյN D{B_¯mʡgnbnj,KV,%P' T% ¤76 K9԰eaa;9};&spLqWf5`hhV!=IW9ŦYQګ)eԱa,c-1y뫽8a>LV'yߎ }fRam_5֚%CĿoRY[?sǓx]C걹B<;yLRTA]0m#컞 lWPy|V; 3B˳1W܌J pUFΛM Gj7`%wi,”ܟA]"PK>n08@7(XU~*,~ 7nB?z,hͰFF]EH [,NH +6=Lz! SX@۹B./A&(`:h fߕaB)hNtrB +ѶN8iiG0J7Hb2fE-@ exrdUdNp @hH[&,8zz~$9y9!dFGt/yr32B0T4xvQgNKK&}knجvVFOxỿ4XI'* 8;@+7НmҹkHŅ`@\(m.._@ϹeO9Ik RѪ3qW&b +%4Y2ظ}׵!:[4S`W=Qw?^] xHUKž.q⃟8"N[͸I 'ܪtSwU۪9Tz#gR&iOT;X`h\䝧bWxLjbq(}J}P<Ŝr9;ʘNs2S|-PGA_QK؂%6RxٴxI V_4,}I2`'o{˖. n!?Jb3~?KsÜl{ O?͍|}Ҳ(*鵑q:Y8un!$c?YCz]IlDü?&zM%wvaV/p҆VW/v\4=Ё7v-H rF)9yxxEN"77|Rh ~|xy6D`HN'9b- wTF}_>7Rx+,$Xt?HxI5k9HOʉAfVݏrGX5. a\MGU\{cuiE*Hfc_Dh( +_Ȝ8ܸxѰ '=uGGfd1(|Gys7 .4V/.{2A%B,>6KTB!T#b|5VqӃ9-|ܘte4Q"7ogcj-k +H"DA-wF +xןzgDdAfu~=cbE2׭>'~6S?Mmdl@[-R@r\,@)-|`>?8 lEѠQC6? 
i2".$If!@ulF2w,R'mD!;Hpm|hݾ(06e +"r!PN%1)%U$(nw1Q[´t2rBVvM :iȑc10,!ōb֮P-o{`x#AM@]4 bM qUs]%s|#,ãٺa ZW=To{K6#SZn|W )~&MLUZF)(C3"$9ΰ xQE[AK@|$NNO(7Ϡk6@2Q;zt<= /I00Whr  A +Bԃ9dTb4y_ I'H0 }iw2O2Nu/:ܥ3K/Cϔnbz-Ր#`[q3.~W> +Q/d+5Y_PZ#Hق B@p9p`8hc3ܓ`HyKC_#SA p1[D?]rԏVS%q"ג +UBVBݶZ?_pDw)A%{^> ſG(^RN4="A-~[],uKZLiEfe$3C|6v~ ct١glPaq8c;}"X\ ;uJ|xrKzHNDpYt9}7ߟ7(eZ,t-eky/KxT̓"ߌ~pVs&j7yP 1oEo5yY>BW&|Os,uS' I +m7#TLԋm3եNk(bˑzc,~֟K +)74/s&A-\]ㇸ^=Ӱ +̃Z#ZL*'Pńε|_r 6aɦV>Aj~jpi6M M{m:sF%g,EI29fJ,IZȘ]otf,/+b=E +qZ@ͰIa+B4xG +@f* +{v[l̹ |ӢF:F iQu #{GC.@,ad%9oLa$M-s|`؊zliGW?:؜Q$5 )wSEQ5)'׾mp@l7)ɬٯ#rkvARAesw]0L#,!*"[;RjO9^NhBMu$&Mò]p}7z`J(ۡwLZ<&∝*v$+af )No?kvtf8 ŵ'X; KcWׁX:P쭧@ZIGm=xr907e'*ئ8ؖ$!Po}s!ؼ%2c5Gx`)f1m*;sg^n 1>Ѝ 6,k/C$-ZU_3v>\R#^G>2L>s[.9%ΌjV&`ԛV+:aٗN 1&n1O\vT7?o**&We34|^ƗҿGG)Z[l8SiY;]rNACש4T>MU -[/ME%sJ6LǖP]7u-ߜWxSu_C7H="?v3> k=ލzGlu"rMlIF]a34Ñ/ZQK,1f%M/8Ծmx2{ŤpM[(Swpy1 +cݷ +JmvXQiAc\nzZ|' y"w)x'뼪sA8`+s#R,k|9f/8'[e@X=wlۺҨE>6aS-{A6JEOln+nv@XrcgVMKDR;p&T䵳^):c8;Е +?Rpc,NKM?0|Ы_(e2o#Gp +~Qkp<]E RQ,rq4CgIfR0x_\ 6$~I|0 +^FFǾewl9|Tr Fmƀ PF"]SѢ>qdfsEȰP賯·M(4%FhJj][Ur/@:~WfV#ǥbMTk6+CF]hsڦ_e2+VB[EMiBK p5=ݧ +>^5PhƨrGن54-si]fhl6?N.+2s,<\kLLEt'qX9l'Ym, Hpﷆ%ˬy9YN%@ ˁ-i=;ˑ/.Y|3! +y+r@@dh4O!u3`B7W@F]9~`)kt(]``T]#!$y S"*<>נ15Ǖ鍑oe`@U%G8 yQӵhvK+觖!KRŞL*|>brFE|ְyQP}#qˍvv*B$}S]0xoN\oMW,<"={ {]U^>^r[{,j*'prd{PwV`Mo^cc QA?WmE:ľO--,&2FM75g-S^,aDj^|~TqUXC6*D}Iu8=kơNarG,uN1b9z%V"Rx_c3}Ǡd&b͆c/܋dwÁ{s 4U~YHYTqB uhH"spc®Ze1=c;PA606@g;bJg2/DdA ,@$s[۷pIYuR_xagOVP4o?=0%rq%#n*jŚO&z{~$`eWRY[ +oj:K`[)k=]"Ba]c8{}mҤO@%r2˥ux?ƯA3VdH\y[q8f߯7Vp(d%}PL[MJwbB(-`.3)b~Z KW]Wى6k^:~VTIR<\+CQ"7B"X mw~w˸HzFOS`TTFG[Lq/kX 9yp N*ȅiKR/Pl]c +aI)?h#%~x58VG+0PE Iȭ +{)yVB[Nߑ \ѝs5+'ak^Av3pozEOCs[]=F13^D*it"dҥvjNT8.[9[pI|*[54Bh3&$ۡM]kRc`h*6)򭐾8tm&5uE7JW_FbP vT ٽ-<(~\?PЗc{ʁGT-22kit N>qZ.O57 )b^K063EّOm%O4m\6pG䷯CA %g`,@WM2-s.t2  UV#VC(ZYg,p)ˀ}b?M\ ~q*Hl,s-Gۋr<&4쬇`m\ v xH&G QH*L͒^.X=mKIaq)yy,9=&NJ<>&:EƞtyknhIQ0f WVRF$tWT'Z60L'Q: VHN{/),[D/7ߚLdfCͯ_e8z8zVU2@@1Shi>%[ۀjpfFj fZp vlC;hsl3Ʉ.G<V S7n`Og4.1f,R.ReU5ѵ~3.jE10 ķ@ĝZ5wg47&T4ٳ6XHҩN|w_~Hx/)% n~RZ}< (~^6%Bt̯xu&wlB&b&e%*0`k(j3Zge 627f\&Τ?1Hn@޲MK<<YidhnQiꪣDn2VՁ&W̛BߖD

xD̦VRAd`Vo]|T1UN8=:: \Zjљm|~쑃zfY5QB)Aݴ:n/_V8_b2;.f>uuU)IHm[S IY"^gzHfh=h-cRw#+NXB,ngQn˻9?>}!aBu?Z}`z8PA0[)OLC4(6aBA|o=&{HŚu ً3ҚHhA\7t$O.pP ?Ms } 6960\bR۠vзfVD'a޵l>J鶝wbO;/ %zN` C/hPLYT$ ml *22}O5a*p2TOoƕE\o)|8\ou%ZU+꟫ICi[M50F(~k{9ߡ#wxk<]h1WN@M&k2ID|鎉Jd"pǸ 59Ag޳4<|pPi~[mn:28.Q2) 2_RJ(\1>,WBK/*S"9ЊRLՊTw +7T͝HD{1.MB:0uM7Ϥ^]XW=MAyO#ASBș)l!#k`"\]ώ4J'叇wo^:6_d(>3͵$QSڀS x"_>sI$oMS%8JTfauӆ8ˆAͰ<#Q3?-! KF>va9lTQ(cG$`][crvxMSCs4g,` z(GjmLlSsto).Bg",Ogf}v WL +~Jr>@'fa:^4aq>κ6.KTIcS4˟Ĵe%bi/e[8RHN 1rQdƴ' Abeaj',$d|2 )_ocpmd_mU]/,w 'wSP<CULUc c}V:O.[N,;9b!6r5ͼ*CwL^:$})-Tb^'gYO& v(5>9xAЭYTMpdTU*LJ)8mUyFgdR?:q?Ƨ"Up*vv7Dl)rr qWA^2 +W|"6>j<ާGJ2ϕٗz"{1$g&Ogf&Mx +oGx6-=4y±2Ga"@RR8:ͱ/k +_HvwtV9Ze>n.#vrސJ'Ҡ O1U#Px>oDɊIԞ /ut#9I7y33#/tɜ$uكu9VPAX G`>>q6m9"S8W #e%R#oٶUHii4op%,0,A+mUS_y(hV.qc=$at4ݾIU=K6OeSM rn`k^ C7S=zmc^e1򙜈B<'U`\܋So:l B6d?;%̻Šz W;~K``UU:62닾 9Sh'LDK [XƣyHV.[%oH;b鉳cK9/<&N'8b[g9?ijAg >;+"3 ?+R`sdQwQ:HG&ttq88\5AmH,vvd\b A2Ѿ 얣b )P}I$҃U1qFɉh&Vd ^ _d|n0:dke=x8ɠK +bknbzO'^Qq7Y߰p8ofUqmaQwDӔx7?ras#§m,xmNcz)ụ tIf> iK3iزS{+fv|phbt K&lot|I*4[Ʋi"]pF-] `AIRvN7,-P3T[5TB` z!EvηJ}Yi=xCN?8ݱG ˜ʌB۠N>ŪUpn6"#34"G’ӿG>eZ{0KQDBxVx} $j#^J|#l@Vv澜H "ZfA'tEWfL|4aN=b4g_ęéXR V +HL5G,T*GUM>2T҈pU!xw"ޣD\r ^Nd\MkQ$wT +%6;l[|}VmW<׉`H*w]u ;Ǒ~ T:/GP''t@iQF2(L"R`Fh9Wp"|~+FDQI_ D0ո ˊ فr˼oL}U]F|qڒw" e1 o bwiVʨ\YK#JcKKC uJ[r>W*z:ى׸VP/?gq.Xퟎ;AhYIimpˁ7S)‹%Yڡy^ilȉk0#_4 MX'p>lCR2B=5zTV +nڗR9Tje3iBɯx; K쥗7^gicRg`}}2-eb]JR&˧mǷV0k4 j f\ϓNd.[nO9̂uc+ףIkEדZ!Tw߰=m1ПtnBWtLYq/Ä 7yI?fKPpS[b'K +Uz\d' 1\1$KnmD-= Dm=|EG&\}'iRm|N*$-8s,>b o*badλL}'׵M& [*37zl5CEsd&h/m1d4ckk8(#~V3N~vyD+.͛ovSx #+[W{} +/Bv&"#];7,"pglD5[ׁښLLMQ߽]p8hBQ%;"`iL+ɶx,$byDK袴"|R,)vۙ'=BNɰ+4$i1xKڗq OyGp"`&q>ꉰ&T +g/דDY88YGYPl/Y/(Gӣ[byol:ågvlIo]$qO c,B}N4B= S);jHrui'!(=, S]K ?c*UEu45u@AB*֌|-2jIi~HD} +HH;ҔX OSA:Fm;T<5_Jxy0,oﻟ*[!j]-?{ztm!߄-_1|?b8>_@Jj=*x TE?'-Rlr#erP`sq -"?R%,AKLF'V@`eFS9a~NWtM%W1Ruum OOfrЋy@zf?-LE:5B~( +<Ϣ,ufHF`ƅDC a.sUSc^_sL ;H`”z(Og9[.w/bmrͰvzMm"aӊ#.\{9T:e8m&K4\H!FQtPTI a.ԵLau뤱 `7UТLMw?%3LQ$j/ؿ\EyaRCh 0.Ė/x3:$8ގ4Q1FR]9yJ8p=CBY|^daL76*3.'@9o0\8/۔4%NYD7a!8}ʎ9™+abh!mlDސ[T5J-{DتQtW,}Th2{&;[4ijBPryDYf.W7D]yaZ 7v^Q&xPcH'Rhw*Υ&pX/mX |ҧS;;bà$I8_x *٢t +Ƽ)_o;Kx'4/RIzuoOZfI!뜶DQm[m۶m۶Ym۶m۶ͻO[ -#[ /zl' + ~@9kƔNm&cZdtpMRzƊD%{8<Y]?|PJbK߄[Nk?5Qo6@ȳK~;xm'ڑ׶31)9T@&zhǸSd WFi(LҌ’(8&1[-ux7t?!]{=L=򹣊- Ϥ +Q}U?Sy fLXޫdA,K:n&-G XNNđ͘0|FT{r׵H!c,sA/IUw׺nd[ajf wER$3p;JK~V큿| L +< Nefڢ+/s$mIݯd|5nṁ3B7In؀+9jw|.N,i D`?õD<|@q̔ӗk·뾟 >鱏S$ +![̴Hff7lEjd8eB?F#,,50p?1'-;Io'Y\0M +9NuNQar TfU)"JaОoɹk_\BAol*t1BAƂ)r$!1p=Z4,GD'Q@&Ej`Wh vp!ioxa"Ce0t>Q\ +Qe$tdSJN:0C l/=";JEfY3Vb;M&yȉ&׃v]p&'% p M8Sp21jVdh{d.]kKaoMړqAh![3f,Ή=OִPdni@Hr$|LhLhm{U3^So|! ; +j90:jWKƾ,Hd`w,p+TL;oыu23i] DwK5 wpR,bm =DE8dR Of@Y* F)s+{(V9xj|AU`dꣻī JƢiq )|+k, è Vjd[I2iWGFD{;H -^8!| Snقt_wmUEwAo1}هxw6-EsFU%멩yTߪ +[ۀhVN&WE{ .}Z:;&J Rm4vjkM 99E=ZfF~dYqoH-&J?|}61 hHxdY7nquj: W_ӖFYqqep-E:hƈU{EjfIefM)[h9 =oU iw#eS 艀Ft$ՔoS޽RL8oX\_@\ 1ۉTbr&pc<|NPn0¹3fAGQ*'cC5$ +aE^-~D(GQ|?P7zC>+? SSUJ32u8QNנq:_0J B[2ÃnP˩`aJ8{NBF ̍j6|/;g9e9$+) k1Laz總WTz?JFa"nf꜡x>ddHQC߂ުTC}h]IMΔ/n)ˁjyNE=UL\,nhi_| piMVkv~2Eog-©{c!WS8`T &ʢtCY^ޭ-y[7ÚtS8la\k;'|]&[r/* 9Ailb9g>0[Ӗ=^ڵ!_pEA4rp|IB^_wڍ"[Vwh\o36M. `_eC똭M>gL`6o}OMd0Bz3TO)o8ݬ\PJruZ6Hi+u㏮cyf;a ⌋#/.~:+oҤJ( ݎ'P`[J?f*/5dCyΚ02NC98!״t0ұ-X{x Nj`W<[(ķGĦtׂrcZ>5EMY# +k|^2>,B +{[f>SmDD:偌_ΦKd[ov"!zW*,g;PuRʂ,ȓ!Ǐ5)r&ULc`l|+'~߳H8zz]-dp >N.pŋ`Pie"INl` 7bVYlcl%gnDtRW<>GbE.H'ʦ*[9C^$ˏK?TU}GG2$Z¸>ٻ95o[=y$F +!ѱT;aN ^S"vXA Vo>+zW$w|L;&yoʎ=^8odoju}pݲk+/+3~+ F9H8;ԜaRO!Z- `h > lCݴ,ޫD<_|?%}6"H,dpB=d.>;q. L- 3Ʌ$}j?n)88;y2dojE4ˤq{ +zL1*y؛<~d*=G^͓nKVeR#4[QQsnQP En ߔ(vD*kc8ɏf#]H +(ђ dwIcbMP`#?"xG@9{4P\6,I^ka,Sm<17٥rVOl^]5ռ/-nwER=J֣9kx;*S ^YjSK{&MȐ|  8yf/|Pm,VE;! 
+N.uN=#dɜϤz"!Q +k ?H1U )1Z P'JpPE #"+0 +Re A}EK{6sfZZfcCB  2ahFvug.Uz_H >ھL➰piZi#L"]QE(t6ݐV)]2"9+ZսiV~.;9ՊFD+S=<5)Jy6KG7ZO>+SP]!0ŀE(ׄhd_;ʴOGǜCv]+Wk@[fCfD4" X|F]VŸꏇ^IcMB{p*I1]Rp~"37W=ҝnb=F+CD]gI偾ť'ZIݺ2?=>jDwMz18"LK:MgozBL=RB=7D`.[HRbIpj#;rjgVSRy`t .Cn ϽDI#xLWΤo}9_Q{EuۏCĢnxos 1 xrV]э~- tRMOB^f%Qaഓ;D_8+lOt_W@.uZ-& )d,Q({8㴑CV292ٮ{gBhx R[*`,I#zto,sw +`LSvyXa&KpK3 +hHH3LԢyr80SH$0'ɕuuPkZPQD2Z\ 'Nm3O]Rzv>@.Ew9GPD鏸n7* +p aVc#WV3|(<mW7@dSN7$!Re*L" +UICV&_*%T+>S< EH#l:OU?aD6ZaT>&1ard +X1-a* !AѼ"z@B@\%szvcb l%/__~L@wR3o >y.O4u!hǃ Cܩ˦\BU;n[]?E(JfՃe_fbT_}I'LT[r/#u]:;:u&?paЦ@ϒE +ܽ " k|VI=>a̼yxv crЙ9xsz;u 1_(TE8̸.`RmN)y!1`6N'n~K+H L0I.K΂+Vׄ;<%N#U8jO1Vb.8Qn4TdFeе/p4t!Xbv!Օ_PPm P{xmى8$]#p@ +ذtaKWxOo.5}k1KBsS)c4͈{Ty7o}]+I)3i)Œ+>iG-~Îz5RH"c#mj0po8(L͞.jN]KI(K؞LTWfJ  +OJ?Ml8ʌf*,NѶTH$ PtsT&A`;NkE}pu 7RÓ"TMؑk/߅]A,)]( $'DSbu2^O t%n0z13ۥ>֡5mzSFtGKul$=w?ō4.g tFY+# y@~#6Ļ!"`VB&E?ޚhSm?dq]#Bg9 N*b swmR㓃^{KsA"L]ixoچ:q֚4cmp +rRn?-߄l6a ,oeMiA@? |QRdq`HRK܎cM\yO٪%y/yw"Yjg##I8ʃua. zi*H]2<mlhjXZl1! դrx(ln[LtSʶ!z,!&A$<3qk4@j?Oлq+F !tC\:8;E+w/Řߙ?d^äȦf0&)]|Jkg9\Ǵlq:p&bOQ.mR켃&C&ˇƞi !i@ N +Ç諙f-YM6,"jG^/RdXD`eȢ%}dy1B6 ;aR=>!6(Zw-cO#pM@>̍cCHe$@r]$嗛#(-'*]?w)0l>dk 1dcm; s~8&+(Q!y܉2v8:㬶+zO'ieTdyW=qbTy[֮٠ ρ\tæ"z- +3o ~.#2~Ӆd{1/4ON(7 +<$|msi0Q䂼uqCl1.zq5Kk~~D'GCIKߎqieW |Kl|S;pxFtw:Uq MbKAЪ0gP,֛{ܪ'* Ljݑ@w-#;'i8S.ѭ{j/]̷% +ڟI LSؑP%Ѩp|w7|Z|Xfźaiۙ5-C +\ϫ{!D2zw"N7{K#3(<~:Dʡ:7"A<=eO1pCʞ+)f[ dN&xa7[#+66.Jm1?|Aۗe߬(Oyt"Uvh#pS;&EI{kw%gqUux[%.Ԇ9}Zxq`tn'26*J;[Z0^&bX#j{-QVJ `xZ$HI +$){#7߉="e^UH˿b&@3&NXʼ|{ hZCY㡧أi2fz܄v0΂"NxH8PtRݢ?Vt4q#| * Ö}ؓOh@NHcw +&/Vi1%ED>ICBvQcјK`>Yʈ_FR 28E:[~mMUTz^7W 0B<0Dw^A"C=S׫9Ƹhp;Rgčdwԇi9|`(}j1WNJpm=Q6.3iܓo@Eś9MBi)D'h sa`6᜸fEmY:駢׭DQjSh|?lQ.P$v'_8ߦ^̡\ +A?|<h0k[ofרuZk'xiJpɹsa,^ +݄4N-Sihn4Tl7;R>I&eUF2PtfzAsmLܢOY}P"sV\A +ә/–`nj;v@rkĶq')yKNOr`4l.]]}.L>ӱKfSW_&d:t7QE]JIVfQ̙F2CK!>I]7 δO'XngKnhsf٧M7\L3!c%,y_ 6)K +K{O(=NGmhe̺wcR$njHԨ ǟP`g_# :E oȍD8+9Ѹh c5IBSIE?XW{7 +:޶m(63X GA@ FљE<S0Qt{GKkM(E-Ųwmcdbk,`r"k'SW#1iMV&Y[NeD癝prkb63hjPkUm_`R4j46zTPRA[&xI +}OlX < \ha&U1 $Uj"_`K͗[ev3[yZv~)R@- -ǷkA';ڻuF|U!A>JZVg.y_%</Y(CPa$%36fX "mCSOxjrfe03 |&||+2j) )~4jujG!hq57.ŝ([W%Io1&fyW{HFNnFruZ,EgG9̫&< {EzlĞ(tϒu" q5j|ٿ U^btRZ<1$n&U;9 T2㔇ߔFL?T,@ +r"*<$+)oV*gV0zȲIcϓwh +ru-z0> 5zϼ>~ 1 +`kmu#?s% YR wø^<ÔL8ga,mLy%C[ռ^|&u u)|C$UD mY+"N fwA~Q,x*aEs o2QHs# L1( Y >d>_P!pҀ: Ȑ\GxVQ#?7Q?iɖ a/!?_q;ң KUN\^v8)e@e@4xw0Wz]KU榬D532J,(p=z9dI`Usx;#FIKqEDhǟ7E$=v[B^Jf\f$q̼X'rl#l[ɨ('U>x,ܼ&"k8nXjDw ?FfǠA#~o*m^ MdaT֐I _$=o_Z8 p01m!5)a>=P {g +7/A +F5$жWQD9H~^yY(>8@7Fm+ؕ@eC3~,%H(cAA2!457j@fj-RjQhr"ЍtpV027™.fPċ[;XL!ǤEQJrɟ3E{}^mnVxt26Q؃Y;R9LPUL-ZF-Va%e^@Xfq07@):UWA_B >3ŎaC[?x#t&{MKѺnAn-86֪x.7B(qt#>|fvѨ_惬AU6$mS!Hb1iXE酖Avu N/v4izy)ygj-#RxI3T^eo>^,{pg]ݺ4]흑zːSFh_FXf#xsww{dǚ@[aWk $oː %\OL1]j +x^pI@F|cVWJxeϲa>Q'qcKT:}!qH2 C Gw*XPx`[wo 5dgR=S$@=@83-BrPA M_ڀD9fـSFjcY$sm݀C +M} +.`x]^"#\=\(),TaY9?~څaax{#)?42úSwAY&8Wv2Mr<]ZD[iH֦w(gqy-ysL ]Z+("B"ɳ׾$7snh7RO:8%0|m6$X `KA6dSO3RM,QL!-ܾu/yz9Y&[R;zq\#ףlqp7k"OVWT_әVmN!RQ\S*{5I6$C{tеBѓ!jEh|?R@@%kل̻FY+4?fq=fjy52]XD ̸fmy>weV^ǃQi_Ez*g@uP#'܇XVzߊOm=KT'Mc3 E>w M(2>^vU񊡮ޠVY@l˲\0Z`u5*2PX:wVfi`R=񋊩Ώ_Ε "ZxSm2^pZ@''U+!rax%(kjhӱ>DA^fּ,&k0pRő5FI8A>J^r49ܓ]$V8s,Z'`tZ}?\*3nMxbelT-]it+= +vǠj,*x5Qj3N~ {.wRe{[?}HG~5r#u,,Ң ~ۀ2h|x*dѳc?<SKLٖ" >, q~$& Pf lby4$ +L`5uUsnƪ3_0(ȼBV%Ԅ1‰&Bhwv?"/##2hhK6 ߵdsacS,G;7ʊRe:PvU7{rX4P9cώcbn.CA5EL` ?! 
+?,T/EV=^Ƅ)U&?þF#4ܹWw2\[ʸZb$DS_&^YM_qd)wGl7d6x ;h< +XՍ m'}ȲLN-Ϧ%^[|/mzJJA–%Kɹ`src1gs1|ȵ"dz ׬'7lW!G֤ ͙ܾYc28GO[/p^V'i4"BndpZ(z4,;I5SVC9G0X((_2Y1.v3RJ&[:U53|lS\ MӀlSCl"1 v6t{e`|.C~͝U؋cPZ)W+u\V>"$(: +LL{Wְzof5wgUÚ⩾L(ۉ +^!M_CÕqNZƌNj L.Xupy`mass2=:1zkG0> ^N5M|F6^ vdczswɆԺ Yt֟[Z,-\k>-]ӅU*&'K ev MkL 1Tj:lDɨԢ^23~MB^ug.]?%w?R6>%z XC+uuee6gpm%BoswruycY" +)`l'7G;@KmUcE $Dǔ 5;W񉞪9:"n%yg-̻R]cWc ,Hh\3oO_,<6#DRD+jtu|S?|\YbxX=@5 ;ϱ4F"}&|6F+{mS7CdsLJ7QI$Mg)kn2 !F8k9-2FLaX~j'rK `Bh \,p`VL&L3E +ftZ7$սƒr"UjZx9Grxhm>w#r8X됕p{\~k|u &<[jct`?zP_%!u~FL;EuxWCӺ8;)s4|,yٮ8W*뮌jj׬n|YSH_A^8Y׏ܳ!:EDx$]O +?k4"q?2{XJ u~ΠR7Ixª`jCy>K%>e@lʏH>7mZr$36K`+A5qun2vJb_udAUF}Sh sZ _puFY4ϑ$T]U8C~%|6OҴ'k…Kc=IM$7;Q@&V""z:C&VY)y>,n {OjgRe_ !#}# {S-X@o9:yG6[D4rx|x` Ho =<2jwFIy:A<ݮxI!v/\s9#A08U<'pX3b?$Y|j8F* z2ibYX:K޺Lt_-p}=k^;ⱐkJ#/ujCTodgذ q0B<\ke|RAf+=H \u/6NUaZjaT8FYٽQ=6;K 'ՑJ;Ϡ,y#*  "qgruMh!hfcHS ZAsGcrhU?!K[Qdߍ'츰+Mtǭ&LcXHT̫il"6{)ZCtH*E}܉s{`y;bK}>uB(z`@kLI*Z4!]zߛ:]9-#WpFɄP-̑4;_?<}?#C!2 sc3F\OaB.%:G7ԥ%䳤 fo4txa'ĭ; _Nl{ 9œVG,[,8bB+@LQo$,TtXF aɘ ![mYXS݂6.eζ#-R\3ܞ%H?F66rQOBr]ZB`Gӧ{:( +yykk +|&\dž2pL/œ1Tvb쬎:QgkW}\k>ncHo~f* ]䠍љ#, 47$OQ9D xKZRCPko.FЪp`S3ꯟbxfkyNP#P`1N]'9ft=B`D9#Mq-_Uy䇓;*G1 xOپZ͹q'~STHTJk8JKdZy bwy),+9vm_R*YrHkۙؤZz,V0\̍\l}retr9:59{cfkNE5Nk@v65E K<~ j;cPub.zd$7N{ 89.€:Ucrg,cԞ'4& x\GInZex-"TMENR:iӡ 8aՃ018SAwsWp< =Nn#92 uwT^?CaՑ! vLѽa_ bßȳm|+ 'QU*9ET|b +&&rU bT "J~o8>es9w /^+< ŕ.]%'[ўWX nE`Qc\Bީ4~U]т6*GM!'PMahx`/O@XaQ;W9x*P\\" b̕{a;>/lgfL@l[ +ʪM<04LG!H@<εB!ӻ?ˠa8=*ӎ~|U/3*ʁ8=`hjϞ1}Jz0\5AUvg>O|iw]zk4__G*lPN˹c.L銥*Da8 ;dV8h8iT OjD;~m{[x6m`9*ǖf܌b  7ʿ 2yg7EY,C7ОiV>z_-ۈ`?[MŤT| + D K1P/.gn_rE D1=P HmOwL-FA_6 T@3au>@EG Cm1XרYV {|9])_Op†;1  fX +}M&Ve#j*(/zm%ΫUt v@>$pJ务q{2}\(-_|ø/^m*gQ^ G:59sl@00t'dHU^(* kYV>x:LIQ1PLfDAîU\MuGwY7E#.Ѧ[y;V| +m)[>V /rYߖ1 ­Sfo6Z͎Q=,r#O(}{@.3D,v}r7r3tٸ{ȼSm!AP6O֙g$qP.MgwJr/ 1q'Ańy"a\.'ww3&)n(ܦoȪ=".,ա#}yzOF;}Cbc~D:4_|L@@pShSuؐMRŃ/ɻqqzb$J~szy _ t e S lx;}ԗxDu}Z!al@.S*Ӷ]s^Rv2GӉ+˼Ly2tߩbꂨ)gϠXi1>MQ,TX [òT1v#tdЅ,H +?ÌN'>p~ޠrYq]5f(T6-o]2R 7.gy=p痕s0F6@<( z~Oϒ{FGInSAӟhsݢ,$JzFמQϟc.5S {uOKS>uc:D )rK;_9yhІ w1x7 +8}#B'iXFrO4|627eN;`&-d0(.YӘh^2kXf%AjƄ;}+G (Nnj2Nfo9d5is(m݌aD}bnzyDd?Ge$bu BOG#%)cןucBș_nI1aYYP+*>j@p8A5/}ƯR#bWH+:o?J]Vӓ/PaLe!n'rш"6 ^ncv'_e X e ߧ_G/մ޹e!d-K+ z%nSrZ1[-HfZgEFyJv|&jAJa=xղwTiڵЃd(->k4YJVna+ԪP$0C,։i[-8.I~`M^(f,& Knt[%qLM"h'ecJi)yp,hp}1?&T>iNWw{OUv4MbX-]RWժAVpC ]P{TZ&$Oi!M γbNRtuEאn(7d*Ro!!9>TѦ2#1C| .~ s +8i^MQǽ)J~[jH%7̤ ϒƚ; +%stJզn +>\eTdrg&^ҋӾ!]7?a!#E>j APXHIq*'{^[[T&MճNA20!^1M&%"γ*r}xW]k9QϾaދu#D7 $\ߩaΠvػ JCU8>+ #]N +wm~W j6zNvEr5RU[WUv9:o+8γ)ya佭3=gjvy:^Y ,>ͻv?H%KfLψ,g9"F?^ծ5@`QV1g炿;|b~.~*S5[OZ.CSPG. aoLA\M侸"'tĤ[<_ fea&Z-@/v` vywpyԪ.~-ҟaF~ =O߿k!@,=b4N痰̥qbrĭl)Jc/ +v6# V>ThFSjInܮ`wDx bLSyJN2PϻGLәr0yuwEΛ($vSofwGc hW“VŔg `> i>hiZK(>A0nB&HD!C]`߰(QBNy,5ܖdGR脄Yb$`GLR!+Cp2(I;a9͉R&}@ٽ}zZ;D\(u.!x}~T4E'U͕-c;IuFT۪KRrϪ{C=O9-։Uc6W۶0Ia]F*jzU|q9(|9n=}rmDRz3.(1L@KO"}'Rp,ECOd7@'V=~52(SxB0!d]D`ZjUs?"^G7ބ S` r"IJ! O),?Iw/NrmDsA>.CjIʭqnUâ^ c*> +iecF&¡+ǿZ,׎325챺=km{#.Y_\ϑ]lA/] :MP74<&P)Dʪ5a`m)v$Cc5kShf  bk/kXO_|ӵ8/fhj=L9e*rGbY+{p;vtUQJH(ᢲ0"Y/bReLK5@u0&L +0;kD_כ$`N_eЍs8c{V|_`{:ޠc1 lͽkT-[mD;zIsJ j& H؎)#.)Iك +@|K΋&-V1%_]p6f<қMV%UM1~>MGofpd3f>/$;}N]a11Cw"xYA)J饙guM ˜\>["1%{ ]5/Kv)d+hG,Z<<;AN7Ű`1yh)rfxnnbZ 2߳ 4x:gu ȭaA2](z$FjZ/c.{.n ?j^+V<| &'Uxpk<,[fWol5'mypLl*T/fĹ'OENMh{5}Km/`.\c(H%H_,r1[QuJ8Ȗ<3NcXy]EbZH2?CnRk bgT,llDfC>[qX1G~ w2T4 |gF>h]ƕQgM Cf-򇗀JI0q3i1n'Vj-{/0nkS> ~)! 
k95}[QÚYںړtH[ܨi]VZz +e\SE`|'<e=Aeϴ?MviRr9/$?Q{8hp`}Edh0H;5G؈xH蓙zl7 8 $]4Es5Łո،UlJ$ƋK$5ŕ@vx]HOMV鱟^qћv,\R8akt_:_(l4h"/bv ʢn$FGuYeRDD譫c4Nݠ&&.o&T׸ +t#ɍxTY$L' ( Kobu>-O4#R + +> )fuR, d5YT=¬R}w#w`o$\~ڋ-4 -)tPCw\(Z 76*(i +C7N|  yᇧ@2:;-?kibgߎ5߽Vw෡`l_B(⹠ `ff[ہ40SvGtCQvn[ +}u;KhPn{-I -3+>{;lG+Ze\\}4&5f#]5sae<S r鏆Y}6>ݦ0d_2I +F"zU&DNvD^~pRK2%OQw+kES "YI!)uCQ +y~zEo'gbK-Is6Nf4 RK,Xb{|m13Xk'<ĐŠhdgs :0 (k %XQSt**bG,z m_f*cF\^,֎1/}6occ 8hv3[s&H +fp/͸|ɏ$XJu8{u%*!Fx5ou +vrSS1#ŀj(:2bTؐ~ FՍxEgq98l@_h$. *QpXo#p4ڦP1uFW[E+ϙ%U7HbP 6^ʾ8U[ Q$ J]2ŧ]S.쪲@-]mqYmV֩_'Dd»x$C >(fxOb(s]oHt + p8H:Dzښ*J.qcec#|#DO_|fN5uqQZNp^(yPU`p9G?}1״c̿Grt3W8}1 A;M?kA1NeDmU멟-˿b{oO}fu\û8!WA4HpjsLFP-R'hfܝ1{+,g.l\;;**|sa<ij;r}vlM+X$R-D${½N\%@0d$*a=q`4t` _)|d327ͻ)z}e@ɝ8M*ceOo&”5G f{E[}6yHnqA[dnA=F@CrMuI 5Νj 㽔R䲧Eqm 9EIƎ4?*w”>GfzPʯV-~ѐ݌ԋ/=$PoyNa8"YZ'篼z%lJOElv8ҥ,].`2)i}E@fs ÜtO|D*Y }xd`*Țc-gg4H!`IgHqbn"#CǃH(C +epJُ\Uʊ7iz_CKӤVM_NUH~h9?5p(ᰎLEJ93=jrߡ=oȹOkT"8TXe /㬜 YDOcm)#(N̟Xaǖez~Y1}N>+!yUy;-_71BG%S#/(j ]9a"bw94smΛޡgJ G(I w􈹠Mi!{mk#Z8қ$,ʽ koSem,p%An˸hNEoT$Bo +0C׉>AB CA z!OVXdžD [D9Jbc5nSz1"nMF8^rfG8?0\: +r7 azkӌ̧K [Mäh I g-Ml)fA@Gnai=8a1Nlj_p]q{C-[}-s$M9s *ڪBpXJu OvF`J"wwn%:zLZcLVN`{ q{K+m`jt8pm6L2AQ#fqKRΌ\bs'j>#1KT۪M讁yp1j+|PtPx`q5j:w`2ނ9߭J[:>lߟlш [~AS#.E{zps-{k~0b߯旽X0m$c MR9%<4ӴUir&hF K;7p4QG8B͌ )̵xzY%쿎\@r<ژ5lz ٠F v*` +*(|WW +d執mžz& 0p16#Y +"L䣻uFRw"IBޗpS'xp-Ms=`rP*Z &ip)vJy]UcZ=Jr%*7W,ӑV/ c?d=ob~|SHByV8dds/=|Lwsȶr^Wj;!h6Wuwgo]YM1R46iW9 y8"5h UjVz(rCPb᎜@/p]L?iTAEh|E9 Iy0Z.0q"fIP02Fni?*` )- D= iݡnW-Wic:u^pX'5ٞ$}kA~؜8\xu泳L2ơ +]#SAB6%izYc;uz#LYI~V97ZI8<ˬ^y8V_D 6<^#!F`'4idAڽE6M P`']gsS._sfYԲZm4Yq~y;p wk0}vQm怌zC.(7mexMi+os,wLsgFYY5DMAZ,dyi{8*4ِxvز3DukOP|XG D|fD8%]^쎪PAaϤT7BKD2(\8ϲ 8IU陽byK67"{;2>بIRReɚ52 uOO iQˆX^C{*bybl\s|ߪY#[|*@j $unj`4Qh8|陳j8”5W0+_M'0\xzM4enaW,f1?Mo䯷iZFߐvR+2D:ݽ- 0M-#%&.y@y [($CC!Ož0ծ-0׾Br^Q1 sxI5,ƎohmS">;JRzI_ +tr +m(nU9u]MVh39Rj]lJA(ҙĩLx +Ao +n'Z U71Mj@8=D9w;ި~ǡ XIT4xVhA闉jqߗ:ߘLt(!vMd7biT{ >Va.t!(9+t[l3B3n8,,H'1$ލwdc=x;v܋YyS?YX;+(~7s W?'3dH(〶71G~1>)Op'jMXa^o·Nj 5F +gX~Hԁ\{,CrIꄆ:CBB!Gŋ;TR˞ÏYAlJ@^mr'ͨ{Qjow=QՉ衵- -E8Jŝft$ Y:g/տ8fi9S6ir'*FN$oc[l7LmL+-@mw uϏGY͆R#%|I8YK8 + YZcׄ9iLƭSz2WN^o{xب(R'CWb8٘~LPS0/\$@ol7+CsŃlKdE~;

Mx{^%P6GbFY:cNC h ^L,3g̮O5Gwhq &ۆt4Gnp4' \E!11vupH{6t h`/*]hf$1%YHLV`E5qg%O^6$ɘo+fr* k& {o] +E[Kʶ8@YJV]ّ ~^͵juH!ÆI~%iB @wGIBU;}*G^g"/f[6YG{MhxSIKf=k*fp\p2LrJ=0adg5zBR3dNJ- Z:SÕt6\7F,R:F ]ĂaO$DJdF}biYod #vy,V.yִ"lk(xH8YbYIblA&NT+]%K>Eu)&=4Ķ)f"}tr՚V7NV* 4?]tyzD #/621-JUpi{%2l?|ݽ[kY6¬.}T=c'#؞6v+k}Eˍr1{ҷ&ոKb~h_YDE=fuca_lSύE n &HBs*b F ٤@e%!q^oH!3bNyP[(Ŋ_XlzTUQl !|c7 bڕӅB^y~ JSD&yY&zxXyjrRގ+SA{p!R*R ̜WĬճyPM?dʯFI^o\ȾĹW\b;53X_Іs9n]FK旤_.U(ou5 66t# ǗcF"4ycBOL`Zl>H5^-|)jO?8qXTg=oqyѫ: +ϓ{nEUxXI~ު>jWL|Z`אqc}i2ladZ(l 31ӵ ,6[},J$s[!0elkIT4|6HXc$7CmLf:jZ[AƋNP{d3U%aJxɴPTVܰ X?^a)y]Kǝ4!be*Yeȸx|{ HM _LOٛ±fҀYcβ&qZ&anK}e +$r]ӻϻ6t[+u7Xh7a~ӌ8@ϑ0;Ԫ)= X)~ߞYlRNs/uxūu$"ݎ@E-I]qyUD +-L돐\ND6*`1c|k<},aֲT O ObA1rWUYB#A.)3wt moA\`lpsĄ5D4'Tq!M}ߐ>8iGa\1CcKh{VF_ QO8CxN1lpoO vn06<=c)I)n݉yxAwhy@v1S("nAMͥ + pg})mrQ 6#>rl-W[ea#>-g$-+iN9R0W2lgѫ.=sr_B>$@ZkqAX5#N@dT;}~AhV02L(o[E4ވr24-Ьz96]&#=~/] khꉪ#YUlK0QEݗ:Yl^l0Nu+2;4ohr2ݣK-XlIq$%(n 8Դ+-󳞑#!yr(.,IuAxקo}0:Zצ{HEUqzK\uMD +7RwPN_4ZCi=^4$]#;ƚq>d 9QXsBgG\+ YϙsnQ׷Iعvj\c՚+ +2_!XםjwQ uz:Z{MlO7f{^s,͍ R9J=og/X L9av7=N~Z-.P+?Dk=@AMZ8[pk°凶Y|;/}r jgTWz8wi[N0ˠ"ϳ#z_Շ}k$^de /dbuhoOO#d%O[Ƃ3ށLIwS&ez*$-j$k;PWݶ[\X2dRfdh \$6 +)D :C;1L-oҐ[NW >:h q5SۺZ}DZ <]5* C=@n$Q8X_'דq6~-e@Z&zlxN\=c.4mO{(GoI:-d2)*ӈGlL՟FC*NGh!bȝ? +;d@}oDl!#abvS4i{E,<թa8`f/`}:i͒[=0AgO.8+YN>&,;Y.D֥ -fLio֏IP-6Yx&vhp3eg?NC?E׆t$d(HNpLLqy0MAfXsEM}i+xa5; +sR7c52P;O祲e;Է̀ (ɖlQI v6{'ōF`F :3WXIMQ +vG?/YA95ckpebNDɺFNxa8@s6 H9O魗X|{OhKٛ%e`pa R+СUT=P-ᅚJM]՗ R]d=y:Oa'Q*|spXפ4>\w~i'LksxP'E`4[I,n)CP[Aе3輅L@\]r8FJk8DQkV*j-dLm¿=/3.5l65symn.\ @ "bN +^ھDveË- ~%Cb%\M687jf-c,Fj"kJRqHщfC1l9C<'y7.2bUyZ漤 i`kR]fc;Mqɦ[.[^ Hm+f|n e? N#טp2s# bi ){P{u=h0#xդ3WF3ԓ6r& +ts3c{w\8VrI H#Fٗf{*<2 &Ra݇e54Bq);"]qd% +h(&#`WP~yA+ I6BUҴ]02.7QvȄI%"] @):rhAKKt~$d9E^'€}@tfp9+y Bܵ8:=ovJ\ri/8ѽYI /ǿYlnvAޅNDh/5)6ÎM X,Y䠊"KY/] 6qm"7b-ܫuPBtC=d6 +BeZUS7P5{ o 6A@>;؅Tiv4i~m:'=jGet-Ap/_[-Y7}E 3Uk.x/B;pA-6٢mv !~$^t%Isp9 Ɉk֦t?l,c%X&Oꣶ{g'|5TDxP;H/bɾ36P1&d@TxVz묳_&I0$B_ߓowB _,~eay+1M +SFww +{qMr0&.MZ +EZ>GRAk ύEVuop]ݼ1zun-R=A7ό@ȍ KtsgZQiJ#DfN~=-Z|`^dL꺿x,D彁DQe;Yg& +ǦQ-bf9UKtYЩ1j[-:҈ +hSQߒO×<(I h;M0%ƋEBzg*H٪l$j1@Xna(Ϲ +j˿{eԗS<e5Ӝ#K_5Wgˮ%BLG8Գ/fvtQe5b1oZGp0k薯S|lt˵s?'PO(lT,?1s.c^"nnc_ :vTKfa^hYJ4[M|  ?SXI >9>:jO]P&T]sbvSzYлaU.rc[kke[. zvuAVnt"oS^^VJcZj8cϱ" +?2`=%!pb?\sqA-{^ߒ6:3$1<8|Aa/ˠ[ŤxD<YiH9dTj"4,(_ [S d6>3}ARЪ%@09̚抩wlfoZ4 +ێ +; .LmɈևvÎ[<ǐ,vY|]7_ B T&nBEoBR!];"bV5Q`l&^4Iw3k|@2_]kFImtT@1&EA˅D[Kb龠| 5љ +k/?)n7Qr~5g>\յK;\v&3 ::-zY$wZD6WMUZ;MӪV^3.ҫe[mhĭW]M5 +01Ur䂁:Bi]ݿNg2y-j;şCxїq %Po/?T)6KqU7:(DqgK~tCൃ @W?PHwY.>,2ߛtSN'hH"R9e$e=HBhbzZ Ͻd&g{ x$эثԏa/n y-XX?*fltc?̿ Y^I!D,PpO܂IRDG=D 7YF"*p?)L U`gdyg.wM$ZTxEh&Ao +#v좔CM'"L4#F Q297C@.Uҁ/yz8Z=*+x2瘗vL + AD0FK8$V?Cf˦hޓMxk!%oD~l3*9-[N\\{2@Sڴv"D'f?2UUkGQȺu;hsKKՓb^w v%fkkɱ^-[x+E\DO蠼.)("|_g|<=&oL+I /9oj-Pېx>zZy@3XK9l~;_! -!cT+ԘYO4wKdQجDUؕrw, 08,cʒ;D;C\ /^gEvRP2.=OdqkMov0tsqKy/HO1,հzdYoI&mv,jAsy>˗zD=[NGL!TCi6ς:.ReI~Rߢ2mư9k\vֺǤ#;kٓ׽>ɓ%UǶ:$4sFfnٝ%X Е9=(q~ªsOe腲GYeQ`=U܄zyLԅVJňY1 CV|i$]h*'@i-QUĞ:C &ӷj$Dph/\8Ebo]4}SQz!dQ5'y\ÿ Fe+*b o8A"j@@IHqL9&VDdDƯ!G~lN0x7iv Z+^ Erkpݍ6)A+ +D#;bi1\zfB%2["Yn!!Z!T*xZ&ʝ, +&fxB)fIL}%'껳fVNyy&,b."%ŌmmeGt'GKtOۊ7O%KMQ쥔p*-5t{3T?ꠥJ\+3(>~( +Н paZӄ nj8 tfWysz5C 6%uuɦ( SuV):t(pURPcnV^Z 7QDbR|{#pkv孏,V@̐;^dž*?si8"p'> Y4)ВiCn#s3?R/ᣒ*A{AB% +Iszp!̽ԑ1e% ]7%ЉO?RY"D\V%7|rJeqv`TgKq$ۡQ=++W07eힷB6T_R_#=)#BD7h?(eaYS#=|`Ͽm&-L~ >ĒKǟ`&|P<}sMk` fcCBW̐S!!9< +7\,m:WM JN? 
+Moֲ}遊9̞蒜hn53}+BbH&_ѕRc?Y돚3dW+1"{beo")'3fuk +8?oןҏXN# +qa?wIt,F!\nRwؤ]I?%Su_p^$PiK(ȏy\EJ# ylWdP|)=Is _'7W0n1"*q;C"PX[Bèvjz8`u mI vhRafc˴ڛ_ +p5Eca3Ob;`[*nlQ +{/Y>HxsMo7jqq ++[|ytyR&p;~Mus'C5t2o?Ģ0d}* W4+ou/t!Gs_3`Jc-#jBTGlHz( +G&SS"@'a'H^by'7vswPאGkenjJ_ep%o<,693!>*z M1+?'z:`HUPbrX6γ66^ >kPۢ\ܟO1;g9yDXA2ԸG|3vd(h;7?nC7E(< +U7<^(lBt +iXBS s'T$]#QpvFX&e hP+;n$Z2 R!q?zoONS  .PK@5jخDר>{`0R1[Т£oZW(!6Q=YT3RFiFT9{&s>vS͚T! T*{btfa۰AOƍoAUM|? +>7 Ј\-BZEŕ0co!aLx)#JF]!|%aBvO]B;*/t$c!-^C]3IJ} #u=غ"PX1kY뚻>j>%!yҶL-(xur/Ev l]i#@%r|4/7isx:{vKJo.>'%üU ;ȦZ +=t$C&8ˠ~I0OH]xX%%J6ŽV025év!bPlIF)rj8_a ˛K/Vga)b$Hu<qv=Ɂ "BG/SfM1G- C%)Q}bQQtyG7utʍӸwxZp}.Yf/lR)kKO^#һ5`I@dx`1rיI08Q&O{$ +K:o@6ŭ@i3mXB4xڊ)}~/::|&ĖB W[<𺵛bJEb7}򟩋+gmo#Ie+ +t +k$f_ )LERU7*\i( ^VF7X2;c2ƪgv<.lPE˰YV'w.X B@ +-!+| +ko m#yU#UmL +5;VԎ*Zb_J/˞nN.8Yb5ʬt4C쬕 V^g?ho6 ܦ+j_;>ʴOJ,MDRx2\DZ]WwSS\mAUjwL#l8 .stw9',6iXC{%)ǕL]йks[w/t^)Z" ၠX Q?Q6EM 1q'r>J՜S @ wHztӃywР +CJEz,Mv dCp+O4+7 T%V|mSg7»ާEfqV6^Ћ i[Ӈ~XGgWoշZj@[36vtyVbGjBBRI <x%no[RV߬Ÿcҕg'!x`{G 79m/Ƚ/XjŤDve`j{S@Hb߱ymAP7pyT a3^ެ>B*VrGY ʎ +C)C%XtcDrpV4e7+'Pΐr@($zO~g"cB-+'(!nLZ*w).ܨuͥ1d$*B9{?>&dk/ʀ >.h Ok{oVLLtör).wmdLk' c PeAqo"hPc!N'' jE7kj}py@y><ɬ+)_nC}v1ְv]<`ŗ`Gtuѭ F)a'գ˼f8Tmg0skW)^\0lz+,5)5jIw}ܷ2S,Ic9%0,&}Gds%ve1WϽԻrr4d/&'[Z{@+(F. oWj٠ϕ {(r/[Yl܎4}<bvuv?y/vD!Q]dH5t P;sJ9T'.-2^ѓc)3~I\.sk O^C*Sѩ(ts_-8g{굓R> Tu>5NP51`C7$J!8g VUl*<߾DgE !x0Lol @8 mjmj qG-:>lQmp+l< %^ء1'f0n !4e=d9lׇR?l ?A7!YT2)>{%~/䚸p:6)iQNK({37 z]iV]dҢvnW&<"" 9>O(5o+ye\8Rf,"/tS)(kDNw~m5 ɝlg#;ek?t8 \n؃;L +EDB°VD QVxی;j>W +^Xɾ/7gKhS(4<2' G@IyhӠ UmV}ZAb1^^0{e6MC1D b$]"Gwf+aYU֊p䙳:$~()! b0X 4^!BmJ$wU[*bˑT=w +ƃ,I׀Y$_(c ;$G cV{%T1öwg.qy]<0ξݦ /hsUuN_!k ;0A!kǨ~燶GK.rC+ꇌU; RPwnL॔82+2At<]d#D-R{>8rn`%fuVx #*Yȳ#}@&?I_8Ls4ڣjYG[~P: *]eo F|um/{ M"˚(lr_pgf'nsYѪΤBhQ İ]?l'@.{ b)2?PnjM.eWQq]gXu`2/o=ͱ2HhJ &M{6)Мx_f&|U[X7WC%k| +.FlS{vq*S+%M3֖SӼ_!)9$ 1c-`n] fCGD_0Jc!ۏCӐ+-|,LV`j$iTDD^n=Q'+e˥͊'tu +̾nhd{/t-GEEo6F%V}Iz% 4n)\6UkRI +Hkݔ:ֳ`d A](NtY-q` M}N;4mY ,i˘YY%?x$\I!/3_WǏҤ1!cCC*e?kz{Xę1MeC|X@f ~λ +u}/M-2GӁ p7iV7n}n2:3裭޽.qܽ#L'(s2nNe۶W @3ǎy1ʛR/bYP^jHQ +Ӕ}8oN' +Oj4#!mxx{WEE]dw7xH]@ îZKuTB]:PT%eHywEC=Imav8+kwE8D-;xF'3bRi&2ɨ.SHʿ<<ܩ}";mQyC2}9Rk!RwoCnw0:h0'k,v%m|Z:h<Պd=6 O-{S1<}Fn7wmZ7"DcFd3kDjPd9OxAG~^Zn _wv=$q,$$ +[rj>hE S+U݌< +t'dpCϤƣksa +tW Rd .ʽ.r_QE'G^>v2 4$2^ ]D>X%2^?3Ź>Xy!WϨ schG E4yB͑-ErIF{\ܲ]ʧ{5\VA +.4;*?YjY:ʅ`tݯ i`Kt)]'-B'Ӆj 3l~濸{CSwI% 4dCaC?%{%TCe˪F5ى Ɖ9L<>R#t,t\?-4YAE(sa\ש[ +^,HK)<̐ʪC=$X *ki}cn aV>E,3-spCXe~*1hGcuIrd5!t&>VIĬ=%jdWk'}PmS{M40 hD|<0lwaE<(fp5qPwz (p0\7fٰB7nox$;J.m, 6.}]^963}m+X]ΐ'xF-&~>3WLa}Mēg\7j~Ee* l'|t LYona;޷EFXrB{xSk}69#,D,).NSko\\ebs$2ޓUk"|6[ȍ03nG]vΫ 7.!~W΂m>f_x3[t ~}&eb(j,ɐYF1Mȸ+xL늞˂86ۤ-kr+{ H^e.A6߉[X~ftCdy/jw +F]@+InisD@PU:!Z~q\J|\ͲX<aY{&GqSru.*'n'{DЬ t!tXWTHSͼD@&z˄Ϩ&y-KM6;IP{{ ,oT(ۛ#<_ߵ4L#7Z]ލ_"kswPIW1ܹf~n2!]} f+0hj};&^*+|De\Α !b G쮋8Ӯe>G|XWk(70er8+3\SD>j|Ѣ e; p4 AHCri!v)]WA8dTTaAfȃ@:Pl]VG VKBa8aSWsbE^O/|`zS`S79vb}XYvsN30@Q72L`z2Qsjx+}&6In6R (K'5Qp@-бxkfb>9N"iQMꜜWj pĬؤY3k潵YKcJqMqp,fviFuL5@M,5}MAtŎ86N Jn< +pnߎŬMϳh(i2 R^(َ q'7:i6>BҔsS<}zjƜeֈk` +:7 xVu4BY5qA #Vb2_j_٪2>GDť)OSupO< @in/kaz% fk0I/jCm͐ u*JWr0c  @r{.=d⑄1|D&6ǂJgWZV~F^'~W0?UE'@zl{ջEUg嘬h +pSa[&θ@ 7})sTQˀ+4oC uCzgJd}` YB΢qqRq5,䊯fi?QM{Y:ͪtaF|~nCPna8U`}m,!K$"WfomMqAGYCI#VZrLH6.֗"$]N$ȘЂ V'pueV>>A5rWV~7wT*uEۛu=U1M@IF 5=)[BX@,g*v4tC5{9WH~bt;306Wƕ]ǷKI2>z)ȳ ߾vbI!l`#f]dNǥn(C Z՞Fuo5Ǩ4137UaV`A,ǝ 1={F:"DtOSܖ00ivTNj'bLU`No؏ф_WjxKƸ$'%Q&Ao +endstream +endobj +598 0 obj +<< /Filter /FlateDecode /Length1 1748 /Length2 102745 /Length3 0 /Length 102950 >> +stream +xڤPݶ5KpظXpw !;ԹnݿjVZcs̮nJRuFQHޕ`m`mrvi8ؙX()5]mAeR b`/3ϊ=@`cccesO3@ ٻ"P;8z9[[Zd3/#@ `kxeG=S ?1\?Pk{K3Pwp0u mwGi4lj Pq36'aj'@ ǿ 
+0#Ϳ/>=-A+WWG>fRXbdrg?rm[-_?>qԿjC+ ߖ&JI{ݟֹ 3=_4AVdgESWgkO>  +_w:zdj0KȨ)*s|adea0qrXYlbjVowY{ ? +9x@iIOS4!/̿ң '$oCy1_NMr0J?_3ߑGn!=Q)S;k[!X{@#ɺaJ_c"e X!uͿb'2- +k ?%RO LZkg ?@ sEs`ZQƃ â``NS[M#D]LgU18|`d_GtWDz +XxA-(uÜMZP^CAΦL:ls#sqX- ߼R[lF6_O;j[%;T3L<f2uOz'Q#y[dI2Jzp쁪YYvKL^Hm:%cX(T'KLCAެ@ʽ +5>a3㹥tQ^阋@ܤk?&*4~Ҧ>Im='FAre7IxY,fAo̱dX7O쿛Y{m% +]9!*[0!쨉dU(zgg + +,]⌖Եd>PːǗZ*//#_-X6/E1'ߋ6Q\6Ç RFh!I7.ZYBmτе̇ Ծ*k?Vd` $roeN9B6;5B*An條Vv.!Tf0Bͧ1Rpy/By7xt!3Σ$bDJzJ{<*nGe%a"pJ=`4`:Nv7VYQ,}$pXqm3eWaBwp%7z9/I +~o❆qxg_u">6NK˿P+3īܹ̋$P? O~{V\t"7tZJcfŒDړǝ$8RZF6 =&Ud $j\v*4mHY5aX pc&N^|.:߶(ْy9&g$U2yV)u*~* bqmdQjv0z.\-m1X)O;*C!"F?k-ahxĐkBQ>?%ɍkOP<7E-R +֞>wtəla$[>G0 > [vp>P/R0edb9JaYͪ9WՅ0x2Σ6B_֊[$2r7ӑ\?n%TwdX~UhYw%4EPc e[L@ ^w#)HglYr;>DtΓp&rs\N}U\QOA9R}@Q9!SKWAT5%_ieR/oaB3a04" `[M^ğ;CKkWyPcyzlq[{{=ķ7l9W]Ap RflvBIœlСϏ.y1o\ψ! ^OcĿs k +& KywYjjN-xCl[v_\BtW@(oŜ|(|Əmg T|Ӑ6^ 6P6H!_1 +%Ri+tj('ѷЁA_f"!_aʲhNs,3cMH =p6fvAhf9Sh_//9T1*r 40Հ^Q= zl}Xum%#Ӄ Tjw;͕Ԋ$dTQS3ɵ321 ջ s&'#2u^]&ZX?Aȴjԁ=0xUG;u +̫yؓƘ<;>Q-y?H]Xo&4⸾aOȫ@8VC6b Ya+oP(ތ]O#0˛lH;áJ%!Q4lٿ>OBEKV+7*5ftij{iC96Rd!nWJك"T\B_9:UpM!Y09c]S^iH(+e1uLS>}/qy[rY|Ə~b}moϰ6E#~#%!qަx"B}% dw^ BW)h$ql.;qx_^z@2~u/7jK}|hyfWezH'4l~A;S /x#vE^ui`b 4|׷ڣتia$8.Vع;;s|UJB[þT YCr (ަ0V}7ۡºZ3[ +_0vy k7G&L~ktm쨺B1Zۮ}nFlj΁X4 yZ1hHݼތj<|w$LG d +f|J6@׉4Rշ8d؊( {Ca=yOȔbUE) +J}dTv2)q@^\=Y". !.mZ7 %Ӣ#o _0$n`rZi-AW̜N"9n/g51J[Q ӎ&>%*U7gҝS= SxǴ^IۼۧA/6($5lx2,(mZg#ڱ+ \ N)UE-%np"lHUzV͓|8 +2ė7i-bBUՅn(2OPs!p|FueC 3xvsb[*_\'ł3)7qwI8cK7%AWȅA<^0"K4eD E!/fȏ4V;Y} !q)hքƽ"C['N_Exԧ24#dG\N7n>T -NG)Ei mc)/~'צ\Y.)ms|l,/?BI賱pte钙mL⽍"]& +~,RO'7Dn5ٸ5r;E{ք%A3ʘl򈟻34̚C~}q+1ɱJ< Z&\"de\vt (d.ioz9>4ۺ$UWZ#0e8 dc:\}O@́?p-[D;-zh@9ܾpE1a`es3Jc#ı~CItTĉaDK_¶1EQxa t#q/{>m[ k#Ӓ8$ƻ <*Ҿ;{ +y5M5}72~P"vEyp]6~M6CQUSݒҐXwx5N)22W#nwd1\ؑN2 D &`pnTfz>\>V'N r=7%v,k R5q.I'3jHהJOL_ͻi 4ν-c|Z36XG]P#f| (acc⪌* +V?c Dks 3c]_,8#KT97eRkkm4ŽO-ҕ}W~]"FL+^&]|_:ɝQ\sST!Rǿ^Œr;9{a x{wh5: V>q[otC^ccf(* Tq(D裢5á1gIgTEGf|5$r۷x+0827XcZ5FnY:W -Y( +X;75wM$!hτ#qM"T}_ɛ5߭Ҟ4vV^|?8)Oy-[J_Z\ ;ZV ex |ٜ ߓ&kҖ(J_Uc[}[6_ۋ^0s%Aޣ~Yْ4}E6$ 2*fGıKa}5 5qhG16)ܛԛv]q50CġIzcÓNZ~zp{N? 
zIVu|ٶ| E>(W^'ug{ +]*c u$B_8ށIEwzx?@lkB2]eBjӤ_oCaͿ^`WA_-$^{"y{\zDn8y8E*_pV/A AK=@:"\ЏI*|utH!Fqc 5β 'XKǖ} 1/`Dl b_V0#4*rzתލ3k6FBHS]y@}1aɓ㗍B@1Oҡ!Bqzm?ҫ7c0\*CTF`V}_#Ľ +&[B-c"kQ=W@$|G.>*d)nubO]snl28$uV3[&t>FG8o +9-ѼK16cv8l,o܈D> 7@[#,*b&ue"i'I)7̵{ɐ7i_KfN +OǿOv6~ +P@u&y +-[dx/(u~'VC)v:s{Zczw1>yA1$з;vqs8kDU7bP_URw#%{(^Ŏķ!*v3CfRueOnTOى(o(;6ȸ \ +?-L +Ъj[ #03/-+>"Lw5gs+Qۇx-2 `:m-Hw+K:⢓`>Gj!O u;/ӐmCFREkyqU :hK'8+Gr+w^VW>Sۉ$lԔŕ6QIhiErGߓ't1^Sh"ՒӶ*~egIYD4}!ƒڹN%ڃpl]T(oCupƊr63ư!8U*vhs/v[B֓C1A3#jRৌ1Ac sΞ2>-v6ن "2iYO%Ԏl"[XV ,ҋˤa{\FcQ N/"=|B<;H,!π օ:sxɨo/8S+0" A3,ʨet*HӦ$R]|pQߔc 5NZil:5,ijPm>͊UJHuZ"L]z'5D3HbaeTp k^mbA4QV>ڬR$f*夒2Ss T+~SWV *᳦K'_& _HK̆[5Ѝt=}Q3ףvDqp0)OxMD:p3a4Yˠ*ZTN݌JqpH +ެa8Y35KsʖxgPL<=N,A&=e#J@`ΙH?g{COXjx3r~eɞa71uMA{Ik\ oy?*eqLEU?D oc/}r]X lJ_K*C$}눼Vq3nX4}Jv옪D5mG6*kM's!Dm/ tz -h/ҪMuW""uQ& \WBcZ^- BJS& `5z$ɩN4 }ϑ03,xFA}G7CQVEaUa|r_z(ga]9|\o_2f3'S VIZ@J7Bor3K.*pD~[-g#ޡ8|ͣ鸚h 7jk5]!00`tU=ND;ep@zWSgSB8XQNJ0Q0:(\#jK #Yqos(ݵ汲G0 ۬y.D4RL/BhSi{<3bDbYO(Pgڙzic.U0Z;\KG,~l~x1EˋoyVˬNjtHwq4,tTTE C9Br ?&[ Uy"uK"M~?}+r - nQ\."> +›ܻ eeۗ4 p1$HOY:H=Z&,Q.AS6C  V\xFgu( m=Xaߴ +u+%_H7 +cGWMN=LϚ)}sφF-,"ׅˆ?u>J0Zl7^FzG5>s9?VQup8|X:!0O哻 +74X0Vn::۱5kn\)"D3]N9x>Q~@&p|=,APF]FuMKrmˌ׈,}Cǃbt٫[H7K @!h + MxEDm=SJO~k̚'"׹m]RmrQ2Y$+9yr5'g;Lz}i;^`TaX'4.f}މzV{UXP`[F?vZ[ { +w:RʟifN m萆8JY!e}'E(!X-WRQRk.(BhA+@˥5 U&FcHskA ^Q@ޛgU˳'#LA{c _5*v/ʁX]K5:ڒ?Q s@ll 'gW(Z[dm=5OģkUMI64w]E:b %bUh]3!;i$NU5^ԄGhao_H`s,S[ =D0rVnj|CB-ߦ=5 +G5'V"m]ѭ*!PC+C+ I5hy_c&Sz52OiRW{I bq+6eB'.T{]Qc⊗TF>c뤯^wvD?ϡZl-W VCCw>{n=P+]8rRw'Ep[*å Γ7o{D/80J8Ð'/Ul_mz3.qb>oJh*t)*1[h{_[n 6"+6ՙܒ*j_-l- Eg=)GVG/t%ХBʓ4'bYt2=7>^غ&bj1]4>G!ĥWhk[!7m,ݣr<(q7$z7̀wOYk#Q7=Z `2VŰ13 QoR}L;Ɨ#ԟs"k4 0wΩz] SxY?w g dRۖ@·_yw +^x"sL5ZV|;/[>\m^<8,+bQsY +جXexa %G?TYθLǺb:I.Z%ww>ׂ]]bHo%o.!GV(TC|~<]Scہvh4m16%UXrg}\+-Ȓh$~M-RweZ"~J4[Ѝ\Ɓ%ŃwAfSa'8i\Y?_35]GktGT=d*1&lѻ1Uճ`,Р.yMOzZFFy>5LƳqߓ:f'ҬQMdww ˶97m3(SfJ 2 +P'zM'k;WQr*?nG#orj"3Frzb%7Ly,eQ/`ͮ/a-|Y0,en:XfZ]YhO^%Ծ;W؉rVh=ӕUYΒ\QqIvͬAfl 2$9hF}8:"XE"|~ E;IΦ5ש~tgd]|ØëeoK[ɚPY:a8Kؓ2=bd*̓]%T#U)e<2,pM:6 +NW9͉nrZ +xB^ IrcmBEo6WҮpbЂ%m$n m& `gQ2&էSP˧͸AN9Z|:c@ydEZT,$Cmt a&gY'Ǜ{}..o2 $Ƿ5Ci1q-Ȟ#ِ;q4aL\S# ōF^V*2I苳BU<qH5r]ù>y̘hd$VycȀr&67< D}tLV9;ȑo$t.^Ӷ5RTB[܆_z/sW%kבdQKaT\w]=;yHPYկY"_ƅG2_z;-=|*/q?~ 3AD z-<5k 򰾡[I  +̪Q~USe#DRz~<dJ_5ϭNzƓ#jsYrw)=$;m'VUB8tFuz<کbĶA}TWwFe5onoX|Ay/c.5g>ċ^e:gX-rM't_8M'F^d4!Mjk轎k3ln5z6.T/k_wG4֖vE+qqVL#EWfQ@對COߡ^~iش*ҬB S0@yϛ֩i}k!@Ӳ]{! m7*j&XKlU&C&w+r HSh@d5xf +5P7ܶج'uu-ů.T!X޾B]SNQx}ؗx,0LE@&~oV2Z):vnM8lFiui* |.SM*۪k DXJh !t{B )糋I هZ4Dn3@ty5ƅ{[@ySlQ :kD-nQ=#P,>yb߭Hl.YLWg;b0xyydRږ݌C2 +=/:\cO*Ȳtڍ)/'%vx#9Ck /ԕb1z5t_@x+2/2)rl6ӵ %9Nl Wh <2a|]={2xgQ;;)neKP f ;KŦ @WBl 6DnxvWwo7)uNΉEe}eM? NLB>%3x؇B?gxvvs#C 2hy}ɣY&?b݉-Ƴ4s/[D\oܒ"xTT!Zx*&ZJFc9vnLwyBߘ]aH*C4\s#=L{ twJTzE#pry`Me_ - Zء}c4,m5U}wv]W.&/LiA~5uz E0,N$s:iv"~[ɈD0oyrzY{N`ouJU4-8pO |v7$0Tߟ8=Vg-(|ھc>a$W7qi0@. 
xȐFRj=Q6b0a3{BaH6fxĐɢ"A1S߲2iAPK?Ӻ컻EDDZr4k4ʑJipQ` V3]Dc%xuq[rha"'p +%Aq^vx8QkAI`PD-q,4V,p.~*#F$`}MR l??[yt=cBaaE|!Ph+@'3G+>`Υ%cb^vmws])-?۠q98jm$>{Gۣ5p,%:Y?9zg^/J0> NԀPְ/KpXKǗJ;dD4}8_Gd<V *D'Zɲ0YLS"Io"x7bdf +o`zbWW/AAɪ/m8dk &e1,FRKF\!|M+(iEX0be2RA,VbD+J9y4buX=UEԵS_RhZYNb8,ծފӆ(wDnol ="VM/IJ%2:+b<73DRt1 j) #A=({uu@)ĦgN'}GDWA[E$FaKe, ]_CAU<;X7+jh$z![2"O{ly=o +w_YKw1j(]6TqqZ6Xry>"i7HsxAJ,'Qrq]3KXffK7R_7ßhAsnXq͏.ps(2e'b@w] } +,t_: :͇P +[܏=Ϲĥ&, sxՎڿ6#YH8 {MV `1>lMoKƿXmDб +ECV]VQ* #߁z>і tCQ^ZxU-jwOMhf5{ M+`\jl +cpM +aǜhQ):e;ꎅz ~%XH V6!ɱugWXfD8Qمs'T ll;3;A@hyRp#8*EZo<̯G\N|4ghޭZOK!h2Rz[E:KqU0w6y_MGye@̕j ;:ߐ %d5@E CΚC + uy4+E%m~ԥ]D4[X&%>‰eX[o0Δe!/G׵@` 1C֒vY1=uc>fh5 g黍tNFijp:ǷY]i>|C]+{_j  T +i\=)A +lFx{7V%]hݨ~k}EI]ʷ\^` 8rI_@;R$FgO1nz ?(B9 Ƒ)T7oө>RA 7\Y!f?pMc2?sK}}^.$3 +q>#pg{UZ"HY_UwEs&i/,/+5wVK|[؉`֙JQ *鮛Nr9%86$ÞǗo0<{i~)l؆|.5e6ɹOiK4&iPNxA-tx -e=FO'fn>f" N =\Gw鲭y|m|ԛ8h#]͒@?~fk%(ykHԹޚtLbYĸc3K% }Xݮ.NִV05\ O>|9h<7ARijUgh(羨s\ KRƓ5BRּWu]Y>19D3:AaMj +!$H=Bv35"PX]G (FB}UP}=F1!=4T^h|º,h3z&ֿ4 3eDx~{L#ӵȲ=xo184y$Y^pHcl8lI_NPU]f"C3P74 pqF5yR_Vrœ^0t#ƾխxyU`ͨ+ RpiYGe?W̯ +az*1/d + Ļ[YWz E~O[5S.HH56bQɊ6NK c[%. : K@2(2YپlgTș6/kS#"$5`HjXiay<;@Ed& <8]nDΏGh{Ys+{))[Ty7l{~<Cx!buK 4{h4ЖCkoyEUw[}/d^9#.rHz˱LjxncM38v$s>?9*I?saC/k?P&+ ,_Ij߽ua\?)EѼͤ;Oʏ'I]s6iouInunh0Uj@)[nEZ<#{Qy'F_KZI2tw> 6e&sqܡ&ĬXFL &aɱ*|T6ݪ٬4Sx T%̺9w ٤zCz .2E^ɸ s|4'tF}WGIH`BC7hWJ.Nk-ʄ#.s6zvN/WbDZ5:0T6)A:h4#D+ۥBVk#$K +4K~KSk>ѕA26l. JEa7*rAk< +>\a/5x_#f$`M _!a1\Ik:_R916!ű-'+6Q^ܘX=<&)#Tz 5?jSM~#ݻL z~ R9 @{& +NJBc~/5g:2R|-{ gmBfVT+b$eyQOxx0,'WNMFtN*%)JptK` (e„Մ)?:۽PRvmeI0Yp!fͳQIO{[,?|=! vPE.Ԣ&@8Kks j\AU"PU`z&o +aZ7z[JXCZ4np36?\j]IBdqH5c1”I*yE4߳ER\V*wrgzQqV( c8s{pVXɿ|DaʐLp%;دؿh/%% 3g11;FT/^OOb)['Gyd3+@P:]cFG5@#*~l(7Vu]¸iZE9ksb# Z>Fb^nK83%ǿ3`-~ĆX/d+#ZZZACrWdC2 O]"R$d04[  }h]_7 J^_(qL;4TY=LuF Ҏ\TOk.Avq$r-ke6$J [V(2QqՀʴg%ZZ>j$SK]%p9YJ&Si VNCP%G6CBv0,y#q;jnD"y3@tPixfLG#]ƒ ![+FJ ^(TXܨs6uUg6eǻZ]H`L-&8{|H=^`@vf(dh7Ѧ4,GJ ˿`8W['O.՚1ᝪY7f0KG-RPNSjb tA9 خ žoD)U^]OhC؊۫Dq-!##2Zp^aޑXd2>n`]9 <]4*m>n-?{P<4H36#Ϡ~XU +AHWmyQ1H:8 ^΢ +? ]DŽ E |EDe:+~(GkCPhlUCTuLEo$G<k;/?g~:؛"|L]AsCRf VF麰EvzljF3(9W'p*}5^g` x/cѳΟOZ;kSJC?H h=q+OD`$d> ;!qs ר n$oriþ'enn\Vc&;Cv)&+nY& L%@ltXxfh,n%++8kTr*=EN '5*U CXk2>OH{֠UFo'" +c+'Ujw:0S/k3,J/0QMx?A0r!q92.&bJG'I̷, -e&<¹7Ŧmq!Vwp'>r<咝iԱsM _f[]"*iz98e +i<==>cBj>BLхG9vFe`͟(vK}qIQCChY>i8>d4I$ȅSNϩܵOɸ/ ^҈@I+%H .ܠ1QV0f"EǼoqܭ]N(MfxyJ|[7{Ù?q I 菸4 FJ/GU~9բKsؐxIxYw (%zŽa@Msisd]DtDTCK9&`$rh3jvK?k .'T^n_7#WљB?dmMྐྵ`}.sB8u_˪Veėq]by]@J߇2_{78zo+]%8Ӂ;cL}!u!3m(o'DB :A&kVF-!m͒8otY+xQL>`V^hWw͇}3 ٮ#gAf/IB%gG{Xۊ2՗jt'}1@$)^%̿ =x]E3`@x9c# %W9(7Pu%4v7kg8C_Z5] +\A8 kss]csWo2yJK.wfjo<{T~ߔG޽ȯmnzǖH*FsIe9y2J5bǯAz:ABGhm!.#P&b#Ry,bGM[,|#;Hw;:THKJ[ȶ7c-_ +_D!U.g!Es66pZ"dYw|n8+;[\f +5"Ժl AyÍ'S +ſ*wEfhu>$U;qMtT`F&=L2IzD(+0j~ S#k@ v%c$[Mkޭ69|$t{]vSfti[tB%h[K@Í?Z4""#.ku.OqWڍ ZJ1 <ȵIIc9m{)qy5&N\P7p^t(A^9N75W1%߮oIWېݭ,(%QS43B[Lx.t|{ຊ/"l +P+00p'Z.`/BC1y񍍁vRc&AHr@*!>ޘ`nVn'AYRZ|oiMB$̦ )/Zi]zXb 4!;bRr~?RR 嵻̳kh7z7 M vP] R7- zaigy}GZ}=%iāSaN( )BDq{!S* +h Ĝ{:oj>)6ڱ,?M^Tm:X;-z[Sh6n%J F'hZH6]:n )f t!R"/f41 +Lj~%!k^ +/y'n0NoV&qQ的p*h#01AA` m@0m;'-O+y697 +xE(jw +މ.|2B& U8_geqq"}GJcܧhJFž+߆3O>GΚf;|)=S8e"rhs[l*%*C.Є {ZTu^u{6, +ĭ:'l@pry=D2kvվTy f.dvu8Ulgb>ΐô8xf!!+'U2XF;Wǔ\_J\$8:w=EYuo4cgCX<2&{7>L%1iә3D3s=yqI.Q-t|c#|]%j`0ۆA <Řd& ;Sv\ǜ_Ռ{w뫰`su j1 8UidBmxqNZhbD^<\g*g?V,dES1^ q5 zFC)09[SrJ)ZR\*#k hP6=8Ta9t'@?IV[2ܫrkAV>(CPB{D&<fjGH%P)y l$Kp$}بjAi`w|NSݭ&UgeT?B^䊟K}M Cao +yNFt)eV(6`)эHp0Uɨ8DW8NRSPFLt@--7XĤafa7Ja]J~C ;԰3BhsD~k; &$CY.F0հgh +/+/Dj:oH\֖5xp1Q]b` nPc6PPD>MM8`Yafvb ԎOMOX¾ CUڵud샏%sRg܋fS~te~=#`[k ְ%Ps +,6ɝ/"qR{8.kJ񏴛Jԯm1=u9 >,}a,M'1b;o A<:.`ˋ_7"ƹ &k +<]^42:`$1}0/[´u<&Vdo|f)هpʣ/2a~y>,XI'@MilS5G! 
VGT~I@=Q<=Q4*/}'Qd'-XCPVrm >rL]l߉*i}EcZ&`˃#޴.I/<䪢mj8G-=+OE +vAd]g2\E@?hVw^Bm4PϱYS "cVy9DaK|)^^\ >/iّZ<&s$[A|QCBȚ1N%ké=w΄X h8)$T՚~ Nx"8>% EV'ڂ1\镭 ᤊ0 pnĕZ2:lp] r4qyh +OX9al"4p?+bIP;mTtSbن)}_| xiof@QXV16y.4cg0BIXɈ5K *D}}M?zyqkFWRLsDQ_\R[\®T56N'KOퟡItƾu-㝸)^'mdbM>>If P3N9jk +JrfruY%:mU/w8Q8\?JfyKG f 2->ȒUN:(w5ѡQB15xq]Qpڰ6?FZz0\ڼAlo$Y~~顽z +J.ބoЉ3z?F;2r;ƙG<3I}R  + +0Ym !y.MF4 v03 yJSGM3X6CᤆFGUvJX(߂ ;_ʽpY.#믍$S32_~􋒑>Bu;[ç*Ν3PQU~rb dNa.}ْD=a5F"(HcpxnDh돚̙e6԰JBuf w($Cpt:KcSTA6=9@E"t[dvˋrA[] ϽQ˨6gIG$r{?ĵ2ș4( h C-SEPmMpdO,dc.}F+dC5H݂XLY^_k$f@?+\G^M9(x;,|acB V[3cVr9>mf۱z;jFǶI +iJϨy»FY',?fq*7,z/1d"(ahg{ldi=; Q-y((usGbuk鳒 khh+-Oe~[mŊ:P +16 +C[HW_& ҼBKd?zzOWƐ͓0DžX> ?D|Х=f"-?WmgMA$;LC)%_ͼ`CH +TɀBuߩ euQ6c+9Yu,L'($7(q O=<Y)3h1m_}Y0UFt򴃃ai,Nqx^$k8r?G}MU"z2J, y)9_p%7~a&"zA˥~eb(O@?~Y^ܫYmܧYpbN2()Yz +vL)&/hEGRDȲ\<l} eZ!bIy\:@ =K{W 9phFk=? VU[}aRpoȴ{kճP접(-y38K_5Xލ(uQ :5A%m\6jGGԵާL?J{fE25j16ݞ[f济WWld+*0ab" mqE/(+qƅ4-@ۣLieDo֧y&`81̒Y kO"/tWp MՕCM6*W7*ɴ^Z'}Pbb0r9h5 ߺ{ Z&& ;eM&dHd+6EB2MP{-2* 9ԒORxuYK@"~ԩ#JY-cIHo8{v"J^[n95BKJ[ץ=дmRgSuIԩ>#7†sJTF5.Ď9kMFajI3[6 \ũS?yy?Zw4l'!}RL9CKƏx"x tڟGpHj)0L/Bæ%) 2~`2xDt2RM^T!mƗ^TPa:-K$ƕȄile+ LPv0{L%wͷU>(s渫i +0b-ZA#E}NM<Ȫܒc=7E uN"%Ϗ'پwTMc|.݊cY%v, }mmg͟Nݑh_'HlX p:?"L!^eiGylČ+*NF/Q^H"PEU!֊JFdB2v/jIYj-lnʕ Ǹn0""mS{ +n}!joD,8'x& n-v5nL?-ބ5sJт~3Y?Hs򡬘's:A{9(th1`rJIJ;tBnZF4U3Ea}ǽe`ĐGO5j<$6 +q'T)9OahKKcһpL({SGpm'_}!XVikX Y60-VԼs˳5wG|Fk'<+n&GR}3 Cm%aJkPinzgwv~XS0'YVZCW|y4IʺN) wOfC_xYT;ܧq^x|>7Sb+E:a29h߽nė&reڰ|^?9>wKUy?BEL%*\z*ML+ghrkx:UQacsGTir_qXlJ1u?/3t`v LkcfzT>p^onFXMųdΓ CuA0u U-J <,2q1DY9"jA>,=cH0;у%3Nξ 8ew;ߵ+D,La@$ Sp4Cs#;d,#?dٕ0~BM>ryh3 Y$)_#|tYQ7'VW iN#%iɰ|#UP=LYl +[QA!2XɇA>0VƁ`4 ^ i? +νnaw b4P l@CՁuν P^˳.GZ?*X(N{;%9 9\+-{m&p93=;וtz!AG<_,Dڝ&WïMN4R/bDՆbl(/]52oÎz|Έd#d%0~(n,t\LԞ֙oxKl7$`hܲcMT;w@zFUQ^B:-Q ˦m۶m۶m۶̛m۶mիDWחF G|`x_0Pŭ0Q+?x(C +vTv~%@,9/\`^VÎ\ұ)igLQy/}>}mdwt" 5!8M=4t蓢%KUQ]x b : +lBr:81g+U2OxqgmTcFwl )sHW[`hi'AF,Ax?to4 +=4oiJF';`,?\jn۫bO9s'"eA?mkL4ejm?tPnV˩u\b_ֈ9'"Y|3yΝT#sB*0NFPfN3tfI=10M }3XQAz +mq 6NF +"ARGdVlwUqW.Ekmc`F!@1R +Cƒoo.5\G6ZѸZtU7\}m4;Cz>e{CwWuNp/tggbSWh)ҫ&›Lze 1$eҔIՀ3@]@9p? 
GJZbY$OxeQl 0Gwﰜ3O=)Rd) +x 9%b'+hpHo,TRoq}y.ɸnKc3ч "V01ASJ\ +"jڋ/=p2d|7HY[OK{kw,Q~ؕecĝ*؁ε X+XWʿ_hrE6^R{–):ࡪO;Gg1q;fК^Cu$}m`~Ofx-1ދt zG;,w8J>ج+Ffց'ܤ4eȚxu~ -1vr ;EQѵNKv)dw3hɁn{ᱸĢ{ 8b_CMc˱_pLډ.?ss|H'Ʊ̟(JO_(7lNG WࣜO~!1sFϭ?m˝ t3[ynǠg/`ԞqoXkd.4"F-֔#N҉ +=J%5hK $y bZ9cY˲R!EkۑSf:*>i9B$̆ ZTm/Nu+B6:"k T,:Q9Yw+Glq_>O9u%e腲YQ`ݕ$״ڸ"7!R_K'y +sok2&Q=;.Ρ`.E/&f<[xcI0ڬ"308zm9gY;?ZcUq2<.#,-Sʏ긪K-L.ĘG RʹUB^`4<)Gn{j:V0ڎņ@PKe\\cOZh&K[EGCVb 0y&GjbmCvvjBaINAU,Ur$1dQ]qor@Bs,ړX2;zz1DCqoPN¼E9[!Lm0g1\,=S`W}#:hXxU꽤KȂ4 +Y:1, ZO='*93FjkDÁsndD?t3fX,ؒ8'mM#cw vh ?U.(/s-vV16^ОG64'?ܫk`rfc$i'apD @C\^hp~?&R-C S@k-*y6"%9 gV/{ఄ-z zȋ\9t#4bS(v$jdM,Mf76R;*z)]w+^$t.LDTZ6;ď ^T<~#ڰUF҂AA _A#9fc;%@QvB'%ҥ %wP4&-\qDf Z-Al~P.D ;7/I@j jz< *~=VɈ~w~rg쿚>:YhhA0V:?9Bm<s@ jCn6dj79 hgnq#F#%+!$ذ4bWw/_d+#FIսKG1RrPJP ,Y`gKO [^v$uEd\\Dd'/7ڣ#HS3j=$vw +,mx6!]ޭ\=+^mmХ`PY欀IqKG?'մSm \iB"1НSr"D*uLsg;C&zh0f ]Kgqjkq䑆2(}=M'2nz(biQ3Zo85'N2$^Rss<5F?BяCZp _"9_*?[M.aƊVh1<ţQ(YYGYtѐB$f>T@k8+vpÚlGQ99奆Wڱ" aOۿaUeot)JHy:HP`W2]1\B)Wj@R^cZzD7&R)W wBnh,v/8f=r#&hر]#.kA8jt&_d5DTy!dg 6c +_sD|]e֜NrK!3c3Να0_*@.H",a=.r;u}4tд[Cx -m!6= +5 xrm\#:LW|]]5me묃5J:DPamo[Ἱ |Ss ҄q8cQ|V^aϴ yo DIݍ.\4/߹>Emɬ15@Kz* +VUzJ03zKxbermJ4?5I!#R+7*?|.h ^oJ(: +Fϙ[2zF#Rej6;A3Z%(ni oU҃ieSXӑKg^i]Ƿw@W@J\5ccҨC-[།}\RN}P͊dt㑻XxvLGi5:+;`AQhFx1YvAl91 ~kN@ fTHaS۝5,]1;Pb#@#ڣiW:f[oee>o&k){:nfw3ܠF{f[2LƪX~m7+n@}5ӊ[sy?EFYV`VόT7m^a쟘19ڠGszfF{|L*^ss)gNSci`ձ{CϜwLc歑X(E9{Y ޖ&ÖNW~M΢0!LįalM@ c?N-G]5N1ĿE"հ$:p3ǚCc|dRjl$V+סxI҅ aQR%V.;\5W{͔A NB&}*_cOI^SO:?\'_bހb&$ٚzݶ~6W融_U |J6L rVdտ/ lr,"_[R + %_jAbP7B0q((E#e G9buV]5wcAyD _pqBAS@e | +s."o^fSk-b _5]0^a~Z<,B37W>P1sTaeqtPPJ-(KC _rx Ad{YQZ#bJS 6qrbvBϟL8؎O[xIف8h5|Gy6 .4VT+hI 2ua> +*coYkZ:1 ܉-,z|'(E\>l.P-ƾO6~pGxa ~&dޓ,^ S|l8{N&M|:A;g(H-$k(MzʧHi,e GC|!MBO/dSܺ?}pz*kΞ@cgfžsFAGE(?#(q3LvbR a'#Z^ 4sA6E2lIyzkjkq|Dd" 9R6wg+1߅Q%.+yPriVV{@{iQC!aH]pꚏlZh-#*أo.]>7;FNm]Ǫ4[8‚_f?;ڤH}r5b~2{Kv6#4ϋe0Hz{{E ~{E6O%7~S<,t!uFsZ06{t3z1DŎ8JfeS;#@LRϝ^py:Th)<rўn3+|C81w@ ى6)7 Kc? ܔ䪲Â[xBlj(\r,Ҧk]Lyr7\+u10B(d*In2@sb/ MkTеXZqzCpiI@ !S*B3,X{5q9Lm7򯥈{,֠JP]a`ZO8.u}ˌ2YM't} ;UMoc_R _t̞cN踕G,0E} ZKv958Qu[ FLp<2M׫jj+RG|Y>#W w#kzo72yYt_F[^/z9l+kAwfJ3Iv4XB8l(B,hƓ!}xf_K,QL$ Yd7kAjfߣce#3HftsG)v*&79DN0wyOEz^*M?v, EF>=5OelUxeihGy1wpJaY!p.Yi$##:3z_,fRᗃ'S_ ӈ}3’ҳNG'K\C免Fۢ0-]㏅ﬞjZʝPf'a* H z/zz~JL%!}/5?h:BQ6֏*DQϩP. 4}aWCvrOGsꆄ\n! cI39]Ld +3? _t/@ +F}6ŪK VWݺB'HB,I4௽c>FAݖ@@oæ'v'<)I͠m|C!Ӌsr(=ΙN2[oAgs-3W;5(UfrJx 9E]bp@O"?*#Ok?piU_m +*K^9(cR*2<GQGܷũ/{ŧY;o _Ɂ~ً0*NyZ]Qa*חykV\%r͖QR:I*cSnQ7tμp\U _~&6ʳV* >HK^:&PI# + ƍTll]LY"9$XX6kjJy}p"x UFO/ +s8yͣ)emB ɫYx]g&%7)PjM|k`KKF9?ۦL#a[2>:)[ƀkӑS5 3D +1ݚRr/(9!d K@vsoQ`GF4M[!m$'QPAXBxm ʗiyOqJy0S~7\]یp *v|2$fM\8FH">+ qԢ/j$^Z5Š9-ƩXMF9G#Eo{Gx.q? +~? ٞ >)B aqAt Y>;fbzXH1àCZxKY .W#HPA^`N"ͲOiMzt`|cW 5TݷW DBI,cKZ8PXqYk@S#jPQ>r\08ܞl{i+n|7==ͤehgij gtRi UUD:3 sqVƴ + V8qZXb3evZyLYJ Yz[N>d%94K^=we@C3;Y$OLKf|"eoTVC\~Wt"|\64g1®D-ބ'0- ׄsi7kbN^э|5z mX?_6-U<%̳]vCm05 |Loɸ6<2jXi<%iֺٔ4.L1xnWo7h:t\7ć-z45͒cy̴0r/5&YH# +n2P>8"]I*,$Y.n`؆;KΜAÚYںړtH[&E֛scFM D nݧpd ʒL,jmlU.={f\! )\֯$#1+W|8!%t<=8hfkh@N9U60&3W +:ekޙ9kf@0>f~aF޹d Xg0ـG+;XWɘu, ~B7%ϋ0ocԞ'4& ׫5r 5OnNaF3;Ny6W^ty6"|6P8PRf+KMLfm)ćt궫RPc3|2? 
Pëzn&;UO~tRV?!fWSb?.H`#e+e +dOad"+QŨjiE5m҉uՎ[Y-*|k%w"+sK#Ä-זlAXmӧ*JOs[:S[-@".e \:8[>%)DL[L4cdfMI$Yl>dy۴A¶Zp,I0˳8,Ы qqmt`l֦zV僿= h+7B-4uJ  fw9Mv>-Ī3/hLjmlDtQ9z,5<'턒f$:qKA'Ļפ(䍙?EYcGDź%"PD%Q#]!S/bBK7<" SR1UHä>1\Jeu4*pѪ!M(x!!Vo=j+c8FBMxn|¿=Zky>" ۡ,6 +}C>UG|QFB_޺8Qfucw1RJ0 ̷}-99({4ӹ:KbC9Dpд8;@p+7QѺlo%Z\RMG]{ğߕ>(1αZAѥ(zgg̒ES)Sof&g3: Hׄ檰0-UipA_F-bcZ +H00e 7#~Id#"NYy1,mv_S&H)t/Ƽΰx"f}8^ iG5ZiMuv~cw6`-Vu6r=~,/SO}sK`q j'dҙ;9 ?!6<5%$EqsĵsTuGR<[~׽&16 nݲY޴d!nSP9"Ub:-am 8i"yz`AgY+ߍcx\OjM$4DD'eؙyctΡy<˩ܦþsҋApa:Bۉ1Ke#m Ly&""a:Ǟ5!]Zc0uX^Am \[.Y3twڲ+uCS Y1qx^Pofg~T)IC*t ¶:QN2AwlSgSLc'֫!I-  M2ߢq@f& ZLQ^zD$~ڌ@7)%hT.TOi&v_N>}C5#9!m_'iC=lM육 ܇pcahe}I~)WIbܚ\a6iNߐb +fxZ Me,\J'ךˍU@(*.q)^>VճT̙_ɉrۍ\_Z΢~G?ftKI|U}N鯝@ nԘ^8:4(g,ټEhZ2V\"ǧ:q  PcEy!X(һ3bh6"!Oݻ{;u'Kq7]5|Sjfs͈@!\?a~}$[i܁i 0{P$\VMcJlcg讂G{ҏxg$8YG!>*0}nՖ Ɏd(.l&9c k{ҿY +`xud043ܻ*^#G5s"}F 'me}u#QImZ1Vbv:Q [֯0s)xjnqrǀ. ̝jZڼ4|$}VF1Wmߏ0j.#v96j2PP @KRH+';?br !55HO\ 鐝3{R T2f]92G>ZkѣI'%{{:O~ؘd8ǯz(1rCWz%f75X\L3;N2j8r}2Qcfucl?5k[HKKl:7ɻ@+A?DOK4_OƄ< In#eЬxs +xv Sf5H(!׊"OD lFƛp+Bf3hfj}jEfSQDe\蝨w]VUg5(r u8}7z7+ӥڟ>w~8z/"jhUinYMyFH'ؗK'z(lY@tz﹞5XЃoS :F-P.<ߨTE,Na +W7^ݐ cWk] ;1u,JT +a![`vU"6V29֝[Yz(8"bto̒B:0?{OВC  6ʺK|C9`ǼYc֢N]DŽA,TRl4;Dj֑ݳLMDUǍF3qy-0A1aIÅ\~Ha@P9d7Y%[hSB` XYB(Pgi' +z:C&VY)y>,K!~XEk~'+x$<T3p;DŽIaoiwoF.%S:ۜs-t ԀU>LuL߾S(@ŊX%Z(#@i-RP}r |܍o^]q%MdSKI=I M񤼍:.̝]8bL&G &1oR9hj_KtSD*nԼ= XR3{*&(0P&֖6U.E>qai곔JHvEuʱlâњ[`ᬌa\r͍^W n4KO *,6BYBCg^k +}Iz`AL:bARcLkN\PAԲ ɛ^krZ'`l+Etjiza l+/ƟMն"U\Ke#ѫ+kM:M}vSL[nBR'qꡲ*: X U}R, ه־ڙw΀Kr{#õ;v,p'qrGC:ahWbMUzt[ ?mbE LiXieE~GĒDpa:]G76l +fҪ;&_zi1X7 ]ݸ$58Zy:=s xW]|DbZr*Ku$82͆C9!6."]J^hS|#uD/1wB#dcBAFW #L2(M鸝Emw18m;]TezOR`>Mlˠ?VY*w8RrG5/Z:XeHX- HD~Ph4dg͹`>AL"`6!F:I-;AG`r<,:D!*% IlH\a$iG܎f] ٺ&?YCw-<9ώf{'"z`y(:ۢL&|2VtЖ/S~3CAsr 3>2rIK]6kv3+)oz tsAU;'4WРNkF+*(al<3)'n9'o 'e^"wE4˶m۶mm۶m۶m۶]tw̼*8ߡgNͱ)A +#W^gTPfL-.nRtyX[W@o[ `3`(56[e>4es 閟4j 3w ŨsG&X 3ASjx bAi݌Q~zX!EG4pnKL[nrq3s8Vˬ&0.X?BD>kϳuBnt=m[*d{6:*%Rj;KJm49Ѧg0p"p n\ +@^cTM.^|466o_%!02=5^2?aL:ыTVx4t:8;gB%R@:<_lSr>3Q,P Qթy܇ >'s3fSp=l朄 Şp8& iyv>t3yc'] F2, z?W4>rx߂KTY\¿U*3; XT8k?\2iY=im|-iU,WHT)|s^؝A*+X"Eރ:3c‡q2GyPp>Ɣ]t2F)ҒA'-(5#; +9L7 Q48niXO6Nj8{ `L;Gp)ڜ @rՔ.SPҠ[w^̛*E7C1=rt,gU P7AfctWO;leHV[ꍅfKȉ?rB2R%xM:DܮR\pTy#^XBklhՉ[kLAGbg"+c{^M=3r͎0kJТm/lL J:L5t]v؞&plpC_p9e__ +ASy4sMSl ՋPW_7M-gZ\Խ _Tv ˋ +sުۀ^nbPvM*lE7Do|4-Dj-lsa·'b/U\c7G1P=Ht!i?~8[ ")PХ߿."΋D+8ʑWrvg§"D;79TN%)m,QG# OY(?ʄ纣@| 1<۶\^ddHmNPK"1ifmŚ\=C: rApy4b~qT.T3a803dZ/^=1F"x6jwqeNBwuo¸o"Wk/ y@ފ]˿Z;me25\[[-c Ǻywr P(%6'iwKmUnXql0`BJKzUgW=xA>s SCWj+pBj[Gr=SLlxo]eSVC-`x<#p2^^aHhCL%b #8 |g,\2Y +u(_i%r^g`lOzvG sj]t|.\ Xw/ ">K$L{Y)r:n򗘨]o +H9^tTTI1&+2'b]fW +Lra'%N'ҡ1(­UiNww8HX:'G+1K`c,s#oU" +~|5hBպn~BJvnњ9rM k$kf +m-=mj(CRbYLʦr~ěnMLg`jƚk OƁ3$GI +ŗO4^០K=b 0pSSPWePK`Lə@ʂgp}<,Y=D3!C:@ \~-Ukxsi07},̤4cD5pqE݊xd!$H9s\mWܷ:BŠ%م)'TGJ9*Nں_1W)sC +{MלػW~(+ZfR1?]{!1.=)PD}`* QO7G!BNГD?=YIx@-j"0 ^&SwJ4*9cy(JBetI|uȭ[j:Фy_~nH܈t}ݡ4"\&)CZrY[ V3Y]Rf-$;rj`mif Wq9@=OV+q;nK&ny4yٜk[?L&U⚌Iz ,Jٗ^~K9[Hz;cVԊY,JO1' +=&.KLi*JL /@y`CTCJqX`N7C|~5vz8n;FF:Y +bbECJpJW&|uuU)qHt;n-'Ix3rn,CwtaOr$_6%2>}rJJ~#6^ 7Tcl G1xGy=ښm$4`e(뛑/G62|uUV/|A閔!oAt!Pi//>cO$nE$uW3A3gN?ŎΌkpVxZٵB/nOl9#-/~a-ҝo@8-˶5ԈDjX_k2<zf8"}D-@i_Jm@.1> {K~^W\5ZR܁'7JE,۟zJbx4,BTCNЦ[25jV :|`AGUnKZvF;}+N [A2K!ޙ[O$}'s|N sGbG%!is M@]5-gUiO) MpfW)sUIMTl-,W\̒)jF(@һ6r}H=Gh@1)SVY/21)<ߧڈF$] -IU!k( LCʅ#2d f{{NfN%5Γ6m^LgN>Q˗e%ZhxrvK5Yöh4rLPk*+};I4.~>5?Ȑz5, p%߸in F/DˮCPeɕL"fȹM^tzLy_U#խ-`ͲȈX_yg_5fM ,&R[ (B'eeNT}WI&k5;EfיyUHC+P!_0=z42fbV,0J6*0c3itt.izX$쳦8 ! fs)⁊.Nܠx-ѾD_^5Yuc^"WUw23k\SD!+΅Ѩ5cr.IBtT֩zB3*L9{_ !I]WaјJВk>{^;W~ć)#u*-+BHy hHm٧uJ +/95qA-:6`>>2 < u=Kw'] o.|q5ll+!\خeKe-ݱrARFE1_c +҂>_[v.] 
MIb۠7a-cr[Ζ!;YDФ_>(6-?P_vE‘YD.hT`5f|ར+=ֆ>81y&ŗxXݱA~xgT>3Nd@w%)41#vn?BQ46aPp} 5ۈ+KD|+q&JHٓ,->^rW2L<**eGn\-8>FbH뫢u}T'XFN$"(L[hQN(5˹d'MrQӛr&eϯXby/0"3PTO-)ʙjL6~;0>MiT8- g&g~/vQ9Ĥ-^bArI*om,wܬ?*"vm?h:(ԐNoգpcCcutpӣtA5yd`Xb*k눩^}_F,;˄p~O"+ fvژLZq3zoqG0'ɐjlLz cf>Hfa=4: +WƱbnq `]:(}y߶ǂJLNL7pqhɈf޻W)[0?Yt#ˀ o&0㞕`jZd?"!$יu +o/vC{锔! ;蒪ҜDVճR:zB'[s-tЙ-|T n9K`6QvtXҞw?̵ńw* p>2\X.w6A +}zжDMN7cݕK:mʍذC BRj\vݧ$XJօUer }҄ſޢQ2-ҠQ^POM|Su螒-pc5>bil SU8-~ɭUwԱQ:),O_x0|ZFJuY-?)"7[8`∪2 B9e"~LkI3A)ğRg+2xM㨢nPVC nvo _@nNc >3 ϧy bic\ZGF!~̦'F# ҽжn +G?8YP<"zݢa9E@.E5jm纀݆$V!JVqu6'b[oIePDw]$efZܶQhqbjߑUcwU! +f?]ya)n߁uy _oGcr G:;xj3,͇ibIOKa ~(ٌQE&z!ryW|5|D̺.a#6nFW-#s;{x ;IG9qfѦ !pNOh/ * ~qܯD%#+[ +20M mLf2!}eR.t|q샥 =FsR߲`O2peӝTm: oh)R/O6ذ*UH$ivT@~V?.B:JKRZUc:~x鰐 FQOiuT0{i¸,yRVoariǢPv٣/&Gj։Ҏ Ĩme;1gH@&7$I($]m9S _300+_ImLMyzqZ=nޱjwu}COcj,|Jlxs޻t?FLz 7& +p^Ez- l4hp uwMw8S +Η@9C*aCw{'unP͓G^4oC.c@QD_C>26'D"⮝G$YM30>%=@t*H$0N eJFISH PnxeqX?qGjxgExr \' gk{CMIOǠR3̄.5ٜEajӻmM ?~-%bv[:E XwF-~A + +ûH>]j}#n}$?æ=-H+~IZ]Xdg䟇C}\Hb[^DL/>$5BF1_b,͡0L|RxDhO.(?Wݰ"KQ+EsS|)%ZgYye}ƙOD[_9` a8M%ΠҊލ⛟syAݐo1EEf7S;qMJȿ{{Wtm^e}z4mF`8_I["IdC@A~\gf &JPM[5wʔtq_#%hT%UV7"KzthRU#IcO%ޛ0qn~u[YAuMޘsDWCzb8^pa* zbLd,9_hi}L!7֩ sI tZo_Z1[ݮC3d( cKBS22K+f$С` 꽣{քxg|y?P#wkXUy;v?J>E*@PCc/ *SW:6򴱊^*3mKfQ Ho1P[F`T31rh06C- |m$ڽġ h(wb݃hA9^W)w^td\&ߨ +_k;mK 1T+a5ICX)l*(WIKVrZ`#jz" feҮUDNDRxDa?zP!bY#WS(6R묳HD7`6˱߄Q(K̇^Z r Hiz7+l0T۵$Md\S̢ +a1(d41M7Y19yeVi 9!2Qh٩,z˨ff&[ElƉxo$z8koÃQ:nL.}Y3b + (&bhqV0?; uCPرiyrKaC/u0y\ZI? Y$ ɶ^= KEg${P_ +YS,%.)XMz|:!u4<R8ٹ}L{W 5^I@6VSKH'rxRn JZsZ(_8ըkZ2wCTS8 ψ4GP!b`[nԆQ*SƈmF:ʍ÷Z0X+2 xU"htL +3(]eEo7`4 Q4ы`A\s]VрkIP.Nv +, }106|:"J7zصQ|&B$̟.ō25MVMHߪL"ޅ^hoi|NK8H::Ήm5m +U։ kfl0PO:Pm;) %f [N%{`jFlU-EiAEv#f%rwnm2V cŸ,- +ƿ^9bz=iN~j>3vmdgZS |3zrؕRqʘWP-K@ue` 6Iw3LXg.lp_!XLcWUJ`h&3.]%p >Fm_rppǺLS@N 0{wKB_VyE Ku`E Q)hs.[HI*LGpe(&{јʗjsOgP{ܷ;AE{ϊQeZڀcϿp"{L K.Fzq>`4y+D75b櫩^d-\h'8L`J΀z6uпJ{y(Sz9@3aokޣ?ﲧCSdn4c\bOfJL H+OR %ZhPw[i*l +vYv~ + Y|QkKaMu -p#1ݷF +(ow\4TDk@ 1bLȞƒ/}IDeQ^O$Ϯ+vڜcLChYj444ٳ)0|h @46g>]n&%ʞMF] gyC%_ p_ۢu8qD|BƈEQq.ͯlߟw+h#.kB e/Mޜ#SWۍt׼^/[y,戓 $u9ŧ!o/2Sj{t`wZpP3T߸M՟-\)6"/U spOUBJX̣5fx1O(R(Mq뢱IfN_0=|mozɴ>01PiA^+|ls :4 7v2)?BmH^O_WsM` YZn +72K/} AM pM* _|g2%uez_o nwrM" +Ǐ|ˡ}Ek$6SOk5Y/sSdհWqsK0J"@ zWT 4TRICO_9Ug-(%N?J2U$z])ϥce3j` AϺV̊„ hsxrMyXKhk 5OjV%[z2l/*WZ03}-&^T+L ҙfL4\zz}&}L8w& 7Y+a+JtLŸԽ#L2U~Gˬsr̂!z2c 7+ȏyS9jO4T V pXܜoud=Y4E0?jIП:8tz 3ZGqZiP jbKjْj'$ܙ5o[>y:'F +!zבF7C*=d{L\= ,TLlja:83~ 𦛷RwX(֌ +,^v&::LZD>}L<2ھ:f$X[81R*h_Q"iH/8ٮ7'lhtt뚪/g +4,zo 'M:}|:nAS[hp7^ +G:l呑qЃK) wyUsu|oϙ?:mj#.""W0Av0Kpj9%zcj?{ϮHff}˼l0/_\7}D(IK_͢:N}C咛 K7˽IOWL: %Dutj:ou=V,y>QR<V]LKK+PJ2(\d3</daxs$u(%Ђ U "mDr9 +Q3C\OgDnr. +޸DK)(a[ȀthYtGqR6Ky]6xWA6)zظelbf^|깣o޳F\1e=!Cry1ShփmdD->Yw:h @skؐ7퇣50&d&7зGšu'Z[1 +:| ӄb3ymo˟dzR +sᭈN6v5|y32i2:-REWI6E:ʁ̨<&7*8`#?nXOOl,T [3Ͻ?o!"d8Z˴.1tH(p v n`F?1YwCϺ1d%K]$U@ʓ>2x^4HҟÇ`*r?Bl[m7q72i+2զCf\`1xK*e];eM#gɑnq R +i/WQ^ }9v|?~^T\'DnRwo7xd"NW zs|YJT̓t)P5H 6$4"Ƥݎ$! +-oGLl;X7n!#y run}p.R~2*0zH^Uty$e,y>PY-_}r-74$q){j2Nl!хTx }I/oj`.EɞCȸe;>f|>@yn"NKu4ו`?" 3̱D!9;5>eSr{sT7ۘo[Hͣ +ySGE;׀5=} 0K|P ),jWs'F0D~+*꧈[pZc + BߕgIu3(u< "E0r _7U#YVy3nw K4g&ۊ>J5(Vrl#:l[<|d lg$e2-ݜK^ꉳpMc١R[s(~~0=+ytuX׮ֈC"^!ث(coKulڭq) ձk|tk>Nd5sNH4+W-kfKנ.LԒuQ!-(-LI?lqN43թҤcϗܫwC; Iޯ8@?H0?_*ǐ# m/ hkA޺y г0E´-ŞSW"Ck+UBP &^2p107uUn`W$XrHljƷ-Y_Mbs(46eQ2P{coCu>}5wvMjB|uNmo3yBtG5z5Vheg_XN:=?nf[š'(o8CzT! eKpdӠY}Yc8}DV9]BRfUQRxBS-ez~bSjo cR25xmBy+y* bk_Bmsax!UW{Vr6ISBlz R +xc !A!Ujrvt<'#/$ghjgP \֦Zib``JĽ+&JT2f$X_ +e0U  䓡=kli!w-묛T5^ zs `N 6+%ϵP{qTM߫ءtNU|ٝ&`!ݰR*XZ",QR; 0ޛеryHjQ+^ Kce83T-Qo??|ۍ@NK8 dTe{v77B`ڄA9V wۊKtnE<̇|#ހ[m~a-^'t9_Oϓ0\TD&?9;Dp/T6 %H#7fK1vkpKvigi*>8ʒ(}7*H4NXZ+mrw! 
[Binary PDF stream data omitted: FlateDecode-compressed font, image, and figure objects from an embedded PDF. The only recoverable metadata indicates plots produced with Matplotlib v3.10.3 (pdf backend) and an OmniGraffle 7.24.5 diagram titled "workflow.graffle" authored by Yongji Wu and Yichuan Wang, rendered via macOS Quartz PDFContext.]
R\\N\l +endstream +endobj +689 0 obj +<< /BBox [ -10.303301 -10.303301 10.303301 10.303301 ] /Filter /FlateDecode /Subtype /Form /Type /XObject /Length 49 >> +stream +x3P2P5P5360660T3 rpn.T +endstream +endobj +690 0 obj +<< /BBox [ -10.706339 -9.854102 10.706339 11 ] /Filter /FlateDecode /Subtype /Form /Type /XObject /Length 100 >> +stream +xm;@D{NZKOb޿;M!a1$r4xt&)Rks?b}6 .ƫTnV9q %[><6zh& +endstream +endobj +691 0 obj +<< /CreationDate (D:20250513045445Z) /Creator (Matplotlib v3.10.1, https://matplotlib.org) /Producer (Matplotlib pdf backend v3.10.1) >> +endobj +692 0 obj +<< /BaseFont 812 0 R /Encoding 813 0 R /FirstChar 0 /FontDescriptor 814 0 R /LastChar 127 /Subtype /Type1 /Type /Font /Widths 815 0 R >> +endobj +693 0 obj +<< /BaseFont 816 0 R /Encoding 817 0 R /FirstChar 0 /FontDescriptor 818 0 R /LastChar 127 /Subtype /Type1 /Type /Font /Widths 819 0 R >> +endobj +694 0 obj +<< /BaseFont 758 0 R /Encoding 820 0 R /FirstChar 0 /FontDescriptor 760 0 R /LastChar 255 /Subtype /Type1 /Type /Font /Widths 821 0 R >> +endobj +695 0 obj +<< /CreationDate (D:20250515122205Z) /Creator (Matplotlib v3.10.3, https://matplotlib.org) /Producer (Matplotlib pdf backend v3.10.3) >> +endobj +696 0 obj +<< /BaseFont 758 0 R /Encoding 822 0 R /FirstChar 0 /FontDescriptor 760 0 R /LastChar 255 /Subtype /Type1 /Type /Font /Widths 823 0 R >> +endobj +697 0 obj +<< /BBox [ 0 0 72 72 ] /Filter /FlateDecode /Matrix [ 1 0 0 1 0 163.62 ] /PaintType 1 /PatternType 1 /Resources << /Procsets [ /PDF /Text /ImageB /ImageC /ImageI ] >> /TilingType 1 /Type /Pattern /XStep 72 /YStep 72 /Length 343 >> +stream +xuR;r \`mIܼ&׏سcVXڏUsVH>_OHxF +V¯Y{BN3ld 9dUo8W +tM 7@Is9'<Vr%*> /TilingType 1 /Type /Pattern /XStep 72 /YStep 72 /Length 312 >> +stream +x-;r!DsNLN)Q [4Yu sj{6Evo:#h7f=?ƿ>ڌz_A͟"-ԍ8JI5*U֪7-T1qb)>e N"|$#NT)G8j 'VrdĉUjYTEʳė g'|.B@H +hZ[0Ŀ15B!|ޅTb{s@{HO! g%FroEv`"o4PڧdQ\7 qwڑ ] +endstream +endobj +699 0 obj +<< /CreationDate (D:20250515122206Z) /Creator (Matplotlib v3.10.3, https://matplotlib.org) /Producer (Matplotlib pdf backend v3.10.3) >> +endobj +700 0 obj +<< /BaseFont 758 0 R /Encoding 824 0 R /FirstChar 0 /FontDescriptor 760 0 R /LastChar 255 /Subtype /Type1 /Type /Font /Widths 825 0 R >> +endobj +701 0 obj +<< /BBox [ 0 0 72 72 ] /Filter /FlateDecode /Matrix [ 1 0 0 1 0 163.62 ] /PaintType 1 /PatternType 1 /Resources << /Procsets [ /PDF /Text /ImageB /ImageC /ImageI ] >> /TilingType 1 /Type /Pattern /XStep 72 /YStep 72 /Length 343 >> +stream +xuR;r \`mIܼ&׏سcVXڏUsVH>_OHxF +V¯Y{BN3ld 9dUo8W +tM 7@Is9'<Vr%*> /TilingType 1 /Type /Pattern /XStep 72 /YStep 72 /Length 312 >> +stream +x-;r!DsNLN)Q [4Yu sj{6Evo:#h7f=?ƿ>ڌz_A͟"-ԍ8JI5*U֪7-T1qb)>e N"|$#NT)G8j 'VrdĉUjYTEʳė g'|.B@H +hZ[0Ŀ15B!|ޅTb{s@{HO! 
g%FroEv`"o4PڧdQ\7 qwڑ ] +endstream +endobj +703 0 obj +<< /CreationDate (D:20250516081045Z) /Creator (Matplotlib v3.10.3, https://matplotlib.org) /Producer (Matplotlib pdf backend v3.10.3) >> +endobj +704 0 obj +<< /BaseFont 758 0 R /Encoding 826 0 R /FirstChar 0 /FontDescriptor 760 0 R /LastChar 255 /Subtype /Type1 /Type /Font /Widths 827 0 R >> +endobj +705 0 obj +<< /BBox [ -8 -8 8 8 ] /Filter /FlateDecode /Subtype /Form /Type /XObject /Length 132 >> +stream +xmA E=E/IKE +n=$;M/ oT +O4K7 )Ĉa^-r b\ 7FKaJm7n=bJg1d•cp=E !c|D +endstream +endobj +706 0 obj +<< /BBox [ -8 -8 8 8 ] /Filter /FlateDecode /Subtype /Form /Type /XObject /Length 132 >> +stream +xmA E=E/IKE +n=$;M/ oT +O4K7 )Ĉa^-r b\ 7FKaJm7n=bJg1d•cp=E !c|D +endstream +endobj +707 0 obj +<< /BBox [ -8 -8 8 8 ] /Filter /FlateDecode /Subtype /Form /Type /XObject /Length 132 >> +stream +xmA E=E/IKE +n=$;M/ oT +O4K7 )Ĉa^-r b\ 7FKaJm7n=bJg1d•cp=E !c|D +endstream +endobj +708 0 obj +<< /BBox [ -8 -8 8 8 ] /Filter /FlateDecode /Subtype /Form /Type /XObject /Length 132 >> +stream +xmA E=E/IKE +n=$;M/ oT +O4K7 )Ĉa^-r b\ 7FKaJm7n=bJg1d•cp=E !c|D +endstream +endobj +709 0 obj +<< /CreationDate (D:20250516055607Z) /Creator (Matplotlib v3.10.3, https://matplotlib.org) /Producer (Matplotlib pdf backend v3.10.3) >> +endobj +710 0 obj +<< /BaseFont 758 0 R /Encoding 828 0 R /FirstChar 0 /FontDescriptor 760 0 R /LastChar 255 /Subtype /Type1 /Type /Font /Widths 829 0 R >> +endobj +711 0 obj +<< /BBox [ 0 0 72 72 ] /Filter /FlateDecode /Matrix [ 1 0 0 1 0 119.88684 ] /PaintType 1 /PatternType 1 /Resources << /Procsets [ /PDF /Text /ImageB /ImageC /ImageI ] >> /TilingType 1 /Type /Pattern /XStep 72 /YStep 72 /Length 343 >> +stream +xuR9r0 +}@ɺ4I&YZuBe9yg|_Y,=} %6rc: +LXMSRC,'d:TPgQ`3,c CU6/ +.ٶ+k˵5`silʴ B)Lɲ82[  +ɱܐN3#GY֨2o#xFR6 []lN65fd4L@%%k4O<ע݀> /TilingType 1 /Type /Pattern /XStep 72 /YStep 72 /Length 563 >> +stream +xU;0 D{B_lӤ4r& ñ4g>*juJ[j_m]k~V\$;j1ߦaiwp|(ZۃEV="E<0^[(O.Nۺ=lvWVsJ:ӹ, +M%4R."XJ+Zi,ق K6 D"s3'qOx%qI-3%!,ިIrQ)j~іd91 GaZ%ǐ1_LvkF x?߶7pS"W-r.rA0K9"L܀NpT}c8/T^a SyFa0a4 S!C ~)ƈb*/Ƙj`L cw2,$@YCжu`Coy5\5μߊ?}`zXf֊Ю9ǀT11t׃qKP[m!0 +de8>de8rd/ä0la8}I/ +endstream +endobj +713 0 obj +<< /BBox [ 0 0 72 72 ] /Filter /FlateDecode /Matrix [ 1 0 0 1 0 119.88684 ] /PaintType 1 /PatternType 1 /Resources << /Procsets [ /PDF /Text /ImageB /ImageC /ImageI ] >> /TilingType 1 /Type /Pattern /XStep 72 /YStep 72 /Length 311 >> +stream +x-;r!DsNLN)Q [4Yu sj{6Evo:#h7f=?!|?EZ۩5qbj9mU'ԭUoZb*S}$@XE'T'EIFR?J\ +endstream +endobj +714 0 obj +<< /Differences [ 27 /f_f 35 /numbersign 38 /ampersand 45 /hyphen /period /slash /zero /one /two /three /four /five /six /seven /eight /nine /colon 61 /equal 63 /question 65 /A /B /C /D /E /F /G /H /I /J 77 /M 79 /O /P 82 /R /S /T 88 /X 95 /underscore 97 /a /b /c /d /e /f /g /h /i /j /k /l /m /n /o /p /q /r /s /t /u /v /w /x /y /z ] /Type /Encoding >> +endobj +715 0 obj +<< /Ascent 685 /CapHeight 657 /CharSet (/A/B/C/D/E/F/G/H/I/J/M/O/P/R/S/T/X/a/ampersand/b/c/colon/d/e/eight/equal/f/f_f/five/four/g/h/hyphen/i/j/k/l/m/n/nine/numbersign/o/one/p/period/q/question/r/s/seven/six/slash/t/three/two/u/underscore/v/w/x/y/z/zero) /Descent -237 /Flags 4 /FontBBox [ -1082 -268 6171 893 ] /FontFile 830 0 R /FontName /PNRYLF+LinBiolinumT /ItalicAngle 0 /StemV 80 /Type /FontDescriptor /XHeight 432 >> +endobj +716 0 obj +<< /Filter /FlateDecode /Length 881 >> +stream +xڕUMoH+z2`"RFMF6H6x笴`QuUݧϡƃ /ZpxZV_ ˓snW}g~'?ڹI{)GݿPp9ńa?tX_Qߡƌ~9ޏ)UUşnqxPvCWgd1+V7~&1Hu}Ȋ۳oΏq 6/F>oS~xUw<_/:Vu7>N>{ũֆ]c˾u~xuF46pCk&yn_bl MDSr!Ǿ{4\}!#:(P0vyT؁`h- 
6hi*.Qg|'ȋ`mk`QDŽkıF'\S8 8: m-ב.y_T\/k3M̽P,c`xf} LS $suD> +endobj +719 0 obj +<< /Filter /FlateDecode /Length 678 >> +stream +xڕTMo@+HɁxYl e#YJ(*) }`zH۷f {eE7/ū͍|5MZT1).݃xi|czquUw6x]ǡ0iVV!XGnw˽i2۵nyu=0gW"ͶVIfڮK)->)3 K*ڱ:GDQ8w~݅xszsZWY,Nv}{s[GM[fhA!R`ײ}~ڝ|cWhq-y]UoYH,[:*bCé gKg+}Ii%"1%4k DDhӜbK$ VL$`.zZLTы9Sgo]xc|`x{zcTSڏl~,FJy`^W!(Z1jg?e-+ѵ)+ZfcS!iZ +. mHƈg4em +0F>Dx>?j'1zȩ#ȣyhԢ50ŤO8#Al͡Y~=3NH50ƹqҮ9Mq=ж\uBў TUui1zΜ +endstream +endobj +720 0 obj +[ 633 ] +endobj +721 0 obj +<< /D (subsection.2.2) /S /GoTo >> +endobj +722 0 obj + +endobj +723 0 obj +<< /D (section.4) /S /GoTo >> +endobj +724 0 obj +<< /A 831 0 R /Next 725 0 R /Parent 608 0 R /Title 832 0 R >> +endobj +725 0 obj +<< /A 833 0 R /Parent 608 0 R /Prev 724 0 R /Title 834 0 R >> +endobj +726 0 obj +<< /A 835 0 R /Next 616 0 R /Parent 6 0 R /Prev 608 0 R /Title 836 0 R >> +endobj +727 0 obj + +endobj +728 0 obj +<< /D (subsection.8.2) /S /GoTo >> +endobj +729 0 obj + +endobj +730 0 obj +<< /D (section.6) /S /GoTo >> +endobj +731 0 obj +<< /A 837 0 R /Next 838 0 R /Parent 616 0 R /Title 839 0 R >> +endobj +732 0 obj +<< /A 840 0 R /Parent 616 0 R /Prev 841 0 R /Title 842 0 R >> +endobj +733 0 obj + +endobj +734 0 obj +<< /Filter /FlateDecode /Length1 885 /Length2 9111 /Length3 0 /Length 9745 >> +stream +x}seX[5 n +ww Zh)p(R,{5vyw -%PU b;rq@n6NZZ-+'[0Z0 +b/7+ܥAN,jN.N' ߁@hZ`Z);I_,92 +-@l6r( Z0X:99Nޔ7فQ89fV-Qrw8f`U@NP+7''?  +صu.ٿIII sdNV7e_]ß1* `g?3AvV࿺`PY9|7 +N [+S { CVVn`3%d'moZكGF +=O +lodM!fV.^> +p2//`G v6;=O +gxP*5'd ןa7,:Cf_ws?E(3S몀J WY=-Bo$:16e(=jc՛5ƷP{G%{ȐIYw?{6iS,s-VS84O d5B;_mI4!+$v+b43QwWrc; >!58^"gQnSG[+];=В]k*g\9<Ǧ +җ͚]Ne-^e!* +9S;n d0|9`5NF&G;ya8.W-ybQc+#}<^vuJ˲Q;_&/ugdX)44Ԅ&ᐂvvL"k<ֺ 8EDa;nk6K7؟!lȧ^"M0-BQ[sJ{ \gw@~GR]%q< NE/m#Yd'?O]Oi܆Hi{94憆Y'2T8:oK +SIVa(EbNzs@/dd3jI3~ ]|3\$:$(ME*񱹖 6yKiX?yd{WD-l4s MSm?R`4?FiP$Ω:vRCp}bKw4x>W >:S(mqbV2ʡzd[M9c,MC1Ig}9ۂ/P5^Un3V%{l!u8(*㐾z-\ԮhKn 8\c^>ǠA7,SmA0Bgض YNũXWW£ðSeMCqh}:tr7}>sg۴QobS,p1rцSU J Z|Fui6&•͠4$~˵ݞ ?\P xZH2L|9\0 +ĸ[_'Dv(Ǝ;k QÆCrjKyBZ5,UK5陵IN*'q%i7OR_r}978h!Afm'ݓ>v `LSdo^[ b/{Ez~ E WRiדP˗0ϑ3[G+G<8g3GZЉ +IW&"ʴ5n +i6XC}!7F!DC \1-5-gESLH%a)z2d~_.u#d6'͟A?=`@oQt]bNv{ݧ49i D Os`iR!Op+o}z&8g78(f_&vXoD/c21JâP/IkZ ~¼UX$arfwd0B>jytzCBOelFÁd@]A2mV\ lu| Rs_O3^}զ7ѧd*Y|,28*aw}Ud1>ZEFP c-ob悇g9➅>U)bZ +eN&lC`MhWFneoj}fPDC~Kirdu-ޥ>'cxsV]шnOk-&O+g^*Mm2Y}kxl܅㓆 ӀŮ/%"LP>dzmV߰](LLUs + A81!!EM>@Q1Ƭ2~–%jwط\yT8%JC6gtͺua5YB5 FRQlDj>*qq9BqDpg[L3᫹%9r<@E \J*%<=SϮtP< Td tlu,Wm'/,&zbsEkG&+M%QiQy) k=T^'` $D%XkpABn5n\onS6QXqQw-dyvZU?;t$j4VKduAq #, ןy +]9ʀnT|֙Z= WϫG_1`.kV *'3-2DZn&`1H*C텲w.'mznlnSΌTz*A1$-Rl2}P@W,*:3Ҙy*U?d<䋙O^UκIitJ:0<;.BGbuI?9Nqc_ +,+N BJ/2:v/Y(rkӇQ3ވF VnW46@xے;^t{\ +GIԛ"j#xQ4,jkwjl񏷂 k +5Hub}(ɋ|tSIlAzԑxq$Rś2(n`3&VbsEza-lvQq!l[_UHBuJ&_sFLՄ@2N ؼ(Wmw,$46dRJ _)۷_%V?qL:%>W_cP8s7¿v8 +wy Dz֠^zk&ߓnl*giXR T2:t4Zaϔ͐Ki^UǴW. rB8{2}9^1bȶ+d>ob"br ;icڧf=?cHfRb;`1)UjI 2Bǒr:QyVkiF`*\er[Y;O1/|ŗQF忠.5SBBZUÇVIEo(GռcDP$h^PI9 -R0;K۔^fNu9HX]D,NsB2Nhx?,Piȸ5h|vgv'8F\%'Cd[Ldp|C=):_q=,[ǪTpnG=whŭ!0Ǝf]'Ij[cQ޼G.:hit\:J!W  Oh$Vu#[]quzC?Y@w;_oJWhg5 gcر"1+<ChWs$ +hZQ'%\2_.ALw">i"SpR_W([p!9}rJCA\YkKK+@ iw/L2j4A`“ĐF*`Ewtfuf3l-M~TR-lI®[(S397CL?DϐJsQĬU~29t4B_aGal>v +^v~{>wjسT~*IHTmDeTrgnxd/D-C4Õux$d- Fm|"PP[ 4 h^ϔ.Y'Fw, ?^TQ Y4~L}͇|xc"&Ë)0-T$w~kf(%cφS=A~9mddaսCpIWS,gʍ{/G=ϴ8_]MQvA}2ColCrJB L' + +rdEsn;kD}wn%oNg>qOUY(lF"L[13^w?.D~U\B/%.1B}(g.!u#]l1߉_6Klu8}x rR,ɪxmH߯ nZ@gкR9)~U{.q[E/&Uxby5q}"C%[_{'/î\Iw&yhE4+RO뵩I:Jg&%e6I[-4} ң'KQ:m#98!ؼ|IT?qиH N9- +-&c݅s n bwT zJvVr?WJ4I7; H~%Kz|'T@̞-{ w xHI=2otR +((QǗV^o=+!9D~Y6l.  
6Oԕfmu1tHRy߇UWny'sÑ;C>| ;jZˇ,%+y9fKn|NM,)> C󁗟']r2+N XDb&]֖vaIӸJQWYQL%v`m֌ڞ`LW|hQZu\=KoYpD O=H +xUkO2>TF[F!B7]]:G:dznsڑ^`tQe`B*oQ J`U7!MdK6hib͸BvtA{2 grNl]r:E+]qX5} ܎@ˈQ•3ϡ5Fmů)BIE)_`(w.cQM$ ߊhPcQȎ.{jW,Pn1}hn6y7iU0٢)_Իh\P"L;R -j\ $\(;+; +V--J zt_5@$g6 +ɒ AvSdöYPoپ鬍'x$r=3VѸ+=B^;NGTE߰yL;"I@-} D7`k +;r$r͙ٗPt<aàGhD j_Ft3@_@]+Hbu+;Qܐ$X,Q.c4DnŃ2xB I=.Qtw0esn+ΐw /Xc4DeRO s vޚ!a 8dPp/0Y֎1{ A|O{f)fqCb>aU@K92A D>f`5LncMt/Ka?2Ĩ~y}WF-HbX5U4F^\yY:Y߻{Y_\y*MvWA94hJQ70?ā! &H}\Xk'|mp-Ÿ&YMK9q+'G"`O˹VTu}q(n[A y=y+V:-^5De6/AtI8 : 'U>$ҒJ+ZYjnY)?)W?HT@s?D?bӅvJ'Ȼ,e_?>5Jw<̢>K33+4Hwijbռw=Q|52|_t׻4轩{'xIԏ`CP졁T}wT^9B ^hT5PO-!%hrw/|>-NF 5B.7֓dѪXn+Q'17v߄ՓB ӓ/(/s +w#ڿLOCilA)jM7N/%nTV*W% pSfL%>A9Cb Ӌhl;ldkp'w8Z-1;h@`[2G%M`\ף>ri=l X tz]c74TJRunZݧD^bt~[c(dq,v$lTw5վoFzMs{-᫠ ]#y͵K/u$\T!q{n`'T@\Egwn]Yxs:83i3yrEX/ e׫k gБjKk zS`SӿY<4yv4`J>LxgcT,9^ +ZX]aˣ5hy+F=VN ؂,{G:@ti<;P;yB:qEwoH`5`J1ZUfrea`78z?T!~U\sT2I0^rK*ue#^}vHYHI%ig[`Tc` S ߙ;i73azi D"|,X遴Qe +oM, ./ {jaQ29(/ZTxi)&2%N\'cKTОFT +Kbn%*EWa%N~Z|GUbcϟ}%bwC$uT!\BFVGOsuwXϤv qpuaԏvzX&IﶹW[wk6fʒr^~ͭX$EHJfIsn+揰I$:^F1|eM-hijXŔoN FƜviȺT)ۺh8ԊY(z.%R'饹A`2V u3FF>q;nJ9u{id R"E/Xew 2ZC/_S(LGKtxVǔ6@=}j# 62(\:ճJIDh]x\Ҙ;> "Yg=c4'u޸{Xj(\IoSH$O(`8ɏzkFRtRSyXm6=)lԶm.8wlڔ=~S4G_iqDfZ:yg|ݩU%#"dhO"󜈇޺O4ՈQnR{̞:sp|0ąM\C*Sԫ'b*T펇1YEо%lmbq]-,%Z-JA?&zU!x~op }*%?K'QC-RמּZ73 ݯQt8DNQ2eCY'lZz16% s ﴛU3׮&.|Hfd+VdmAO7='Iܒk 5&ϧ;夑&i8yH%#s$kK,14d3^ | +endstream +endobj +735 0 obj +<< /Filter /FlateDecode /Length1 1572 /Length2 7543 /Length3 0 /Length 8410 >> +stream +xڥP\Y׮{ hxpw @k'I݂Of2u%^kj {(C19 P ,xZ-u@N{?l%@f&e{pt@N' / S/;{'A2lh͜@Nk0 V2-j`fogN+"fN=c)sY\!00gd읬AF0 SOpazAANxg&~cpZJڿ| œ99 d z%8AEpA=K]FB@RI\N~~?! $qvZnW׈03= bPFv܈Nk%a?f]~c?;A,|.K\~zPg~ g&X9gR Arm%!3\88XBA?@PIZ<j >TaxyC$7!7;ppy? Dq]8x~?/#BA߈! _!_#_._%2JFwV\5>b.^hnG\2mp'zx !p~N3b*S]u ([% )-̂e3>Gɜ\rHK1X_WJ9Pϕ,v\֭myJזzI-1hK^%A{zJ22BH?XjCH^p0"k*{ߕ2X(HD88ρMZeXF :m4яbLS#Hy˨Gr[_cWz3}}'F0)'pʺ?> i?ªm%߆c&h{(h1\!c5$;5y~~d8~Lwn$%kU["fN:I[utv?{WbnfeF$]i`w2vv-L*= DÂWvT -H0# OqG߾xNj#S|=UQ6+̩|Yݩ}Fk_>-+%|*^ezA5Z \$ŎfȓJiIu`ȔqskwS*c(nN$/'FEy +%hNFO*v| Sxlg4p!aHFp~’NҤݱ!v9F "\+) Μ*~r4yV\#wS7Rl<`d0sXh`j4v}h  m*8Cohj +g^[&,pL~ϖ /Va@{Oh(uSZ6lP޾]\߻S$0q@"+ L "f _x^QGrdO$|9ȒN)R^5~c3>̟ق{ZX򨖵+t.\2wt"Ya` +Xm>`xلB&w0n|G[M@6dM-eMaw<kɐw?RG`SыMWP2{̓hCmh*)8`qпEXTؾui不|xNM +SfXULڜm68AFX3j8|7YsU%܈x`IzwG=%;oN|FYo;nƧ* M#h 1g(%gw:p yvzP|Z##s`2wv]ZCbE:YEgdxϭJqDGj,J +V褼LLZU +vΌ{L_He:̗lI!]8&=eϰ;a^D0i`aO+F(eAHkˮ`%b + +q= pZgt2UZDKz0}oZrc>EbPγhhv C-8KKSjx-4})'U /5{qμqjQ%NVy?i/IWil ̪Ae5hX5R`!9tأn~Q(БRHWHX7tw ?z:܂&vEi&YȡwnT $43Ո<<9Vq"o8aaJ^`q=5SI-fYUnגWTekgm3{#{x!-zŷr0mI۷ӬH IH-YdofKsK3B % MU#^LD:{k[pH8lk1_C%|У,f1w5M$抨:PbBw~Qo=+JczsUY7L Q}zjlP< ¬!@'n߮9zcQKdC,'vnL_{dQo) !5Uh#} í\#PD7k +I\[I0Z7^A WuBݼgѽ +xf>'aڋ +wm?7د;DzB6FqG_}N'"tr3ОP"T]{nyaA|yi%cbZ$XEtNx8}z5E4R- f/`b+*j H $Dԅ* Wq "&ň/yx4'ɹ拋I)T+(]GF}夅^TwH4M_Nu>繕Q`*YT nQ +RΗ^s4}aId ]n4\O ԜcDtWXΆ?0 9Vv>LjGRCs)_aSnP}PQBOqJ>BCzB;¡3?ƴmԝ׊XeOIk3Ç>t{L鴪 ϶9ORh-Z`JݳF쌉,#Hܶ0(}"V >;:@-HJ3lέrYeB2D4k$,s]^V ֏OXɌLW1iE|}Ƣo3y4gز\o Ad}wqOa,JWu tO䛆Ubh\;/&o9.qͫ WY:k0OGEiS 6"3b>T:D)dInĩBy=ȉvᵴ4yNamkab0Rd]#.L h\p1`aJ13"^.ߛ: nVKN.U~ a\J +V kyo~CNh/X/ÙWBBkhSNA +"BKtdhs7c~Ӻܲ"'յK# +G!U~gVΰ4(#'AM#!UZQE'[][lgz d~#hB䃜Ohw_+P0+Md(l*SʬOǣ=E\jókoK-ow\^Df]U qC DYVBTX$ 2> +stream +x}R{4Tyo+dt,R 0cf )&Y34ڙ;Cg):E[BmJz %RT,:;9"{:gy~:lw-0'(TK˥)J!8;8l htÄNR'%q`;6/g?najJ'ѩB8#(d}*ldA Z0,EQt3 JB*,$b~ J"1xÈ0TY8_lQ1 "u@0Fpb)<{,#(Ƥl&LRB͑y<ǚN4f4,-S)1dZż5G6"e`_ XG"{3軃C}OD!wEb\+Ir+}^C[RV 4Z:!Ժl|5glr.THvRpT`cUswFm9Utos$ަaMJuԌ.d{|襙RuGTlk:px~U;/ M\9''S\wĄ +Z +OS]w!Zd1)80@5$Mx^xhi;|}Fw+X&elK4b>Ѣ[V窐}r^3sMוd=ypwoO1߭X:S0!^Y =%erva^9OYe{ݭ/X*5G2.Oq([RoTraq4J\k*F*(ngWeu{rG>МZ^l6ʠm?v{Qv6U./h~۳ᰱu$ccLL7ç*~fҟG$JA{ C 
u5|: +|Jc촯V4:s[5r#Kitn> C=M3kqCADMrUq-zVܚ:e㭃iq(4rI7ʣ)SQWSO#67ZQŇ^zV^Mδ6fώ 8÷ +endstream +endobj +737 0 obj +<< /Filter /FlateDecode /Length1 883 /Length2 1288 /Length3 0 /Length 1908 >> +stream +x}R{<NN.#~dTf޹MqiC;e}y'3rlI-J d[ݶlmsӮ6$%!ۊDvڭ|~Z}u`T2V \ ϷVa P<&yv*CTPLi4F}OĤ*`10 VJsk'w# +x(,Ui @(ѩ @x4B D (SlTgQ&(.#, >%P0,|WsxQ` AT}BT*VAI`@ +b{y`9Ca0 c.1 +1ޯ@xH|lB>a@4Oyrq[yD?@x /~7#|W4B<,"8_< ~@T0!sl /BQ(,w +>1FA#R +^UفTVΦQ Wh9TC FL.i"d a>~/JU;TBD +`>A _'ɋf= *r !EE^x$Ќqz)wm9jf('VYhB}Z_F*Ӵd~7eg^Y}_"f8eo_"m߾S /|Ļ615ݡȄ}:+ᄇ^NQQaؽjDO~y8,ЁYsmJT\rml1R`1Oz< Dڳ{h] fX;%ZtVvHR(q7ݯ-oxZ6:`)i;; "YѣUX*1mA֧/]KyciEK5O3 fLg#7ǜq]Nv({G^(mG~S s~t wpa߈u!_?ְH Tѫxq~+|@T˘-avB#aG.`k^|dڼnYc"L=TsqP64v{WTҽ77f(QDm;U/Y&QL;v?\jcv8 v_W%T^s#CQs>smV 3S+*KK>S&v[gƀV QY=)]ƨ]Y:iK܃[븆sSc̲Z#:%*i#>)¢9""4cdWvj:Xmp*0_ 5i_nCM41۹LzadsӃCnk&e*\.NMʽ۪U]vT<Xd:n43.~9A*mvN&f՛{JS#K ]^f̰F5D}%Fͩ%MSv_ءnUl˶/Qn[75Z1۪S_w8/}ݜGH\å3g.3'?Edt<ΘtV|{-'3"nד1̖,[1Vk,P;۔QMwxe-WF`3|UurB~NƖ%JPw\@YZBSQ:'FI`q,l~)s~/ +endstream +endobj +738 0 obj +<< /Filter /FlateDecode /Length1 1284 /Length2 1147 /Length3 0 /Length 1893 >> +stream +xڥT Tg"k%d@xf+1(C2ILBH +h Ԟ *EKBt9<ʢBA syP%b`6(4; P?@q U("o$0O@0N7dḊ'#*82@g љ=C4B g/"RdI8"˘9,B\ $%*~8\Dć5Å!a'ui'X&IN1Oߞ7K{գu7>n`[[lohוX՟WNtÒ&[O[AO쟭ϫK):qt|C8V\qnQ\F +Ԝst~qhGő%_n,2,`Tw|_tFAm:<`*u/~Tc.Ofۯ{u4=/#w-ZZrq(k:5M‚ц :16Yiudn'_*FO:{huwƚYzdW,g]jx0y8F MڳE.39^i>enIfo_>(YTV`͸w֫0TqojYtYhgҺ +endstream +endobj +739 0 obj +<< /Alternate /DeviceRGB /Filter /FlateDecode /N 3 /Length 2612 >> +stream +xwTSϽ7" %z ;HQIP&vDF)VdTG"cE b PQDE݌k 5ޚYg}׺PtX4X\XffGD=HƳ.d,P&s"7C$ +E6<~&S2)212 "įl+ɘ&Y4Pޚ%ᣌ\%g|eTI(L0_&l2E9r9hxgIbטifSb1+MxL 0oE%YmhYh~S=zU&ϞAYl/$ZUm@O ޜl^ ' lsk.+7oʿ9V;?#I3eE妧KD d9i,UQ h +A1vjpԁzN6p\W p G@ +K0ށiABZyCAP8C@&*CP=#t] 4}a ٰ;GDxJ>,_“@FXDBX$!k"EHqaYbVabJ0՘cVL6f3bձX'?v 6-V``[a;p~\2n5׌ &x*sb|! +ߏƿ' Zk! 
$l$T4QOt"y\b)AI&NI$R$)TIj"]&=&!:dGrY@^O$ _%?P(&OJEBN9J@y@yCR nXZOD}J}/G3ɭk{%Oחw_.'_!JQ@SVF=IEbbbb5Q%O@%!BӥyҸM:e0G7ӓ e%e[(R0`3R46i^)*n*|"fLUo՝mO0j&jajj.ϧwϝ_4갺zj=U45nɚ4ǴhZ ZZ^0Tf%9->ݫ=cXgN].[7A\SwBOK/X/_Q>QG[ `Aaac#*Z;8cq>[&IIMST`ϴ kh&45ǢYYF֠9<|y+ =X_,,S-,Y)YXmĚk]c}džjcΦ浭-v};]N"&1=xtv(}'{'IߝY) Σ -rqr.d._xpUەZM׍vm=+KGǔ ^WWbj>:>>>v}/avO8 +FV> 2 u/_$\BCv< 5 ]s.,4&yUx~xw-bEDCĻHGKwFGEGME{EEKX,YFZ ={$vrK +.3\rϮ_Yq*©L_wד+]eD]cIIIOAu_䩔)3ѩiB%a+]3='/40CiU@ёL(sYfLH$%Y jgGeQn~5f5wugv5k֮\۹Nw]m mHFˍenQQ`hBBQ-[lllfjۗ"^bO%ܒY}WwvwXbY^Ю]WVa[q`id2JjGէ{׿m>PkAma꺿g_DHGGu;776ƱqoC{P38!9 ҝˁ^r۽Ug9];}}_~imp㭎}]/}.{^=}^?z8hc' +O*?f`ϳgC/Oϩ+FFGGόzˌㅿ)ѫ~wgbk?Jި9mdwi獵ޫ?cǑOO?w| x&mf +endstream +endobj +740 0 obj +<< /Ascent 952 /AvgWidth 520 /CapHeight 632 /Descent -269 /Flags 4 /FontBBox [ -511 -269 1309 952 ] /FontFile2 843 0 R /FontName /AAAAAC+Calibri-Light /ItalicAngle 0 /MaxWidth 1350 /StemV 0 /Type /FontDescriptor /XHeight 462 >> +endobj +741 0 obj +<< /Filter /FlateDecode /Length 278 >> +stream +x]j0нb"q` !%E88jY;JBwq43gv6Rp:xA3bZdw3ݷ^ehn1r_nTDZ&p'9{ u}t\]\Uֿ=SZAigDXF҃ѷC.<өR̿p7EUJ|TY K{Dp"`!܂}b.lAmw"YVX-í/򃗇 +endstream +endobj +742 0 obj +<< /Ascent 952 /AvgWidth 536 /CapHeight 632 /Descent -269 /Flags 4 /FontBBox [ -519 -349 1262 1039 ] /FontFile2 844 0 R /FontName /AAAAAE+Calibri-Bold /ItalicAngle 0 /MaxWidth 1328 /StemV 0 /Type /FontDescriptor /XHeight 469 >> +endobj +743 0 obj +<< /Filter /FlateDecode /Length 301 >> +stream +x]n0E +/E@CHJE*=DYvT,g.٩yj 2{j9XyX|1VF35vNdxl0ɪRfr󨧞٫썽M'l$QR=wYf)m4&[:>V!&ͳ^XTDuu>ׂWMpm-y-C +IwPj%@QP@K(@c +мzvP1P + +n;~`|=v^+3X=\@yq +endstream +endobj +744 0 obj +<< /BitsPerComponent 8 /ColorSpace /DeviceGray /Filter /FlateDecode /Height 214 /Interpolate true /Subtype /Image /Type /XObject /Width 214 /Length 1345 >> +stream +xa0]Kcg.I}4&BL35|ROOߙet`:0LӁt`:0pQnU8mmsTC0 @*o!x @oS  +@OzBW xW @/:A0)>Lt7:@0}sG0yZG0uG@h;M#xe=,hv,h2V,h2Fh6h:h:hA +hAh Ah A*h +A:h A:lpjbO|n_XnU,L`e%(jY,\@}Yk(3ݪ&q̈́ .*}>5G7쪆@!~g,]{5% JGЩ*u%TPBH22*?^* )u/V^Xb4?tJA9/T|U$jЦ +x + [kC(W - .jo]*ë)mc+ݘY8N"uKl!h'I]D[ RED;:,QGyYׄ5o ɵﯕ^UuSˮī5Y4QrT5REcta +D\P)Z||>> +stream +xW\SW?7fž2Feˈ"&b R`QѢE:QVƭ/RAZ\X}~_p9YB[xRi.!')'pҦ"M4y)'.. IDH^D)B5vQ>:DPCÄ/"2 +$. xQ^ bd.eb>+\+ax,wWwV,?SV?\9i7^oWB !/C|^h"`oEAQlSGs97f’+GxBQ() (rLɜX _pH%sf,?CB4+ I9I(v.z6/2`;an8FK =O-Ɛ(>Q(J;BYUfø"H9K*x1d Ћe2#}PL8BR0|4u#*@bT@Ygh0KM3 +Pȳ|' +r X>ʄrDBA%#wU +ͿFr~b0 XZ܁I(Nar7%VH?T +cm + CMLGD[M3JB>Y!sҷZ炭(xιd8>`;;gxh*42;H5+⹳^\6[̿rb\~bj9 hq]dS6GlѼQ0I7ހ.5C Ă/Nj/{hϧ\ %IUA4L y +u1 !l{|/'dȔ2Jg՗ .yʝ]쇊((Ǿ#O8ނV4ޢ@c|qA?|H 98 +d>}lGb=|GshY4:c+2Sʴf1LGĘ3Y3LC`3C>c$c !D2u/ a/ST9ް#kd>gd&e 9WETd$Fĕ=1sMV-`<6](͗fO ZebBh0ĢrZ$`raN\zEp"hp2p(}|iL%*dqf$dq%|Wg; =Ћx 3eEJAH`z#k^poEI( DKĶ -ABfB BG1t.+݃/PzK4at1cŜ0w Bh,K2,Lɱ23[mv` طX v]`XS)z3eś¡DQ(3)YRJee#D9MHtQRq%{x,g2|!^ux#Tv:ޅoKMLBb9C4gD71@jPMNT_*:EGPGjP_h4%MO[NJ;@;EJ{Dt'?=Σ+'= 5ÝHgH^ 5cƐJ@De.V*=*CڪI٪KT76SBMMJG-^MXmAjjouՃgWV?~GFFF +35^3uL.S\Ĭe611ihjr4gijhּٯeZUբuKkP[WM;V;O{^ ڽ:t;PNN3:tq]k`]gtzzz]џ__\7348dpୡ!Ph̰+qFAFB*FFoYơ9ƫ?0!LMMl39g?No8q5:&7ia:hfnn&5dvƬ<<| > ] :OX,+u5`ija)ayr*٪kUkoLum66SmlܵUnm}egoj]=׾~} @u7{uGȱI3Y\|E݅Rϥ5ڵ 6'>=ۃ ߷{n:nnnn;koLԘ6q'9MN6鶇Tmyzy<=l2x^}އ3g17|s;~pɏy;X_tZ=dssMaOM92UoS!xHxHUȥP͡ì² {?AXqks^ "FG%Fm91Z:25rکclc$1GcQ,7vm8qkMpK(KhOM7eҔIm))3RR^I6aڂiLiO1r͙3g^e2+wٚygP3R3fxss7 +}BL5YYkDQ8XY<;"{{؜9rSs12Z$:|R'iksdQ`fAsS!w.. 
+(-z=/ebbIqGcɲǥa_'緕Y-)^Yc!p¶E֋*,_g꒜%?הYgf+}JfRۿ qieTXͮ~/ݾ+.\mmdՁ^SکkֱUsj&lߠAkcM6VmzYvJ-[myUڶmͶWo;w4,뮔]_{PoR_]n= {6x545ݻre|_|MsKO桨Cm7~gݖ#G]iW["[ZZ|cj_yBDʼn'KO?uQ{gq6sQÙvN]G/z^l8OG.y^juϕ֫x?޸yf۷f-{'Ew-}5Mktyv9{R˻_5~yl׽X_Xߕ'ӟ<>M-}{z˞c g`×y/^U6~o{GzCއ b +endstream +endobj +746 0 obj +<< /Alternate /DeviceRGB /Filter /FlateDecode /N 3 /Length 2612 >> +stream +xwTSϽ7" %z ;HQIP&vDF)VdTG"cE b PQDE݌k 5ޚYg}׺PtX4X\XffGD=HƳ.d,P&s"7C$ +E6<~&S2)212 "įl+ɘ&Y4Pޚ%ᣌ\%g|eTI(L0_&l2E9r9hxgIbטifSb1+MxL 0oE%YmhYh~S=zU&ϞAYl/$ZUm@O ޜl^ ' lsk.+7oʿ9V;?#I3eE妧KD d9i,UQ h +A1vjpԁzN6p\W p G@ +K0ށiABZyCAP8C@&*CP=#t] 4}a ٰ;GDxJ>,_“@FXDBX$!k"EHqaYbVabJ0՘cVL6f3bձX'?v 6-V``[a;p~\2n5׌ &x*sb|! +ߏƿ' Zk! $l$T4QOt"y\b)AI&NI$R$)TIj"]&=&!:dGrY@^O$ _%?P(&OJEBN9J@y@yCR nXZOD}J}/G3ɭk{%Oחw_.'_!JQ@SVF=IEbbbb5Q%O@%!BӥyҸM:e0G7ӓ e%e[(R0`3R46i^)*n*|"fLUo՝mO0j&jajj.ϧwϝ_4갺zj=U45nɚ4ǴhZ ZZ^0Tf%9->ݫ=cXgN].[7A\SwBOK/X/_Q>QG[ `Aaac#*Z;8cq>[&IIMST`ϴ kh&45ǢYYF֠9<|y+ =X_,,S-,Y)YXmĚk]c}džjcΦ浭-v};]N"&1=xtv(}'{'IߝY) Σ -rqr.d._xpUەZM׍vm=+KGǔ ^WWbj>:>>>v}/avO8 +FV> 2 u/_$\BCv< 5 ]s.,4&yUx~xw-bEDCĻHGKwFGEGME{EEKX,YFZ ={$vrK +.3\rϮ_Yq*©L_wד+]eD]cIIIOAu_䩔)3ѩiB%a+]3='/40CiU@ёL(sYfLH$%Y jgGeQn~5f5wugv5k֮\۹Nw]m mHFˍenQQ`hBBQ-[lllfjۗ"^bO%ܒY}WwvwXbY^Ю]WVa[q`id2JjGէ{׿m>PkAma꺿g_DHGGu;776ƱqoC{P38!9 ҝˁ^r۽Ug9];}}_~imp㭎}]/}.{^=}^?z8hc' +O*?f`ϳgC/Oϩ+FFGGόzˌㅿ)ѫ~wgbk?Jި9mdwi獵ޫ?cǑOO?w| x&mf +endstream +endobj +747 0 obj +<< /Alternate /DeviceRGB /Filter /FlateDecode /N 3 /Length 357 >> +stream +xuKBQǿj! CÛUA. j E +W{Oh2h jɩv{.˹sﻀ;2VP-#K- -Td1EYT{>MYN8z+_ެgї͙~C3,%+{|@0R5g5i%f[r/_r"&ԟ7('`(AEDl} 'F1!Gf "w^ɽ_rMb5z(067j8|:F#lHخq`I{tjrީ7V)j +endstream +endobj +748 0 obj +<< /Ascent 975 /CapHeight 722 /Descent -217 /Flags 32 /FontBBox [ -576 -287 1987 1160 ] /FontFile2 845 0 R /FontName /AAAAAB+HelveticaNeue-Medium /ItalicAngle 0 /Leading 29 /MaxWidth 2225 /StemV 0 /Type /FontDescriptor /XHeight 524 >> +endobj +749 0 obj +<< /Ascent 952 /CapHeight 712 /Descent -213 /Flags 32 /FontBBox [ -951 -481 1987 1077 ] /FontFile2 846 0 R /FontName /AAAAAC+HelveticaNeue /ItalicAngle 0 /Leading 28 /MaxWidth 2225 /StemV 0 /Type /FontDescriptor /XHeight 523 >> +endobj +750 0 obj +<< /Ascent 975 /CapHeight 866 /Descent -217 /Flags 32 /FontBBox [ -1018 -481 1437 1141 ] /FontFile2 847 0 R /FontName /AAAAAD+HelveticaNeue-Bold /ItalicAngle 0 /Leading 29 /MaxWidth 1500 /StemV 0 /Type /FontDescriptor /XHeight 650 >> +endobj +751 0 obj +<< /BitsPerComponent 8 /ColorSpace /DeviceGray /Filter /FlateDecode /Height 587 /Interpolate true /Subtype /Image /Type /XObject /Width 512 /Length 13388 >> +stream +x]gSE׾[K[: D %Qy1X( +E(##4 ,Rβl&7=s$glsy涙33-۫o{jiemtܛ^_Y*v9ǷٖɦFU635Q1Jv5hwCF,_׾W`fOy٠ +nL9hRٻMm˾:@_PRk s'9rIQAO'푗g9xO& \rNAOL⏽WsDKm=~KfфjWuCN}$Ĵvӱk0MJZ}IyZPqu4$%^3-|mtGDO|Ҳ8}Z&iի`L1]һ>m2d]m> +fd \V]r`^| JqBZ⟝W⮚{K.[[-<0$wnh4vfy-Q6[%*t#ɺ@ǦK 唛'mw=!ԇ^Y|[/n6 3c@@^&L|lrZ`L3i*nQJxq155zG#mJ˱P'R\>.W6qP q$θ5K \FY {qmI!+$eǭ8 +47$AYp2 K e&z@=îk n}5%@; *i8nV: /4L(T}w{@Qj(eB``V1 7֔&Ĝr%J268 WjIFO)`c /ghE6KϡIIN}i@g}LՃSsi0Z&b)O ++*f~LOC3+&tK P>$'+' G6 .) ?D@0m-n="0AZe ?F`]7~Ilѹ5"g 0C$Z 9I5 +Uq`7:k 8"p4 l j,u=n20J| +´,Hd2A2JYr"|]r p?<[i` E%[?aX%9\Re`#Ё+j%Wʡt/p /Fʡtq )'P:DT +Ur =A%Ё'H=i` lz@[Zɭ*jY?1CrEK':0Jb.6 ox/Ë`䟸\B= K O6,M$D`6~J5Iwʎ vÑ%l*N}a"TC3(tt@2^?Qo#^F5R!!O 6KdI&[A`Op؟|U0k4*A*>m{(z| K@`StߥP +2qAztҿT=W#QMLyZ>E<+"py8뤼*i8 ?7z q0l@٢ FkL~3t[(m2G'ɵe%3yfιR6D 4#(@QGf٭TP΅GAB2Zj +pBژ4 +NGCu91!p 71eFޜwU Cd0uo yWEPW>\}yRzWsY^+kh;T$PZA`#yY?kGH1,~ ! 
?H2O֮Bɼ$ 鸓b呬V2b{'*KQ>Z׼w6 A +͢2a #nNDf :R]*fhE#sDvpT.B;dM!1ty8jf[ICg,IFlw\ikb^OG/Ff˜9!MAj>zB +SJv--<H[ob.p<]4WϑwŠTm|Q.ĉg8і-1 *s"ܖ:Cԓ8^K(B`+ ,Y-o^%X[`Bc0[VDo*Ґ[H@$c/Ew@@uWjQN[܂viWOU8{fZŝ2U#9\Ihf; 8^L$&+Ez+яOR-R(=&u )H چ`J] < +ym:0A]so rwOT3C/dfcG}uA/6apg{Yh.zJ +-u&Rj-.U zlz׽"d#zM H≮coBZELoIl9o틄b|֜{YKA]V=[~lK091MI[}Htn=uv:4s6ky:XUzKnZxqve5ڞc"{* M+ŋ|pj*.xcP+jQԶ c,ίU:#} - \򝾋^7n 2^f;H_Vh?J2nICLͱ)wve["dU,w.6-:m&:aPoCS4zLP#V݀g# n:n@;vxHVe.6\"zycbU Kkw[pyAG|4XICς˹zYd0Ҕ]u~=awN%4\[kYX2 85[ ƴzHA3XGo6[Kv4;p^kW2@%*ݭFcz{mBZB^h z:εM}4ZӓAXPynfl_g5s i$3HlOyF>7 L1]Hk.jcW2;!?, d!j$j/k}AԼ>y%E,#Yij3|$/,{vǤFrٵMC,U^۪SOχŶ8LibK o9\V=l]vh̹' uf 9F'Nɶo'Ha/|=A)摑kAVGn֔W=m(\1}T$d5c$ԴYbf$a}o kt|5wv+үgL'} ]OZc@C,rDzxf<;l3(N`4-`G~4^>Wy$xFhֱGǞytn>tQ `pwzl7'j[ffr<323V 5~r= co,Ɯ)R`T.X.       CºsIq@- ۚ#T4l%VpCDjHx f3ҩ7,<7cpbU G`(esSy W=7-v3ۙ{(<1CS&M" 5 +b-9ʯS3 .< P=_9(%'+x3b=4?V}p9rPJ>͔㓆r&2(Wȁ]5_9(%e:2p?|{Dq;Osgybr!FYUv@KY3 !(7X ˍq/kRf[8)AjȾP R&avO ЄʞV +M#C}*U|E>b!&![N# _4;5\e*&RiomWO&0B*5MȳuK҈ M&K"(]ô/Ofrs,tɁ}dF~S{|!~4pɬ׼.tY@ѳ,:eZ'ԟ9j~=nQ57m{WaIVP:q,3HDBʁ@uG%866AhPoa7id!.sjPxo-xCJ2UaQUxpBWSB9Qd8.1u|^d0,FD9YL^Ԭa"4a`HxHO#@s4g=zI_ R~P=qT%#Rv{GdžN+"MGR)a 2N.D*jAԖVDP$ƁSk[Eۈ p bHcNJ6oCݠQmZffGA\a6g}LoXΤ-}ExNA@2孉f"ax ƛkҚ7gةM"㧤TM#@NTNG#lt'ϳDoЉ&iK&J pjaUYJJ5:66dPtBR T:-[ޭ.hifNlc;J٦3]wU&BU6gf!'t Q J-HfMuƂNb>Y̾jCAX{8@ X"eD`+dN , B)hh9R0Ԭ&oaKrye*!𢗻P>Y JNy dahYXC [}͹t 'VJag3{ۉ+O#mJ ^ +G/ύ\sĬM);2ߗ"yc4AT6~" ViY8߄١f؟()B(˥n6B-*ؘ +hϨ|ƄVV x=Ee|҆zT5Bc)Je`y5lѦf~i@[>10b8V&x~κizRs0 @"I. 7s>YYQȽ\<[Ӗ +SjQO@k$!s\=&v[/Sv(@,ܛY(XГ̘bk)\Euo˩POq6o "P0]S%tذDqNP[ Ȇ2(n OWo,>-i;$X:bm_1}E,sk/^I@#D UJWV+mweuv$PtK]''u'؋sg-GuZ'pEV}<IVe5)A&ˬ^ -9U6R8Gz`kv^ngS3RC,%7Ĥ=5IP'~b4k3c'ڑd  hXַ?ۜt9w8F9sF23dJ->_%*{\P~͢C%>젞~: )uDn +w(JUW:~-H_cv4Eť_hM+^$vVj DZ?Q +ITw8/W_/#!%|^:Old{VBw‘(ŜB;C,# QZLaf$Gr SQFvpcB_J?t}l^{><ty6:-*2 pCqSX-W&ɣ<,A2g\O7>dk5ÆD2V+iX(@-]AXc%Txb\\R3ɈiT|{t0+U8> +@Q0 +*HcNdғ P h؊+OFC({ WzD@\%6ҢG薖DX6]Tor +5Z+qM>M)#Po黤y*>_C_9`[^k`c]1zb_ŋtr%,(2q~(8NXykx7 ~/&֢f>? Dk*e<K ѐK6x3Q7eD2FdvS5ICCϢد +$Ƀ@׭NQxVA .Ȍ/jQpEh,)#P4\=*J1Nɹ +S@U5##VٔFKr(so! RU,ds k=V|׎BKP;6DmRXޢ! +vƴdHd2M̰a :|fybx~MmkT̅b4DHe059mI#$TMz!|M\p'= j;*v$ Iɽdy &pR--ɬp%"M#/||YR=Z{3c[0̖n _$M``]ܛV XN;\~pp]3qog ; ,o@eɏ3S^a+C6lr!E~jOP,ٵrUi0uSh05vX4UꃻaJ R[kWtw#n\}St|fTc3 2_M`3[h?V^mcA>,sʮ8&ѯeLBf*+Cil:(ƊrDfNj3WMM}/ږdL4 +ݧ^)aS"Xzm9VצdVՊl wQ9L6d E5_ܮ)7=x#/:Mj[/f̉jRz7Vf#᳊V3vSuO>hY jd“(f e[ +2VMG&dFjېwOV 3a`˒ydn%Q}t V;%״G/B*=X?H榩VWR=ZsPQqpWg1l"Y ӹ23=B;7.**NP.Rnj}3,MEGLyc̃)$Z:ܝ]آ(\o= +]6)CcKV۔qej̵:b}?xV̭S߀d_h <1X8NuL:5&jPzD]yFmo<7ձ1lq[\7:>wRaX:kgc֜H${s]pdacߑ5TȨ^*jmQ5LFo:\^zBiu9R_$`ڿ"]Qx97p9s>0ICnj82Qs*f]Je12z&gn[xX`> C5a;aV%ۋWA~j 'Z>s,r:n`aa!C(ezܛEyO-KvCy\ Kq< vZp5U&`$lV#-n>*hwkOu׏(3j3xgmK}FMN ;Bo^.!pfŕiL'Ȯ--V&QEvWύݩ=XoO~X HtO𓗋ּlʯ MqM~Yh1?p.c7ÛMqϞ`҆Pp:EސqFY-]ת|ŔYyyAznyk_:P!=pqޠ$]A'q%d[ބXBLj2i%W1?ٿKؑv.G5ga EoKnR8xܾ*GZб^s F5T9:ٷab3.ٱzѷ3t.Bzf}7gYzǑ 4`A\4ѻ&h^hzo5 8Vmk5̹IS1&ӻl~uw*9! 
@@@@@@@-NXz"Ɖ>n'&U1e2k H'-7U8 -)P\8#gn 5玠[({f3rPJN~!t)åPYr`6W@eA)(K&x"IUM9 \(Ah3`7u'PO?|gA(& ~2Ac} +aViUekRf )AE&Aط +vE~db%( ÁAh' 'eKPj?^<; `( &9,'X ~ ⫶a&Ӫ(ngpJJ\}S5@@W bώ߅y=WNBAT\&ʈQ*}~/B*273r/f`kd (s\j ^U0POjbG3-%Z㧳<>%QKTSAkDՇJ|Xq0AVȿhXfuXS6@P'\n~>= +;4VZCSbE.7L\vpIhFN/agET*Q.@4W=0A=Zq]GF}zR#F/W'h삉`.Ԅ珡ٗtS҉o}~l1Βb +OgVWbDA6 vՑ>_@Wd?OQWij11ѾhuBlM@O'U_$C"( BSlJx҂ ߚQжT=t|ﭠ#G'RU죹(7^ #a@*tbvGCi7'" PzWSuLv2J\BقD؝TM߇'S G +\+~ ;'WwlNtBIʻhL 0{nov!sh3sP> ,ZS# .?`6+EJN gL}Sqi!k +{H^;|bZ6G%d(~:#{=7J۪|kalHѥRp0.ozG)*D3NL㑕IiH}dLQiͭ 8]B+Lc3⥤f@ƟuY TxD6n&{bQݢCd^yJ1VkHbU@?]+w @.6A\.cIRj"*$=E~S>HH,_;F0ˬFֵI6nv۱L|7ўce֚K~q=ܴKX%E k#O#CK85yE(GlZ{po SD"vК˷nqrX9@G?ݸ772>/sT&'PI+u[O?h,nzU1MאxMk@>0]uTXqz1aTgLkq!zl3u:b`gi͹I˪WwNyĬuI@ta';='c'x0X*z6LHkN"4F+$/8:M9^5}"O֤nb:kdsAa1h"ZCipsYkWKgm’}-vL 'j}顛$3CzZk0r y]IQՕ@F|uwK-K^m+@lQ&Ϛ]H /Zx~XΆ+]Iy<[]cLo[fھt8~wtzޓ;ƕ@/c۽mmӘẻzunx?[M +endstream +endobj +752 0 obj +<< /BitsPerComponent 8 /ColorSpace /DeviceGray /Filter /FlateDecode /Height 512 /Interpolate true /Subtype /Image /Type /XObject /Width 512 /Length 5305 >> +stream +xߏτ/$ިLIſ1AzÏJ!`QMЂc/%3*`JE3EA<;˕}^.㜵]3sZ-'͝_kgM>71kթ > +W:6N [hgo-̎.t +lRز[nnၐk޼04dվ2? 0Tޱ9?)Ѯ, v򒐍BZ̺.BN>ӽ&`cWŽN-cn9{肅mZ'7 :菑n9} ^h]ޘ X0Т?F&̷{^0{`a>`S@1`-߱jo#߱jo#߱jo#߱jo#߱jo#߱jo#߱jo#߱jo#߱jo#߱jo#߱jo#߱jo#߱jo#߱jo#߱jo#߱jo#߱jo#߱jo#߱jo#߱j+o=|!ԝ<ҩCDW8ۣƚכ{ny޾w׋@`g4_ |Bo~aOk_m !'ŒQ-ͤx4^v/yݻhQ?Ylh4^O` DsQ~FJB!Z"p1 Ijb^CKIZwT#Dv[k8aCOQ,ຫ!V^B>7.it=J|@RLjYU4 s-{qHك54n9Nƻ{8qHki8(#p㎑25459Ne8)hs30q2(%qHAkAaF(;F +\  s 5@ p1Rg`d0QJ㎑6<'q2Pw8kc5 0`\ #my9Ne8)hs30q2(%qHAkAaF(;F +\  s 5@ p1Rg`d0QJ㎑6<'q2Pw8kc5 0`\ #my9Ne8)hs30q2(%qHAkAaF(;F +\  s 5@ p1Rg`d0QJ㎑6<'q2Pw8kc5 0`\ #my9Ne8)hs30q2(%qHAkAaF(;F +\  s 5@ p1Rg`XD/5@ ڙ߶s;ˎm4՞k^}d1YYh7EU_{)jC`KW'c*կ}!EWk7LWK.ծ^׎oLW tv]~<_ ]aǨJP6FyN=;PUڡ^rP; 43*8~܌㎑R2e1q22*#d~ocdeTr3;FJɔ0˨fw)aQ8)%S{'/q1RJ69N_F%7cLm s JnqH)8~܌㎑R2e1q22*#d~ocdeTr3;FJɔ0˨fw)aQ8)%S{'/q1RJ69N_F%7cLm s JnqH)8~܌6e0R2eٸVrgf(34&&^}V_S8`'+D׼[T/X슗Fk9w1M\슗}8V!bh슗O>EpS?V_q®7>oF@o=P5[ǂ?`,q +`=c[vzrB@,{9`qS{c%PQHe/=,q +to9 + 5N!@9G! )ҽ(($8@7VXsHJ^zX\X s`KkB +rB@,{9`qS{c%PQHe/=,q +to9 + 5N!@9G! )ҽ(($8@7VXsHJ^zX\X s`KkB +rB@,{9`qS{c%PQHe2gxUƝ:sRrYVّzQ.u/a}5vE>Ck 쨑]~cզ*D׼t%K`Fv߯f+D׼4-%*M`{7{hn/_!?nE<]qіO0֒'W>]­؎_Bp#-4*1}p"`W@7 (&`apu-4*1}p"`W@7 (&`apu-4*1}p"`W@7 (&`apu-4*1}p"`W@7 (&`apu-4*1}p"`W@7 (&`apu-4*1}p"`W@7 (&`apu-4*1}p"`W@7 (&`apu-4*1}p"`W@7 (&`apu-4*1}p"`W@7׽0>p"`v&ƷEh1#KGfߢjfV_Gπ_c<KT.XQВk?6WZ'h3s|8'h3p'=n1l0v7'U c0@y1 `rNյ@ (M8F0QirŲ (&99 W@NX6E0G9Z i&(4\] 9bلc圆k@ Q,p"`pu-4eQLssFl1 `rNյ@ (M8F0Qpw-iO1J `rNt9bٿJ?F 0Q避嵐L Q0ke_9 d +Kh#+ʽt":{^k:@np~=Z~r/`5H N8?c`?N$['x1 X}M'ȭOGO 'ޣu'V_Ӊr:F{Duh#ɽt":{^k:@np~=Z~r/`5H N8?c`?9wM_k:@Nhv{6PGpX_BF`mH&Q( MJ8~%VR d +F2b()ňZI$p>P0SAߠHXF`OzAIǷDJ"MOzeFJft#us~~x”sHE8C k?&lx,N<|,ʻDpᾼ6#G&JAWGh>ƞ=;WtHzw;@Qhkn32bo/#\owk4\c^y4FƑys]g.T8nwꃛw~k'.|t͞>K h{?`aBk|A1bZ)ɿc9Gȿc9Gȿc9Gȿc9Gȿc9Gȿc9Gȿc9Gȿc9Gȿc9Gȿc9Gȿc9Gȿc9Gȿc9Gȿc9Gȿc9Gȿc9Gȿc9Gȿc9Gȿc9Gȿc9^]~,К޸`)E,ݽx Zȑ.;φܼk᭐?.SHY:s硁~YhNt +*C+o;vо>[h\5v)Le)z-ιO_}"'>EKvr 5\^" " " " " " " " " " " " " " " " " " " " }I7 +endstream +endobj +753 0 obj +<< /BitsPerComponent 8 /ColorSpace /DeviceGray /Filter /FlateDecode /Height 512 /Interpolate true /Subtype /Image /Type /XObject /Width 512 /Length 8514 >> +stream +x]y` a ,&EpܐOEKh]VcOS6kjK.(AEDAUD";a_H sΜϛ3s;g5+ ++ ++ ++ ++ ++ +$ХK}tiӀ$F@.Yރ_?bV}5բn;g#LfۢwucyӚhFa +45`jyXV4 _C<8eeuB9jE/ +L5xষ#Jr{):#q [JN.07퓃g+Pqyx (p[oqӿ.:0el[x|LZuqhf2qމ8QBVAr; EyxJ\*ЫpS[\|]f89rN + I?v +*S]7kFq1jƍ/4( ֹ0V|A Y'Jo$m. 
+7b(г 90z}0!mv.6>,*=FXΧqaM0]1ן \NgsBw+5QKZC$WFii[ϰ@Lw۠_rB=6L*;e?jT/[5*CQFe "t,'O]L]S%ApjYo/;WmsMYe+w{qm`d:(#T*G+&)h'G|4Uͺ []67K'Y]7C^׫_ݲvHiڅBHReWj8ݮ[YdxV~ a^癊dAg]=!-UF6{O eM>?M>ԁf){bڒ|qpWxY=`zZ_<*=6& -RxGP̹'~0Ń`1]=kf6*Q@/&Y]?MrNMypH<,Y=Eii,˶ypvߞMR/X$g=7QXbWx=֜)eU%+zJk4uІ7JrMI'.X{#Wzᒪ47yh&HqqjܭR} +L!о{T}'Ҷy Odה㿓i c+1@'ώ\ I2% [Kt +7лit]~$]Up*uvꇌz:dM{L/N3ב?d+?ww|SNurSidm0T(ά(=L2hQK]ܻ +3fJ:k*'])MѨB3cv/1yW;;P6"&͕X?AMiYS9N f+)v"Tqcr5boݔF4Bp5HNDr8nS~y'&֛=k>zy^sx4+-)OMiMF3^3 $+h=ޤ^˽> 35ak V࿑f]GM` t|#`|ХE5 RR;$r" +G7ssL}wQr167 ϕ8kq.sip:2q&iRqϡiid987Dۣ2[ƣGeKsY 5?IN՟04(h]ePu3{]SPz6x#KMl(!gƱa{ w'Sޞ4bHA$!0nk$ӟwFGH,vpz]}탱҅kOe6k~v.i]AgBUl_oLW`N )Z/wN)2m!/:H+ '@DKɋ@RF p ߅J t%5 c$0,rM^6? e$oL|8G*#V%06rΛl))`UM W8吾D}<$䔲&Jm~5D7oA +6kPJAp{Kτukt|4 :L̠C9NcZe }"PcY*PAxr ."#l7?~չ<"sPex vߺ!W?$;2 ۃ^oI OВׯ % N1@ jP/V^m.nkEM!˄V7K֜GCHd:" +TDzC{@ʸvf(j7k<2YI?li>@& `]Hefr!& RSצzu|mA} 5I4O _7B^(cW-oR 6^Tg2)/1dB83my3d(ymS@;a="?Յ>jq g'g +4@pqw&|8vQ'@Ö9`J3v*pf5cb5|ThN|o5xYQ+pZ' L ߭8-㏉?_+Ł_8-wL>D+pZCNNZջnb_w2~RAx't` +9)# sUlo]Y,a`VV5¸iM͸jtRvNlrN.+s.¶w|oPi/,dNba[u&gAt95*%H\lFL'.d+l6=ӆsFDvT?nhlTo>R%Z҅a[3IC58CSW-1i{o$5Jzw ǭvKMd/KXF˭8ejzv8S+|ŧhNy[83i@+eԻb8Ze!❫weHAXNl)4!jH A _zR>oh ĕ~^K ֹNWF!a:*7?>Q g/O)AԵ/ҀD tL}z!HG(tD2 +V<\@h5 +̐4Q3 }JZSr,v</uwhʰݵ1z+!ZDZ[U42;;e/1(/5 O7g. D2qi(kGpyD4#۱JAiy+uPJƆe" fqQW.%1埳*@T뷜Y:>Iқ\wR `?c< -m6ԫk7)Tw4]$ k-MA^KceP˽A1h;1nojǃpAw@5ƧAZ/ǵ='t _I!(֌?( +!l3Qm_|"DڻQAˬ J`mh'ҜRcB-'ҝ:=6~M6"x-BvXM^+V!SfU6SMJ[bTK xqȃ^> Ն55 +>%@,[/E_T;mhU[; "%:-]^,끄ƨpRE!; ͬ 9\.zs8& +3B+.'8 `@ir9N1ޣE%`8xKTNC{=6`t F@MϺL7nL<'k`gh h 2R&zHఽ*Cu\m2Zmz?SCnuG{ .-,AWStx ga +g<Ϝ5L9';ҝ<z9#u ibYsHGL96O6?謜F" q DF8g8@.uVƅ菹-oBpbJFe[Қq‹)R=V)M2S+@L{ ( )<2):ux8ݽ3r^G0J\]1iΐD)WH#OڦO4V=R~b1{{)QHh YsfskQ;yG3C:&9wuD#%OJHІe:=DHFk/p􇲾eUH<ȆBý +~;H{ב ځYem;i-g덗gGfA7[}%@a$nVC:O{B`e8qTl<7=7Fh:fۈsMz| +G9leIL>npUj6vHVDBd޶K>kl _ '͋ ZxC);C+B'e+&P]6Q` YoX^1YNs8)㘞:o5>+Mh Vh e~_(A\*X8[Rq$%Wo{GU+Wo,*:v V)id=P?- 8DT ]ҧZ_n'tN J9+dr@yINq90l}#I\1b@tʃIdSPL4 +O-?x?F6GQ' kXQc? +4:UuAc 7HO>4U~=:6 + ++ ++ ++ ++ ++ +M\~- +endstream +endobj +754 0 obj +<< /BitsPerComponent 8 /ColorSpace /DeviceGray /Filter /FlateDecode /Height 512 /Interpolate true /Subtype /Image /Type /XObject /Width 512 /Length 6368 >> +stream +x}U8Nj j+a&"Ԍ#/($J 釤4,%@Q!0DPǗzH%H89ssfY{53y~sff}3Ws}[+GUwH'tDru/5 $ϑ'. | Д{'>(á&>F$@.[]G>2 v*ϨR~h jN3@j[f]H)`pm!WMaT 0'a((׮rAv$vUm0ǡ[DU1z_u"]W?Y_?2}~eI/>}v3 Bv 0K⿭5sf/5}VUҗ5i />Ftdǿ_A7⟤_90S#+1X JÑ? +KߏE"~-9c9lsMo4?ϟM{65Qer/zMyJB.$& pw͟qKƗ9 kv  +P/'Wr5{?:n9* A.;T#U, a.X#AfiJ3$t:..QFdfJ<"3wpm>}e_?&o|#wiV?h6q@>aB<"O?)83_ I]~w[$ϴd[Nniոwl`"C13;v,gP3lLٳW@EQ.%@ՇG8+?Wc/=g\s#`̃uߑx]N?tNpǂd)O{Uź1wO$V?7|gWLNwvY㇝[?3@t G9?Q̪i<tga;h&y-柺\?r)ޞ,jK-DU'E&RJ?GR\\| 衜k7,e•ٗt/o -"X.ܮ/m~tr}o wіQeOIt= +ɔv hL٨xe*n +];隷 OC,y1~4fu'|u^~t/5-ζMoJ2^okjoKE{Y3VIEѭDrn]dSbK8Z{":V7,Qh/P|5I-K&D u +aOr5E2% -Z`HxD'+LVJZN.~*J䪾.XqCSkJ(asەoՋ{K1<=\b \̯ VvB/%E/4vEY:=ݷzJTnܲV3 +?*-{Mzf{K">C;(t + f-hg7`l 퍼-wYR~Ys#џoloá\F~ìe;t[ ndӰ-v2y ?&9n>H+~rsG~ +N707mSS -uCw +`߸NuN2,ԩ)\ƃ:!;xoR:tI2;:!,Q7% +QmT=dtOT[?Uz_'*Y=]VO.j+JVODS% | +z>QmT=dtOT[?Uz_'*Y=]VO.j+۟z׀Q/jBH)_9\{`ʿ}(LObǿ0swHDU'?TrH'T- _q>|KJ)~&OU?O޳D 2voɔs4 +W䡖)o"5oO4}^ž1ꛛr~/yk늉QI%L7}dZ~6Slmd"dETi:M6`5eQD?Q_((XMYifO,4h3' +VSkE) 5" ՔFmDjM6`5eQD?Q_((XMYifO,4h3' +VSkE) 5" ՔFmDjM6`5eQD1ڔy9ɖiĞ?"eWo"~( }!_)Gɿ7g\s`,p`+wj-a ?clœwj-(ܲ3[PByU2S]BaO?MfL{_sf\{:pha^f.;k `Ze?}7f $%CpI.K0+?3\ aVf%/¬̀Kr_2Y䂿d"3.fEg\ !̊ $%CpI.K0+?3\ aVf%/¬̀Kr_2Y䂿d"3.fEg\ !̊ $%CpI.K0+?3\ aVf%/¬̀Kr_2Y䂿d"3.fEg\ !̊ $%CPQ3 +M(,LV4A@"HWb*^Ϫdku6`J\V3.S-*c'?R+mU7C xB- վXdȁAkl2?Æ:;poPǺǀ Xw0ႿaCf8\7lc݁c u;w p߰u3.6Ա1`ÍG<Wy0Lxwqo} P}T3V_J[lUNV+3ww>Q+?1>@,Wb*^ _ax?* +SUB+LWugwߞ. .P#օJb{޺3@^o[z(ًyB%{1=o]]d/F = ugwߞ. 
.P#օJb{޺3@^o[z(ًyB%{1=o]]d/F = ugwߞ. .P#օJb{޺3@^o[z(ًyB%{1G]s~#D/ocݘi~2jEA1=@5ZLբUkĘv{#c5ÿFKV'vQhq4 +9P䱎&F+ _&Fh:J2re_#]s帉vcd\B U+ŽIv_Q?۲NC5rO~rooҦ_!G_LўB&Zߏ2ſի_zpi^9͐5m7ߓZ^|}C};毊mw_+*)*PԵā/['='%±nցMNtCyLWe17Rwh+f{ϼ7߻3I^5 $@2AϼwXomsz-coڀyWZl1>>V{m`u6&)h+oM#3K3|3(5<=JeKqWw@{Wϟ6A,-[ +g64F0'8NJc;u!5 mEK} 3աƊ^.MvV uVfC:4{}ûmzZ s)ʤR7ުv~O FT?2Dj{ӭl緅_g5~&~`ՑD;Y4%S"7%UQ;͚ގv>f36~46>@}^؊-13݌oϖ${k\hc V;e5_X;,ų_Û.WU. +3 +џ(ScnqK +o7k-%ZRTe`:OS=@ȥf9wT /Y|`9k:[:zGU^a?>IqtT=w߮wI'

> +stream +xw`U'$^ҤO"O EE#"ntQAY*(.JQVEQ@~X4QW E, BBW̛S;sμy3jhο0uv [s +ssN۳a"I?ZKx<-,Mq_s'Ey@w&ݓl@uY7GdweK8Z`m:Gq{n֧ZuGlwO[Ѭz7~ܝCZOɱ%DZE_y-ImhJ?>nPHteSɀ96Dn &xW]YcݶCU?pjXx'W^@\;Õ +С4yG[\Z7Ȏ6K-8@X[u\;R_W:U~uWz7ܭ皋@4 +e`wH~ՒFi2whAwo+jw-djoo~aCqL)gjt:f̐DL ?|.` nÙӍYhԲN~4q[U&y>@čUY毶Z]ͮǬW~a~k#g=v掆6Ү张Î0WA&|'z(7 n-]˷e65ЁÊW{ + +@=gZ5}˗ o;-EWa\"_ +U|I x_32qs*{JcL&;뉁kL `pe)ۅoL1T쬬g͚>qđ#G>7\^ʗ(ػ>clȋĭ+L-;p`M<Թnr!&fv9 ?ΏY`ׁf YvwpMz*l +%c8U%-936){1|Z"sޒ'ڸHl@keO\6Epkh =dI@g|/z=вЗ-}sC b$vxȎPC@^ϙ,%Dt`':ӏMNth6ѕ!;0-Xf_t8F@ 9SB^v5A;gQ!,|ь# Y>OאP]Ow + 'E€b-4Ά'[$#^Lp]Ɂǡ?<4".QpFK6.ǁ-P?F㨒(lBYX{fh3@a,Úk@36ђρcm@͒ с5q׈$_L'~02CM5 +p$ @QБQu`23.h'%K/A$xX`7`Ɇ@7'@*jd0t;\,G1;r%A)d9ہ s˕| Yv(hUr%WBN۰Hl^eHt 3H@n enZ ˑ@˚̰A?HGB:W^HH^ e'^V o$1YXQ$Dt¿ PEmEB%$$(Fe$$( 2 +T$O $A(TR8/?0 +/q"ߐDetĿTRIEBdtFY gLyE. gLR%0I E* (4IL>e +(>T$p*|AILk 4IH>*@sʿ @dsTIDWQ0rٴJ"|E$"pCٴJ"|/%&V;MC%@/ˀO?(,m6VI$"+%@&Zi0ksuIDP,,(S(.:C p`k DIHVtȡUr;"|eÁ K6(+9\JBG=zIg v%A6aKx A47!]"t|D eA ǚ6ϛ)Kx%:i GGBVb:Cu@ů %To+SlTtJ7LYIk_?LW:B{{rW/_ˎ$&ۊ'ƃƿ:跦G6N>W@_f:V߮u!b-k f%t7@?¿1J 5.V GO|ram5mŎﰘ2Sο!ud-},OYi1cݿ/ǿw0I85OWF4ny&Y+; +iBr_ƿꨀ*`ծ0㬦dr`e_M\5]_=~ j&AY \&ߔ M؄}]>Sz_g: he2rgK}i9.`Eʕ^pƄ\UzpP+DgӬUIQO\ ڸ쏿CH+:? ns%;Y%RJjr8?_}2q<h(_ :@Ƣ:puÔqp֋",'/.G]Z-J̺xzQ,* 4.,2BH%%kb-E=nX?S U!g8_ +y %'+3PCH+AY ڲnQJ~!Qj0ϿfeZv$e?nQBR'6?P/B;2!9'/.‰7NTC +%,sܻHB9QJZWEj\Cy0J\!.(V ?-fYq +Q1=GE"iJ`&Svjeis]N{M/Au:W_QzG_J̦,SVV:JE[/JV{әÿDuZֳ~"J*MۙR2i[pFR_v< Z7rVJH:? j$4W s+$.B@L~X-IǒZ%/ILsَM{዆M7RoM̆^1p>^(W56{~{O۱ e^VEsq bwklV/=vznh~ި6D+u~t0ưȑu1t3c>5PވTCsF١Pf1 |2{bUQOtptUVc+~|Wŧo-3b_v`:x'\׻t.9BӺuKܱy\hCZc%]_vpAI!4y۷⍮6;gukL{M~L:Md/ahAƯSg;]JJ\⃫Nk2Q| [j${EC_ ѡ +ܢ2gwO2븪 +m|WJT]V9c .wy +>[~CI '(Cm{Dz˺ơ(_w`§Xś^۴B]3*Qt+4{-g]Hᗂaf;lMãzč@oQj[آt⍇ +uCөy}Ű$Gxb;ǥ͝L:S 4R敿UlJQ2جTشlϋ: t<72rv˜dS}Ѥ7]>P]_\yȴoXOo{r<y/R/}~#g-=gⰌ.i,sd6uOժk߁O{+y0gÊM;>]j~Tn #xa]HURwpczzSzw᪓q=P͔`pLSBu`&[Q+`fx;4cNI%^LyK|qP90{q_-9P@%[3 FȦ<q@Q`PJ# SF8p6.(P$;@8l̲ N cR}XnXÁǨ/7E% [ل]Ů `g{gsτo4$` iEHW }-ғ߁5`ԧW-2 (})肸7Lib].O +tP,WIsphj>䫿W `*~MK~(zF{ɕ3Ar7.ms +oB8Mm_ +eʲ Jeښ`||wO6_=E ЁĿŜ0Rr=wWr 2fjw@5g 3y_Q!}TU`/|k~wV~R|篜pS >*oslYdeLd?2 +endstream +endobj +756 0 obj +<< /Filter /FlateDecode /Length1 1269 /Length2 1151 /Length3 0 /Length 1867 >> +stream +xڥS TVTVÊΠ")cC1ABG 1L+CD\^(WAD_ȑ3.IPXNr̀bDpGs*#AH~RGg0L%ł9XepB3i4"2bA.f$"B _BJBH+sƣR`P."QhR21`C{ v@da)w_andӽ9Q{:ngowiNNxZZni `  堿Hރoo0eFżqmUC!%xLoF݋-Z$cN6\X 7N*-(y~?D-H>вy6GL BQ|Q+|HǷaR7>7:nX>vxv1ĶRE-huuMt?6,KsVj|Vvz KO}YTL _ٻvEQyUp^,B/6k%/C|rw՚]_T`_ѠDcKDv{V3,Y klW4Wឺ?^4mx|m>ipDګ|_:iU飆ʒj.Cye76xAYL%-\GeGØθYͤN&x%G ++=8VCkGzW%u:Ӊ}wi6 +nawX/CթLuv}m*p6kt.Dl1IS k_TdDu4]<놁wJ ~n亊o6?Вvn~Cɟ<3*>~L?o²ckw*2G'MM, وqŃ}@L֘)ne`1+E~hRͦ|PvgʳW}۷=o/\5qݟlc=}*63 GF^az F[e܈*F$.ԁҳ*&R+#Y̌Z~wDeO՗?y_H67H.)d#9OMCec뛻gGSM-A9;79{~Ry#,9Pf+ʸGukS+mE۸c,cL1&:}>FwTo2:jaыf[WO7N7M#-yPMS.}Fs?@'u@gE"ta-[YUI5gTO^`hfwcA|AB-VbXg|w +endstream +endobj +757 0 obj +<< /Filter /FlateDecode /Length1 2227 /Length2 13588 /Length3 0 /Length 14726 >> +stream +xڝvT]%NBp(pBr6px,{I#V⸁]+&d RSU2R2h&)ᡝLk*t34h.i. 
هi瘗, oK!n`pڭ׍I lI^F3Z{׉nK}hgl߈"zywÙso};zGPjd5p*6?7_k[Iu߃ZC~縿rt~@f0`:4I2N` +F!^Nך*z.LJu =*~,VEe&V\gZSC^#E\ΗUt횢LVpL J`>COHeNi,_EsTLxPqshR1$Op5%7lO]5ΐ |bknp l|pj<Þd;$1 tyFb)s.l<>:z* f4B+}aOo(b֟@",,nIIR$c5'`V8**i&2%> F?0uɨI R8G 9U`M*r:ޢT[}.9ӒM;-$rB[KYEZkŷj4Kl u=p?50̒ /-jWk7N RT겇%y7D?h'+R%[>Ԕ8&oE~Udŋaڟ_:xmt6IEWE&ɷY!f,'.Z1 cنܥnϮ6mT]ƗE-,;"L{X'kS| +Z G*+9 +?-Y5m{댉g8;2| Q%O]H+IwTd;L5=`۾z+5RV;PݚNlJQ]&rbiS^ 3<dѳfcH2 ͗wKt̮';,0"ʻW`%|dGW4Ҭd[Z͛J9$ )(?KXM|4I: =uQ 2_&[c-R{_dE47e U1m Ы_/s2u;:<`H !iWGm+K_ض5} ۘZ~rtϑ8ͻk=Fi.4}GۍGfEIi>~xibŮ$^HbnXFA ó:T616s~da:QuN8UIl_6>Q[STüOa3(QA@\ۏJVB.Φ;W a=7/"hN"3a2F/zmnv$Nhn<̄D`:\-7.)QI9WqOҤS|Q&u4wg +9$Tޤ g{^I~/[@%CsLZN!x;`$؜MFQzxgψgFG:Uu!Fy`-^vit.8z +ddgnRӲsttv?5t:%+U&z(ZF)dZώ׆)浖Y¯[1>" 8QU^ݺʀiت7)rRzP d'ۊݑʪpnav-i%N<.AoW-T $.J :\a:l!X{^#"#㲟 ׄA&T| GȰE^DGD+j0m,dJȕ'#VK{hjfU\:?Ү8PyF#Ng234X'fPR(E.~1yyGeVhuS[_DF-1=+If6Z ҪcrV8 +_<}>_|!ߊ!ǔgYV5-tJ¦B ӑ:y/v=$!hL$-!|& +@%]<2!ɉ͊+ۨCumQ5,mJ|déb% 'Nzg\4!Ըla+IλS{L܏1`¦byq=Į:w^TP7(%4ͪՅPuGx'h`h_oE{i r9 K?O*=c|v ?/Sl/ K9s_!qɢ~L0MțwM+5v$\"xxɰDgGtG\ €AZ[G2]B96 Udˎ.G + >ƾSv3A#N1=Ucvy>.)vIz!P9n8x%f{QH7BapyJ[| ʜ5`Qw~vFk]PHF.T,odK a}@kE'Qeskn')$ +?m4Xur9}Y B`426ym9Q6SȈ_M wBբ~Xu2evւ:;H*:6W"G~@8s\A7Mim +B3Ǒ9X]4ɾ<x.U * SdQ2.04[%s*owif 9c4$ñ&j'`hȰ!("Egwi,dW.Mz"]T3ς +H2}e oͷOkk_#J]P5;wA R6ڡ\o7hG`܅N(k0gt5tt/R=u 2EZ"z6mzAw^.?ˑsc%uSk/&/kSeXe($a cݎ0 'pRٲ.# ;;A ^Ͳ\ {K +١i"7+E80աgnN͊3+Rw?$G&t0n 9_t^\I3_pZW#4@q"bE6),UkU׹2_XT9ˆG.ڥu3{cӡr_L$zHKJU׻뀘߰>浳,wK+F 2YՁUq⼟) ofQx-ёY-X ޟuVX~U²^|"E"#qቋf'xo. Q#8֟\eK4\ćT|F(S#cV>Pø9#/ƍb+d%aXZ=CP?1%ƀt< NA$ڄ>7OL _֐vUS&e]6;]Pn_Ns[̺<i~㦊'ѡjE [_Wv,-]%%#&o3;^OT>IaWZ8!9c̄1XK \UBCc%;N!$0i{Q*^0g7,~tdig6@L2_#D[+SÔ٥v1}8$TXU}]a˯ UyTyL glW׺g9v7RюM6z=<$DRW_@C17 +-O(("X[ ]voxx)ң/y7W%\V{~>{뮧U +6xdP)V!Eٜ(."?_tN59<~MR˼DXj=KV:VZ>#zPXD'a{S@6C{ûuOp6KR _>sc^o| ]׮*@`oӜ폩r&-n+cA:ӨzMMM,f[B:CAgeaM%,l{q-ok>+'8&3ԿTb6- ണX +\NxHS:Q&@y1qf;7ì^p`H%A dp_|i܄lmQQ8ŏN}"<>%52^fs ;vz՟2(RTқPF;*`'v6 LF4oXk~, a;'s!tecŕxC95[FU*ּK ܏l7U]^=s-7w’[X K$sYDþ>GtT0eУt bnA/t.E X:Qs@$R"fh7,#w4bT{\ x5cP + Y|2amY4P-a`d6|/C15q:K/F'ۨ3A0}W-(i}5LY-{CC%{4l&aNL9P1QMI/G".<+mzW]80(&ߝɤ_uU'>H7F\NBf*B}xovַ1xCC͸S}HUXϺH[ǍX,9ƭygutob$r]NIGJ%ɗSf7-sP"$tԩ"z}?7{jxV&f\#4lj`85?{ +]B2]HCf+a 9hsZ 5v=}u@!V =j I#b7jyK + D ېZ?VV/8#I&=-+TfKt8&C/qҳKW*h.ZU.(q?Fçt掍{vw/ G[Q|SN~d O)P +LU$/Emo_/TK9zU2yGJZsj +)B'QKj49%IT2au\h,8\{t`VU6hpeSw#sboeqDj Lv= +̆>m7834UQ1 5IَJ3'A}.s睫VG+Mc )sR?]:K +0k̂S|3 yl(NZ3ItJ +9~ѝe + olT}CH`wn}u$^UA1&&6Id拉lE,gB CX#F%Pd?cOYE-5x( lk~j{)_dR s5;GT'ܴErU5_CzvQ6-W-*ƈ'lwoM ~,n|KZ Ԉ JUS{GxwZi;=J_t59c?OBP'sNgt~=\m:%AhreDzO|-"jY9M#)hm gv 6vE +A,JhA苭GrѐyMKx0Wjf+h{=9XJy[gһԴ@ -ӭLw_njRpɩ@,GpV#Mpt9Փ[e鶹jDiVxTBhy*Q"Y)?(%Ʋ@:f_ȥmۨ,N$-yza80[ ɚ;"cY)ܠ6-9XF2",$ 3%^h! +<%\ܭ.e6w6?HU TEy{yU6#!>)؁͹NFaۂẖDU) >ʼ<"~~7hJ2;%GFR7G`جR]|t6pA&>s"EUCK;8VNĬޝ}r械Z#x(%)-.J$}2j_{<.4Jɻ1 谈b9.h(|8N$WS.Dt؅zn1:Ir]!KT|E.r9ryl*t㓖'cT8n]T?DiC\yl!2ted-s+m:N_ןIhQ >J{;~%#eѦ.&@rIVrNeW:&E Ԏ +ZBD{|X߿naf>ZӪ!AUPc}Mg*ͺ]: +$bhSـЮXEF&Y:\\Ib>/wgN ];]3R$*0:ŃOFSsGN+PWWGTѰe>}&xi‰x BQf4g[0_T:凴`2FϝRi{PN*}ާAq?$0"lFo 2`g +qF;u:ߧ +N~+P-Hnq;\u\aJRW,y-f&$)s]7́h%pn=ז/rE=4DYւuL[ %"S &Q)ʚ7*[7lku~`_sEeA\f7+z<;wƷi{S?vod^?v#Ѫ U~/ mUTƇ'SNETxw75K `ؓ%bs$.@;toХ24p7H$^d.4sohc5qGHGP.(Y + jE6}ϯwE3GgKuU ԩMa՝(^w es&-<;ń3cXGqxN+NTұ:0hܧ#c-s韇ز53IૺZc&ދpckMew}t>[/^= +*j)*Q, +erU[Z7tFe{Gw0+)e/$ڥ}(a?!\o'F؄ UrGtR6 ;C7`{GL9 _vb-ShsWftҍheM6ĩ͊WؖDUJ޻fܜ/H2r}_*Dkt]3Hv- ++R{M^ݜJ7сJDNrkWfrü]mk}` + ? 
(w> y.,' +l jYd +:oaZ-3!0 +IvNҀѥ0#'J~D,j%j ,:)p\];iK/>󅬷'̬f]'kĢsh4by3(CGXm8kUAcNP3lB9%609/KWAFƲIrV-*'NN*賈!j,y "c 4&S>i (vNLgP \p(:Y>PHa.tw rLƍ|` CWIC&"AVEb3I9,Ry2"46 TWaJMJҤ:"{%?(`bp'Xd".ԌBēkL"0F 펦xn54]d\e)0/Jr'5~sLKd?f%p%@ Ώ '۫u h"pc]sa^^ +dž0xKE /_~F +wgþ6qDFoDMmg'ڸKqjMzpU:I&q1%ܨN`ε\0jx2:d;*]8_)jxN.f_b/㖅צW Ҡ `v>ڽ+byXDϖދP`/| 0إ5u>.+gj+C, }`{Uf'ݻ(p`OvxALnT۸(}1,X'xB}Xk7x{Sߙȍ]wg7Ļ3mMMB%lteS4+2:hïF!3vO XS{c?D ja6z/hx]rvHty$zF`{jO][S>e "E[JJ,ٯ'X)Ny"2M,]# +H/-4X!&8GHQiyBѺW n\bs(S[95ialuA,t@S- +Um)^X5L@M@<$3_XX(QE& +ƨ <4*IWhч^'V!Ow>0ׅϲ_ lǪ8dvCQ@G.xr< ( 3<,P2ӜdRM?w!r/@=ܨ*(࿉29).iACCY@>iS+GЌ5J}}~F40()yش|8Vv;-^ "=MMfSSÔ$L ?6[ K$Ԡ56p> +endobj +760 0 obj +<< /Ascent 953 /CapHeight 953 /CharSet () /Descent -285 /Flags 4 /FontBBox [ -174 -285 1001 953 ] /FontFile 848 0 R /FontName /UGSFAT+NimbusSanL-Regu /ItalicAngle 0 /StemV 85 /Type /FontDescriptor >> +endobj +761 0 obj +[ 0 332 500 500 166 332 555 221 332 332 0 332 583 0 610 500 332 277 0 0 0 0 0 0 0 0 0 0 0 0 332 190 277 277 354 555 555 888 666 221 332 332 388 583 277 332 277 277 555 555 555 555 555 555 555 555 555 555 277 277 583 583 583 555 1014 666 666 721 721 666 610 777 721 277 500 666 555 832 721 777 666 777 721 666 610 721 666 943 666 666 610 277 277 277 468 555 221 555 555 500 555 555 277 555 555 221 221 500 221 832 555 555 555 555 332 500 277 555 500 721 500 500 500 333 259 333 583 0 0 0 221 555 332 1000 555 555 332 1000 666 332 1000 0 0 0 0 0 0 332 332 350 555 1000 332 1000 500 332 943 0 0 666 0 332 555 555 555 555 259 555 332 737 369 555 583 332 737 332 399 583 332 332 332 555 536 277 332 332 364 555 833 833 833 610 666 666 666 666 666 666 1000 721 666 666 666 666 277 277 277 277 721 721 777 777 777 777 777 583 777 721 721 721 721 666 666 610 555 555 555 555 555 555 888 500 555 555 555 555 277 277 277 277 555 555 555 555 555 555 555 583 610 555 555 555 555 500 555 500 ] +endobj +762 0 obj +/PXVYKT+CMSY10 +endobj +763 0 obj +<< /Differences [ 0 /minus /periodcentered /multiply /asteriskmath /divide /diamondmath /plusminus /minusplus /circleplus /circleminus /circlemultiply /circledivide /circledot /circlecopyrt /openbullet /bullet /equivasymptotic /equivalence /reflexsubset /reflexsuperset /lessequal /greaterequal /precedesequal /followsequal /similar /approxequal /propersubset /propersuperset /lessmuch /greatermuch /precedes /follows /arrowleft /arrowright /arrowup /arrowdown /arrowboth /arrownortheast /arrowsoutheast /similarequal /arrowdblleft /arrowdblright /arrowdblup /arrowdbldown /arrowdblboth /arrownorthwest /arrowsouthwest /proportional /prime /infinity /element /owner /triangle /triangleinv /negationslash /mapsto /universal /existential /logicalnot /emptyset /Rfractur /Ifractur /latticetop /perpendicular /aleph /A /B /C /D /E /F /G /H /I /J /K /L /M /N /O /P /Q /R /S /T /U /V /W /X /Y /Z /union /intersection /unionmulti /logicaland /logicalor /turnstileleft /turnstileright /floorleft /floorright /ceilingleft /ceilingright /braceleft /braceright /angbracketleft /angbracketright /bar /bardbl /arrowbothv /arrowdblbothv /backslash /wreathproduct /radical /coproduct /nabla /integral /unionsq /intersectionsq /subsetsqequal /supersetsqequal /section /dagger /daggerdbl /paragraph /club /diamond /heart /spade /arrowleft 160 /space /minus /periodcentered /multiply /asteriskmath /divide /diamondmath /plusminus /minusplus /circleplus /circleminus 173 /circlemultiply /circledivide /circledot /circlecopyrt /openbullet /bullet /equivasymptotic /equivalence /reflexsubset /reflexsuperset /lessequal /greaterequal /precedesequal /followsequal 
/similar /approxequal /propersubset /propersuperset /lessmuch /greatermuch /precedes /follows /arrowleft /spade ] /Type /Encoding >> +endobj +764 0 obj +<< /Ascent 775 /CapHeight 775 /CharSet () /Descent -960 /Flags 4 /FontBBox [ -29 -960 1116 775 ] /FontFile 849 0 R /FontName /PXVYKT+CMSY10 /ItalicAngle -14 /StemV 40 /Type /FontDescriptor >> +endobj +765 0 obj +[ 777 277 777 500 777 500 777 777 777 777 777 777 777 1000 500 500 777 777 777 777 777 777 777 777 777 777 777 777 1000 1000 777 777 1000 1000 500 500 1000 1000 1000 777 1000 1000 611 611 1000 1000 1000 777 274 1000 666 666 888 888 0 0 555 555 666 500 722 722 777 777 611 798 656 526 771 527 718 594 844 544 677 761 689 1200 820 796 695 816 847 605 544 625 612 987 713 668 724 666 666 666 666 666 611 611 444 444 444 444 500 500 388 388 277 500 500 611 500 277 833 750 833 416 666 666 777 777 444 444 444 611 777 777 777 777 ] +endobj +766 0 obj +/YHUNPG+CMR10 +endobj +767 0 obj +<< /Differences [ 0 /Gamma /Delta /Theta /Lambda /Xi /Pi /Sigma /Upsilon /Phi /Psi /Omega /ff /fi /fl /ffi /ffl /dotlessi /dotlessj /grave /acute /caron /breve /macron /ring /cedilla /germandbls /ae /oe /oslash /AE /OE /Oslash /suppress /exclam /quotedblright /numbersign /dollar /percent /ampersand /quoteright /parenleft /parenright /asterisk /plus /comma /hyphen /period /slash /zero /one /two /three /four /five /six /seven /eight /nine /colon /semicolon /exclamdown /equal /questiondown /question /at /A /B /C /D /E /F /G /H /I /J /K /L /M /N /O /P /Q /R /S /T /U /V /W /X /Y /Z /bracketleft /quotedblleft /bracketright /circumflex /dotaccent /quoteleft /a /b /c /d /e /f /g /h /i /j /k /l /m /n /o /p /q /r /s /t /u /v /w /x /y /z /endash /emdash /hungarumlaut /tilde /dieresis /suppress 160 /space /Gamma /Delta /Theta /Lambda /Xi /Pi /Sigma /Upsilon /Phi /Psi /sfthyphen /nbspace /Omega /ff /fi /fl /ffi /ffl /dotlessi /dotlessj /grave /acute /caron /breve /macron /ring /cedilla /germandbls /ae /oe /oslash /AE /OE /Oslash /suppress /dieresis ] /Type /Encoding >> +endobj +768 0 obj +<< /Ascent 750 /CapHeight 750 /CharSet () /Descent -250 /Flags 4 /FontBBox [ -40 -250 1009 750 ] /FontFile 850 0 R /FontName /YHUNPG+CMR10 /ItalicAngle 0 /StemV 69 /Type /FontDescriptor >> +endobj +769 0 obj +[ 625 833 777 694 666 750 722 777 722 777 722 583 555 555 833 833 277 305 500 500 500 500 500 750 444 500 722 777 500 902 1013 777 277 277 500 833 500 833 777 277 388 388 500 777 277 333 277 500 500 500 500 500 500 500 500 500 500 500 277 277 277 777 472 472 777 750 708 722 763 680 652 784 750 361 513 777 625 916 750 777 680 777 736 555 722 750 750 1027 750 750 611 277 500 277 500 277 277 500 555 444 555 444 305 500 555 277 305 527 277 833 555 500 555 527 391 394 388 555 527 722 527 527 444 500 1000 500 500 500 ] +endobj +770 0 obj +/NZOHYK+CMSY7 +endobj +771 0 obj +<< /Differences [ 0 /minus /periodcentered /multiply /asteriskmath /divide /diamondmath /plusminus /minusplus /circleplus /circleminus /circlemultiply /circledivide /circledot /circlecopyrt /openbullet /bullet /equivasymptotic /equivalence /reflexsubset /reflexsuperset /lessequal /greaterequal /precedesequal /followsequal /similar /approxequal /propersubset /propersuperset /lessmuch /greatermuch /precedes /follows /arrowleft /arrowright /arrowup /arrowdown /arrowboth /arrownortheast /arrowsoutheast /similarequal /arrowdblleft /arrowdblright /arrowdblup /arrowdbldown /arrowdblboth /arrownorthwest /arrowsouthwest /proportional /prime /infinity /element /owner /triangle /triangleinv /negationslash /mapsto /universal 
/existential /logicalnot /emptyset /Rfractur /Ifractur /latticetop /perpendicular /aleph /A /B /C /D /E /F /G /H /I /J /K /L /M /N /O /P /Q /R /S /T /U /V /W /X /Y /Z /union /intersection /unionmulti /logicaland /logicalor /turnstileleft /turnstileright /floorleft /floorright /ceilingleft /ceilingright /braceleft /braceright /angbracketleft /angbracketright /bar /bardbl /arrowbothv /arrowdblbothv /backslash /wreathproduct /radical /coproduct /nabla /integral /unionsq /intersectionsq /subsetsqequal /supersetsqequal /section /dagger /daggerdbl /paragraph /club /diamond /heart /spade /arrowleft 160 /space /minus /periodcentered /multiply /asteriskmath /divide /diamondmath /plusminus /minusplus /circleplus /circleminus 173 /circlemultiply /circledivide /circledot /circlecopyrt /openbullet /bullet /equivasymptotic /equivalence /reflexsubset /reflexsuperset /lessequal /greaterequal /precedesequal /followsequal /similar /approxequal /propersubset /propersuperset /lessmuch /greatermuch /precedes /follows /arrowleft /spade ] /Type /Encoding >> +endobj +772 0 obj +<< /Ascent 782 /CapHeight 782 /CharSet () /Descent -951 /Flags 4 /FontBBox [ -15 -951 1251 782 ] /FontFile 851 0 R /FontName /NZOHYK+CMSY7 /ItalicAngle -14 /StemV 49 /Type /FontDescriptor >> +endobj +773 0 obj +[ 892 339 892 585 892 585 892 892 892 892 892 892 892 1138 585 585 892 892 892 892 892 892 892 892 892 892 892 892 1138 1138 892 892 1138 1138 585 585 1138 1138 1138 892 1138 1138 708 708 1138 1138 1138 892 329 1138 769 769 1015 1015 0 0 646 646 769 585 831 831 892 892 708 917 753 620 889 616 818 688 978 646 782 871 791 1342 935 905 809 935 981 702 647 717 719 1135 818 764 823 769 769 769 769 769 708 708 523 523 523 523 585 585 462 462 339 585 585 708 585 339 938 859 954 493 769 769 892 892 523 523 523 708 892 892 892 892 ] +endobj +774 0 obj +/ASBNUF+CMR7 +endobj +775 0 obj +<< /Differences [ 0 /Gamma /Delta /Theta /Lambda /Xi /Pi /Sigma /Upsilon /Phi /Psi /Omega /ff /fi /fl /ffi /ffl /dotlessi /dotlessj /grave /acute /caron /breve /macron /ring /cedilla /germandbls /ae /oe /oslash /AE /OE /Oslash /suppress /exclam /quotedblright /numbersign /dollar /percent /ampersand /quoteright /parenleft /parenright /asterisk /plus /comma /hyphen /period /slash /zero /one /two /three /four /five /six /seven /eight /nine /colon /semicolon /exclamdown /equal /questiondown /question /at /A /B /C /D /E /F /G /H /I /J /K /L /M /N /O /P /Q /R /S /T /U /V /W /X /Y /Z /bracketleft /quotedblleft /bracketright /circumflex /dotaccent /quoteleft /a /b /c /d /e /f /g /h /i /j /k /l /m /n /o /p /q /r /s /t /u /v /w /x /y /z /endash /emdash /hungarumlaut /tilde /dieresis /suppress 160 /space /Gamma /Delta /Theta /Lambda /Xi /Pi /Sigma /Upsilon /Phi /Psi /sfthyphen /nbspace /Omega /ff /fi /fl /ffi /ffl /dotlessi /dotlessj /grave /acute /caron /breve /macron /ring /cedilla /germandbls /ae /oe /oslash /AE /OE /Oslash /suppress /dieresis ] /Type /Encoding >> +endobj +776 0 obj +<< /Ascent 750 /CapHeight 750 /CharSet () /Descent -250 /Flags 4 /FontBBox [ -27 -250 1122 750 ] /FontFile 852 0 R /FontName /ASBNUF+CMR7 /ItalicAngle 0 /StemV 79 /Type /FontDescriptor >> +endobj +777 0 obj +[ 706 938 876 781 753 843 815 876 815 876 815 677 646 646 970 970 323 354 569 569 569 569 569 843 507 569 815 876 569 1013 1136 876 323 323 569 938 569 938 876 323 446 446 569 876 323 384 323 569 569 569 569 569 569 569 569 569 569 569 323 323 323 876 538 538 876 843 798 815 860 767 737 883 843 412 583 874 706 1027 843 876 767 876 829 630 815 843 843 1150 843 843 692 323 569 323 569 
q㐉v J +[LU0-Rl*\i;n^w~w`Qojvt;k p?v';n9# +|K% +H/r߬opIN8=[m"_-gqy4> OD'GuԠ|xD+deNNhIrFQa\kC)If2r)soms;ȉ/-w,W"vd QmxjWw$ﴝ 9bLK^UooZL,Bb: +N@vk( DYKq,of[y~ ;q +\RyxĨJ~[xF=}:# +Ezٗ__1a*)MFo]P7GOfYeS$"K2n~KT\rMJhv z֍#Ж>Һ Jxܕh +endstream +endobj +831 0 obj +<< /D (subsection.4.1) /S /GoTo >> +endobj +832 0 obj + +endobj +833 0 obj +<< /D (subsection.4.2) /S /GoTo >> +endobj +834 0 obj + +endobj +835 0 obj +<< /D (section.5) /S /GoTo >> +endobj +836 0 obj + +endobj +837 0 obj +<< /D (subsection.6.1) /S /GoTo >> +endobj +838 0 obj +<< /A 859 0 R /Next 841 0 R /Parent 616 0 R /Prev 731 0 R /Title 860 0 R >> +endobj +839 0 obj + +endobj +840 0 obj +<< /D (subsection.6.4) /S /GoTo >> +endobj +841 0 obj +<< /A 861 0 R /Next 732 0 R /Parent 616 0 R /Prev 838 0 R /Title 862 0 R >> +endobj +842 0 obj + +endobj +843 0 obj +<< /Filter /FlateDecode /Length1 6916 /Length 4896 >> +stream +x9kTיhFBo 4G$ axY(`&qLiqvc9Ng{ +lqq:=&ݴ?=G= H;q}w RDhxIEGl "p]Fp]5BK0IB ť#Gbґq<#>ppt 4q!Rbu"jH4&a|,= g{u՟t4;(ssw~uC%`$/!;P,6-Fn.T\uyw[-7 +@<Xp@@7@BG38&1C;a$ A%\:k @R*V)z :WIc`:x3PB=˸*Cul̿/3!>(d>|{޷3?{yVsVyu槷f-ena[r΍̏oWv0?Z 1?\\_b~e0hE®QvhH!+柗7ۘ/GZg`2-xq'<|W EY_vdqyc,`^_f^|ig.f2.V2߸Uy9ç'IE20?>?>wÎccpc)L}y)r +OuOOx$j$ϜN'1ew#CQ^уQ2 +cCA3A>>2 + |zyO/~8wwy +|{oo[i?3AZ < ||@ؘ@ɶ(bfBDӨ\EKgkv _꼚 olO>ʬ +/fuHu@sϸ4=qxğoBPE(mmĞP%bL1$㝱 0aLO*"4)4zͣwPQ4Dt{E +Qtξ+|(:@-뿣뿖_.^=RC`hokmٹc{s!{yܵ5[*+]e{^50fAӨ +9MH#BN  +D !|W`PF`~a`= ɡ$=XV;Xf=.:YaUjoڲ =X9R + ȜTJ/TтRMƁb @Y;)_Qq +mDM3 8Fhu9A}3΢u +DȹIEg{/qo2bf;xVY@GDa`K<-9@> ܨ`b b .1O0y%8|B7':FTPk -!' lNT:E=d/8%7!6%x:|\hS?頀^e r d- ~xqu t =ZW͆p:Ybq! zҳ 7TJ-:@ sbŤEY`} +>0( w= qc + +ѝ  +Y`+)D/Y!n <-!9%ۥxtbM^%#$mSEJx@7ц`7n9l.\|@93Rة~Ȓ?̱z?^\[xF}*s\GJX8qnó <Z#vps\^hE#Q *EVDđQHKf$L"Hx"F-&4M!GMy$Z'^}XgdSY(?,` W Z-(:AՉtHwHsuNƐ}nV/ +WˤDb#ɴ-P߄:TT?UFVbSd YH w*HW|/ʀ&g !n ]و3\T-u$ +"o ^h;jdȉ+)}]!$WFF #GM7T;>UpWVQ16djr,DYn,%Js8hZY A2F%$0~OJ$VIҒL \w+'I)y,ߐ M1:֙->=S!;qq2z2Jv#%)Г!@&D<ѨɩqhjT&$4#9q,s2'~_cW޹<"A1E>I)ܷHXuʹ_FUұ'6%E={z6 DVVZ+IqƓǬ&/-jV%7"/7OvVLE;'_6mZZF`JPYw >=~%dI3am8hgoxhsnべorq2AkT:ʨkڎ/-T}੐;ҔKLgߚpVvB=`)r@?̝_'O֟wWj<[vm%^. lD8)ÚA2pjr~2L.hvk +NU+8!ۦ[t<ʊ,쐺NWNM Up#w!jfE~ R# ZC[HRrg r%%Lփ/}>:Ѹk)]~|!ӳq?V?ܳO?ێ.oh StrwA8T,ˈ5yYO;l xԔ-Ȇ&=pe+4xzJM,*p&u1ו%]u%rܤ_ODLZb2WO4XP_C3:*g4n)_B?ꟿ̧{j2{{>8}92= axJ-V!'%Kt(]6va͢bò\%z8@tNtkl%tV{JIS j9f~7e frJ^e0k?5 T6QMQDڤr^Q&ۄ2ZI/ט@ xuxs1UTL(N^H%#8:$UHedeg2]خ\/ByP)F4vތdpHVX\90%xڥ7CZ%&3T6/A Z !?QTwV%jZ)' UZ84&kQ׉ZJP-rwUbdWZahl|bC8qv"y VR[-yފ?l$j@MAK%ۖ-S'TƘm1u{kW( - +AqfጣDCY\ Ŀ*PdGnz̖+ڔ ݕV2"O?v%R=Qd)5E.n>;̤J4`k}/>RIMZy5OMj6ҤkCQ77n2 vm5*؊D<Ͱ5s\8 ^N)1-1%UIRac"$fsuRR jzlMрN4lm7~+yC|tzCQO +aN*5PqX1gyB$XR-r6|3_ZYbYkv> +stream +xy|Ui{RtKQZZhFK)є,Ev +e_TAGeܪk@ :*2#Pqۼs'-,oyr~{==Kb]9"d,jnlYJzDjyH; p\ғ[4&~F浢tLh]d9I s등YԼu͋dr{=reXgI}0p?bU99j<0XDjdVwna]hAˠ!nDžߤ^VjlX5X,f½((DFYrE!!,bpO%F}hK+bc[" %B #y\d҆GZG_TZ!,h@R_;T@e('PDue (wE9s ZۊؿnVP ?yb-^GO\*O\{wsT-i&%!</S/ &|A8 D 4F">:>!|L\>$폄#K8J o"I8B8Lx:5U+ /^$@8H8@}>Ox g{Ovv<^OL6FP"I㞘,ty({0᷄v#p5J5n];- n%l&B]o 7 "l 7n$pzOt.:Baa-a a5aa%aa9aa)Ap a>0000BEh&4ff=#qdnՄ.B0PG%L"L$& UJ8BPF(%bhB0P(F#B6!)Cd A B:!JNH!$';K <|BēJB4!I tC0f`$ #hO AMFAIPD@`|B?gyO ?ȷe3bߑ[7 g g%N"pp%Ϟ0˾ a"| G8'#OXC PޣޥΎ@x:={Mau5h^ +/э^Q@$ wn!Ff-:l<7n&t{BX+OH#b'db'd:&Oxč_R y=\܉g3c-/r*EBىʓ(O<ʣ(<[PDy~(QZ-lE5(w܉rQnCUjٌr &(cy +,d+X:O0l'o%3__ mk  lOI# r e2 A3D0 &z%hjZ圊+JiPr +ʧ(|ʇ(Ĵ|>(ϡGه,}ߠxYEz7 + +r2RB'PBqKp ciʡB0^Q_8"@cYEOj  5 jxB0PA('iVG% QHBM3ۏ3(?QC9-fQD3('P9q(oP^EyeC(/xQAye7.{y~j|? 
+V< +\lB aDIANFJBh$ W" !BNH#RɄ$B"NI J  0I>PE9wP~6[(ob(-3,7 ]z\*׸q^#D#VYʕU=+]!+݊e=\e,`ieDN1sv;:APCwԙ_Xykt2#7w+TvtBv C@] {DGXT#!ֶm{ہ6庶mN, 6E1/rPyD]>a8# 8}l y֞y]szfZ2f3\33fLwM˘3՘v]We4\= ::פhɨvMvϨtUTj+ٸ +Wg'=+lB+=+ǜE3cԺQ/]"-#GT$Bi~|̬`lni'g}FN#x @qR@u Ȫ0,8A,r fs2+I3RqF NVO@dVƀsZ*cBCjW\-ijJldWgIAה^nieBiR]77mؒj)w-ix>^tiLs%S2c1Zt[+;K.=eŝ3;tF3 \.]3ً2_մ∙3䟻\upzix^-o7 o,ŰK77(A<*xw +K,[P W$^|}W xT$5 o`ogYP-M< yYY/@;t@',VX  bo np+n5l{^} 븾 %ax @>ox1OSh# Oe܏ևя{=ON'A/x`Ɯ>y g {1`?<cbf_m2cO  + +pe o^xo\kG]xއ?G)|s\u}>{G/zaO|}rGcpi;&ag.9C[<݃y{H3Ny(<7Ob̟=l<H(ml}x,x<)oc)g"'rq1 <<~`t>$_~ Kǣ|#ȳkƪ<9 +O'y:WVͳ:wyYt||lR/IU*徬%3lM;ؽkYMm%u%sb+RVΑleR V=yLRM6kwQ_bi[Tvw+$2p8x>NU& +=t6JB98X5]5C͛lr[y5BeMOocUfr6鶕 1eXp6Yޛ@&|:䰵K!6{z܄RHM-V)}{!w7+rC ++Ť$wgϕ,Mѳq}ε%g#ȳd3I)vʭpnWx:%]cu b#xaIR3ZRduhtû=x~PMK+qq˯2hC IP^Rs.e"cAY琞&`يIy&VmцkY᱖[]o_SlWIeS A亥T9<>Nև+[u}7!XqarTU7fVl^_׬^unn[n[s)5+U7SkczlC^,m7{N*p+7r+WxOQ{]rB6zl#'1h +d3 hS);,Spu6OF So&1aA lsJ$ۋ*nWJ$08^_;Ht_cpV;`R%i刍Z07@}*RWԘoGd$´SD~#Gcw-/lttZԻ* \>#uņU-MV̀ZqYyC<sdE+OK :I⛗!Ս>yY[w{$=(P`tqM&ۖ-JՒ^Ռ?ףŖ?عqj>ho'+=|aB^\}Ajo@" ~ %Ijj~jXJX+߉a:C.^O0d<J@ :T~#ey#Ta5Y,1Rתoh[@ L``WQeet +`eo` VbF\U(4@q#A#I{}W}Yo%$PPU*۰ aDRb^NNaDnmX rFs=2F:?yXޟ /RT{%X-q{X]cKR*4*QQ'\{SC _TFxjEم +^Pj5%Dž&dŌ6 ^ܿ5Ӆۣb/{?;M4xjrF%y۝Fm5؊Ɉ0لgY +$N1DUwr^"ǩj⨾Tcj_*s8 + +S_vfV3<Gsr1|E'3jƧ 7iuJ.P;kl`< >)MњL@1ẁ-$w^!bb #'l3 +/8`˱{Y.*6*v;C\ >\v'l31 nR!]\BC\vRbN\@V2ʨ_!m/&}i+/FqѣZt&JFC>fYea]8Z_rIP7Ӽw⃒‹FgeӜzb^64\EKyq0Ypq7 (O\i|1㔌ehHn1r: P`u#3n]յNsg['9S4M + tXkNj\N(m.{4 +f1PjSܟza*SfLhgv`[Ue":BS'jqOJBcKfWeQPh ,)Ykyᘥ]Ӿ}n9qqHH+>vXpx:(>2f 7|vͲ7VtafEzfe<ۤC)e pC +MбUO;#MU |((c?̃i%S); ʹ3cr8򎐟F3/6W-<ʧ%'BQһj$gV-f7Ee5)hȋb:o_ֻǮ^W69#eR8dz6ʷh +M2,:N4:~`u^V9S'CUB3gw=sLSK&ȏɿZJ؇;P + /Ag+XSk愢 Rۖ=6(8piE/hϘ͆{q_PU8 ^Ye>h+M >gp)10xp>S}m! Cw>vŋ<!/eX#j^\8idcdlrUITMXO^U֪ա T9,|:'$3$2*A x&va?e"oB7ŀCv3y!xXbO>yښxyx@hML wy +CM|$ug6pg-4=L hP;!J;(KU.JW/G tD'?pE8x92811%SnXIΌ ʁ#~G< +NZ86EY*2P̵C5b@dxeh쑘5A( Wyji:槗-? +endstream +endobj +845 0 obj +<< /Filter /FlateDecode /Length1 22008 /Length 11130 >> +stream +x| p\ՙ湷jv[֣[Zzڒߎ_X`c[ca;E@&&&*M"pNe 5xfBs~9(dCՌ฿>ssn4*RgG)|>{7EwQ|ΞzJ?fY=RԵ3yBn5s1w۞y|8|~orc5rR}҅Vܾf|.QIUG'J +4I=~Q64IիU]Y."(JGT +SʉL.XTyd(t(@'(WULjP[ZPU9G ꨪQkp'N~/Sqz6!rr 3g#scS D?9S:|K۲5rΓعLύML ;0NM +khf^vt1u,ܽ߉pl,hwVxp܂\ĽpaV8Q' +&0T~'AXyfƵEPs߉% K"e(}^XZ3`wgy`ˡ~eItZ?snQ=(IJǾo'߫>7mhA%yڗ<\3{##XkU>7/, u1}*bm|O*T}P8YE*:ψjMk*>'GHDY*Z?dNʟ@e%j +M9%/ڀO<s .?/}ńPWʬiSN `_[\VlC⚜5/w;BV|RUU_GjP[W@]oo9ߙ`,х=ApU(ye]QFsoP[q;?C :G +/+*520(butQb?,.إ?{[syܪ^rjR^rR硖kXD@PԸЫCw "Ջ%jbhDCzH^^]XTTGgE/]bC߭IN}c;6us/qqdhzo8r +9(DISeN2^"(sqEڌy%,KK4P ;[םp)Fz8I=$ܣbaXNJF[rKO|c=*۰CE3w}>vcgd5-D`;nKN܍b1֛Zzՠ35bM%s.7{SQMK҄+=B j^_TN +N($e^1mp#PoՎ4.eQP,\p=x/>3Рк@U[Z?Z>ğCp܀9<v'Ɖ X3& nQ?L9t(s]0BnDGلB3 (P&kuNCsGtW,/EmRkcÖF+-.桩k[ ;;{GxU},>xdSSֆ扁DQ + xTu<|^AɌTFE5bZfwQ_Q`.y{ء{iYm*k\5Az9%WzP?J@Z +8# +Myi66-Qh46Qiin0+oiY36CqH(]UpNOJӧ+`K>x _k5-@%~g e 4|trXs vQmuY;m}1'YK>9pz/;j9)SC"(!֣n#p(ìG7>4z(㢄SwPbH ~0Uca$E"ƂPPP6ӑF kQG#N55F9g"DN @+JNJiF@WKy -h+yp1yXv積( ګ̢Wy5FwJÆנ0;e_aF_aF_a+QJ(G|` *F,t=",ݺn1bӽ6XuI{;/.b%g֮;6{w W6Gj{+qe/,/[1{<ęAMh\s5_4KC@5|Ej7Rx6Jj'KTHa/L}P@EkbD~)J^C7ooRizVno &Tiw Sp45ֺrrxP[´`f[`|q)[h(Z>Yip0S%Zw/ӎVjN +@8e+ $Q$jD(_%J-8""cfBrܙ ,MSYfNj&;(e+i#5o A+m9ɨ =U+K/ϟ=X5V &t@c smu =c!<&*We< ?9}ė_9!sB]Ÿ0g܁E~{$& r5r +EOmŬc v.@BA&}F6/r̀LY'Eꆳz}B,0,~V#i4( m9U#]Xd+U\ۉIO\sDɥŅ ĺ?2w{.9ysO:R!ȉ *(⮚\(IO |F>Ө2UU"a[frE>{BiׂN*A ܦ@ jby3mV[nݵn|'9vwnrރX#y[,Vpz17QѬ訴Bb )5x]:x ,]s\$%PE,[*q}ݠTB:0 0GXGE@pѭ`MD׈3/FhWz=C/w]U]h~1C0W&BBB6161ҫb&L:-^fϢ*E_!ͲArQ@JF~M(^7L2 :qOޫp<\t;]Gw޾cz}FFvwWUuؒ: kf !ϰm۬pzK!͖yx_B9[p::@9 {E' 2Hn~NQX穧Pue7 i&& [[>ts v 2ŗC%0栀JZ +9# N#݁;U!T$PzQ6i@&m¢jBUyvb,GO7p@:_KJqo)RxR32]]ui z<+&:V1bJF@sQ +^_+ k]͌S +0`)92. 
082 ᤄtao/Cwm~Z% 3Y*/zQSa }ބYEj.9(VRduA:V@U>l,6l6\UCzn9Sb $=6tOo{ta[xEyCK-n_ZukQi s׎*dqQu!;FSUV/DLZǮ1!G]#a56CGNH跜٧#+F ]յ\#AF䀛gL?"PÅe5 52Պes$I=dR`k]>@Igd' +8Rfn8w,Il +DH:bb<[aPm%[VblV6[iVБq'dқ2r"YNh]Ws?#ŗC*ʤ𫫷~|}+ 2jx7<WV][7Wtm/0|"H^?:jKP/fiܨ(o:āqVpс92ǘ}k4!4&50fVBuI<$}=M%mV{ӆ )TQ:;<jMx(1m8Vں媎#[[{CmDNx;w ~4MɼdGoMX8*Js +5C{{FpZ`-q͞<-#wd/0@WRY3^h ~ *P(4U>h)1$/zM"RCǤ ㊉_toyܺusۛmYHXꛜlh?`ET1;~7Yh`!L|]ͨK D\iZ '^ZXرL<tP#+Ɍnɤ:h6*rE\e"z`\}@ s=`GK0IE_2zş$^hIV\cﰽ9v C 9ޢPMk]cYYl~SKs *ܱZt=&4-tс.2=Э3a],gPt3r ^/%]-~:tF ծS믎c2Bs18#`L82xHV>+ (Dbow'8sΜ]sʡe͕]mb{~i}>tCk2efpuEMG{ֻ2MtӏCDX%w]+ڴ<J 4V*5LCS 1}L2J N^r iWb 3\MBGYi CqVh 8< uށDu97;rN^(2lB œƉy +Qlg/`e22X=g1s=8I#[8yD9 tg|Ϧ}v ^[-Cwkt.ed{UQgZW][Ru!>L~2>Ɵ>6-jsPr:СԪ1vOYW*ȊIc !CP GC8N1S С*%*KM\{ͭpretM_Ua"z^ۛi_1#pˡ4#L =wB(|wZ -D N='p/=Hf'4Zqa]!TI{2gsLĮ-9hchr̮1r@ud]!{R'ZZ'#%et#u~؎TGjWmlnjٸ>=tDp\0ħlvVdDkeEq!}IT+/j."L'8 zUL6C*NYݽBpƣ`+gRg)YJ rRȳ,<~zJ.Lc]BAx 7B;CvpwzvG-f˹;/gkr>f)\[>|3|MF_L]0gn,2UaisNP t2n:_B*ZW 赚8 xu +M.~?s25Jk)W픷ʍMGYﺏ{HXr3Q_Կ>MTjaֆJ1W2I^p`Q/d$F: +rZSTs!:g& +PbbJׄP6HKD\@&SSU1 0MӸ+Am~9>_,//__ fc68Y_P_P_P;:+*ՒpV|&c~?~4!Z{tS> +uH$.0~ay\\B T;;]7ҟ]aIOAz\TQh\Ͳw:Œ٭]ٹrKކӮ~޶XkZD~2˴b8WsbNQej7ޏ$TmBz"5Is9h2c%2:gW,ܷgʬάO:6=N)\m=:}Vϖ:Gw'.8RwF92Q~]?4+6l嗭΋BA}( PI KZ:n{kK󊪊jӉм=l SѺ.;= i#v*D*Ck4b_6 g29W`Zj.5#]je+Ѡl-kk`C AOr8)ٽ)2>GtHiw&SrNg0..h5+nՄ4CIA5MU5>nS%$,y؃Qtz`VÁ,cinq%|/E`qG需N9YS_NIrSumeM +}>itӸym{KDQ}(R?^ۼ~Wcaz[n| =ğN5jSߝhk\j]ߦߴ2_3#\OW=yU.]g2Yw)s~+VE=n eE\ +[2毹S +Ub־BkᄑK^oy9hWꤞ!#)w6͙`;B'E".8)Y9닩`&ͨ 'N/Ȭ$2,]K>1kei6hdG=AMi>@jXG]d̬i{xw0,ovxۙ4jjK!K+ģV fI0ɚPr +T*}&њ7xF$ h7EҀRs*'>d=Ն4/WZi +@3WF H3CyFBgE_y \ԩc1(l}E NWg$`ղ(L}j[B^(]T3.'ڡ8!/OpDFl Uo .@9ed6{fP?/ߚ/Dڦ_5cVDBj*`Jd=R}{ ]KMINa|׋_"50pƌa,D׸\&2V&ȕ!54yS|ی}ƃx}V֝,޴,!BKw>ʼŹmWEO%7(yԂrk =d؋R~gBkrR#5-1sۄ<ҏ I^ $xBV9u^m= ErdB&k5O/L"{HxfL xY@RDT|q_9g2k^om}nʍ~H"o Ȉ&dwieկ-?tE_p> >GK>b>< OWp*=oB@?'k~"4N@}Sjq;9#fv2K@ +~(5? 4# ĹST4PX3r(b^t\@;l! 
Hӧu٪*#s4{53"r hx gxn=玗qٵǫK&9~DQ~d^ȼ%G63BFh_gƵ$#iV\qdGwnY]5dr-`Fx{|aN H1Q˳1C<^N6&KKʱ ߣAęށmKE@kx\yQdԇ9 8eE$e + =R.h9 + +O.̩]V:l6Z8]e#PWJՙ ͞dp=mbUyU!Ds)$V\pܳcς'y(((Pjb"A҇j+G.'/ݽSG HZQɭ"h$LRi;.@@Fַ]5n)+13~T}=[To=l.rhjRf?sӱ#7sLn揸?۱D +endstream +endobj +846 0 obj +<< /Filter /FlateDecode /Length1 9380 /Length 5521 >> +stream +xZ{l[y?R$EJHK(D-ɒ+mڢmY-2j/45%M6Ӯ.mzS;4U543 +,A3萮 <֤%Y͊|{9!2:xbmLhy5Zێ]_f3k흌ypEm(:kރ6t;k/k\Gm~v ?qHy>Ym'O];.C*}s k iy]־7IEr7B :MknRWA~G|=<ڧi>ef!0[" =b`ZVJϨc|hfn 7u19*[>7I$TFשiMP< d|H*5|~ $A&l$Jߡ5 *>>6ߠaְuMniVYIÎaҰkM4.ҰgMC%ZhȲÿʽT`nCK] pwlZ1  >"XXp06ڐ/O٘̆sQ?xa:])}~D>B-o^l`jgЭ=kBk_Ok`L ̈36fgĕyB_?&G[<_¯Tw^i:~JvQBkt?_ѿ͡moL|eid yX0BL5R /!?SnTjjPh 09^PC.z+0 LzDxd3…բ,, &ZL\%e##&"k:Ʈt$T5cH~4,|4ZmƊYgcp!t^V`W@s6AM, +*&A19cK+ymЗ r[VL$`$ 92>j1ZJTU')A(`6읣ќTFʏ]^o6Y+ys`.t~#Eg_ٟ Bߞ4?ܱ.Gz˟(Ǔsw Y;fP#r9RVz݃?=~b)]ؠJ=S̃khLLe( ՈjD.5QF(FQ#ʨeԈ2jD5Z2ԦzLaJMZ|am4VF⦌)c!lWff'\ihh9-.D?-N\h +Ĭ"B=uxyXANopC`w쮃uv:]`w]:T݃̊З/3eЗ2C_f }A @@W@-Гzj@WPb!Fk׬ +T%J&UQ?}sWfx_ӷoڎة}\|r8'dvP;r&dߖܧM{t`轓hc(!_*/ &bUūг ^Yϒ ZZ Z Z Z Z Z ZPEdfg 0i2 |PD!v\ 駖D4_1WsZ[[[AdڇKǟyuS O[Ŕ˕KER=9KhûOz4'zTwEv?vY0%!ВT,Љ tb,U@@g@@@.(wOieki< yJC+#IxRSX?w Ʉ4mX ;n*SЗ>8;{}[K˙9:mdX6[bÝ >{fO-&W4 '&ݾ`@XQKnBUB}[X!47{^;dEڄp,6R ilD]#6}{ϣ~IVo[]Bo&:[)6B-hb hn֨1w* r= r1,'< r³0s³,'<){,^!ԓCaRJf|ЦKp.(n&ׂ>aSY0D)sRJnKP)Lx_2S5&R7U#` 99r"N>߁ +ԼS<)x⁧x)3A> y +➲1Mbᴦ&Us4ൂl%^k$N@HI!I $ $%>q3B@m;)FpRe`7F&QK&8">JIIx{L(kMZ[:қޝ`ƬIBGTӯ +A)(F ɖxB f݌G/P)b+q +F(HQJʦhƞ  +U#6[nA)~0GMF +S~ ^C>P)RgE/m.)GίuuV=Tڰ-0`>UgufIfݒBי%4Px(#f5J3˽OU tj9q;R+m\Ҍ׈_}qM8_#65ZV[ٰ7lӕ'%[c9(e.t׉^APGZ;c>S{YG&cpQ 9\z݈ZbRpV6R֭Yu8U$}HS2 ,?!{RmQmՆÖzTzTzTzTzTzTzTzTzTzTz9:e SkC$n{e|lz3Q +J g?0?s!5ruORi+=*F''cɃ0 _%_&cjWV_jl7`S.ԅяQ"+GrZf$u5Ց%My@-vӿޚs?g^-zikrH+'i r@`Li 0 4@`Li JCk5s(j@ipKz5l`PȹK)V7e5iGLǥ%oj%5 eU%ˋSsSݎpJK/wmFR[ fqjt35n5nvFo6/r%(T@3h)sر=,=~2S;( +bo?1K<+o`sso*0,YAOU%)O)8_ErPΥH͘n. +sq$ZT:ӣ!d?dS3V[ה|U|BT% G?Y(~A@!1XE%yC 1,wk6XMP6ޥšm8Z +}2y{Fç<|yoiYo6v'9ϱ^۰[úsv:Z&vPp3dmλ;f켛nλn6o`mЕV?~q\17zŸgh]N-= 6Z} T+/n +ZQQ Hc1{X>:Pn evVF} G1h D b8tC?};U& +endstream +endobj +847 0 obj +<< /Filter /FlateDecode /Length1 3988 /Length 2293 >> +stream +xWmh[~W+Y_dIWH%YW8ر[8Ҋ{sd4$f `5 M1! X؏ycJa0 +6s]څ׼={=/^zL:4{ǼW^Z>?󧗪O޷T,g>}6g*օz.3NU-o^XO"|vvi-isZֺ$q:T{͠ qͦ]qo:!?a͆5)?y#hiy5U?77oZM7OmqJF;(H^jD-{aqP9t$<۟IR|FhWAyyP8 Yt.vjF.uZɏ`v$!^|@>J&|cG$ڊ䜴DMFV2q+}턡ezOaj&/ +LRmZ)678Q!BD'`h=TTJ2>" secDk10RJe..}*||?F`J YV3%b)1! ѩbrW*|Pʊo+,J¨jJCd†bhH A )!Q*wMbb8LB" +@Z(#: ҆ R@*=Lw@jG6i!UmeˏAرaW#IKC5!$7?Lw C^p6C2m-/r#q'Cly )ʱd'zĝ#ӑE3ɋ`4h=NUT[7qg׆3;8 /\B+g6g_ +u{*lf#I(Uإ-T:dW6Wή`vDN{.I1r!(B\DV!:&=},Ι>45mjh72)A! CnAnCއ4<: io=$m̚F:3]n-V+ j'WVZ;qq0p0'6|,gvHf?kM{3rW{wZsTd6s'괎},Vq=JZx+Rilo[7ka腌CJE%5[ې! 3ֵּ +"V+q`.{q[qӈiM#nqӈF47iM#nZý޺ׁt nnt NM@ /WWf%s\ m-,hJ_p*+,Rӱ}c_aݹCeoL;i87;ڝ?Ogc]P)۟:#6i &V+G: 5\xhֶauZI11b$׬̥t;\l&b= Wff^Xlx ' x6_z{ 0cPs.8&QŜ +T09̩`Ns*S +T09̩su:tBp:Z>{QF^OGs?g uF +pO{\Ntȹs~:騇HNILFmHK, +Ž>9Ntn 8w6c=HS).o܁s*L{-Iв5bMR> ]DA2h@YB9Pt+SQX#f.[<;?;(~7e0b}R4QLl|/?765g/ WHWɃCSA +5e";V#w dI]x%.w K]x]ɻѣՕ`%Q. 
Epd=t^aq-E[u xgv}{Zg(YNI% vw׹`HoKK_q 53-`0~ph[I{~3ż);yx"vkX"bً;[b&֣J&\GVq)ڪg5³X׏v\Zw}k7ƉRn8yS+HG{6ߤ.]»}NKn'h6,lH<)ϼ4xj ѿH +endstream +endobj +848 0 obj +<< /Filter /FlateDecode /Length1 1144 /Length2 42889 /Length3 0 /Length 43673 >> +stream +xtctfQ-vضTlyc۬bfŬmۨعNc55$J "f&@WFf^1PA 0Pv3__B(nWr|3v2Xyy9Xlm P2w5wĹnvf*.nΦ.1G/gkK+W& 1qskK 2;MJf֮(@mhalct`2MTh&`.p虸ߢo@-f,͑Ihf.#Bp,]̬<̬s'?By9dl 4qtpX۹Y[}\|̬M]&oas:[{tYx{wf@;+ۛԥT%Eu뒁W)=pG%c- u ).+uG_TC)H' `DT3툨4͢KK._f6oIixA&hېx'Ie(0w[Zg_ Mn?;[ }LF>SˆΆ<_>Pqkt꒤ +X 2 soY2c!we>ܼ.^WzS7 n0De 0͇_ڢ(yTtBn 6ҏ“L9ߝ[(fߪKi<\9FJH,,~&X"3́-2N r_aAfiִ",Z(&׽M|;G)Le2&7CEL᪦m|Kw3}o~G`͵$1&d,{Z(jJT%|j>lJJ^D!RN^x-vUm\I1Gľקx 1p썩Piu~^ޡ+ 85Ssd2VCI-ޛ*eM ^9?X +V/[H)@'~O=F*f^1qNBU lT"E bpX]I +<"s ԛW=`C8][~taNJNx٨}HZW4bLߤϼfZP,7er1a m2p.N2d2e LSH)#.$%u]U209P]>ydDNR~Hey7hg~m .[oa)=KO]Z'W {%T@D\04Bg+a39Rg@[T 4zY>fPh\.(y +( +1G"VbqP wXkl}딫F[uEG*fv#1p_UräMS;ځs m1&iǯNxr+eϱyV<ՙ9OD( ;Il*T^~b*NT;1Z?L^Xʾ!m,p^䋕JPIf`ҟ B6}vO Nu) Aj֦\߆8ѯ+]RTC{ߥKF ړySH#sKs^2݆y`*Dvt,!N~'&ϐ2(k S>8MCV?(b%E$#2FۡM™ͷkej<@(:9v v f0mۆu=fW6/Za:Bpm|mM:6% +?ь7d)Efd$fP]kY5پ +0 A/$YhpVCSGHgB8QrK( F +A4@O!?|om*[2\(QyIVBf& lYTwĝL7>4zL6?NlJjl[TCRRJI+Y;l`y1h' f3{CFld4tM:2I9t6'vQRDKgxKl2 3z"LaIkmA8胺$*X:4K$(Ǹж}O<{BD8##}CQeJ.O sM"|߈B4 a;Lr"F byG<2[iK1ŧƫVK| 20(~tUIrYvmUngmz7vt<%Oem {Y*c p +!M*8W×jPp/OM+֊do24] tN=L_J7סr) TEhTy8.~Vx 1-M:9`dbv#mh{u' 4Hշ)+\)NXGU1ed8p[-I(l+N|`n3Մ.`y@*6/c"&<[S7WXN}Z +pӺV1a66+r"b?#`y}/w2; +oe=n +y ,<7uǕwdII&~4,};@SVǡFl[L +<ΌQQsP 5H^F9_4IYB +N^Wgxg8 ]!6㰻|s5 W0O̓`S{Ξ@*%(|}_c8}:H&,z(,B󡆥kRϴ +թ,u!E |9}mi-UqrH^I2頞1&iC|dg(Yh&}<:nn{{8(i9\jƷR3hޣazSjy +Ӳ [^x3ډ +C._G Dhlț l4j.eΩ Nsxy! ꖤoApB*t7Tshb$!8>D^+(=.#xA"M䛧+xcˠ +^֫Nq +SPy7`^6ou +*mDE:%A͏ݓ&Tt5=c0zgx>raD0EB H8jVbü<=pSJu{SVizHß T0JCj1NpP 2wLXW&9MX ^[$F'JSrc]_(UA{^F^݋~DbwnJD_Z3WH~FpGg\ o576~|9Zd|M w F~V\pm8SxK>t%ANp+ZX~#C1h'p & (}r0 ^SV.VCuA܄~؊CԻ3Ha;K[8˪vpx2QsaHA~g6o 攇|y ] +N;::)IVS" (1tJJ{R)x0d*F)az#7Rjjf~}oP~Ҙ $;x1jS` qon +k4xwFVRkLVQPG +4n$1o:>jh+؟8sqJX̧VC&4/'$>ibdQ!?/H8ܖJYQX1ݼFOsh4)Џ[{^`l<8ojD;/`MS;i aHxKTq!/uQb@;N, lzָKݒ=g8ӎB Er$9O)J**Na[ ;*Fkd3mPQOg&Sx:Q`ߟgU?"ihd[j2@%Hjb7ˢ_΂*g$h z +W/M;N$[S3RnwM+'e֋vL$,Fp"[s1ElEێKt!_~6N+P Mx{VaLUǦv+^|o+X1{RX /0~9]3%}B +R̝`?.T0ߵ'Y)<3/Wu9rZ(3N3侷 U,nMcr{qB~Qe7[W^T^dku"-*Y݌ =RLK)l <|ClO>ռYr; BC̰:>sCiSgO,_^)6Z}%t4x\w\$_P5W݄+BU% `f(/RH`BRgtps-f<;¿DFԖ )ePkzFdKH(XG!,VzV|!lxk8pl%g8kP%3{o{6!I a3ZsVrZH_xuF5E| HϞ] $XbHe4+a`Ctã4W}0mirXr5Y{%:u4[ |dN]2α&_}(Svv zr^v߿xx~4>ZL55Zn DfXwY8PxûiKz6 PhZ]EȋL:˥adq>G_K$4L;$0(Ja E^GCn4UnX M?i]q.QZ@ K +ހZOy"!oBZ }2 S1ʷW`0~Se.i+QѰ1Oxjx=[;M?\4N\uqvqQDph?rRzcSE`dkoxkHҌ5Z!Z֝|g #)?“$ih)+n( ȵ`DzNѣ.H֕o_(n):2|+lp[`qgJB|*_ |d(R=~T8b'79KT1\aY8fMIK~Htsz&F>&ukf6]HZ4d !0'9_48 W_҆d=}`[&xͦ=u&K p&Et%F#0z u}q׍6 |X= n @#sN@gV|k']UJMPZ7*(xlssp}r#)-7!&]L+Kn +#bPjBQ!iV8\a]ͫ XM%5q$.َ'<<v>fycco8ޔ):ʱjrg7g|Fq#xxB?B%\m^J~^]A!k#VbW>"'lnȪLFU),Wԭk&#Z^8C6h\dݨZ`zAbrAl1d'e}CdbTE*d?N^kn⧭I$0$Y6]ɱ_u"wznT؝em`7aVrrQ=K $Hw 2]=elO>R!L=zj|*,M8Lf1Bb-/MDI:p'SoG :R9<לF1Ibr ]ٍ,ʝ&HΖU"3+*2C\@AŒUaԵ.c[t+/] RX7mݷWF9XQ ;%T9l+i< Zu^livQEˊ;d(oͻ2tA='&A!*+}.X4j!nUW_M>Nd]յ"x$ӦQ}cStݰcJ)9f fY5\J+fKЛ!B)7$d,no6,pO':Ђpo\p.()ɩ ){H݉F;z6 W6c۱))8~BKbJNd8o(P +l㡱U:+!uAzHNu>$hɖ;!aO% +{@Vo?HZ{Ҩ?k?kUQtᬙIiu3[<Wϙ|2Xa9CmZ[of b.pA% 㲈7B9lt{[%.v =+lEE מYr`-ObN9_Uf dyhnX}4bAC!Q7YޭlkXI,4U\;|Njd ر $ƌ\/8'"LB%D9jA"B߱ p*{f ~8PS0(S?bT@`1PnSOC&eyt^i+2;/!7ƂeV s\JnT8;=P{GPcCմ +C#A4VeӗG!xtd2К0 %K/Nbv|^)]9@/imm+R[/Wǀp @MɵUնb\K-m@qMg48 QäQ+̹ . 
+S=<3Ӌ-y= d8_K*1~3Gѹ8tj9RdC2%iaBPH42<VGC C%lߋ6b~A>g(,(6% ɻ +JOwADFq_$O FTaHB1ѹ.uv:DG͎"8ۊ4F#xaw9r*p]&bΈau w:dFqb?U ?4X*1%7td ,9neMb2+//sTXbHUͯ iq꣒,1g;gI ^se2mAY3gۆy^lܚ'nǟZɅ'ѼrQ,ƶ -EyJ(ETb Ueub'v2y\Ж]ݼTQ[gm6&.YM.rFE\!w/-tնYp7EӼ^W>,5crg~Y&Zs+@gk E†KD0 +O<|y.ыz?#k^toU=O%Hu^;!5vX_J2r0I-%2 z:Bh^zY-6@~3:]~G֜o82=PKK6Q@*A{_/C2`ql>8N %@@,$+l`$tlqW[ze5``۪_%J8/IFA7΁8(\I~Ojں* [[v JL ]-9-FR/ k +8RS@8Xm27:+Od΅-w5fW k' y2KJY,GG<«<5HLj4|{IEzHPfHeOoFᯎdQS&rw +̓t-1W?;֜h[y{یA8gdD_ [P>YZ/JG *s+iJ%$sv CTN+섕ONbcpGUwĤå+G"ReϦ#][mbI(>v-̈,&OʥK,A|iW^KQ+.jHV =j!ƚ e:Cy撫'ؽuj!w۠>uo #Pfe{B^o~,pOvLqH/ecrrN*zJXv^Iqۙl 0q۩J|m? qP(i Is#EoRZ7HPP }h hE7JyO"ra(jscQX,uRxa4%]2t-N&TQ8!4(Ρ]TqoLTb_g;{po9TbS@<C&5;)FhoSΥ*q0``-j:#X |XQo[狝5H7W͋Zi$8{H/c᎚BIr#_2r4s:_K|IqN.յ=|#Iv8Jt XXLdpIp(K&Qi x)6DֽklU& L9JF'W(t75HT|(Mwɲ Yu> +s"o18>I^_n%zf?Դ%SNoZHcL#Q*dщr7-0rZ*9Zދ[&M~BI6=wN ӁovvC=eA_ʞ:Mm==?p+P7JC.;V |ۤ38h`Y*tHHIR&DH݌3S8CEcd\v.=V+䆋K7L~j\\nۀZvԋ,dUr7kP=毣|.MTY;$!$6g{.]ݒ % P6ɒ2=:Qs>,@}uۢ{|H QF;l6=u9r<.O|՜ Ҩگt!MsXxvD}6EmT>st>e_i;Z t#oEee`}%҅VC̹Q5B;Rx ZcE;[LN.o^h!:!QziW$/ +|"u<VuN$<0~Ruk"TZ|1Uּ5`O)`Mj-%~G0kecb;Ʋ3I{%</qXp/wy1]K Bb%J*()c-i(βPBWժ#ٗ'l  4rgrD9o 0<~hCD̍zM߯\4;įَPD"s+!b\}$|˻٫Lqknb^ lR>R6Q%yp/pT5(F]rHxC^e S$ `[exW*s*Hy3W\ @C +c!Wkb.lv`YX]zrka _43UuKCخo|TCbtf 2A-r* `?bHEySI'i@3 ;6* +}=ǾY]Iy\*KJV^yOlR-F?sNDrcBɳ3%Zق9tajYu4pPQMFvyY1q830^i +.) +\CdN&IK eg~QuCO&8Z13AhqN6šNܧFƉ+_RSpn 1R_Bc@0;f_͓o2˭x 6Vud~ w[ N֞`kCՂDžО?3 B0_lXV~kp<ϙaE%g*R54vyǞ{ 5olBe讈vNh={τ;K,hq'm=&|9fIL|PA;GOvm;eq{ls_rSVxL=l%L]⬂lvn Hg3gN(3V@n wJY^} ޞlG{`_țxb^  Ln\~nF{ÆUmw%IN[@Qk=g?9heEv2uF(r.(jB7oû7ݚ1&8KL}lg\+4R(H0%}~879腊bCɊ3s_39uu1^5'm_O`n|ui~5?-6817,(2*]@17϶-Uiq>^Ct[WSZK+S_XRSuSS6lq_@ 1wJlvgMo&Nq1D;zrAɢTZCTIGRa +1Iמd0]1}r>Tg5Rp3t䌛w !VM?i D?/{Su $[kl$O#D+Q--hv L# ![ }i] +%N̆ET?]0h*%tFCXwCgRtw%L4OH)CXec?vٹf sǓ(>d?^qG:ODg*7Csn{o'f"uߠ ~67]^! 8Zs&b'VyܣdχRéODXkM> yEi֞y ͹C+iCv [8Wѽzoc3g5!jc[<;c0H6xAQ0t 4Rw]ſ9ylЩ eȋ~)څ8_2LznVHi'?Bkn5@ŗp(x ]pa=?@Vk7Ia2v,.Y<9Ւ8a& +8 +vsO;Z!$L!Tɕse-L9:HG BXb5@8+}eW]Y +%xW_}đV8 &EPYa'67G@Y󔰏ĪJ)VKs7yzց`HBƙ}z1zEo2bCZ4%$ч}׾1Δxq]ŚSZ~?q a.O| +-'}q>+r V+$3%їr̳nakIԜYec!`m&ބҾGUBd"@ݴjZo,mrԥ 戋ʨUf@?\L#I0H2GIqkϵ8ϯA=ܢk|-1Rܸ(Yu<߉k[})oUC7k6F+[n{̤e){w"^ʠe-y:OS]$=mE,Scgw?҅KHVC+1X xG5`@lOJ)қjdI$jJ"#OCY{"8ŝ|܋(Ύ߾5QrezBQQ:TH[)j]C8ĩm}@i4J.3 "VVa] ~ΖZQ{YcI +x + ]h2q<˥@%Ꮠ;JǁirbYY +kH /E "yT9|CPȯl݉%S3K"L{UY䊱ĵQ ++eR4CD$xhTsVJ34 +Sfyfc/?S~w˫Pӟ63B/db=#V %A9Gl Ձ J,xn[蛅rhl0g&XDb,u%Ob&$v[i\3Y3)N~b\$gV47YVFt5-Y]o P"DqU!ڝC;RwewRU *cKy9[5[Z@-vVrΟ^5$2Ni R?@ՙfO]aDs~V Dlito!>|H\g.ֹ+qZtWBQl,nu D哰Stq$L-WS0HЖMw4-!~O~54Xz):l.vGP?<#N"Re3p9͒sm{YȩEu2G>(ټ` bTX +d~N2?eE_?:F#QSM%dᢗuڧCӟy8D6`,6 tDs}Ҽ% + +|{N'eyr*X;p~~A7Saqֲ![BYFKĨ``Ì_M旱jc FH K".$M;nHǧ#Iyyw-b2!Cٍٖst!,2w%%:4}Ό6x',Gs7Hr!='>UqHk׿mIE2ְ-\VQ PfAD^)"1l8ira6y(b]#+9 )l\y2(g"{ vITiTHws*|e=Pn 7`HYʔw;0L ztH5?tn JuhN HْBQNUnf#&U%Ga-8?e ϟX +bz"W6Pi~gxڣTe16;͈K=*:STfFt/_[@qۊ BIVfl00V! HH H[za%;9[+`.eZxPn)HCVM^kQe7O*.лÜ hNf=+:3yƙ;ߑhW+&aYcaqz'IP6֋ WK?`v,l:*BjtIL\vV 6MOD_ _|gQ0"0tܦэ77Tfvfg@`ZkbX8ðJKV'A]Mےwؘqz䡙ȎMl,(>f>  pnu +9 ;\_9BwR`tjyH/~$D`ofO4ܴN lA;1x) +H9ET Ae³+gZͥ(`s5DMpo6ShBU + o~41~WF0,f$%_o;xb -R]*f'{͍t?'-s638l VpT隇0TZCs7JCq~YjJqU"X2wZ"D5OsOH%Ypլ?T,{hwKqΨ=wn O9Z#8 ֆ9{)3 ܦ)4W#1O[x"V;Yhk\PK%޿KPknz;k?|4~=21!.`}ǧտ_ez5$S5~lo^%I$LA*qAZ s^Jf)}wҎ{-*i0XKj+g\0f1U+J46͍~gJ:w=YTQ::uJT FM} 8wbN.3bxi6#Rp (- 3i\ץ+cM.f dٕ$K)bZ`x9&tľ ӂ0u9q= ##_kb{cAhBvļ ;s1I%4kZxYձUt)v>,=Ii(FE.bm{b !`aJX# + ;z_Ne39eD8>3LIc^-DC4\А^d c `ʨ&&n\h{֡a- A1cg$7E'auUꩵ}D07nZ,R۩Q"#5P =ي9^KYi |ڀI Q_NSi0Dc(mm(+RY#lZK;Y IZi~ߨ D?@M4<`3 1OTN]-G[ov&_jTAPrm"6Bܳj@KW5va I_ZmKV'N>d]z?W%r9PK- h>ފF~J!lf?aA!&1 wHRnR10|Zq2z)P8F*2,H+Kygqq@yX ]mE1. 
+S;qح.[ +Xxq6Y 9) 6'CtivӫaOb㱍zwtMM>rܽ@OP`lq'Ӥs:j\E3 Kd_Orړ6=Q'>&,Ϙ8(r f~Y|:4CqjX=-;GV NG W%whg"籆#.!Cw'֣i"7:C;f8IQw=Bi]*{UJcht~S\|NyN1PD`gsUtzpn/sўfP/gPqׄw]^=%gΤeƌka;T?P'Ko5];gTˤVh正0|0@@wJTARynHfx 8zٵgPVwR:|t~{-3$]|oz硃@zT10Ͻ6VL1j- z*贮G0HZ{\+iADg#%neMc2QWv-9mtP\?g$_Gx +[_9Ub+D!1);C@. '">]$RM0]Zw>+QJnȆx]OXiJ)7{εhkzo8ÐHF]{90!v4,QW"iqT5?ydL3 Hn|`؇ex?bpIEEWcs5?|kE]?Z1"]ǠF)ȧIJ\LS⽤꾄T.dMdvvcyW C8ESeU>e BD,VzUJzzmcCPWI&;/N9#zg ֨LX$s'\>9$5 -PA,JTV#uH0ܺ+2}PM.DQws.ŸVvvg9|HkР.-4ga)^15 AYjư[}$/]^%T0Bz'~='R|lA]WĽa(E2WL$¥MMO}˵תqbtnh7J)ʬx ڳOdFS!H|5ZAsbBl jNpX^3ѸF|9]V|(x:CSZAkjKCD}1WAFX9ՉEشT&Y#[l"#} rIVq4=r,Vhܠɭp}aG '3Ƨ_h7ҽ/hOV z\zn_AhNp˥2gKV$b,E"I3 8@/H4j"s. +T1"QV#:O(12/Hudb4Gj` ;F*S- o俄xهR]1qxPDn6q,e\N\D ߎ`doxҐ嫋-V98zYSdOP_ij˿ ԧ{Z*^eIsno԰p$'BSA2ghWW4zIJȪf ̓T]HZ+y+bVSL'9Ѭs ˁCѹeq1}}l*OV ΢+ h>y5C>vML)Dh1 o`Vp^4[ۀvwS䝏44ڪަWujXW&(N|\E4TX-\98RЀ]4P?vyV&eyu,"(rRrۗE{ٽz}DA!v >~ˑNs%b4TD#\-jظ0+soRdkLhaKL<6?7%RPZC\ր[i&,_+|iqp""' o; ! -EI8J[A45DխI`ٱ&ޕ|lK"`aWN뇇]nwWNxL.rx\lE߉>51GAl'&䶔A9ڄpEZ޼zFvpxZNzvm]3bI/\&ߍ.zL=L2'N$P.u7(!(RΦ6Ĕ! &4 "Onː"Brؕr,q~,PUj}lЦ9I4R~XL; 嚴 Q@Pzț+)FMhڰ7хJYwP\>CX6KAxBZ_I@bmc^Y+򎵿L?c˜=Ak5" 4hc 56KP6 ׏S(WRAw\@y+wM^8LNo:*;#/ ?y-! w_Wzpc%zh5A0yo414KOK{'kzU_rR{IT>W`Jtlv\Nʓ着'Yaʀ 3}+(/7V1U={in5n۳ t[+|H;*k]A GêҺN$߼AUnQC+l\ 0ݨU=&{+,gDأYYVJ#o2zAd!/Yv𹯊I>8{l>{1Lҙ 'w5f&SR^B`ӿkPvP#`ªW!MItB3<A#ƀ@s_dSAY2WR¨}x @r63:@Fyd` LƈI% 6X;`PTE;LOw`Բ4OQ%9~1Q W7G -NOю3YGzËA^>|3GdzB'ij"XO dtīa>sюh*KQu>nM1ڒݱuV= s +HY`ENپt聳}4Pg;ny.R9U%y:(׾'(|ji%]a5r*z!'sBl8ؔQ}m-JD|yGAU%\'&o"\=nx?KQwN;:H9 qGXsŌA5LY ]+3(Z-ZXQ"R7y$p= ehe' +Tϫ?ԁ9&^='7Qv%4X% \oV=67WhC2z෬o9QgUd1Wo)Ǩ<)Dc/Q m&"pݼd-,6co,rp1gy {1^(pd~iօvdoǿ"ҏ)Ht8S`0Gq`iMzTFD[Gc)X-ov%jcJ].F#h~zڜq«^c'K抑@e*_dnNУ7㖍k`hF+<0˛.1 ؞GǺRs]nGJsYU $*4V"]"Wy@-aJI,i?IB&]Lj+9]q.A*msePU|||H42D\}Œ1WA蓔FXxin,<"t .:Jn:pp_]0s= L6aio 6J+]?D΁:{(K #*{?Zp9dWj1 Gjqnz +h|*fOX󌁛KFjml,+xo6a4f9 }5(a Gށ_/`q1 nwLa~:>_ +Cg8jȮ&]3b Г̸"D02jJ7[AK/H]~p*@J&?5T³DbF88RwrȺC?bׄu.ٌH('IqFa@, +o㡏j+M@KX<ԶmOܳ {wa ?U-L/e}?{Ŗ8cASeh{Fs]7CآJ޹eHp=tbV{]8lCfddAn"_LimirF7mVՑ\N4*}|X6 q8Ǥ.I[0UgNv1'I׏"Ɔ/Q %#@~N2 y?^٣hO=1x|\5yYRCN6 DeqZvMTKp52^֓.;^RTᠷnl:C 2\xr:G(3 +f UD3Kgee#eXѧ.󴭐2ǪR +̈́2T^᚛ց Y8tE-=&^wGl0kTL.IAAbAݠ;Z~*MBC>\wX֐|zehH}.3,˜ !1ï4p!p0Y;Ipe51 j_p ;z OQrHOINJrKȄ]ġvx NF+cI`PzM,V: Oɜ }e8_gƦMmRعk4oe0ӏT,;R}q)3Kʈ>s&ڱ!GΖrCBegbu"k/2iXn&HoNεuH6Ýb h#VŶ`YzqkW /X|F;oh:xf0Þ2hBi_65g,}⠫TK V8xF@Z.cj%,vJ+ڳ 71" !h. NSz~xD fi ;m;1O.4яqeۻB3?gէ졀%ȵs6,&HEt#%3Y KtP\[ 왺 0Y2kƢ)@d TUʠbP6XӲfšCY%ccER=qX0%n(DգzYdG] ٚƓ9zI HG505E^e-"xWʖ:%b-!p~umU22VHv{\pL)BU%RD7Qj# 8,}x*ŤtnO؁6nd]9|j T\h*ݦ +a* ;p>k}[+Кm9Ƽ06baLV0xO F)ux^ڬge u 6K2&] 91}҆gR5e%#&#'}JvUQ04o-(!.pmWzjt+W^fnfzaj>LJ(L..(B;QϠ%@E{R2l6AfٗR[/t25@S*pp >˪paݵIFg`xƗu0r =K `u@EPo fNwy^h,o-NOg+RC}OWA8Ўp6@$Έ?w"ۡcJN`Nѻ2axY*x߃#)4+N(ɢK ;Ɖed(buՉ:|sfSTEG{#c]7AD,6|M*]lKOvx~һ +."kl[\-d|(m; +Eemmp0LT=lu+#}uymݏM? 5Dߋ!$K{B'5Ie?Who_=x~SQj%2 5_t,p +m01tt8QlVS\S1 1 =hŔQs9'Ke(޺>c9?5Dj\-,M2Ȑ`Vmg2(0ȂBmpKXft,cxָf\z_@.&X:Ctg&TIBqϐ)suQ]d\PDG_$7tЀ,'0rZWRdM*ji%(IEaO2#r9Мxp~OY,S{{l\Cs kmDQE#\NM5Q7k,X^2wu@R5#hnyŒJiz}8Vb)'Ὶ9Pf<7߄Mx!j6?NY?g+X,,l$t/d՚{V4#naXE/\d-}FoxF>&р3/boX1=QHLAZԠB1 _K <`Nf_Rc +4w>ãByl4^g$ճc+*{^f +F '˹أx9ֱgK*ϐpn0o`cA @>>Jv M;RK"`ܢg[D {ӄ3iD8 倅h&IM}$ytz `2hqI{Rhĕ ~V]\Au,[SpTD/l~u% s%q[˳. 
"bDm<;D`!;!#:XN;Zn^Ra`g]PRkԜ=}Q[ϊo`I2MPipSiԿ†CsƂ]H0+{KSA ۔8_mI GoOi~=Ůo¨}fHs .I .뫮A+ C k{)1Q7@1\f]XIun .s5GOcJ +)d=H.c+T('5?җcIi7TGOX,9l賎b7IG̃Oc6Ҵi" +DX[VD!B;~ HKc ºJGѤ!7ui`*>7i<*goXK9'Bl 皋< emDX:Gh]%;UyfnCb=Y]d%Uu m:kW#?Jx ̀)u^ȥ}F`uSQXQ䯀F}d7!ōT&Z OBzT=H0A衊ޮUk{̗-D݃a~c"OЌZ&ުUUgcN$G.^D\YjRt3E3by:CzBE,=t }hx >4)Jb*piHKd̝چ2W-&.x]=/< qފUJ񨓅Ikbp{| aPCʦwWF^4+Kd}H5Gҕ/1D΅we`]34QP/6#R_'0#6w" b~8V91ƣFm]g멜)g )+|GP@eޞS_VcQ +Q^T5SRWQ`ǍqxN0.9eJb&@'TV1-;>vxz_m!MД06:"Y]%@%Rg_"U ${,,<&"\> z?W?L:R+OaǴ>Dq셢fC|Rz 2F ;ATL9"Y>hɴ,;l@>WR*-?x#kXPi pєMtRZ)åGtI>l憈ti6 +/Bёjuy1MH=8t֏:.q+ pwetF 3um$ao88@⑛:G{iAϞqC7!"_k6Ehaxu탟5spr ] qpE'P9en #܀l&c +AQuە (\/gg>bIjtCo>4]fބdrM-O]L>Z'6(PIucȠhf:ÏTu`iֻ܍n: 9\8/Kd\:XwhN)h,zh 9J{iɑ,{bѵҗKkxF/YUQ5QFxWkabNrvwqN˲G= +a(@ a(Ës@DXZZ;=\tr/` WpW>;أD]nc~<^0ό} O,?R= Ic톈M7һX*{GI!Q4vt2l + l۾81j:'{V}]4M9;5sx P.RlEZX%?vv~LOFZlR-=Webnul-.c=PFpq3[[TZ* CʤiWU,Zխ،㚺[f*'>n0 *Z +IzyW#8S^}<p5jrx'L8ʪ+nCQe"zx Ƽ]XzAs0mq_mUQ\ E7E?Rƥ/*Cz} Wk)7PQ>mMlp84ĸp^fJGN\ CۉeF(솚E=瑇SH޵2nu!so%wI7i vq(#{yc+8Ee +`kU=Ƽ Plؙ ՙ@_,/Ac&ɞDrw;vǓQhCar5QYs?Xh%N4sڧ&h[ODr7\M3O+W䅟 w0{_{mVtUDe:7+1"בwPʯ9<"+!C 3rÓ7{=wR72"?N2*t ops]><$ϬԽzYL['B2!7Yջj⢙;HN*]wL5&n9cZ;Sx) ĥ U%Em.uQOrRCf,Q`Ask:GٗNN& p} eNIH7S2r@M[0㜹ps*֑:uTXɍ25N*OCx{&͙)'OKT<$o0xHG0$^riֿP/M6{4qoa9S%M㰺 3@'~F!g Jǭn{4?Ck#Ȑ%1ٻBU+دKpn-ٞ.i?CEbһ˼ y@grba}[h"-fTNY`maXSPL g,=iʯZUBc?Nqe!8 ^C뫑BRͧ|b)s^/i콅b}x+Y*Ԯ1/[BٚXkeTO /37(54^?IJҘZ-76OaC-4nZ7KJ`W c}ߺʢ@ut;-4L»GK%2NmT2J fR ²QSp?wCTosZ-]EqQaF.|#3A:`.WH΍2m-Fz S{.3pp}|4-<dQŇ/:hpd:uWhڗ3|iY%RG0X~eHx-*XU,Cl`D%D =$G>yw}sx0UB1VCC7W mMO_r'&6Ps\jx5u' XBa"Y>ˌ>$_XyH/љpOWhWY|e +=Ёz?eWU h`Bt ۇ圽U 3:3|^y$GB鬀R= +[^.mAdpߛ|Pmlׯt).Po*2{PU5+ d%ҍ-ɚV|@Ast*AO;#2amǠ9Qs~ܱ)Aǘ`?܍}C<ƭu#FCO33nC{WC!ʕ'8ރ9HYQ\썋:R:VL.wPr-3X`T A5U)ŎY9T3a"yԭBT1&EwܠAkXt*d<)ЏwڑR4mcMj؀ÏJ]=9KF10&|\x6g&9z!X ++% ,7B mΩ4 @{!ccFl-:\EBKɕmN7%͇dgƣS˂D + *.5/ZO܅@Cgx]9Rmș7vCNK._kV026[-քNjPZ)YWt{Xq@b7\EKSfmf 2´9_:V&H-_ +>]p/q.6հG$ɔešZcBzUƶ΋I袆^ͫ@/Qx0h3@1n+8 O]"U? > +1_S\=Z:S 2yI@;)n1N*+T]M2 榼_;C}fml06-Ö: ,f1.xC.0B +WcvwkZ +Xj1o*p<`O$ś@zMslXYQ"_nut;q5\3#ݚ/]8LB˦tzCͭM|k04z 9_泾PϨga\aaĆ5jPgFv^v}cБbem3}d<)fT#a:8`6&Ds1=U.[=d[BMuidyvB6) +7oCе3EzS +Nmk3\Vqծitj[<="L/i-As\!a|Pqa8(ZջEP +@yKӵ#OHT-ĔaO/Pܫ>bWpi%2ѧa;&8K?d rMO+pݶk$K8O 3QdّuʬQO9+?A\Ġn}bM{:6d\$^ip؎^83vs}&*CKCxQ?Lx5e8T/AoKq% +!JRv%W굶/KiF6BIA5G#wQx&omIap@.[9F6 AIV6:dĐ\]G!Byn[%jnUD8ۮ?_[o77@x(_ypxMs)0Ѷ> +stream +xڌP\ۺ-[p4wwwwwkwwww\C/{}{UٜcZ$Jf ޅ *`bbe`bbA 'Wrێ@trMPdq09x9y,LLxbnVfy @. +rttS߷*Sj377'a;=@h-@djtT|..< v ' j:%@ tr`lg4r swc' h' S*-Pt ,/030WJdew)`ne (J1x:[ݺ1@BX`gs6urpqfpkFƿfq{3QĬݓõ{{eneoffV@i0Lmؙ98ؙ@GԒjrkel8}?W3+S ῳ1sNV]?c0;? 
3z7#fTЖUgr<,zn&333yJۛj>e8@@K@]υ,W$\mmS Yz\W?*тj%]yJQF310n,a4Sr1keWKoV@%_O?QLLGd6" GCYWdX9NNƞϊGf@ `d `rB`ÎM]o)ߪ=+ SƐzaw)voK9,"Ff_q'ҕ+}h.=s7Ӥ]ćD ~[@3{hWD(FPɚC9XjKztvGT4X἞3hmqm=8t' W5i7h'_{qsFYod8aazɐM$a%ȴ9Q?V&څQlBqn|KeumP 7b_ؼ%4Vň۞iIWN&B^g8qUj?M iYWf%2rnp(;LkK`c(IC*XN|cx(lk| DvI#0A;l4M'8Kא:>/~_%?J[$,i+JڒX6Op<*2e%Qj dP;*W5ya,Bf(6)\Jr"#]0 +$ŤSwh,lEIVrɚhbzm:^JzSVMiʨzqS®כZW~ tDѣaphQ}i h.b0-V <-_DI\VƗ۬?K1SsU[^1-6WjZGzq +}X{eh؍9OZ%%^-ufs{A;PgrL.\snj8,HG/d1< ;`iMom䮔u YZA]ݔ|[0@sfg|q/f7%4o}*wH x/ bP'1Y +( ;gqߔ$ Z8XWqf?, &]յ/30JTuL0tFA0-z +v2fݴSX'"~dE \ow:Cv֧RK&\sjnƙ [}sl-Q% ̵J?kU9 Ju7݌Z^cq{=$> BJOg| ;?MUHLe7d%~7'L3]x3I%d-4@!mQ,E.x3.4mACCT9 B"4s+q +iiE0CBsP-K#j~853jvڊ} ~P, LfEAh"oS!0]mc>|[%O=s+U)_Z +<)Ν2OH { ʕV)%v3!+Xd +W.CM쑈bqLHWh{Ʀl*] \#"<)5+ɀsrƫgƴ~˗@G0 u&rI|KtJc9 a%哼|'BKSEkXsw"sWLR^4-}Bg%oaa]?{LTpVfYrͼ6T 1FF XG³pR<͵;RD&-ߤF݃GU  +$*~Ij :zΥw`*mK  y\y,]VlShm7<Va7;܈ϯ8 e +< J쪿^EڮyU5m_vM;G~~Up)>RV&/&P h]^IZ J;Ҽ$j%Om琏IsZQޒ؛]_=x|yY"t͡r6ώv];QD[VV7gTsPVbP2Z~(oI']u!TI +(8 ɪeS:y-*k-LE2Ĥ&nr"~2CLqkx Cwʠ.}QCsMx a{:lIJP%G(B19j5)JGRY#eY{ba9ɑȾwoC\ oߎLDLi{i"(l:f-%x\R`TMGYDtK9m4_lJ*!&(/ʰsPָ@fkKȬ_o\(RͰi9a2boc:r>ܠ}[*5wËֆN{|r?$R Y+p~%Hʍ^Q1PWLfK#)$YۖG" ' - _zn,eơ&`@rpbs}H2^oՆ,|sSi#P.U39 x2B%A'H.e-n0a z&{sXLӴ6,im{VB몰A˄E1]y(|8mt&e; {rX$=0i{:gR<> Ye O:`vts:Ȑ2mO +MƶUftP}\"eiqE&?;Ra6t?dlD(7_׫ڢ[?`P7v0d0 B~=a5X~01eLG5Kx~૯̵ 1G,}O&EU,V|{KdQzd0m6M֦<~ƉN~\zK"*1GdT3C )5]Ǘv񕪅߬9Q9q,}]RgѶ>F,l<6cto"3Fa%t1 76G:Gm m]*ޱ! Fā>tL1yH]ZƙF"phh, }򭜟Jt ;}nk|;H)o29"; v.;)M7ExF"4nW2 6h\&ly$H4*On!'F724nb^6i#3jC>*Q+zsVw*Pkļڅ3ukϥP>6Sm5xOk5)4^¡ +2By5;[辆DZMV wD`dKdV#zPaoU KVFvHz³h].%|\lp +g.bޢdG-N +2?BԂIevpe(a;TΈn 9wH /zީV)kq4/ 1 ʘm eJh3b+ :%Q]e pi]7|ZWu涄ua;v7#h-aNhV]NӠ') )~M ZW!⣺ISa/? +گLʚ嶈R gE>!RDgeY^Ґ,z=j~~ky'P)ꏯKЗs@%ŷZY(cGRMoې _XdafjWS:\J+dڬ88k9Od4r$Et!B\`*]eA+xJfx0C}K •>ռ鴘5vטDշ,tIg܏X 1ֹՏxSU߂S;YMwmogŀ\˧ZeWS P{'ԝX26 +'fO(Epg9_;YJ.r$)?n_{`D7_%dz! <ݎ$>"7$gkIQ]oi57+Ekrhv#IJ$6Z@AK%UqرU=[(&R[ʴ1 ŇM;FXyΌ=1>\9I!Oerݘ"\=)`vN~Bk# Q~/_lZOLKu0Ÿ:YE!zo-d*a }R@Egv%-B%PhbŅpl-A>0)$x g,`Λ6%=U<3޽t:GG<Қl0l]`wǠS(*I 0̀غ.v )2r̮euϻE33-fbŅmL3XT#/X@W&JX.5 hny! c[} O6y9Dzӓb$Bot#[lmS>Z wa,#̄gŖL5&sN +ӖNa<( +ݔm $ڪy[`k_Iն^nUɝ֪D S5”eg0Sy<2)qRZL%h9}"ba>zs gmT'Id`B>>ݏf鬯(Y\l1p5QHMSvܰsq,5Qt8V`[ۮa/Yt56C!k'S$3瞖qo[ +' T33Q/ڵolh+HްǜbYkPp0XI\$[,xg_)i'C B#9 L%Ǯ.d1|bWQ3J!l$c3 YYow.h &)=48bobjl ]f'uGa)Ҥ„"BN dЄ҃ZDS)kQ$! 3zQtJG剱BD6nĚh&<,zFWgSVy㎎zYy{-%>I E/Tm%l\&6\2bSp -T>܃RQ#Z4n5fSyبu6j vf +NnGx-/%hWv5/ۥ]Zb5"Mע,'](ZIk[_| H=Eyvd×0n܍>fé[PG'cX砇vːK(22.Z֯ F>{y0ZF>'12&-tkqZ+V>7P%ƪ:Bm^ͯ&fU/RGu±>g!HmeGvj ̙rQwds(uQ5DŽ.^(_D ȞZAzn{w|aBUAލԀ6_ƥ,xzy䋔56<֦`RlM`{"ß@¸4<_8Yw"z-yNU,&Rr'9+CQ |N:6 +vsSq0` ѿ@Vq!FBI\(l +WȜsbn|#1\cD\aw, T@Y^>x&Cꮚ]<e)'6zۡ-RJ~ I)G4M:_"weWTwN H*F0T˜ gZ)Dt'[Z" dE{N3h_mNDk|jzq"h,8,J\ǠVpNN}#5;6~>X|9˩ ^7hTRpZ!23TyI,gP=/2Xȭ%> D],;v'8+]k<'_y B*/vC#!np7gN_M9c/-{Svkt8,!=n0dX6o| mmh1#;(:U\.!:siF"B3Heaп/FݐdnU + z0ca<.8]ֈrCZcx {bַk*zzhൟRNrpfa0P\cF(=S^{ͩDkĊO6 \u2'P}b>pƽ." 
cs``v흏lAH +_!ѭ}1h5B$"d}$z%4 ]*;#{n#Z BT06 s3qe߭Ѐ JQkpW ֩N#o$dhT$cҕSF짍3| U-{Z;[qk`iAn%{}d;DY7{!;)xBa*y*ڥQy6>ȯduzy@A19?8*ޘtTWhY8䊯dV`u8=`>#WwL8Yn֓e2Wg\zSݸN-%I\d=(nq4򯋎3{.S۴\>.6 Ml0e|Rg ozq&Ś3ǶfWj{Hbf\ |<_jw%,L eGzN=w5_5,P ~ ܏9hYsM%G"–TĒBy>Sp=E2lx bYFDC7ceo B0P;66Bx8;ɸc{ |uyh\%PQ~"CQ/.5C1GX6GSJåk܏|UU?$:ΞQ9iqVPД - 1٦ +I-tEpxD>PBFw.u$"d|(H}Ĩū+YaG Qk5A;h)1uJ ![=o}{ġmlZW!>7Ei~h}ZV וL'}H=B@3c5Nt߃F]8b2>i)Kݺe0hp=יUg@ᾅC+hT7\[*ZkpLj\>.<)x3,>`[Flad `s_HTtpw\Kq&1a +:Gdl*&@W!-Rڽ[ +: TĒi%ڞ:1z>{V׳$O# iY,jgh9r{r%) +0HӺ!.֍ <@j}ӹB~s#:iZ<]h`$[ +Sv +u,1 1/jP~(b{bR +f-jГoD# 9HP6KgJ^ @b%Ǽ+Nf!FykqsFWD= ۵MăNXbuG<; 1Bp>YE?>_:@~ŷROSO[hdZ0`퍣}#(˂BxiTQTON}e8.2B;3Lgn EYo^:>,Ց+I=tCaQlvsH <]9!v]a*ZLhڼ(3-hh*#{_*qWM#r(>`N +G49YdaL`gf'U5;KFݻ,3L&jw c?|F57 vrSn/9ї][7Q0!G{ )w&>ʸEEҼz)?չHsg̔zdkg2VoV1Q`g&Zv?ru q.AVg>UB4[+\5a3բ_`In:p9ejqN*7|5CevE-ᫍ0*RrI#M@1=W2i9hHTq 9>οW K2N+{HmZZGUi fp?kנ[hAl NQ1vc& 5[ +"ow GIE!gHKA%5SX*p#j$[3(.j;(J?hѐtbDA)PY\jlO`:Z+-02JfqQFfF9*U#:ws.F^RW=BuiH_MWȎfV⌬5 +|"H5I)a'ewI'O`Θ>w{|Ή[e/J8֙gE^@p0M'~5(=EKKx8p`IIKqCϿb/O>eɠ27fHL/E)ʎ߂ {Elr8!Doug%~Vg H89kāy+b"m<6 +ψk.1VeRTb?lvڥjXaB%,yJm'O>Zga2]&sԧo! +RϘg]pǷ2F$D>_lB뽛s?umY +ԛ҂+]rWʵ&ES1&n= WK5.!3Z7 an0唡Q ۜ?u# /XK%l2!1E<%XoeFPE5&M;i={JHoW^y,@R !ssd0u||Fh-دiBKк0y&(.j '}Q=cyn*g'lF8 SC%8D[4FAqx|fQwrsU}Cw'&Jxy&,fIMr-MiO| "ܭH?ًo5gqL]Y7Y +50 <}}^Ϊt6~쑖> *m%to y UMCvoz9 e& X.nݘ" rdt*9 #MaBX N[ht^3?³sng]k̗Yťh΄ +gBsto"]/Ɲ{¬_>oF(lS@4wHQgޜ|Gfs'_GmTdS/[ +";M2~繜e'F5{Kk?H?`M",gӗ.Iy;vEZi̯YOUvb^wbf=Boc+ K&$?paVCɷ;ǚh3 Xuf ;[AN6Œht7Xw*"W4 1PPʰbF)c5 S7WbPWF\roQvFO/{>:Y)i~ji~2(׭Ūyߪb,d W3;Gg濝5 GcZz%V4 ,G?UptQ-(@d.rpQ*؟3+ƀ,^SRL\BbCyUI)Q=bmH˯ HΏIy f&' ++BQ >9M~Iyiȯj'-ylhbPOm^&cE/J ˆJeೂB,}[)7ٵX܈tCj~.^Q.2.g)O ς]ZZ8m! +0w@\SW֦,Uc_۝nP~Pd}lzOlx\_&%jF\'{_amKgLe37G%̞Awe;OpfKJ!\$o4F7t#x;/qOC~HKwB%-SdMD 5PFA-H}4:Hk}& "nuuñ|&6 +{c˅n(t戈y7vZ eZT:-N3VOڢ|Cltfvna &c]/'L]2jF- [Jᖬ+ne7"q m._({ +Wi=U3z-eLe?N Һ-vUBFZq]PeΐWi^#}5ż⥘y9cnRpPj+>m7^A +CGC^N8%]٪3nE +'(؃#}qf]Nsgߔȼ̴s k3Z O#t蒕䃢."$N׹7 r ',E/[X K\ uiQjߪ9@`+.xr=b8oX`mvQhG#k urzw'ƞ2ņ_LzII_/NJlSph.Jk犴CBF hQU aDP*[nP$5~^EW !svy5CY"w(buO+l1ȿM]p'`:T&g0|UҼӬ '|7 ˀ"sn,oL'v(Ae`H82;c.Pxt6rKOFe2;=i҆f[ '0Fd"Iob"09cڀq֫T-jup@HZQI.8N:qWSxb^_ b l=Kr:1$75cz/@^jDSEEJH# +=;m)o \&2w$y98&CPooW$e)0jދ;TP塕y&`H$יVѪ +MH *-"]YkJcM)0n$<w;*m)cD®=_#u-1Ўg!xoMBn׀Ev-`4hdQ +LLP]X6#.lx*%׽JH&^h8m&Ȱb$4̃pȗ$F`u)\uX|  8ƹ*4"`‰ PЛȒ1 K~8yvGr/kLj;4PWKN+5Kꂱ8(]4GvڙUN5Nz%m䌩΁:ALcd')M6@: +/H/p&+ѣ+9ׯ $nEn%rNnt&0=xRĐC9/qjD3\/HWe:ُ˧7R,PN$Awf`~$Y?AX厱'6&nKQI^|NĮzhJZ,9e^(/_?>"DZew=雷"(|{[¥sE*Q/yߌ,;Y]7ty@]Feԗ| +, r,}ZzmK?| >o/$=J8&,«>dɯ5xG7. 
,Yz"rdj 7F6\e['E @oN.-B-k4ՆDa#MBCR3] kUm3 WnNh}YE>>*1҇ha`sw gBlZ۳oO0YMǕ|nNL9!F?<47ɣ{0@p3wxE;X}vr~~K,R<oxƣW-\*W,0.QD>JC/ +lGЫlRtUf +L y-xwXbp6%IR;s@ 䐁X4IžhLg8+q͗v8^E :{LYLq,F<% +!hx'\ 1MS8@MfXlYT_k[ /T$ N3-+DPv%:ΚlWY(]zZX?<<`zJZ}ƺz C}5YYAcg6C8̍dQoqn7Iu؏c.ihUʢzݕqKS%jXWS@YL3գ^!vq({c)8[bAq=lڨS hN/OH#;Bg?Ni|W{N_󋙖[ӝ ȗ]X+#4q#BD+O'%/!h&V>&(\fHH$7aVqA1"^ڍ@E[;x3؝x)#Iij{() {!}m ;T I%}NBQ0Tԅ;mp`pIP9U!#w.hslhEnŨT²`uLAK،B@z5čKA›PP̈4q\ұw,g/lB&T?'-Z \UnoHBrMǎs1j7Ky\3"eiy2IQ4U#r3zac(׹C Z]ㆶNSB}EZ1F#_|fل~A#+lRPX3~xtqH<"] +SHd@_͟) 7ƒ^ 螢뫣2xBOrWaXka>WgeFQ iڋE 5@Boi RmauW2 +W:$+wFГl?P +4PM;>IFpƽ$5;7BKr[Z$P;Z[2ݿd^weJ㚽CvۃfY|,Ttsr3ї4P7B0]9<vlw٘c+Z(49{O"{**ۊBKե mC-+8sY lKz9l!Go]j"(yܙ6\Ϟ'b,d3|zm4 jb 4"'!hBDBؒH-!oyr}G[<@O%` /_6Pߨ Dj_"BV9竎m|cGG 2@>^ۻy>̤AeSb6 ( +Qu2NCkR~LB߅m_M y#}wa2t +16V@5j(<@>s :G}N5ܰ #JU}zZjXLПG*C8 dO./-/jI٬B8ܡ0HJIW kdӖZ_oOخѪh[JuZ+4hE'Bt]#J U `?pI8<%'('u6W92XE0_OГg;:26x%_3B%hiK,e&C2)lOE8VP| W[X΅8By zkCRhl2w"-;⛳]>@Wa] .C /vi6AP"zŇsvIҎsCC S]aqe XYuBYseSv(%ԫAY39IqplGO4&g~M3ѩ~@ir/KwC)(mǑ&9[}QHH5'9 e}dtv9&jkFVLm%;N3qd:DYZ$߈JڋYlwĜ-Z+Fcg^ {v+Lq z [ {,OYrt4VVAy@8'"}"d +R3am {Z =qKT#YsaVeZ?;ڤx`k5dTc {ݭ/ȿZ'@[8:R|lKOSlֻ?:bI.-UM]7p}&;^K,T󜲍UR$Ux0]U4+[уgPzNpF!)c&Ĕ M}sSd=;^;$p1Y5#x+zf PV%vSQumsʬ7_E?j_zr+g`J5J(@ 0{wGkF8E,w`]l<#>scSbP|mٲLo@9J[/.|b ԈC`ݹYb}ц+MF4z~{*^^!um_n+kD0AZ1mLu=C0!#P&" mYiW`~-G7pn1?M0VbAb ~{K{opo1TEV2u`-7ý)dM]/#J!KG\r)J q LH-_dሇ͘yPll\NVe`feR)c_:z{FT `m>P1Cra߱K<7{;#xjR`/VTp(ȟHvX+1Y )q^Ey|b"b1)O^l4 ->;3.3{X&C 8ʗ$ָB5;*LC%WgE4*P}jvT܊֛J1k/A-*.)=ed6q"&Ee24 ݟ|*#tb쀔ˤfЇƀ/Gc`I ~ЭTcr䝽h.v{o@38 =  (u=g2Q຺ؤr2TU'¦ bC}6|_\l95fLQ0fT̄_8\[^ءhNmai +KI%wpSOK7z؍^?EA s)uԈEecNv)־Ko}NӃRP9׍:&ig+c=11<'kh\pWblv2=}LOpzzK(hxċAU~V:JTCޯvF~ДqGV,:P.ZaSJ$Y0T[c?G|Es/<p%S?m3Cf"' + +A3Ⱥ6O&{vWgz -KTW!f NpXjuwAOg*]A9peyR`N Dȼ|4#=Zt07KqpΜZw "4.Hec5tE`GFK;ˇ-y[2آf;:+o1K(:ȭp赝@h!o%Dåv3LN5d>zC4h֕ch;,miA0mFiv.SSxG͈<HmRDG⧺HK/jm `ˀxUT*~ ՠDv{`j)z$hqQ82Só[PA_*:P+6YXu&(JZX,qS b)$r>nj\Ta(|^ +!xU2f椼.E&w K1^%DT)DM'_Pߌ{fX@=`Ax&6BO- euRmY- +=Y cү}eU0Sm={<%mq_<D͘T`Ok )3tXIvmOp;qlCVAbTQRX<#!z6oĘ;@=ZA#$s t>_ivq< +A "%Ͱ^<ꅫ%WgE:}CZ< '{޹||ˢM旌Xկ`%R s5nYch,s #6Q=0X#4DH%3 cRKOV Nm k+\I=ޏQzw1+2bB{ +¢ПndFJ8:wtI(B5})ШAuZun$5IwvZPs(0 B7~d=@Kk9*EnY2b` b@Ѐ[%c.W٤ "I+ݜ9z[pbFQ"Ly o6j&$q 9ܦK﫿S0yYib&Ⱥk9/շF p@) e9aVAk&J']7\bJRոi&3& +o%8IMbK;+[W!$,Q>iRC@.W1m\-0*@ݡڳRM1E.,( U1Le Sz m'Ɓ9Cټ=7ŕ',FS{Oqm^q4\W}%,#SlPc@`)<&4ŻO|ƙ: 3Û?yJP!HDf}IZԳpusm2Ce5bU"n˕E+N|r; xQi5E|XFRA\&)W@q+?6l 4Dju/(OJ|o[{(1T])9iLmCg < :KvEq,qt@Jb`h c8ѕZ-57/?z#2$OkH SؖJGpC2~,\sHQl]GoOH4)SDU<)[5'W՟*[4A)dPyOv90XRʒJbV"kBX5 ('RaBJ UmN#9 Z6dPR2+ +XUErtH54;9(~/|󑷀v 96e{[&Za$tY +"OavnFfǩM~]Qn]썐g&>>=V|-2VLx xZYgN[M{'#ld1-h~˺3LtM3wۧW6M\;(|Nra0 Y562gaKmKZ> mD1ץ&*rT ݥg?'Fmm ?d2*zGŗruM›l]1kP_f& Q`'w!Ï-XW*M5#՛YWl?>ݲ2Pe5,|$ DKLY'ojtan[nvSj>v7x+tI|sعn?%'x|98^N#ʽ}UMzHQϛr񆦻9oߧt|61ׯF\{LOGKg[4|w|[H$@o2dܫKvoK!wPR}q R>}{]90q;  W\wM:/~wFplcu3^T.CuTҤu)v䫋<ڭ+0k)wa J8i#Wrl-Ff2{d3@͐Sׅ(_A~uP9_-QiP ,ƦtU': 0B[h|!V mk .f[;Br&rw;{<ГSך淢N mNE{T.ѓWKfe<^7WGہQ;[u!bs2AIb!"k#oZ,F"c$9(n0#ߠlNAvɇQO+Ho)2e|Q+IţB";z &llh=<.: :DŽxI8o0QGouBԄJ4$v*zt +n-F {5@0_X?!Fm +d4FFہiC%ߠAp" d"ɂpx抡] es<-Yǃp~J퐖![6C/LTsPblc\Wmaѻs܊d?!U I9rU>IĹ"ɋ껪ty6 +۱ ,XI˔~'ˣv3iUٺKPpV_zʫzMW( ++䉮h揌DJN +;2Pۍ{ö׋G*L +q  b,@z CtR`W*U0?!w۫5 5M;jhnrcik` ^[Lf6v7[D9zSMa`mU1fpvJ;Xsg!6ζ8[@`@Ծǯ\&LqSCj¾s`9'{2lJTr]&qm ҠJG^3mtRib\{6deHq8@V^9nG (O mv#`ㅮ2qo( [2򟿽)Lj88k$'Y ֐GתMX-> +stream +xڌt]5 Ƕ۶mٱmtq۶ӱ:͎ys{cﵪfժZ5um +e5sGS #3/@LA +GAnfBj\&nm&nq +Yw; _@G^9@ tstvr{-f4.D.f&7+Ljf@7͉ӓޕRifP]<(#@v5G 7O 7{8@MFt7XzO"k9;8x[;X,%IyF7/7z?@;Wǿ&&v& )0s5svrsetEe s1G{{+?[^7ӿ'k?M;1i8X;ek% :^fVLWvoNNM-|]M<7w;8hi?@/X|{e^v|t5DE Vf?$iMS+`w*#Υ@\g+F Iw;q[ynW7T o*ͭo_!8X5ZJZ{͕̬͖5QP +h/_i}t\GJ89#1VN7!q|Yj/ptgr׿K@ nuь/Ԧ!NߓpJ`P' 
K+t +MmַmG~} 5߳&舎$7wp+3XEg"C G~~d)ݹ П=*'—Uj9+b5bK(MqHahѮreDzƽ,lV?MRb/o x"ҧm00fZF;{LB[1\H0k;l'=]}6ik߷&ͱa>R!!KOZ +uL4 +I^-  +G +;IW<"ff]n K<8kŞe;O4N/_iEVlY}T !L;X{+7#2R5cQ߼m^x +/55:8 +UHΣ&OGECyO++E 1O5_=#EvչB*KDIAIq-@iն)ҴFP0 >%fc:Sema|-%d Ƿ Ը7la$lI*ɑyIW?u<?kŸM\yj5fu]}\W)4k,tǕzo4sβHx'6, Ǚ"r!i$V0;C`FD|qĮAi$QPwX9|dǼMXy \5˹<su exOVӑcܐ3FG-5.k)+* GDN%G>0Һ3 sT7B_OB$э}pTWodC #rϔ "yCMDo<#5n_3#fWRI/+XX%?֤IP]ƥϿ +*REj*Vf<閇 z(1қ֌DWɫETWz{':}ᅸZ%%c$<U-9𤷕McMpy^?].iHzM_.m,?6{iz?4fक:y!fH\*ctNЦ#V-:Va.LT 2c-W7Wg!.X +~%F.og4}+$?e=ܵY*QL8Lj{lS%iJQ{po5$"tIg\7֕'xeMW1|ua$X"!ːPk3|U +s3l\T$}|RkV`ػ"F6r,Y_%_bZ&PhnT`RFި!_ CtR>0~ _!@(~ZщP~Ր(8 ]DjhUBYq(q8Ahv;9v1LQul"fK*Fp? b"$"XYl0aYj<!c6(.߼6Rfjkmj(Sk2n 4>ƭ#B2f jҔ_]@nlolCw&"$]7t9f$^אL>2Ymf.'*OE>Pp^ lnml5ZX!V#6zi;h '摏lAG +]N%f%̑AyF8'6ä)#]$,*Q I6lxyAfݘ̃ƸJ.#p=a^7Jɉ!|ՙ-qve+B5m];GwǦb+s9}5h[͑5m^R:uP)PRy +KdgA7rf'$',SZW"^cqggh7E^[ NَP=l3<[FBtȇm*YÇUЈ=#դ~xĪ;1WVÒXciaK;T&s*KSN5zjH7qA'(# +K+rײ!P+ùv)G>J"ϑN+26 o:UAݳWsh:.(kDЪ$7T3z iR/cDPAurmO˱=0h|瑐Boԯ. +^f?+^>{ :tUSWǃECY,nW#`x~UĘK'MÆkL9޳o&h%QzSQ(s=9x}&D>Gַ&h6ucoQC^GWZ-q?h|jt}SѢێ/g)G MeƩ\+:<~wM\P-S4[ +q$]-Հ򙽳c9ir$7?VQ5J6j.jП^&1Y|a7,tlqqT8%xN3\t.퐐QV-Ez+04rzXf#}hQX5 5FZ-q^2gwfVv] wMfrnUD(W{' Cb!9Gwu/cLi󧁗@IlsM)%zc,W*ϢCrGP!䪗]*Pr7a' ZFfŠ BWrW.pBS\v/CT%j{X}Tdgab?d|<V$ɧWc'Å$pBǪB}ٺZV.>?uv9%2bC5rG"B5P8eC7y4KxheI}W0a4|o$,'Vi$"kgz\[Gt^1!+nv/ruDdhN'S;x=}/_lRC* X*a_Vgh޺.шETMpevۭdžev8S3 +wwB֣^)l:Es+!! {JhF6/ķu!3Gz Q5琾F؊ҁFӮCr!uZr@'T fTL~!"KO1q8%K1Sd? G-GT;dVڡrdHU +v܄ʌ2Wv-х΁C$vSX\>UW_:1…mKnLWӞ4 )joqwG<)hj7B1LL]{P[AS{o(eSQ=rq$z.fJU~#}F}-_tRw;k~\|'*t^?jӦQ4ꭡ֟1Q΃j1E\7}+-{{Z'^˲LG1hfT6jARċJ J0I1K[+g\%I`3&1Jԉx .\\Lbқ0LP螗e}N`r z`HVԜ߸4`s@8N=aѰ>#MB|l1 +jpD4{3NB!E~xPsG0g>]ofi9V1>ѮG| eW2XDA$  Uh6xC +nu|ou]6mT6O%]G qgKV~9F#zB>4I2V'qӬyDs~&g]>Oo7S8K=-E!sd|tu UOjv jd9)>(6Fl4YzHokSnn*_%$PH8r8'ϙDZ^yi`̄\Fo.;8%m2#]¤g[R  VC0nݛP\F{€&}zƙHx6a`j?n6o_) vմ˥8SviK!rOCR=pMm2 [U;}5pa'cNq`-_NWǥfM!_遞l~a}ğ@s?6?\.%0jQn{|J\ R N GIͫ,~Z^6R{ނ7uK"ȸ}%\|~Skdj;n!C>«5ӇX,t-Fס􇼟QB+{i + . `ty3354s;6>ކ/=Oqfucd8tUQ@m5<aQcl!q{Ȝ}O,oseME,{B48,)=n- ٪ Vd FeAXP>1斥L0f~7*ì +D /vr괞+LZbfC]שW$.UBGs; VǙm99uɦwMKJ Y$qbB|8+zyY#tx }w{K6LSSٍ̺GةC;$& 42 VmdLfj[ZOn~zi +TSz\| ְrV́"'L7,~h<qhZ8 +XmWA68Ħ٤q*RuVPf-zmgUQ+ +oJG8nElԟV\CJA tNB*Nfb+;T^ Nw}1A?IcB5<7f%#%Z6ڀ(tcd(:0 ol=AqN^| =R^o}lUjQne= ,rȠx ;;ۼ.W|'PnCs/?b'  Lk᡻ɢ#lI.PHG4b*/^ƈ8 ak1U +g?6E襲js/)sO"݂0E& B/eXiAռ80-|^k' +hyI|EISJO;(\BҺA^[N )=]ee;?|Ҽ@LdU/C_2.|"ǎ:e`ICOfC@{Nw+[+EOgGB +2 v_:Yt>'%?/;jY{bPd +o5;ʓ-Xo'-JpnƻĮ)D7GĈWo,VBsGS Ղ d^ǀrĮJEӞE^$y^6O"b_p,&?,moۤ:LvV&O6Ӡ|ŽƏ3ja"i8赦ƳD}YJSl +9y8i( 0a#R`;ȡ̭vҽatQ]{R|͉t>Y6]AXZ? +M1R X 11JRJ=]"֬iL_LVp߱XG`aic +.yqck V' ))W8,Vè9ȇx@ nz~x[fPIyk1CR wnʴ)vqQ_^` {u]u6"WA4-43' 8Vy|tJ>F<"&"Qiם~}}T95!P?,H؎I\H%G~bKm^s~0/#Zֽ峞uv:!Q z^*-p4md#]Sr"H.@HBo/NQY1wԥ.R\q1۠7KX!x(Ŏy*rQL9G6wƆtƚC2B*X/L^LLx^x]0-:Q3]f0R2[G.;>Xr6KZ !F%:)\BlѕP8r92 =룄#`:hJ_Si[>G@Z7[*M.4Oc۟jW§ϙS㵘3NހqUIEC=6.|"䋟Mǜ^ƉV ^Jt퍏uAr +n5}!/\)6|o{L;1C8[˖־AmC zno??JVBح<E{)eCPKkLG\+^U+yN8U=bIWHM˨L3_<zJg+dE򍬢0FNӼ3i`X|XG?xδEnMM<(>; ھPصOuVGMc#`↢`#.S" +tvCyYvwe[x56 fL92;>R6 .#nQ@_)b7[a0@pa)wUoeG[\ng>;O|,yw9Top%l싅+ EhW ̰nQRܴ_΋߱ޏyswV?*z"ق>Tڧ;IM<tz"}i[B_,cKgϦQ{Xy\$D8,GҦ{Gk +JLtqf}n1k5 O; \~t"8r1VxtAz=j4ɊS=>W] uA5!Mn Wl&dw) nL&147Aq$absSͩhWe ;˅lm.ӊ(h[!:»R>3S; ` `*M$Z⾽ Yvc퐽 2 ZH* uGA1QVE‚3WLB Rֲ6ӄi Vj-1)t<̹:2`P*Rp ƱFN~=u 6Flj64՗nrȓDX0ހ.ηKRtڊN*PLCށ4,k d0QlUaY+ʝ8+/omO(ַ A ;TDi өI Q bس+]ɜ˓fDC\Wëp]Z<5X%ix/<{J0쓊>iΡ#D.3d?% A2\}v%L,q̋s֏oqjZl`i'84[νz 7ƱwA c` V ,nY8Wjb1P(&^s>l I3+\USܼax:b8$|rkRH~q-o/M{޸`[p -Y߳UUHy{Z`SI2dJv&Q}*Žqi? 
{i#d&l]ʊxߙ˶3G fUNE.Q͎TpΆ8}͎v* +y-"ɖa4аaD d1I~FvᏭ_y %QJLLqAlZO ZO,/!q)#Zg"BWB~+ CjEx UxRI;[VBiD2sYS3 g[NN^ ߉̀-a4MG|86) Xj)^^"[B59Ja/|`yB-fsp{鐜K6_&:@3nxJjb7i*ZBу.dܽ6OrVpYxQxTFMmA_צkE(ېf6 +kj+!oIK,*pt}<-O*MAd"j]K]rͰu4oҕA|rJSPpo_dIwhiM]&|uhbqt!*#PU..©7[o;ʪPdF87]uӋ,!dts;֐ѭ7Fδ4@u NCoC`~+;I6< 5ch)Xe|~X e3 }P&k@/{"~OKo[].H^KbvoAK>M+#OY'WM>DneӵcI"fYɆg<,%[T;w*3G+)ЈD;^qM]t4UY4FeoU 1v8 aRH <x?B\>#X=yOa +WkWabPej,ICt3cy/){K,Óx=$a4j~8?{)%m;v.ӺϨ >"}jOA<ꉶ 2Ƴ3 +f5hڸU-d*WL4R|m'mxjvpb4!X2Ճ653IG{1]n8/iWҜ+\%JE+= &x P/j,&̔]qi:}#Z C>O5{|/\P+Y(Q[Er$#y qr&c<,R' $Ü^4ixhgMs? FU(bd?|'χ˩P+1HЇC,fs/0?Ww@˰鳃]C76QKw[5T'E,@|\-03$UULчN?{u.7KIRz׬+bχ2ѩO`GC lbAcє#N'CdLCO;:h ç=ŒSz>rG #.Z_- +JD;5&k +5ppk.΀DS6|<u4 J)AdxOO;x SgFVKQ{, 7Dހ" ow+[cr^fnFFi7z;;Ey!f p²z(Œ&h_ xL%|<_$t%UMw|%&jYRCdkIfح mwN2{ +QC1@<#܅% AK E/ *3.OqW*2 +2?ie3jY0u@ۚ#bkQ".:>ؙ{Ȇv?#q9A;`\ЋR8=꽃1@@c 6|&玹ZA=MV}|[I/XVTeJBOmhFA<)X̴Jyv=-D\@@{jsbNKdSYܦT[ps M"d-sfp49-NJI E)^dN3s34& 0OƱ +j-sOl˯KKSYJoWqtAր*N|JP~l05zn<6 n{zax-Hϣ1cC莦*,ʍQ @ZV[ڒ +oTLߔ$f JG8%iF#C{$m&>Il1cgRPfmWhdN9-jKe/qest^;-qk `%rv/y.{@#w̪֙מK> :<cjk%L$Đe(k-Mp 7n [u-]%!oXC6pJdU\PhC1Z@07c߱(X[0SgrKdv'BR১֎~oz։KL)H&'C8.}'"bkEݫ@kBMCZ @,1d,+Qze9bOv&~BMy,fq. GbGnv^? y2|hOS|܄uǂQ("e]l'`]Q)Ӥ;tCzi {Mo;}1jC-_A8h9,gUL?&U]~E+nB +8\)1l'DW5En< kA7eza7eRa&$nz;C3>~@dds)^i=Œ(s#Tyt(tT@w xM |nK)>AR(;nnRTEm]$jw̆$jɸ GN+PIt:+.؆upF$!:ou]?5Q rCYֵA)ȦsySSE@%>OKBrc@衦!-iCVZ/vf$y'xJbTe1Ekvo Xkk +je,hѴ+'g^%. wK4nXEqQD<$\ nqL"8Yn +'}|XC/0FQv*MaڊbeHU Nv>OF `pr8د˫Vl2\hl~X +Pϓ)U>f>:`3hb,T;IգETʫ8iH:ֹw]c{RūC;#Ҥ<:&A<u\$X9P",)#ӌ-{Kn1ID"Z?d0ZpWi] 0{q90W2B; + +NhF +L0e,lӝX|y}a(-5q=v6LWۋ T~&UإzpTژc1|td`_&Y;5?>u3v +춿R6#xw Hh.Ҫu,/?,Uf-8IhFҧ_&\ B4GZflingp|lr3ܨ(p_hwoR9k`T?țVIs],HdaKјMZ gi+({xB[){,'Z!/[̑ W=G"SZ / Yі:Yuȍ/!63`?d^ܮv0E& Q!Чĉ`fBR!Z}f,Ō4`7dqϸ\g~,|1&+݆ p`=def?tV1%s% X7'"QeSzb7J[ԠV8 (bG*^-90_٬ZuuO_Il{#}Q$M`vn(̷~Rp#LRBɋA6$;FPͤj:*1=S[k?jRiqS ^٪ezΒ eI \.H_a(vm}C x10ss>IJ1M~w!O[P<:ϩSa׶kh +ek+} tpVG A@deqbSO5OL0G;A`>ɝY Z;(FCF.L:4U+Bv$}xѯ-{pn"~WJ1*_aԉ7ANhc,r9N>BK}B;5)yo8& -.r^=>+LG1 +MHIX6v ]òn&#oz\ҡ§c+6s|mڃ +KI_M +LjK{AElG&iV8^|dt7ʜI,/:|武r$2me_x΄,jM; +ʴ9^7v.^iYgm@5@ٻpFri9 v-K\ct3!䉂֠L_/ba_`b>1| B}magY3^ᬱdX/<]RfJSt?+Rzu\)HbD$yGDPG>.BvYD9Eey`hJH D٨ۨ| )up?OhɄG[+zEEf>A34~tQBWK2"tfgX3TIEKa72mT &d񷓅W,Z{ X{ӸD^g,ʆ  kx]=N% sj0ͮQw#f#2\w4ZAVwf=X^b^iT{]^\}QeD*$=Tl.XPi}* Sq;/:c$9Xr6t6ՉyiWܪXgj Ĭ?W[!#K[0?U1GӘEoQ3)})+ E\'_f[څzc(sɶ^ 'asayq2E@5͡8f+hԁB,a{L}5Sd+riɇnQzFLZ.P=:%,r 8ђEE6AOAx䕻;0K% +1 ˴SJ<8/(hu6-QKfsUv8di~Dh,q5³98b {M. c/F?.hC\84>>8 )COw܊ϵTb ̣#z a|h[ >eϤ&?1ً[$ʚ%AL?0XBy@l~Ic?/k\` ӨYU/18SʑrR1/a"`>8*'x]ҨllЯ/S.Wz%Jg`U>d_60$@D-AIӹ-\[@?MY{4U3$Gߦ:\Nw@3w o +Qn4(N?LFO΢u+,\L ar}<)Z#2`e%+M`<ͣ)aTW& +!7o-sqڏRuq#*D*gI s :t-(t)xL !XL+B T.( yIR3/~ S.WCH*y#joG-O>4gnRȕkLE"sɓG^p7pjРQ> +0meciYVМ$9X@|pDaDَ AB>-9n+Sj sϴ 6ПEJ <=FIo-+v5_z|QH#RV[KuAPtc*aa${H:] V)Xs:۽5! ,A~FTu_hq9Ss*ۢn!i b]WMG! - ^ kJWbZ$2kaשwşvZ l/qm!BtS7ocK{ps:qbU#m@fu1㽉+eCmq\ToKKR"_z )@ؼ3sY0yqҡH;˳94K?zᣅ;ҷw=﫛0m&YXI) 7Њ3pJ ;b~ !Cɭ *6mX*s%N*EL "qsvH{5!4tĉ282tB( 0)heb[},n(D%sP$zu}\boMrЂ(*|"iJXƴ$MC&%7PP0?Մ G/DKu.&T(+<Tt~I#NNu8aOR4{KQ_z8^ P Y%Ki8C_?V!*Ag<׵&"{ +)V}yVgIBD(g - +igO?xΰ/Q튵{U +;nt +wvFۿsMeh=bk!AQ6z&B\ŝЮH>:$(3G:7q"'@0>3ݻn=v<vxOq*V*2={%{,ZyFц Ik>ҵ"m=e +nH~wtU3}=O]l1ט +ET+1XJE S\;@3@ p2'`*P{avO»)וuj6mV{`8>6Ϫ= d/Y2b.+D]K@EQp!](gOQ_n(BC%r,%qB 0AZtԕئ#+6#gEEd ]?j3Yzw +W 膑2H6I_+a +Uu,_7^V/M 3Co`k%ܞHZEW3~#MsB]u4D'NGm&@Yulbk/MM7pv`ad9%I5ggRhXŕh^N2rh.'] "Ks!JB +vwLzs(Fl_Vi;?ER90_N}k:2 P"LlWv!i0a-+N (&[h (NGC7(P9qCSyXK ٜ/"0:"c0ۈX;peO F܍ѝiTZT1rtD5菂Ot!8AVI2ABOM:bgx%:b{ F7DJ,Q }I` +-kkcL1vګ! f!RU"$0fG*. ]} +$V*l6q\v ȜACQ=lqr=ڤn/:k1. 
Vydbzx1Bf&A%O:/4+3AK'~_kssADT- ݷNzS0Ft[JLcHF2F-$$|5!&c_M3~EƘ ˤϪFdqk(K+]#?.hQL ,Tl3(e-Hjov7 d<Q9,'TXXE7"t$j$ 4;|Ɛ/=];Xgl+ ym5e{wg,abjoZèWriK$'onCD֨ $txoa8]pY-uD^u+!p =־NMswj%_T\/O,gpuuNAt`r5tjTV+A*tɇ£\ﻘ蜈}Bq$٣5. * 2Uh6>h/1p*8on8T@j]nu:ڦ?*6 ]!e߷u˲I '_tjc.s*ZVxP[:q}VLwqn# +@BAH[̍TVN#;.^>ω Ii:1lڪ>2daT}!c*GL(?kk. :Iȳ9G:K[d8udRË='؟Y5/wc +6'v&R`2&o#ƥ&*JJׇ5YVsŞ1[[{/TCD9ӍSWP( R.\>26XKurgaCH d"TX+D0veRj4JAu?;˒Z7-vj*Z|!-U\zS uyI9~ whZ3;"KdXKx.o8< ulkڊpΏ!zwJlu)+^u+W&DS—VŠk{j6Gg$tB4q%A `a|w4\*v+FM9`Ć NKutTvP qn 1R&SlR/Fw bmuKHh(G\-ۻv+0r9)QM;.}Fpߐ*(@ Sg +zx4v\P׾{QS<}(Z հPۭHa`5-CǪ\$Ly!1%EGԆ5PuoB]?,mP[߱7~^/ٿux蓮-=u~{̦C0'hx1? wtY45{!jStb ' ah#kQQυPlPȵǨ"Dy+݆;jRI; /O=AaZmSӺk"c@L;L5jM 2S'"bi)HVoTJw0`oKI66V-|ӎi &hpaE}3XE|*=<@/ǭjY_5D#ۓK:/ {KrGAŃ!F ^ú\7M`~JFbty2õkd\ ~6; ⚰$L~R^xó60v ƨLDڀ]؈q{,Ǎ+izuLOPy#,K\h"b׷ͪ'ZnsQRZ^ҎC|ѫޔ~8VӋ@:J@Xڹ8z LOtTFbb2f;\1eJ4o>r.v^;uy#s84#b +DVKO~\MBl(ȜqN6aLAiFR3힒02#vmTidq޽j 꾯vPPxiۦڟ+!7JiD`u-awn⧚wԨ3qpNaM.˼XA ?ۮlgX<̀}ZECk1V9/]%y~!~ja2#4zo.`B.F́51pve$HV+{uQI8EUzbyXu6*To, +o* Wf T$ys5)T!;iPiⲮF_H̡ 44[:2k[ nZSD<~kow.LPM77-6Fݍ^ڠ`ޭоGq =Zz Φ|sT)0zQrp]ܚdfh::fャX)Qν;ػ4)? +P[-4k;nۖ}99VH#W` eZĥ_!wL /@iYD]WGr~eU2>ko;fP*I k)x5gc \h?_ _'H)-EVoPcds} a^J٧%RMwm%|jM.]t]fPrN:$!)-;9a~ۛxW4f:fCE7/\I-pX{O髈ҳvP&2F?}XVq1 |5f8꽓1HZf$b%k>"92i}d۬sZO-T$>0Sآ +jFvna:H:&+ǐ1: BP|GN"d4O RjͣfCzk%kW`,`q͊Q"/ LdU9s=cIuq(lA-nĮH ~7.AᦋAÔGp,tL[4%l.z)ݗkNX*4~]|sfj?܅7w{kf;|q>~H!qIek)b"C1t"Cvc7Rr|XnG#WŬt~:.t[ZُEH m5;S}[V5wƫ𙽗#Oyt}<}!',0Lxѷ]gyIqeFK'ѐy_ۘbqz7zNw>, 9/4d矐 ުp1GKWkU{k'" :U{4ьhg 8:VjIR@*9 /r *нHz' ƪ5(*&uVBd/wB(K VG1=2jWǘ\Vũ׼pa\`0c3W/Nl +R%B5q;4j_JBk[c %F  +5Ns (u=z"u&o,0,5:vVl $P' _w{9SqUr%Z'MA†& v ̓]&!%+ +];FoMd-CnpY*OUD%t)V%4FDvQ"cNT.7SSsZB*(%C4,hԖ˶ZPEՃ[S5|,uDQABe01U?j5q.I~4T.^_ ] +SFy!5, "bk^GnF6+g[\ߞ,}Q2ͫZyѫ zE#K#f̓bǦm4>D!DWȩ̉P*6OЮ+_nH!AnYUjK#;UhGp/;dsa +sVNCN$ħlA*cHB~g%ԛS2Ϝ2'QFE! zd+ aU{ڜeLuK~;!١`Cִ6KÇfqWܧOxa`uo09pKF~e^Â2o++ΰ6(fR|"} +x А<^`]~Jcp]fƅ#2؟(,J d$@<cuM5viE&7hQ،Ty; Y?Js1y$a^reG)Jf^i S tP$$^-8:iOg{r'&Y2ݿqý%e.Gr3~ HRPUvγe' 8)r,:PWEP7$P'Arّ2K^*1}ڑxQ;b9BD29hJ a2?뵏W qdaV] w+^ d%QauU)KeAfBy V=5^5eAㄮ Y-0z/Ar2*1CR}ӹK$qdƫRf!@3-f+45J$Ҕ t2~̤} xu#I"7+G5MU~  9} +ڠl\o4? ~P~Vm`aqjMQݖ3^D ԑ|z%&d)Gwۼ5c{iH+?\$vk7j 5Ԗ%mYq A{I/eküpfl*7I"_ܙhb_LJzQwZ!)XVӻ'"sBEEnJH + +E]NT"bT|Ns$2AaZan47aOcO,Dܳ +@yP |'_pUWR4?YuxTMJnp J + +[¾zrEqg%fi5 ";p^\%'՚m:7~$50p{.@1(!]uż̰!N [Q +I}wG/"čt݅:Vtk.BGgQO;>y^!9F}#ۉk[&I*m[}/4yc6vCkXIN;(Z=w (7HR,AOa`"}EOk{Ɛ7ˍn8* Ć;8l tqU81,N(ym +endstream +endobj +851 0 obj +<< /Filter /FlateDecode /Length1 1373 /Length2 27162 /Length3 0 /Length 28127 >> +stream +xڌdݶ5vfȬm۶m۶*J۶m6*mv}~{c8{D+:0sd4L0$$&1Ð8:YrB&mq2vIkHO_@;GN1@ igkC"dghafȍ( l +ؘ8Zd MlV42(Y8{ttnnn6Nvf7 gs16&lno o#oq4@`-IY+ `jamuvw4voj`am`  +g<'#G {g'Z' F']51uv?a GAs!]Tl-\L$ko3`0q72'ɿ Neog0;//'W?W0 c #g-gk61;Zr@utrRT^4 , ;#7O+akjw:v# +%k&&6= o+d"nH_nn kdCL-Zc p6+[da`gIXߔ]Y[ؚ9YlEO|edr+bkdgYX0b+Hc1@Gkk7wF#?J?0#_ _61q71Y[3 +|q9뎈 +gq&]2-6 o\9*Z:Cy2ݗ" $2t xC7zgfolm6ÖqtC_ L,Qm#D`(~B$Lq $sFEvJsQP}z8ЊMJe˗a +EB$̞HBϔlq"dy*K#* %_d%ĢZNX'8i蜛d$&%BtTN31e"7:(>k{GӰYd/ zBnlz<.ZVi~SKn~N;_:׎2Ǩ7ab4HQp= m#] [N7l:tՎK.O9|jt t/h`4wY_%|HBEQ[~@˥ _d.iw|%( ; t!YNC3JBvh Sñ8E?(^ qzAV b-[A#ґ[=y}A?:ykǝ+n']y;&)ӌZnG ͩwBgӹ3$*× Btx,}f&qPo'rk&HCHuS6]){)` UzH^m-OaJm6_B^@]nXLtx&Ӫ?9O'8߇#P!g="dg6PW݈SJn̄LӍ*q+v&gBBSU%SbU42$)SkC +cjSF޿7 GkEQ~0egt T$;s7˶^\@>4xv}W "E$J{#oIu*IBx3RA#Z\'kpE8yR9pXqk0/ߘgi  e󧹁i7@ [(UN9`vY? P-WF㾒1@I`LzR-2~m1W)U+M<Qt"$c;fȨb ++xvp=Rƺm27a{sq!&2YV fsȱS a=Vz4'oݘ4Rpǧ4ANraa.śSӰ ] d3hΨD_mA24.\ (c ݪx4x? 
ֆ&)gN8ϡRZ+}XY;~3ithE+ LfAh{XU?tT$ȫ:ցG`4A"$=Eh նxZp$>Q7|-j+PŃ1zL3?ΓӪ T qȨĒڅ o(LpǺQy|QO"s0%"B9=#FJlNy^FRv&%tҐND H,~ewi2he8SM`as)ݐ}b8@s3%^e7m + ؋e9|=N/h1tP*;eo~jQ(m R T wWvJS\3 z ;gһk?SΓQ7|hx7*(~i5=R׌O|՝d9~ۿ_p8bOϲuD]k-y 9Bk%b8\Vѩ)f:VS;D\!p?RlilvxbE`8! cMʟE8~; 5JJ{i רa\fr%0|\uOz"9E,y$ë:的h\Ol$(f,yHyYt!/ +('" zrp 쪙ZHڃP.mw!Q),@%M +h!'շk``b,-Åʀ u^IL*sm bTAS/#󽏷#AD {K^1BF(7ZT7mW`#M$u3L0ɯ5PE>6y#&E(hW®5EO̠E]0r&Y,I 돚y&ǝCS,jK$GTݖdY"k_Q#ZBӝx@(kA/ݷ0O?]1%/*DĤnɍ>:-|V3I"8P-U/!%(+}6jZlafՇ8֞U ڶQ]&o+!VƐjO,(G~+S,6CiR@p ]ΆZرUйtKU9qΨhKɼH69P Tax~rAĖ`A۰!$DTs͓6* ,@8<6>DLl2ڔAHp h )ӟ`%ӡ{p>ʶi#~A6:4&aJv%m.Z`ݛU YZ@nw7v=H=)xNv^rtD|+nÑ#zSK G NhT +;H`.",{ +T=R5QQQ\z#}>_Qڱ΋R-N7 j#`̛1B6۪%iV.T?*IAaquؖr4,ubY]=_%ɑ~Ra#V ~i N]+UF䏪W;We>vj9U hcn^R*H$$%aBBفY +N>HzX ] +jak_ N8#t954Ia KЗiHS]saU ̰C4)@YPܙ|X#.\jefǮe:[v|v Gd nƎdd/)ߕ %yCqס>vlЃ9C+]=8D@MVXP*R;I+.^Eh4˗9@).+&fY1me,㛕 x&[V9c'k"[g6<^ wBlBpLUM E Y HЦ(%e}. L HhVT[x`(DIR]rQ|ȁ#6 +cV2}4P90nP?*gV_FXt_dɗؘgoLTXU}3BC➖+휞y3 DҼkllܴ%p<;56Y+4\ (Af̮,RbWĎ:6Rπi:*C-څ*8 {4 #T@ME8 +M=)}Hv1 $3xNPЈs3^aRY2q-7>8 x£Eה6f1Lqd? ڧ +1NRdE+rŒ]J U0`X-胒Yǽrb涋5xj6vZk-V+/u$tmϵ9'`^Ts)+uٛyAՌUM$:Y 5/< oVuUqvzu;,z`h MᐐU"z.Rrt&G!Y #%)RKw1k;.+uD +5M+ d46ɳB> Q6R⁻~onIm{Fn[Q(Q 9!5pԋ4VjqdG[2Сc`W$I3 1{lymdkjX5`2Y (&G;]$*.M巘ydkIEYǐ7}"bi: MdC p@OE((:*QY嚺/tHBdBbGt-zʖSjɭM7&J!\gjTGCkKMJ*'Bj(quү/~u~4ڃ,Q>ur!RH_."BPuQXfȩfLpU&4I6+mCK6Ӵ;4}U7zG*+ D `[icu"8`>TyxppU_|▪^6~ك@j 5$xOz^t\[T* +苸~-Sj8xVWt?<L"0N!F!!:L̪.8t%3(~iCO#$#~Hj9bؠ]w#":*Cq=sxEf1=MJ3롈9˼Q1Qo{S[.c)Np)Sſ1YM(ąT2Qc.lyu;H?bV>~ZCTT\4|v3 tWSI(Mkb~$lx_Ùђ3+]sELS^LI?+oPhec +(^("nhjP:뢧WJdO zk_ϛx+Ę|_E(#;(ssN&҆E`=#:='5Vh|̑6 +MG5xʹFeh{3|%M۵%Y$iy.<m!1EZmC[B:%9d +N"<3 +z\:(#KEi093XmiUٽ1مU 5(c=3w|&- yvyF559+R9'/B`y-Z!g|ߞRϥ Ǣe?|=\/CwL9ȢN:neuFI+u(|C,2|0K"k>84ASi~"Vf?'ĝ~gR*Z +X&,fB*rϫbP 7^KO_4l7^2v)6hϥuw-3L1#>Z3w6Vշ&.d*`BC2O=-bߜ-9@#OKt (_꨹TM3Np61j- CY3 +>E.h +QTjFOqE=`K 0dMdCw9Q9ER]I#E]l\X9!vEeK5dma l@!Na\F'.۲7ە5yr}i]_$V +.:۷%cԒ{= fK= J_1Tg~?Q +z~\һI|oh]D2b7wB<.* Gy1 %9ݨaNWi[9'|4Y|.MGi;& +nlM _-LvKR _Ův `>uf!|ݧonb3{a%x#;ْɉ.-yJJTVaA+J '" tY>b@ ^ ?5 w[ϝV&@otW]s+AvI/1z\.;ykϮ_hD4;LعӺsO8oXm[$ެ @ `)>ߗgHD1K8>37ZD* tjbÀ6m]]ll^%] { \6iE=ΞȧV|[NmxG)}H AccwÄ7OǸABS͡0N`A(3(9 yn|z 3yR0").ȵ%+G)vKw:aV.ً  hCT(x_"w"iNN`l2vP&f{]IX]eF\jVv۹%~{CdE3Q6nKLJg4(uLZb߆ akII>{Pd:LXm ObR^7 ܃eWZ:m`*C(tu'>?~wlf p,Ɠ/*֬RM lo<2&@`j24Y|L"G#;G"duf"v#0햋H%yL +J}oOW4Ϯ%gG`8)鎀 +OSH΢EL"qtvѥM0`z  +n0ZdEerwi9& Br䆁)CH%]QX#%Oh|Ud9=ݿ:|"3*C..rv HE ",'I݋^ҡT&Ҷl'q6מqFb$D=D[M2O0 +ז6( u2T]⹝O'K]aWn +hDM t:n _ s깑 7..Q*B?^C2q%^VB,{U%6ƒ&!SY1M8Q&WC㈠vn._e],R~ MVlղYQ!#kRLnl綖E<&:bjw+c'b'bkB_jS^/ZhٞʡIzȊ +9f6;.mq#k>\qX05bD&qjcD$3 e(4 "p낃BV;̓ܶp#Ԏ#:˾:y5e;pK*e@ص߫4rQw4 ]{ZʣpP  l[ _& + +I]| oͫty d }0sN5PWL>:&hrP8M wX9ȩ*w%8G +NEkFc ώٙ0OP#uՖdnɀ L/=ɢ! +U jx: âi.JOnEX_R [fO]7p\a+XoKRr<{|jPV#8[kT١8 ׺DY ͽkU&^ȤqRJd}9'E2 mZ+چ3l]:5ՁZ'=].zk*r@źLL +@<&DFx&3Y类t yz`ӚT7J:} F},¼`ѩLN!K^smyUbݯCuAǤ8{߮!g`.oK5,A]QfƩV۹?e-wA FP~gG O߈~)%X׭yR8&e֗nY*8]YrVH,HuGJzK%&Ip̻W8҆xSv!4ć<^undߌǓjcוD>ˆh1]q,!5OϜ! 3/v 1Br X̝ɖ_#tu2B'=yEPһx 3Ta6LҖ#H%Jj,ZaX͢PX&&Ef{#0X! ~n`l=@/ٍ "&+KU]hׂ_K%l&)5hUD\L,*aK+4)3'Q:vڭp6g%8i#gJ'؟%m { 1x'Kb;ÈQgzq]Ї/bOlt +;%"F.v/;?~h1<Z< 1<[ liϨ)- +WDyUBsąa"NTYƾ푯OAWj\صQ?7l%'7A\lAD_.-x,L*t'`:iLs}&ȟOF7[^*,JNxpxj<\:_?CgsW 54U(Tߪ,DrleD9j˚i>3&8!Fn/&6ЦѼk|z}gу!(~ IqlJ>gr̳šc,>w2㦱oyYm[0>cZ-)*N4L_8Q1c,@2@ͦó/%c8q5{-Q;v6{(); d߱jbD6|q:ӱ.M϶tA^8H1iKS?(M{nXሮ;aav~d\Bx -yS߿&gkiXyteE;" +2yq-IU++ˁ^R!]q7faFgˆVn}K4"XN#O.Mx Y BJGu`Gdx*cܺG +_ڌV_zIsG$Wl43$bs/ Vch砘>! 
[kXdFXՔ>GVAxl&/;h?9Cu\bAVW^ vfuuλ>״),\]旼z4!6U/nxnGte૊g]-6hleFlɯ*e3NǪ.uIUb56;Db9 %ڤ?Xe:seg$Ab +6n\_ Sm*v?IFJS22t@{gu_Ui!+}v㫔D5[ 7Z̚c+9ov3W u0=gd2(a EWŴyw;J}韇h}HnoWd {i2L0{jNhT.b̀1o˒]kNoȫ|]RV#.4#ig2)mXgLr.Dxb'*(TR5k` +x<ݴZl|٫O_A6 +Ys&|Ϭ8:~wUH6U"7AN!qctd۬ +Ҷvkŵ9Qq]}hk%ɂe<|RQתQ_}2Rl +Xt7藺9|x{Jh0Dϕ1Y0OBo f8ez:{pBhH ҊkKh5<`o" *5+Ie v1PI-yz4bpӀ9hr=M3e? `H|T{%Xk(P_U醔nּN&H#[K?wVkG}ilգ1mԛCT,]h5?gڅuA3e "wBt +zS +m2{#Ao٩}(CXI#v3=-:ފW*r5ި$PE;™Ԗ FlW/a=G5}ZĆ}wa$WQ1b=u%Ν')K)$_2Τ` ME y64҅"=nPSjdڠ&+YU*?'qi0!JO_'[O ܒm`)J:=H9tW>&̼rG"'~x0 +ty}"]:ù3dP~-8dSj6}ax0(X0đ;3s2Ψ |K; U p_ #Y M"88J?|+W{}`LQ f:1כ{bɍSqT&?eiqw}C&y&ylS5o$ydWs:қ.\US <|gν(Q{ŠvUD]zAig{+Eyw/Z/1\􍸢Y rkktJ@o~+eS1]K\O͍[7r:ؾی._az Ρ$QΛɡٯϲ28JN9ږ7[ _3u `aNSHl/ĽsȨ ǚ`B\_8,$T^DT˜oA2~}m'F["w2E7YQ578dOa* }x8N~  KRuKV9}).kK~GQAc4$./pAaME+xad%Vn(V?@oż`j7=RS\UH/hbz>Ql]L,p!z39y,PdJ W^᛺pdx5L{ll׌ ~1א#T=³fwhIG +w^L+u +nK]?6"8XP ##$M{Cy['7ƨ#M2N2$ yT%2ٲ~ub̶ΠV򱓎ThƤʿDJw Ai-vqO&xlBѹh3K y~F>&_;< lxP]9We,]/A޹ ڱƜjxW$pC^02ܶ)Da`9S6w7Fic?R +ԩ +xFcTd.M*Y-E2}pJ%11{%UOz@|#uX\W<ۮcWqO;~3{HzHpXT #̿3&4fYZFڏݲm۪"󛷓:.I|!i"rQէ Y$F4Q4Ғ*JErWl-C4j%: +|Li"yG<2 nI]HPL ; i5nrX:a?Hr)i8ql}t'F˘$ nhcmPUmvěU6KK`OYjna"~|ܵ"7́G+i*}e)nS*IX4ԗv/#bslCL

ڙ3VYSj͕gYq5mZ<@Zޟ +c +>;EMV{~nRms:+H}I6yɤ'%hV6!_7I|]pXQ4 aav7 Cno>];hqߡ-&,feqfYo}Wt,U31H"9qKYIցLpV"VKBdk|4<ͧNw?)(FN5+SA[DoDÌao_c*^2Өy2 {b>7`۴a) n|Ǡ p=gtUG^7Z{}@t +º-(5EҺEwR\Ϩx:MšC?Rm +V~[)Դ fM4o4P9dPYHw8-J:d+ -\Т%]KيWt&>޲?\F$x:D[3QbeB< FݒC_n%hV+6if1 +_xZNڡ4wI9ρ*V=\~ Np.-T{΂:{ z­Jvf%RVxhFJjQ"dH6 +XJ݃^+b:GŀYK:d;P \Sn#8^Tޓ qVma8W~{ za=٠:NH|tr"-}˙FHS~ަ׳WmJE0С +zBt50?hZ ID(8]]Ƅw22`(\^}M@-yYֳ z +Ui +0;W^8d)2ҙЂ!Ӷl~!Ԗtfbá^$PV1*I* ɬ}]dH}*/wU'z^S9F2g0JĪj@obH0Vl 2qZ?h{c]YpF=KrCVCJpzo.")![TU(ԯWz}Ib\]2dZs+a/f}rb|*,Ѳqb]Հ%< [R|v;֝elY ۙS^7i׵Ģ&h~)*2MU_uP8e?ei1~IA;׋4*Ry#³8pCG]ŴnJZ'!Q$. h dl4$ʼ|dwueqC.@2P̞b &.- UDxdF\´aK̓HMZV{ em؇WyOZg{R9!TUwXl ؊+,fLk`&/|Flgh{"Q + kUUF0C;ґAIZvc3>W[q.U 8@W>9~TfDq:hn]*ph9OaT͛Ը`(Ԫ^>ih){荕"cg=Oc[j-UD+T.O5A@U Wv*2tBqH܃-gU"K6`~W_ s&ٮVL,c;]aNȉXYj޳,Hp|LJ œ(}Ā,5oЧxlQ[Aރ*j_`.GZ@PF1!eG͉q' Oۈ)sie=F=IfDlSn.f4|Bv͏;{gТGFwcs#`wB3NUO DZa\R/}bjSŲi=Χ0d=aEgy~Aoa/oƴnN[8''S<5PrBaR3t?! kb0*,&;N#OjweY nL KBrV#BTnIk $8t..hюAI-(7EB%ʆhK{y'vM~Grw̲ς)UB&Z}#p,Et!`+AULhc0LcG$.X?qi! , +m|%lҥ |wԧ'Ͱ}(Cgoa-(b =ƄMji{ mGSp,;弱2&rJ>+&{V]44N_md4"7 +C^6+sC(Jy*3_u,7I" +Хh躿_1 . +obA-űFLF! MLNEtb-2_4Jƶ,n*ة,KHk)`?^WeAn|hMׂ;~:E&EzBl{3)1cy{7COaGbl\=#hFmQ ]r nΠ ;3GbKI= &DzaK[E%'9oeu 8WhEËRV߱GG U#1ًm` YO50>Lݎ}1cxeⲦaZpu*%}tH-SF7CpPiMH Z7Y =)]aۯ%#C$kSG .dt1^IȫJ{$-0Ka)wd!(d׹"vUD@㊅+~4=3~ᯚ0/[Y΄Þ |9M>Xϖdӑ[&Z0Rf<qAsįtSXPհ]$GLO9{/+-Cr߃ SK$GqGye{=u$iuC{ϒ&k>J)p2U~Jj1`o7ݤ{5kKm8&`˜twз֜S'D?ꓻd[S5L1Di/;?Ï*ǫ-R%e>:X13F0>uȦ h|#]t`.fO`02w`8DOJIn0AhdocŢ{lf0HL p;x c;VeIHv +)ssM6ckIb h*s썯ecp)%ΘQ7'0pk>Frndh̬Ψ=DHmwĔ܍kyɝM1aTUjMk*8h-vY(sqL)Irڞb萀/.|g~ tP}c6tҾb5S(-jj): +uС/ *I )d{ff + JܕvzA@ABb>:SC $rڽ8ai{Zw WPgZo5cO&r[mR,p)%ߵ 폦f="3rxKu?6\3^,k\)Q`ɺ 7yScAAn/R8A3LF veuIyIB3hȁQ 9E +4=1$Ev4!d~:.ϞF6HXmurme MFrfZcׅ)TW8( +cю-ۛӌO?nVc|R3"Mpv+ ?_vH_ 8)nV!|m17#WcRDص\#}'Bq]\3-md32&+S`>AwS4^'%F%x2@ (_:y&l9֛W#`彿9|il{ &iͲ/z"yTH- ^ȑu p'>)x) +X(;N8Ǟ'=~9"qHQct{ v "KF=oy0:vx9f)I@&R't.%m ![=+v3A[V!3;yu. 0.9PGkk볻Չ??"EgC1\kי̺`l6B0#WT(RyT`y~ H>%s]@RotG {]7*):>,FKSAIǽex՛0o)i,GDy?ʃ:uOxWˎ?p^'T $Up33L'/tT<[. Ѽ,o(eW5iݓEW>'T~<̳~̕1P%YG7`ܮ`DʰT}׋3-4Qjfg=ƨ=!ud lCUނU\Ŧ F8D9ӡc쥳 3}e_K LN-)h9}p/D&$H8 v e4@HXRNO[g]4-$&Tb5J6OI<-ڑW=ō3NDLE%?}Jˠ1oM.1~gW4*|©׏O_Sf-yzJO8܆$_mE:9k]~%h.yD[Lkb\BM6ZH+B<^q +2kj/C +9-Qe{z"Wd{JgqHD*`>jӞ;=Iϰ]flC6-7.q{ȞDwd,0??=MSI2"-79,!}Gyz_ef7}GC1wCQ9P+g3rFiVDhsI}5Q= +Ye9D^$ 2e)iXDuÃvCK?3,#ZuAip0ɯ' +Oml} - @37#_]O&79)/`!y.`7KPwjA>B hs.IP=8Ko0Jh5= Z'j{Y0&AU< W|b9iSFBIh$ZQ3~WT^'m)>XHV ]zwYb!؍9-D@לMV$e8W l(ZBJI!srqL&?prS'w䧨7K'2ehj&OaxŀZÕqM2z[i2>5Hą*_))o+·!,H|^/i4!߷\K_ "1N~OLd5zI8]S{lyuٽLK[ ] +`hMG}sxHU_lJDƁ-܍ϗ> +stream +xڌT߲=i݃CB&{{ofkuSNթ)IUE-̀R 7FV&>7 Ro+]nmnA9w{+;_.| S "@tEwtvv{-4V^^nD.6 5uGsJA#`db%Dq].@ ?Lj amo `oc pY]*;+ہ2wD`SssG'S +`ic(K)0y1LA8ڻ:7075MRӿ;Ws'7W&W:d'KY;:8An'a4{ﵥ ,ܝ5A6@Yx5! +daae@/skkx;c[o @K_WS `ac0Zـ'_3w%+ rd?.oHL `dd.wST? KGᅨ{KUOaw.%ǿh,,X_!o'z?PS{?]"DeLj@deߗh*ePq37UmG`6 ?#+ U߇_> >Rdh8..Gw e+C ׿ `f9 m`ItTZƷwZ'ã7@ %*FoOػ\Ari$W.k$Bk$w$R(#_lbiHm* +81FJ-R䌪[ f C|pq ~ߴGYW.@*{v>bH=4ʞ2B +~(YԎJOX%'!,8I79׫iKbKT@F̠No 4O;rMH~_$m4>֫ڼwrJ%?czO<| RAҺ8YIwKn7\7i֤uӢ}h+d;2YԘy]+b=A!S _O۝8kpsgk^Y unΎ]:7lN%۱B +F'me6,̧v,y"F챖 ʠșf\L-9.˚5Akѵ)>>$G%M,`;j9`1K1JA%Sk̟ ԾC"3:.zILIOSi(*zٴqi{ ݧٺ:wķć RK`c3ٌnPpcWLPH +A_Q$vh .m(ͽz[7:i_mFp" :;in.;(&>Xw9vwIY`t2\Fy7X^*fV;Nhr:d W51VyI_(g$ӱ;vfgWbzyo , n{%B4'suJ֞BpS`C v|A\[E % +MU;Q;E(KLQzA-#y{HDod!ϧν7 Q}?L3%2.eѳ$z.' 
-G^.Vorkޓ1/2`#<)g_Ӣ_tՁX\Љn$GD+IYd b&D6<9p3ك"_pp:-")/߀=@^Z\Zq[5`Ĭ=Ө'$(cM<-0ԓ-Lu~a4'<6B-gˊB۹%R"y+$haP2қ,?і~twt {7u:-;*pU`Sz{5Գ~{h%G!Rw'7OAo;:|,M" +K)T&T1hFC,Я6tC@В8B|@xor!Chz]α +Ηc6_ٞ!)o裐F9捳PQ6~gዒ2(z+qGg&I?qj6SFX'l#tkMa5*UBo-B6锣U,LWw5zJ^is+|Ӵ쳓9q =g1 >#D `24nuπo}ղp0(c(艟AFIL£-'wol 9|~kncE~nݚ:O?CD~85BnRh."-o>-6HcތĊ*~"D+K(NYEsqSFWBm:L":.r}RAσz[!{]J/m +QܦӜcfO 8Ѧ.*έmĘ+|U.Xwh2]r?KdK}^*~%k,>щvNWx93Euڔ)Q9etj4g!O:zn X76P>YT232QF5^ ;,# zIL#DEa7R͊:E t༟dκn:S r]o]CѸY#qQ73cUpd`i.udPm18ʛ~sR`ãܢ$io@ +&n5Jda}ѣ)?7/,4N /qqs%R0,?lS+y4p+(3'Ј >ڸl6eU|@lB#6,drҙ^XsD`Zi^ xs 4!Nڙ䑻fV#Ȥ+";CL/B!7wNy0Dcr#D2Hϓi:j.}jbq:<1Y-d.b WE‡iYPceϚYL)⭷j.!=F8>y֙8R/";;6T|ǽyRKlk85ItEG^q]07Glt;m Vne2MgZk(ɺo[gi9o _*>9\&_dM{43SW%/VfLg\ٍxu?@OKs1ᔔ| Ygx}FlBG##"/*fB4śumT% Wa%Q͒ש'Msf +ZӑE1Jl 6%\@1yvmUP>X́plg qV75r/L$EWu`<HՌ3k bZ)P2뺸Nr[NrŮHP-tꪺ +b*!2PbêV^B3}uDf(o=dBv| I'^lrw9=34h:*pd.˻Bɠ(Km+"&b P .+.qCXh^_!?MQ|g47+4K]t$s Cev“U(]L8W}__ ,#D^'^^e#N͑.:ckuMt]xmLsH57w&Kڊ +$ ?Zۆ޼a%wTJ +E`jW4Ӊ8?~a_!}ѹ^'jz0V%CL_!~ d(9Hv#^b~u4_|e2|}PLCՔbcv ;_ h"oʁ_) +]{:wsŴ` ?eH^M|/zJH +<%%*_L*yLΣgE|bJ&IQ,i+%kV2)C\ʽf2SwYbs"7r赔nl6ZNjnZLr r_ +!P뷠uy4&R.S'%]6Nzk 7 +&z:3=Z|[tpQ)$Ao^l)֠> +U9PAZaF-n+2Ϛtr;ՆL1 %f-/QЭ7U˖=1і#|(*|7*ʟPz5KgH&o;*ԇKRJp;rd +HUԔ4nĪ3EA,\]I؛!l=#;t cg3 ߻lLRxVnXYiͳ+oTUF}_%VYW$R۰{n+wMb%4.Oj~spmʽ&Vƭ [HB*tp,e}\S= ]4;)`􃍹a7Jzzڞs6_k MVREHr,VקḟL<~wޔgS"3Uݾ&b{Bg+~/R׬ey4*fiJPD*>erh8v3{nZ?E"Y 7l`wQ03n ˍ#,SRܖU}{=ɥJuWRMFM, N(2_o߉ݫ/`Gzrxp$Г~t4ah37xl!bo~ i3qC]85"6 a@s-4}r/*(n^Gl/i( t\ +3 > +\BY+el]Rlk]S`>qoPhElQwRv)sϔeSwĐh'Xl6Rt䃘[zd|^P [+C|Ju q_Fp,][h'cAQ,32=i7/p*w_7fmqI~C̸DR{n`{GIYG^nSqjY +L,o X@h^sjIԉI30P[xї:N$ 4X !Tk+:<}Ƕ9[2:,7P͍b hYi|?_]$? 8hG^W@A:mEe\sj8<%s ia=wӎ~AQ'n>Vg Wdׯ,uc)ZVƑA)j(8Sua(SOR}xTzV*h?5 b*T`5nY{)iM^@*8e4NE9ȴxW3͸b,Fe6M {Fh* (}%|%j}f)F!smx&Bh4(cvE\`]Imh݋opᘳ!SS2@k׬a.hʊ*c8XD9ئDnNg]܎H k =jyڗ-Q+g^9]ݺ l#stJ-21ð1N+[*sq72jp .@u0qU]knf;Nt9^܉gۮ;@[NbfOq9swjLv^[)[jxk[r;p)Kw%j5PR\+0Ѣl#.]&a"&5GQW%"ȹCÛ4,Wi?Rfܸ$ku +37,n>nxȅSw/Йj.KeyO_$)婕g &𣞅]-8+DTz)}S?&$Ȏm~~,RM_1/Zԫ͊А}]-GYY+uRCŸˑ.aAZ-Wpl{.QJr`phJ >CV+>GX.7~I9T)og Mk!'P-;7"MSCIO80cl+mRx2K2Kw.i藓@ ۊ%K ^bF}s@(.^S iB:՚"BWY4xbquIB^Kd}dgs){/4{nOVoAsƨ0̄] +e &.V'MYe۠yZjN2JϥOCn }v|z z Bp͏sʩguJP%>mZRPT&0ւQpGDA"4DiH~[~vU/+]}x'WJhBe//U +ESymfB%liOxY2 dƒ^*V1V"j‹b)CsCP)$#i+vˏAn@нkQ-Cg<~)۬ Lpj!HxAZ5ZbJժ<+!q"FeҶQSu!0ή$5̈ I.!h#4w 8O*Bqd|"Z`XB6Op-ZIWtV.NJo\0wt?l/bTQ '}8ӽGYTZ_x +vX 0^^ǡ}2 +'nlA {O1O +Oc>{-<3I3?L:wVv!DeXuze5@˲v20QQhTaÏ>dt|ֵ3H\M(lr0rzL);wBO~%oi\1y3F,3,_4GNQU`IbGW3\uH!@sU IQ6 }z;)oBXK)0¸2QI ^ylVQRjf%+!ms91)4>Ud3<h"OoڳKeH~̤:ȈFW xLW9&7=s@1C?gaV)#)7XEBũxCn,/خȊ_1EU2Ԕ@cWg@͔ߟIyHvr : ڷykwZ8~D|ޯjL6inb,*C'ayAՒZO!u;cF#ן?V Xج 9;~G@yK9)WO؜*q8~rf"Q3";omjR5)]Ԙh<Xx;'=Č. [6$E1`si_6/bTsC-:+x'@N+ZVLrH(5[,n$N GˑW&6gEE.){MU>Ԙ(|7teR+p$2\d<3 4?GV7zyQ ԔN/ħuW|lKґH8H_o"/£NC@8~{Z8[AYۭoB??BTfp9SdX:MS3вsz~/^,[#lBR&ew}H6SN"Yu:F2}:CKg<7Knbz}~>C#(SxpR'NqfYKbP(`[cW{:3:+A\OYk!=0:,H3i!˒Ÿ=7=,!HE|_m ua@HlD`nGM~bjP߃>w/Mԍyeor25PB嚅tKyPNwEFwBJ:>0e%r !)SH8K ?c\>q79CZhyahqD) +vlr.A^IxGf .E=rgޢA˶cG Cr M,DP+_"ʍ^xrFr#1mByX)^ {u=P?3GnC4JMli\1OB7Ec0? +E- ɓx?q2&`'΁-gĈSH4E`[aP 'tfLSȦߒZt30K~g'6wޜZ=jXTL[E^:jک;Ϸ3~kcY5AI%a%K8XEUPn \԰U4a +Ǵ6U Hl3y\FǾեS.*^XiYQ!n?^w+փan*&JM! 
E¿G+#0,J\-]VWSxW2tO/eDD> +mz `z(|͂x>))W*]Uq6hKCɠ)k:M%J\詫XJNOD Ao^v EӀJ>oN +[KNh*Ds4YIdDXtLbi`gc(Ue -P$)@ɥ>Ϊ|{9}d)b1}hGUC -qG$j'j+|7~ï}.bU}d/?(9[l&'b\*Ǔ*/CzVU]3!#_T>em072?0{SR PNնh믊5+o}-u>MwT' /|i%CXP_Vھ@=-T= Xd>$<4y^{3/m&>dԖ<&r14˨P :VTkhw> J@½LԪڢNމ-W#k #LiHX *Mc/fkw +mUbҦ^ۨ$ncapt:8EJ:U6~Rrbu6S/H6$Ĝ: ,[m:!oc]PG*&J%SH=2Ÿ nc"=X-2³# zѤc "w08hc$iu)c[:4\ //(Ga7-ӸtƯ=-Z k>løY?0d235j`l"7Ba:pgñk}ߨዬ@4Q>i'Gw3K?FX(C~ jj  o71;kY^ǧoKux~=][Xm&AeXZs5_17[ \+`x}q8_p9v3m=\Dn_;YGu0LVla1l'^"x2f!^!>!CوQ*^G\ꆊsqi:Ξ'Zw dÈl+8o+]: {vJe\GǨw2o9Tt7='Tr^4Ϭ_g]sV268 18댂E(Lu9n&p@L;&OIUIAu~FhG1w5w Jd +訡؋4k`F1!$;JMC Xj\£UWQVS]rŽgGOQ~>㞈 %?/g.@¾d 葬ZOlRǬ@:N .Z(` =6PD?m 5GQ[%Q +ǧQ#G#ι<+w;>d!=t=A> T;)r#C*04mm^QJ{zyѺo%P\֡AӒ??Hpg\iW& +Q(X."z`=ǭq  Y`r|FkmIԐ($y/TF(Y'E~jڎB%9 oܼ|9IUeͻMѥ<5a(w.aoBG]r Er#_'E+ײzKmr?w)vP%!p?ZPQ4wcwٍffxe/꯶ÍPm|c Tn5ݫjLgn)0#ϕtނȉBYt3{O"gT$J/Ӏt+X]XU䌰J6hvKLh)ۡj/~Jayp,`cYοD'!e T(X߉ t'T}0Roι02ãوɎԼLM=2]%/WgWfGz)rD6,v;(f_hVo?'i~J. SNFV.q{ 0IbdbK0B0b[n,wkXe4ܫ&ڨ~hr)ŒW1l汙Ĥ·)pk&۫(*uFkBɐl U'pzSkD +n͛HMp59"_ԥA0ļ5 jJ.Ўd-bHwLRVG9ݡvӒ83K5QJU'^ +Tl-Hpp> N&PXpz($4ѭ"R= 2`ustL h6YpP9)zXWA.;D hA"%!RqD6S 70%-H#w',?6e˗)1q +Pnqؤ{CTsn)*W%Ho_XH ׍LlLjKߟ>MC^doF>9;J|Xې|uCKQ;2^tZpE05mj,VB\ zX@Nv*N| rx{- {7)S>$ b aYN ;EuvMgxikE"ue^SvR;(HˊWRM Pcdub )qm 毪sIAHRN]G!"R n TL(.*R"LRb[ExvytKveQ_e鴴 gL5 +&I4b8NƬ~O%&9$z!ؘrKjwJv".9p"]bfC|0sMPUH?11N:cV@y8rb|5!a%%fjXW4nX(Z@&h2V³qK]ni>MҴX|' d#Gsw> ᨸt7HU(A=r6HTC0. lSY_-)'7S?aBhgX0܊]!nB=Hg1Ki<: vgʌ*1dkcykTߟMƘ +p<"-b򃃜4E3@z~%uKoyg+z| Nk.2|R&Pxbf5X3zlvrj_ўsͅ70M_]d`lė]bk]UfFr +eG(ambH/4v#z@:(m:cA  zr-3V q{=/sU ;^c"s-~YNDOti$v$9XAYz"2&~2PJn)7}! .zv,@IF%Nk )G<#?)7koI ep OpvU[ZTdUk<IJHR}dQC{]UYw )?zyWҞa<ߎ2^3s`XMSZC/p/Yϝ% $u6*9 Q0Z rWod,~^6!G.SS*+ah4JPYCJ)P,[x{h=HΒ,wdrrjp8;'^Kmڥo_푲W>/ 'ʲ{ +qf.q~jRji_pV!Npr F+5|^ȸvv/|0hb\`z=(H>k^_ L g.F}JMƁi4`_imr՟ߴK4wPߡLnG Km*V]֛t(Q^at'Xüd6Ȍ';Lay2ܽ1kc%l3~ZCZ9,dT)rR[wiAޙ6Ji*ev֑lwJP;FxaBNՊ ۋiUmRRLQ"+Yn +eWO[[#~LqiHDЖ?,iG6"ڂ ćw,RR-hN+!g.2 0ҡ3֥? d,o{8 [7D}`F,PɸW.Y茵W V}hr +U $uDj'۳O\ftv`u%P ^f8M_+.] Ee*XYe~0wOaL-~fm\KxpC+MW87^@H`4[&e2s|a:OQ,.L yu,;`y$OfdOp}%jm")kQerKx2$*5&}nXP W_a TvQYꚿat[(qR$+#+ D%oؚ`]H1 rcmܫt@dgFAiG@KcXs-/i`5hblg[,o%\۲0V|\!Cz{w]Vv{ߩŇm8WۣhV1#[HɶGYP<MB㢁tdp#1n':ijYzl끞pܣ` p Q{ ^v# Gizܹ7璇F&?u&tsR"2x%Ao3#O ~ͲG or/JE?OTWJ)[ky֎H91~̤g5+͏R/= +}bU@X'2LPPY|у.ΰ+!vZ Rs .py:#enē@jfIdփ uDK m͐N |ظMuૢ0{>ixwtPjӪ.ſI(™MNWa(;Sc`!DǦ.?_P[ b1 PWˬWN)aD}k +Ǣ{Gzd霢\7cpV igT}YOn,#v#3.Uו^OUR/{V;ǰ\]S-ԕitM4ĒSDU7 +z0*GGMh@7)oy=ek|(@-+2$;0icl<pı\1q2<w\g媱 iu[nK2|16mM>Cv9a8#<6"ψ,2v\*o$ž% {4 }&AƵ#˛;y3szUt_K5#7/+?Ep`*mBzB69-N 98%rڔ`v>J^dIĵM«0J6I#x'`!aZgnp6 +O ~Hm/DR?tI݌UV;yI٪}v~{5t|+bo~kaN ǡHݭJv39&p`YȖ/"څ^l"8 q= VDisD,#7Gᬼ{ 2NwՎv~;Ϛac`JF:}Fv-؆oQD@4lV-'xW_{hjIsXfS_]e/B,l+#c| $Ji]hD tjf$HC12I'բ)Ub#QorbNkk^sLs|3W|~IBSEO-=ռe)o[ .HN.q,1iҷo{le5Kn) +5#mF+`2CD&Kn6˹ SdA,FEGjkvob5ޝvt6a/oHP'r<!g [nFs'=+5\pa=[YlФ +RUxT$\̓E,ɸ_LS 3djӹ*8hdbV,&īx],sf +QcRԱ%&ClLI.tb%yOF; " rRiX@/;PM?4!z$uwd"{^G]8P_G=q/*c[m\f Ed0XD7(DD֝V7[h@bs.reM; kBݽު ѸGZct?&!h"Y$. +yW(1=+<.엷AT=S@s_mgL6HQJ`by#٢01>4V9}lZ*t|ʎtGbvePwZ9pt=Z7@ujY ~4QbWu=YQ3+LY%ҍFS>&>n EǺҵ]+2 WפDJ+2p$^%o[zX><:9ֽ۫m6bJJÖH#<}ơeX5Ff2?o̬4}[3G(1*ҵm-e{46xXi|'o? 
I +K~Q1~&@~>L`f-/8:rnP&#󊵖 b3/H&Ȕ!]ͧ|\!CRA*W2`YV6yXQH~H2V)Z6rlɴ +vq]ttJ Ȟ ȟ9C< g3 BO9u8!"r]V/@l#(83 (\k,b݉ ɪ:kS[ӋE VjqL @ a}CvaF6$°^go(BxwG'DX~H9|W3 _;HӂwÌADM[Ra y)P:\t^, Av+JЖmdM!l$F!tKy:̓TPg76TKx mkt/+d3&2Go|#x h]>ɼѭ +ˊ_>2t-% ?X5TSM:*\J-kS6i2fX ?>H7szZ(Gx}AZ}~,nc.C>A(ۻ=BOq]qd<<^>]അ+ۛ1,O̎vvQ`Z$$힀~ +=4q#ꡒI.*Ts7c`*P\[ލ +N-0CrZX#fAHi)S]5{'d3'H6=>W6&N]*ʕw6^Z~ffϷ>@`^Ra޼stEm&ױ_iX|chڌMi4gCx&8'Єөj0`rV0_>u +E_/R7(brP$͘<-!`gPH&箘 FqX`caJGrޭ4-t~ kFL{ْ¶}JhgJ܂ .!JMA#jK[+q78A}o\߮ >ȁ v0S,YB:ޘjQ?9È%n0 ͢y%93R 9.3AtEl  "uO'Tol[Zʜxp} +R+1 2(L-wDTpND/WgoaއTRwf".Qڈs #;B6tEK_pEORN0#Kk79 Ϯq{~V j0m2Hwk] |PT}VZ瞶+r7q_O-gE!; C."< K$hp̪HP g mJ}O1ЯFjGq_Fn ;i$,ɑ.z_?c=Vݜf)Ic4G@Tؐ4R;[lؘ +\(٨1Jfxwxk>\r +-JAV_?rֺr|2Y +K?NQX:zښ򁟶L6}C|Opr[ʧnhC?ѳkP]:^ SĄ'WQwG(~Sk%ѧ7s'Gmn (ͧp HF) pNH.h @?bmsLF5]oVn$N:[! ѭ6DI.?)1 +~yzjýÌu +>zn?_wÒv]ɔX03).A%hT7Ӎi73u $~̫/>Fo{IMkd" c-ux[ivC,kçk>,Py%M}[eΌ~}\ ^w/iY]HIK9nv*mB0PP[6`xP`$XB%k_yQz'`L<ۃJrź0:^ [.P֪9[Fݞva1GD +#?$X9(qO?H-B[a C_\bŎZZhMG{0M3,jB:6[ue;dіxY3̤}} 8 ضVҍn`Gn]Կ{0@Ci\o`.ד"P@<9K*Ucemx +wSW"(Nv]78 +kˠ y! fKŲA*]{hi%nMfG򙛝ө}OCX"#g+.P"U}>Ńå∨7\l<\q mPsAr4XwڬaO"Mg>,Mw*a@F{ȮAh9|zԓ,&tfE>p̎p&ʼn͆|.K_\8NaGA/\sơm8T#I%xJN]U5SNyC Wӈ +endstream +endobj +853 0 obj +<< /Filter /FlateDecode /Length1 1374 /Length2 27197 /Length3 0 /Length 28158 >> +stream +xڌpeߺ=tGc;ٱm۶m۶c۶:ӱѱwιUk}ӜcZ@^FD֙($#gg([8[@jdagC98 8%] L@VN6Nzz #==9@ZDoQ88ب1q028hd` T30q_)ȹ͝9h lhx)n@E'GWc?#e lL3-lno o#ou4P` -O?,ll`ddgco`aak46ʉJ:;S lX;7p560 W@Q 3?36 ؘ:;O{pllkejakl.t*.&5ff dgc8M܍)ao/'?3xMacajdjtvt1; +40rY'_E~ @~}av) I Qgv + +ڹhX4, @&  =7O+akjwZv# +%k&@!6= o+d$ّ?6Ae_Ղ U3tO_-ؚY6Z8Z[8.#4k [y;'-@zW]FVN9/_"FvhhK%F_9@:Z[;!M(Wn#Gǿ +-_lgl\'Fs8ͳ@rNA +BQ3pQ elac_뼽 2#I]/Aq:k\q2jv^I<v8g!1ߓˇ +GR41*$K4PH H3_ T?1L^;/KUʌN}Ę&HOR%WJ$CJLqu[y  z׸+&fv'0N*BHNr{0J? +N +=hQՋ?~Y9_3e &Z?Ab'kJOXe4w2p|33#f uq< 3Fd0`N6ˊm}}iOʩxLUǾnA +ߖ ;whP;* +]1*35#YW3Y~/f6.Py#uӃR9jT%ErԢ –I.|/}g)6µ6B  ж  H>;yä0r[]K +ZߜͪL>N~oZNWl$>Y4\,clhm79`w㎣1ZæySs#^$CR+cM3M~s +Z&MmjMƢںT}s-eA[KSy#jBKA)sQ1x|0$5OD(0S0= Nc[mGپ^R \cY # 7I;BƢם)ߝO<e;zg>PEռG)-鐇PI]&T$1._01ay}kt+GAUxk}|)8SriPf m2-^@SjV^NnNkڜC٘ev`'$p驓}N$*=F7fY.RgdA&Qʂ|iE^]QGHkz_l[iűc{LixkYm~xyD4`( xP7|b!9T=) +1b5-{l \@=v*o`%R!~>Y9. ĕG`3]W wj)js mw߼1 '0Y6k|JQD}&]x%2dHr +G3@O9bn6  +غKg)\度y |/6%|JqC+x:6]HʬWH,@|:=dFyyS +0H`R35do: f w,(~E$(öOCV2I" x֖iBT_VZv4+}fa K!_sE +јi@=F:P*imϨRf'-d3eLL1;Q +uWܦ E3&eiereeLc/(e]HdN+'/>iWj4pAA.*e*Y$]lRi2vˍuQlVF iX Ix}sPX7?07yI;%=+KBhqWNcEE>GɠRvɂ[\]Jޙ.ҕAL&!LOLm 9V8-P2F&YOrՉ;5w[m /Dkj9&Jb[&`4dWߒj n5bJMA[8TV@_3aGsHyc#)؂o{*@:`އ(Q O^@~M9qoދ'ew Y $^jzިkk 9ɖ1g=J載Mn'-ѴoP( ;=AgįD&P}|ɉ +i/^SV/\j2 Iv[Q&w3(eLV1ڹ.Ƽ7\Oz=;Q #٬Ũg|[W% +̔D/s^WP{5-ڢ*p{/$ZfM!HBЋVB W$8$-jK?@Pd2$4$U~i)-bE@S?]0Ϝ<ܟlܵqt}Ȓ%M̃2w{,lʅ H +aS2=W4]y]wl<%;m*_hz+@KFwɪG[|ujj.b.lQgB2z)4r rwGVg691EnPH"n3h/e?- q˺Qz 7c@Eo=nnH4#% TW6ۖ2"06eb*`Mpϲ!e_R:CE|Wd+V}^i|t$FM]`A((.zg -0&E./T_̈́МРJՕ5*nr"+o~׷DfQ +jǟGd2j sBM:} Kj8Vu[̓-Ƒr#{Fȿ#HI$E@K|JQU`zxǖk=g)v{ 9z s{bu+YxW"D$~&ӵ:֐z@+ӽ"0`wA#5}GiSXM(WJ(GH@@uY'Rx2e6o{:JGȺ9L3(Q,uRpUםEizuxP8OڋwltLJi# 1c\0}X2!]]EHȑRIB92]Yp@(wc<bƻ=Y-]i͠A,Rh+ |)M&`Y}wq + wi=Tʽwo^`z]k(;vן;GkVUЂ|pSLLQ+7B~kmyB^i|vRυiŤ_.c -i!#Ù6zk DB#vJ +=H]A +4H5B jr>a&$Lֈ*oqF)E%ʛ+AMdZ4Wb_5>%_KLvU%Pf*ro^q~?x"~ ]kV "-(Ce0mOI~_r0o=+'ޘpOs0%Q0Bg}!N+ t P1>8W2mmv$#9ʢleMxxӕx3n)e%h ڲIܷGÓUv7Vci`4DTg+n d;fZqDC@H`&H[C†HGZذ ,*t3t:G "xPI&|"`Ye}Zy2!hRؑVn+A|e1BXm 1que8;n5{Δ@N%]vrOqŒ +O!mh)e?2hpQ8Cw[rzP0Y⨭\̊q~!s'gP#m\? BK5|j[UShq[4vIwR뜶 ̳$? 
+LzCfDND{zŃ`5oά=R8IkpHϟ.#3$`<ɮt۶o'b^$1N8 =0;:'/rLx9:ˁY3RS̪Jy]SF̷yPFk{00 zu}+CpAY:#bqOB~"k[5I7~eMD.6=(  +C(+h3ghcd G->NL3`<ϷyISttM9H`Ӭ \3 IRUl0pdf}86(G$`Gˋ+=0eDІ5"ps8=@ҙ}:J췝 0Xr=PǗ$w >oP +Cm|מ[XF x |TX ,5Q) ůtMc`gitdfO$׊cҒGϖgJw\az7?>8qHAn9T }F76Wt[ʵg{&|Z-4oڥ휽f sa0T/U98/p[)CW +1%>f:@ZHZto5TA;_旚/ΊD~rAc] Be9co=gf?qR͌D+M{)zrZ9Y5ݏ.v iaN y<8(>Vp>됷UϯYw$ʯA7$nФ>w Ou󲷚[]?AMD%rݚ̌(h#qRd?2<;Sz +&TڿbȌ;q DBIs+%njHܿe n,~]7@R{%T͔0N$wBC.!:46j^"CWj9Vz0d/ot~ !SRDp=mX1% +T<.Ŧԟi&P*csf;g>2 +vgfQk_ubi(H`<:@nSk6s* 3LA+ϸb91kP ! 2ZK(E}칶q.Me[`DFkAXG7ڶ@:2O .ĂL9\X56fhX3'_.H,:% +@uEkO*e>|b=(")_K:;r"kʐW0"2?b9I{ސE[EڻM:6_׎!gQ3~ j*Cc"[ K2ӤnV%!KnSA[|pYf;jrjz}:D-&' +:yh f~FTtFa3l# ضUAV_ʢi3/[egVri!X`*|&('lFX#4\V7!NQsϟ#h +x{%,xYwʶ8Ѧ5Y4(T~dh(K 'N(MmxcJFr;,b,2^Ud'?v!F9k6?عW?Qx^B\rcNKkwAD[hr\Ӛb&?(zIq+4BBl,d[=B^3-~}a^0;!E}P(Bp¤Y4AZ7T/:t(R$w}ul-[ޝE "BE0ܛ^Lsz/΂kTqC2LŝaO/ V°MYIk$b#e}!am]NYnnCrF!H CoY"#BuW+9.e皍~_u\JqX7>D~RR#CNbf"46f4)(CG+9I*M 8ua=siQٝ<p6犱mn>:FBw8O+=bW"1_P>t`\?m–uVbPhC[ ^'L ;);(^z\ue㕒umaDʹTYH"Zdz̹fܪ,=mIQ Tꦿ&n Ӎ75:ؘ"DJT@oR2.|8 #hsw+.w?,6V'Ry1Ulg"# +('X_h[@HuMA*EˑΌr+XFqOC'y`C2J=mDYG=zIQ#v%PeP`,M p]8&[gGG ޫNy +|w*Xbtv8ٔ!hBr]u!raeY=G-ϽU &]ӓRBC!HH%E]xrX"qY;K9]"z?F>A9?7+v~MOr2D^䎣҅m{줙&67o!>¦D:M//JNNNO-m̓GK~/?؉['-12toA!j+Faq69~"Jr]V/!d--sWo, 5I2 y! KZ*h5=m^k;Wo*ojOWQC>r.[ LʰZEoB,|7yoJXdv(YJEO©&ruL?no񋖽ka㘇*ϞKP+ζj |Uoּ8䛲^ϴ!?o/ ׺ YsR:Y2q?:5K}č;JEz Q#5l9_k+>l+Фw'5Qξؓ-χ8VheU^`iϰ$̡۪5q*Q :w>5O@!8|dh`!/s}/XtvwP>^R>{jz]5(tIa`_tw FCAb%[ R3J2㈥Ԭ1tàn;j^xِ\)tgp󣦭2K-+s7aWMgΜ+U,_뤯Rր80ރ.Gqxj- *q*Q} =ιDDNO*jzl'LUzsB̤8"cQO|6[[Fo߼DG=_TbJy ԼS((&Mb|'mm}414TKvQ1$ZEFjnS +b'÷ÄV7².5p 2sR5xL@JlmxLWF=0`PXQHTnxdπeO,*7_R F1+lG.78jSډZݘhY30޳q*DGRF<# ֝FڎĽH/POЊUGxb>ˑ#~I.E_zwj GI z:Cg+Biggë]T:(&+2!˃~"^ķ-BeԈϨY2,yKODO!}RnחBdE!@zS씽QdZGՈ k)Q+6ɮZ(*j+,KkNt̯t޺{cS&KP6NZ+&vjjJQ3//>Ag\*( \HLbx_!%4=#E}~΢ayZws ap +>7 +!'5r_w6BTo?_g Cr?x+a&mDdK ƃNuVڵY/H%b﷊dXlHh|Ci!s +K;K*hј/:pmlFm>3 dmE,f۞8kg1 +Oc롱=pX*Ф=q|U%#Кwxok^M?5)nFz$g5  "N: +XR<]ǻ_uQN6#':"̋y%z.P*;1j ^=O`%H01#Q^ \wAXaȉC$`7u]'ޖC2X]lR6I1i<-݄=" 9g;VS Q1Ud q">_tɮ{P\[0ǓXo&+9_UnzHKwtameCWF.ئVM OV%W2yDDmGxdM6xo{] m4*<04 +n@ 5r4;TۣjvqU/u%oqj_ivP.4WX?|_^ٮ`\#cS?OB[ uu8l#ւ3^DMM]g9{WQfe÷14by&i~\g4Z:/+Wgso qffR#a?9O2:i &#>C@lՠ">iAd3Ojݞ|0#}B2k~jEVCMg7O ~+X ?6wLٰp̦b,9-0Ն1OE 8ZjjV.ړK7+fqr-r$-"P itN!AW4V E*^!gCLU5yIF>v '?߽ +>ON2Qm߄#擄誦-÷R@]H7Ռv1x6Ǭ:tV60;Щ9Fձӣ&î^P~4/뾲W}\Q~1iQJaROՙ2gQBF4|EyYL{⸹܌;C.+~`:]=#[bm[^3!'43 z_w֦c|);m{:ă Rhv7M,=4E4x^+&<:%҃]}߱ioenzI(aRpN!h1eM v)QHTHH`A?`@@V*.Jj15`C)҆a~ +Qnb@_=; +I,h#ަXwkl^Tt桋kݪ#4晞T9zU}{9k̿wLYeΥ+gH!qiDO-rMzHt_ +5[xQ!MxA}NTz&³~7ޟ뙰䖨IV7kcy2 +GjӸdԋGĊ$\}wP7D_!3gyT?NSG0SʣIY50A\ VjLˇ1uiF9t2*(pejL% Sjc(?7'?'oqD8ݪ-IqLm;-YMcEgJM $Ǹgff D]j?1Y&vso4}Px&3Db mtL/0Cp3GQ#ßp0m_zڻC/((kbiqG^RTď;5Hn;J~IaCG::2Dn +@?8ߋ.(ف6iTQy`kk!|<**4M8 m(ltX ; +̰ M|fynplgbQZ^缙]uk㱦w~@¶K +@(]iA?ytmGnԆ$'*BL/5r'ݾ\T`ʩ7R/-iq@ez9n3ObUnU) P4kBER [[:B} T`H]G@X}p#W6IڌR8cMy+;}H(Ò~y1U6|G_ϠcO[<s35YR24PuB-ֶ|f{m5oa0?ӅWu_5;ztҍi+ҊCus#}=X8maH-$1h-Uc?_o'}H#z [k@fm/wd߹I>j7 +y%ܔ$J8HiWo jOfͼ<լd003I~vX$"_\k2 Vϼ}Y!~%B[wwe$w,/g*O< ?x]e)uq9!*AU )z쳠]RL=uBm4CA!$LCP&yma4*3ň@Es +66e:ŇmPg;ұ +{I"z]{g"b<"\0Ʊ8jK@I`?NccN7.uZO#Tfx173Fqy'vUS5{+Ccũ\1䏨ofP -]yQ~o,cs/ +([HYxϩ}<Y/:EHkX3,nIVKI^hYDx3QA%- kVп/!`Eʎ+S7|iF Cs?!\ WNΡ7_n!n&LuH\JI禧+[Ў=/Y{ku_0 s4\2`i2+e0>caE*S 0y6sse֏6!Z2'](XU+#"5ϝW~Z$Bb/F)Zͬ Je 4,dƧ=C|*k`AHG?nzimd8G)Sh̏?;@zu5( +^g0_ssΧkEWʱW. N_SrKAWy%ZF|s^Cd!S>㱫Й q]pw") +s`SVT~vPTd*xAMq1@4aw!ٹx18HpJ7Yd]wbɭ$>=yfPuVPN\Hl>܁Գ@9vC$b h2? 
'₌(•`3M,Pg)GBhX]sYIR0#nkrg%S[I`L6'EL %VR{5<>.}\YN?ƃ& +>^FnA-FЈ( (q(EuQf$XY!bZzM,>ڋ')y7Ql`\vZЂ;9eD珂ze|'^EkI-'.'6SeTUa^bmꏁY6 8 7ೢVreI*ѥvл[*9!(G:@܏% zR_f,w*Sp6lV8xFDTxZW:XU WG.=BGz$i:b)iņO0٬td9q߹/XA#3]{ٶ#S1'ix-`ENj!FuZH#;/sNFwpXmT>)pe_Ѿ_ +2fQsfe +cNKM{,\fx![h'2 _u3p]pStv=^eG/eHK҂mwƮyrUV},񍱾Sg~r5:koqp 7#V8^B! G#+T)he P\ؕ-[,TS}E2=F~,<*dQãeE喱=JṬzpŰp {W/lgS=LAQ{][)Q"A"e&շ|z<:wN"CAJpy9聫k6vЙ@a!~}J8?֩S{60z'8s:A`]DW/c31XY=L2@|j\DD\";& vpg&f[ܙkJݨ*eij?Jj| N:Al0FOM +=JCJ`npP@}eXc@]B@sY2C6C J1sc"ΐ;umG\g'_iQ}`vsX&鶏&8aMwgQ st(M{5A@!ⷰnѠ}EUTPm<gҲ2b!l n"Bw҃_8(٤b?:&pz$'+ kl?N'#zM#Kd +nX>,ᙋj P'>ZIgFWNRʭt6PESS&&-.0Qc+L|f_x(ߪ\Xt0MNR +73.P:Ri E̓C/tA13O9j!yڃjɱ#geEZX⡎XĢy!A*OM a71PʋC;7naә scB͝6o5u-S_ `4f|[+MV; +bbZ -!ۣMۥ| 06Te#,uyb ԕLFt;ˠY! V P +?^'Tg-#m\Z 9)R8! YP54h% J%~ن4d|iKPa(\^P9X 4oG$`<[fC (Z FsrUk( pKlH? yT1&3ndw"H9'Y%ŠV$V׌YmB]MjIOPOĔNvNJpu&8?cpPFTasߎ|}msua +kGcs2 aV[?e@28@2^/5M1`gd&%%㙪>թd,5|0K4 gq9Qȸ=כ1}SE^nEheFD*uB`+]@xuh|vvݺm/$p?ճJTuxoHIKbH+R5eʞzٝXOǚ%I,01pYs7FDPOb*|U@Vu1WYeW x^pͮE0dX6wA5u棃049\#Y zaĎu;m;R#gzRoU,/sB}\|D&?-aO9"ސpz-}/M ӻ$yky"}0,TInCLV F+SP0hd^#}cAvtܥ.:J{4Ђ* $RBJ[/==N=x9wmor=.oF(U>sw_!&BwwëJ;eh) xU7ow!/CR)QgFݞc73%mB- VKH"(s:8 :Th],u֐mWmg]hx7$ :\.Z'`?+*vRf(ab[C-ʎe?Q++}JgI+ѮϷfdpK]`Z r.IiRXR/s7(,yyboJ@If3$BP,0ț> JsVhq +KۜƅϫT7-iаf,rœ z\_s>DI\;&ūV6Ѩ +;E][ۢU\IqCC:@ΦYn޲T#c#JLq,rěQ23szg #9yˌweRv"ey+fSS0afCﯫa知չDK4\GĦN0&,{ݤsó1j*YDԈ_G}/>>nOtpXg֘CDȜzT5(>BqWTX_2ttb_[iڇ: ?ISM@\%pd~ʿaÏj"&wC1 +*AGi +9Pu hȳJS8=q_! 'Gpo]b$xjm +]xԗ!V759%SBNl4Rm*OA L-˂ҖhqBNlF^GQeddi|v~\h^J B+F& ̍%/sVLNR +_kV -oR Ձ)jEqw/Ol%ؐv J%N$)[YMֆpY0+o"oa CF-]=Ȥ_)r|v4IW{-l!~FŅd:1cUyW}[jPV9~ *L۵Q1/&jC_ZG_6k>~=?[]zz.v/;.S6䮥Eynl)n $r[`DSqjuࡼiR;<`Ңp[A[ģ)F,ZJ2?)^dZ89}[Ga<6FBj/}gDnhnCqH5"Rq{m8e 87*aX,Dً@FM64aĝNVbz])1BYEBz2Ǒj3)":0YRA bu4:a ;m&A e= 4jڀUߵ~ħOsSH'{1~ oܗuSHUF&Q1 fʟs1ch=8W{?Wk`>: aOѣmѸ g,Ɲ122 r9.{eߔaд>C {bTbF+Vܛ٭0zmfv5zmTVd`}J}lQ֎XuJeSS Df +f`q-CyX +]+R4uz}vG(%1PPC!K6)!@6h3E4Rt~ôJLr(h4HpOdHG^Y]Xqǡ(#T1۾Z/OE1Z#}k2qW`RdS{93ȔmHKT 'jSn~ԫz<вjNq#0/i:qvH'6ÿ[nYNa"ѡH}_߿y mºn$.9y큄l9 _Y|'BH>Q8 ym:.5YJLSU +'B +~\;UltnIbM,'G?zoIQ F$K^Ga|$:?:&'7=H=p\e2e@3<ԟBCtwn"(~eGpۖ AtAi*J#+-FXoˀ&=9_V7_L1qYt%)22K,5/67cO;+)sS6&zr`LB11HI%ݍf3 <'|KW:JYqo<^,SD@nH4 8Q Ky5UH2r@Ӗ)n 4PRu}7v ]aBlxsQ7ל@ 9WϮ>$nDeMADSi$S\1Vu):79F&ﳪ?,y{'n:zH-ɀ* .u 1(r!o-q]1{I[s-qCs}Q^WfkxLV:ƮWUi$b8E.hK⁨:(Zd[/1J>ʳ tFMI_wD&nC2.;F\3Xr%2'h/tD`?;oIEe&c +͙9q?/V'>u|鄺=V)eͷv()hcNyS6D9t[}ruIJaRyˆ7,0(͉F^NG#pv 4"cEF&j^HWUEwSf`!}_lǏj,f 5? 9m'ӥ.&גb;chqRS`y@aaWGq\04TŐ1`:^zޯIt`NFGC%ZJ1Pa3ŷ:MM/|SB '_'hP<`lİ>Q/?4ɕxps5Ľ;G͵&KϬڔZcSyn9MeXd+)`G3O7|Xlzdat_W(fYz˅&/ShRa0&$<{J3|gZڐ?Y [{xmѨqu +XfUCYIyQL{m w&[Z u5H½=Z-'р|TNc+$ N']`FHM@rP:wCc1AM/&ٚ╮VsCG?1w8;|?♗g;[?obvN"3]:߫6 1 ! 
]^֣ӛWDVL)|}'XL#Ki&-BMG2R))O}E8{ lG(lin$d21Mm`xL R>?"SJ/]!η$~rLT߈-EvLYdsIɰdQ䓹O V 털!f_Z}ۜ.?F%R(7 90.LUk(/Sf OفzvѬ zQ x6O,r +^8ŧ +.=8v{\)[!/?1ҊVvSki]sە/7R~ym$fk=z.m&l^Vu'vDʅX>h%IMbKI=>k}S/kΖE}4YsvAsEXb-asr5Q^%ܓ 4dgA`.4~Ny.I!=\_YsJ?=cĉJװO7fIԇQZ,NH7I|y~v},V1+¾omƝ#ь u?3Ĺ&&8)BM |5m^r+xo"*xƲ * +)D&pk*]* 0V %>2mT,X`|iR+1F<&0o6 %b3Yp +EdO}R7R:zTJB7c^&&4rp4Iַ¿nx/SBv +Ӳ)Ũ T{$yJ;Y/؋4eHU wz_Wi>V{qp԰QLj_[bh#Ӂ LPVK(#^7 F}78<}?Ҥ*2BCﻰ9o̟Go a"+_V#eh-;;ʧ$|[מA)?"@&)g'pZ}z {P3e}aq2ℯ +endstream +endobj +854 0 obj +<< /Filter /FlateDecode /Length1 1373 /Length2 26888 /Length3 0 /Length 27856 >> +stream +xڌp]5X;NǶmvc6;۶m;;;8߹U{?k1s1MNB'd +46ڻ13rDT,pV.fpfNV@{ df5rÓ]mL,&vn&nFF3##NQ#7+S=@ho G.ttt=L8hNٙ9Y\,hbd PXxG *^Knwwwz#;gzWZ%@y#;&#ZZ9]܍[+3{?fN?Td +fe!==7BV'=-VfqYzZ_D#[g|#7#+[#?;7 ) x&NV.VW?,fo*3wq?Q+'3?][ٛ5返?`f.6F.vv6F#ĒfLt + `je06`3.G{Lƿ>G^@{[} Z"J4L1aa @ `bbeppp|տm0O9O:vWT+?k @?"edc4[)*_" ;YzK#ZW?jcZ93S+Wr1c!{?bcbgdr03Ur1G2jYLl׻O#͟]2cWhϘFNNFpfcHS3 `I`tZ&NN<l_ mfafhb\'Nw8 5Bm) MbQд{2]qUeP㚣cm38 %V( ,ZleBK%I'UUJm,*I+s_'.sbssp:m`-{u:/iɤQҭ l" $8va[[o#," +~S-QY@6t)L ^elvs3Yg{OVIl~pNMb/\djD"RQ={14m}z֞:l_/c+-G([杬uRHQ#Έ ':}Ѕs&"6Ps' J>$4Q8k.̰ "3N}bfw[5p?'l/z,mK& v4ՇDGu7Z! y/똍/q6=>u :+O}h7u_*6k TL8L?g#@t%u}%} aРlˆW\|}uiu +EQLSɹ}F&xDNօ:$']h'z1Se\7#%SѢ8^r:$'͟(CϔR,Ke)ċbJ'+Wu\ٺͲ/UHc  G|1d $/>+ȥcْ9BIg,ۈڰXLKujPFΞ7`};x}kEç,PJ05P\U%KO`W||b%JXp1t]IUX 60:[ժ 6Ok}> n,`pſCΧ%*~pPևP("\91"M P|DzUVty3g4s='M/(^o[X5JgQU dM;DxԳw|l׀<,95pM=TZ-ffl## կi"Cǣ̼v"nRk7fmGF4gDzu…]AVSo Yml&{#OsAZ}* ᇅݴ޶%ӎ\-S'X)=7@yݲ`:u*[{z YsϞImWf5ٹ:-69Hb| ;w$- 1 M`5xaaE g2P6I{).Q,β/tٲ;6c蟱j)[\f>8kVbFLj]15\d8a#J1MZ1-Oi҈lȠB鳯|*/%B*I&|ZVϽ;|A7\) ԃF`fhz`,KR\(x}62η p$g~߾݇oZR6zf)Jp~5\&`aȂ`IC!p3pDn:gä󅫪fv1di6qn }Da_t +.~Ht B$>{|\)*)|l:r%>lFk5ٶ%ߟw=cޞSMGTI`GIf[YKl/sl+AN_W&8*<,k뫾SG\W#. +SL_W= gC]aX2ZK8w>u '<9+ʷ1k~"U[oQwETa.:]ӞtQ0I~F̔??T祫*ϲxNUb;%,mRwEBqXwBEIa_尐X?TE*Bne|amw 2tl>xqeOQ08^̳KL%I4}ٲP(LӮ^#׿6hYNYTp :/vc+Us.HaW_ 4spHqXy pfE%‚#=&B,0/.-'l{$E+/|RcIX;ys%9bGp0Nf`}}xiQDWH%Yn88k:ߔ Y2wb M)g}})u9 8Wħ?0’UśzS l{0j&Q<}0{\!QItPda݃Qp*f1N/ ,9u5gL hkcbhcg7+Ai6w W#gu[Ɛh?Hy ̧9b|P/%Y + +7X0? %Rqlj]I+j]X|mթuK-p!-ZPY\"'r\HNM! $Z:*:,N'y]LquITǯe$,z _ʹ9ۓƙ"^Wl62NC

^.dlO7#&D=C[G(Q@׌L3Bɹ/{-B|RT>{s[s'h6Yp,/5fnBc_pkTLR\f8~_ [:!σ1Q&DqI`"$.Wy(^Os %$Nj밷AҊHYxSܗ7-R.Et~T"cQěMKM Vsė_Y) +u ,4Nz6Wd*:k?mX +b֊*_6SM;bo$4Z;+\O)>:6=%DQG =\ݻ,^<}~iBinO*xI1RI~ k<~R!4Ogs:HĿx[(DWz}Z$ȶ)i<*+P M^_}K]ue%(=WT♰K8:fna0++:|/*'*,Gͬh0 lԸIf[`5uҁL^٬fsT$bH(IG&#LͰ2br2]^py K[@[@ĊSp)<"#*X_'Z +VRݭb]縳wlq c Nֆ/~GZcWl]OIo/u?S{у+e 3KuNCSE( x8ġem=AȎQ*!Z܏TH1@*F_D <YOt+0ؤ~0-Qt#A~%滿Q* ]0 0Չ>Bx \J2(ܩ5=+䉤*Xqr=ѶeRb(c=Y(}\Ci<0WS +ʾ_h+۝6: =]5*b(w F/ܢbUu{l4B@v{ԪUo¢.xP}eRO  m,ņJמz3}ӟ`,^Β}KKRsy~":Nw&O1}V(}ĆAIJ2%X8w憱"!GA4mPtETL,gZFy_VHN +~V sbR#l7rgv>wԸ3=C<f\\EUf{ijdUtrEoc +£Ol1u@Ƨ=~$wo:=^u>l1m9O6 ";> +};q +kr˧"U|: 7*Zwz-En3EsxW鋕7?j +)]3Zr!QЅcezӗmd;a)e['wF J p1|Љ1筞]X4="eAue|J/>(1.S[~9ȯ4ydvK ;lM . +ftnSuw)eqL0J +39 )FO/zwr_0"biyus퇭QA=]kS韾* ~`<}SD]<7`- K +=l7M2=N/ݠ$>tsG<`_0c<2mdOD}}3biص f:?Ġhy:tہTp6FH Z| NƎ*(7G} ZLf8Nյ\5B'qb-͍5ڣa[Qz;VXhr$?JebBԶv Ayi + Ļ;y[ ^Vf+,h"@ީ].q!t`U-3[ Byw>M(ny$6Sy6 +,Ɯ8Z5(o߾J+?&aHآ52OUהsKSVH]82 fT YWU*Mr,.{IgT<Iyg*_GQ4Yꋈ16I^#|;^^ERA4!~,Y/"ebxYR 8Gr*gK7n"ᡫ[;=pg +#/VclVjnuT وL2pԼ6$k?"N~ObMKõЀ(돔4ȽCr9{Z #O1>jd/k@>6/ l㽳u0{2)P@rbkk<G_g;?5q"Z}kZXAٛ; Ο%rQ@ɚ@<z{Ȧ Ar'uO2"EqXSzӄ}-{Ձ=]( +h +,GԵSe־^q)$u!°N : ;ii0`V)5 A,\Y,4jVB?Cp5m o#D+06^~$'ePtpGۃ"+!kѪ;ȅ>՛ +6J4;OL.DӀuOęYO]B5eɋqv/'i_wbb!/eƀO忓a6W{KH䰪iX6&+8| ýiZC芒egzo7Q/<#JڴpwFrЯN./iO00+gikો60&8 {d1-^TA/F Lg \\a2 +RDh#ӭUa0e߁pN+ff/C.F{̦c7z5,"sfAB]ȭG0 L$EXc Ƃn޸dUPd9 c}e|8Yd-(RLnlΏ#JX_J(Ct9$RB=ΥHN74hjOݲ!.ø)P/㙊%iHŽpA1qmffE@>a0q< +N>*L2k-$S|xٳLNi +ULƲԻ}:=Jy& E"VNHNu׸AeF_|n@%~$=F%pt +c QJ_O2 <ōpG` ^67՝?,g$ezCĸf,dN-$FFNVT7 T`̲Ą ʴרッe^'mRNs:_䀐t4Lw(yR5A&[Pimm U^6"]03y뒬֨a6!D @Bk<dMSZצ ϷVoiy 'j{7!^rT]xYz/=r0$,$T[ {y+N#/r$I..O#-e9cg?Nwqi~Ix|41?OE+E1O7eC ce\=".yRZd|/ τY`l,v)Lf;CGiMV+OcTW/~??Z 9%"T$:i${?N(xWO(wİ۬fS#'Ȋ^섥h{[ wMdfѓso\s ^L$VZTb@&ƀדV-5d:;MUA$d)8p@!LdB6Js(QʌQ T)4%zqnD)` ҹ{V~-`F],. +nKf~  +u¡j`Bk(2n'wߙʺbaSC2XrgҶ5M 9.Dy!=9͹-ɨ{+/xoն[9Lb: \MPY0m պV_!c۸{pzP;D;N|L!},y?ѸƥiMg1 D(7 ]$ܟɹ5mrTS뎁 +MQ8ؗUlb`{[_'cZbXtF#2o=ZS|P#e N`LF4+@y㜛۔0DU*(O7`|zEmCM{̐>t s76i[ab {xajjdȹέ#͞v"Lb qpS[GG3UՁQ`V&8D;bW~ =kIP|WЄ`2 I2啩AFZgw`t46 vŠa h{B\[ +bOklsIC1]l)mwA=QkH0iXLgbk^N PW(|1"}yAc +:_HBWCC$3." +t1Y:؀$Nm?}Ur8&ȞN1vGish07!|}ިm6  w1œG-h<!2 9Uc5sˇoe6ܐYd,E?k$]aFӵSb{g-ݹ4퐳^-?"|T2D[ Ϡ'}B,TyٰoᐳN' ߣ 2v>Qzjx D0%iFnizN5p !C)/2qqZ9U1-_]`g{j lG:S .f G%1bE&Hwu#_sV5e@ՇG4E+ V3ƻˇ\# 2nD}u'Œ֖ e05^Z`Au+$4yJ'q8'7Sr҅%V1lC07%\8nuwWdS{݃8>!4PŦ*vƋWZ!ւaԆțSw23.څ SŽB *rm_p|t;3S +S 8DJM[AZDtmb)}{w\k..r(LQ|Fr 9KvsIY+ȷ4d18PޏLNчgu`][ +?p 85aW<_/1(™l1AP:Du # :UeNS,

Iܜ9YE[>k6oÚDlshZM廡5PFc#͉Ĩ{33H#t/zuŭ4ڷ<\e ~%iTm4I}yʖsMZ̟ 7K *z~l ^v%h=r7i5QΥrXq|0UHI&4}T<^."j$0wF'! gA>d'4f +)-]\֣3FoӖnӻ9 d}E tιS>ưBp+Dajvܬ`ݔk5"FTy4©M-p΢tdl暢zi9"ְb%AWxpIc/|~E{n!ryK5^hm(aٛP] Vѵ`sz #zb1*5O$6sbd[ɦQ1&Ѯќa(Xɦ(BcXRɩv&S̭pH)!b%ԱUk` %mԋq!ڬ KA HG&XWՍw*re2_z-qu16LkPe|ȯգڟʐlHI drw|')+׻1R2J*=FjEN cAb84{.{ *<{D\~50oxx +9ls8ݵڑoe [T Kg rUQըտۋoY8M+ȲG7{FmL2|MD|j!uFselgxr|荡 5OBI XFܟ92=ϩ3.-9hxP$xoG+0k̅ލZlb}+grIatQmZ"ZZr? #>z NK,V!g7J +eZm_76ۖ\4X_Q؎2h кL@*EBe3'F&Bѣ(^)SToR<Ұ,OM]+^P*˥vLR{*0z +\z@2e`# +[ܑL@=!!`,6۾ξ'B/l ۅ]<$0#q=nIk&#q!;J=pwS}K,X4_H'th( TV n2Xz7E'\p"vFti(N$w Ӂ1Z|rGpY /bsX%-&t&G(vE.%T=&I>J'c thWj3f"ڽ >A-4p|LBsS.'>l+@s4A(_ Zb,ؐhK6 GiX:G K3: p\>DVP&Q Ld:꬚ dh8/=9&'Q UڛQbͿ=$-D % a75&ΒtuV +2O2>FpYj Uhћ{q  xA~C|a3ܐk=愆`oo,\V ?AٙBp韐\l~-6ݕ*u;11/h?kɈql$gmm}ԍw-e 7}W1?K5mW 8:J-X7x!잴T*@)؜I^& Okzbհ#A\wȯsQW:$kJ92,!7|2#nFv 2 O_Z!zAg,Lp^ETlɁ|Bb|㮽Pl>2DŽKd"(SIT錚D7Fâ RC'\Bv|Ld6f0513rܭ60 +˴ w: 򓍽U~Ք~ ʏ4,e03h|M+PTd<Œb=b'"mwc"١uSMaVa'I%1bkwSSS?RN*C`C?g.P.B]O`_ SBgC&r u%ږX=&qsiaנ rGgz4Rj>\|܃DEzVME/dv6@R*~Ppl+͂RZ/g-N+Ǧ!bdĦ5g} u"CiJ(ܐWv, +?cDFb>֐\Ź"["\dYXiavvJމ3M{VU:͠K +}.zj,Y_JQ _ߨ;wjv~ *Y?yvoHUI(-CfV}(1OrfDrf3.$&ۛ +_LDf Z$1Kt%'>yfI<9AzvהYKLa҆"=kAV+]$? *KOdcC }6JRQ7A*wk1^elwQK-(B7R,͝Ċ׫Y1 $[#ѝҖ:#y>0aJtҷ ,iBhU~Pq@0|ԚbwOP }2:xxVnDy$į+NwU0Cq\ÜpawnbC5bdTo 14@ĬߗHaĦv6lEUy0X9ζ9#?76{Q' $> w;Ɔ{ :sEDc7;&~ZWq^_B1 +*9ť 7'6]{t0kW &hWS>e$UTc)u(mZ?"ՖJ ]B,BĻ-/g`Yn.&`$aE]7%9'vymڀX=75bJJNghXQ c쭆t~jr%/IY3WbܶnabAt3A5JaUl==O2uI #?/pu/z3|IX[Hՠ$2qq28/nã3ӌքL!$^LRIe{\1Saai OXAby%zFN4k,q=Ti{hc Ć2;8D#\zvonn4.Va E/łzn96Exg&U+C:8 +̹L=F4v?f.ja)xP=1N @fn*6fC +ǮdS AB[V}~Fl&qt[+DuhF-ЈcaE+3"޾CZ<bzlvʪc;ko9GHpp^k .HLLy䪤ʬ6_|iYW"YptT#4yW/狵?e2 s9:# N/\T;n_ql7JM)xӃ}>W8if|QZېOamG߲䚼3 ᇊ Xaw&"/tr~=:W(I +qL!@xT]@Ӂ0NMV=T% ̛Z?+djGdڬOМ>ECtQ:4A,;G[t .~B3cG;5^.x}ɱ+F:?{{Dw_xʱ:aK$cxtڛ4ڞ,ј᧩z!!qM0MͬTB]Bn=Ȗdu<=g<u"CEVkqWKN2b>*mcmi&n eх7p9U1B7'd( Jo( +}g-慄 Z] f 3dM?-*5H][ ִP"zO O ,7j2د/B{|gil>);/,3&ù$#h5^0P6#"IB>Cqޞ;ԞT}׀#MlDYo(\ҳH&J9dYV +`_*,o}~fz/ p)!RZO~s|f9D.v]s!ѥ yT?}5\4t-W,^toJwO.ʨĺ)כ[7 Ykr\,!#$=q |dwG*Qi[+ݹH~iD6 𹧝&(fd=u4f Arr+v]VtS0`qڠԠa[UfBngpC llZյW1Mlk%?:bF8R!AVla}L +<* :UbG +0?4 ?;& ٣- +n )9YBhLxdOijaY{+Z˔ QMLGBc\ɥ\M-77MSs^i^;^,.!7Zg (\(ܻZC)L$OjUiяOV&Ή| 2 +B[_hh=ꡢ]XU[ ocH:V-zJ&p߇Iㆠ̊LA h3ÈOgo@fR#X^q.̼44F8J&;2,RK9bԃ| ߚJEgT{yѷ^-jQ(nԁo2 +hEn{$m+}w>Φ<.}5%5?Zyu*ZLwѧ8xYru^E}cS,GsD\IWfqfؐb"VZuFTkOwJLF☟i0eIq"7e +H˳ 5kO3(P]9u~e= +}D"D4TGmJY3J*n$+)$BV*CfsXa/ Rs C1—DCtы=/xW :Q`ʝK0i4$MkWKmϻ Է: =H5vFe bhQpLVm4Dn)ub#<04os&S״6\WZp#ttc  PQ j64x[H%[ݪl- k0af!!'s?l20}ڵD^c/\OA18O + ʶNN ^DC+[%t?k\l9uS11x#$J5uW7)`QѣWa~*mͿHаĴ #Čj| avWYq෾Iu;mԛ.c,Ь!3* :S7"bK=)6Q"/?e9Ks.5E#X HHfȰC{XY(߷h$ LDl K <s=9s1+ODlj,ȱhV7x4&3D&Ԝ zN/^2D@f*}iU.Fu#j́z~Na^${lmBOdba J??KԀxG.ꩣaڋ֟MP8b"3~1FȬ]` +fplN?gcN3KSoԮ)1"@ exW#Jh$8}zP[@;?Fc aG/_0a?]>C ;%p +e{y/O!3͞aZFć0_>75KT`u^ LrS65 @?[}O[ms8;"d=E;.4@2aB:yH5אUoQGk q54P?8AuXP؀Ok25[?g.&ܕo-BǷAM JĀyXSj6o=l探0XЊG08kXur#3o4//HyVj 0L[w$TP*(F=39[wdXip1'<+=z :rPX9G-16 _Q4Kd~I$uba")fĽ-f|:o gVOoWy+SR**n3Ǐ{TH1ۃ!y(9jAnO-MyK̛n^Y#K2PC|WؽY[҇2F\~+ < ]HLMvy-zHʘ֔g +e6\/ݶiiS7AV]3V&@v #$4tMBbЦW%'G.}g8F 8l!bu?ס{c+MVP2qp"cU_ +IX1s0ݽ^2HBCIȉ8?ōtʾ+CV,_8&[Ju!{rCh*V.A1f4ٱ*Oӊ3`Q;L"=MdWFʲѣ5?Llg\iO)тCx`s} 5қSⅾ?T#Ԟ.Rm&cBpݪMfr{u{ K==k[nU=* t"KV0Ew?D"=e 2 ^ }70ov"Gĸ0yZQ$c^MC- «/Y$<)"ܖʞzoZ>:7`,,epXcoyi hi߀ͤg&XZ%lv;Z|!b%HesT K俬=lsᄏ( j_.1YD$Re,c+Ï o Y\' !8UʸmR^d[i\WAu$`S&[ hnw!8 FG^]GM-wҁX1DI8. 
YA5x'*yX^CHq"(^[PFKY 05F]eOlɬ6dG8.Kq>0ڿ%Ʌa@oVF򯡷_Wx\^j\Nq ,:Tqw ֎j]0Ywo ׆k-@8/z@ "RIKV5_H}ǺxI !KEk:lUq 5øe +,.;nW qc2,r.1x +昤 +/i9e=c $Eh%Gh1O*7oB\/Y[fDd?;cg"N!2㓌%4' +"Y߭$oh$W3Z0Ɣ(zxmOF&,+ͷ + +5a^3/} vuZ +J'0SWpo4-[KƆde",R @cDH֙21gWV*=-?8-k+8✫3KNy-LC8_1,eV!Naq$UQKҌSP_0E_r!D:DS Kiz]I?l|7IcLki[\Mt C{w76A^, i*G(mź `rh4D`$JϹ:C&cIbЈ-lD7pUs&9PQ`&i]2eȸm@_dUT#zRxv`% /n|t.SV8=\T{66\0xm:J$_wJx8KDps,3%9mBwQ-xbKjG ˎu۵':@6 񪞝 +Id 2X_ !0ۈaDDL*[1_+L7"K"/@ߧ@:xe*^=o#-R/j!8_`[b\3d;TjZD*%gpA$nl|{j|hhD>(/y` ␞[SacE#I1v#{Vx >SQ#vwQoo{G +]^"L҂σ]fغE d썼q` +yd.~_*g VȸZ RY3҅lEj< xPzS)L)n0o.0_G PKY9.챒j4+/ևcwN:d; +Q%ŠkUcNZmpgnh"E=x^@.Ĭ$rk*6Nڽ=x{LI~ߖ'z}Awa W켞r;b,VFX./zaE={d4O'6޳Nsd"6ˣsl#]oK$pq_s@/Ǒ|0#0fVTgQK"z ρHq\HS+D.@<s Hιxt = WhܾtG\lK;1Y}hagtH,ٺEoVkąr*0[RYV4qgF3p^^ DBy6x%iƲ1%˅s5pw Ql 1ME&%.o!KH_V9YR ߮]-)8`tb]r3o+W; fJ%$ <3wl7xaLR84gq9fNBS +(Hzfwznnhު9c 893uk|J" ѳPMޓ ss_~9%A_@XvxC KӘqbZkBxbYv$cG*/[[VRN fe#!,p7/fUf]pD3A<=x V;xlW}{tw{BQ\xZY:E#{}Vy# ql +e$ͣ.:V[0ip1Umz? :aKN%OJm.PᲦHxá4suOw E\= @߸I|Eg` -I,ǮbB)> µ#{ĩrj5M%V0}SweEUԘ*mnۢ6g+}ݔ_ˢP]aхFVU;ܸZ.Fr_zT;NZ@%ᰔ.33D,QYzwoEH$c,aͩ$V\}3R Ok4"@5C.U\!A4<{帻@Q=Ζ%0/ireDR& +bdX^ER'=Ǩ0jn"+1(d^,^ng lKW\Ul_;{}~zeGWdq(glOWz^ qpt 0w +dqrDҸ ƺfp~W3LU!k -}. H&ճR~cR'90fS,k]S!Mג;c60eVHvb }h^kqo>@nľZYҘbQS0xFa\IkK""`:gU+76#ApAA}5qD-R + +bʤ#P{k +]Ⱥp#+mS8"O ax `3@dE/$Jե~3kSqaч07Kv< +Ћ%K]Y9~ЏL2k,V4f%.n[z6 +&"hmY![)Ϗ,]PU ɯu`$^Qzq ~haC~"0h =$sޙځut1CkH^X:_Lz4q/p3ixֻ$N_m`9nw`%AE* Tnk׀0ʫ_9;Vl&e!e{8VIv\tv@A +(:H$Th%(%-gزnpV9-nbQA,ZGҜ0ފ11rt@zq_Htbȉ߯2@YyGU@~-oY~ |vD3._#T.Ւ$D| +endstream +endobj +855 0 obj +<< /Filter /FlateDecode /Length1 1366 /Length2 27424 /Length3 0 /Length 28388 >> +stream +xڌT߲=C {ݵ] ][\'{{ofkujWS{5 &%$ p %\44Znit@. I!` +'dg/ E ( `+26`p1+ r@7-M-qssdcd:B\En6 +d a +XiZ6kB<. _7l r)Pu V7prwD` [l@UY%V7/7fl +:U9 +m?Z::"?i R +"S {{`'? +[?MX;imA5!agg / ky;_'o [+d_W; +`ik0Yۂ'_3w?%?͗MW]S]WORep88x|w5`Xy jUG K ܈o+d"Yÿ?@G[!e?V`݀e vCuYٺY+k1[0H jϥ``g?|eapK@u󿷔[@,'/F;+/_%ZEa+79y/;mW\ߍk/%@^ eP]}hC'Ѵ͑nKb +CM;~[uWߟmI~/ sGȫx3E?%H?h:öA(9`?xy5 UlL/*T̳j.g/P""1bB_[ʝy'WH`B?*5UKHMh@@ +{59G+yMŷ$N!<ĊdE ]Gѳ&\117A4X`.!_!J.xg.wđ($*'KrU/G@/{mM8Μ!a8t2H*B}sz_X)̜F{yye@E`b}y]FRK,P"Z|/JU#Q>-RLO4+(MyךCqiZHl(ÅW]6#yE&e\ ѯs\Ԡ)aοr ~;>5"Cʶ;gBA7 E(`v'xyen +~<5'Zd ~x>2YV?bϤ הJ)1%V ,f2v0ϖo_I9)HNkd=\]i:Cn 5Uds0LIp6> ,OI#䷉몫bv뽿SZDj8xB+N2p7/K`0[tK^$f|N&:LI!E(ɶ]E}Bo,9Ī~r(6:er +;_x㉆w|Pkb1cddr%di7bK36ݚPU8 ,1i撣dpdd7)8tuB*M+MS?|ɰm\1V.o,.nz&)K,iŧ&jlѵ?nw#uo١'] SEb 3DXF煴+:n0z"67P+sm<$hY{dcԌMZd!7\ۂ!0߱϶\>iFR$j dNCkzȱ Y;\ +Xg|cJ"|rdՈ)xG !yTA~ Vxp2&hMBbzĤ|aӗ}ъF k;} |p|*v]OW- ;6G,鳅lUnLrԸ(L!n-iCY ?TׇD \AT+}!Cf$@}mI<X8[z}]76J_B7GYCh(^.cC6$1Qx-;6qg&S{|ʹ8ˉ_헕f/No{rcѬy:i1BRca= FLN7Fs֞k y]msvX,> UQI!)S9FfՅ$:P@G"L¤tlrMHx–,]ٹw@' MEI5^_rLc,B1ZxqڥxPή]$_D@&H\bf<82-˯#L/rc8sfy"^IuKV#& k貚<$+HFxh6BAO&oMċ#F;D&jexqorE9_E|qhew1C]4 +p kD‚O}2p,)"OC c=ŝW˔$<9Y*"+%.jjgB:;,dk~i:0dB@ nlG0['KV~hd/Q붲i/V@#SxԷÃ_a*MdBJ1[&}bjo0O>,oԽglӞ$Rfq뀰?n0o#=,RzH.c~Hw} ŅD/)_byyI |$bs6u@A +u VH?јӠB$zɁ4[^&د}i+ƪ&kIg~ _>g7Nɐs+9yD -\ֽ @1ct\cgix_(N7?\=f줨]J rضpTJ$~HR +Tgi;e hg3mP'{ۢ&lfzYʣ E5|/]Bt'aeW. b +|`l*aMQv*43gRi#/$lZLW><ڷx8qgI]\(ߵ| E\@.Y Xm&!=:!h~l!o.腮t.*vL6y'lsg/"jg|K?-z]ﹼcdƻ.!=UT@Y#qP 0n%>#x u <'2 wdxd,&֨da5N?W܌ Jsǵw:;=vp;E2>U{HG7Ko]SBd6( ķ6r\;o&w +g_CnZqYH_) hV62lDUD^RFc:32x}`XU˯$.ek Hm +몳>j3ޔ78 pG]I*6^Q#v]ͫW뮚TUG +\$ܥ8XTӂ:sa/q$MޞK&S)=AFlɵ L&(J`{^G$J>Jw-j[tr7P .ΐջ)ؔ^y }!vk Byqԥq~ +?f$'{u>Cn :˯+;ܣˁjFXaݛycIDn28ɦ5>%_f!ـK@Fk[RH%9#[YR=h."V@5CߕhjA~ٻڏV`2tHO/֠Hru]3.6,W{p=N]WhJ!#3<;1ZeJ: +D7*uIǶmzHՂ/t" +Y}ot'+9~+X nk,ST0z6& {5oCg Xw@!+.)O?Wt{4? 
bRwR=$;z>Q(K1D/qCpGSR)Ug&!ڰ8(mR_ $AKA#Vw`nS,6, dDž]T5t$=h>e,ؽ-%Wc 2~+-(}> cLco5Xf" +TF/RBWiQ,# T!<9_D'GOxXFf7@T9l_' *B6>2aY 8U.l^}g`kCa1@*Tc#,(̕y |d@3vGಈ.d':tmKt7 p39L7UIoDUI g߈Ⱦ|s蔣^saIAz{ݽ}6G¡Y.\LE=;U#:#Xb^$=GrsT^3>lS7$Wy!ok8&9ܐԤbQQpaf&J kXVSGnf櫓ڂ@n4X;D_V62vne~4gǫ̽B-~_>9jpsmVZhVJ0M: +C`J>߬JMLx!q$X6_9H'*B/d ?o_$`2Zy7#|J޴̿HS=Ā UAmt*|@Isu4m?pH~.rxr83_ )%z4X]/ExsVx~~~}%;Bhd6CU9mu*shs'! d5"?+t:1S̤v2f)&|r2VWb2:Sοxdo[j"\&AyeH\`nŢ %p#]VT_;Q&LT;Ce}őa@F=v):SͨCl#qx.F,B8$Ei@m/)F,wx [ClT:l/>^)ðuD-1M%F2BH|G\Q%&+mkݲJNQK@pNC8^UݩyWLAoV_d/3;=Ix@3b\fVIJ^,w:P::( ~ +eǹKH $ d4}q!wȷ]8l"f2 +[+}L0q1 %Th9nS-镧d0GxɆ\v2kŬ Nߏ׉c|Su!DaTT! 3 ]x-qI 9zXnX ~Р$BM1'[EaT!HGK\lEV:K AbǼΰ"?u ¶@Nl/DjG-t.\ꕩESlMٯPy@UGv/D3^O-hbCr 6c08ELPU΍ZMxI\qOs *!]gI"kQH*~l?_ny)7iǞVي\*B"Jz@|u'3$AʖNE~h6W2"{BLhNҜgpʌo02KS +>61i rۊ%= OהsNC7Ʃ/ v=;CcW:CbտgEc VCo^ڤc XGOًDם/v]2[H{u}N2vΰaZ8d0ߎ+3 PeQ[gnN<50@ņ7zv"I^N蠟FDz Pm37aopVHPCZUPLK + rˏSOI}?.;IH(PAUlǭi5·LvIGi;W؇1#g$ai1dϏjgYoT6׌p2 zC +h0-4ai5\?D2c :R3ma4j}ZBJm},g7tv.}~z2:Dv׌lAʹڎA~VC·p\YDreWjwc_ɲY݀o@[2t A ͢m wPԘ 63 %ZqJuDTri'Ek5qƃPx%łW)OVaߚ)$4U놯O7~H'fT8 Y@GȰދ6y(.ቂ>ڳ Ϫ,܆QQ7 ѳ,S^Z(hz<* q=HaHJܦg}#.V M(SJI]rb6Q$# +%iG6 o*Gt #[7[޲%;mk:]Ɔ/n偔&+|z29Ӑn Qݩ tnqZ!WeeN]kG!\~Y2ꆖVh?o^Ҵ;4mL`\:?̢S~wBOJ4*6؛E8f++1%exk=~4Nv10(IBR+-XiŔ7*mR^UFAuiZ/| J&w42R7޼_hS*E77P*'?Byr%#KY*w >-Iqw8}W,^L)ͳﱁ(e/>r!xy[c.~+ 3%W=R"[T&oYGTD++W:X#uv_0E2Mw +G :BOuw}X5׭Y[%OOEl? +/'~=0@x!Bv#'n^S/tBW֚MO.o.T*ʵ +qD!X\|'xFE߶aĜJN_sS_Yjt.nw9 _e.{m TL|X81S đo9t<:. ȿvC>W (+Qʤ9CnL9"Ɔs0ޅuRӮ17؀QyxWuÇmv/25n=#gy8:`+dKz`/bAߤ|N"dlT67o$^.И"fA9_(NX93&"Ӥ7krwYXS38~> Or\V\isfM1 ϰUFUq<+p4cr`)T*VZ&AbZ]D-q&%Nw.@#|hUkZ;o'k$kioJ7+$_6h}n{SS6TƨK}ņuM':eᏄ g;'GwJ6봧ø"ahu{887dG_9-tԆ)u^qm8|M/C,6e\ `(5@!(bN\JԐL 9iL/opnWB_fĔQ9KYfmm݈*Nd:X;^߱ҷ">HQ]YDy : -M0q0CȉB5Ulf+dv:awů4r:^oN5^+j=}0OQZPU-.&QиtةNG%Hv\S˾\<:U&eD oٿ Om1^3D0]/EfO5 +8dj"ΧΔ4: LWǟd.3Bj:ѹjPѮ7f7ścf&"<@ZTpB t~dZw:+Bj-"pCaXЕނL5RQ + 7Y%&0ݖYINiPdQO%dO ^ɓ2y-SyKu*AJ!;>yY +bYNp/P8]OZ& w +,WZ=8N?}ޫs[R1[vF +'` `V'.2祒Ɋg~wNR Q߭ ggnaAkf2YbFU1 ԉ7GugכWrWK'wl\9.& $G*,ΧMa~Ngf3w'ZNFV+%(1^a@-gd]Ip6Rrߊ8CoպI3d@B{\33Mfc{n_zQb}Z8#z7fkFHh-H>kZe}y~)}0+뎎>>F$41+e-&JS7xEr J$%}igc+@"~!)??KA +u%߮3fx(WZqo}5xWo r|I|ӾY/ap +27-X)Ynכ~/xVHM,cZZP[쨰`m*wmۯ2cdΨToF^Gnf~b߭0vJg/o"KlZ's@cZP|f4vHa0Cb9 3` ?cv$q2g3A5 G&wh+V\X,ffL H| +Fۀb􉣯 #7V*Jqy)سQt^i# "?3,.I{ Y!%$epCYOv]+~:ährG"0<>AUͦOO Hj!,"n"!W7W .{&JvT3"p`zC:`Y8BV/2]AsNvs'pe1NtR'LC<ǕTIA]Wˮx9aZ"$*^?}|8iLV';k7ɲ%r8~z -tuy/_]+m:Σ"p߰TJ2L>FNO)RJ ʇ}󀰜pT +@j"Myoyx}{~g9ў?9꺤]Ge K餠n^UQ&w6YB~S0-K#e(GW/MjZl}6xJɕ*cHmuSn% ?@q)u+U^w#oOtkt'b:̏ZD(|'(dՈ/}$ncc%pXˀLI联G\u +ŧ@m.-By@A>賭Lgu;*Rr0F! Nr>lKb][DhP Q^!8_G$JOt,J]|*tӰ>:1m<7zCUմ33vA%]㠯R`V**(#HWSUP8A_7nTQ'oDß#gHs+bNķe0V([ +TD[/VdJ$V؈Ē79GP_Ηzp5e] Fr,[0EKɄTg3'{7OI}@"/¸MW<ܫ'vw((dԪrC*JLjD{ѓ[o BB?Zד n|Q62%y(~rʛ(la@6%. fʊ.T6rTg^HSLҽ Ȳ % $(^. q#(/޴]_-UK LW{%u8g؟}ӟ m;e'nIN~@a w\ukgzֲ[;yeCB ]c~cUQ`}XCfЍ>~ppoP$ 2jfLk4"om_йi,N%b"xkvVڨ5VF DjPu;ܛwEb(f褄mᜉiya /jnU+-KX{W(Da0<{70?e5Vۇ-/qۃ><ڋ̮N긕>"牰q8Gy&O.7SꌷߌyM*h}cv_hΪ8We黵 xbu{@ (˅=JtnF@ rǹEc$E|ҡXyczŊHM[s%ůGU&x",HһUq(R FIlŗiLhDL&@_#AL4LflWQF0d[LPCP1N4'+4B|F^̫>ae "siW@.R$uً}k1\eq7ޟ1o& +Gt:6ͪz>pil*mẺ3oHZ0'A 4o%Qn$aK[Wc c(\o>+hjUL):!)SV/БpU*#ի1Jh7 ".WL>d֏?Fj~F[h&&zvnQ? 
VKvzKT3ZQO?̬k7~e= +3G4*m׽wn7i8-%j]_JJ C:M V~R˔msj#YwOn`3UPUuSߨ-Bx1Q<^;089]LP;jHXf,qD/d#~cz7KI _1քNݶG8;:n5|\0i s[n9Blg9U )[.c7^ +BwA핖A +[l[Ye2R2ޔ5>IR{NJP&zi&:XX` +hsxG[?V~b h"Z6!WPՕƒ;qFF0y4~%%Τbx9w\7 `ց~wSՅY**0l˛ ƹKQlDzָ xrcGB "kvG2)P]K&Fke iJ"u/tv+ Ϋ3 N0O<94IafӑŴLZbD62+wMDk*isV6YR`鏉3'Sj$ +T'wBvUQ5<D|:LRӤ@E5~fKCHòOwgfMd`{^g^];A-/#,D4hh\b-ᶼ3>1\̦ uA7Mo!Pw-u㯌sԚ)r=ƥ z?sA#4 H5iޣӀ zw->NC4Z#9);xcv[8 7` +C.͈N)%L@{miE <+k"Kkq'RNP,Ғ,W-)FZS:QtB}֊"Q_l2C^΋s%i$Q> ek'9XÞPL"^Pf_ޗj9+@OTЋtrdW+h>tO 52qWЙ¶Ʋ y$Uؚ|l~մ*P{I`a3QG ޷$ᬻvm.xD01 دca? )3Z5`(ڸJL&`(xQS%o6t]Pqyy^ɠLr7)?R1G= |z1lxd( ϞzdM +8"ܪ+3ᵂr$aڿ9#sؕ^XhZ8Kπs"ڭt8z߿6Y)%yw2STq< s vP\лóiJ- + Vydᆮ7{u/PsbTq=悜@CPTvNi}^PrŹp|2oEOE| +b9rICjl:>t'/Ps,\1ܝ|K ޟ<@ҨӸ'?Fuϼ,7a:nP42hQ }zmyw_%cWA2KB? D|{$1v&V{34`_ av0?hP:rN:^wSx8cp`tbs5:B #+7 2M<'㱱4 Ցw@jOC6j@6"P哃bǃN|X"G|hKcP9{s-U&W\ç9La6k⩽3O,CriMpWC_dΌ_E/2 ETHm^=]F8i'3Z5?/a߉}m%Bns;B)6 }0fVүgW="^o6&M#T34hP1ON{ ~%\x~0N"k/`-NpArֶ`U 1hH7ь`̞9HqRv!;pnbįC}iY&Xˁ"\һ$EB$EtXD_̹| AfOj)G]}X ,ι-G.cW "pҘ}_F-4>^7)HT= +P\Jm2M_-Pܢk|;J +ry B/>*;qWj|[~C2g/xx4X"8?;j*yN +Jqڇgu1k#xŌAkmnԱ?֮j:c',PyY z7DX [j􄙱b>'#@R*c(IPutH{IDi345)+vFy`;Z ڕ|wԂ` Q ;[t5Fs$V5ʼ\'ʎM!e.esHa@syr +U2a"pĶ3y q9KY1+(N޹ đѦEzXl0Te} uQ> $ "`©frOtNZnţJk&qj_[ h:f4>,6~tЏ)w%[7l,$r*q,>O"}b}3u@_j~oع^ +K%|`:ci^122S"4;&<*Xw y?m+M}O|I5Ȋk4o +tcC3HOm0\0@__24''WTVv+ܹbZ jTўԟEݺ1X:Wn(Y𠦺3 BoҋdF1mBz?_ -L+tJ'q4$B bB K%/jtu\!%NG5^`HD=(l3u>NF0-xTkrNehfv&g0_<˴վDꫩH&|bbI:*ٛR(i{A ĩ( )+H;pS~qua5g0FuEǨMЋbA #+8?_#\d{7N&QZi\~t|-ۅͱÔZ/I97Oz6xX=j5n\_`$ms֕W]k51px^(-;YtuX7M9'zmJ[+ΐ=*xd%F {e݄fl!F)ꭰ}-6}Cx y=NMΪxnxYKd"bWo + (bwZsxT\ dk G|pK}@Z~d>I +){}W*juf =rNPƥamJ>U1&C{}2/qA=$Y"-9>(-8rM"S{EC .nzwI.H)Pe, hRmGJ3t#(T2~xxCL'rOԋ,^#0Xv'?%I(:x(a-s T/D5rܢa=%qn;(/dT`8m.X)=)?.Ə汸rвZ{sdI!WE6,5ZGNC룱|4"A{%71x3ls@jjYNj!<"4KoaK4Šw.M u`v-͚D(R; r:H?Mm[޹'؃ 㘑.M~FY&c1םQ[:o@%Hq$3?]NlwPWuŷ)%/ˤuv2aQ7b+]]ED(yN_1ifF;͙B# Gr-D0O +Nu5;&*T`1On7O!bu/3-s4opkb L^vݢ[5=jcCL$?:#7PSfR6KAWS&R)J@ RIYag&>t$ͥgrl dԭb]Vx-ƙU}~H9? p2 fN$o>;Bw<@u܎]_a&9dd*}Mo"mQ:]_ynuQNma! ?nuLR#pxMԅ*ܨ3;1(@ Xa‰dD]ysNN%-CK|y̭b_V)Ov>uX`-?+'BCzK'P{";<`t_C0v3dz4r.0K (yșijQ贓VO "#S~wo$\2Ҩ/i)TxPФ E SV}DPBs+wUj)r&V"QChY= 6&pṋzWo8>7qO- Zޜ/u$Z'hGvZys<8IpJr)i/fV.SDzeiScMmx!Xsb'cV#Q+tKKnX>&Q" d$, mouoeC8{8 =90lFdb +'T='E'-rXݬ#PA%А_D2m + m8gG_,7b Y}!s g%%u՚]i&RjCޱlH",_ݨYpegYZN} x3O9) 4ڼFH_.A]*aiN.d3<7J4Ux$ipwCFh1|T&߮~q3k%kShPoXa27Tt}yE.@U[2R38,2܍#nqh&NDkI[97*ǔv[Vzk0ƇCw\p C0)g `(8;K;/ɿ17<c؋J ME+t2(^=O5I>$`*3ҹh4;ǫ$`~ˤrldS/=qYQڃTRS`U%.HduͶ<5.<&o"S&S8^.gA{&r'i+ӒTo?Ei:Itx16Jے,qЍ9WmraCuR/~ +%8,IbkزMuScKYkRCG.v~K._8ŻM SbŲ?I+CK bӉ,uG*PsnK¶郕1)^ةyw0 %'RVZ%3FQp + ΗStM").VkǍyxL}(xodRL\Qkφl4w5%W3s=&;2ֶ]ѳ"?oS\ 0Q&$8۸!De:+ @a +\u tH-А=`8D B5\ F#xAѕ+&Cg_3Zq:z/W{R3@;#_@pq3(=bXJՐ$K}o+yk0ڽu )11xhƇWN`# HDyWz,zj A o͢E#xR)ZB0et^S:`Xo"A;Jޕ:ټsje)!N\|Y~imTtZ'Zv@v +j!Nquc4.?N!;LQS~`WE_;a_ bԐ1Y.zd^-|/PMPE\찹jAWb}alWrrX~[H-܁vn +KNjH\j#*13[Z8x78A|uSs 5[O+ŷb[JY[O(>r5dbUP<;Z q!(HjcMJȂMti44E0hK o @~KbDK1A],&|r]wZc7Qv~Q ' #lзn('~]|$74,3z`!bTD ;!qUyfO pb4*xOPG 4q~(y֋Wv%2okg"W$k%Jʑ? 
͊sUI]#p IPa1W&f9Q{UzWYD +h$23R#lsw;ٖ2 R;^ဝ8d4JEXEGʋiqFadn@]Tmr ^+Ԛ\d5 Z98,$^S;e{EKp<|m7kG=p\,Lvɧ%NxP+~2[<뿫v9 Kd$N:oVUT?\Q}|NjwNGQL2Qd|i ygknQٹ4e/8ݕP O(R10tx^DYIb +_-5`;F<<] +VN-_41\F[KL +15a~4,䱤ttohMNo iDR6+Hᰕ/ k,0O%2Fg?E2l:-l#i8,ycƂ[|?ô +V-Qa(}J$@ULUKNj_ji40{ 2o.[5 +Sm S,y+ƇMq>8B0\0 Ʉ9:ܲF +( a<ԏqZ];쒢F|So + 7_Zi l y8K[WB`]9?>1v+qߋHe +]hFӉ!֊"@F>d,A'wbѼr TZ3<+":@*id5kR XT]4yTbJ2`- Ic3OiPYoމDz@N iA~Yt`+b+U;o1+`:̞٥?DWσtAmߐvsrĴJ:R9 -c6F)pnEM+58 +\ez"g bܟe#Ϥ7[}’*R=Ohrڊ +Wr4}E-ޜwn(!ky70'%7o`i(ƛC&B4IT}ڍ$dKz3{A7* K57ĢNnnU{i" mQOA5݈SuZPC2S-DS9" +X+e<= + ݵG[EkKmzzAw$'7+2k&6eq +t鱅PuMt7t1@6a"53Ҋ$/+UN-7GQ^*&>#M&s&Bp՟-L?MW +c%-Z *Y 5S aVnV):b]byMy+9Št",// LnlIGMă}6 +>`MWN+_Pdȵa|y.6dIe{ru90/6 -7e)o*z y˜aULN.KÙ쉚u3ة:L*|K#+8|SuH)?3ٜC[]Ig1M?m*R@!4p.sw9n׋V%ヲ+h^t_GdX0q/S0 +f\O7=6-N:p) j*c=ٕ;vU$|&4V(n{i6 )oBMR_XӱZݏP7TȀ}dGI[1[srΡOWE )4lzP +Ky|; l ^7~[e ]%ܕ;X}5mЇ{ Щ'yj{3BͱBJ^ JJe9d:sqp}WTsj g0bw%2a 7Ƌ;;I7ڱokh8z6O-0ĮM!ST,c!P̍ a cIUҞ$ P,A=-QPZu8G#xR)%_dvwq n29B,ܯamuէ \#Fג,g>YwuQDvhTw/vE׉Q;1'j9FTDa9(i;QVAcNn61Zp |3F?PCFȁ:ietD%idAYAx^ +LoG5BH:%9l_iЭ'?VVpxg{;> +gsmy6$M5"o/#eT|$H< 4o=T wI$K! ohڭ(]( Jbt +I!}{Δ?Bû?d. |WԬYM׮u.uh_L`nk IWF1bTn/g[@Z`oʶ0^ӗ>>  |VB؍Hk2Sք|$a[IRDSCenѫf*CZ:W +e"XTMY9pv9X*C ౄ}o|@澜(2e=(J)#Ӊv0We?r Tvsa$y.kW\%އ5{բyrX<͢-^Q64z1gNU6+6Ԋ;f``E4k#OQvL&7X + xt,lm2# :][T=ܱS$UJD CPd >?)hc"һޑ,<^世t\vb0DZ p4۫aĐt@>J,'ɷP(C'as'?K +z)+D$9m?9 +endstream +endobj +856 0 obj +<< /Filter /FlateDecode /Length1 1366 /Length2 27501 /Length3 0 /Length 28461 >> +stream +xڌPeͲ58h޸;C42{{8\Y֎(R"eZA;#3-#@XVH LKJbl T dag; D dlR.Ff##;v\CW ,@K*lghafwzPS99i:Zd ́6w46([=W + +sgg{.zz777:C':;G3>J9@ tti ghwgts ەL k c[#eI=`h9#O?,llhllgcohaak0dݝi& ZXrC"oiىIEmMllN'b4{&4abOjk lf@g+; ^/'?xL60r2t]>^w^22L,F@3 [ 4- `oebgk?͗^YUCKQOHE ebeW6v;+3Q0O +ikjw*v(#J%g@\7,H_nnC kd-Z!;k'lWf}Nb@ gcsv4fma Tsed`?|elro)jklgXGw +bD( sۜy2/;mW\ߍk/%@c%;c`ˆ:A\7߼i^.P)Yێ)ck^MPaIo J+3E炍C0x*GjV`Ry. 
+hn*'Cj٤+hcTuJH!iPݑQsa}.bvb_=7T`ka=NΒy J}_*+h_X,yp{82Soҁ֙raC'3;\6tfC\\iU50e@,krْn P2f#M:+n!cT>2 +ZFFE 9is􇫼t†,ӎ=ճJ \i:܋H/D5܍P5?+|M,(a@{ёhG})Ә-hzrYXԇ?ak2M/[%Gt:IҟF5!T[U˨s,jg'YJls pQIz&(mAj4qX{DG6G[HDUoG{}Jpzᚁv |s5-8pb58f>CNf5Gst6ɸy%mPɣ#CRn8 +6EX=o v /+諜,Rez꒞98m/2εqGr˯to\N{R|ؙ7IYE޲u؃L["d3L H.VOň q^fiHESp385j  0-c) =~qhFϨk9x /_x7[St~3c>$j\Wܬ=;$*|%wO5lB2y>A= 4i8cFz>P-6_zdfT0 Ưd1 漋/2n"4Z +2u5"a7}9g8+[p@uw~|a$=19᫽`F4$A')N}cڏ"NG.:|,ΐZB) 4-8c򻲷]_VI_c:)I "؉;pWtUm?O)`3_<.P2AS|OP 2H}?pr9$G&ߝU֔_̧jmO N=%>hHeV&;[D,|T&0ai#[&|@M.LMc6Oz\z0'@k]î"$cD[ItPۨ񞼦H'݂db +_G l RMR6)Lo/18;.Iy?*A)"*;^wl-x zUMshЂC-era_C2 +#-)gr#΃V͛fx!}^<ݚ>3bbOFM*a=OeBEh^<晳dbM_!L |6?uc:\Ϛ Vv q o]5ݼ(ǀc`(Ev߿杴؟mX&UHv/Pu`Z:Ә>xƔ3C'&ML|b)O!#ϑ`7wmw 6G2$\gŐY+-y\95F *'L%go^SϚTG?!eQo.ij223x~aQNY[ Mo+l~w5rp̸͞S'$=ۃ/xnk [x!!ϸQPV!dwưKF f8 +S:$ݴ/Wߚ u1yR+t8]/"n݃)@g8H +-xĪ;xjz4Fz̾ݍ8pj voHihDR*͎.GwA0X?`6 2YȰ`äށpEH*Uξ1 I#ȡyN1#< *ǖQxA̔oa_l_ Yl:sčxȼ%?0-h$ +eq~i3qrΐ S0P}Zˈ0;KQ&$g%r bʟ֩#6믡e8.Q '_hD/u19ORԤ\Qm{A9>t1lۧ\5O, uʣE'vX5a㵂?^۩g.'?j碎 Ǥ#f4՟E:n6_@(Ϭ n_qҺG XkywߩK]kj i6s=|O&3b̯j4rfɭ;Qd|/$Hm;nُ@JQ >R#W; +PB97jJSFŤW]>A}k+N)ZF<hx0atw"Ǎ$ef엣).7jㄒCP}COښ[~˿!kR+>ςcgH4]u|P-^PU5WE֢[Fn{>b5 D3g1G}EO¹9nn(gboꗌGzˑ;"t=[0ۤMz,^;*/f 3'0%=|ԔS_&CℿG]bBI)x>xWIu*c՟m̗4`\Y+"|t&] ;!BVq@PlR8Oo[lu˄UJ&k0)hE7L +j͢{/F/~zyixۭϰ9/ˋkuN80k5S+ u +,uL"ݤ3^wݩ]!eKRoCIpAt,FVPnm%z}; KD2~ @q]8(rR::b4 Td86 (*9wpk;.b'ϥժn QJSF!$Hr?dܦnV$1lB@W cpoȷ`~a;מo![Gȼ,C\a/$M}в"Lcp0S^ѷs"^<pq+е g4APvV_=0H u+٭JWcQ + %*8EF`OdSʫ>=kЕ EGЭ=BҳE#(] ,8e!fDҵxc&چ|q^_C/?p;BjٲN_̢v4u0BN'̰~ПBIDѷ m{&O62{-AW`:EcY Q\&4嗋&rZQZѧÆr&ɩ;BbfOoI9MYtD5e,S;neG^b +4TAW nϷ$7b*w/tH +BŽ"%L`1~ CD j~J'Gس 6lL VkBJڰߡ3 .`jN7 'WDp50t: ۋ'"ѩ5\K;K}dO70왤; +'7`Yx:ŃX0CC]i1+F$/0MYzVq-T33s6I>Q%=˃uCPeA6?yd, Bwuo4ߙ46TGN<3ƭ Y~IQ%2}DVu GȩVGjhäy'G C%!#D؜'{T՘+D0Even^1/zi8OG)'j_3vܦCS@s!d>"7K%Y.B;672=b߅u~ɄD2;&]9W+ZsQ?V~ŽfF[1 +xsFo4&b^Adݵ2R1} {UV0~l\"&m ɤ!l~Cr[E 3/o!s#h=QuuŴ2Xa5 B:LWGzd(ΏbAvh[䃮zv*a'HڑTm^ 8/ܲZH*:1*21b9Ы^TvYInO7e_1!zC!ll{)r`ΓIN)\h E-[XQC7f-. +5g|>Y jj]Yc[m5'EZKodԛ}&gP=ی@y9CVg7ڭ0x];$ m1$D\SɷoJQGlOSk=zKR3}iθGuLb-ΏqqFH x e?8XŶ{gZ|̒ni*NR4TB韯b?98q㰑>Z^j,/ߚHܶY=ˋ"sАi9j:&*_DVJHzF9& L]QRKũu.!fZc/uUݝ݋QT01]{.`4 !y؋GT,˱zx./#\![bؙVhg/7:wwiD=D%nWhCl}&b^)w\;,x-S[]$1)MHxh 3w@{#m_ז-SɼxBbed^'0!aG)Y*ʩ@:7mcɷP* ϓUwe藳"LZ/X>Cɚ~˝F(v1Lr11W.S4xr4U_%F X^4\z޷CEt<)RiWZ9Z504n+4QˌLw9rpGq}}O$O +Ee#4ˢ%0P>v<$pZ.&Iz0Gn bĝPOk(g2?|TmY?vV̶5\eݠ gr+ǧBDy`ԁΟm[TE Pn +l9hyQEľnt3=#7bFuD3r̴ 9y&X틸/p!(RRY*s%;Wޖٽ`d^wr ռlK[1~?' RK?2lByiothf 0 ʭ y6)mM:Ε{ͮM"…LӻcNAۀn%9HDNvʥ~HH gjAn٤-{` q!3MT6PU@VY4>zHyFs׻Sb3—_%Ɏė@ +ڮ&ԉOEIۭ6os\x❑|W*%_ىJGne/+w$+]2^T{jy "1>|V2E2lF>z#nؐ7_a]#M^Y!PMؾ~1"prd8'h,d3oC{9׎JήH`1t{Wq4 /z&B\| +I697G\2jsRvdD].B",%k(8#WA,鼇ͽeS(ȤDb-gpt#ͣ|2HU> چF$#TӁWlG'*msELyO2yN"J*X6[цjJHQw껦rAO5-N>H:CYUż!Rvۃ~\R+-҂ϒYx2HNJcioҵ)A*mR +usʨ>؟6 ޯ-`x9NMAœ|EfהWG dFY읯=ED_ijw]3W{PBdk̎探 Kn}A~{!?t(<7kQLR,/T e֊>2~&PEͼAYG2iAh"JڧIaĿⱔ2&' e^N +"<,[7l*#)J>4Ɔ nТZv{/C(л3uD;\.&dI@5IF󛄖(-%7:(T:LdQQ =F~`6N1% NFcsŞkn:#"ٲ5gL+(@?Nx*^PyxpParJ}5k` @3YQ'p$ aQQc˷,C'iq-`طmp1f˛FbT>T:dO^?A"f؛McM]ai&ͤAx5| pH Swboي> ܏{葨DgE4=l04$v]Ac?=j DoOg8hw7ԉs0pkgya|R.m};˅dצP"Z`rTr5Ε13jw 1#q"> 4w3,w t, .<>ӗ=!̹ m{qicsam%ż N2X) hm. 
+K"?C`6 ś_:vЊH9* D6,[9M8v90HqF N忺hє)ɬK:3_+VI}Ke*_W/`,1 1S_$5:;/u󤐃gFMz6eܿh(HDBd3A=14sˆUbV05?۞]wq/3Ac!'e>N@i*nkYvY.?}ϗv ٣5:irxRM$}3lѶU@TݐFJFuNe[ +2]&_)K8d-ƅ_Vo't B^8 mOM@I Cfo,X" %.unCEyI@v@)v BAԥr9-o-pV"}Q +uw}`Am|9ήiaK^mj %L^54gi2p;^n=Xf.*$60J#"D9-)BygY20v> @Ρeˣt|9~ mJ\S#H]b=;+, b -$ǒic绊;P#I)+Èo–&z4NfE}d8 nΓXO7f"),3)Xn̮J gj3JgIqjmG-(ON)+:Q(b}7)yKĸnj?+#^5לkm4FNb"\J qSXHC(O:1[fGey.kzxv+;_kh(y`$%;+Bd+Boj һ @"m*-PlVx+:SkaF(Q68Z{ɨI9jDRll4F Kяw kM1+><˵ J@*|Ξ[es%7t_Uڎ|H@{ϱQp ZKWTB%] M]F5>jgȺE,)&Al?WRL۟VITm[*OI@p*OطKi%W"ݘL,r +=$5vʈş)ğUC2Oiq]]e.L\6r +寑W"Prsj5^E x%;`v&xcpR` 81tIۑSCуXJ2R[P^'b[>:8fߕ,W9Zؕ} FҫөTQg^5(.cGK aBNʇL>iaI\ +G6iC҅Bͯc6r"Yi걇܆_ȑL^{;ƩÒ9_j24֡ysXr𬯛1f rg&:گzҬqS:֬PÔ~k8ŏ=gdZX,0CZHŹ-nn4bn5\߁F޿r~vDvh M_280(䎴^tٙ|wHCi!:jft2') 8l7AE1}C']-K0e]rI!()!ƻ|N{P^<խ;7Dr_F8UJŲT)z xUzGE=ZgsY*t "fRԤ}p~KU}5 $KJ5¹_dw +*[ɶS75;g*ef\e?~ %oSvhlG.:Fz>$S!'eMgUk +ffS|m[AtT_Dv4A*n2L0|x'qWjyԕ4~EyI_a7IiVWxˢA'9+jRJ)z o߱" 4ތ핬 Q@u7-{3jݡy)ϟii~g!},Ҕ@[ +B~ O Y rvjA=L^Y +,>>jәy*s%\.ސ߾ˊ'glt~[qM${U$Ȝt>V~ :S< z2Y9)u'd[h|xm?j4QY)`(6:<#Dn}rsT; v/Pް>mhi9aS}m5 'H&f:?gљOڣ>y xZZe 1|iYSiXT3b5lK1UYt!V{ڣά$y5Z9.QXF?([dHpyݩ(" e- ݨP3׈,3}a,Fsuw m^}c\ z`tiM.^r4΀!%Yz|ۜw_ u-p6lʡ,]ڝ.G ;AwAbNI9 T#1:0T*m)eEUfu1J| <?7فyꊿ5eBxmM/1lGWݷphziA6d1@\q9釀fP@\r!CWy +e}+n;dLT|LMIč!/Fj/Aꥑ r83ppܽX=[9V?HzɹglMF7(Gʢ4Kc{$c34fE%䚓HtY'Kq]aXvUpZ~mw#Lr{J糆{)@C܏.bd%[{hC<#lgFh5oCΑݶujR*5Q6栛X:>Pܷ֯a'70|Z`O <(}unac9}ɛ#=jA)Wy.gdH +~0>z{^<ë8bѵ)h{R%X8U Np~3d*71 rἦ [jsaG>ebP 5+/FM݈_>a<;7$y+UOotx= +tiYdu۲.SJŋ3To 'V-mFń-jB( v֙t@7*R²,y Jn{)`.hn |~n(yXZH&!?W ,]v񚌺:[VS'.E7nQG/?R0:Jk5CJy+^X{8V/ﻍpH\a6bKj +U jsF2 1䋚KU{YKs(&K:738 +B۾t| ,+Md): |xìF&*_1*@=BO(~\OuߨYAYG6#L#9,Uo8=FyիRh |/d' *a˂$!P&ͧEW|*AN:$B"EL@gC<tDO/Q $lx(DIX:MhEWKrO&`L;S }^]^p/JQ?hѰ(zT5ܩ)m?]5ȣbh f#o!m]Bx:t2ߒXwFFcO% ]TFd"_p +- `5>JiXmEWnҟg tn̂ҁt`N" ,9ۛTNl z{w>%!\w"OLտ3DVԇb&_Z]JhUv8<&0)V A԰kY9kɆ:|Vv O6Бm;2CU mg7L]?ʲ-_ +z %V7][2f$y~H=M&Ê|f_ 1€N&Ue! ǁf,vuJA7ړZ]fh^ldZ:%5%A ܦ_~:uAB^+AIv P\ޔClfJ/d2e +k +Y .YF"އE'1X;P!,=ȘIc!O9wr'D +ly!~?̩R/ܲE%[*0m9HcȭиS9"' 2ug벐 sȡ)}7T1|/ɖ-+BÈw`*)m__оehjM:jadw}%8}ék&ѦB6.Rי[KGOg0<! J~Fm8_.R㛄 qU`:>ы?:q57t+W}F߭\r6إEF$Bd^,' ^r-ZdTQelPwTxkX92 2*v +r \G&즯 `T*%6ژl"?ڶ>U-[h&.MU= n28D[ܲYJbr<=1 5.0$Ov4) +bczꜥH$׾Nsssɨ_4Auy +l\Xv5G$,ֹ1/ѣV"p,/h^ f{' VFl+p +]T׾AANgwETN{A?w5 Ti+gǴ 7N\eq?R@ȹ}zY0u\ZG7N8 ,fAF?3i0-I O6Cu@&8#Ҽ_MhqTmE-U2. 2qnQA+{c0e'xY~=JmfZ1;%3wpoY[sL=aedzcB  ,QARHB`7Ը Pn߽-g;Mnwg?Ay,`^aY 9T#jg8.B^z7Y}[>ڌ]Ax٠y~_a3ݟ9=2>]nrs<I)[|*JmfcaWlfG]Dxc]/V L"rջ, SAxfHF] =SuˑU֎¼)絓um( )1(ͰfTcf>4:UٵF˓^ {y:\dខtܕ*g7-"s9ADTIK2N{F1;z77YQ2W0o,M쀗 ) 3{LV8in'ݸd2ńB5y'FͥǕ1Mܤ4=UMMȂsUFtxؕG +c\4ǿ`#AV=nҶmP+i @*&ܬ! ;V>+l +f-;"X}Em#iv=%tbD8JPl9չ0$XR1~]2;eBrVSe 93#FPfo~Nl*B`ΏS}gW̅}?󨩻ܺ;jF\IOɱcM R¼gGcoƿ*R~1+ıͬolrh [ SO];idŷ~!=MLYe}:dwësyTSdc(RMLWB[Z̬9t䰻="q`PDVrD M K:22(HZrUaU\.BP.L,z S[Ԣ NYyIu}@f%_3Rܐb2"gr&,#?}%'"KNF_oPtw@Uf'Ajw̜rەWh͡3Y~]uJ$QNm_59/c’ar{BPݴ&y=ƠFPP|ؕBox?38!]EPq1Ki@۱4-,GcmŁ,u74"N?,F{3 + #I A+Xg+M7eqV>ϾzYx捰[1ޑR6ˣ1 mZJҏ"t#K Ř'^vB\ZӃNR*Š1j7?jV O:ıq0G=~fnpn\s1 .h0x XGJh@&fúOF$O%Q'[4M]VÚV! +Q.CapQZD=~maqPUaISE;=N9gA"fFf3PURͽOݹ`~B+WLV}XL7Vl7.`vl}fVޑ{ ՛_ͯ/W0͹&X_m.B57v̽@hWS'r +L٪’La.>kJ*SL%ҮEXF5!=$@2Wڣ\fom^rwNOb;0I^m>w}i ꣞RRJM_zhcV{Wi.Pñ~#_)'*- o&0{O4gk;발s[c!ZvP1 l2ӨĉQXj+5ΈWfAb7Jx\ [df #cxA% POs܉SE;n!im\&ΤE N,oj욷4n 80&T +Ϣ`DTrJSL>IQ1.U3gX[:g*N] *6Pл5?H9Έ7pf_5F``8G&&Y/-)&Bqvԛ/a ӕ8L]90a"3?]go;c7bȶBOMWP'ThK Ur͏ lo:#d9 ՟o育4Hbvfe%F]dbL  R`+7B<Ӫsms!}N%=:0kz&г|g(3 M +zhy8vȒrI}( @I|Rb],)CQzg]~ ,=XMlIMpw-L>O݀Gab]W8qxV.` DHZsyŊ`W$ Xv&P|pJV̰p0!%J(FN NU_4W|t2lrr_Tkǘb. +1Rl]D; |'m̛*=G0A3\W-uӳXST6K2me㮲WXh&jEПXX%b|?+Z-Λf/g ŎJAI ;6[Z0_9'؀V7+rHtDÅ̃hFa5s…٥%G9PYE*^jĊ?ӽ譆UtaF)@|kPXF-=h&*R#4YL0a%\{/l;e/btOol؞nSJhҦS ! 
} n7)ee ֋1,y$~ /6qPW vm\Mo=7A T2MM;3$K šǍiXAKt/ZFPIb6m{ڏOZ__- ]POFH55@y~=4#6|zI_eOVſ벮y%vmgW7  6,f}[zKMX+:~7H>*,N:~7(t5J~vIgfC@^0>/ߍ) ٠p(wn,iS# /IRRz9E !H{OND&N6œS?´ Źg7 MW!,c.+VĜ`Dž(} zYqEFDB|`N[Ӷ j:xƘp,M7w_˦3wl"단*S fʭOF5p+f“ Ho9s1P'g+7ijdR'ǂ5e)(&?T]?YlƩL/ݪ>}Y"أV#[F%E(쫙x,6mX|gQ>Хϒ}e\J[}D`E]DA^"DOxjdCHEk*8Nu9M,2{A&acߪl]$#"t0kExLQ,4<BBIX|A`qӆQH,UvhJ/ͽ>52)b k{ ɠ%9LUTtu7/K]HvF9IbطE6GL2CǢwnӻ_9ז)QVM +t#០Q,d\Ma`_3VbnՍ|{+*[xL=Pq }EI,B?GXHƵ5[p,7?7S4VM;j*>3r?ݚl#:[ueYP<;WBs~6Y[7r~3~>6pˣl1ygtm[7"$Q(;;w•fz[K $e#ar,uK+>0N'I;@)F̘g}4Gn–.tK ^,֋}<نCNq;*-uno\NPBݶ㐖W7̀#J 5ö>"ߑ*եXB.#b|rkhr9̣2s'H{5>꺙G\!V57<3]?G8Y"7T)Egh€jS42{u )}i~oVZQ##[m(K/qzڍS]4c6ӑ_ s_f:EdQNlK,PS,®a鳤= ޛ*hQHOpVEQԗ71A .ng茳YGipH?vow)aKr-~cUOт Ԝ!|oͮf[VATL^W S`&&1Yrխ'3'Χaz_ V?GdÑff%**AXiWwxtI?D%mcC;׷NMF$la'R&v`BZXc'yUC~ų8m27ZQlAW,ܰ$fƜi*I;* 0 )=H{D3Ga1IDBȂ* +0GJ,r={RP :vI#D-TFuL\㠊Kݷ3^aD˚wCWtQxSyR`>;>3Bd? 48G#q@l^7iAaBLyE]UK5$еƹ}>A@/RDrbTSyaZ L̥8&o5LŮVPWВ]τ/O).Rl02s%#}:[aϷ3Хt;Zd*aӔtfpQY(=jZ=evk<ПlZ5zzhHS +^rM?h+")"3'gA;+3 +-%$xټ+0b;7Z{8xSJOO[;\67Ŵю!Ej4nk +;k#ZIᡍTLk\^m̺~C3>*{d+ l Gi@AKYW-[?U2 D== }Ue҆Ȇ2)l.'\NLʍ·b1E? +r ֳCdAHkqm1Nun]CAS9#:2q~87~;jo-w;dvFQu:DٌH;8|lC?ki-tTtnqn7B> R%J2%ّV>j?}(o$ +4Y3g psF^D&V8KUFqb8s#ܡf9F/wr1HU;!Zǹlro3XcIe[!Ae&>,e`RjR|{{]p 0%uh2aFUܷ +aY +PO1.4?* ԗ)f) +aO]U'gT6>Y,zM%/ =hpKf-ƎJ"[3$Ʒ{MȎ&șh4!C@r_͖@@IY &)qLFF_){U)M \T}Bk0^)m !CZ1p jE?|c +ex?Z5&.PQSf7"UYpcnB*I^˅vk_)xxm6bB&zuh_[YF5ō>' w21X^\䫔!hLN/9 % +s) i)(5nGSk#HP_91K]sjϖ9у2LPĶ F-&)xX Z[ZJ(Q&[pxz7"GtƨG3 Ji[-3;N+ktaSצ +IdwDĭ#݉3?QV-.>&<Ɏ/0*6뼼n]n/%Jbپ,6~묋t΅Eސ.QS?9 |.2I?_Ug܄E/aTB?q +$0k,c +5,;g礟Kֿ<:q0Gւ-¤QĎ`;Ϧ ñ.#{?Jrg^-{|v8-X~T$[E,KtuTJi! l啸bլ N%*%' ǫh-ū .w݀ HX B-X䬗ֹ߻n?1ƀ/^d[:?4Ch9*̽kf,԰/@UqjS%&\*uwVo1TͨB%4Z~&%6#lʽFD$S9>hmyT +*“h j(tʛH"۪eL)#RJFYRP`^`^ P Y=dNFf{ +PN2˷M:Ekb|-/_p#wRYkM&k4(_̑p$'JӥV@\mo{\`[Ȩ&Bye$v|VdLџt_;rD?~8dr~YE-M(մJ8|$Jߨ{5U7#BYǣ 7aؚWKo"f4k$.G-LkѼbbO{:qt0WAd\mJ2D^ ;zRTMMET:$H~d)_ I\L4`E`R;*b_ rYi4R2ac<7Y]G ~ +jF<ݩ_lU2igTRRv|mT< \$̔3Z&%Z+13g CaFz,(O$62&<Ċm {ã$άR tnΰ1E#zUR}:\FFE8ݣqO˘Ҁ^0&Ŷf L r{j͕2GuPfa,"fXʵ\6Ӆ]pTtj+· 4? P]QT *qkBەk#_o$p/KqB ZM` +wrrķ䔐 +9-\cMkܴqojKh =zAպe ž|<)a$J%v@ +'8g\L.f&>_c3#K[Q]/KEy-*7WhQ[w03xֱ7Ž]("Zil?(;"O @mŘf<򸷝؋)ϫu"(\e-6E崍9x5V(sxtI +Z̶Z>0hNu#9T{ J ++p!<2x;X:QD;C)0,&0]!wTI:,qPGw:8fnR#}&e57غ/HqU=$lZߕꢞ-/QH:/թ\u<%qBH@MۆmKpv#L5bmAKЀ㛾o66J%K饫%CS҅^U+QYQ6m5dpun@䯃8UU85a,OaɌP +ݯF'>w.O~YwP2P04˫giw9aQLF!4I;v>}/̅0cFp:%_ոPhO^!tHo?" \]X{@6_v5$JtoQSӊ^>C E2 t+&䗼K5Q[b7C'p0S._,#.z@bf꩞7Rx6>R4Փ,tf.B/jB@ )QM wUS,Q@zjÊP1~Xdgy0矛MWvB)_/- N& +endstream +endobj +857 0 obj +<< /Filter /FlateDecode /Length1 1367 /Length2 27845 /Length3 0 /Length 28805 >> +stream +xڌt߳6Ll;ضmNlwl۞ضmgbNlMls ϽkݻzwW=Uj~ɉTM퍁v. L<Qyef+ ,9 _3,:ގ:\Č\26fV33'N<1#7KS<@ K.jdingzPP9,MF.@?;TM,.Łݝ֙\nbP:܀(ja] c49p3:lP(:{6fNo_,621u03Yr ..t#;ӿF6܌,m !`s6qtpqfpEƿ9eq;SQ{[[3_Y:M'?wwafigjWjv@i!Lc3ؙ8@GĂt`0  pqrzߎ\23L-M\@sK; 4ggN?c0'?2eҠ{Y,n..';?(Y[Jۙ))WnΟ_mP3Tq]&v&?_w]n翀?uuy?*P ?Zoۙ1Z:KXzM,]L,!?vDfciTwV331/eX90opsKq;{ӿ0rr2eC$vv7)=0OO{3{'ؿ&vmG]O[@vmބ7Ī1^ϝpP#{ͩ*.;hp8գ:ѻyG3Txg׮>o s]#3M0BG>>2\JhCMÕaˇ_8d*cԢu sI ] iPo<-|$^İxkľ,yVeq!&D9IZ./vL[ڵMf9:TP~v:ӧ fG)h[Fɖs)!}aLhhĮ }uEѨIlDd7NDQth>;js'zK;78AGOyv{OUM1[},ޣ)gS[} Δ<18ZoM9; pcHS3;X[F ᨳNa#+*?jQab_w\1%gwyĶPwe"b? +5j_31T +$i*]/IJs7v@\I|zF*qr;va X1y6~:v -d7syHǪ2j[&3%GH/tϝ99^_A̭5:9ArYŮC8k!7Cʷ$" +wR>fiREC]CcO;V.aϟI7l=(_(X8 ̖-"fސb,Wv)@U#6gcygiL,K'd3N87S`f i-R}F~D],4:#!d*h|=1'r> 8h^fz-\8ViȍpjB]£hsZWx|ۯnFËR2I Oү9B^!.x;,?zp/}&G۔oXIeV1 ָx[U=]v 8a~RЭd!>z+#P6s F*']>){[ Ϛ 7sT1-My] +{BZ +fpt8ZuѡC$"v +1E Z<0ғ-9jόu.kh]T봆! 
.W.wIVsn8M {)w$G>toZѣV yvrq 7I<-OB y  lU2!"&Ȫ0`͙0&mBvZ\;rq sƪ!"|pK8en] %(ANMKG|iIx0L9e4QoD`W^!fumMChT;7+E6V20lD,e | K=VK2r1G60wwq6K66q.i^Nv5'Jtɫ)Zmm4>R)Q.HMl!H} *A#hMm;dZ3T%AS[NĨ(H_per*Z:W o +d#j Nx:_0mAc/sGUf.CA_JGZP + +‘ljzƼ 1rhEDF,rS@e #n9q"|kȱG~E=dԉI + +PTva JxY)8LT^ulvDz d#޼vJ 9Ǎ9E6t"nd'm2R8QԎl*5 =8@d +Fޭ=*+Y+_ dگp7ۭ&,Ǘytj~%QߐMEGt40S^`$J"DtstW~΄_0Qpr*{>,I1vVnѓ#@Rku[/fPEYU büֻhC'Zߖ)g iQΜ$I {5+LRLX%けYqp@y0Xe<Dm45Żr5f9ZJxYH%C[KkJW&b>6LScjeO uPA 4&/G^MwYm{ +"RHM7XĚLͮ{F_gϩK _n!'Wȣz"Ib,;|@>[Hr/P_{g'8DPKdD:|.H$븞Qkd8iQ;z 0I{x3/FoS_q$37hwpi^\ 4ͰGpOjk_ wx0]oҴAt%B/̕ 4f FWV-74=:OTPSt1 + =v3fTʜZT}\2v4T@[gd} @cNb%rVuOݤ>|EBkX6."a"8'~rRτ +( GBa02蠘EXn]%xI*-ʗǖ ~yd/yi$Ix{RQ8-1̒yORQ{ZHݧSU*ѐUMD7ةdw_1h‚iٺ<1 rbgEtڻa PS"nK}*8`LPbky51T \` o#rO~51Z69;v/[`ka4 +ψ &Q9ER\,3(ww+KY *cXBH=Q Csz}9ą +[%kFWLfq6q?`bNDxn P/"/:I8Aߛh&LU[mm򁷟S]b5hHC8zep* ez[`vC~ }0ʡSL8#Ȁ{'oH:OpR_dy_|4q.4RՓ6oSj2<־€B%"W7NZ _]UMe2TQЙY[ SY# ##69prKCa5Ӌ ;3F+wuX?u+8'{J:iՌHR]/crPHN)ʤIOZ 2Hrϥoڝ}bQ[g2㫉iEM`o=D t݉o.Q+oL2N9/6Fu:&4l ]nf_\yN\LLDC\(r\tEx?d,;Sg#E?q\8:9KOhF.%SD;ZmcaxVX˿NuUlV~V"2guG);5_r| +K/i%PSc*wI)Ä9蓩,bG[?{d6:HR~W` 9u+':X XxSC<[YUðOҴDڑ# D=x3v,JN +ҝzQ(t\׋&}m  Rj\h[wPJy xGKx7.Mb{vJ6e 3_' Rt1uP]Z3>%9?}I!PɰN1j>z"(QƬnC4wY$7\TN#SDKed?CB=}ى]jz~Rjt+_wUs 4, _*N3O &o|ׅܔ#cpH d2a4Yo!G3[H-er?[lzi3 TY ucBSͪ`J` ?T+x5rlZ/-D2jVgWm>Y2^GԤ2{liFcwjgtޫ?Džߘ?a> +/BhݝU Kn{N&9GN䳖sbjx(daAU"R~tC;+CٺIPz菺"C"eȏjx8bhLͦnJ$3o6㜿YQlNUf"}Dq:j(@6 u!gG{׳5jva=ApbDKS:OudKSٶ0ЏrdW7\EJwJPG_3q,TU+3y<[)If.0Kٳ/[uF(P\u"; +<(>HUQ%b9\NROR0y̐el>u5V;fcF%v;5wɖPd&@QDИx[ӮҎ8io+M KT gҠ)QՁ)Ԇ찜&aWĦ\f? +S~XLk;2yzޓF@)`tZRH밧"!lǢpxZ)8|L( ++1HV n<(RtgvI_aNy:,zr7fM q}o0P2RY229.bkЃX% ,ibKGowf_'ANbmCsW<9I_NPEqF zT+qE3Lo/s I7?5.,R$6c$zȪDm#B"0T?;-"$oMI%ߩ"2b.Q=L =ބ< e}K\}ta zDjͩ<MA+f/lӔ"5Y|{i?N ,VJUWC|IZL_*:(j3,?[]_v8qY4^ot>PSn4T[O~އ/mڿ#3=MB L(՟ =Kq#-oZV2WhbK58xϳ*dsW0*CK/sQ\q>dPsQ5 )]Pz-`*Z?ƥBBtNQnkڱTeZ$pI3MOBq( 3s9dV)+Y +"C ?ɼ~!Wa I?JF,Dy\W%]ITѭBlsѶQ.ث]{ȞϞ{Ax]7^ԁib>=;Cw|&kAy!{I]-ۢ/-%px¥2(e +CT!(48nE-F͝-8=mUQJbvnDƗJzB_6u?O[[UO'BVBn8hZ~_DVq  j0EA I60ʊ}c #-S9ʴ[9ьtD)'}]PB8Q[x{~@_<̖_}㢀@lٌV4)LJ+nJo|;tF$`UWFYA pTkVhY??m`z#z2O{0FҦWleĞ5Bzuui<3Fvr> 5XەU[Qp,@rjck|wBgy#⇔Po䧛vQyQ0t |ٖvσ;`Z3l~EVR<S}I^NBy["xX汩7`¶hZM%Hp~f~ ^ b*KQAx1$FP+";7-d7'( A6RM앧D-ĉ]-rBƄT&|.r.M|x eꗑC%][@|SsGlFGT\Zg֎LM$`(%>B%Uf5fS_|}?[݀,f~_Ho)@.O/cf&4#="l æ|Ɠ/-.d' 6?[!ĺ73Bٕ*9[?VIOhn l_ #'0Sh.nKu(0 YV)>} ~Z#z*lm,qM + +>QҭkeRZHutq67`BE;pV$R8F֜&kDŽKkKO0aǴG6ϢNBח!z\OL$]Ptzk^>d2Rl"UeE2J|,D~ZН%}eGSU.]8d4\opfI">+LG`L, 9V$\$|Akj3(<.OHΑJchKP/zuͅAX+ŬwI|,nǹa3l4Q  +G6ꁓk n,+F'JHQ깨$hNj a{u!'^pîW_mupk|OMH- 4ڗ-~pv̥:0+^!n6Zvu \@߲lsԦwr.ys. 2Ĝ',PPAzf[)a|#OohyZ `-Y10+µ)ͤ|lѤ'@SXIQfaij_ ߼!&^{}n A}kOb)VI͕M=-'Zi$>@0N[TD#mvÌ_"TΌJCDv$HRUv#;YyB/Q򐪔k@s2m 4nmpz;lo B\U _kq!]DJ=h].Ys$ NٵQVo/'>+!aHެ0ReCЮi¢tJBU9jMcXa!ሯ ጖4ލB4[M9UplUjmxZ9tl"Đͪ$ԄYmUw> /o> gqg-″sHAr9Wl}<,MݵRɧ~!ЄýX\)d uڮضr|6hg~bS9-q݅JO~+I=8W8^ t>OD\(s_L8Yᕥ "TQq$koҪ>v'3or^IzbN#<'|5wpfBm2髢#2z72.[p?Y wiT"gVy7!ꡚ>"k -+E9cmjo%zoY}- yunNKG@ +^G#UypLjl]?C!A1ty<, ۿJͯDp?~sC}>l l@@9]Eo9b])`y#(~2NO GlL!'yV.3}@K&qHһ0[;CƓKPH01K2]ɤ9.dG*Ÿ%O"-@\uۦL}=U3Y#:|b/D7?$Y6惩~pg('U$Rv\]E|3ud׌0!I/j~ +vz*~ 7z(LN[b*3'-xN};IqKvbd}*D>!㔥 MAo\,lXMv]f6ÂLeQXeцQ-Bkv#HO +ڛ+m'^B_yt[TK%+@U̦fk%֤l80` VhT׾^" 1JqhTzZR)(&5]?,%Kz.2ڛ˛k +gUuD@@U4PgbN =CՋXD]K Y/A +L-ũ&tݗ;bO|oF[~0XcYiRg$ΈPNoo$Q"W8[K *K`h[Q;^.25u/S C-) +7Bgh!oJxny#e5B1H}>*$x9x;@ká.yN@#.6ȃ8dur"i-3f=(1N{k' +b3N}K; KoTUsHc`wSBhubOt]f䘠0НK 5m=tgx~9W<CHwgo*+<|8Lʹv) +w0~Ok(:) +Clu@te k-?6 ,j}#{&yD`%I6^CӠƜ|IX3&)k4E(ٯD4ܙ<,<lg[8.te8=u0Zs';}v]Rokh'%#U<߲龹.x Lɬ '1huX̶7InPqijDxJ+"WlqZDj|%GaKm XW>#1T 5ET>b5R˥P~~M:^ Ji.Is>S! +owf71܋&-;ϡqv(ilr88EsY4Ua'O!EW). 
Ry&=爧z3Ad\?;r=g~:4FO֧0Sy;.)0av]_1JjW<>յ=AU !U"S ZŌQ %co Бd+L8]~P{L(R:.$Aq|v|TaJ BiNI8v@zZx&a2uqEAg5D5>Za7lnU0r,,3>l/'K=!;kVKF]ws3_y

߾"_v=(l@{!k73X$8 +[_bjK j4$JMې<3p MVF /K">7#~osd'γ#JY jp,/W{b6YB~h\df(uPPS~x/1'MyMq_ܭqY <2)BR%!lHNTrpzZQ{!-hYY-||c5kk.4q8gY>9a] ||-UǶ15>RM$ ^wʩfL>/N-,hH;3QΖA/Jc~(Y,|D&Q&~-7p>qqZ#DPwك?4_,FnEHW.RQRf58@Y;Uq+,^ T:vd_]cݔϨ>%wj|?@LeZu u2Cf~GnTGm_֔͸|m :3nvN4 ܸ_*Wu Jjk]~`Whh择4NTd,筁X< +^EcT[eO:F-*zRS@N]?ڧl,(IBpef7%)QVfp6~͹C® xQl`{} \ ja`H3amdw%T:4Cu@Ƽ:oE; 5f\Au&MVQIcUP2k|C7Ë2cm򽖐/{RIMҷ-TRA:c5By$> +Dn]StEyy{Y#mewc![eaDG Y)[_r`)ҫu|)0wH:F'u9f.uR`"'أʖa~E X3t}8 AC'vKEG:F6oԉ.ǣ$)XJ|I `b'-{)UEm8b$DĐץ9KIQoE// ++S5"*8W JYyukƔ_x9(ΐgqǬ^>PM7ṑXui%{G{w8MoRްdc(uw, ')D8 +VL2`}Ow\!R gGWr=/4^"ۅoM8bn7|5,4tVGd4T3ta{Y9j'?8OzV8zH +DSX +)[098Wpn; _9)ienbw kYeE XPKQxҜݸE4<, tc0h!T +ry̏9Cy1?^p#3nڍ2˾ؾm(z$_z&9kʚ92&sq䝖MQ:rסvgc13i@˲hLܓӮ=M"1Њ%Y:`>fY8 x%vq|M6g >$ջmDejYԋ +aaQ҆tG/VTzЈy`w6HO]IԄo5:JNIb"c 1X=WH|'l(M#Pb柆\RnBܾPVYyKgح?ypFpS`j"VswBqp(b}OALC=&)-+̗twʥPs*SPHX+G +\V`݆*'R6 %OS.YBNJ\?6y@j1[D7 EKc놵b4cRd0ۉpabzGBr`2 ~ +Q!҇tci 12ݲÙ1t8'E>]ņ1B uX2UYBI0J]EIP'8aԤ +LrNSo`ʅjZI`> %y(3AqJYW gl(>;}SًiDp%;J<գS 慯 ^2x8Wߌ(=+vrc"')mnDE9Q۫LmhM4K{P_P:t?AbaNWA~R-[h/2g$l-Ĕa4vț( L4{Q/BwVnK,o<;5 iT8A`jDWYMȘvh7j.M<} )SP5PRA|uu;2⎠RCO=#TdM+^,E{wt,F@bM3:z]HYҔ.>m*J%2O>R* +rh[pTM@r>鄡Y^FRiBۛ0)]/+|O3&tB/]ƃۯ6ax=SQcZk,:ˀ4' AA[Գye`([+UMBׯ4g88U(I hIm,qbw>I+ϚPWSeYI'/Dat83yA ?gsgL,-~bxzUm!k\bm;Ac:HKn +i6슛붺gܥՕdb=Z)/%e݇x1q% jFR` :%<ٸ[t2ZO3)-[xL݃wsW^!MѼguc$ꚜy95ܻÃ2/@ȥe`J0A.G߳mdzJ{|ߤNRmnŏ?З{Gk. +Xk) <~) 49_h0s1Ɯ"vޗd' *nGJBO:8{dvl;~ ә+Eb;|l,半dBHpy:X Am)W"T={sӨPXxa +N Y@QbOb &7WXec v ,w{HpXB I*tX }-瓹Ukq\*]w((Y"HB9ݥ#l 'vxiB6"D +Cm8\nZ" w贪,UPxi"'x(m߾j+HU'9ok8L"֒ ĝRK8pZj=&u96V|qG䱠G>²xG@_:YA^꘸aH~G3r'Z2+UŝxMs nh)8gIAl*yC9)%.k]C^` *B40"wŭ Z15.6qxzn&ŏ (+MW8%e]7R2A/Ez=@w&G$S<Ҹz7<}"Gb*n(nXl9s ڊQh`ГCx.l?ݻ?-- {CSysWpdCʭo~? uP{u"O +&0+6AԾ rABV9:Lp4WŁZ9SQ2kh۪ѰKAxg4F̲cYz ammOcnR5>]Ki:Cm󘛴H7q*4fO]Ť ϼ<$Na“i: V?´_7My;2%AȻCki_굸=z F+h>z|؞sM>I4 +4Tzy0r3U#AnV֣S7c;\tI 6bڅ0~FݞiD2he"qSf`vSv9 ̽Bdݥ3CgM7 ?@]FE˜@Fa(!_|K,tv(I&y +-dR}{ "öJqK:fOʄOBKH:[) e /%TGt:VB:a|^pp]xN:LaEm H `'R w7OgLNMx2ewJS1&Y~>W֌UgQk;?Du-\uޫg0g:K؍ͧB|/=WtY!rť:r|4.Cٕag!C+jlmG[qEp2}$-t>04Ҿ֛8ПV',k =}x,08yV 6n%EΪ0& $TOؒ@Wu脎G [X91gBw~xW{rMbEŝ7){*\{ +Ou>#|r` +FN:WvnXYN2b(7Kٚ.r}̱)}$%Fvh= +$580v ~VRLʢ ٨p+| m3L~գO}UŻj@b=m F'ΎМ"^p^6`)>ޔlAq@;hǑMX3|ǃo3NQ42kG˩,©o[XS0s/ EX@)!WB L\nf u$Yg=+Mg~$جnn-W!Z_vRQ5OI~B5i*@ec3C9u9']w9AAt@YdH=4Q#1  9,>e"%EcH=]=r[a19p\ yA#԰vZ@@*d%BeXRMZ?%)`J\1Nfj"J$1lQy TxG%G0fXe)aW SvٲMf9X] ) mzROr?rgtbCTS vU%}>ABh,)H8 fP4/n3}奢fiOfX+m]cw /ࢇPQa$u !wˀc@z>%aQY&,= W3`dXEOLN?_Y~vW p꫘Bt[p ,! 
/V^gTr±{ AB!~w$%>*яe.@əy0kU%v ~y,}FfUO^:5s7(ͅuVj#fᓷK'&twz21%}^oi}ul-zQP}&3c{g汑ۺ;7vm}>EbÞ73JuS>9oS%1tSyI+:/=n!uyn N׋`[xth6ɧ]{]tg?,Q6)`mK2u7?i?W y{c+& up 2EF0:"{H!-f!at<.rTNڔ#EQ<#Dte +=#ˍVFkǣE/H'ܿ, +"M=.Y X.KԦKGMr9/q>MKonnjwnd#Uc\p}!C +JAZuqT`6<!Os +O39ӡ pE&j8*5UؕDL 64g` ڷB߆ m + +;T\ y&"tҗ$[oA(W`ʢ0=&\^=*)1W8ڐZ +-Qz5E:r+Q\(Hr']P/gSLWZaTb(Da5~&Re.Do\p;@s)==Ā嘄OL{s𑮁qoa˸jRjR f#,U,l$IVlp?֜DVIgy@VPw&P- d:t9!nn~9HYI5P,8%/kљ֪eU&_id4x+I]FW=})Y\љSŵ{J$ff$ 5"-\.DEx2&VS@:f]$i| +F`*3,Ӿhce4oNj+1%\!|Qm:SK-n?ELo/`IH8y.ybMWEYg0l9Նx +=q#O_MJĐ?>l( Bԟ}_[?#x7iγ +GijzKؤq¦YuH\TZ6/4{~*'$CJn|SSPs/`-m%HPs_2}KQnM@#:B)JEu^DaS\ne_Nh̚ o[02Gw@1GVl=9YQ$"R8ؕ u k[mOݞsOoؘ uˊQhqVe.$LXVJ<8i\ɨLw,) }>hluvgtLdM8ah +=<2(sNW㘮m:#nJ&D\,=Y/` `I*"}⟷7!rv@I[2iU+ M.A&^3BJ*h k[؆Vg2Ir2(Iy"VJFjɯG_ْ@F%> Z|$7>!/bxlbX~Yr_\`bhxǃYMַr_6dTE*[E[FYg^Tmu1fnՒή{5 kr{tW@FڋgkEBfN?c9`Ge[ "ȌҨ5d;M,B$]"GV/~F1CuhjFezwĦFMx&y^ ȌBkCWחm"mNT<Ę\襂;"jG'YfkEKj(Ȳm8}.iGR |# - C%olj(gI4rwQt_:;;pǻ~gx[93@0wnʍz8` +neprkm|crKۜiy:U9)Reݠ;!]2:-kD{|"@qZp?wj7%‚=\c&>g@jW{܇N}TaQ WJ}/6}Ʈ<ߧѲ7 by)709$A{ nħ\"0"CHFj;Luؕ?`;(Dh6݉^ +3S9q7'(WAN pOaBYzy_pz,OwaJ-{Y ^!vJǴ1,Ω |]F[g6LkΌF7fFl-e^j*S[[j#4KǦ~p@]t;w1^]];iFR.Ū5_Oh+\Lm+3AZ{>6.h "xҒ#82ĖU:O uWS]7ρGm, +ܡJPkjjBēJ[ 2w5Dbnh4_f9ӛHW‡;al-#S+-J@]a|Rw/HX'ktҵBc/ 孽8 jN~2Z|„0cxJ\xնh-d/\< % +endstream +endobj +858 0 obj +<< /Filter /FlateDecode /Length1 1360 /Length2 27857 /Length3 0 /Length 28815 >> +stream +xڌwP$ʲ%:4N:0; s}}7b7:*OʓJf&@ {Wzf& +\\b`@c?61c?n7[3+ _<1cw+3<@G.leaZLܜtVycWKݟMmV@Wtuuad`0sappxXZT.@gwr +v c#YZcVu0w0vlL.̀΀?gT@qj M迈665us4[r tc{m][q;qc2O}Յ +sfvv@{Wru/ƿjca܀bc +`gbbf@OSKƿռ0SpuvfV=ܿtGx>-3{[\F)i%m)ڿ oHDCgag03q28,E_Y0;VOnvW5ԀRpX cbg2-Co!(_Yz W7?ڗw3Uϸͬ'*jg-l\$||T! V1OXHoCJOCv퉼x;I$O wZ죳Y҇KC~69G#r"SV<=kl{B}|ۀ| +C:2jC*8-zCfS7g bէ-gЭ\cZ QRj@vjS[+g]L6HŏJ}p_+Z1my`y&<_>NE>ISlw|OŇUwLR0Ul*$V/1EEKRJsڎݷ<& $W4]BLK45A4 n3F>$mIP}e=ЂD$w/#TYƦ/fj4Hy.ʵgחfzmyP2ee +/~O q(t K቉A݈e s¸`[.`tɃ+bœ^{V +QfUk@Uk] R!*bT5^W09KD[ C8i:`]GLuMOJjpzJR4J.E^6E}zA>@`gO|!\-u7h*C[eL=o ^ Yx*ڠ[չ~ɬғˬs [W{7- G;f'_: {icjO]u-&-ؘcޥ,!jQ1@:iEը#|LTN + ѣX q7h7¢q5Xjtu#tu%!Lp:`ÒEPM_OFVtٝB`ԮE+Mx,rv/C`41*Ty=ҳMMdK/?'_`0> y'Vur'a_[.dܞH\1ַڡcjCе*f7xO7qwZ0 `{[e,S5~~C8^.eVH{vIDi(}hq:xaÀ.6P }"m"hƼAv81KSʺ-jvzO-˸҈ n +l5sBo'nGFۋXc??wn vbk$'qU5틗qp $/qJ .t}gk5& +ls-rx? HEļf.7LK}O _0Z<1MdXޮ"^  4!ԅ58k<1Ie&AUXJ3F$3)0|;iu0׈uU6-j yyTQU3,ѯl 3̭Lh+lj8 *@b񝼭 +\;|r |ferc u+ +e*6{hP6rN[ݻAœE :o݂L!&afV0?s7 #X8ICKIdY#M|t-Umc8kMbK{n ȵ98CۋktNԖ8m\%{N;h[А0RJ!=r4K2hK(bU3Þ/TDV}8m +mn{?]vp4>u //R^t]m@ld:kT+feH}!񣓚? i!ddV|!z +{W%F(AQ!H8O}9z9H0Hr6' aCs +9BF(Q‰HԖdӬLMQ{О+B2po * +t *TΊyшTfmi8 WPk0TMGA]i+">'݅Bm L Z #y{Zv؋LY7ZWP1f˯u䇔 >#* 4bոtӍ=ghJ#%PIUf=q] +>%Vռy";8!U7}%mvF]瀱/z-u]w]AUaaC&EҊc賛?m6LœxP8}$'Bgm(-TE<Dz/> Joͯ|J ^˥P Mл"g1@Um}/wEȲA! NzuO~ׇ|o -ڒͪ9+'/4#5$-ΖzګMzL; }g]A=2r2t+S?#!EjMo=1&1òE,wשZJ$lۃgn{Ud +I Lvgf?NAmB%Eq6rÅ)@ept@ -s7h{ީi=4[YqPE$[bU1!G^aLXF DgT {ofĥIL)CN{f1ي}Pԕ-fO%0rK 7Rz_m)?'L`>s{P/ _IֈطOg)Fe6!s5-Tؽ n |Q6]y}q̙>qT7cفK!4դ>]vp߫M[!;!18=bMc;:@oP:DVN[uq&.]t5"(=P!DETՉ3||. 
TE>n6n~L05yI eD_ʅܕsMn{de0j)܆hL˝I jep8Xu'b:?~}4,]N )(\J{7y:rf6*{E@M t(}j+FLypQ?Hi'G}#vھdjFwyeD̮F]w33Pz˵4V$]N/n}#aNd:/(ɻ-#YEOK$ V!i8x,0#T_LFqH5J#-P-,dOm;ZAHLPrngرjQ/hrȩ) )m}+7kg}ִq@oLU .a=Mc0{m>bEۥq4/NC\g/>)7_ wյ)0zW6:EV^97uǢkT( Fe^x?f>jFw})Abyp8Ո㲬s\!߳OJ2:s>2^m\ &V%& m=(!y67wH5omNW4nߓçqIU\(CQHd^[%#mco]O5V)9Q/e9ב!Z6mA4}0&<6ۂ(5Gzry#[}BHjln@9|UB+iMӮB)֧O1>3Rv  p+} +aR{7!}Gzv~t]7r]O0Mm8KZA +LTVOgVM7MAU9u/4%SCp <;jJ`/k7ތ&xM\Hqh8!Ŧ~5H ccRZKQ$FByRכ[)М#W&CoX=* & 15|XW z=@,ʪI9٣`4;q<,nc?aZsG'Y>,j問06VM$hU9Kl+MZ~glŊI^/de6~[_٥V"r5asX[6fiꨟzhjMzejX'n L2ETwI~}gBo0Θ?j( 7\^l͚y ~(kYzE&ĭuR gtU%n\Baxա`79VcT +#2 al.Tmp}6 Ak5xX#a{`Lԣ*6Z6@yK8᠝[hjMg8zIYwuLWqq6Wt-CLس| SJT=5H&n%b[ם\LvV/N_ϷXQ? ths#K#"z얉˄9%K;X*ʭ Q;!'jM ZY~tm)򬥷L:h5P?mbCJCdhC95N7 ?\C[}ssm^[%RTי԰"Zk+| mYyR(ֵ/lk!gU7Ga +w'Ug7 \"YIkmAgÚgJX}p9W g'"{tѳr iyyny0²V6>I S{f"(\vCgӇeV0 > eb Bk mX4/4{*'UP֓˝weaucc9Ek:B&&Ơ.w$o$'MD +9U]֬ n"9ccIt<^U;=rQVBCf0X$2MD~QFrv@]g {/y6]rQk) [k͟E:<z/Mw@Ʀ Í1m +H25l7e +ֺ@pXD^'[jB(P)T'WfZaNR@m!tdI~8pG\w&4T`,D;Ω[vyG˦鏑D+bA*yE`0ÿP>g`8QV +M?oTY3gn82m_7D.Ք,m]%|Ó0mrBMz18QUm%${WPo6JW" 1d,'5T)ozH7y.KREi{)\1?N p7u,~Ǖ]I34;~r8I YkNr}Y<8A ; +mq}'ߞJm]|⍾4XKƂ?,,p1o[1vehK! $mQ)ǫI8 r1b+ 'Y;D4綠ew(6wNS^ T~47udE᨞Er&T76Y>8%/0ioe&k)2VMsHIʠ^wGm*;10"xD~|@9Im-a32ĀΓ13,Z =p *V^*8)ݷE&[$vO\kК@.Ud*x*/ u͹4bm043 + Ò/$i=vA^L)y]XjԬ))xj5 O]2L6a0lK]w8 n`iUŊ1]aeغxOeDx:̈́-[t Bj˄h~ު3giI' 1/4d{`S$.YXo7^Rnjд > +JnVg)O̢ mh[5ak5a-ֿ$NB[1]fy6O@y#orGXh`&;lZP<"~Q2SQ _b/&ҵ?S+*D;+ 4YM,Gd$ކgd"KjҮM CmD"_w艆4z(DCVuFyJsW^(\TB{(> XL܋Ã6Ԃ]U{iҵ!b +ZzbPfE~6)Ru|S\TI,R_i 4䪿~9rؙ T(SB$&iD~lY̋vV|Bo C9āi^Ze,%?Чcc2**MFDkx@ ʐY'rE8ߢCi,q>uA(ֆLot ?ƺ3SˆH6/XEj8 mE!?ώxs )EI>RG\ 1w&!l0]iK[Aom@KpGGl UNVYg(#r22h>Ul%G}{kT}ME=e(*Fae`9\ڰ6Du97eאsJ2"W>t6#Ŕ$o_EPmاn[)Bẉ^e%6eVv4~ 1}t+KZ" +>HF=LoV.\T;9߹cҗ0EBfS*as)+i?}I}q p${kIOtl 9mOC4(EgK>(~a: ͂1|^L[S:ѱ3_/VShiN:>k[YZP:{<~%8! +{Ǣ"es-n9'/IϥndCGԣ*h1t3uœXqESWiN~1;Hsdκ3yg$\LlLd$BFx$])m0cĩ`W帢xx&2rY|ZKcd$hK,3Ֆ:1^CԻ˲otZQH ,fwo2gX3o&~j2J\ r -edmMCbkhK SoCoZzKx;;\&;Bw\U*94,ډpB. /^p2w8\mz0I4od(SPDәy[ʳ;UF=1hN >T +#n>ۮӍW%G+nSez$W=af9aڠMbT eZ2ЦJ) +$8vW ,ŷyvńrO7k4{ZwXb ׮WwS'3z[ \.-dU}9 ;C6p8|ùUφfdZP4;52*!`(EW!vف꓊l 'c*|m`>(:OÎ'A[ȳvzVA#ٻ~S\shK) ) Ϯ'm+^4K9 +$d*9lr۫a*Ě>!^ +M7f|' }=:7l}I<Uh$3RP\ (A[ct*Bh[vUKQҞZ :U+-3zQC+ ZOX\$U{*EAEkdTwYcX10'R7yWkbyk5!ݔI9< )WDs,$hgXV\pW;" @S̆(s, +G@R9b8FMsD0F۹}ۆS?G0Ҫdwq:#%wYcUw-,1h!^yh7 ׸mxGKl삗VéƇW@lz+VÐQ2H|}`iqDFbZK+qr}3[o*;YNgxSUܯ`@Sco^]$%1@i2CYNߗA܄Cܞl%璒VdTl,sg-s&v RMPaU|Fd% ]*4Fd2-ݡ(~듁v-6k-,WݎxKLt$qmZ>Hrz"hW񏠤7]4~lc <_ Ie-|;QZ2g{j-Z<!= JMM2`kᒬqdW%3H }RS>KMcc(֢cAwa C~-BLee.M2U0'e(ɓS vN=S 1;mP^'ջ͢o +^r'v҆¥tlTX.w~jH)]CPJ􆦆[b-ކw!UfYЭBY?LEDxURgtl[>ƝS0 gX\}L񵎊^a4ZN^~wA^z6>S/UB]_e!EA $C< 3* +@hA5t*0d7qr+@@6< -f)!  1dU2wsgUCbğ;B{93.kC']z6L:IY$_XqL.V|[Ěv5L-`tcxz ,kB9։fgX՘LPZ8-,/ݚn2c|N;IP"`t:/\{ ^f6ku?\#61*-m46d̼LyTPM#a' M}\z{&Y'5[Pq&L@CQ?;v]v]H!fߝ1$!v#ˣ+"yR WiB`̹rm+ + cEȴxBܸP,lйI"G|U:FK-xe$ݨւr|+Xz]Fc&LzuMYF }֋Cm0џpu"ADl%Z+5}i:'"Y=X[xfAtaUH| $[? +\7( pѿ\Nf~G4A+ۊ?n)J{"51}"X&))d-2Bݘw7GPpVb0 n+hc۸キhZ.)[qtڃuY#< |m5e*八x. {rj8H᝼x(. 
͏A3AM zKqbݹA>l Qۿ%{ݞdXa&DwۄW +h,]^\R3^9#U 8!ԾdUݡDXM&[>)!91iok$̰Б|T3.r GC~-QeZ*!ިpl~&3_%_#]) +=jxL|i<$)fZRy!h}Jf[QM{--U#OؼLxЧ|DҳRxy&fVe$W/zB$& :(K{$'2ᶓ]]7]10oR: `0oq~^ո0\oBY<=~wע=oޝ+ecn\uzgI"P_Q<XxOIr⏒/`nfzAWVeQ!)A+lseɄ%s)E am_ p|L&^gCL+Ͻ ﷼p%1Iq  +|Y_-~҈Ŕ݆C&9'py )[ZxȄ_("|RM-,i?,9R/' RĀJ{E<<6t7(-3c2=0_q8P'L ݎuk{ S=/coĬveaJ +j.ղekhH0%/p5v0ki7yJwc偻l7;JfFۯ^K@DdR'z7`rytS.VʳŶ$pA."l\4x_n%GOD3LD*3^\WW|AAfPC6O5֝+KR$N"2YWD@ފ4(0{)+%F+l̿2|ad_xeI¦iy6B]ojMk UEJu"сF(CZ/9ţg&ϹiN$:!n%Z;W^&vUF^]gf̍p;d` DrZk^ciu}uSCpZsn#.VFK)nYGRh(s; 4p3dvP23'Bb5fgtyP\$`IǤɨJa( 6Dc*a %J0]Ȁ HU=tq|X <'IbXg?ZW=4 裟N#l4n{0yt+ B~$8-= `۝G,)KVʘV)6WRfH:b<BRt65&LPsZ +="}[Ņ`RԨWa\k֚ T ֻ&Tc} E61H@:i"qd/BV"ew?L`'#"Z/_!̵2G6!JHثP}/#DRZX4QFTxtޞcu"<1SPKN'f{9MAШ. |D +|LXgsNvud"=]5Yo!1JTZU Kh  +I­Ua^0UJ$xaӉͬ-`ˀ9!P01BE Rd7c7 X ^.:< &S#RU LP-_sndg|ϖ@ Et.[6I+'r]=UzNP× LߎFJJM 2hTa^UcW c~!NPdcwQ%|g F0cAwQ ?Do=d\l<~4 n=`LAg' 2wf Ɋ󖹉aE@<`f3Jù}WX(e.+bOKC[/гUqC]dlB%Бd_ɥU-.v#YR/:4fϕ.eE_  D74H=U宵C6`BP&Gԉ%P,s3z0~_㙭BaD+4GJ/ KRcG*4ʜ۩Ϭ9r]$޼6>Q\\ݰqy?Gu(}-IkY(26-_9޺H@6K#05@ |%QkF1+'xŘ2cH Z ;Q嘲"Š,O<Ϗ#@ G@ '/A \cnIn3icz_T[)tA=hR8U^=zQUyRqb\l&ޏhI܋;8u~-D) @dSA-Wb!=O'P@-KC Isٓt!QvQBN9FPuIt ,:ۍ}P*!7wwBS}`;'#,#479du}b#z5&Xb=](1bi1Ko~`9,\KP:e$Iϼ",dSolZqϖ([ `KĶaFMZ9Ӧ~:vRqۆ-|\ko\`h,=@1 P1.*rXchQ6ȀuȠ#!-Oq6tu\)J6q+6R&A(J)J!hK0ڂ\b͉-kѢ/p]Lž| uS[ӎ*'lDPAe|. j `RH'k  w25MMRDj0 oJNHjcD + N]kUF$8!}g4v~VA:|Kwgՙ\0) SP:uuq;9P!4{-YYVn;TyFK<>b`g'ʳH~ku%6EmC "~$Oxf%+4v7G6eTI]7]T;AG 3߀$*wAC٧T|Tu%׷mRed򍗰\yMUT͕S6~BT_b2ۦ D+ji8˘߀,#d[]:d#}+< {`Xz-9+<_ h &)d>mu/Hm~lTtdSwmәX%:{L2[)>[mpuB•^7/$Yp~9E;S5 +tGvN)k"1c%b=ښsW(6àz^,,Ltd!2H ֍A5\sCi : 2[Chg\`]+X°KK$[!?U!7`}*yNAJ +٭(7r߶9(2:67 JX/ZA +o“K}_ }"#Tϭvk7SQUmUQ~n>Ơ (ߎz`Vge;lM.tRTqޒ|Ei˵LTgBqAKAĈߜE)4dM0N*!.+anmko_du0Bz~I?hwk;\_++j^YᰂݕN]48@oΙ;4:]RfMw刧sLʓ*.TD=t'͢p#6}9Cdg (xEŹ}dY\4V_'%g +"ʒ≦mk@B3)`%q܉HCh }*挦U 8g Ð0-!FgAZ&n6 v7y؋ nY|HaNf8F>%ϿKLzjDꢈ MvF2:ⴌfRT-kaT'xk ^=Rum ٺ ~Z3 p#~D8t3\7+)ǺlgO-J? Di\-[pI̱3?z"SVGQsY1CX5*Z`9/.w¹R<'Nc҉L׶>(g7ǘ'MHƁ(64* W{4dF#k\@Y'\˺r$4:F'ϣ l96o$l?eR$⬵\""d?_wtZ,X +7 +*]5}L\6 a+׷tU`33|GK` @ K_CӖw>,HmFtʒpHC>[Rz^n|n87@=Ė-Z BX.;!I J tHQ`u)?a"Hԉ4]'ٱv0Oob2Og$XFA 1ih}nЊ$s Ϙ;S|&!dlxŭ9ֹ}-3L9(V}GaXݾRfJrq+U~YvUn5Oo@\Nb:fr+BVQ ,kV߉t>:czHd1ȼw@Z SyP"-0S%|+ހkuq\}  +w3oJ"+!9-0]%73MTԚ5Zk/Ź{1M!t1 Xvpu +A>Ի(m,F'+ފYׄ "7]9RuӢ !z|c{E.Z Ɋ[.C; -2^EcuazB5xv,Z x~僇|h_V|]l Zq%Xkk\q&eƴXqT#OٮXk3Et3Eᡉ}{dd|#i7$?h:XG]XA 燘84c]2P[oj`|nIY@K '(Q(2JBȾ:aG˓kAd[6 s"vi@vMdO)&0yпv\ݒa`TYݚ7+Ǡue/ODX[e!?yThW|v=|'orey5S뒔$ n(sp`IiTPJ(TY7)`O|i7B\SFԞ"0V>>+>`U_oep _,&Djls*1ΫE Mkײ6C< H\mzS*#"3Zw0y-$/5cu'Y͉-T̪ P>ڌIL|pW26q +]YL:0J# +>b { SG:" +tYKk fz`O$7%g'Э( .>!Za14t]o>~I(YDYrNo|* +L.CicAGć=tѢGiR15 /.rxn-,EtRd/^Q + i5'_sFlc&KBdjǽvF`cц;4UL|< 6NS:fNw1nRfH`Fګs$fO ڀjyunyGsrЀ|p$=T5D +20H >lnڃfKoI.XSG߲]E|Gy#F괯LoZI 7`|ʏ!6d9Yi}שO\k$v}u +iPɣnE({Nέiuy;'zX цLf 7{ř~ ;fE <`V q`e؉ fN߲LpPȩ Z#2u Ql"Kz.hX+K} 5y:hVct1'ڦƃ/xWczV8 =Yt +Ol40+cQI+FXOve +T,vJ(I*I6M"Ozu+4$t+Oz)WECB9]徦yT\-xA77 +1ӡJ j;akMCBZDM!{)l s, !~6 :S~m_$M^l&Oj䘂YSeSWPCW._+H k麙 A8 n/ #,ߠ7I\J[IY8gIl). U )I fR]%RiD{ +fvx+/>(ɢUl-`G*aҞz.ak.|;6SYy&'f__HmsEidR5X;|ӶP. 
diff --git a/examples/document_search.py b/examples/document_search.py new file mode 100644 index 0000000..72d5bdc --- /dev/null +++ b/examples/document_search.py @@ -0,0 +1,146 @@ +#!/usr/bin/env python3 +""" +Document search demo with recompute mode +""" + +import os +from pathlib import Path +import shutil +import time + +# Import backend packages to trigger plugin registration +try: + import leann_backend_diskann + import leann_backend_hnsw + print("INFO: Backend packages imported successfully.") +except ImportError as e: + print(f"WARNING: Could not import backend packages. Error: {e}") + +# Import upper-level API from leann-core +from leann.api import LeannBuilder, LeannSearcher, LeannChat + + +def load_sample_documents(): + """Create sample documents for demonstration""" + docs = [ + {"title": "Intro to Python", "content": "Python is a high-level, interpreted language known for simplicity."}, + {"title": "ML Basics", "content": "Machine learning builds systems that learn from data."}, + {"title": "Data Structures", "content": "Data structures like arrays, lists, and graphs organize data."}, + ] + return docs + +def main(): + print("==========================================================") + print("=== Leann Document Search Demo (DiskANN + Recompute) ===") + print("==========================================================") + + INDEX_DIR = Path("./test_indices") + INDEX_PATH = str(INDEX_DIR / "documents.diskann") + BACKEND_TO_TEST = "diskann" + + if INDEX_DIR.exists(): + print(f"--- Cleaning up old index directory: {INDEX_DIR} ---") + shutil.rmtree(INDEX_DIR) + + # --- 1. Build index --- + print(f"\n[PHASE 1] Building index using '{BACKEND_TO_TEST}' backend...") + + builder = LeannBuilder( + backend_name=BACKEND_TO_TEST, + graph_degree=32, + complexity=64 + ) + + documents = load_sample_documents() + print(f"Loaded {len(documents)} sample documents.") + for doc in documents: + builder.add_text(doc["content"], metadata={"title": doc["title"]}) + + builder.build_index(INDEX_PATH) + print(f"\nIndex built!") + + # --- 2. Basic search demo --- + print(f"\n[PHASE 2] Basic search using '{BACKEND_TO_TEST}' backend...") + searcher = LeannSearcher(index_path=INDEX_PATH) + + query = "What is machine learning?" + print(f"\nQuery: '{query}'") + + print("\n--- Basic search mode (PQ computation) ---") + start_time = time.time() + results = searcher.search(query, top_k=2) + basic_time = time.time() - start_time + + print(f"⏱️ Basic search time: {basic_time:.3f} seconds") + print(">>> Basic search results <<<") + for i, res in enumerate(results, 1): + print(f" {i}. ID: {res['id']}, Score: {res['score']:.4f}, Text: '{res['text']}', Metadata: {res['metadata']}") + + # --- 3.
Recompute search demo --- + print(f"\n[PHASE 3] Recompute search using embedding server...") + + print("\n--- Recompute search mode (get real embeddings via network) ---") + + # Configure recompute parameters + recompute_params = { + "recompute_beighbor_embeddings": True, # Enable network recomputation + "USE_DEFERRED_FETCH": False, # Don't use deferred fetch + "skip_search_reorder": True, # Skip search reordering + "dedup_node_dis": True, # Enable node distance deduplication + "prune_ratio": 0.1, # Pruning ratio 10% + "batch_recompute": False, # Don't use batch recomputation + "global_pruning": False, # Don't use global pruning + "zmq_port": 5555, # ZMQ port + "embedding_model": "sentence-transformers/all-mpnet-base-v2" + } + + print("Recompute parameter configuration:") + for key, value in recompute_params.items(): + print(f" {key}: {value}") + + print(f"\n🔄 Executing Recompute search...") + try: + start_time = time.time() + recompute_results = searcher.search(query, top_k=2, **recompute_params) + recompute_time = time.time() - start_time + + print(f"⏱️ Recompute search time: {recompute_time:.3f} seconds") + print(">>> Recompute search results <<<") + for i, res in enumerate(recompute_results, 1): + print(f" {i}. ID: {res['id']}, Score: {res['score']:.4f}, Text: '{res['text']}', Metadata: {res['metadata']}") + + # Compare results + print(f"\n--- Result comparison ---") + print(f"Basic search time: {basic_time:.3f} seconds") + print(f"Recompute time: {recompute_time:.3f} seconds") + + print("\nBasic search vs Recompute results:") + for i in range(min(len(results), len(recompute_results))): + basic_score = results[i]['score'] + recompute_score = recompute_results[i]['score'] + score_diff = abs(basic_score - recompute_score) + print(f" Position {i+1}: PQ={basic_score:.4f}, Recompute={recompute_score:.4f}, Difference={score_diff:.4f}") + + if recompute_time > basic_time: + print(f"✅ Recompute mode working correctly (more accurate but slower)") + else: + print(f"ℹ️ Recompute time is unusually fast, network recomputation may not be enabled") + + except Exception as e: + print(f"❌ Recompute search failed: {e}") + print("This usually indicates an embedding server connection issue") + + # --- 4. 
Chat demo --- + print(f"\n[PHASE 4] Starting chat session...") + chat = LeannChat(index_path=INDEX_PATH) + chat_response = chat.ask(query) + print(f"You: {query}") + print(f"Leann: {chat_response}") + + print("\n==========================================================") + print("✅ Demo finished successfully!") + print("==========================================================") + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/examples/main_cli_example.py b/examples/main_cli_example.py new file mode 100644 index 0000000..35c6a9c --- /dev/null +++ b/examples/main_cli_example.py @@ -0,0 +1,76 @@ +from llama_index.core import SimpleDirectoryReader, Settings +from llama_index.core.readers.base import BaseReader +from llama_index.node_parser.docling import DoclingNodeParser +from llama_index.readers.docling import DoclingReader +from docling_core.transforms.chunker.hybrid_chunker import HybridChunker +import asyncio +import os +import dotenv +from leann.api import LeannBuilder, LeannSearcher, LeannChat +import leann_backend_diskann # Import to ensure backend registration +import shutil +from pathlib import Path + +dotenv.load_dotenv() + +reader = DoclingReader(export_type=DoclingReader.ExportType.JSON) +file_extractor: dict[str, BaseReader] = { + ".docx": reader, + ".pptx": reader, + ".pdf": reader, + ".xlsx": reader, +} +node_parser = DoclingNodeParser( + chunker=HybridChunker(tokenizer="Qwen/Qwen3-Embedding-4B", max_tokens=10240) +) + +documents = SimpleDirectoryReader( + "examples/data", + recursive=True, + file_extractor=file_extractor, + encoding="utf-8", + required_exts=[".pdf", ".docx", ".pptx", ".xlsx"] +).load_data(show_progress=True) + +# Extract text from documents and prepare for Leann +all_texts = [] +for doc in documents: + # DoclingNodeParser returns Node objects, which have a text attribute + nodes = node_parser.get_nodes_from_documents([doc]) + for node in nodes: + all_texts.append(node.text) + +INDEX_DIR = Path("./test_pdf_index") +INDEX_PATH = str(INDEX_DIR / "pdf_documents.leann") + +if INDEX_DIR.exists(): + print(f"--- Cleaning up old index directory: {INDEX_DIR} ---") + shutil.rmtree(INDEX_DIR) + +print(f"\n[PHASE 1] Building Leann index...") + +builder = LeannBuilder( + backend_name="diskann", + embedding_model="sentence-transformers/all-mpnet-base-v2", # Using a common sentence transformer model + graph_degree=32, + complexity=64 +) + +print(f"Loaded {len(all_texts)} text chunks from documents.") +for chunk_text in all_texts: + builder.add_text(chunk_text) + +builder.build_index(INDEX_PATH) +print(f"\nLeann index built at {INDEX_PATH}!") + +async def main(): + print(f"\n[PHASE 2] Starting Leann chat session...") + chat = LeannChat(index_path=INDEX_PATH) + + query = "Based on the paper, what are the two main techniques LEANN uses to achieve low storage overhead and high retrieval accuracy?" 
+ print(f"You: {query}") + chat_response = chat.ask(query, recompute_beighbor_embeddings=True) + print(f"Leann: {chat_response}") + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/examples/simple_demo.py b/examples/simple_demo.py new file mode 100644 index 0000000..058841f --- /dev/null +++ b/examples/simple_demo.py @@ -0,0 +1,81 @@ +""" +Simple demo showing basic leann usage +Run: uv run python examples/simple_demo.py +""" + +from leann import LeannBuilder, LeannSearcher, LeannChat + + +def main(): + print("=== Leann Simple Demo ===") + print() + + # Sample knowledge base + chunks = [ + "Machine learning is a subset of artificial intelligence that enables computers to learn without being explicitly programmed.", + "Deep learning uses neural networks with multiple layers to process data and make decisions.", + "Natural language processing helps computers understand and generate human language.", + "Computer vision enables machines to interpret and understand visual information from images and videos.", + "Reinforcement learning teaches agents to make decisions by receiving rewards or penalties for their actions.", + "Data science combines statistics, programming, and domain expertise to extract insights from data.", + "Big data refers to extremely large datasets that require special tools and techniques to process.", + "Cloud computing provides on-demand access to computing resources over the internet.", + ] + + print("1. Building index (no embeddings stored)...") + builder = LeannBuilder( + embedding_model="sentence-transformers/all-mpnet-base-v2", + prune_ratio=0.7, # Keep 30% of connections + ) + builder.add_chunks(chunks) + builder.build_index("demo_knowledge.leann") + print() + + print("2. Searching with real-time embeddings...") + searcher = LeannSearcher("demo_knowledge.leann") + + queries = [ + "What is machine learning?", + "How does neural network work?", + "Tell me about data processing", + ] + + for query in queries: + print(f"Query: {query}") + results = searcher.search(query, top_k=2) + + for i, result in enumerate(results, 1): + print(f" {i}. Score: {result.score:.3f}") + print(f" Text: {result.text[:100]}...") + print() + + print("3. Memory stats:") + stats = searcher.get_memory_stats() + print(f" Cache size: {stats.embedding_cache_size}") + print(f" Cache memory: {stats.embedding_cache_memory_mb:.1f} MB") + print(f" Total chunks: {stats.total_chunks}") + print() + + print("4. Interactive chat demo:") + print(" (Note: Requires OpenAI API key for real responses)") + + chat = LeannChat("demo_knowledge.leann") + + # Demo questions + demo_questions: list[str] = [ + "What is the difference between machine learning and deep learning?", + "How is data science related to big data?", + ] + + for question in demo_questions: + print(f" Q: {question}") + response = chat.ask(question) + print(f" A: {response}") + print() + + print("Demo completed! 
Try running:") + print(" uv run python examples/document_search.py") + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/knowledge.leann.meta.json b/knowledge.leann.meta.json new file mode 100644 index 0000000..6a0d839 --- /dev/null +++ b/knowledge.leann.meta.json @@ -0,0 +1,32 @@ +{ + "version": "0.1.0", + "backend_name": "diskann", + "embedding_model": "sentence-transformers/all-mpnet-base-v2", + "num_chunks": 6, + "chunks": [ + { + "text": "Python is a powerful programming language", + "metadata": {} + }, + { + "text": "Machine learning transforms industries", + "metadata": {} + }, + { + "text": "Neural networks process complex data", + "metadata": {} + }, + { + "text": "Java is a powerful programming language", + "metadata": {} + }, + { + "text": "C++ is a powerful programming language", + "metadata": {} + }, + { + "text": "C# is a powerful programming language", + "metadata": {} + } + ] +} \ No newline at end of file diff --git a/packages/leann-backend-diskann/CMakeLists.txt b/packages/leann-backend-diskann/CMakeLists.txt new file mode 100644 index 0000000..ee9d932 --- /dev/null +++ b/packages/leann-backend-diskann/CMakeLists.txt @@ -0,0 +1,8 @@ +# packages/leann-backend-diskann/CMakeLists.txt (最终简化版) + +cmake_minimum_required(VERSION 3.20) +project(leann_backend_diskann_wrapper) + +# 告诉 CMake 直接进入 DiskANN 子模块并执行它自己的 CMakeLists.txt +# DiskANN 会自己处理所有事情,包括编译 Python 绑定 +add_subdirectory(src/third_party/DiskANN) diff --git a/packages/leann-backend-diskann/leann_backend_diskann/__init__.py b/packages/leann-backend-diskann/leann_backend_diskann/__init__.py new file mode 100644 index 0000000..8074aa4 --- /dev/null +++ b/packages/leann-backend-diskann/leann_backend_diskann/__init__.py @@ -0,0 +1,7 @@ +print("Initializing leann-backend-diskann...") + +try: + from .diskann_backend import DiskannBackend + print("INFO: DiskANN backend loaded successfully") +except ImportError as e: + print(f"WARNING: Could not import DiskANN backend: {e}") \ No newline at end of file diff --git a/packages/leann-backend-diskann/leann_backend_diskann/diskann_backend.py b/packages/leann-backend-diskann/leann_backend_diskann/diskann_backend.py new file mode 100644 index 0000000..71cf2fb --- /dev/null +++ b/packages/leann-backend-diskann/leann_backend_diskann/diskann_backend.py @@ -0,0 +1,299 @@ +import numpy as np +import os +import json +import struct +from pathlib import Path +from typing import Dict +import contextlib +import threading +import time +import atexit +import socket +import subprocess +import sys + +from leann.registry import register_backend +from leann.interface import ( + LeannBackendFactoryInterface, + LeannBackendBuilderInterface, + LeannBackendSearcherInterface +) +from . 
import _diskannpy as diskannpy + +METRIC_MAP = { + "mips": diskannpy.Metric.INNER_PRODUCT, + "l2": diskannpy.Metric.L2, + "cosine": diskannpy.Metric.COSINE, +} + +@contextlib.contextmanager +def chdir(path): + original_dir = os.getcwd() + os.chdir(path) + try: + yield + finally: + os.chdir(original_dir) + +def _write_vectors_to_bin(data: np.ndarray, file_path: str): + num_vectors, dim = data.shape + with open(file_path, 'wb') as f: + f.write(struct.pack('I', num_vectors)) + f.write(struct.pack('I', dim)) + f.write(data.tobytes()) + +def _check_port(port: int) -> bool: + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + return s.connect_ex(('localhost', port)) == 0 + +class EmbeddingServerManager: + def __init__(self): + self.server_process = None + self.server_port = None + atexit.register(self.stop_server) + + def start_server(self, port=5555, model_name="sentence-transformers/all-mpnet-base-v2"): + if self.server_process and self.server_process.poll() is None: + print(f"INFO: Reusing existing server process for this session (PID {self.server_process.pid})") + return True + + # 检查端口是否已被其他无关进程占用 + if _check_port(port): + print(f"WARNING: Port {port} is already in use. Assuming an external server is running and connecting to it.") + return True + + print(f"INFO: Starting session-level embedding server as a background process...") + + try: + command = [ + sys.executable, + "-m", "packages.leann-backend-diskann.leann_backend_diskann.embedding_server", + "--zmq-port", str(port), + "--model-name", model_name + ] + project_root = Path(__file__).parent.parent.parent.parent + print(f"INFO: Running command from project root: {project_root}") + self.server_process = subprocess.Popen( + command, + cwd=project_root, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + encoding='utf-8' + ) + self.server_port = port + print(f"INFO: Server process started with PID: {self.server_process.pid}") + + max_wait, wait_interval = 30, 0.5 + for _ in range(int(max_wait / wait_interval)): + if _check_port(port): + print(f"✅ Embedding server is up and ready for this session.") + log_thread = threading.Thread(target=self._log_monitor, daemon=True) + log_thread.start() + return True + if self.server_process.poll() is not None: + print("❌ ERROR: Server process terminated unexpectedly during startup.") + self._log_monitor() + return False + time.sleep(wait_interval) + + print(f"❌ ERROR: Server process failed to start listening within {max_wait} seconds.") + self.stop_server() + return False + + except Exception as e: + print(f"❌ ERROR: Failed to start embedding server process: {e}") + return False + + def _log_monitor(self): + if not self.server_process: + return + try: + if self.server_process.stdout: + for line in iter(self.server_process.stdout.readline, ''): + print(f"[EmbeddingServer LOG]: {line.strip()}") + self.server_process.stdout.close() + if self.server_process.stderr: + for line in iter(self.server_process.stderr.readline, ''): + print(f"[EmbeddingServer ERROR]: {line.strip()}") + self.server_process.stderr.close() + except Exception as e: + print(f"Log monitor error: {e}") + + def stop_server(self): + if self.server_process and self.server_process.poll() is None: + print(f"INFO: Terminating session server process (PID: {self.server_process.pid})...") + self.server_process.terminate() + try: + self.server_process.wait(timeout=5) + print("INFO: Server process terminated.") + except subprocess.TimeoutExpired: + print("WARNING: Server process did not terminate gracefully, killing 
it.") + self.server_process.kill() + self.server_process = None + +@register_backend("diskann") +class DiskannBackend(LeannBackendFactoryInterface): + @staticmethod + def builder(**kwargs) -> LeannBackendBuilderInterface: + return DiskannBuilder(**kwargs) + + @staticmethod + def searcher(index_path: str, **kwargs) -> LeannBackendSearcherInterface: + path = Path(index_path) + meta_path = path.parent / f"{path.name}.meta.json" + if not meta_path.exists(): + raise FileNotFoundError(f"Leann metadata file not found at {meta_path}. Cannot infer vector dimension for searcher.") + with open(meta_path, 'r') as f: + meta = json.load(f) + + try: + from sentence_transformers import SentenceTransformer + model = SentenceTransformer(meta.get("embedding_model")) + dimensions = model.get_sentence_embedding_dimension() + kwargs['dimensions'] = dimensions + except ImportError: + raise ImportError("sentence-transformers is required to infer embedding dimensions. Please install it.") + except Exception as e: + raise RuntimeError(f"Could not load SentenceTransformer model to get dimension: {e}") + + return DiskannSearcher(index_path, **kwargs) + +class DiskannBuilder(LeannBackendBuilderInterface): + def __init__(self, **kwargs): + self.build_params = kwargs + + def build(self, data: np.ndarray, index_path: str, **kwargs): + path = Path(index_path) + index_dir = path.parent + index_prefix = path.stem + + index_dir.mkdir(parents=True, exist_ok=True) + + if data.dtype != np.float32: + data = data.astype(np.float32) + if not data.flags['C_CONTIGUOUS']: + data = np.ascontiguousarray(data) + + data_filename = f"{index_prefix}_data.bin" + _write_vectors_to_bin(data, index_dir / data_filename) + + build_kwargs = {**self.build_params, **kwargs} + metric_str = build_kwargs.get("distance_metric", "mips").lower() + metric_enum = METRIC_MAP.get(metric_str) + if metric_enum is None: + raise ValueError(f"Unsupported distance_metric '{metric_str}'.") + + complexity = build_kwargs.get("complexity", 64) + graph_degree = build_kwargs.get("graph_degree", 32) + final_index_ram_limit = build_kwargs.get("search_memory_maximum", 4.0) + indexing_ram_budget = build_kwargs.get("build_memory_maximum", 8.0) + num_threads = build_kwargs.get("num_threads", 8) + pq_disk_bytes = build_kwargs.get("pq_disk_bytes", 0) + codebook_prefix = "" + + print(f"INFO: Building DiskANN index for {data.shape[0]} vectors with metric {metric_enum}...") + + try: + with chdir(index_dir): + diskannpy.build_disk_float_index( + metric_enum, + data_filename, + index_prefix, + complexity, + graph_degree, + final_index_ram_limit, + indexing_ram_budget, + num_threads, + pq_disk_bytes, + codebook_prefix + ) + print(f"✅ DiskANN index built successfully at '{index_dir / index_prefix}'") + except Exception as e: + print(f"💥 ERROR: DiskANN index build failed. 
Exception: {e}") + raise + finally: + temp_data_file = index_dir / data_filename + if temp_data_file.exists(): + os.remove(temp_data_file) + +class DiskannSearcher(LeannBackendSearcherInterface): + def __init__(self, index_path: str, **kwargs): + path = Path(index_path) + index_dir = path.parent + index_prefix = path.stem + metric_str = kwargs.get("distance_metric", "mips").lower() + metric_enum = METRIC_MAP.get(metric_str) + if metric_enum is None: + raise ValueError(f"Unsupported distance_metric '{metric_str}'.") + + num_threads = kwargs.get("num_threads", 8) + num_nodes_to_cache = kwargs.get("num_nodes_to_cache", 0) + dimensions = kwargs.get("dimensions") + if not dimensions: + raise ValueError("Vector dimension not provided to DiskannSearcher.") + + try: + full_index_prefix = str(index_dir / index_prefix) + self._index = diskannpy.StaticDiskFloatIndex( + metric_enum, full_index_prefix, num_threads, num_nodes_to_cache, 1, "", "" + ) + self.num_threads = num_threads + self.embedding_server_manager = EmbeddingServerManager() + print("✅ DiskANN index loaded successfully.") + except Exception as e: + print(f"💥 ERROR: Failed to load DiskANN index. Exception: {e}") + raise + + def search(self, query: np.ndarray, top_k: int, **kwargs) -> Dict[str, any]: + complexity = kwargs.get("complexity", 100) + beam_width = kwargs.get("beam_width", 4) + + USE_DEFERRED_FETCH = kwargs.get("USE_DEFERRED_FETCH", False) + skip_search_reorder = kwargs.get("skip_search_reorder", False) + recompute_beighbor_embeddings = kwargs.get("recompute_beighbor_embeddings", False) + dedup_node_dis = kwargs.get("dedup_node_dis", False) + prune_ratio = kwargs.get("prune_ratio", 0.0) + batch_recompute = kwargs.get("batch_recompute", False) + global_pruning = kwargs.get("global_pruning", False) + + if recompute_beighbor_embeddings: + print(f"INFO: DiskANN ZMQ mode enabled - ensuring embedding server is running") + zmq_port = kwargs.get("zmq_port", 5555) + embedding_model = kwargs.get("embedding_model", "sentence-transformers/all-mpnet-base-v2") + + if not self.embedding_server_manager.start_server(zmq_port, embedding_model): + print(f"WARNING: Failed to start embedding server, falling back to PQ computation") + kwargs['recompute_beighbor_embeddings'] = False + + if query.dtype != np.float32: + query = query.astype(np.float32) + if query.ndim == 1: + query = np.expand_dims(query, axis=0) + + try: + labels, distances = self._index.batch_search( + query, + query.shape[0], + top_k, + complexity, + beam_width, + self.num_threads, + USE_DEFERRED_FETCH, + skip_search_reorder, + recompute_beighbor_embeddings, + dedup_node_dis, + prune_ratio, + batch_recompute, + global_pruning + ) + return {"labels": labels, "distances": distances} + except Exception as e: + print(f"💥 ERROR: DiskANN search failed. Exception: {e}") + batch_size = query.shape[0] + return {"labels": np.full((batch_size, top_k), -1, dtype=np.int64), + "distances": np.full((batch_size, top_k), float('inf'), dtype=np.float32)} + + def __del__(self): + if hasattr(self, 'embedding_server_manager'): + self.embedding_server_manager.stop_server() \ No newline at end of file diff --git a/packages/leann-backend-diskann/leann_backend_diskann/embedding_pb2.py b/packages/leann-backend-diskann/leann_backend_diskann/embedding_pb2.py new file mode 100644 index 0000000..8285be2 --- /dev/null +++ b/packages/leann-backend-diskann/leann_backend_diskann/embedding_pb2.py @@ -0,0 +1,27 @@ +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! 
+# source: embedding.proto +"""Generated protocol buffer code.""" +from google.protobuf.internal import builder as _builder +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import symbol_database as _symbol_database +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + + + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0f\x65mbedding.proto\x12\x0eprotoembedding\"(\n\x14NodeEmbeddingRequest\x12\x10\n\x08node_ids\x18\x01 \x03(\r\"Y\n\x15NodeEmbeddingResponse\x12\x17\n\x0f\x65mbeddings_data\x18\x01 \x01(\x0c\x12\x12\n\ndimensions\x18\x02 \x03(\x05\x12\x13\n\x0bmissing_ids\x18\x03 \x03(\rb\x06proto3') + +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'embedding_pb2', globals()) +if _descriptor._USE_C_DESCRIPTORS == False: + + DESCRIPTOR._options = None + _NODEEMBEDDINGREQUEST._serialized_start=35 + _NODEEMBEDDINGREQUEST._serialized_end=75 + _NODEEMBEDDINGRESPONSE._serialized_start=77 + _NODEEMBEDDINGRESPONSE._serialized_end=166 +# @@protoc_insertion_point(module_scope) diff --git a/packages/leann-backend-diskann/leann_backend_diskann/embedding_server.py b/packages/leann-backend-diskann/leann_backend_diskann/embedding_server.py new file mode 100644 index 0000000..f2e715d --- /dev/null +++ b/packages/leann-backend-diskann/leann_backend_diskann/embedding_server.py @@ -0,0 +1,397 @@ +#!/usr/bin/env python3 +""" +Embedding server for leann-backend-diskann - Fixed ZMQ REQ-REP pattern +""" + +import pickle +import argparse +import threading +import time + +from transformers import AutoTokenizer, AutoModel +import os +from contextlib import contextmanager +import zmq +import numpy as np + +RED = "\033[91m" +RESET = "\033[0m" + +# 简化的文档存储 - 替代 LazyPassages +class SimpleDocumentStore: + """简化的文档存储,支持任意ID""" + def __init__(self, documents: dict = None): + self.documents = documents or {} + # 默认演示文档 + self.default_docs = { + 0: "Python is a high-level, interpreted language known for simplicity.", + 1: "Machine learning builds systems that learn from data.", + 2: "Data structures like arrays, lists, and graphs organize data.", + } + + def __getitem__(self, doc_id): + doc_id = int(doc_id) + + # 优先使用指定的文档 + if doc_id in self.documents: + return {"text": self.documents[doc_id]} + + # 其次使用默认演示文档 + if doc_id in self.default_docs: + return {"text": self.default_docs[doc_id]} + + # 对于任意其他ID,返回通用文档 + fallback_docs = [ + "This is a general document about technology and programming concepts.", + "This document discusses machine learning and artificial intelligence topics.", + "This content covers data structures, algorithms, and computer science fundamentals.", + "This is a document about software engineering and development practices.", + "This content focuses on databases, data management, and information systems." 
+ ] + + # 根据ID选择一个fallback文档 + fallback_text = fallback_docs[doc_id % len(fallback_docs)] + return {"text": f"[ID:{doc_id}] {fallback_text}"} + + def __len__(self): + return len(self.documents) + len(self.default_docs) + +def create_embedding_server_thread( + zmq_port=5555, + model_name="sentence-transformers/all-mpnet-base-v2", + max_batch_size=128, +): + """ + 在当前线程中创建并运行 embedding server + 这个函数设计为在单独的线程中调用 + """ + print(f"INFO: Initializing embedding server thread on port {zmq_port}") + + try: + # 检查端口是否已被占用 + import socket + def check_port(port): + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + return s.connect_ex(('localhost', port)) == 0 + + if check_port(zmq_port): + print(f"{RED}Port {zmq_port} is already in use{RESET}") + return + + # 初始化模型 + tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True) + import torch + + # 选择设备 + mps_available = hasattr(torch.backends, 'mps') and torch.backends.mps.is_available() + cuda_available = torch.cuda.is_available() + + if cuda_available: + device = torch.device("cuda") + print("INFO: Using CUDA device") + elif mps_available: + device = torch.device("mps") + print("INFO: Using MPS device (Apple Silicon)") + else: + device = torch.device("cpu") + print("INFO: Using CPU device") + + # 加载模型 + print(f"INFO: Loading model {model_name}") + model = AutoModel.from_pretrained(model_name).to(device).eval() + + # 优化模型 + if cuda_available or mps_available: + try: + model = model.half() + model = torch.compile(model) + print(f"INFO: Using FP16 precision with model: {model_name}") + except Exception as e: + print(f"WARNING: Model optimization failed: {e}") + + # 默认演示文档 + demo_documents = { + 0: "Python is a high-level, interpreted language known for simplicity.", + 1: "Machine learning builds systems that learn from data.", + 2: "Data structures like arrays, lists, and graphs organize data.", + } + + passages = SimpleDocumentStore(demo_documents) + print(f"INFO: Loaded {len(passages)} demo documents") + + class DeviceTimer: + """设备计时器""" + def __init__(self, name="", device=device): + self.name = name + self.device = device + self.start_time = 0 + self.end_time = 0 + + if cuda_available: + self.start_event = torch.cuda.Event(enable_timing=True) + self.end_event = torch.cuda.Event(enable_timing=True) + else: + self.start_event = None + self.end_event = None + + @contextmanager + def timing(self): + self.start() + yield + self.end() + + def start(self): + if cuda_available: + torch.cuda.synchronize() + self.start_event.record() + else: + if self.device.type == "mps": + torch.mps.synchronize() + self.start_time = time.time() + + def end(self): + if cuda_available: + self.end_event.record() + torch.cuda.synchronize() + else: + if self.device.type == "mps": + torch.mps.synchronize() + self.end_time = time.time() + + def elapsed_time(self): + if cuda_available: + return self.start_event.elapsed_time(self.end_event) / 1000.0 + else: + return self.end_time - self.start_time + + def print_elapsed(self): + print(f"Time taken for {self.name}: {self.elapsed_time():.6f} seconds") + + def process_batch(texts_batch, ids_batch, missing_ids): + """处理文本批次""" + batch_size = len(texts_batch) + print(f"INFO: Processing batch of size {batch_size}") + + tokenize_timer = DeviceTimer("tokenization (batch)", device) + to_device_timer = DeviceTimer("transfer to device (batch)", device) + embed_timer = DeviceTimer("embedding (batch)", device) + pool_timer = DeviceTimer("mean pooling (batch)", device) + + with tokenize_timer.timing(): + encoded_batch = 
tokenizer.batch_encode_plus( + texts_batch, + padding="max_length", + truncation=True, + max_length=256, + return_tensors="pt", + return_token_type_ids=False, + ) + tokenize_timer.print_elapsed() + + seq_length = encoded_batch["input_ids"].size(1) + print(f"Batch size: {batch_size}, Sequence length: {seq_length}") + + with to_device_timer.timing(): + enc = {k: v.to(device) for k, v in encoded_batch.items()} + to_device_timer.print_elapsed() + + with torch.no_grad(): + with embed_timer.timing(): + out = model(enc["input_ids"], enc["attention_mask"]) + embed_timer.print_elapsed() + + with pool_timer.timing(): + hidden_states = out.last_hidden_state if hasattr(out, "last_hidden_state") else out + mask_expanded = enc["attention_mask"].unsqueeze(-1).expand(hidden_states.size()).float() + sum_embeddings = torch.sum(hidden_states * mask_expanded, 1) + sum_mask = torch.clamp(mask_expanded.sum(1), min=1e-9) + batch_embeddings = sum_embeddings / sum_mask + pool_timer.print_elapsed() + + return batch_embeddings.cpu().numpy() + + # ZMQ server 主循环 - 修改为REP套接字 + context = zmq.Context() + socket = context.socket(zmq.ROUTER) # 改为REP套接字 + socket.bind(f"tcp://127.0.0.1:{zmq_port}") + print(f"INFO: ZMQ ROUTER server listening on port {zmq_port}") + + # 设置超时 + socket.setsockopt(zmq.RCVTIMEO, 5000) # 5秒接收超时 + socket.setsockopt(zmq.SNDTIMEO, 300000) # 300秒发送超时 + + from . import embedding_pb2 + + print(f"INFO: Embedding server ready to serve requests") + + while True: + try: + parts = socket.recv_multipart() + + # --- 恢复稳健的消息格式判断 --- + # 必须检查 parts 的长度,避免 IndexError + if len(parts) >= 3: + identity = parts[0] + # empty = parts[1] # 中间的空帧我们通常不关心 + message = parts[2] + elif len(parts) == 2: + # 也能处理没有空帧的情况 + identity = parts[0] + message = parts[1] + else: + # 如果收到格式错误的消息,打印警告并忽略它,而不是崩溃 + print(f"WARNING: Received unexpected message format with {len(parts)} parts. 
Ignoring.") + continue + print(f"INFO: Received ZMQ request from client {identity.hex()[:8]}, size {len(message)} bytes") + + e2e_start = time.time() + lookup_timer = DeviceTimer("text lookup", device) + + # 解析请求 + req_proto = embedding_pb2.NodeEmbeddingRequest() + req_proto.ParseFromString(message) + node_ids = req_proto.node_ids + print(f"INFO: Request for {len(node_ids)} node embeddings: {list(node_ids)}") + + # 添加调试信息 + if len(node_ids) > 0: + print(f"DEBUG: Node ID range: {min(node_ids)} to {max(node_ids)}") + + # 查找文本 + texts = [] + missing_ids = [] + with lookup_timer.timing(): + for nid in node_ids: + txtinfo = passages[nid] + txt = txtinfo["text"] + texts.append(txt) + lookup_timer.print_elapsed() + + if missing_ids: + print(f"WARNING: Missing passages for IDs: {missing_ids}") + + # 处理批次 + total_size = len(texts) + print(f"INFO: Total batch size: {total_size}, max_batch_size: {max_batch_size}") + + all_embeddings = [] + + if total_size > max_batch_size: + print(f"INFO: Splitting batch of size {total_size} into chunks of {max_batch_size}") + for i in range(0, total_size, max_batch_size): + end_idx = min(i + max_batch_size, total_size) + print(f"INFO: Processing chunk {i//max_batch_size + 1}/{(total_size + max_batch_size - 1)//max_batch_size}: items {i} to {end_idx-1}") + + chunk_texts = texts[i:end_idx] + chunk_ids = node_ids[i:end_idx] + + embeddings_chunk = process_batch(chunk_texts, chunk_ids, missing_ids) + all_embeddings.append(embeddings_chunk) + + if cuda_available: + torch.cuda.empty_cache() + elif device.type == "mps": + torch.mps.empty_cache() + + hidden = np.vstack(all_embeddings) + print(f"INFO: Combined embeddings shape: {hidden.shape}") + else: + hidden = process_batch(texts, node_ids, missing_ids) + + # 序列化响应 + ser_start = time.time() + + resp_proto = embedding_pb2.NodeEmbeddingResponse() + hidden_contiguous = np.ascontiguousarray(hidden, dtype=np.float32) + resp_proto.embeddings_data = hidden_contiguous.tobytes() + resp_proto.dimensions.append(hidden_contiguous.shape[0]) + resp_proto.dimensions.append(hidden_contiguous.shape[1]) + resp_proto.missing_ids.extend(missing_ids) + + response_data = resp_proto.SerializeToString() + + # REP 套接字发送单个响应 + socket.send_multipart([identity, b'', response_data]) + + ser_end = time.time() + + print(f"INFO: Serialize time: {ser_end - ser_start:.6f} seconds") + + if device.type == "cuda": + torch.cuda.synchronize() + elif device.type == "mps": + torch.mps.synchronize() + e2e_end = time.time() + print(f"INFO: ZMQ E2E time: {e2e_end - e2e_start:.6f} seconds") + + except zmq.Again: + print("INFO: ZMQ socket timeout, continuing to listen") + # REP套接字不需要重新创建,只需要继续监听 + continue + except Exception as e: + print(f"ERROR: Error in ZMQ server: {e}") + try: + # 发送空响应以维持REQ-REP状态 + empty_resp = embedding_pb2.NodeEmbeddingResponse() + socket.send(empty_resp.SerializeToString()) + except: + # 如果发送失败,重新创建socket + socket.close() + socket = context.socket(zmq.REP) + socket.bind(f"tcp://127.0.0.1:{zmq_port}") + socket.setsockopt(zmq.RCVTIMEO, 5000) + socket.setsockopt(zmq.SNDTIMEO, 300000) + print("INFO: ZMQ socket recreated after error") + + except Exception as e: + print(f"ERROR: Failed to start embedding server: {e}") + raise + + +# 保持原有的 create_embedding_server 函数不变,只添加线程化版本 +def create_embedding_server( + domain="demo", + load_passages=True, + load_embeddings=False, + use_fp16=True, + use_int8=False, + use_cuda_graphs=False, + zmq_port=5555, + max_batch_size=128, + lazy_load_passages=False, + model_name="sentence-transformers/all-mpnet-base-v2", 
+): + """ + 原有的 create_embedding_server 函数保持不变 + 这个是阻塞版本,用于直接运行 + """ + create_embedding_server_thread(zmq_port, model_name, max_batch_size) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Embedding service") + parser.add_argument("--zmq-port", type=int, default=5555, help="ZMQ port to run on") + parser.add_argument("--domain", type=str, default="demo", help="Domain name") + parser.add_argument("--load-passages", action="store_true", default=True) + parser.add_argument("--load-embeddings", action="store_true", default=False) + parser.add_argument("--use-fp16", action="store_true", default=False) + parser.add_argument("--use-int8", action="store_true", default=False) + parser.add_argument("--use-cuda-graphs", action="store_true", default=False) + parser.add_argument("--max-batch-size", type=int, default=128, help="Maximum batch size before splitting") + parser.add_argument("--lazy-load-passages", action="store_true", default=True) + parser.add_argument("--model-name", type=str, default="sentence-transformers/all-mpnet-base-v2", + help="Embedding model name") + args = parser.parse_args() + + create_embedding_server( + domain=args.domain, + load_passages=args.load_passages, + load_embeddings=args.load_embeddings, + use_fp16=args.use_fp16, + use_int8=args.use_int8, + use_cuda_graphs=args.use_cuda_graphs, + zmq_port=args.zmq_port, + max_batch_size=args.max_batch_size, + lazy_load_passages=args.lazy_load_passages, + model_name=args.model_name, + ) \ No newline at end of file diff --git a/packages/leann-backend-diskann/pyproject.toml b/packages/leann-backend-diskann/pyproject.toml new file mode 100644 index 0000000..70383e8 --- /dev/null +++ b/packages/leann-backend-diskann/pyproject.toml @@ -0,0 +1,16 @@ +[build-system] +requires = ["scikit-build-core>=0.10", "pybind11>=2.12.0", "numpy"] +build-backend = "scikit_build_core.build" + +[project] +name = "leann-backend-diskann" +version = "0.1.0" +dependencies = ["leann-core==0.1.0", "numpy"] + +[tool.scikit-build] +# 关键:简化的 CMake 路径 +cmake.source-dir = "third_party/DiskANN" +# 关键:Python 包在根目录,路径完全匹配 +wheel.packages = ["leann_backend_diskann"] +# 使用默认的 redirect 模式 +editable.mode = "redirect" \ No newline at end of file diff --git a/packages/leann-backend-diskann/third_party/DiskANN/.clang-format b/packages/leann-backend-diskann/third_party/DiskANN/.clang-format new file mode 100644 index 0000000..ad3192f --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/.clang-format @@ -0,0 +1,6 @@ +--- +BasedOnStyle: Microsoft +--- +Language: Cpp +SortIncludes: false +... diff --git a/packages/leann-backend-diskann/third_party/DiskANN/.gitattributes b/packages/leann-backend-diskann/third_party/DiskANN/.gitattributes new file mode 100644 index 0000000..fbf9358 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/.gitattributes @@ -0,0 +1,14 @@ +# Set the default behavior, in case people don't have core.autocrlf set. +* text=auto + +# Explicitly declare text files you want to always be normalized and converted +# to native line endings on checkout. +*.c text +*.h text + +# Declare files that will always have CRLF line endings on checkout. +*.sln text eol=crlf + +# Denote all files that are truly binary and should not be modified. 
+*.png binary +*.jpg binary diff --git a/packages/leann-backend-diskann/third_party/DiskANN/.github/ISSUE_TEMPLATE/bug_report.md b/packages/leann-backend-diskann/third_party/DiskANN/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000..829d38d --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,40 @@ +--- +name: Bug report +about: Bug reports help us improve! Thanks for submitting yours! +title: "[BUG] " +labels: bug +assignees: '' + +--- + +## Expected Behavior +Tell us what should happen + +## Actual Behavior +Tell us what happens instead + +## Example Code +Please see [How to create a Minimal, Reproducible example](https://stackoverflow.com/help/minimal-reproducible-example) for some guidance on creating the best possible example of the problem +```bash + +``` + +## Dataset Description +Please tell us about the shape and datatype of your data, (e.g. 128 dimensions, 12.3 billion points, floats) +- Dimensions: +- Number of Points: +- Data type: + +## Error +``` +Paste the full error, with any sensitive information minimally redacted and marked $$REDACTED$$ + +``` + +## Your Environment +* Operating system (e.g. Windows 11 Pro, Ubuntu 22.04.1 LTS) +* DiskANN version (or commit built from) + +## Additional Details +Any other contextual information you might feel is important. + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/.github/ISSUE_TEMPLATE/config.yml b/packages/leann-backend-diskann/third_party/DiskANN/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 0000000..99d680b --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,2 @@ +blank_issues_enabled: false + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/.github/ISSUE_TEMPLATE/feature_request.md b/packages/leann-backend-diskann/third_party/DiskANN/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 0000000..9c3c58c --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,25 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: '' +labels: enhancement +assignees: '' + +--- + +## Is your feature request related to a problem? Please describe. +A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] + +## Describe the solution you'd like +A clear and concise description of what you want to happen. + +## Describe alternatives you've considered +A clear and concise description of any alternative solutions or features you've considered. + +## Provide references (if applicable) +If your feature request is related to a published algorithm/idea, please provide links to +any relevant articles or webpages. + +## Additional context +Add any other context or screenshots about the feature request here. + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/.github/ISSUE_TEMPLATE/usage-question.md b/packages/leann-backend-diskann/third_party/DiskANN/.github/ISSUE_TEMPLATE/usage-question.md new file mode 100644 index 0000000..7532f76 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/.github/ISSUE_TEMPLATE/usage-question.md @@ -0,0 +1,11 @@ +--- +name: Usage Question +about: Ask us a question about DiskANN! +title: "[Question]" +labels: question +assignees: '' + +--- + +This is our forum for asking whatever DiskANN question you'd like! 
No need to feel shy - we're happy to talk about use cases and optimal tuning strategies! + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/.github/PULL_REQUEST_TEMPLATE.md b/packages/leann-backend-diskann/third_party/DiskANN/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..0b97019 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,22 @@ + +- [ ] Does this PR have a descriptive title that could go in our release notes? +- [ ] Does this PR add any new dependencies? +- [ ] Does this PR modify any existing APIs? + - [ ] Is the change to the API backwards compatible? +- [ ] Should this result in any changes to our documentation, either updating existing docs or adding new ones? + +#### Reference Issues/PRs + + +#### What does this implement/fix? Briefly explain your changes. + +#### Any other comments? + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/.github/actions/build/action.yml b/packages/leann-backend-diskann/third_party/DiskANN/.github/actions/build/action.yml new file mode 100644 index 0000000..219d9d6 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/.github/actions/build/action.yml @@ -0,0 +1,39 @@ +name: 'DiskANN Build Bootstrap' +description: 'Prepares DiskANN build environment and executes build' +runs: + using: "composite" + steps: + # ------------ Linux Build --------------- + - name: Prepare and Execute Build + if: ${{ runner.os == 'Linux' }} + run: | + sudo scripts/dev/install-dev-deps-ubuntu.bash + cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DUNIT_TEST=True + cmake --build build -- -j + cmake --install build --prefix="dist" + shell: bash + # ------------ End Linux Build --------------- + # ------------ Windows Build --------------- + - name: Add VisualStudio command line tools into path + if: runner.os == 'Windows' + uses: ilammy/msvc-dev-cmd@v1 + - name: Run configure and build for Windows + if: runner.os == 'Windows' + run: | + mkdir build && cd build && cmake .. -DUNIT_TEST=True && msbuild diskann.sln /m /nologo /t:Build /p:Configuration="Release" /property:Platform="x64" -consoleloggerparameters:"ErrorsOnly;Summary" + cd .. + mkdir dist + mklink /j .\dist\bin .\x64\Release\ + shell: cmd + # ------------ End Windows Build --------------- + # ------------ Windows Build With EXEC_ENV_OLS and USE_BING_INFRA --------------- + - name: Add VisualStudio command line tools into path + if: runner.os == 'Windows' + uses: ilammy/msvc-dev-cmd@v1 + - name: Run configure and build for Windows with Bing feature flags + if: runner.os == 'Windows' + run: | + mkdir build_bing && cd build_bing && cmake .. -DEXEC_ENV_OLS=1 -DUSE_BING_INFRA=1 -DUNIT_TEST=True && msbuild diskann.sln /m /nologo /t:Build /p:Configuration="Release" /property:Platform="x64" -consoleloggerparameters:"ErrorsOnly;Summary" + cd .. + shell: cmd + # ------------ End Windows Build --------------- diff --git a/packages/leann-backend-diskann/third_party/DiskANN/.github/actions/format-check/action.yml b/packages/leann-backend-diskann/third_party/DiskANN/.github/actions/format-check/action.yml new file mode 100644 index 0000000..6ed08c0 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/.github/actions/format-check/action.yml @@ -0,0 +1,13 @@ +name: 'Checking code formatting...' +description: 'Ensures code complies with code formatting rules' +runs: + using: "composite" + steps: + - name: Checking code formatting... 
+ run: | + sudo apt install clang-format + find include -name '*.h' -type f -print0 | xargs -0 -P 16 /usr/bin/clang-format --Werror --dry-run + find src -name '*.cpp' -type f -print0 | xargs -0 -P 16 /usr/bin/clang-format --Werror --dry-run + find apps -name '*.cpp' -type f -print0 | xargs -0 -P 16 /usr/bin/clang-format --Werror --dry-run + find python -name '*.cpp' -type f -print0 | xargs -0 -P 16 /usr/bin/clang-format --Werror --dry-run + shell: bash diff --git a/packages/leann-backend-diskann/third_party/DiskANN/.github/actions/generate-high-dim-random/action.yml b/packages/leann-backend-diskann/third_party/DiskANN/.github/actions/generate-high-dim-random/action.yml new file mode 100644 index 0000000..65e9b7e --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/.github/actions/generate-high-dim-random/action.yml @@ -0,0 +1,28 @@ +name: 'Generating Random Data (Basic)' +description: 'Generates the random data files used in acceptance tests' +runs: + using: "composite" + steps: + - name: Generate Random Data (Basic) + run: | + mkdir data + + echo "Generating random 1020,1024,1536D float and 4096 int8 vectors for index" + dist/bin/rand_data_gen --data_type float --output_file data/rand_float_1020D_5K_norm1.0.bin -D 1020 -N 5000 --norm 1.0 + #dist/bin/rand_data_gen --data_type float --output_file data/rand_float_1024D_5K_norm1.0.bin -D 1024 -N 5000 --norm 1.0 + dist/bin/rand_data_gen --data_type float --output_file data/rand_float_1536D_5K_norm1.0.bin -D 1536 -N 5000 --norm 1.0 + dist/bin/rand_data_gen --data_type int8 --output_file data/rand_int8_4096D_5K_norm1.0.bin -D 4096 -N 5000 --norm 1.0 + + echo "Generating random 1020,1024,1536D float and 4096D int8 avectors for query" + dist/bin/rand_data_gen --data_type float --output_file data/rand_float_1020D_1K_norm1.0.bin -D 1020 -N 1000 --norm 1.0 + #dist/bin/rand_data_gen --data_type float --output_file data/rand_float_1024D_1K_norm1.0.bin -D 1024 -N 1000 --norm 1.0 + dist/bin/rand_data_gen --data_type float --output_file data/rand_float_1536D_1K_norm1.0.bin -D 1536 -N 1000 --norm 1.0 + dist/bin/rand_data_gen --data_type int8 --output_file data/rand_int8_4096D_1K_norm1.0.bin -D 4096 -N 1000 --norm 1.0 + + echo "Computing ground truth for 1020,1024,1536D float and 4096D int8 avectors for query" + dist/bin/compute_groundtruth --data_type float --dist_fn l2 --base_file data/rand_float_1020D_5K_norm1.0.bin --query_file data/rand_float_1020D_1K_norm1.0.bin --gt_file data/l2_rand_float_1020D_5K_norm1.0_1020D_1K_norm1.0_gt100 --K 100 + #dist/bin/compute_groundtruth --data_type float --dist_fn l2 --base_file data/rand_float_1024D_5K_norm1.0.bin --query_file data/rand_float_1024D_1K_norm1.0.bin --gt_file data/l2_rand_float_1024D_5K_norm1.0_1024D_1K_norm1.0_gt100 --K 100 + dist/bin/compute_groundtruth --data_type float --dist_fn l2 --base_file data/rand_float_1536D_5K_norm1.0.bin --query_file data/rand_float_1536D_1K_norm1.0.bin --gt_file data/l2_rand_float_1536D_5K_norm1.0_1536D_1K_norm1.0_gt100 --K 100 + dist/bin/compute_groundtruth --data_type int8 --dist_fn l2 --base_file data/rand_int8_4096D_5K_norm1.0.bin --query_file data/rand_int8_4096D_1K_norm1.0.bin --gt_file data/l2_rand_int8_4096D_5K_norm1.0_4096D_1K_norm1.0_gt100 --K 100 + + shell: bash diff --git a/packages/leann-backend-diskann/third_party/DiskANN/.github/actions/generate-random/action.yml b/packages/leann-backend-diskann/third_party/DiskANN/.github/actions/generate-random/action.yml new file mode 100644 index 0000000..2755067 --- /dev/null +++ 
b/packages/leann-backend-diskann/third_party/DiskANN/.github/actions/generate-random/action.yml @@ -0,0 +1,38 @@ +name: 'Generating Random Data (Basic)' +description: 'Generates the random data files used in acceptance tests' +runs: + using: "composite" + steps: + - name: Generate Random Data (Basic) + run: | + mkdir data + + echo "Generating random vectors for index" + dist/bin/rand_data_gen --data_type float --output_file data/rand_float_10D_10K_norm1.0.bin -D 10 -N 10000 --norm 1.0 + dist/bin/rand_data_gen --data_type float --output_file data/rand_float_10D_10K_unnorm.bin -D 10 -N 10000 --rand_scaling 2.0 + dist/bin/rand_data_gen --data_type int8 --output_file data/rand_int8_10D_10K_norm50.0.bin -D 10 -N 10000 --norm 50.0 + dist/bin/rand_data_gen --data_type uint8 --output_file data/rand_uint8_10D_10K_norm50.0.bin -D 10 -N 10000 --norm 50.0 + + echo "Generating random vectors for query" + dist/bin/rand_data_gen --data_type float --output_file data/rand_float_10D_1K_norm1.0.bin -D 10 -N 1000 --norm 1.0 + dist/bin/rand_data_gen --data_type float --output_file data/rand_float_10D_1K_unnorm.bin -D 10 -N 1000 --rand_scaling 2.0 + dist/bin/rand_data_gen --data_type int8 --output_file data/rand_int8_10D_1K_norm50.0.bin -D 10 -N 1000 --norm 50.0 + dist/bin/rand_data_gen --data_type uint8 --output_file data/rand_uint8_10D_1K_norm50.0.bin -D 10 -N 1000 --norm 50.0 + + echo "Computing ground truth for floats across l2, mips, and cosine distance functions" + dist/bin/compute_groundtruth --data_type float --dist_fn l2 --base_file data/rand_float_10D_10K_norm1.0.bin --query_file data/rand_float_10D_1K_norm1.0.bin --gt_file data/l2_rand_float_10D_10K_norm1.0_10D_1K_norm1.0_gt100 --K 100 + dist/bin/compute_groundtruth --data_type float --dist_fn mips --base_file data/rand_float_10D_10K_norm1.0.bin --query_file data/rand_float_10D_1K_norm1.0.bin --gt_file data/mips_rand_float_10D_10K_norm1.0_10D_1K_norm1.0_gt100 --K 100 + dist/bin/compute_groundtruth --data_type float --dist_fn cosine --base_file data/rand_float_10D_10K_norm1.0.bin --query_file data/rand_float_10D_1K_norm1.0.bin --gt_file data/cosine_rand_float_10D_10K_norm1.0_10D_1K_norm1.0_gt100 --K 100 + dist/bin/compute_groundtruth --data_type float --dist_fn cosine --base_file data/rand_float_10D_10K_unnorm.bin --query_file data/rand_float_10D_1K_unnorm.bin --gt_file data/cosine_rand_float_10D_10K_unnorm_10D_1K_unnorm_gt100 --K 100 + + echo "Computing ground truth for int8s across l2, mips, and cosine distance functions" + dist/bin/compute_groundtruth --data_type int8 --dist_fn l2 --base_file data/rand_int8_10D_10K_norm50.0.bin --query_file data/rand_int8_10D_1K_norm50.0.bin --gt_file data/l2_rand_int8_10D_10K_norm50.0_10D_1K_norm50.0_gt100 --K 100 + dist/bin/compute_groundtruth --data_type int8 --dist_fn mips --base_file data/rand_int8_10D_10K_norm50.0.bin --query_file data/rand_int8_10D_1K_norm50.0.bin --gt_file data/mips_rand_int8_10D_10K_norm50.0_10D_1K_norm50.0_gt100 --K 100 + dist/bin/compute_groundtruth --data_type int8 --dist_fn cosine --base_file data/rand_int8_10D_10K_norm50.0.bin --query_file data/rand_int8_10D_1K_norm50.0.bin --gt_file data/cosine_rand_int8_10D_10K_norm50.0_10D_1K_norm50.0_gt100 --K 100 + + echo "Computing ground truth for uint8s across l2, mips, and cosine distance functions" + dist/bin/compute_groundtruth --data_type uint8 --dist_fn l2 --base_file data/rand_uint8_10D_10K_norm50.0.bin --query_file data/rand_uint8_10D_1K_norm50.0.bin --gt_file data/l2_rand_uint8_10D_10K_norm50.0_10D_1K_norm50.0_gt100 --K 100 + 
dist/bin/compute_groundtruth --data_type uint8 --dist_fn mips --base_file data/rand_uint8_10D_10K_norm50.0.bin --query_file data/rand_uint8_10D_1K_norm50.0.bin --gt_file data/mips_rand_uint8_10D_10K_norm50.0_10D_1K_norm50.0_gt100 --K 100 + dist/bin/compute_groundtruth --data_type uint8 --dist_fn cosine --base_file data/rand_uint8_10D_10K_norm50.0.bin --query_file data/rand_uint8_10D_1K_norm50.0.bin --gt_file data/cosine_rand_uint8_10D_10K_norm50.0_10D_1K_norm50.0_gt100 --K 100 + + shell: bash diff --git a/packages/leann-backend-diskann/third_party/DiskANN/.github/actions/python-wheel/action.yml b/packages/leann-backend-diskann/third_party/DiskANN/.github/actions/python-wheel/action.yml new file mode 100644 index 0000000..6a2880c --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/.github/actions/python-wheel/action.yml @@ -0,0 +1,22 @@ +name: Build Python Wheel +description: Builds a python wheel with cibuildwheel +inputs: + cibw-identifier: + description: "CI build wheel identifier to build" + required: true +runs: + using: "composite" + steps: + - uses: actions/setup-python@v3 + - name: Install cibuildwheel + run: python -m pip install cibuildwheel==2.11.3 + shell: bash + - name: Building Python ${{inputs.cibw-identifier}} Wheel + run: python -m cibuildwheel --output-dir dist + env: + CIBW_BUILD: ${{inputs.cibw-identifier}} + shell: bash + - uses: actions/upload-artifact@v3 + with: + name: wheels + path: ./dist/*.whl diff --git a/packages/leann-backend-diskann/third_party/DiskANN/.github/workflows/build-python-pdoc.yml b/packages/leann-backend-diskann/third_party/DiskANN/.github/workflows/build-python-pdoc.yml new file mode 100644 index 0000000..444a7ee --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/.github/workflows/build-python-pdoc.yml @@ -0,0 +1,81 @@ +name: DiskANN Build PDoc Documentation +on: [workflow_call] +jobs: + build-reference-documentation: + permissions: + contents: write + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v3 + with: + fetch-depth: 1 + - name: Set up Python 3.9 + uses: actions/setup-python@v2 + with: + python-version: 3.9 + - name: Install python build + run: python -m pip install build + shell: bash + # Install required dependencies + - name: Prepare Linux environment + run: | + sudo scripts/dev/install-dev-deps-ubuntu.bash + shell: bash + # We need to build the wheel in order to run pdoc. pdoc does not seem to work if you just point it at + # our source directory. 
+ - name: Building Python Wheel for documentation generation + run: python -m build --wheel --outdir documentation_dist + shell: bash + - name: "Run Reference Documentation Generation" + run: | + pip install pdoc pipdeptree + pip install documentation_dist/*.whl + echo "documentation" > dependencies_documentation.txt + pipdeptree >> dependencies_documentation.txt + pdoc -o docs/python/html diskannpy + - name: Create version environment variable + run: | + echo "DISKANN_VERSION=$(python <> $GITHUB_ENV + - name: Archive documentation version artifact + uses: actions/upload-artifact@v4 + with: + name: dependencies + path: | + ${{ github.run_id }}-dependencies_documentation.txt + overwrite: true + - name: Archive documentation artifacts + uses: actions/upload-artifact@v4 + with: + name: documentation-site + path: | + docs/python/html + # Publish to /dev if we are on the "main" branch + - name: Publish reference docs for latest development version (main branch) + uses: peaceiris/actions-gh-pages@v3 + if: github.ref == 'refs/heads/main' + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + publish_dir: docs/python/html + destination_dir: docs/python/dev + # Publish to / if we are releasing + - name: Publish reference docs by version (main branch) + uses: peaceiris/actions-gh-pages@v3 + if: github.event_name == 'release' + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + publish_dir: docs/python/html + destination_dir: docs/python/${{ env.DISKANN_VERSION }} + # Publish to /latest if we are releasing + - name: Publish latest reference docs (main branch) + uses: peaceiris/actions-gh-pages@v3 + if: github.event_name == 'release' + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + publish_dir: docs/python/html + destination_dir: docs/python/latest diff --git a/packages/leann-backend-diskann/third_party/DiskANN/.github/workflows/build-python.yml b/packages/leann-backend-diskann/third_party/DiskANN/.github/workflows/build-python.yml new file mode 100644 index 0000000..b825398 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/.github/workflows/build-python.yml @@ -0,0 +1,42 @@ +name: DiskANN Build Python Wheel +on: [workflow_call] +jobs: + linux-build: + name: Python - Ubuntu - ${{matrix.cibw-identifier}} + strategy: + fail-fast: false + matrix: + cibw-identifier: ["cp39-manylinux_x86_64", "cp310-manylinux_x86_64", "cp311-manylinux_x86_64"] + runs-on: ubuntu-latest + defaults: + run: + shell: bash + steps: + - name: Checkout repository + uses: actions/checkout@v3 + with: + fetch-depth: 1 + - name: Building python wheel ${{matrix.cibw-identifier}} + uses: ./.github/actions/python-wheel + with: + cibw-identifier: ${{matrix.cibw-identifier}} + windows-build: + name: Python - Windows - ${{matrix.cibw-identifier}} + strategy: + fail-fast: false + matrix: + cibw-identifier: ["cp39-win_amd64", "cp310-win_amd64", "cp311-win_amd64"] + runs-on: windows-latest + defaults: + run: + shell: bash + steps: + - name: Checkout repository + uses: actions/checkout@v3 + with: + submodules: true + fetch-depth: 1 + - name: Building python wheel ${{matrix.cibw-identifier}} + uses: ./.github/actions/python-wheel + with: + cibw-identifier: ${{matrix.cibw-identifier}} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/.github/workflows/common.yml b/packages/leann-backend-diskann/third_party/DiskANN/.github/workflows/common.yml new file mode 100644 index 0000000..09c020a --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/.github/workflows/common.yml @@ -0,0 +1,28 @@ +name: 
DiskANN Common Checks +# common means common to both pr-test and push-test +on: [workflow_call] +jobs: + formatting-check: + strategy: + fail-fast: true + name: Code Formatting Test + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v3 + with: + fetch-depth: 1 + - name: Checking code formatting... + uses: ./.github/actions/format-check + docker-container-build: + name: Docker Container Build + needs: [formatting-check] + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v3 + with: + fetch-depth: 1 + - name: Docker build + run: | + docker build . \ No newline at end of file diff --git a/packages/leann-backend-diskann/third_party/DiskANN/.github/workflows/disk-pq.yml b/packages/leann-backend-diskann/third_party/DiskANN/.github/workflows/disk-pq.yml new file mode 100644 index 0000000..930d213 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/.github/workflows/disk-pq.yml @@ -0,0 +1,117 @@ +name: Disk With PQ +on: [workflow_call] +jobs: + acceptance-tests-disk-pq: + name: Disk, PQ + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-2019, windows-latest] + runs-on: ${{matrix.os}} + defaults: + run: + shell: bash + steps: + - name: Checkout repository + if: ${{ runner.os == 'Linux' }} + uses: actions/checkout@v3 + with: + fetch-depth: 1 + - name: Checkout repository + if: ${{ runner.os == 'Windows' }} + uses: actions/checkout@v3 + with: + fetch-depth: 1 + submodules: true + - name: DiskANN Build CLI Applications + uses: ./.github/actions/build + + - name: Generate Data + uses: ./.github/actions/generate-random + + - name: build and search disk index (one shot graph build, L2, no diskPQ) (float) + if: success() || failure() + run: | + dist/bin/build_disk_index --data_type float --dist_fn l2 --data_path data/rand_float_10D_10K_norm1.0.bin --index_path_prefix data/disk_index_l2_rand_float_10D_10K_norm1.0_diskfull_oneshot -R 16 -L 32 -B 0.00003 -M 1 + dist/bin/search_disk_index --data_type float --dist_fn l2 --fail_if_recall_below 70 --index_path_prefix data/disk_index_l2_rand_float_10D_10K_norm1.0_diskfull_oneshot --result_path /tmp/res --query_file data/rand_float_10D_1K_norm1.0.bin --gt_file data/l2_rand_float_10D_10K_norm1.0_10D_1K_norm1.0_gt100 --recall_at 5 -L 5 12 -W 2 --num_nodes_to_cache 10 -T 16 + - name: build and search disk index (one shot graph build, cosine, no diskPQ) (float) + if: success() || failure() + run: | + dist/bin/build_disk_index --data_type float --dist_fn cosine --data_path data/rand_float_10D_10K_unnorm.bin --index_path_prefix data/disk_index_cosine_rand_float_10D_10K_unnorm_diskfull_oneshot -R 16 -L 32 -B 0.00003 -M 1 + dist/bin/search_disk_index --data_type float --dist_fn cosine --fail_if_recall_below 70 --index_path_prefix data/disk_index_cosine_rand_float_10D_10K_unnorm_diskfull_oneshot --result_path /tmp/res --query_file data/rand_float_10D_1K_unnorm.bin --gt_file data/cosine_rand_float_10D_10K_unnorm_10D_1K_unnorm_gt100 --recall_at 5 -L 5 12 -W 2 --num_nodes_to_cache 10 -T 16 + - name: build and search disk index (one shot graph build, L2, no diskPQ) (int8) + if: success() || failure() + run: | + dist/bin/build_disk_index --data_type int8 --dist_fn l2 --data_path data/rand_int8_10D_10K_norm50.0.bin --index_path_prefix data/disk_index_l2_rand_int8_10D_10K_norm50.0_diskfull_oneshot -R 16 -L 32 -B 0.00003 -M 1 + dist/bin/search_disk_index --data_type int8 --dist_fn l2 --fail_if_recall_below 70 --index_path_prefix 
data/disk_index_l2_rand_int8_10D_10K_norm50.0_diskfull_oneshot --result_path /tmp/res --query_file data/rand_int8_10D_1K_norm50.0.bin --gt_file data/l2_rand_int8_10D_10K_norm50.0_10D_1K_norm50.0_gt100 --recall_at 5 -L 5 12 -W 2 --num_nodes_to_cache 10 -T 16 + - name: build and search disk index (one shot graph build, L2, no diskPQ) (uint8) + if: success() || failure() + run: | + dist/bin/build_disk_index --data_type uint8 --dist_fn l2 --data_path data/rand_uint8_10D_10K_norm50.0.bin --index_path_prefix data/disk_index_l2_rand_uint8_10D_10K_norm50.0_diskfull_oneshot -R 16 -L 32 -B 0.00003 -M 1 + dist/bin/search_disk_index --data_type uint8 --dist_fn l2 --fail_if_recall_below 70 --index_path_prefix data/disk_index_l2_rand_uint8_10D_10K_norm50.0_diskfull_oneshot --result_path /tmp/res --query_file data/rand_uint8_10D_1K_norm50.0.bin --gt_file data/l2_rand_uint8_10D_10K_norm50.0_10D_1K_norm50.0_gt100 --recall_at 5 -L 5 12 -W 2 --num_nodes_to_cache 10 -T 16 + + - name: build and search disk index (one shot graph build, L2, no diskPQ, build with PQ distance comparisons) (float) + if: success() || failure() + run: | + dist/bin/build_disk_index --data_type float --dist_fn l2 --data_path data/rand_float_10D_10K_norm1.0.bin --index_path_prefix data/disk_index_l2_rand_float_10D_10K_norm1.0_diskfull_oneshot_buildpq5 -R 16 -L 32 -B 0.00003 -M 1 --build_PQ_bytes 5 + dist/bin/search_disk_index --data_type float --dist_fn l2 --fail_if_recall_below 70 --index_path_prefix data/disk_index_l2_rand_float_10D_10K_norm1.0_diskfull_oneshot_buildpq5 --result_path /tmp/res --query_file data/rand_float_10D_1K_norm1.0.bin --gt_file data/l2_rand_float_10D_10K_norm1.0_10D_1K_norm1.0_gt100 --recall_at 5 -L 5 12 -W 2 --num_nodes_to_cache 10 -T 16 + - name: build and search disk index (one shot graph build, L2, no diskPQ, build with PQ distance comparisons) (int8) + if: success() || failure() + run: | + dist/bin/build_disk_index --data_type int8 --dist_fn l2 --data_path data/rand_int8_10D_10K_norm50.0.bin --index_path_prefix data/disk_index_l2_rand_int8_10D_10K_norm50.0_diskfull_oneshot_buildpq5 -R 16 -L 32 -B 0.00003 -M 1 --build_PQ_bytes 5 + dist/bin/search_disk_index --data_type int8 --dist_fn l2 --fail_if_recall_below 70 --index_path_prefix data/disk_index_l2_rand_int8_10D_10K_norm50.0_diskfull_oneshot_buildpq5 --result_path /tmp/res --query_file data/rand_int8_10D_1K_norm50.0.bin --gt_file data/l2_rand_int8_10D_10K_norm50.0_10D_1K_norm50.0_gt100 --recall_at 5 -L 5 12 -W 2 --num_nodes_to_cache 10 -T 16\ + - name: build and search disk index (one shot graph build, L2, no diskPQ, build with PQ distance comparisons) (uint8) + if: success() || failure() + run: | + dist/bin/build_disk_index --data_type uint8 --dist_fn l2 --data_path data/rand_uint8_10D_10K_norm50.0.bin --index_path_prefix data/disk_index_l2_rand_uint8_10D_10K_norm50.0_diskfull_oneshot_buildpq5 -R 16 -L 32 -B 0.00003 -M 1 --build_PQ_bytes 5 + dist/bin/search_disk_index --data_type uint8 --dist_fn l2 --fail_if_recall_below 70 --index_path_prefix data/disk_index_l2_rand_uint8_10D_10K_norm50.0_diskfull_oneshot_buildpq5 --result_path /tmp/res --query_file data/rand_uint8_10D_1K_norm50.0.bin --gt_file data/l2_rand_uint8_10D_10K_norm50.0_10D_1K_norm50.0_gt100 --recall_at 5 -L 5 12 -W 2 --num_nodes_to_cache 10 -T 16 + + - name: build and search disk index (sharded graph build, L2, no diskPQ) (float) + if: success() || failure() + run: | + dist/bin/build_disk_index --data_type float --dist_fn l2 --data_path data/rand_float_10D_10K_norm1.0.bin --index_path_prefix 
data/disk_index_l2_rand_float_10D_10K_norm1.0_diskfull_sharded -R 16 -L 32 -B 0.00003 -M 0.00006 + dist/bin/search_disk_index --data_type float --dist_fn l2 --fail_if_recall_below 70 --index_path_prefix data/disk_index_l2_rand_float_10D_10K_norm1.0_diskfull_sharded --result_path /tmp/res --query_file data/rand_float_10D_1K_norm1.0.bin --gt_file data/l2_rand_float_10D_10K_norm1.0_10D_1K_norm1.0_gt100 --recall_at 5 -L 5 12 -W 2 --num_nodes_to_cache 10 -T 16 + - name: build and search disk index (sharded graph build, cosine, no diskPQ) (float) + if: success() || failure() + run: | + dist/bin/build_disk_index --data_type float --dist_fn cosine --data_path data/rand_float_10D_10K_unnorm.bin --index_path_prefix data/disk_index_cosine_rand_float_10D_10K_unnorm_diskfull_sharded -R 16 -L 32 -B 0.00003 -M 0.00006 + dist/bin/search_disk_index --data_type float --dist_fn cosine --fail_if_recall_below 70 --index_path_prefix data/disk_index_cosine_rand_float_10D_10K_unnorm_diskfull_sharded --result_path /tmp/res --query_file data/rand_float_10D_1K_unnorm.bin --gt_file data/cosine_rand_float_10D_10K_unnorm_10D_1K_unnorm_gt100 --recall_at 5 -L 5 12 -W 2 --num_nodes_to_cache 10 -T 16 + - name: build and search disk index (sharded graph build, L2, no diskPQ) (int8) + run: | + dist/bin/build_disk_index --data_type int8 --dist_fn l2 --data_path data/rand_int8_10D_10K_norm50.0.bin --index_path_prefix data/disk_index_l2_rand_int8_10D_10K_norm50.0_diskfull_sharded -R 16 -L 32 -B 0.00003 -M 0.00006 + dist/bin/search_disk_index --data_type int8 --dist_fn l2 --fail_if_recall_below 70 --index_path_prefix data/disk_index_l2_rand_int8_10D_10K_norm50.0_diskfull_sharded --result_path /tmp/res --query_file data/rand_int8_10D_1K_norm50.0.bin --gt_file data/l2_rand_int8_10D_10K_norm50.0_10D_1K_norm50.0_gt100 --recall_at 5 -L 5 12 -W 2 --num_nodes_to_cache 10 -T 16 + - name: build and search disk index (sharded graph build, L2, no diskPQ) (uint8) + if: success() || failure() + run: | + dist/bin/build_disk_index --data_type uint8 --dist_fn l2 --data_path data/rand_uint8_10D_10K_norm50.0.bin --index_path_prefix data/disk_index_l2_rand_uint8_10D_10K_norm50.0_diskfull_sharded -R 16 -L 32 -B 0.00003 -M 0.00006 + dist/bin/search_disk_index --data_type uint8 --dist_fn l2 --fail_if_recall_below 70 --index_path_prefix data/disk_index_l2_rand_uint8_10D_10K_norm50.0_diskfull_sharded --result_path /tmp/res --query_file data/rand_uint8_10D_1K_norm50.0.bin --gt_file data/l2_rand_uint8_10D_10K_norm50.0_10D_1K_norm50.0_gt100 --recall_at 5 -L 5 12 -W 2 --num_nodes_to_cache 10 -T 16 + + - name: build and search disk index (one shot graph build, L2, diskPQ) (float) + if: success() || failure() + run: | + dist/bin/build_disk_index --data_type float --dist_fn l2 --data_path data/rand_float_10D_10K_norm1.0.bin --index_path_prefix data/disk_index_l2_rand_float_10D_10K_norm1.0_diskpq_oneshot -R 16 -L 32 -B 0.00003 -M 1 --PQ_disk_bytes 5 + dist/bin/search_disk_index --data_type float --dist_fn l2 --fail_if_recall_below 70 --index_path_prefix data/disk_index_l2_rand_float_10D_10K_norm1.0_diskpq_oneshot --result_path /tmp/res --query_file data/rand_float_10D_1K_norm1.0.bin --gt_file data/l2_rand_float_10D_10K_norm1.0_10D_1K_norm1.0_gt100 --recall_at 5 -L 5 12 -W 2 --num_nodes_to_cache 10 -T 16 + - name: build and search disk index (one shot graph build, L2, diskPQ) (int8) + if: success() || failure() + run: | + dist/bin/build_disk_index --data_type int8 --dist_fn l2 --data_path data/rand_int8_10D_10K_norm50.0.bin --index_path_prefix 
data/disk_index_l2_rand_int8_10D_10K_norm50.0_diskpq_oneshot -R 16 -L 32 -B 0.00003 -M 1 --PQ_disk_bytes 5 + dist/bin/search_disk_index --data_type int8 --dist_fn l2 --fail_if_recall_below 70 --index_path_prefix data/disk_index_l2_rand_int8_10D_10K_norm50.0_diskpq_oneshot --result_path /tmp/res --query_file data/rand_int8_10D_1K_norm50.0.bin --gt_file data/l2_rand_int8_10D_10K_norm50.0_10D_1K_norm50.0_gt100 --recall_at 5 -L 5 12 -W 2 --num_nodes_to_cache 10 -T 16 + - name: build and search disk index (one shot graph build, L2, diskPQ) (uint8) + if: success() || failure() + run: | + dist/bin/build_disk_index --data_type uint8 --dist_fn l2 --data_path data/rand_uint8_10D_10K_norm50.0.bin --index_path_prefix data/disk_index_l2_rand_uint8_10D_10K_norm50.0_diskpq_oneshot -R 16 -L 32 -B 0.00003 -M 1 --PQ_disk_bytes 5 + dist/bin/search_disk_index --data_type uint8 --dist_fn l2 --fail_if_recall_below 70 --index_path_prefix data/disk_index_l2_rand_uint8_10D_10K_norm50.0_diskpq_oneshot --result_path /tmp/res --query_file data/rand_uint8_10D_1K_norm50.0.bin --gt_file data/l2_rand_uint8_10D_10K_norm50.0_10D_1K_norm50.0_gt100 --recall_at 5 -L 5 12 -W 2 --num_nodes_to_cache 10 -T 16 + + - name: build and search disk index (sharded graph build, MIPS, diskPQ) (float) + if: success() || failure() + run: | + dist/bin/build_disk_index --data_type float --dist_fn mips --data_path data/rand_float_10D_10K_norm1.0.bin --index_path_prefix data/disk_index_mips_rand_float_10D_10K_norm1.0_diskpq_sharded -R 16 -L 32 -B 0.00003 -M 0.00006 --PQ_disk_bytes 5 + dist/bin/search_disk_index --data_type float --dist_fn l2 --fail_if_recall_below 70 --index_path_prefix data/disk_index_mips_rand_float_10D_10K_norm1.0_diskpq_sharded --result_path /tmp/res --query_file data/rand_float_10D_1K_norm1.0.bin --gt_file data/mips_rand_float_10D_10K_norm1.0_10D_1K_norm1.0_gt100 --recall_at 5 -L 5 12 -W 2 --num_nodes_to_cache 10 -T 16 + + - name: upload data and bin + uses: actions/upload-artifact@v4 + with: + name: disk-pq-${{matrix.os}} + path: | + ./dist/** + ./data/** diff --git a/packages/leann-backend-diskann/third_party/DiskANN/.github/workflows/dynamic-labels.yml b/packages/leann-backend-diskann/third_party/DiskANN/.github/workflows/dynamic-labels.yml new file mode 100644 index 0000000..d5dc712 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/.github/workflows/dynamic-labels.yml @@ -0,0 +1,102 @@ +name: Dynamic-Labels +on: [workflow_call] +jobs: + acceptance-tests-dynamic: + name: Dynamic-Labels + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-2019, windows-latest] + runs-on: ${{matrix.os}} + defaults: + run: + shell: bash + steps: + - name: Checkout repository + if: ${{ runner.os == 'Linux' }} + uses: actions/checkout@v3 + with: + fetch-depth: 1 + - name: Checkout repository + if: ${{ runner.os == 'Windows' }} + uses: actions/checkout@v3 + with: + fetch-depth: 1 + submodules: true + - name: DiskANN Build CLI Applications + uses: ./.github/actions/build + + - name: Generate Data + uses: ./.github/actions/generate-random + + - name: Generate Labels + run: | + echo "Generating synthetic labels and computing ground truth for filtered search with universal label" + dist/bin/generate_synthetic_labels --num_labels 50 --num_points 10000 --output_file data/rand_labels_50_10K.txt --distribution_type random + + echo "Generating synthetic labels with a zipf distribution and computing ground truth for filtered search with universal label" + dist/bin/generate_synthetic_labels --num_labels 50 
--num_points 10000 --output_file data/zipf_labels_50_10K.txt --distribution_type zipf + + - name: Test a streaming index (float) with labels (Zipf distributed) + run: | + dist/bin/test_streaming_scenario --data_type float --dist_fn l2 --data_path data/rand_float_10D_10K_norm1.0.bin --universal_label 0 --label_file data/zipf_labels_50_10K.txt --index_path_prefix data/index_zipf_stream -R 64 --FilteredLbuild 200 -L 50 --alpha 1.2 --insert_threads 8 --consolidate_threads 8 --max_points_to_insert 10000 --active_window 4000 --consolidate_interval 2000 --start_point_norm 3.2 --unique_labels_supported 51 + + echo "Computing groundtruth with filter" + dist/bin/compute_groundtruth_for_filters --data_type float --universal_label 0 --filter_label 1 --dist_fn l2 --base_file data/index_zipf_stream.after-streaming-act4000-cons2000-max10000.data --query_file data/rand_float_10D_1K_norm1.0.bin --K 100 --gt_file data/gt100_zipf_base-act4000-cons2000-max10000_1 --label_file data/index_zipf_stream.after-streaming-act4000-cons2000-max10000_raw_labels.txt --tags_file data/index_zipf_stream.after-streaming-act4000-cons2000-max10000.tags + echo "Searching with filter" + dist/bin/search_memory_index --data_type float --dist_fn l2 --filter_label 1 --fail_if_recall_below 40 --index_path_prefix data/index_zipf_stream.after-streaming-act4000-cons2000-max10000 --result_path data/res_stream --query_file data/rand_float_10D_1K_norm1.0.bin --gt_file data/gt100_zipf_base-act4000-cons2000-max10000_1 -K 10 -L 20 40 60 80 100 150 -T 64 --dynamic true --tags 1 + + echo "Computing groundtruth w/o filter" + dist/bin/compute_groundtruth --data_type float --dist_fn l2 --base_file data/index_zipf_stream.after-streaming-act4000-cons2000-max10000.data --query_file data/rand_float_10D_1K_norm1.0.bin --K 100 --gt_file data/gt100_zipf_base-act4000-cons2000-max10000 + echo "Searching without filter" + dist/bin/search_memory_index --data_type float --dist_fn l2 --fail_if_recall_below 70 --index_path_prefix data/index_zipf_stream.after-streaming-act4000-cons2000-max10000 --result_path res_stream --query_file data/rand_float_10D_1K_norm1.0.bin --gt_file data/gt100_zipf_base-act4000-cons2000-max10000 -K 10 -L 20 40 60 80 100 -T 64 + + - name: Test a streaming index (float) with labels (random distributed) + run: | + dist/bin/test_streaming_scenario --data_type float --dist_fn l2 --data_path data/rand_float_10D_10K_norm1.0.bin --universal_label 0 --label_file data/rand_labels_50_10K.txt --index_path_prefix data/index_rand_stream -R 64 --FilteredLbuild 200 -L 50 --alpha 1.2 --insert_threads 8 --consolidate_threads 8 --max_points_to_insert 10000 --active_window 4000 --consolidate_interval 2000 --start_point_norm 3.2 --unique_labels_supported 51 + + echo "Computing groundtruth with filter" + dist/bin/compute_groundtruth_for_filters --data_type float --universal_label 0 --filter_label 1 --dist_fn l2 --base_file data/index_rand_stream.after-streaming-act4000-cons2000-max10000.data --query_file data/rand_float_10D_1K_norm1.0.bin --K 100 --gt_file data/gt100_rand_base-act4000-cons2000-max10000_1 --label_file data/index_rand_stream.after-streaming-act4000-cons2000-max10000_raw_labels.txt --tags_file data/index_rand_stream.after-streaming-act4000-cons2000-max10000.tags + echo "Searching with filter" + dist/bin/search_memory_index --data_type float --dist_fn l2 --filter_label 1 --fail_if_recall_below 40 --index_path_prefix data/index_rand_stream.after-streaming-act4000-cons2000-max10000 --result_path data/res_stream --query_file 
data/rand_float_10D_1K_norm1.0.bin --gt_file data/gt100_rand_base-act4000-cons2000-max10000_1 -K 10 -L 20 40 60 80 100 150 -T 64 --dynamic true --tags 1 + + echo "Computing groundtruth w/o filter" + dist/bin/compute_groundtruth --data_type float --dist_fn l2 --base_file data/index_rand_stream.after-streaming-act4000-cons2000-max10000.data --query_file data/rand_float_10D_1K_norm1.0.bin --K 100 --gt_file data/gt100_rand_base-act4000-cons2000-max10000 + echo "Searching without filter" + dist/bin/search_memory_index --data_type float --dist_fn l2 --fail_if_recall_below 70 --index_path_prefix data/index_rand_stream.after-streaming-act4000-cons2000-max10000 --result_path res_stream --query_file data/rand_float_10D_1K_norm1.0.bin --gt_file data/gt100_rand_base-act4000-cons2000-max10000 -K 10 -L 20 40 60 80 100 -T 64 + + - name: Test Insert Delete Consolidate (float) with labels (zipf distributed) + run: | + dist/bin/test_insert_deletes_consolidate --data_type float --dist_fn l2 --universal_label 0 --label_file data/zipf_labels_50_10K.txt --FilteredLbuild 70 --data_path data/rand_float_10D_10K_norm1.0.bin --index_path_prefix data/index_zipf_ins_del -R 64 -L 10 --alpha 1.2 --points_to_skip 0 --max_points_to_insert 7500 --beginning_index_size 0 --points_per_checkpoint 1000 --checkpoints_per_snapshot 0 --points_to_delete_from_beginning 2500 --start_deletes_after 5000 --do_concurrent true --start_point_norm 3.2 --unique_labels_supported 51 + + echo "Computing groundtruth with filter" + dist/bin/compute_groundtruth_for_filters --data_type float --filter_label 5 --universal_label 0 --dist_fn l2 --base_file data/index_zipf_ins_del.after-concurrent-delete-del2500-7500.data --query_file data/rand_float_10D_1K_norm1.0.bin --K 100 --gt_file data/gt100_zipf_random10D_1K_wlabel_5 --label_file data/index_zipf_ins_del.after-concurrent-delete-del2500-7500_raw_labels.txt --tags_file data/index_zipf_ins_del.after-concurrent-delete-del2500-7500.tags + echo "Searching with filter" + dist/bin/search_memory_index --data_type float --dist_fn l2 --filter_label 5 --fail_if_recall_below 10 --index_path_prefix data/index_zipf_ins_del.after-concurrent-delete-del2500-7500 --result_path data/res_zipf_stream --query_file data/rand_float_10D_1K_norm1.0.bin --gt_file data/gt100_zipf_random10D_1K_wlabel_5 -K 10 -L 20 40 60 80 100 150 -T 64 --dynamic true --tags 1 + + echo "Computing groundtruth w/o filter" + dist/bin/compute_groundtruth --data_type float --dist_fn l2 --base_file data/index_zipf_ins_del.after-concurrent-delete-del2500-7500.data --query_file data/rand_float_10D_1K_norm1.0.bin --K 100 --gt_file data/gt100_zipf_random10D_1K + echo "Searching without filter" + dist/bin/search_memory_index --data_type float --dist_fn l2 --fail_if_recall_below 70 --index_path_prefix data/index_zipf_ins_del.after-concurrent-delete-del2500-7500 --result_path res_stream --query_file data/rand_float_10D_1K_norm1.0.bin --gt_file data/gt100_zipf_random10D_1K -K 10 -L 20 40 60 80 100 -T 64 + + - name: Test Insert Delete Consolidate (float) with labels (random distributed) + run: | + dist/bin/test_insert_deletes_consolidate --data_type float --dist_fn l2 --universal_label 0 --label_file data/rand_labels_50_10K.txt --FilteredLbuild 70 --data_path data/rand_float_10D_10K_norm1.0.bin --index_path_prefix data/index_rand_ins_del -R 64 -L 10 --alpha 1.2 --points_to_skip 0 --max_points_to_insert 7500 --beginning_index_size 0 --points_per_checkpoint 1000 --checkpoints_per_snapshot 0 --points_to_delete_from_beginning 2500 --start_deletes_after 5000 
--do_concurrent true --start_point_norm 3.2 --unique_labels_supported 51 + + echo "Computing groundtruth with filter" + dist/bin/compute_groundtruth_for_filters --data_type float --filter_label 5 --universal_label 0 --dist_fn l2 --base_file data/index_rand_ins_del.after-concurrent-delete-del2500-7500.data --query_file data/rand_float_10D_1K_norm1.0.bin --K 100 --gt_file data/gt100_rand_random10D_1K_wlabel_5 --label_file data/index_rand_ins_del.after-concurrent-delete-del2500-7500_raw_labels.txt --tags_file data/index_rand_ins_del.after-concurrent-delete-del2500-7500.tags + echo "Searching with filter" + dist/bin/search_memory_index --data_type float --dist_fn l2 --filter_label 5 --fail_if_recall_below 40 --index_path_prefix data/index_rand_ins_del.after-concurrent-delete-del2500-7500 --result_path data/res_rand_stream --query_file data/rand_float_10D_1K_norm1.0.bin --gt_file data/gt100_rand_random10D_1K_wlabel_5 -K 10 -L 20 40 60 80 100 150 -T 64 --dynamic true --tags 1 + + echo "Computing groundtruth w/o filter" + dist/bin/compute_groundtruth --data_type float --dist_fn l2 --base_file data/index_rand_ins_del.after-concurrent-delete-del2500-7500.data --query_file data/rand_float_10D_1K_norm1.0.bin --K 100 --gt_file data/gt100_rand_random10D_1K + echo "Searching without filter" + dist/bin/search_memory_index --data_type float --dist_fn l2 --fail_if_recall_below 70 --index_path_prefix data/index_rand_ins_del.after-concurrent-delete-del2500-7500 --result_path res_stream --query_file data/rand_float_10D_1K_norm1.0.bin --gt_file data/gt100_rand_random10D_1K -K 10 -L 20 40 60 80 100 -T 64 + + - name: upload data and bin + uses: actions/upload-artifact@v4 + with: + name: dynamic-labels-${{matrix.os}} + path: | + ./dist/** + ./data/** diff --git a/packages/leann-backend-diskann/third_party/DiskANN/.github/workflows/dynamic.yml b/packages/leann-backend-diskann/third_party/DiskANN/.github/workflows/dynamic.yml new file mode 100644 index 0000000..edd691e --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/.github/workflows/dynamic.yml @@ -0,0 +1,75 @@ +name: Dynamic +on: [workflow_call] +jobs: + acceptance-tests-dynamic: + name: Dynamic + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-2019, windows-latest] + runs-on: ${{matrix.os}} + defaults: + run: + shell: bash + steps: + - name: Checkout repository + if: ${{ runner.os == 'Linux' }} + uses: actions/checkout@v3 + with: + fetch-depth: 1 + - name: Checkout repository + if: ${{ runner.os == 'Windows' }} + uses: actions/checkout@v3 + with: + fetch-depth: 1 + submodules: true + - name: DiskANN Build CLI Applications + uses: ./.github/actions/build + + - name: Generate Data + uses: ./.github/actions/generate-random + + - name: test a streaming index (float) + run: | + dist/bin/test_streaming_scenario --data_type float --dist_fn l2 --data_path data/rand_float_10D_10K_norm1.0.bin --index_path_prefix data/index_stream -R 64 -L 600 --alpha 1.2 --insert_threads 4 --consolidate_threads 4 --max_points_to_insert 10000 --active_window 4000 --consolidate_interval 2000 --start_point_norm 3.2 + dist/bin/compute_groundtruth --data_type float --dist_fn l2 --base_file data/index_stream.after-streaming-act4000-cons2000-max10000.data --query_file data/rand_float_10D_1K_norm1.0.bin --K 100 --gt_file data/gt100_base-act4000-cons2000-max10000 --tags_file data/index_stream.after-streaming-act4000-cons2000-max10000.tags + dist/bin/search_memory_index --data_type float --dist_fn l2 --fail_if_recall_below 70 --index_path_prefix 
data/index_stream.after-streaming-act4000-cons2000-max10000 --result_path data/res_stream --query_file data/rand_float_10D_1K_norm1.0.bin --gt_file data/gt100_base-act4000-cons2000-max10000 -K 10 -L 20 40 60 80 100 -T 64 --dynamic true --tags 1 + - name: test a streaming index (int8) + if: success() || failure() + run: | + dist/bin/test_streaming_scenario --data_type int8 --dist_fn l2 --data_path data/rand_int8_10D_10K_norm50.0.bin --index_path_prefix data/index_stream -R 64 -L 600 --alpha 1.2 --insert_threads 4 --consolidate_threads 4 --max_points_to_insert 10000 --active_window 4000 --consolidate_interval 2000 --start_point_norm 200 + dist/bin/compute_groundtruth --data_type int8 --dist_fn l2 --base_file data/index_stream.after-streaming-act4000-cons2000-max10000.data --query_file data/rand_int8_10D_1K_norm50.0.bin --K 100 --gt_file data/gt100_base-act4000-cons2000-max10000 --tags_file data/index_stream.after-streaming-act4000-cons2000-max10000.tags + dist/bin/search_memory_index --data_type int8 --dist_fn l2 --fail_if_recall_below 70 --index_path_prefix data/index_stream.after-streaming-act4000-cons2000-max10000 --result_path res_stream --query_file data/rand_int8_10D_1K_norm50.0.bin --gt_file data/gt100_base-act4000-cons2000-max10000 -K 10 -L 20 40 60 80 100 -T 64 --dynamic true --tags 1 + - name: test a streaming index + if: success() || failure() + run: | + dist/bin/test_streaming_scenario --data_type uint8 --dist_fn l2 --data_path data/rand_uint8_10D_10K_norm50.0.bin --index_path_prefix data/index_stream -R 64 -L 600 --alpha 1.2 --insert_threads 4 --consolidate_threads 4 --max_points_to_insert 10000 --active_window 4000 --consolidate_interval 2000 --start_point_norm 200 + dist/bin/compute_groundtruth --data_type uint8 --dist_fn l2 --base_file data/index_stream.after-streaming-act4000-cons2000-max10000.data --query_file data/rand_uint8_10D_1K_norm50.0.bin --K 100 --gt_file data/gt100_base-act4000-cons2000-max10000 --tags_file data/index_stream.after-streaming-act4000-cons2000-max10000.tags + dist/bin/search_memory_index --data_type uint8 --dist_fn l2 --fail_if_recall_below 70 --index_path_prefix data/index_stream.after-streaming-act4000-cons2000-max10000 --result_path data/res_stream --query_file data/rand_uint8_10D_1K_norm50.0.bin --gt_file data/gt100_base-act4000-cons2000-max10000 -K 10 -L 20 40 60 80 100 -T 64 --dynamic true --tags 1 + + - name: build and search an incremental index (float) + if: success() || failure() + run: | + dist/bin/test_insert_deletes_consolidate --data_type float --dist_fn l2 --data_path data/rand_float_10D_10K_norm1.0.bin --index_path_prefix data/index_ins_del -R 64 -L 300 --alpha 1.2 -T 8 --points_to_skip 0 --max_points_to_insert 7500 --beginning_index_size 0 --points_per_checkpoint 1000 --checkpoints_per_snapshot 0 --points_to_delete_from_beginning 2500 --start_deletes_after 5000 --do_concurrent true --start_point_norm 3.2; + dist/bin/compute_groundtruth --data_type float --dist_fn l2 --base_file data/index_ins_del.after-concurrent-delete-del2500-7500.data --query_file data/rand_float_10D_1K_norm1.0.bin --K 100 --gt_file data/gt100_random10D_1K-conc-2500-7500 --tags_file data/index_ins_del.after-concurrent-delete-del2500-7500.tags + dist/bin/search_memory_index --data_type float --dist_fn l2 --fail_if_recall_below 70 --index_path_prefix data/index_ins_del.after-concurrent-delete-del2500-7500 --result_path data/res_ins_del --query_file data/rand_float_10D_1K_norm1.0.bin --gt_file data/gt100_random10D_1K-conc-2500-7500 -K 10 -L 20 40 60 80 100 -T 8 
--dynamic true --tags 1 + - name: build and search an incremental index (int8) + if: success() || failure() + run: | + dist/bin/test_insert_deletes_consolidate --data_type int8 --dist_fn l2 --data_path data/rand_int8_10D_10K_norm50.0.bin --index_path_prefix data/index_ins_del -R 64 -L 300 --alpha 1.2 -T 8 --points_to_skip 0 --max_points_to_insert 7500 --beginning_index_size 0 --points_per_checkpoint 1000 --checkpoints_per_snapshot 0 --points_to_delete_from_beginning 2500 --start_deletes_after 5000 --do_concurrent true --start_point_norm 200 + dist/bin/compute_groundtruth --data_type int8 --dist_fn l2 --base_file data/index_ins_del.after-concurrent-delete-del2500-7500.data --query_file data/rand_int8_10D_1K_norm50.0.bin --K 100 --gt_file data/gt100_random10D_1K-conc-2500-7500 --tags_file data/index_ins_del.after-concurrent-delete-del2500-7500.tags + dist/bin/search_memory_index --data_type int8 --dist_fn l2 --fail_if_recall_below 70 --index_path_prefix data/index_ins_del.after-concurrent-delete-del2500-7500 --result_path data/res_ins_del --query_file data/rand_int8_10D_1K_norm50.0.bin --gt_file data/gt100_random10D_1K-conc-2500-7500 -K 10 -L 20 40 60 80 100 -T 8 --dynamic true --tags 1 + - name: build and search an incremental index (uint8) + if: success() || failure() + run: | + dist/bin/test_insert_deletes_consolidate --data_type uint8 --dist_fn l2 --data_path data/rand_uint8_10D_10K_norm50.0.bin --index_path_prefix data/index_ins_del -R 64 -L 300 --alpha 1.2 -T 8 --points_to_skip 0 --max_points_to_insert 7500 --beginning_index_size 0 --points_per_checkpoint 1000 --checkpoints_per_snapshot 0 --points_to_delete_from_beginning 2500 --start_deletes_after 5000 --do_concurrent true --start_point_norm 200 + dist/bin/compute_groundtruth --data_type uint8 --dist_fn l2 --base_file data/index_ins_del.after-concurrent-delete-del2500-7500.data --query_file data/rand_uint8_10D_1K_norm50.0.bin --K 100 --gt_file data/gt100_random10D_10K-conc-2500-7500 --tags_file data/index_ins_del.after-concurrent-delete-del2500-7500.tags + dist/bin/search_memory_index --data_type uint8 --dist_fn l2 --fail_if_recall_below 70 --index_path_prefix data/index_ins_del.after-concurrent-delete-del2500-7500 --result_path data/res_ins_del --query_file data/rand_uint8_10D_1K_norm50.0.bin --gt_file data/gt100_random10D_10K-conc-2500-7500 -K 10 -L 20 40 60 80 100 -T 8 --dynamic true --tags 1 + + - name: upload data and bin + uses: actions/upload-artifact@v4 + with: + name: dynamic-${{matrix.os}} + path: | + ./dist/** + ./data/** diff --git a/packages/leann-backend-diskann/third_party/DiskANN/.github/workflows/in-mem-no-pq.yml b/packages/leann-backend-diskann/third_party/DiskANN/.github/workflows/in-mem-no-pq.yml new file mode 100644 index 0000000..07fc4a2 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/.github/workflows/in-mem-no-pq.yml @@ -0,0 +1,81 @@ +name: In-Memory Without PQ +on: [workflow_call] +jobs: + acceptance-tests-mem-no-pq: + name: In-Mem, Without PQ + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-2019, windows-latest] + runs-on: ${{matrix.os}} + defaults: + run: + shell: bash + steps: + - name: Checkout repository + if: ${{ runner.os == 'Linux' }} + uses: actions/checkout@v3 + with: + fetch-depth: 1 + - name: Checkout repository + if: ${{ runner.os == 'Windows' }} + uses: actions/checkout@v3 + with: + fetch-depth: 1 + submodules: true + - name: DiskANN Build CLI Applications + uses: ./.github/actions/build + + - name: Generate Data + uses: 
./.github/actions/generate-random + + - name: build and search in-memory index with L2 metrics (float) + if: success() || failure() + run: | + dist/bin/build_memory_index --data_type float --dist_fn l2 --data_path data/rand_float_10D_10K_norm1.0.bin --index_path_prefix data/index_l2_rand_float_10D_10K_norm1.0 + dist/bin/search_memory_index --data_type float --dist_fn l2 --fail_if_recall_below 70 --index_path_prefix data/index_l2_rand_float_10D_10K_norm1.0 --query_file data/rand_float_10D_1K_norm1.0.bin --recall_at 10 --result_path temp --gt_file data/l2_rand_float_10D_10K_norm1.0_10D_1K_norm1.0_gt100 -L 16 32 + - name: build and search in-memory index with L2 metrics (int8) + if: success() || failure() + run: | + dist/bin/build_memory_index --data_type int8 --dist_fn l2 --data_path data/rand_int8_10D_10K_norm50.0.bin --index_path_prefix data/index_l2_rand_int8_10D_10K_norm50.0 + dist/bin/search_memory_index --data_type int8 --dist_fn l2 --fail_if_recall_below 70 --index_path_prefix data/index_l2_rand_int8_10D_10K_norm50.0 --query_file data/rand_int8_10D_1K_norm50.0.bin --recall_at 10 --result_path temp --gt_file data/l2_rand_int8_10D_10K_norm50.0_10D_1K_norm50.0_gt100 -L 16 32 + - name: build and search in-memory index with L2 metrics (uint8) + if: success() || failure() + run: | + dist/bin/build_memory_index --data_type uint8 --dist_fn l2 --data_path data/rand_uint8_10D_10K_norm50.0.bin --index_path_prefix data/index_l2_rand_uint8_10D_10K_norm50.0 + dist/bin/search_memory_index --data_type uint8 --dist_fn l2 --fail_if_recall_below 70 --index_path_prefix data/index_l2_rand_uint8_10D_10K_norm50.0 --query_file data/rand_uint8_10D_1K_norm50.0.bin --recall_at 10 --result_path temp --gt_file data/l2_rand_uint8_10D_10K_norm50.0_10D_1K_norm50.0_gt100 -L 16 32 + + - name: Searching with fast_l2 distance function (float) + if: runner.os != 'Windows' && (success() || failure()) + run: | + dist/bin/search_memory_index --data_type float --dist_fn fast_l2 --fail_if_recall_below 70 --index_path_prefix data/index_l2_rand_float_10D_10K_norm1.0 --query_file data/rand_float_10D_1K_norm1.0.bin --recall_at 10 --result_path temp --gt_file data/l2_rand_float_10D_10K_norm1.0_10D_1K_norm1.0_gt100 -L 16 32 + + - name: build and search in-memory index with MIPS metric (float) + if: success() || failure() + run: | + dist/bin/build_memory_index --data_type float --dist_fn mips --data_path data/rand_float_10D_10K_norm1.0.bin --index_path_prefix data/index_mips_rand_float_10D_10K_norm1.0 + dist/bin/search_memory_index --data_type float --dist_fn mips --fail_if_recall_below 70 --index_path_prefix data/index_l2_rand_float_10D_10K_norm1.0 --query_file data/rand_float_10D_1K_norm1.0.bin --recall_at 10 --result_path temp --gt_file data/mips_rand_float_10D_10K_norm1.0_10D_1K_norm1.0_gt100 -L 16 32 + + - name: build and search in-memory index with cosine metric (float) + if: success() || failure() + run: | + dist/bin/build_memory_index --data_type float --dist_fn cosine --data_path data/rand_float_10D_10K_norm1.0.bin --index_path_prefix data/index_cosine_rand_float_10D_10K_norm1.0 + dist/bin/search_memory_index --data_type float --dist_fn cosine --fail_if_recall_below 70 --index_path_prefix data/index_l2_rand_float_10D_10K_norm1.0 --query_file data/rand_float_10D_1K_norm1.0.bin --recall_at 10 --result_path temp --gt_file data/cosine_rand_float_10D_10K_norm1.0_10D_1K_norm1.0_gt100 -L 16 32 + - name: build and search in-memory index with cosine metric (int8) + if: success() || failure() + run: | + dist/bin/build_memory_index 
--data_type int8 --dist_fn cosine --data_path data/rand_int8_10D_10K_norm50.0.bin --index_path_prefix data/index_cosine_rand_int8_10D_10K_norm50.0 + dist/bin/search_memory_index --data_type int8 --dist_fn cosine --fail_if_recall_below 70 --index_path_prefix data/index_l2_rand_int8_10D_10K_norm50.0 --query_file data/rand_int8_10D_1K_norm50.0.bin --recall_at 10 --result_path temp --gt_file data/cosine_rand_int8_10D_10K_norm50.0_10D_1K_norm50.0_gt100 -L 16 32 + - name: build and search in-memory index with cosine metric + if: success() || failure() + run: | + dist/bin/build_memory_index --data_type uint8 --dist_fn cosine --data_path data/rand_uint8_10D_10K_norm50.0.bin --index_path_prefix data/index_cosine_rand_uint8_10D_10K_norm50.0 + dist/bin/search_memory_index --data_type uint8 --dist_fn cosine --fail_if_recall_below 70 --index_path_prefix data/index_l2_rand_uint8_10D_10K_norm50.0 --query_file data/rand_uint8_10D_1K_norm50.0.bin --recall_at 10 --result_path temp --gt_file data/cosine_rand_uint8_10D_10K_norm50.0_10D_1K_norm50.0_gt100 -L 16 32 + + - name: upload data and bin + uses: actions/upload-artifact@v4 + with: + name: in-memory-no-pq-${{matrix.os}} + path: | + ./dist/** + ./data/** diff --git a/packages/leann-backend-diskann/third_party/DiskANN/.github/workflows/in-mem-pq.yml b/packages/leann-backend-diskann/third_party/DiskANN/.github/workflows/in-mem-pq.yml new file mode 100644 index 0000000..be20f10 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/.github/workflows/in-mem-pq.yml @@ -0,0 +1,56 @@ +name: In-Memory With PQ +on: [workflow_call] +jobs: + acceptance-tests-mem-pq: + name: In-Mem, PQ + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-2019, windows-latest] + runs-on: ${{matrix.os}} + defaults: + run: + shell: bash + steps: + - name: Checkout repository + if: ${{ runner.os == 'Linux' }} + uses: actions/checkout@v3 + with: + fetch-depth: 1 + - name: Checkout repository + if: ${{ runner.os == 'Windows' }} + uses: actions/checkout@v3 + with: + fetch-depth: 1 + submodules: true + - name: DiskANN Build CLI Applications + uses: ./.github/actions/build + + - name: Generate Data + uses: ./.github/actions/generate-random + + - name: build and search in-memory index with L2 metric with PQ based distance comparisons (float) + if: success() || failure() + run: | + dist/bin/build_memory_index --data_type float --dist_fn l2 --data_path data/rand_float_10D_10K_norm1.0.bin --index_path_prefix data/index_l2_rand_float_10D_10K_norm1.0_buildpq5 --build_PQ_bytes 5 + dist/bin/search_memory_index --data_type float --dist_fn l2 --fail_if_recall_below 70 --index_path_prefix data/index_l2_rand_float_10D_10K_norm1.0_buildpq5 --query_file data/rand_float_10D_1K_norm1.0.bin --recall_at 10 --result_path temp --gt_file data/l2_rand_float_10D_10K_norm1.0_10D_1K_norm1.0_gt100 -L 16 32 + + - name: build and search in-memory index with L2 metrics with PQ base distance comparisons (int8) + if: success() || failure() + run: | + dist/bin/build_memory_index --data_type int8 --dist_fn l2 --data_path data/rand_int8_10D_10K_norm50.0.bin --index_path_prefix data/index_l2_rand_int8_10D_10K_norm50.0_buildpq5 --build_PQ_bytes 5 + dist/bin/search_memory_index --data_type int8 --dist_fn l2 --fail_if_recall_below 70 --index_path_prefix data/index_l2_rand_int8_10D_10K_norm50.0_buildpq5 --query_file data/rand_int8_10D_1K_norm50.0.bin --recall_at 10 --result_path temp --gt_file data/l2_rand_int8_10D_10K_norm50.0_10D_1K_norm50.0_gt100 -L 16 32 + + - name: build and search in-memory 
index with L2 metrics with PQ base distance comparisons (uint8) + if: success() || failure() + run: | + dist/bin/build_memory_index --data_type uint8 --dist_fn l2 --data_path data/rand_uint8_10D_10K_norm50.0.bin --index_path_prefix data/index_l2_rand_uint8_10D_10K_norm50.0_buildpq5 --build_PQ_bytes 5 + dist/bin/search_memory_index --data_type uint8 --dist_fn l2 --fail_if_recall_below 70 --index_path_prefix data/index_l2_rand_uint8_10D_10K_norm50.0_buildpq5 --query_file data/rand_uint8_10D_1K_norm50.0.bin --recall_at 10 --result_path temp --gt_file data/l2_rand_uint8_10D_10K_norm50.0_10D_1K_norm50.0_gt100 -L 16 32 + + - name: upload data and bin + uses: actions/upload-artifact@v4 + with: + name: in-memory-pq-${{matrix.os}} + path: | + ./dist/** + ./data/** \ No newline at end of file diff --git a/packages/leann-backend-diskann/third_party/DiskANN/.github/workflows/labels.yml b/packages/leann-backend-diskann/third_party/DiskANN/.github/workflows/labels.yml new file mode 100644 index 0000000..93995f7 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/.github/workflows/labels.yml @@ -0,0 +1,120 @@ +name: Labels +on: [workflow_call] +jobs: + acceptance-tests-labels: + name: Labels + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-2019, windows-latest] + runs-on: ${{matrix.os}} + defaults: + run: + shell: bash + steps: + - name: Checkout repository + if: ${{ runner.os == 'Linux' }} + uses: actions/checkout@v3 + with: + fetch-depth: 1 + - name: Checkout repository + if: ${{ runner.os == 'Windows' }} + uses: actions/checkout@v3 + with: + fetch-depth: 1 + submodules: true + - name: DiskANN Build CLI Applications + uses: ./.github/actions/build + + - name: Generate Data + uses: ./.github/actions/generate-random + + - name: Generate Labels + run: | + echo "Generating synthetic labels and computing ground truth for filtered search with universal label" + dist/bin/generate_synthetic_labels --num_labels 50 --num_points 10000 --output_file data/rand_labels_50_10K.txt --distribution_type random + dist/bin/compute_groundtruth_for_filters --data_type uint8 --dist_fn l2 --universal_label 0 --filter_label 10 --base_file data/rand_uint8_10D_10K_norm50.0.bin --query_file data/rand_uint8_10D_1K_norm50.0.bin --label_file data/rand_labels_50_10K.txt --gt_file data/l2_rand_uint8_10D_10K_norm50.0_10D_1K_norm50.0_gt100_wlabel --K 100 + dist/bin/compute_groundtruth_for_filters --data_type uint8 --dist_fn mips --universal_label 0 --filter_label 10 --base_file data/rand_uint8_10D_10K_norm50.0.bin --query_file data/rand_uint8_10D_1K_norm50.0.bin --label_file data/rand_labels_50_10K.txt --gt_file data/mips_rand_uint8_10D_10K_norm50.0_10D_1K_norm50.0_gt100_wlabel --K 100 + dist/bin/compute_groundtruth_for_filters --data_type uint8 --dist_fn cosine --universal_label 0 --filter_label 10 --base_file data/rand_uint8_10D_10K_norm50.0.bin --query_file data/rand_uint8_10D_1K_norm50.0.bin --label_file data/rand_labels_50_10K.txt --gt_file data/cosine_rand_uint8_10D_10K_norm50.0_10D_1K_norm50.0_gt100_wlabel --K 100 + + echo "Generating synthetic labels with a zipf distribution and computing ground truth for filtered search with universal label" + dist/bin/generate_synthetic_labels --num_labels 50 --num_points 10000 --output_file data/zipf_labels_50_10K.txt --distribution_type zipf + dist/bin/compute_groundtruth_for_filters --data_type uint8 --dist_fn l2 --universal_label 0 --filter_label 5 --base_file data/rand_uint8_10D_10K_norm50.0.bin --query_file data/rand_uint8_10D_1K_norm50.0.bin 
--label_file data/zipf_labels_50_10K.txt --gt_file data/l2_zipf_uint8_10D_10K_norm50.0_10D_1K_norm50.0_gt100_wlabel --K 100 + dist/bin/compute_groundtruth_for_filters --data_type uint8 --dist_fn mips --universal_label 0 --filter_label 5 --base_file data/rand_uint8_10D_10K_norm50.0.bin --query_file data/rand_uint8_10D_1K_norm50.0.bin --label_file data/zipf_labels_50_10K.txt --gt_file data/mips_zipf_uint8_10D_10K_norm50.0_10D_1K_norm50.0_gt100_wlabel --K 100 + dist/bin/compute_groundtruth_for_filters --data_type uint8 --dist_fn cosine --universal_label 0 --filter_label 5 --base_file data/rand_uint8_10D_10K_norm50.0.bin --query_file data/rand_uint8_10D_1K_norm50.0.bin --label_file data/zipf_labels_50_10K.txt --gt_file data/cosine_zipf_uint8_10D_10K_norm50.0_10D_1K_norm50.0_gt100_wlabel --K 100 + + echo "Generating synthetic labels and computing ground truth for filtered search without a universal label" + dist/bin/compute_groundtruth_for_filters --data_type uint8 --dist_fn l2 --filter_label 5 --base_file data/rand_uint8_10D_10K_norm50.0.bin --query_file data/rand_uint8_10D_1K_norm50.0.bin --label_file data/zipf_labels_50_10K.txt --gt_file data/l2_zipf_uint8_10D_10K_norm50.0_10D_1K_norm50.0_gt100_wlabel_nouniversal --K 100 + dist/bin/generate_synthetic_labels --num_labels 10 --num_points 1000 --output_file data/query_labels_1K.txt --distribution_type one_per_point + dist/bin/compute_groundtruth_for_filters --data_type uint8 --dist_fn l2 --universal_label 0 --filter_label_file data/query_labels_1K.txt --base_file data/rand_uint8_10D_10K_norm50.0.bin --query_file data/rand_uint8_10D_1K_norm50.0.bin --label_file data/zipf_labels_50_10K.txt --gt_file data/combined_l2_zipf_uint8_10D_10K_norm50.0_10D_1K_norm50.0_gt100_wlabel --K 100 + + - name: build and search in-memory index with labels using L2 and Cosine metrics (random distributed labels) + if: success() || failure() + run: | + dist/bin/build_memory_index --data_type uint8 --dist_fn l2 --FilteredLbuild 90 --universal_label 0 --data_path data/rand_uint8_10D_10K_norm50.0.bin --label_file data/rand_labels_50_10K.txt --index_path_prefix data/index_l2_rand_uint8_10D_10K_norm50_wlabel + dist/bin/build_memory_index --data_type uint8 --dist_fn cosine --FilteredLbuild 90 --universal_label 0 --data_path data/rand_uint8_10D_10K_norm50.0.bin --label_file data/rand_labels_50_10K.txt --index_path_prefix data/index_cosine_rand_uint8_10D_10K_norm50_wlabel + dist/bin/search_memory_index --data_type uint8 --dist_fn l2 --filter_label 10 --fail_if_recall_below 70 --index_path_prefix data/index_l2_rand_uint8_10D_10K_norm50_wlabel --query_file data/rand_uint8_10D_1K_norm50.0.bin --recall_at 10 --result_path temp --gt_file data/l2_rand_uint8_10D_10K_norm50.0_10D_1K_norm50.0_gt100_wlabel -L 16 32 + dist/bin/search_memory_index --data_type uint8 --dist_fn cosine --filter_label 10 --fail_if_recall_below 70 --index_path_prefix data/index_cosine_rand_uint8_10D_10K_norm50_wlabel --query_file data/rand_uint8_10D_1K_norm50.0.bin --recall_at 10 --result_path temp --gt_file data/cosine_rand_uint8_10D_10K_norm50.0_10D_1K_norm50.0_gt100_wlabel -L 16 32 + + echo "Searching without filters" + dist/bin/search_memory_index --data_type uint8 --dist_fn l2 --fail_if_recall_below 70 --index_path_prefix data/index_l2_rand_uint8_10D_10K_norm50_wlabel --query_file data/rand_uint8_10D_1K_norm50.0.bin --recall_at 10 --result_path temp --gt_file data/l2_rand_uint8_10D_10K_norm50.0_10D_1K_norm50.0_gt100 -L 32 64 + dist/bin/search_memory_index --data_type uint8 --dist_fn cosine 
--fail_if_recall_below 70 --index_path_prefix data/index_cosine_rand_uint8_10D_10K_norm50_wlabel --query_file data/rand_uint8_10D_1K_norm50.0.bin --recall_at 10 --result_path temp --gt_file data/cosine_rand_uint8_10D_10K_norm50.0_10D_1K_norm50.0_gt100 -L 32 64 + + - name: build and search disk index with labels using L2 and Cosine metrics (random distributed labels) + if: success() || failure() + run: | + dist/bin/build_disk_index --data_type uint8 --dist_fn l2 --universal_label 0 --FilteredLbuild 90 --data_path data/rand_uint8_10D_10K_norm50.0.bin --label_file data/rand_labels_50_10K.txt --index_path_prefix data/disk_index_l2_rand_uint8_10D_10K_norm50_wlabel -R 32 -L 5 -B 0.00003 -M 1 + dist/bin/search_disk_index --data_type uint8 --dist_fn l2 --filter_label 10 --fail_if_recall_below 50 --index_path_prefix data/disk_index_l2_rand_uint8_10D_10K_norm50_wlabel --result_path temp --query_file data/rand_uint8_10D_1K_norm50.0.bin --gt_file data/l2_rand_uint8_10D_10K_norm50.0_10D_1K_norm50.0_gt100_wlabel --recall_at 5 -L 5 12 -W 2 --num_nodes_to_cache 10 -T 16 + - name: build and search in-memory index with labels using L2 and Cosine metrics (zipf distributed labels) + if: success() || failure() + run: | + dist/bin/build_memory_index --data_type uint8 --dist_fn l2 --FilteredLbuild 90 --universal_label 0 --data_path data/rand_uint8_10D_10K_norm50.0.bin --label_file data/zipf_labels_50_10K.txt --index_path_prefix data/index_l2_zipf_uint8_10D_10K_norm50_wlabel + dist/bin/build_memory_index --data_type uint8 --dist_fn cosine --FilteredLbuild 90 --universal_label 0 --data_path data/rand_uint8_10D_10K_norm50.0.bin --label_file data/zipf_labels_50_10K.txt --index_path_prefix data/index_cosine_zipf_uint8_10D_10K_norm50_wlabel + dist/bin/search_memory_index --data_type uint8 --dist_fn l2 --filter_label 5 --fail_if_recall_below 70 --index_path_prefix data/index_l2_zipf_uint8_10D_10K_norm50_wlabel --query_file data/rand_uint8_10D_1K_norm50.0.bin --recall_at 10 --result_path temp --gt_file data/l2_zipf_uint8_10D_10K_norm50.0_10D_1K_norm50.0_gt100_wlabel -L 16 32 + dist/bin/search_memory_index --data_type uint8 --dist_fn cosine --filter_label 5 --fail_if_recall_below 70 --index_path_prefix data/index_cosine_zipf_uint8_10D_10K_norm50_wlabel --query_file data/rand_uint8_10D_1K_norm50.0.bin --recall_at 10 --result_path temp --gt_file data/cosine_zipf_uint8_10D_10K_norm50.0_10D_1K_norm50.0_gt100_wlabel -L 16 32 + + echo "Searching without filters" + dist/bin/compute_groundtruth --data_type uint8 --dist_fn l2 --base_file data/rand_uint8_10D_10K_norm50.0.bin --query_file data/rand_uint8_10D_1K_norm50.0.bin --gt_file data/l2_zipf_uint8_10D_10K_norm50.0_10D_1K_norm50.0_gt100 --K 100 + dist/bin/compute_groundtruth --data_type uint8 --dist_fn cosine --base_file data/rand_uint8_10D_10K_norm50.0.bin --query_file data/rand_uint8_10D_1K_norm50.0.bin --gt_file data/cosine_zipf_uint8_10D_10K_norm50.0_10D_1K_norm50.0_gt100 --K 100 + dist/bin/search_memory_index --data_type uint8 --dist_fn l2 --fail_if_recall_below 70 --index_path_prefix data/index_l2_zipf_uint8_10D_10K_norm50_wlabel --query_file data/rand_uint8_10D_1K_norm50.0.bin --recall_at 10 --result_path temp --gt_file data/l2_zipf_uint8_10D_10K_norm50.0_10D_1K_norm50.0_gt100 -L 32 64 + dist/bin/search_memory_index --data_type uint8 --dist_fn cosine --fail_if_recall_below 70 --index_path_prefix data/index_cosine_zipf_uint8_10D_10K_norm50_wlabel --query_file data/rand_uint8_10D_1K_norm50.0.bin --recall_at 10 --result_path temp --gt_file 
data/cosine_zipf_uint8_10D_10K_norm50.0_10D_1K_norm50.0_gt100 -L 32 64 + + - name: build and search disk index with labels using L2 and Cosine metrics (zipf distributed labels) + if: success() || failure() + run: | + dist/bin/build_disk_index --data_type uint8 --dist_fn l2 --universal_label 0 --FilteredLbuild 90 --data_path data/rand_uint8_10D_10K_norm50.0.bin --label_file data/zipf_labels_50_10K.txt --index_path_prefix data/disk_index_l2_zipf_uint8_10D_10K_norm50_wlabel -R 32 -L 5 -B 0.00003 -M 1 + dist/bin/search_disk_index --data_type uint8 --dist_fn l2 --filter_label 5 --fail_if_recall_below 50 --index_path_prefix data/disk_index_l2_zipf_uint8_10D_10K_norm50_wlabel --result_path temp --query_file data/rand_uint8_10D_1K_norm50.0.bin --gt_file data/l2_zipf_uint8_10D_10K_norm50.0_10D_1K_norm50.0_gt100_wlabel --recall_at 5 -L 5 12 -W 2 --num_nodes_to_cache 10 -T 16 + + - name : build and search in-memory and disk index (without universal label, zipf distributed) + if: success() || failure() + run: | + dist/bin/build_memory_index --data_type uint8 --dist_fn l2 --FilteredLbuild 90 --data_path data/rand_uint8_10D_10K_norm50.0.bin --label_file data/zipf_labels_50_10K.txt --index_path_prefix data/index_l2_zipf_uint8_10D_10K_norm50_wlabel_nouniversal + dist/bin/build_disk_index --data_type uint8 --dist_fn l2 --FilteredLbuild 90 --data_path data/rand_uint8_10D_10K_norm50.0.bin --label_file data/zipf_labels_50_10K.txt --index_path_prefix data/disk_index_l2_zipf_uint8_10D_10K_norm50_wlabel_nouniversal -R 32 -L 5 -B 0.00003 -M 1 + dist/bin/search_memory_index --data_type uint8 --dist_fn l2 --filter_label 5 --fail_if_recall_below 70 --index_path_prefix data/index_l2_zipf_uint8_10D_10K_norm50_wlabel_nouniversal --query_file data/rand_uint8_10D_1K_norm50.0.bin --recall_at 10 --result_path temp --gt_file data/l2_zipf_uint8_10D_10K_norm50.0_10D_1K_norm50.0_gt100_wlabel_nouniversal -L 16 32 + dist/bin/search_disk_index --data_type uint8 --dist_fn l2 --filter_label 5 --index_path_prefix data/disk_index_l2_zipf_uint8_10D_10K_norm50_wlabel_nouniversal --result_path temp --query_file data/rand_uint8_10D_1K_norm50.0.bin --gt_file data/l2_zipf_uint8_10D_10K_norm50.0_10D_1K_norm50.0_gt100_wlabel_nouniversal --recall_at 5 -L 5 12 -W 2 --num_nodes_to_cache 10 -T 16 + - name: Generate combined GT for each query with a separate label and search + if: success() || failure() + run: | + dist/bin/build_memory_index --data_type uint8 --dist_fn l2 --FilteredLbuild 90 --universal_label 0 --data_path data/rand_uint8_10D_10K_norm50.0.bin --label_file data/zipf_labels_50_10K.txt --index_path_prefix data/index_l2_zipf_uint8_10D_10K_norm50_wlabel + dist/bin/search_memory_index --data_type uint8 --dist_fn l2 --query_filters_file data/query_labels_1K.txt --fail_if_recall_below 70 --index_path_prefix data/index_l2_zipf_uint8_10D_10K_norm50_wlabel --query_file data/rand_uint8_10D_1K_norm50.0.bin --recall_at 10 --result_path temp --gt_file data/combined_l2_zipf_uint8_10D_10K_norm50.0_10D_1K_norm50.0_gt100_wlabel -L 16 32 + - name: build and search in-memory index with pq_dist of 5 with 10 dimensions + if: success() || failure() + run: | + dist/bin/build_memory_index --data_type uint8 --dist_fn l2 --FilteredLbuild 90 --universal_label 0 --data_path data/rand_uint8_10D_10K_norm50.0.bin --label_file data/rand_labels_50_10K.txt --index_path_prefix data/index_l2_rand_uint8_10D_10K_norm50_wlabel --build_PQ_bytes 5 + dist/bin/search_memory_index --data_type uint8 --dist_fn l2 --filter_label 10 --fail_if_recall_below 70 --index_path_prefix 
data/index_l2_rand_uint8_10D_10K_norm50_wlabel --query_file data/rand_uint8_10D_1K_norm50.0.bin --recall_at 10 --result_path temp --gt_file data/l2_rand_uint8_10D_10K_norm50.0_10D_1K_norm50.0_gt100_wlabel -L 16 32 + - name: Build and search stitched vamana with random and zipf distributed labels + if: success() || failure() + run: | + dist/bin/build_stitched_index --num_threads 48 --data_type uint8 --data_path data/rand_uint8_10D_10K_norm50.0.bin --label_file data/rand_labels_50_10K.txt -R 32 -L 100 --alpha 1.2 --stitched_R 64 --index_path_prefix data/stit_rand_32_100_64_new --universal_label 0 + dist/bin/build_stitched_index --num_threads 48 --data_type uint8 --data_path data/rand_uint8_10D_10K_norm50.0.bin --label_file data/zipf_labels_50_10K.txt -R 32 -L 100 --alpha 1.2 --stitched_R 64 --index_path_prefix data/stit_zipf_32_100_64_new --universal_label 0 + dist/bin/search_memory_index --num_threads 48 --data_type uint8 --dist_fn l2 --filter_label 10 --index_path_prefix data/stit_rand_32_100_64_new --query_file data/rand_uint8_10D_1K_norm50.0.bin --result_path data/rand_stit_96_10_90_new --gt_file data/l2_rand_uint8_10D_10K_norm50.0_10D_1K_norm50.0_gt100_wlabel -K 10 -L 16 32 150 + dist/bin/search_memory_index --num_threads 48 --data_type uint8 --dist_fn l2 --filter_label 5 --index_path_prefix data/stit_zipf_32_100_64_new --query_file data/rand_uint8_10D_1K_norm50.0.bin --result_path data/zipf_stit_96_10_90_new --gt_file data/l2_zipf_uint8_10D_10K_norm50.0_10D_1K_norm50.0_gt100_wlabel -K 10 -L 16 32 150 + + - name: upload data and bin + if: success() || failure() + uses: actions/upload-artifact@v4 + with: + name: labels-${{matrix.os}} + path: | + ./dist/** + ./data/** diff --git a/packages/leann-backend-diskann/third_party/DiskANN/.github/workflows/multi-sector-disk-pq.yml b/packages/leann-backend-diskann/third_party/DiskANN/.github/workflows/multi-sector-disk-pq.yml new file mode 100644 index 0000000..969467a --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/.github/workflows/multi-sector-disk-pq.yml @@ -0,0 +1,60 @@ +name: Disk With PQ +on: [workflow_call] +jobs: + acceptance-tests-disk-pq: + name: Disk, PQ + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-2019, windows-latest] + runs-on: ${{matrix.os}} + defaults: + run: + shell: bash + steps: + - name: Checkout repository + if: ${{ runner.os == 'Linux' }} + uses: actions/checkout@v3 + with: + fetch-depth: 1 + - name: Checkout repository + if: ${{ runner.os == 'Windows' }} + uses: actions/checkout@v3 + with: + fetch-depth: 1 + submodules: true + - name: DiskANN Build CLI Applications + uses: ./.github/actions/build + + - name: Generate Data + uses: ./.github/actions/generate-high-dim-random + + - name: build and search disk index (1020D, one shot graph build, L2, no diskPQ) (float) + if: success() || failure() + run: | + dist/bin/build_disk_index --data_type float --dist_fn l2 --data_path data/rand_float_1020D_5K_norm1.0.bin --index_path_prefix data/disk_index_l2_rand_float_1020D_5K_norm1.0_diskfull_oneshot -R 32 -L 500 -B 0.003 -M 1 + dist/bin/search_disk_index --data_type float --dist_fn l2 --fail_if_recall_below 70 --index_path_prefix data/disk_index_l2_rand_float_1020D_5K_norm1.0_diskfull_oneshot --result_path /tmp/res --query_file data/rand_float_1020D_1K_norm1.0.bin --gt_file data/l2_rand_float_1020D_5K_norm1.0_1020D_1K_norm1.0_gt100 --recall_at 5 -L 250 -W 2 --num_nodes_to_cache 100 -T 16 + #- name: build and search disk index (1024D, one shot graph build, L2, no diskPQ) (float) + # if: 
success() || failure() + # run: | + # dist/bin/build_disk_index --data_type float --dist_fn l2 --data_path data/rand_float_1024D_5K_norm1.0.bin --index_path_prefix data/disk_index_l2_rand_float_1024D_5K_norm1.0_diskfull_oneshot -R 32 -L 500 -B 0.003 -M 1 + # dist/bin/search_disk_index --data_type float --dist_fn l2 --fail_if_recall_below 70 --index_path_prefix data/disk_index_l2_rand_float_1024D_5K_norm1.0_diskfull_oneshot --result_path /tmp/res --query_file data/rand_float_1024D_1K_norm1.0.bin --gt_file data/l2_rand_float_1024D_5K_norm1.0_1024D_1K_norm1.0_gt100 --recall_at 5 -L 250 -W 2 --num_nodes_to_cache 100 -T 16 + - name: build and search disk index (1536D, one shot graph build, L2, no diskPQ) (float) + if: success() || failure() + run: | + dist/bin/build_disk_index --data_type float --dist_fn l2 --data_path data/rand_float_1536D_5K_norm1.0.bin --index_path_prefix data/disk_index_l2_rand_float_1536D_5K_norm1.0_diskfull_oneshot -R 32 -L 500 -B 0.003 -M 1 + dist/bin/search_disk_index --data_type float --dist_fn l2 --fail_if_recall_below 70 --index_path_prefix data/disk_index_l2_rand_float_1536D_5K_norm1.0_diskfull_oneshot --result_path /tmp/res --query_file data/rand_float_1536D_1K_norm1.0.bin --gt_file data/l2_rand_float_1536D_5K_norm1.0_1536D_1K_norm1.0_gt100 --recall_at 5 -L 250 -W 2 --num_nodes_to_cache 100 -T 16 + + - name: build and search disk index (4096D, one shot graph build, L2, no diskPQ) (int8) + if: success() || failure() + run: | + dist/bin/build_disk_index --data_type int8 --dist_fn l2 --data_path data/rand_int8_4096D_5K_norm1.0.bin --index_path_prefix data/disk_index_l2_rand_int8_4096D_5K_norm1.0_diskfull_oneshot -R 32 -L 500 -B 0.003 -M 1 + dist/bin/search_disk_index --data_type int8 --dist_fn l2 --fail_if_recall_below 70 --index_path_prefix data/disk_index_l2_rand_int8_4096D_5K_norm1.0_diskfull_oneshot --result_path /tmp/res --query_file data/rand_int8_4096D_1K_norm1.0.bin --gt_file data/l2_rand_int8_4096D_5K_norm1.0_4096D_1K_norm1.0_gt100 --recall_at 5 -L 250 -W 2 --num_nodes_to_cache 100 -T 16 + + - name: upload data and bin + uses: actions/upload-artifact@v4 + with: + name: multi-sector-disk-pq-${{matrix.os}} + path: | + ./dist/** + ./data/** diff --git a/packages/leann-backend-diskann/third_party/DiskANN/.github/workflows/perf.yml b/packages/leann-backend-diskann/third_party/DiskANN/.github/workflows/perf.yml new file mode 100644 index 0000000..d4eb9e2 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/.github/workflows/perf.yml @@ -0,0 +1,26 @@ +name: DiskANN Nightly Performance Metrics +on: + schedule: + - cron: "41 14 * * *" # 14:41 UTC, 7:41 PDT, 8:41 PST, 08:11 IST +jobs: + perf-test: + name: Run Perf Test from main + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v3 + with: + fetch-depth: 1 + - name: Build Perf Container + run: | + docker build --build-arg GIT_COMMIT_ISH="$GITHUB_SHA" -t perf -f scripts/perf/Dockerfile scripts + - name: Performance Tests + run: | + mkdir metrics + docker run -v ./metrics:/app/logs perf &> ./metrics/combined_stdouterr.log + - name: Upload Metrics Logs + uses: actions/upload-artifact@v4 + with: + name: metrics-${{matrix.os}} + path: | + ./metrics/** diff --git a/packages/leann-backend-diskann/third_party/DiskANN/.github/workflows/pr-test.yml b/packages/leann-backend-diskann/third_party/DiskANN/.github/workflows/pr-test.yml new file mode 100644 index 0000000..f84953b --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/.github/workflows/pr-test.yml 
@@ -0,0 +1,35 @@ +name: DiskANN Pull Request Build and Test +on: [pull_request] +jobs: + common: + strategy: + fail-fast: true + name: DiskANN Common Build Checks + uses: ./.github/workflows/common.yml + unit-tests: + name: Unit tests + uses: ./.github/workflows/unit-tests.yml + in-mem-pq: + name: In-Memory with PQ + uses: ./.github/workflows/in-mem-pq.yml + in-mem-no-pq: + name: In-Memory without PQ + uses: ./.github/workflows/in-mem-no-pq.yml + disk-pq: + name: Disk with PQ + uses: ./.github/workflows/disk-pq.yml + multi-sector-disk-pq: + name: Multi-sector Disk with PQ + uses: ./.github/workflows/multi-sector-disk-pq.yml + labels: + name: Labels + uses: ./.github/workflows/labels.yml + dynamic: + name: Dynamic + uses: ./.github/workflows/dynamic.yml + dynamic-labels: + name: Dynamic Labels + uses: ./.github/workflows/dynamic-labels.yml + python: + name: Python + uses: ./.github/workflows/build-python.yml diff --git a/packages/leann-backend-diskann/third_party/DiskANN/.github/workflows/push-test.yml b/packages/leann-backend-diskann/third_party/DiskANN/.github/workflows/push-test.yml new file mode 100644 index 0000000..d1261d5 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/.github/workflows/push-test.yml @@ -0,0 +1,50 @@ +name: DiskANN Push Build +on: [push] +jobs: + common: + strategy: + fail-fast: true + name: DiskANN Common Build Checks + uses: ./.github/workflows/common.yml + build-documentation: + permissions: + contents: write + strategy: + fail-fast: true + name: DiskANN Build Documentation + uses: ./.github/workflows/build-python-pdoc.yml + build: + strategy: + fail-fast: false + matrix: + os: [ ubuntu-latest, windows-2019, windows-latest ] + name: Build for ${{matrix.os}} + runs-on: ${{matrix.os}} + defaults: + run: + shell: bash + steps: + - name: Checkout repository + if: ${{ runner.os == 'Linux' }} + uses: actions/checkout@v3 + with: + fetch-depth: 1 + - name: Checkout repository + if: ${{ runner.os == 'Windows' }} + uses: actions/checkout@v3 + with: + fetch-depth: 1 + submodules: true + - name: Build diskannpy dependency tree + run: | + pip install diskannpy pipdeptree + echo "dependencies" > dependencies_${{ matrix.os }}.txt + pipdeptree >> dependencies_${{ matrix.os }}.txt + - name: Archive diskannpy dependencies artifact + uses: actions/upload-artifact@v4 + with: + name: dependencies_${{ matrix.os }} + path: | + dependencies_${{ matrix.os }}.txt + - name: DiskANN Build CLI Applications + uses: ./.github/actions/build diff --git a/packages/leann-backend-diskann/third_party/DiskANN/.github/workflows/python-release.yml b/packages/leann-backend-diskann/third_party/DiskANN/.github/workflows/python-release.yml new file mode 100644 index 0000000..a15d4d1 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/.github/workflows/python-release.yml @@ -0,0 +1,43 @@ +name: Build and Release Python Wheels +on: + release: + types: [published] +jobs: + python-release-wheels: + name: Python + uses: ./.github/workflows/build-python.yml + build-documentation: + strategy: + fail-fast: true + name: DiskANN Build Documentation + uses: ./.github/workflows/build-python-pdoc.yml + release: + permissions: + contents: write + runs-on: ubuntu-latest + needs: python-release-wheels + steps: + - uses: actions/download-artifact@v3 + with: + name: wheels + path: dist/ + - name: Generate SHA256 files for each wheel + run: | + sha256sum dist/*.whl > checksums.txt + cat checksums.txt + - uses: actions/setup-python@v3 + - name: Install twine + run: python -m pip 
install twine + - name: Publish with twine + env: + TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} + TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} + run: | + twine upload dist/*.whl + - name: Update release with SHA256 and Artifacts + uses: softprops/action-gh-release@v1 + with: + token: ${{ secrets.GITHUB_TOKEN }} + files: | + dist/*.whl + checksums.txt diff --git a/packages/leann-backend-diskann/third_party/DiskANN/.github/workflows/unit-tests.yml b/packages/leann-backend-diskann/third_party/DiskANN/.github/workflows/unit-tests.yml new file mode 100644 index 0000000..6ae6877 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/.github/workflows/unit-tests.yml @@ -0,0 +1,32 @@ +name: Unit Tests +on: [workflow_call] +jobs: + acceptance-tests-labels: + name: Unit Tests + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-2019, windows-latest] + runs-on: ${{matrix.os}} + defaults: + run: + shell: bash + steps: + - name: Checkout repository + if: ${{ runner.os == 'Linux' }} + uses: actions/checkout@v3 + with: + fetch-depth: 1 + - name: Checkout repository + if: ${{ runner.os == 'Windows' }} + uses: actions/checkout@v3 + with: + fetch-depth: 1 + submodules: true + - name: DiskANN Build CLI Applications + uses: ./.github/actions/build + + - name: Run Unit Tests + run: | + cd build + ctest -C Release \ No newline at end of file diff --git a/packages/leann-backend-diskann/third_party/DiskANN/.gitignore b/packages/leann-backend-diskann/third_party/DiskANN/.gitignore new file mode 100644 index 0000000..c6a88e7 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/.gitignore @@ -0,0 +1,384 @@ +## Ignore Visual Studio temporary files, build results, and +## files generated by popular Visual Studio add-ons. +## +## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore + +# User-specific files +*.rsuser +*.suo +*.user +*.userosscache +*.sln.docstates + +# User-specific files (MonoDevelop/Xamarin Studio) +*.userprefs + +# Mono auto generated files +mono_crash.* + +# Build results +[Dd]ebug/ +[Dd]ebugPublic/ +[Rr]elease/ +[Rr]eleases/ +x64/ +x86/ +[Aa][Rr][Mm]/ +[Aa][Rr][Mm]64/ +bld/ +[Bb]in/ +[Oo]bj/ +[Ll]og/ +[Ll]ogs/ + +# Visual Studio 2015/2017 cache/options directory +.vs/ +# Uncomment if you have tasks that create the project's static files in wwwroot +#wwwroot/ + +# Visual Studio 2017 auto generated files +Generated\ Files/ + +# MSTest test Results +[Tt]est[Rr]esult*/ +[Bb]uild[Ll]og.* + +# NUnit +*.VisualState.xml +TestResult.xml +nunit-*.xml + +# Build Results of an ATL Project +[Dd]ebugPS/ +[Rr]eleasePS/ +dlldata.c + +# Benchmark Results +BenchmarkDotNet.Artifacts/ + +# .NET Core +project.lock.json +project.fragment.lock.json +artifacts/ + +# StyleCop +StyleCopReport.xml + +# Files built by Visual Studio +*_i.c +*_p.c +*_h.h +*.ilk +*.meta +*.obj +*.iobj +*.pch +*.pdb +*.ipdb +*.pgc +*.pgd +*.rsp +*.sbr +*.tlb +*.tli +*.tlh +*.tmp +*.tmp_proj +*_wpftmp.csproj +*.log +*.vspscc +*.vssscc +.builds +*.pidb +*.svclog +*.scc + +# Chutzpah Test files +_Chutzpah* + +# Visual C++ cache files +ipch/ +*.aps +*.ncb +*.opendb +*.opensdf +*.sdf +*.cachefile +*.VC.db +*.VC.VC.opendb + +# Visual Studio profiler +*.psess +*.vsp +*.vspx +*.sap + +# Visual Studio Trace Files +*.e2e + +# TFS 2012 Local Workspace +$tf/ + +# Guidance Automation Toolkit +*.gpState + +# ReSharper is a .NET coding add-in +_ReSharper*/ +*.[Rr]e[Ss]harper +*.DotSettings.user + +# TeamCity is a build add-in +_TeamCity* + +# DotCover is a Code Coverage Tool 
+*.dotCover + +# AxoCover is a Code Coverage Tool +.axoCover/* +!.axoCover/settings.json + +# Visual Studio code coverage results +*.coverage +*.coveragexml + +# NCrunch +_NCrunch_* +.*crunch*.local.xml +nCrunchTemp_* + +# MightyMoose +*.mm.* +AutoTest.Net/ + +# Web workbench (sass) +.sass-cache/ + +# Installshield output folder +[Ee]xpress/ + +# DocProject is a documentation generator add-in +DocProject/buildhelp/ +DocProject/Help/*.HxT +DocProject/Help/*.HxC +DocProject/Help/*.hhc +DocProject/Help/*.hhk +DocProject/Help/*.hhp +DocProject/Help/Html2 +DocProject/Help/html + +# Click-Once directory +publish/ + +# Publish Web Output +*.[Pp]ublish.xml +*.azurePubxml +# Note: Comment the next line if you want to checkin your web deploy settings, +# but database connection strings (with potential passwords) will be unencrypted +*.pubxml +*.publishproj + +# Microsoft Azure Web App publish settings. Comment the next line if you want to +# checkin your Azure Web App publish settings, but sensitive information contained +# in these scripts will be unencrypted +PublishScripts/ + +# NuGet Packages +*.nupkg +# NuGet Symbol Packages +*.snupkg +# The packages folder can be ignored because of Package Restore +**/[Pp]ackages/* +# except build/, which is used as an MSBuild target. +!**/[Pp]ackages/build/ +# Uncomment if necessary however generally it will be regenerated when needed +#!**/[Pp]ackages/repositories.config +# NuGet v3's project.json files produces more ignorable files +*.nuget.props +*.nuget.targets + +# Microsoft Azure Build Output +csx/ +*.build.csdef + +# Microsoft Azure Emulator +ecf/ +rcf/ + +# Windows Store app package directories and files +AppPackages/ +BundleArtifacts/ +Package.StoreAssociation.xml +_pkginfo.txt +*.appx +*.appxbundle +*.appxupload + +# Visual Studio cache files +# files ending in .cache can be ignored +*.[Cc]ache +# but keep track of directories ending in .cache +!?*.[Cc]ache/ + +# Others +ClientBin/ +~$* +*~ +*.dbmdl +*.dbproj.schemaview +*.jfm +*.pfx +*.publishsettings +orleans.codegen.cs + +# Including strong name files can present a security risk +# (https://github.com/github/gitignore/pull/2483#issue-259490424) +#*.snk + +# Since there are multiple workflows, uncomment next line to ignore bower_components +# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) +#bower_components/ + +# RIA/Silverlight projects +Generated_Code/ + +# Backup & report files from converting an old project file +# to a newer Visual Studio version. Backup files are not needed, +# because we have git ;-) +_UpgradeReport_Files/ +Backup*/ +UpgradeLog*.XML +UpgradeLog*.htm +ServiceFabricBackup/ +*.rptproj.bak + +# SQL Server files +*.mdf +*.ldf +*.ndf + +# Business Intelligence projects +*.rdl.data +*.bim.layout +*.bim_*.settings +*.rptproj.rsuser +*- [Bb]ackup.rdl +*- [Bb]ackup ([0-9]).rdl +*- [Bb]ackup ([0-9][0-9]).rdl + +# Microsoft Fakes +FakesAssemblies/ + +# GhostDoc plugin setting file +*.GhostDoc.xml + +# Node.js Tools for Visual Studio +.ntvs_analysis.dat +node_modules/ + +# Visual Studio 6 build log +*.plg + +# Visual Studio 6 workspace options file +*.opt + +# Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 
+*.vbw + +# Visual Studio LightSwitch build output +**/*.HTMLClient/GeneratedArtifacts +**/*.DesktopClient/GeneratedArtifacts +**/*.DesktopClient/ModelManifest.xml +**/*.Server/GeneratedArtifacts +**/*.Server/ModelManifest.xml +_Pvt_Extensions + +# Paket dependency manager +.paket/paket.exe +paket-files/ + +# FAKE - F# Make +.fake/ + +# CodeRush personal settings +.cr/personal + +# Python Tools for Visual Studio (PTVS) +__pycache__/ +*.pyc + +# Cake - Uncomment if you are using it +# tools/** +# !tools/packages.config + +# Tabs Studio +*.tss + +# Telerik's JustMock configuration file +*.jmconfig + +# BizTalk build output +*.btp.cs +*.btm.cs +*.odx.cs +*.xsd.cs + +# OpenCover UI analysis results +OpenCover/ + +# Azure Stream Analytics local run output +ASALocalRun/ + +# MSBuild Binary and Structured Log +*.binlog + +# NVidia Nsight GPU debugger configuration file +*.nvuser + +# MFractors (Xamarin productivity tool) working folder +.mfractor/ + +# Local History for Visual Studio +.localhistory/ + +# BeatPulse healthcheck temp database +healthchecksdb + +# Backup folder for Package Reference Convert tool in Visual Studio 2017 +MigrationBackup/ + +# Ionide (cross platform F# VS Code tools) working folder +.ionide/ + +/vcproj/nsg/x64/Debug/nsg.Build.CppClean.log +/vcproj/test_recall/x64/Debug/test_recall.Build.CppClean.log +/vcproj/test_recall/test_recall.vcxproj.user +/.vs +/out/build/x64-Debug +cscope* + +build/ +build_linux/ +!.github/actions/build + +# jetbrains specific stuff +.idea/ +cmake-build-debug/ + +#python extension module ignores +python/diskannpy.egg-info/ +python/dist/ + +**/*.egg-info +wheelhouse/* +dist/* +venv*/** +*.swp + +gperftools + +# Rust +rust/target + +python/src/*.so + +compile_commands.json \ No newline at end of file diff --git a/packages/leann-backend-diskann/third_party/DiskANN/.gitmodules b/packages/leann-backend-diskann/third_party/DiskANN/.gitmodules new file mode 100644 index 0000000..125572b --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/.gitmodules @@ -0,0 +1,3 @@ +[submodule "gperftools"] + path = gperftools + url = https://github.com/gperftools/gperftools.git diff --git a/packages/leann-backend-diskann/third_party/DiskANN/CMakeLists.txt b/packages/leann-backend-diskann/third_party/DiskANN/CMakeLists.txt new file mode 100644 index 0000000..4025861 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/CMakeLists.txt @@ -0,0 +1,563 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT license. + +# Parameters: +# +# BOOST_ROOT: +# Specify root of the Boost library if Boost cannot be auto-detected. On Windows, a fallback to a +# downloaded nuget version will be used if Boost cannot be found. +# +# DISKANN_RELEASE_UNUSED_TCMALLOC_MEMORY_AT_CHECKPOINTS: +# This is a work-in-progress feature, not completed yet. The core DiskANN library will be split into +# build-related and search-related functionality. In build-related functionality, when using tcmalloc, +# it's possible to release memory that's free but reserved by tcmalloc. Setting this to true enables +# such behavior. +# Contact for this feature: gopalrs. + + +# Some variables like MSVC are defined only after project(), so put that first. 
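# A minimal configure sketch for the two parameters documented above (illustrative only;
# the Boost prefix shown is a hypothetical example, not a path this project requires):
#
#   cmake -S . -B build -DCMAKE_BUILD_TYPE=Release \
#         -DBOOST_ROOT=/opt/boost_1_84_0 \
#         -DDISKANN_RELEASE_UNUSED_TCMALLOC_MEMORY_AT_CHECKPOINTS=ON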
+cmake_minimum_required(VERSION 3.20) +project(diskann) + +#Set option to use tcmalloc +option(USE_TCMALLOC "Use tcmalloc from gperftools" ON) + +# set tcmalloc to false when on macos +if(APPLE) + set(USE_TCMALLOC OFF) +endif() + +option(PYBIND "Build with Python bindings" ON) + +if(PYBIND) + # Find Python + find_package(Python 3.6 COMPONENTS Interpreter Development REQUIRED) + execute_process( + COMMAND "${Python_EXECUTABLE}" -c "import pybind11; print(pybind11.get_cmake_dir())" + OUTPUT_VARIABLE pybind11_DIR + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + find_package(pybind11 CONFIG REQUIRED) + + message(STATUS "Python include dirs: ${Python_INCLUDE_DIRS}") + message(STATUS "Pybind11 include dirs: ${pybind11_INCLUDE_DIRS}") + + # Add pybind11 include directories + include_directories(SYSTEM ${pybind11_INCLUDE_DIRS} ${Python_INCLUDE_DIRS}) + + # Add compilation definitions + add_definitions(-DPYBIND11_EMBEDDED) + + # Set visibility flags + if(NOT MSVC) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=hidden") + endif() +endif() + +set(CMAKE_STANDARD 17) +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +# if(NOT MSVC) +# set(CMAKE_CXX_COMPILER g++) +# endif() + +set(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake;${CMAKE_MODULE_PATH}") + +# Install nuget packages for dependencies. +if (MSVC) + find_program(NUGET_EXE NAMES nuget) + + if (NOT NUGET_EXE) + message(FATAL_ERROR "Cannot find nuget command line tool.\nPlease install it from e.g. https://www.nuget.org/downloads") + endif() + + set(DISKANN_MSVC_PACKAGES_CONFIG ${CMAKE_BINARY_DIR}/packages.config) + set(DISKANN_MSVC_PACKAGES ${CMAKE_BINARY_DIR}/packages) + + message(STATUS "Invoking nuget to download Boost, OpenMP and MKL dependencies...") + configure_file(${PROJECT_SOURCE_DIR}/windows/packages.config.in ${DISKANN_MSVC_PACKAGES_CONFIG}) + exec_program(${NUGET_EXE} ARGS install \"${DISKANN_MSVC_PACKAGES_CONFIG}\" -ExcludeVersion -OutputDirectory \"${DISKANN_MSVC_PACKAGES}\") + if (RESTAPI) + set(DISKANN_MSVC_RESTAPI_PACKAGES_CONFIG ${CMAKE_BINARY_DIR}/restapi/packages.config) + configure_file(${PROJECT_SOURCE_DIR}/windows/packages_restapi.config.in ${DISKANN_MSVC_RESTAPI_PACKAGES_CONFIG}) + exec_program(${NUGET_EXE} ARGS install \"${DISKANN_MSVC_RESTAPI_PACKAGES_CONFIG}\" -ExcludeVersion -OutputDirectory \"${DISKANN_MSVC_PACKAGES}\") + endif() + message(STATUS "Finished setting up nuget dependencies") +endif() + +include_directories(${PROJECT_SOURCE_DIR}/include) + +include(FetchContent) + +if(USE_TCMALLOC) + FetchContent_Declare( + tcmalloc + GIT_REPOSITORY https://github.com/google/tcmalloc.git + GIT_TAG origin/master # or specify a particular version or commit + ) + + FetchContent_MakeAvailable(tcmalloc) +endif() + +if(NOT PYBIND) + set(DISKANN_RELEASE_UNUSED_TCMALLOC_MEMORY_AT_CHECKPOINTS ON) +endif() +# It's necessary to include tcmalloc headers only if calling into MallocExtension interface. +# For using tcmalloc in DiskANN tools, it's enough to just link with tcmalloc. +if (DISKANN_RELEASE_UNUSED_TCMALLOC_MEMORY_AT_CHECKPOINTS) + include_directories(${tcmalloc_SOURCE_DIR}/src) + if (MSVC) + include_directories(${tcmalloc_SOURCE_DIR}/src/windows) + endif() +endif() + +#OpenMP +if (MSVC) + # Do not use find_package here since it would use VisualStudio's built-in OpenMP, but MKL libraries + # refer to Intel's OpenMP. + # + # No extra settings are needed for compilation: it only needs /openmp flag which is set further below, + # in the common MSVC compiler options block. 
+ include_directories(BEFORE "${DISKANN_MSVC_PACKAGES}/intelopenmp.devel.win/lib/native/include") + link_libraries("${DISKANN_MSVC_PACKAGES}/intelopenmp.devel.win/lib/native/win-x64/libiomp5md.lib") + + set(OPENMP_WINDOWS_RUNTIME_FILES + "${DISKANN_MSVC_PACKAGES}/intelopenmp.redist.win/runtimes/win-x64/native/libiomp5md.dll" + "${DISKANN_MSVC_PACKAGES}/intelopenmp.redist.win/runtimes/win-x64/native/libiomp5md.pdb") +elseif(APPLE) + # Check if we're building Python bindings + if(PYBIND) + # First look for PyTorch's OpenMP to avoid conflicts + execute_process( + COMMAND ${Python_EXECUTABLE} -c "import os; import torch; print(os.path.join(os.path.dirname(torch.__file__), 'lib', 'libomp.dylib'))" + RESULT_VARIABLE TORCH_PATH_RESULT + OUTPUT_VARIABLE TORCH_LIBOMP_PATH + OUTPUT_STRIP_TRAILING_WHITESPACE + ERROR_QUIET + ) + + execute_process( + COMMAND brew --prefix libomp + OUTPUT_VARIABLE LIBOMP_ROOT + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + + if(EXISTS "${TORCH_LIBOMP_PATH}") + message(STATUS "Found PyTorch's libomp: ${TORCH_LIBOMP_PATH}") + set(OpenMP_CXX_FLAGS "-Xclang -fopenmp") + set(OpenMP_C_FLAGS "-Xclang -fopenmp") + set(OpenMP_CXX_LIBRARIES "${TORCH_LIBOMP_PATH}") + set(OpenMP_C_LIBRARIES "${TORCH_LIBOMP_PATH}") + set(OpenMP_FOUND TRUE) + + include_directories(${LIBOMP_ROOT}/include) + + # Set compiler flags and link libraries + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") + link_libraries("${TORCH_LIBOMP_PATH}") + else() + message(STATUS "No PyTorch's libomp found, falling back to normal OpenMP detection") + # Fallback to normal OpenMP detection + execute_process( + COMMAND brew --prefix libomp + OUTPUT_VARIABLE LIBOMP_ROOT + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + + set(OpenMP_ROOT "${LIBOMP_ROOT}") + find_package(OpenMP) + + if (OPENMP_FOUND) + set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") + link_libraries(OpenMP::OpenMP_CXX) + else() + message(FATAL_ERROR "No OpenMP support") + endif() + endif() + else() + # Regular OpenMP setup for non-Python builds + execute_process( + COMMAND brew --prefix libomp + OUTPUT_VARIABLE LIBOMP_ROOT + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + set(OpenMP_ROOT "${LIBOMP_ROOT}") + find_package(OpenMP) + + if (OPENMP_FOUND) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") + link_libraries(OpenMP::OpenMP_CXX) + else() + message(FATAL_ERROR "No OpenMP support") + endif() + endif() +else() + find_package(OpenMP) + + if (OPENMP_FOUND) + set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") + else() + message(FATAL_ERROR "No OpenMP support") + endif() +endif() + +# DiskANN core uses header-only libraries. Only DiskANN tools need program_options which has a linker library, +# but its size is small. Reduce number of dependent DLLs by linking statically. +if (MSVC) + set(Boost_USE_STATIC_LIBS ON) +endif() + +if(NOT MSVC) + find_package(Boost COMPONENTS program_options) +endif() + +# For Windows, fall back to nuget version if find_package didn't find it. +if (MSVC AND NOT Boost_FOUND) + set(DISKANN_BOOST_INCLUDE "${DISKANN_MSVC_PACKAGES}/boost/lib/native/include") + # Multi-threaded static library. 
+ set(PROGRAM_OPTIONS_LIB_PATTERN "${DISKANN_MSVC_PACKAGES}/boost_program_options-vc${MSVC_TOOLSET_VERSION}/lib/native/libboost_program_options-vc${MSVC_TOOLSET_VERSION}-mt-x64-*.lib") + file(GLOB DISKANN_BOOST_PROGRAM_OPTIONS_LIB ${PROGRAM_OPTIONS_LIB_PATTERN}) + + set(PROGRAM_OPTIONS_DLIB_PATTERN "${DISKANN_MSVC_PACKAGES}/boost_program_options-vc${MSVC_TOOLSET_VERSION}/lib/native/libboost_program_options-vc${MSVC_TOOLSET_VERSION}-mt-gd-x64-*.lib") + file(GLOB DISKANN_BOOST_PROGRAM_OPTIONS_DLIB ${PROGRAM_OPTIONS_DLIB_PATTERN}) + + if (EXISTS ${DISKANN_BOOST_INCLUDE} AND EXISTS ${DISKANN_BOOST_PROGRAM_OPTIONS_LIB} AND EXISTS ${DISKANN_BOOST_PROGRAM_OPTIONS_DLIB}) + set(Boost_FOUND ON) + set(Boost_INCLUDE_DIR ${DISKANN_BOOST_INCLUDE}) + add_library(Boost::program_options STATIC IMPORTED) + set_target_properties(Boost::program_options PROPERTIES IMPORTED_LOCATION_RELEASE "${DISKANN_BOOST_PROGRAM_OPTIONS_LIB}") + set_target_properties(Boost::program_options PROPERTIES IMPORTED_LOCATION_DEBUG "${DISKANN_BOOST_PROGRAM_OPTIONS_DLIB}") + message(STATUS "Falling back to using Boost from the nuget package") + else() + message(WARNING "Couldn't find Boost. Was looking for ${DISKANN_BOOST_INCLUDE} and ${PROGRAM_OPTIONS_LIB_PATTERN}") + endif() +endif() + +if (NOT Boost_FOUND) + message(FATAL_ERROR "Couldn't find Boost dependency") +endif() + +include_directories(${Boost_INCLUDE_DIR}) + +#MKL Config +if (MSVC) + # Only the DiskANN DLL and one of the tools need MKL libraries. Additionally, only a small part of MKL is used. + # Given that and given that MKL DLLs are huge, use static linking to end up with no MKL DLL dependencies and with + # significantly smaller disk footprint. + # + # The compile options are not modified as there's already an unconditional -DMKL_ILP64 define below + # for all architectures, which is all that's needed. 
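# (Context, not taken from the upstream comments: -DMKL_ILP64 selects MKL's 64-bit-integer
# ILP64 interface, which is why the mkl_intel_ilp64 libraries, rather than the lp64
# variants, are the ones linked just below.)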
+ set(DISKANN_MKL_INCLUDE_DIRECTORIES "${DISKANN_MSVC_PACKAGES}/intelmkl.static.win-x64/lib/native/include") + set(DISKANN_MKL_LIB_PATH "${DISKANN_MSVC_PACKAGES}/intelmkl.static.win-x64/lib/native/win-x64") + + set(DISKANN_MKL_LINK_LIBRARIES + "${DISKANN_MKL_LIB_PATH}/mkl_intel_ilp64.lib" + "${DISKANN_MKL_LIB_PATH}/mkl_core.lib" + "${DISKANN_MKL_LIB_PATH}/mkl_intel_thread.lib") +elseif(APPLE) + # no mkl on non-intel devices + find_library(ACCELERATE_LIBRARY Accelerate) + message(STATUS "Found Accelerate (${ACCELERATE_LIBRARY})") + set(DISKANN_ACCEL_LINK_OPTIONS ${ACCELERATE_LIBRARY}) + add_compile_definitions(ACCELERATE_NEW_LAPACK) +else() + # expected path for manual intel mkl installs + set(POSSIBLE_OMP_PATHS "/opt/intel/oneapi/compiler/2025.0/lib/libiomp5.so;/opt/intel/oneapi/compiler/latest/linux/compiler/lib/intel64_lin/libiomp5.so;/usr/lib/x86_64-linux-gnu/libiomp5.so;/opt/intel/lib/intel64_lin/libiomp5.so") + foreach(POSSIBLE_OMP_PATH ${POSSIBLE_OMP_PATHS}) + if (EXISTS ${POSSIBLE_OMP_PATH}) + get_filename_component(OMP_PATH ${POSSIBLE_OMP_PATH} DIRECTORY) + endif() + endforeach() + + if(NOT OMP_PATH) + message(FATAL_ERROR "Could not find Intel OMP in standard locations; use -DOMP_PATH to specify the install location for your environment") + endif() + link_directories(${OMP_PATH}) + + set(POSSIBLE_MKL_LIB_PATHS "/opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_core.so;/usr/lib/x86_64-linux-gnu/libmkl_core.so;/opt/intel/mkl/lib/intel64/libmkl_core.so") + foreach(POSSIBLE_MKL_LIB_PATH ${POSSIBLE_MKL_LIB_PATHS}) + if (EXISTS ${POSSIBLE_MKL_LIB_PATH}) + get_filename_component(MKL_PATH ${POSSIBLE_MKL_LIB_PATH} DIRECTORY) + endif() + endforeach() + + set(POSSIBLE_MKL_INCLUDE_PATHS "/opt/intel/oneapi/mkl/latest/include;/usr/include/mkl;/opt/intel/mkl/include/;") + foreach(POSSIBLE_MKL_INCLUDE_PATH ${POSSIBLE_MKL_INCLUDE_PATHS}) + if (EXISTS ${POSSIBLE_MKL_INCLUDE_PATH}) + set(MKL_INCLUDE_PATH ${POSSIBLE_MKL_INCLUDE_PATH}) + endif() + endforeach() + if(NOT MKL_PATH) + message(FATAL_ERROR "Could not find Intel MKL in standard locations; use -DMKL_PATH to specify the install location for your environment") + elseif(NOT MKL_INCLUDE_PATH) + message(FATAL_ERROR "Could not find Intel MKL in standard locations; use -DMKL_INCLUDE_PATH to specify the install location for headers for your environment") + endif() + if (EXISTS ${MKL_PATH}/libmkl_def.so.2) + set(MKL_DEF_SO ${MKL_PATH}/libmkl_def.so.2) + elseif(EXISTS ${MKL_PATH}/libmkl_def.so) + set(MKL_DEF_SO ${MKL_PATH}/libmkl_def.so) + else() + message(FATAL_ERROR "Despite finding MKL, libmkl_def.so was not found in expected locations.") + endif() + link_directories(${MKL_PATH}) + include_directories(${MKL_INCLUDE_PATH}) + + # compile flags and link libraries + # if gcc/g++ + if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + add_compile_options(-m64 -Wl,--no-as-needed) + endif() + if (NOT PYBIND) + link_libraries(mkl_intel_ilp64 mkl_intel_thread mkl_core iomp5 pthread m dl) + else() + # static linking for python so as to minimize customer dependency issues + if (CMAKE_BUILD_TYPE STREQUAL "Debug") + # In debug mode, use dynamic linking to ensure all symbols are available + link_libraries(mkl_intel_ilp64 mkl_intel_thread mkl_core ${MKL_DEF_SO} iomp5 pthread m dl) + else() + # In release mode, use static linking to minimize dependencies + link_libraries( + ${MKL_PATH}/libmkl_intel_ilp64.a + ${MKL_PATH}/libmkl_intel_thread.a + ${MKL_PATH}/libmkl_core.a + ${MKL_DEF_SO} + iomp5 + pthread + m + dl + ) + endif() + endif() + + add_definitions(-DMKL_ILP64) +endif() + 
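# On the Linux branch above, Intel OpenMP and MKL are probed only at a handful of standard
# install prefixes; as its own error messages note, a non-standard layout can be pointed to
# explicitly at configure time. A sketch (directories are examples borrowed from the
# candidate lists above, not required locations):
#
#   cmake -S . -B build \
#         -DOMP_PATH=/opt/intel/oneapi/compiler/2025.0/lib \
#         -DMKL_PATH=/opt/intel/oneapi/mkl/latest/lib/intel64 \
#         -DMKL_INCLUDE_PATH=/opt/intel/oneapi/mkl/latest/include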
+ +# Section for tcmalloc. The DiskANN tools are always linked to tcmalloc. For Windows, they also need to +# force-include the _tcmalloc symbol for enabling tcmalloc. +# +# The DLL itself needs to be linked to tcmalloc only if DISKANN_RELEASE_UNUSED_TCMALLOC_MEMORY_AT_CHECKPOINTS +# is enabled. +if(USE_TCMALLOC) + if (MSVC) + if (NOT EXISTS "${PROJECT_SOURCE_DIR}/gperftools/gperftools.sln") + message(FATAL_ERROR "The gperftools submodule was not found. " + "Please check-out git submodules by doing 'git submodule init' followed by 'git submodule update'") + endif() + + set(TCMALLOC_LINK_LIBRARY "${PROJECT_SOURCE_DIR}/gperftools/x64/Release-Patch/libtcmalloc_minimal.lib") + set(TCMALLOC_WINDOWS_RUNTIME_FILES + "${PROJECT_SOURCE_DIR}/gperftools/x64/Release-Patch/libtcmalloc_minimal.dll" + "${PROJECT_SOURCE_DIR}/gperftools/x64/Release-Patch/libtcmalloc_minimal.pdb") + + # Tell CMake how to build the tcmalloc linker library from the submodule. + add_custom_target(build_libtcmalloc_minimal DEPENDS ${TCMALLOC_LINK_LIBRARY}) + add_custom_command(OUTPUT ${TCMALLOC_LINK_LIBRARY} + COMMAND ${CMAKE_VS_MSBUILD_COMMAND} gperftools.sln /m /nologo + /t:libtcmalloc_minimal /p:Configuration="Release-Patch" + /property:Platform="x64" + /p:PlatformToolset=v${MSVC_TOOLSET_VERSION} + /p:WindowsTargetPlatformVersion=${CMAKE_VS_WINDOWS_TARGET_PLATFORM_VERSION} + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/gperftools) + + add_library(libtcmalloc_minimal_for_exe STATIC IMPORTED) + add_library(libtcmalloc_minimal_for_dll STATIC IMPORTED) + + set_target_properties(libtcmalloc_minimal_for_dll PROPERTIES + IMPORTED_LOCATION "${TCMALLOC_LINK_LIBRARY}") + + set_target_properties(libtcmalloc_minimal_for_exe PROPERTIES + IMPORTED_LOCATION "${TCMALLOC_LINK_LIBRARY}" + INTERFACE_LINK_OPTIONS /INCLUDE:_tcmalloc) + + # Ensure libtcmalloc_minimal is built before it's being used. + add_dependencies(libtcmalloc_minimal_for_dll build_libtcmalloc_minimal) + add_dependencies(libtcmalloc_minimal_for_exe build_libtcmalloc_minimal) + + set(DISKANN_TOOLS_TCMALLOC_LINK_OPTIONS libtcmalloc_minimal_for_exe) + elseif(APPLE) # ! Inherited from #474, not been adjusted for TCMalloc Removal + execute_process( + COMMAND brew --prefix gperftools + OUTPUT_VARIABLE GPERFTOOLS_PREFIX + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + set(DISKANN_TOOLS_TCMALLOC_LINK_OPTIONS "-L${GPERFTOOLS_PREFIX}/lib -ltcmalloc") + elseif(NOT PYBIND) + set(DISKANN_TOOLS_TCMALLOC_LINK_OPTIONS "-ltcmalloc") + endif() + + if (DISKANN_RELEASE_UNUSED_TCMALLOC_MEMORY_AT_CHECKPOINTS) + add_definitions(-DRELEASE_UNUSED_TCMALLOC_MEMORY_AT_CHECKPOINTS) + + if (MSVC) + set(DISKANN_DLL_TCMALLOC_LINK_OPTIONS libtcmalloc_minimal_for_dll) + endif() + endif() +endif() + +if (NOT MSVC AND NOT APPLE) + set(DISKANN_ASYNC_LIB aio) +endif() + +#Main compiler/linker settings +if(MSVC) + #language options + add_compile_options(/permissive- /openmp:experimental /Zc:twoPhase- /Zc:inline /WX- /std:c++17 /Gd /W3 /MP /Zi /FC /nologo) + #code generation options + add_compile_options(/arch:AVX2 /fp:fast /fp:except- /EHsc /GS- /Gy) + #optimization options + add_compile_options(/Ot /Oy /Oi) + #path options + add_definitions(-DUSE_AVX2 -DUSE_ACCELERATED_PQ -D_WINDOWS -DNOMINMAX -DUNICODE) + # Linker options. Exclude VCOMP/VCOMPD.LIB which contain VisualStudio's version of OpenMP. + # MKL was linked against Intel's OpenMP and depends on the corresponding DLL. 
+ add_link_options(/NODEFAULTLIB:VCOMP.LIB /NODEFAULTLIB:VCOMPD.LIB /DEBUG:FULL /OPT:REF /OPT:ICF) + + set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG ${PROJECT_SOURCE_DIR}/x64/Debug) + set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG ${PROJECT_SOURCE_DIR}/x64/Debug) + set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_DEBUG ${PROJECT_SOURCE_DIR}/x64/Debug) + + set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELEASE ${PROJECT_SOURCE_DIR}/x64/Release) + set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE ${PROJECT_SOURCE_DIR}/x64/Release) + set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELEASE ${PROJECT_SOURCE_DIR}/x64/Release) +elseif(APPLE) + set(ENV{TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD} 500000000000) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ftree-vectorize -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free -Xclang -fopenmp -fopenmp-simd -funroll-loops -Wfatal-errors -Wno-inconsistent-missing-override -Wno-return-type") + set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g -DDEBUG") + set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Ofast -DNDEBUG -ftree-vectorize") + if (NOT PYBIND) + set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -DNDEBUG -Ofast") + if (NOT PORTABLE) + set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -mtune=native") + endif() + else() + # -Ofast is not supported in a python extension module + set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -DNDEBUG -fPIC") + endif() +else() + set(ENV{TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD} 500000000000) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx2 -mfma -msse2 -ftree-vectorize -fopenmp -fopenmp-simd -funroll-loops -Wfatal-errors -DUSE_AVX2 -fPIC") + if(USE_TCMALLOC) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free") + endif() + set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g -DDEBUG") + if (NOT PYBIND) + set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -DNDEBUG -Ofast") + if (NOT PORTABLE) + set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -march=native -mtune=native") + endif() + else() + # -Ofast is not supported in a python extension module + set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -DNDEBUG") + endif() +endif() + +add_subdirectory(src) +if (NOT PYBIND) + add_subdirectory(apps) + add_subdirectory(apps/utils) +endif() + +if (UNIT_TEST) + enable_testing() + add_subdirectory(tests) +endif() + +if (MSVC) + message(STATUS "The ${PROJECT_NAME}.sln has been created, opened it from VisualStudio to build Release or Debug configurations.\n" + "Alternatively, use MSBuild to build:\n\n" + "msbuild.exe ${PROJECT_NAME}.sln /m /nologo /t:Build /p:Configuration=\"Release\" /property:Platform=\"x64\"\n") +endif() + +if (RESTAPI) + if (MSVC) + set(DISKANN_CPPRESTSDK "${DISKANN_MSVC_PACKAGES}/cpprestsdk.v142/build/native") + # expected path for apt packaged intel mkl installs + link_libraries("${DISKANN_CPPRESTSDK}/x64/lib/cpprest142_2_10.lib") + include_directories("${DISKANN_CPPRESTSDK}/include") + endif() + add_subdirectory(apps/restapi) +endif() + +include(clang-format.cmake) + +if(PYBIND) + add_subdirectory(python) + + install(TARGETS _diskannpy + DESTINATION leann_backend_diskann + COMPONENT python_modules + ) + +endif() +############################################################################### +# PROTOBUF SECTION - Corrected to use CONFIG mode explicitly +############################################################################### +set(Protobuf_USE_STATIC_LIBS OFF) + +find_package(ZLIB REQUIRED) + +find_package(Protobuf 
REQUIRED) + +message(STATUS "Protobuf found: ${Protobuf_VERSION}") +message(STATUS "Protobuf include dirs: ${Protobuf_INCLUDE_DIRS}") +message(STATUS "Protobuf libraries: ${Protobuf_LIBRARIES}") +message(STATUS "Protobuf protoc executable: ${Protobuf_PROTOC_EXECUTABLE}") + +include_directories(${Protobuf_INCLUDE_DIRS}) + +set(PROTO_FILE "${CMAKE_CURRENT_SOURCE_DIR}/../embedding.proto") +protobuf_generate_cpp(PROTO_SRCS PROTO_HDRS ${PROTO_FILE}) +set(generated_proto_sources ${PROTO_SRCS}) + + +add_library(proto_embeddings STATIC ${generated_proto_sources}) +target_link_libraries(proto_embeddings PUBLIC protobuf::libprotobuf) +target_include_directories(proto_embeddings PUBLIC + ${CMAKE_CURRENT_BINARY_DIR} + ${Protobuf_INCLUDE_DIRS} +) + +target_link_libraries(diskann PRIVATE proto_embeddings protobuf::libprotobuf) +target_include_directories(diskann PRIVATE + ${CMAKE_CURRENT_BINARY_DIR} + ${Protobuf_INCLUDE_DIRS} +) + +target_link_libraries(diskann_s PRIVATE proto_embeddings protobuf::libprotobuf) +target_include_directories(diskann_s PRIVATE + ${CMAKE_CURRENT_BINARY_DIR} + ${Protobuf_INCLUDE_DIRS} +) + + +############################################################################### +# ZEROMQ SECTION - REQUIRED +############################################################################### + +find_package(ZeroMQ QUIET) +if(NOT ZeroMQ_FOUND) + find_path(ZeroMQ_INCLUDE_DIR zmq.h) + find_library(ZeroMQ_LIBRARY zmq) + if(ZeroMQ_INCLUDE_DIR AND ZeroMQ_LIBRARY) + set(ZeroMQ_FOUND TRUE) + endif() +endif() + +if(ZeroMQ_FOUND) + message(STATUS "Found ZeroMQ: ${ZeroMQ_LIBRARY}") + include_directories(${ZeroMQ_INCLUDE_DIR}) + target_link_libraries(diskann PRIVATE ${ZeroMQ_LIBRARY}) + target_link_libraries(diskann_s PRIVATE ${ZeroMQ_LIBRARY}) + add_definitions(-DUSE_ZEROMQ) +else() + message(FATAL_ERROR "ZeroMQ is required but not found. 
Please install ZeroMQ and try again.") +endif() + +target_link_libraries(diskann ${PYBIND11_LIBRARIES}) +target_link_libraries(diskann_s ${PYBIND11_LIBRARIES}) diff --git a/packages/leann-backend-diskann/third_party/DiskANN/CMakeSettings.json b/packages/leann-backend-diskann/third_party/DiskANN/CMakeSettings.json new file mode 100644 index 0000000..af5d7b5 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/CMakeSettings.json @@ -0,0 +1,28 @@ +{ + "configurations": [ + { + "name": "x64-Release", + "generator": "Ninja", + "configurationType": "Release", + "inheritEnvironments": [ "msvc_x64" ], + "buildRoot": "${projectDir}\\out\\build\\${name}", + "installRoot": "${projectDir}\\out\\install\\${name}", + "cmakeCommandArgs": "", + "buildCommandArgs": "", + "ctestCommandArgs": "" + }, + { + "name": "WSL-GCC-Release", + "generator": "Ninja", + "configurationType": "RelWithDebInfo", + "buildRoot": "${projectDir}\\out\\build\\${name}", + "installRoot": "${projectDir}\\out\\install\\${name}", + "cmakeExecutable": "cmake", + "cmakeCommandArgs": "", + "buildCommandArgs": "", + "ctestCommandArgs": "", + "inheritEnvironments": [ "linux_x64" ], + "wslPath": "${defaultWSLPath}" + } + ] +} \ No newline at end of file diff --git a/packages/leann-backend-diskann/third_party/DiskANN/CODE_OF_CONDUCT.md b/packages/leann-backend-diskann/third_party/DiskANN/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..f9ba8cf --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/CODE_OF_CONDUCT.md @@ -0,0 +1,9 @@ +# Microsoft Open Source Code of Conduct + +This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). + +Resources: + +- [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) +- [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) +- Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns diff --git a/packages/leann-backend-diskann/third_party/DiskANN/CONTRIBUTING.md b/packages/leann-backend-diskann/third_party/DiskANN/CONTRIBUTING.md new file mode 100644 index 0000000..dcbf795 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/CONTRIBUTING.md @@ -0,0 +1,9 @@ +# Contributing + +This project welcomes contributions and suggestions. Most contributions require you to agree to a +Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us +the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com. + +When you submit a pull request, a CLA bot will automatically determine whether you need to provide +a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions +provided by the bot. You will only need to do this once across all repos using our CLA. diff --git a/packages/leann-backend-diskann/third_party/DiskANN/Dockerfile b/packages/leann-backend-diskann/third_party/DiskANN/Dockerfile new file mode 100644 index 0000000..ea1979f --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/Dockerfile @@ -0,0 +1,17 @@ +#Copyright(c) Microsoft Corporation.All rights reserved. +#Licensed under the MIT license. 
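# Usage sketch (illustrative; "diskann-build" is an arbitrary example image tag):
#   docker build -t diskann-build .
#   docker run --rm -it diskann-build /bin/bash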
+ +FROM ubuntu:jammy + +RUN apt update +RUN apt install -y software-properties-common +RUN add-apt-repository -y ppa:git-core/ppa +RUN apt update +RUN DEBIAN_FRONTEND=noninteractive apt install -y git make cmake g++ libaio-dev libgoogle-perftools-dev libunwind-dev clang-format libboost-dev libboost-program-options-dev libmkl-full-dev libcpprest-dev python3.10 + +WORKDIR /app +RUN git clone https://github.com/microsoft/DiskANN.git +WORKDIR /app/DiskANN +RUN mkdir build +RUN cmake -S . -B build -DCMAKE_BUILD_TYPE=Release +RUN cmake --build build -- -j diff --git a/packages/leann-backend-diskann/third_party/DiskANN/DockerfileDev b/packages/leann-backend-diskann/third_party/DiskANN/DockerfileDev new file mode 100644 index 0000000..0e95e40 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/DockerfileDev @@ -0,0 +1,17 @@ +#Copyright(c) Microsoft Corporation.All rights reserved. +#Licensed under the MIT license. + +FROM ubuntu:jammy + +RUN apt update +RUN apt install -y software-properties-common +RUN add-apt-repository -y ppa:git-core/ppa +RUN apt update +RUN DEBIAN_FRONTEND=noninteractive apt install -y git make cmake g++ libaio-dev libgoogle-perftools-dev libunwind-dev clang-format libboost-dev libboost-program-options-dev libboost-test-dev libmkl-full-dev libcpprest-dev python3.10 + +WORKDIR /app +RUN git clone https://github.com/microsoft/DiskANN.git +WORKDIR /app/DiskANN +RUN mkdir build +RUN cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DUNIT_TEST=True +RUN cmake --build build -- -j diff --git a/packages/leann-backend-diskann/third_party/DiskANN/LICENSE b/packages/leann-backend-diskann/third_party/DiskANN/LICENSE new file mode 100644 index 0000000..b7a909e --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/LICENSE @@ -0,0 +1,23 @@ + DiskANN + + MIT License + + Copyright (c) Microsoft Corporation. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE diff --git a/packages/leann-backend-diskann/third_party/DiskANN/MANIFEST.in b/packages/leann-backend-diskann/third_party/DiskANN/MANIFEST.in new file mode 100644 index 0000000..0735c27 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/MANIFEST.in @@ -0,0 +1,12 @@ +include MANIFEST.in +include *.txt +include *.md +include setup.py +include pyproject.toml +include *.cmake +recursive-include gperftools * +recursive-include include * +recursive-include python * +recursive-include windows * +prune python/tests +recursive-include src * diff --git a/packages/leann-backend-diskann/third_party/DiskANN/README.md b/packages/leann-backend-diskann/third_party/DiskANN/README.md new file mode 100644 index 0000000..44f4c27 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/README.md @@ -0,0 +1,135 @@ +# DiskANN + +[![DiskANN Main](https://github.com/microsoft/DiskANN/actions/workflows/push-test.yml/badge.svg?branch=main)](https://github.com/microsoft/DiskANN/actions/workflows/push-test.yml) +[![PyPI version](https://img.shields.io/pypi/v/diskannpy.svg)](https://pypi.org/project/diskannpy/) +[![Downloads shield](https://pepy.tech/badge/diskannpy)](https://pepy.tech/project/diskannpy) +[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) + +[![DiskANN Paper](https://img.shields.io/badge/Paper-NeurIPS%3A_DiskANN-blue)](https://papers.nips.cc/paper/9527-rand-nsg-fast-accurate-billion-point-nearest-neighbor-search-on-a-single-node.pdf) +[![DiskANN Paper](https://img.shields.io/badge/Paper-Arxiv%3A_Fresh--DiskANN-blue)](https://arxiv.org/abs/2105.09613) +[![DiskANN Paper](https://img.shields.io/badge/Paper-Filtered--DiskANN-blue)](https://harsha-simhadri.org/pubs/Filtered-DiskANN23.pdf) + + +DiskANN is a suite of scalable, accurate and cost-effective approximate nearest neighbor search algorithms for large-scale vector search that support real-time changes and simple filters. +This code is based on ideas from the [DiskANN](https://papers.nips.cc/paper/9527-rand-nsg-fast-accurate-billion-point-nearest-neighbor-search-on-a-single-node.pdf), [Fresh-DiskANN](https://arxiv.org/abs/2105.09613) and the [Filtered-DiskANN](https://harsha-simhadri.org/pubs/Filtered-DiskANN23.pdf) papers with further improvements. +This code forked off from [code for NSG](https://github.com/ZJULearning/nsg) algorithm. + +This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). +For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or +contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. + +See [guidelines](CONTRIBUTING.md) for contributing to this project. 
+ +## Linux build: + +Install the following packages through apt-get + +```bash +sudo apt install make cmake g++ libaio-dev libgoogle-perftools-dev clang-format libboost-all-dev +``` + +### Install Intel MKL +#### Ubuntu 20.04 or newer +```bash +sudo apt install libmkl-full-dev +``` + +#### Earlier versions of Ubuntu +Install Intel MKL either by downloading the [oneAPI MKL installer](https://www.intel.com/content/www/us/en/developer/tools/oneapi/onemkl.html) or using [apt](https://software.intel.com/en-us/articles/installing-intel-free-libs-and-python-apt-repo) (we tested with build 2019.4-070 and 2022.1.2.146). + +``` +# OneAPI MKL Installer +wget https://registrationcenter-download.intel.com/akdlm/irc_nas/18487/l_BaseKit_p_2022.1.2.146.sh +sudo sh l_BaseKit_p_2022.1.2.146.sh -a --components intel.oneapi.lin.mkl.devel --action install --eula accept -s +``` + +### Build +```bash +mkdir build && cd build && cmake -DCMAKE_BUILD_TYPE=Release .. && make -j +``` + +## Windows build: + +The Windows version has been tested with Enterprise editions of Visual Studio 2022, 2019 and 2017. It should work with the Community and Professional editions as well without any changes. + +**Prerequisites:** + +* CMake 3.15+ (available in VisualStudio 2019+ or from https://cmake.org) +* NuGet.exe (install from https://www.nuget.org/downloads) + * The build script will use NuGet to get MKL, OpenMP and Boost packages. +* DiskANN git repository checked out together with submodules. To check out submodules after git clone: +``` +git submodule init +git submodule update +``` + +* Environment variables: + * [optional] If you would like to override the Boost library listed in windows/packages.config.in, set BOOST_ROOT to your Boost folder. + +**Build steps:** +* Open the "x64 Native Tools Command Prompt for VS 2019" (or corresponding version) and change to DiskANN folder +* Create a "build" directory inside it +* Change to the "build" directory and run +``` +cmake .. +``` +OR for Visual Studio 2017 and earlier: +``` +\cmake .. +``` +**This will create a diskann.sln solution**. Now you can: + +- Open it from VisualStudio and build either Release or Debug configuration. +- `\cmake --build build` +- Use MSBuild: +``` +msbuild.exe diskann.sln /m /nologo /t:Build /p:Configuration="Release" /property:Platform="x64" +``` + +* This will also build gperftools submodule for libtcmalloc_minimal dependency. +* Generated binaries are stored in the x64/Release or x64/Debug directories. + +## macOS Build + +### Prerequisites +* Apple Silicon. The code should still work on Intel-based Macs, but there are no guarantees. +* macOS >= 12.0 +* XCode Command Line Tools (install with `xcode-select --install`) +* [homebrew](https://brew.sh/) + +### Install Required Packages +```zsh +brew install cmake +brew install boost +brew install gperftools +brew install libomp +``` + +### Build DiskANN +```zsh +# same as ubuntu instructions +mkdir build && cd build && cmake -DCMAKE_BUILD_TYPE=Release .. 
&& make -j +``` + +## Usage: + +Please see the following pages on using the compiled code: + +- [Commandline interface for building and search SSD based indices](workflows/SSD_index.md) +- [Commandline interface for building and search in memory indices](workflows/in_memory_index.md) +- [Commandline examples for using in-memory streaming indices](workflows/dynamic_index.md) +- [Commandline interface for building and search in memory indices with label data and filters](workflows/filtered_in_memory.md) +- [Commandline interface for building and search SSD based indices with label data and filters](workflows/filtered_ssd_index.md) +- [diskannpy - DiskANN as a python extension module](python/README.md) + +Please cite this software in your work as: + +``` +@misc{diskann-github, + author = {Simhadri, Harsha Vardhan and Krishnaswamy, Ravishankar and Srinivasa, Gopal and Subramanya, Suhas Jayaram and Antonijevic, Andrija and Pryce, Dax and Kaczynski, David and Williams, Shane and Gollapudi, Siddarth and Sivashankar, Varun and Karia, Neel and Singh, Aditi and Jaiswal, Shikhar and Mahapatro, Neelam and Adams, Philip and Tower, Bryan and Patel, Yash}}, + title = {{DiskANN: Graph-structured Indices for Scalable, Fast, Fresh and Filtered Approximate Nearest Neighbor Search}}, + url = {https://github.com/Microsoft/DiskANN}, + version = {0.6.1}, + year = {2023} +} +``` diff --git a/packages/leann-backend-diskann/third_party/DiskANN/SECURITY.md b/packages/leann-backend-diskann/third_party/DiskANN/SECURITY.md new file mode 100644 index 0000000..f7b8998 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/SECURITY.md @@ -0,0 +1,41 @@ + + +## Security + +Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). + +If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://docs.microsoft.com/en-us/previous-versions/tn-archive/cc751383(v=technet.10)), please report it to us as described below. + +## Reporting Security Issues + +**Please do not report security vulnerabilities through public GitHub issues.** + +Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://msrc.microsoft.com/create-report). + +If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/en-us/msrc/pgp-key-msrc). + +You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). + +Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: + + * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 
+ * Full paths of source file(s) related to the manifestation of the issue + * The location of the affected source code (tag/branch/commit or direct URL) + * Any special configuration required to reproduce the issue + * Step-by-step instructions to reproduce the issue + * Proof-of-concept or exploit code (if possible) + * Impact of the issue, including how an attacker might exploit the issue + +This information will help us triage your report more quickly. + +If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://microsoft.com/msrc/bounty) page for more details about our active programs. + +## Preferred Languages + +We prefer all communications to be in English. + +## Policy + +Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://www.microsoft.com/en-us/msrc/cvd). + + \ No newline at end of file diff --git a/packages/leann-backend-diskann/third_party/DiskANN/apps/CMakeLists.txt b/packages/leann-backend-diskann/third_party/DiskANN/apps/CMakeLists.txt new file mode 100644 index 0000000..e42c0b6 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/apps/CMakeLists.txt @@ -0,0 +1,42 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT license. + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_COMPILE_WARNING_AS_ERROR ON) + +add_executable(build_memory_index build_memory_index.cpp) +target_link_libraries(build_memory_index ${PROJECT_NAME} ${DISKANN_TOOLS_TCMALLOC_LINK_OPTIONS} Boost::program_options) + +add_executable(build_stitched_index build_stitched_index.cpp) +target_link_libraries(build_stitched_index ${PROJECT_NAME} ${DISKANN_TOOLS_TCMALLOC_LINK_OPTIONS} Boost::program_options) + +add_executable(search_memory_index search_memory_index.cpp) +target_link_libraries(search_memory_index ${PROJECT_NAME} ${DISKANN_ASYNC_LIB} ${DISKANN_TOOLS_TCMALLOC_LINK_OPTIONS} Boost::program_options) + +add_executable(build_disk_index build_disk_index.cpp) +target_link_libraries(build_disk_index ${PROJECT_NAME} ${DISKANN_TOOLS_TCMALLOC_LINK_OPTIONS} ${DISKANN_ASYNC_LIB} Boost::program_options) + +add_executable(search_disk_index search_disk_index.cpp) +target_link_libraries(search_disk_index ${PROJECT_NAME} ${DISKANN_ASYNC_LIB} ${DISKANN_TOOLS_TCMALLOC_LINK_OPTIONS} Boost::program_options) + +add_executable(range_search_disk_index range_search_disk_index.cpp) +target_link_libraries(range_search_disk_index ${PROJECT_NAME} ${DISKANN_ASYNC_LIB} ${DISKANN_TOOLS_TCMALLOC_LINK_OPTIONS} Boost::program_options) + +add_executable(test_streaming_scenario test_streaming_scenario.cpp) +target_link_libraries(test_streaming_scenario ${PROJECT_NAME} ${DISKANN_TOOLS_TCMALLOC_LINK_OPTIONS} Boost::program_options) + +add_executable(test_insert_deletes_consolidate test_insert_deletes_consolidate.cpp) +target_link_libraries(test_insert_deletes_consolidate ${PROJECT_NAME} ${DISKANN_TOOLS_TCMALLOC_LINK_OPTIONS} Boost::program_options) + +if (NOT MSVC) + install(TARGETS build_memory_index + build_stitched_index + search_memory_index + build_disk_index + search_disk_index + range_search_disk_index + test_streaming_scenario + test_insert_deletes_consolidate + RUNTIME + ) +endif() diff --git a/packages/leann-backend-diskann/third_party/DiskANN/apps/build_disk_index.cpp b/packages/leann-backend-diskann/third_party/DiskANN/apps/build_disk_index.cpp new file mode 100644 index 0000000..f48b617 --- /dev/null +++ 
b/packages/leann-backend-diskann/third_party/DiskANN/apps/build_disk_index.cpp @@ -0,0 +1,191 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#include +#include + +#include "utils.h" +#include "disk_utils.h" +#include "math_utils.h" +#include "index.h" +#include "partition.h" +#include "program_options_utils.hpp" + +namespace po = boost::program_options; + +int main(int argc, char **argv) +{ + std::string data_type, dist_fn, data_path, index_path_prefix, codebook_prefix, label_file, universal_label, + label_type; + uint32_t num_threads, R, L, disk_PQ, build_PQ, QD, Lf, filter_threshold; + float B, M; + bool append_reorder_data = false; + bool use_opq = false; + + po::options_description desc{ + program_options_utils::make_program_description("build_disk_index", "Build a disk-based index.")}; + try + { + desc.add_options()("help,h", "Print information on arguments"); + + // Required parameters + po::options_description required_configs("Required"); + required_configs.add_options()("data_type", po::value(&data_type)->required(), + program_options_utils::DATA_TYPE_DESCRIPTION); + required_configs.add_options()("dist_fn", po::value(&dist_fn)->required(), + program_options_utils::DISTANCE_FUNCTION_DESCRIPTION); + required_configs.add_options()("index_path_prefix", po::value(&index_path_prefix)->required(), + program_options_utils::INDEX_PATH_PREFIX_DESCRIPTION); + required_configs.add_options()("data_path", po::value(&data_path)->required(), + program_options_utils::INPUT_DATA_PATH); + required_configs.add_options()("search_DRAM_budget,B", po::value(&B)->required(), + "DRAM budget in GB for searching the index to set the " + "compressed level for data while search happens"); + required_configs.add_options()("build_DRAM_budget,M", po::value(&M)->required(), + "DRAM budget in GB for building the index"); + + // Optional parameters + po::options_description optional_configs("Optional"); + optional_configs.add_options()("num_threads,T", + po::value(&num_threads)->default_value(omp_get_num_procs()), + program_options_utils::NUMBER_THREADS_DESCRIPTION); + optional_configs.add_options()("max_degree,R", po::value(&R)->default_value(64), + program_options_utils::MAX_BUILD_DEGREE); + optional_configs.add_options()("Lbuild,L", po::value(&L)->default_value(100), + program_options_utils::GRAPH_BUILD_COMPLEXITY); + optional_configs.add_options()("QD", po::value(&QD)->default_value(0), + " Quantized Dimension for compression"); + optional_configs.add_options()("codebook_prefix", po::value(&codebook_prefix)->default_value(""), + "Path prefix for pre-trained codebook"); + optional_configs.add_options()("PQ_disk_bytes", po::value(&disk_PQ)->default_value(0), + "Number of bytes to which vectors should be compressed " + "on SSD; 0 for no compression"); + optional_configs.add_options()("append_reorder_data", po::bool_switch()->default_value(false), + "Include full precision data in the index. 
Use only in " + "conjuction with compressed data on SSD."); + optional_configs.add_options()("build_PQ_bytes", po::value(&build_PQ)->default_value(0), + program_options_utils::BUIlD_GRAPH_PQ_BYTES); + optional_configs.add_options()("use_opq", po::bool_switch()->default_value(false), + program_options_utils::USE_OPQ); + optional_configs.add_options()("label_file", po::value(&label_file)->default_value(""), + program_options_utils::LABEL_FILE); + optional_configs.add_options()("universal_label", po::value(&universal_label)->default_value(""), + program_options_utils::UNIVERSAL_LABEL); + optional_configs.add_options()("FilteredLbuild", po::value(&Lf)->default_value(0), + program_options_utils::FILTERED_LBUILD); + optional_configs.add_options()("filter_threshold,F", po::value(&filter_threshold)->default_value(0), + "Threshold to break up the existing nodes to generate new graph " + "internally where each node has a maximum F labels."); + optional_configs.add_options()("label_type", po::value(&label_type)->default_value("uint"), + program_options_utils::LABEL_TYPE_DESCRIPTION); + + // Merge required and optional parameters + desc.add(required_configs).add(optional_configs); + + po::variables_map vm; + po::store(po::parse_command_line(argc, argv, desc), vm); + if (vm.count("help")) + { + std::cout << desc; + return 0; + } + po::notify(vm); + if (vm["append_reorder_data"].as()) + append_reorder_data = true; + if (vm["use_opq"].as()) + use_opq = true; + } + catch (const std::exception &ex) + { + std::cerr << ex.what() << '\n'; + return -1; + } + + bool use_filters = (label_file != "") ? true : false; + diskann::Metric metric; + if (dist_fn == std::string("l2")) + metric = diskann::Metric::L2; + else if (dist_fn == std::string("mips")) + metric = diskann::Metric::INNER_PRODUCT; + else if (dist_fn == std::string("cosine")) + metric = diskann::Metric::COSINE; + else + { + std::cout << "Error. Only l2 and mips distance functions are supported" << std::endl; + return -1; + } + + if (append_reorder_data) + { + if (disk_PQ == 0) + { + std::cout << "Error: It is not necessary to append data for reordering " + "when vectors are not compressed on disk." + << std::endl; + return -1; + } + if (data_type != std::string("float")) + { + std::cout << "Error: Appending data for reordering currently only " + "supported for float data type." 
+ << std::endl; + return -1; + } + } + + std::string params = std::string(std::to_string(R)) + " " + std::string(std::to_string(L)) + " " + + std::string(std::to_string(B)) + " " + std::string(std::to_string(M)) + " " + + std::string(std::to_string(num_threads)) + " " + std::string(std::to_string(disk_PQ)) + " " + + std::string(std::to_string(append_reorder_data)) + " " + + std::string(std::to_string(build_PQ)) + " " + std::string(std::to_string(QD)); + + try + { + if (label_file != "" && label_type == "ushort") + { + if (data_type == std::string("int8")) + return diskann::build_disk_index(data_path.c_str(), index_path_prefix.c_str(), params.c_str(), + metric, use_opq, codebook_prefix, use_filters, label_file, + universal_label, filter_threshold, Lf); + else if (data_type == std::string("uint8")) + return diskann::build_disk_index( + data_path.c_str(), index_path_prefix.c_str(), params.c_str(), metric, use_opq, codebook_prefix, + use_filters, label_file, universal_label, filter_threshold, Lf); + else if (data_type == std::string("float")) + return diskann::build_disk_index( + data_path.c_str(), index_path_prefix.c_str(), params.c_str(), metric, use_opq, codebook_prefix, + use_filters, label_file, universal_label, filter_threshold, Lf); + else + { + diskann::cerr << "Error. Unsupported data type" << std::endl; + return -1; + } + } + else + { + if (data_type == std::string("int8")) + return diskann::build_disk_index(data_path.c_str(), index_path_prefix.c_str(), params.c_str(), + metric, use_opq, codebook_prefix, use_filters, label_file, + universal_label, filter_threshold, Lf); + else if (data_type == std::string("uint8")) + return diskann::build_disk_index(data_path.c_str(), index_path_prefix.c_str(), params.c_str(), + metric, use_opq, codebook_prefix, use_filters, label_file, + universal_label, filter_threshold, Lf); + else if (data_type == std::string("float")) + return diskann::build_disk_index(data_path.c_str(), index_path_prefix.c_str(), params.c_str(), + metric, use_opq, codebook_prefix, use_filters, label_file, + universal_label, filter_threshold, Lf); + else + { + diskann::cerr << "Error. Unsupported data type" << std::endl; + return -1; + } + } + } + catch (const std::exception &e) + { + std::cout << std::string(e.what()) << std::endl; + diskann::cerr << "Index build failed." << std::endl; + return -1; + } +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/apps/build_memory_index.cpp b/packages/leann-backend-diskann/third_party/DiskANN/apps/build_memory_index.cpp new file mode 100644 index 0000000..544e42d --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/apps/build_memory_index.cpp @@ -0,0 +1,164 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
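+// Example invocation (a sketch only: the data and index paths are illustrative,
+// while the flags and their defaults are the ones registered in main() below):
+//   ./build_memory_index --data_type float --dist_fn l2 \
+//       --data_path data.fbin --index_path_prefix index/mem_r64 \
+//       -R 64 -L 100 --alpha 1.2 -T 16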
+ +#include +#include +#include + +#include "index.h" +#include "utils.h" +#include "program_options_utils.hpp" + +#ifndef _WINDOWS +#include +#include +#else +#include +#endif + +#include "memory_mapper.h" +#include "ann_exception.h" +#include "index_factory.h" + +namespace po = boost::program_options; + +int main(int argc, char **argv) +{ + std::string data_type, dist_fn, data_path, index_path_prefix, label_file, universal_label, label_type; + uint32_t num_threads, R, L, Lf, build_PQ_bytes; + float alpha; + bool use_pq_build, use_opq; + + po::options_description desc{ + program_options_utils::make_program_description("build_memory_index", "Build a memory-based DiskANN index.")}; + try + { + desc.add_options()("help,h", "Print information on arguments"); + + // Required parameters + po::options_description required_configs("Required"); + required_configs.add_options()("data_type", po::value(&data_type)->required(), + program_options_utils::DATA_TYPE_DESCRIPTION); + required_configs.add_options()("dist_fn", po::value(&dist_fn)->required(), + program_options_utils::DISTANCE_FUNCTION_DESCRIPTION); + required_configs.add_options()("index_path_prefix", po::value(&index_path_prefix)->required(), + program_options_utils::INDEX_PATH_PREFIX_DESCRIPTION); + required_configs.add_options()("data_path", po::value(&data_path)->required(), + program_options_utils::INPUT_DATA_PATH); + + // Optional parameters + po::options_description optional_configs("Optional"); + optional_configs.add_options()("num_threads,T", + po::value(&num_threads)->default_value(omp_get_num_procs()), + program_options_utils::NUMBER_THREADS_DESCRIPTION); + optional_configs.add_options()("max_degree,R", po::value(&R)->default_value(64), + program_options_utils::MAX_BUILD_DEGREE); + optional_configs.add_options()("Lbuild,L", po::value(&L)->default_value(100), + program_options_utils::GRAPH_BUILD_COMPLEXITY); + optional_configs.add_options()("alpha", po::value(&alpha)->default_value(1.2f), + program_options_utils::GRAPH_BUILD_ALPHA); + optional_configs.add_options()("build_PQ_bytes", po::value(&build_PQ_bytes)->default_value(0), + program_options_utils::BUIlD_GRAPH_PQ_BYTES); + optional_configs.add_options()("use_opq", po::bool_switch()->default_value(false), + program_options_utils::USE_OPQ); + optional_configs.add_options()("label_file", po::value(&label_file)->default_value(""), + program_options_utils::LABEL_FILE); + optional_configs.add_options()("universal_label", po::value(&universal_label)->default_value(""), + program_options_utils::UNIVERSAL_LABEL); + + optional_configs.add_options()("FilteredLbuild", po::value(&Lf)->default_value(0), + program_options_utils::FILTERED_LBUILD); + optional_configs.add_options()("label_type", po::value(&label_type)->default_value("uint"), + program_options_utils::LABEL_TYPE_DESCRIPTION); + + // Merge required and optional parameters + desc.add(required_configs).add(optional_configs); + + po::variables_map vm; + po::store(po::parse_command_line(argc, argv, desc), vm); + if (vm.count("help")) + { + std::cout << desc; + return 0; + } + po::notify(vm); + use_pq_build = (build_PQ_bytes > 0); + use_opq = vm["use_opq"].as(); + } + catch (const std::exception &ex) + { + std::cerr << ex.what() << '\n'; + return -1; + } + + diskann::Metric metric; + if (dist_fn == std::string("mips")) + { + metric = diskann::Metric::INNER_PRODUCT; + } + else if (dist_fn == std::string("l2")) + { + metric = diskann::Metric::L2; + } + else if (dist_fn == std::string("cosine")) + { + metric = diskann::Metric::COSINE; + } + 
else + { + std::cout << "Unsupported distance function. Currently only L2/ Inner " + "Product/Cosine are supported." + << std::endl; + return -1; + } + + try + { + diskann::cout << "Starting index build with R: " << R << " Lbuild: " << L << " alpha: " << alpha + << " #threads: " << num_threads << std::endl; + + size_t data_num, data_dim; + diskann::get_bin_metadata(data_path, data_num, data_dim); + + auto index_build_params = diskann::IndexWriteParametersBuilder(L, R) + .with_filter_list_size(Lf) + .with_alpha(alpha) + .with_saturate_graph(false) + .with_num_threads(num_threads) + .build(); + + auto filter_params = diskann::IndexFilterParamsBuilder() + .with_universal_label(universal_label) + .with_label_file(label_file) + .with_save_path_prefix(index_path_prefix) + .build(); + auto config = diskann::IndexConfigBuilder() + .with_metric(metric) + .with_dimension(data_dim) + .with_max_points(data_num) + .with_data_load_store_strategy(diskann::DataStoreStrategy::MEMORY) + .with_graph_load_store_strategy(diskann::GraphStoreStrategy::MEMORY) + .with_data_type(data_type) + .with_label_type(label_type) + .is_dynamic_index(false) + .with_index_write_params(index_build_params) + .is_enable_tags(false) + .is_use_opq(use_opq) + .is_pq_dist_build(use_pq_build) + .with_num_pq_chunks(build_PQ_bytes) + .build(); + + auto index_factory = diskann::IndexFactory(config); + auto index = index_factory.create_instance(); + index->build(data_path, data_num, filter_params); + index->save(index_path_prefix.c_str()); + index.reset(); + return 0; + } + catch (const std::exception &e) + { + std::cout << std::string(e.what()) << std::endl; + diskann::cerr << "Index build failed." << std::endl; + return -1; + } +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/apps/build_stitched_index.cpp b/packages/leann-backend-diskann/third_party/DiskANN/apps/build_stitched_index.cpp new file mode 100644 index 0000000..60e38c1 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/apps/build_stitched_index.cpp @@ -0,0 +1,441 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#include +#include +#include +#include +#include +#include +#include +#include "filter_utils.h" +#include +#ifndef _WINDOWS +#include +#endif + +#include "index.h" +#include "memory_mapper.h" +#include "parameters.h" +#include "utils.h" +#include "program_options_utils.hpp" + +namespace po = boost::program_options; +typedef std::tuple>, uint64_t> stitch_indices_return_values; + +/* + * Inline function to display progress bar. + */ +inline void print_progress(double percentage) +{ + int val = (int)(percentage * 100); + int lpad = (int)(percentage * PBWIDTH); + int rpad = PBWIDTH - lpad; + printf("\r%3d%% [%.*s%*s]", val, lpad, PBSTR, rpad, ""); + fflush(stdout); +} + +/* + * Inline function to generate a random integer in a range. + */ +inline size_t random(size_t range_from, size_t range_to) +{ + std::random_device rand_dev; + std::mt19937 generator(rand_dev()); + std::uniform_int_distribution distr(range_from, range_to); + return distr(generator); +} + +/* + * function to handle command line parsing. + * + * Arguments are merely the inputs from the command line. 
+ */ +void handle_args(int argc, char **argv, std::string &data_type, path &input_data_path, path &final_index_path_prefix, + path &label_data_path, std::string &universal_label, uint32_t &num_threads, uint32_t &R, uint32_t &L, + uint32_t &stitched_R, float &alpha) +{ + po::options_description desc{ + program_options_utils::make_program_description("build_stitched_index", "Build a stitched DiskANN index.")}; + try + { + desc.add_options()("help,h", "Print information on arguments"); + + // Required parameters + po::options_description required_configs("Required"); + required_configs.add_options()("data_type", po::value(&data_type)->required(), + program_options_utils::DATA_TYPE_DESCRIPTION); + required_configs.add_options()("index_path_prefix", + po::value(&final_index_path_prefix)->required(), + program_options_utils::INDEX_PATH_PREFIX_DESCRIPTION); + required_configs.add_options()("data_path", po::value(&input_data_path)->required(), + program_options_utils::INPUT_DATA_PATH); + + // Optional parameters + po::options_description optional_configs("Optional"); + optional_configs.add_options()("num_threads,T", + po::value(&num_threads)->default_value(omp_get_num_procs()), + program_options_utils::NUMBER_THREADS_DESCRIPTION); + optional_configs.add_options()("max_degree,R", po::value(&R)->default_value(64), + program_options_utils::MAX_BUILD_DEGREE); + optional_configs.add_options()("Lbuild,L", po::value(&L)->default_value(100), + program_options_utils::GRAPH_BUILD_COMPLEXITY); + optional_configs.add_options()("alpha", po::value(&alpha)->default_value(1.2f), + program_options_utils::GRAPH_BUILD_ALPHA); + optional_configs.add_options()("label_file", po::value(&label_data_path)->default_value(""), + program_options_utils::LABEL_FILE); + optional_configs.add_options()("universal_label", po::value(&universal_label)->default_value(""), + program_options_utils::UNIVERSAL_LABEL); + optional_configs.add_options()("stitched_R", po::value(&stitched_R)->default_value(100), + "Degree to prune final graph down to"); + + // Merge required and optional parameters + desc.add(required_configs).add(optional_configs); + + po::variables_map vm; + po::store(po::parse_command_line(argc, argv, desc), vm); + if (vm.count("help")) + { + std::cout << desc; + exit(0); + } + po::notify(vm); + } + catch (const std::exception &ex) + { + std::cerr << ex.what() << '\n'; + throw; + } +} + +/* + * Custom index save to write the in-memory index to disk. + * Also writes required files for diskANN API - + * 1. labels_to_medoids + * 2. universal_label + * 3. data (redundant for static indices) + * 4. labels (redundant for static indices) + */ +void save_full_index(path final_index_path_prefix, path input_data_path, uint64_t final_index_size, + std::vector> stitched_graph, + tsl::robin_map entry_points, std::string universal_label, + path label_data_path) +{ + // aux. file 1 + auto saving_index_timer = std::chrono::high_resolution_clock::now(); + std::ifstream original_label_data_stream; + original_label_data_stream.exceptions(std::ios::badbit | std::ios::failbit); + original_label_data_stream.open(label_data_path, std::ios::binary); + std::ofstream new_label_data_stream; + new_label_data_stream.exceptions(std::ios::badbit | std::ios::failbit); + new_label_data_stream.open(final_index_path_prefix + "_labels.txt", std::ios::binary); + new_label_data_stream << original_label_data_stream.rdbuf(); + original_label_data_stream.close(); + new_label_data_stream.close(); + + // aux. 
file 2 + std::ifstream original_input_data_stream; + original_input_data_stream.exceptions(std::ios::badbit | std::ios::failbit); + original_input_data_stream.open(input_data_path, std::ios::binary); + std::ofstream new_input_data_stream; + new_input_data_stream.exceptions(std::ios::badbit | std::ios::failbit); + new_input_data_stream.open(final_index_path_prefix + ".data", std::ios::binary); + new_input_data_stream << original_input_data_stream.rdbuf(); + original_input_data_stream.close(); + new_input_data_stream.close(); + + // aux. file 3 + std::ofstream labels_to_medoids_writer; + labels_to_medoids_writer.exceptions(std::ios::badbit | std::ios::failbit); + labels_to_medoids_writer.open(final_index_path_prefix + "_labels_to_medoids.txt"); + for (auto iter : entry_points) + labels_to_medoids_writer << iter.first << ", " << iter.second << std::endl; + labels_to_medoids_writer.close(); + + // aux. file 4 (only if we're using a universal label) + if (universal_label != "") + { + std::ofstream universal_label_writer; + universal_label_writer.exceptions(std::ios::badbit | std::ios::failbit); + universal_label_writer.open(final_index_path_prefix + "_universal_label.txt"); + universal_label_writer << universal_label << std::endl; + universal_label_writer.close(); + } + + // main index + uint64_t index_num_frozen_points = 0, index_num_edges = 0; + uint32_t index_max_observed_degree = 0, index_entry_point = 0; + const size_t METADATA = 2 * sizeof(uint64_t) + 2 * sizeof(uint32_t); + for (auto &point_neighbors : stitched_graph) + { + index_max_observed_degree = std::max(index_max_observed_degree, (uint32_t)point_neighbors.size()); + } + + std::ofstream stitched_graph_writer; + stitched_graph_writer.exceptions(std::ios::badbit | std::ios::failbit); + stitched_graph_writer.open(final_index_path_prefix, std::ios_base::binary); + + stitched_graph_writer.write((char *)&final_index_size, sizeof(uint64_t)); + stitched_graph_writer.write((char *)&index_max_observed_degree, sizeof(uint32_t)); + stitched_graph_writer.write((char *)&index_entry_point, sizeof(uint32_t)); + stitched_graph_writer.write((char *)&index_num_frozen_points, sizeof(uint64_t)); + + size_t bytes_written = METADATA; + for (uint32_t node_point = 0; node_point < stitched_graph.size(); node_point++) + { + uint32_t current_node_num_neighbors = (uint32_t)stitched_graph[node_point].size(); + std::vector current_node_neighbors = stitched_graph[node_point]; + stitched_graph_writer.write((char *)¤t_node_num_neighbors, sizeof(uint32_t)); + bytes_written += sizeof(uint32_t); + for (const auto ¤t_node_neighbor : current_node_neighbors) + { + stitched_graph_writer.write((char *)¤t_node_neighbor, sizeof(uint32_t)); + bytes_written += sizeof(uint32_t); + } + index_num_edges += current_node_num_neighbors; + } + + if (bytes_written != final_index_size) + { + std::cerr << "Error: written bytes does not match allocated space" << std::endl; + throw; + } + + stitched_graph_writer.close(); + + std::chrono::duration saving_index_time = std::chrono::high_resolution_clock::now() - saving_index_timer; + std::cout << "Stitched graph written in " << saving_index_time.count() << " seconds" << std::endl; + std::cout << "Stitched graph average degree: " << ((float)index_num_edges) / ((float)(stitched_graph.size())) + << std::endl; + std::cout << "Stitched graph max degree: " << index_max_observed_degree << std::endl << std::endl; +} + +/* + * Unions the per-label graph indices together via the following policy: + * - any two nodes can only have at most one edge 
between them - + * + * Returns the "stitched" graph and its expected file size. + */ +template +stitch_indices_return_values stitch_label_indices( + path final_index_path_prefix, uint32_t total_number_of_points, label_set all_labels, + tsl::robin_map labels_to_number_of_points, + tsl::robin_map &label_entry_points, + tsl::robin_map> label_id_to_orig_id_map) +{ + size_t final_index_size = 0; + std::vector> stitched_graph(total_number_of_points); + + auto stitching_index_timer = std::chrono::high_resolution_clock::now(); + for (const auto &lbl : all_labels) + { + path curr_label_index_path(final_index_path_prefix + "_" + lbl); + std::vector> curr_label_index; + uint64_t curr_label_index_size; + uint32_t curr_label_entry_point; + + std::tie(curr_label_index, curr_label_index_size) = + diskann::load_label_index(curr_label_index_path, labels_to_number_of_points[lbl]); + curr_label_entry_point = (uint32_t)random(0, curr_label_index.size()); + label_entry_points[lbl] = label_id_to_orig_id_map[lbl][curr_label_entry_point]; + + for (uint32_t node_point = 0; node_point < curr_label_index.size(); node_point++) + { + uint32_t original_point_id = label_id_to_orig_id_map[lbl][node_point]; + for (auto &node_neighbor : curr_label_index[node_point]) + { + uint32_t original_neighbor_id = label_id_to_orig_id_map[lbl][node_neighbor]; + std::vector curr_point_neighbors = stitched_graph[original_point_id]; + if (std::find(curr_point_neighbors.begin(), curr_point_neighbors.end(), original_neighbor_id) == + curr_point_neighbors.end()) + { + stitched_graph[original_point_id].push_back(original_neighbor_id); + final_index_size += sizeof(uint32_t); + } + } + } + } + + const size_t METADATA = 2 * sizeof(uint64_t) + 2 * sizeof(uint32_t); + final_index_size += (total_number_of_points * sizeof(uint32_t) + METADATA); + + std::chrono::duration stitching_index_time = + std::chrono::high_resolution_clock::now() - stitching_index_timer; + std::cout << "stitched graph generated in memory in " << stitching_index_time.count() << " seconds" << std::endl; + + return std::make_tuple(stitched_graph, final_index_size); +} + +/* + * Applies the prune_neighbors function from src/index.cpp to + * every node in the stitched graph. + * + * This is an optional step, hence the saving of both the full + * and pruned graph. 
+ */ +template +void prune_and_save(path final_index_path_prefix, path full_index_path_prefix, path input_data_path, + std::vector> stitched_graph, uint32_t stitched_R, + tsl::robin_map label_entry_points, std::string universal_label, + path label_data_path, uint32_t num_threads) +{ + size_t dimension, number_of_label_points; + auto diskann_cout_buffer = diskann::cout.rdbuf(nullptr); + auto std_cout_buffer = std::cout.rdbuf(nullptr); + auto pruning_index_timer = std::chrono::high_resolution_clock::now(); + + diskann::get_bin_metadata(input_data_path, number_of_label_points, dimension); + + diskann::Index index(diskann::Metric::L2, dimension, number_of_label_points, nullptr, nullptr, 0, false, false, + false, false, 0, false); + + // not searching this index, set search_l to 0 + index.load(full_index_path_prefix.c_str(), num_threads, 1); + + std::cout << "parsing labels" << std::endl; + + index.prune_all_neighbors(stitched_R, 750, 1.2); + index.save((final_index_path_prefix).c_str()); + + diskann::cout.rdbuf(diskann_cout_buffer); + std::cout.rdbuf(std_cout_buffer); + std::chrono::duration pruning_index_time = std::chrono::high_resolution_clock::now() - pruning_index_timer; + std::cout << "pruning performed in " << pruning_index_time.count() << " seconds\n" << std::endl; +} + +/* + * Delete all temporary artifacts. + * In the process of creating the stitched index, some temporary artifacts are + * created: + * 1. the separate bin files for each labels' points + * 2. the separate diskANN indices built for each label + * 3. the '.data' file created while generating the indices + */ +void clean_up_artifacts(path input_data_path, path final_index_path_prefix, label_set all_labels) +{ + for (const auto &lbl : all_labels) + { + path curr_label_input_data_path(input_data_path + "_" + lbl); + path curr_label_index_path(final_index_path_prefix + "_" + lbl); + path curr_label_index_path_data(curr_label_index_path + ".data"); + + if (std::remove(curr_label_index_path.c_str()) != 0) + throw; + if (std::remove(curr_label_input_data_path.c_str()) != 0) + throw; + if (std::remove(curr_label_index_path_data.c_str()) != 0) + throw; + } +} + +int main(int argc, char **argv) +{ + // 1. handle cmdline inputs + std::string data_type; + path input_data_path, final_index_path_prefix, label_data_path; + std::string universal_label; + uint32_t num_threads, R, L, stitched_R; + float alpha; + + auto index_timer = std::chrono::high_resolution_clock::now(); + handle_args(argc, argv, data_type, input_data_path, final_index_path_prefix, label_data_path, universal_label, + num_threads, R, L, stitched_R, alpha); + + path labels_file_to_use = final_index_path_prefix + "_label_formatted.txt"; + path labels_map_file = final_index_path_prefix + "_labels_map.txt"; + + convert_labels_string_to_int(label_data_path, labels_file_to_use, labels_map_file, universal_label); + + // 2. parse label file and create necessary data structures + std::vector point_ids_to_labels; + tsl::robin_map labels_to_number_of_points; + label_set all_labels; + + std::tie(point_ids_to_labels, labels_to_number_of_points, all_labels) = + diskann::parse_label_file(labels_file_to_use, universal_label); + + // 3. 
for each label, make a separate data file + tsl::robin_map> label_id_to_orig_id_map; + uint32_t total_number_of_points = (uint32_t)point_ids_to_labels.size(); + +#ifndef _WINDOWS + if (data_type == "uint8") + label_id_to_orig_id_map = diskann::generate_label_specific_vector_files( + input_data_path, labels_to_number_of_points, point_ids_to_labels, all_labels); + else if (data_type == "int8") + label_id_to_orig_id_map = diskann::generate_label_specific_vector_files( + input_data_path, labels_to_number_of_points, point_ids_to_labels, all_labels); + else if (data_type == "float") + label_id_to_orig_id_map = diskann::generate_label_specific_vector_files( + input_data_path, labels_to_number_of_points, point_ids_to_labels, all_labels); + else + throw; +#else + if (data_type == "uint8") + label_id_to_orig_id_map = diskann::generate_label_specific_vector_files_compat( + input_data_path, labels_to_number_of_points, point_ids_to_labels, all_labels); + else if (data_type == "int8") + label_id_to_orig_id_map = diskann::generate_label_specific_vector_files_compat( + input_data_path, labels_to_number_of_points, point_ids_to_labels, all_labels); + else if (data_type == "float") + label_id_to_orig_id_map = diskann::generate_label_specific_vector_files_compat( + input_data_path, labels_to_number_of_points, point_ids_to_labels, all_labels); + else + throw; +#endif + + // 4. for each created data file, create a vanilla diskANN index + if (data_type == "uint8") + diskann::generate_label_indices(input_data_path, final_index_path_prefix, all_labels, R, L, alpha, + num_threads); + else if (data_type == "int8") + diskann::generate_label_indices(input_data_path, final_index_path_prefix, all_labels, R, L, alpha, + num_threads); + else if (data_type == "float") + diskann::generate_label_indices(input_data_path, final_index_path_prefix, all_labels, R, L, alpha, + num_threads); + else + throw; + + // 5. "stitch" the indices together + std::vector> stitched_graph; + tsl::robin_map label_entry_points; + uint64_t stitched_graph_size; + + if (data_type == "uint8") + std::tie(stitched_graph, stitched_graph_size) = + stitch_label_indices(final_index_path_prefix, total_number_of_points, all_labels, + labels_to_number_of_points, label_entry_points, label_id_to_orig_id_map); + else if (data_type == "int8") + std::tie(stitched_graph, stitched_graph_size) = + stitch_label_indices(final_index_path_prefix, total_number_of_points, all_labels, + labels_to_number_of_points, label_entry_points, label_id_to_orig_id_map); + else if (data_type == "float") + std::tie(stitched_graph, stitched_graph_size) = + stitch_label_indices(final_index_path_prefix, total_number_of_points, all_labels, + labels_to_number_of_points, label_entry_points, label_id_to_orig_id_map); + else + throw; + path full_index_path_prefix = final_index_path_prefix + "_full"; + // 5a. save the stitched graph to disk + save_full_index(full_index_path_prefix, input_data_path, stitched_graph_size, stitched_graph, label_entry_points, + universal_label, labels_file_to_use); + + // 6. 
run a prune on the stitched index, and save to disk + if (data_type == "uint8") + prune_and_save(final_index_path_prefix, full_index_path_prefix, input_data_path, stitched_graph, + stitched_R, label_entry_points, universal_label, labels_file_to_use, num_threads); + else if (data_type == "int8") + prune_and_save(final_index_path_prefix, full_index_path_prefix, input_data_path, stitched_graph, + stitched_R, label_entry_points, universal_label, labels_file_to_use, num_threads); + else if (data_type == "float") + prune_and_save(final_index_path_prefix, full_index_path_prefix, input_data_path, stitched_graph, + stitched_R, label_entry_points, universal_label, labels_file_to_use, num_threads); + else + throw; + + std::chrono::duration index_time = std::chrono::high_resolution_clock::now() - index_timer; + std::cout << "pruned/stitched graph generated in " << index_time.count() << " seconds" << std::endl; + + clean_up_artifacts(input_data_path, final_index_path_prefix, all_labels); +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/apps/python/README.md b/packages/leann-backend-diskann/third_party/DiskANN/apps/python/README.md new file mode 100644 index 0000000..2b0bc35 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/apps/python/README.md @@ -0,0 +1,46 @@ + + +# Integration Tests +The following tests use Python to prepare, run, verify, and tear down the rest api services. + +We do make use of the built-in `unittest` library, but that's only to take advantage of test reporting purposes. + +These are decidedly **not** _unit_ tests. These are end to end integration tests. + +## Caveats +This has only been tested or built for Linux, though we have written platform agnostic Python for the smoke test +(i.e. using `os.path.join`, etc) + +It has been tested on Python 3.9 and 3.10, but should work on Python 3.6+. + +## How to Run + +First, build the DiskANN RestAPI code; see $REPOSITORY_ROOT/workflows/rest_api.md for detailed instructions. + +```bash +cd tests/python +python3 -m venv venv +source venv/bin/activate +pip install -r requirements.txt + +export DISKANN_BUILD_DIR=/path/to/your/diskann/build +python -m unittest +``` + +## Smoke Test Failed, Now What? +The smoke test written takes advantage of temporary directories that are only valid during the +lifetime of the test. The contents of these directories include: +- Randomized vectors (first in tsv, then bin form) used to build the PQFlashIndex +- The PQFlashIndex files + +It is useful to keep these around. By setting some environment variables, you can control whether an ephemeral, +temporary directory is used (and deleted on test completion), or left as an exercise for the developer to +clean up. + +The valid environment variables are: +- `DISKANN_REST_TEST_WORKING_DIR` (example: `$USER/DiskANNRestTest`) + - If this is specified, it **must exist** and **must be writeable**. Any existing files will be clobbered. +- `DISKANN_REST_SERVER` (example: `http://127.0.0.1:10067`) + - Note that if this is set, no data will be generated, nor will a server be started; it is presumed you have done + all the work in creating and starting the rest server prior to running the test and just submits requests against it. 
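+As a minimal sketch, these variables can be combined with the build-directory export
+shown earlier; the working directory path and server URL below are illustrative values
+and must match your own environment:
+
+```bash
+# Keep test artifacts in a persistent, writeable directory (it must already exist)
+mkdir -p "$HOME/DiskANNRestTest"
+export DISKANN_REST_TEST_WORKING_DIR="$HOME/DiskANNRestTest"
+
+# Or run against a rest server you started yourself; in this mode no data is
+# generated and no server is launched by the test
+# export DISKANN_REST_SERVER="http://127.0.0.1:10067"
+
+export DISKANN_BUILD_DIR=/path/to/your/diskann/build
+python -m unittest
+```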
diff --git a/packages/leann-backend-diskann/third_party/DiskANN/apps/python/restapi/__init__.py b/packages/leann-backend-diskann/third_party/DiskANN/apps/python/restapi/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/packages/leann-backend-diskann/third_party/DiskANN/apps/python/restapi/disk_ann_util.py b/packages/leann-backend-diskann/third_party/DiskANN/apps/python/restapi/disk_ann_util.py new file mode 100644 index 0000000..ec89310 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/apps/python/restapi/disk_ann_util.py @@ -0,0 +1,67 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT license. + +import numpy as np +import os +import subprocess + + +def output_vectors( + diskann_build_path: str, + temporary_file_path: str, + vectors: np.ndarray, + timeout: int = 60 +) -> str: + vectors_as_tsv_path = os.path.join(temporary_file_path, "vectors.tsv") + with open(vectors_as_tsv_path, "w") as vectors_tsv_out: + for vector in vectors: + as_str = "\t".join((str(component) for component in vector)) + print(as_str, file=vectors_tsv_out) + # there is probably a clever way to have numpy write out C++ friendly floats, so feel free to remove this in + # favor of something more sane later + vectors_as_bin_path = os.path.join(temporary_file_path, "vectors.bin") + tsv_to_bin_path = os.path.join(diskann_build_path, "apps", "utils", "tsv_to_bin") + + number_of_points, dimensions = vectors.shape + args = [ + tsv_to_bin_path, + "float", + vectors_as_tsv_path, + vectors_as_bin_path, + str(dimensions), + str(number_of_points) + ] + completed = subprocess.run(args, timeout=timeout) + if completed.returncode != 0: + raise Exception(f"Unable to convert tsv to binary using tsv_to_bin, completed_process: {completed}") + return vectors_as_bin_path + + +def build_ssd_index( + diskann_build_path: str, + temporary_file_path: str, + vectors: np.ndarray, + per_process_timeout: int = 60 # this may not be long enough if you're doing something larger +): + vectors_as_bin_path = output_vectors(diskann_build_path, temporary_file_path, vectors, timeout=per_process_timeout) + + ssd_builder_path = os.path.join(diskann_build_path, "apps", "build_disk_index") + args = [ + ssd_builder_path, + "--data_type", "float", + "--dist_fn", "l2", + "--data_path", vectors_as_bin_path, + "--index_path_prefix", os.path.join(temporary_file_path, "smoke_test"), + "-R", "64", + "-L", "100", + "--search_DRAM_budget", "1", + "--build_DRAM_budget", "1", + "--num_threads", "1", + "--PQ_disk_bytes", "0" + ] + completed = subprocess.run(args, timeout=per_process_timeout) + + if completed.returncode != 0: + command_run = " ".join(args) + raise Exception(f"Unable to build a disk index with the command: '{command_run}'\ncompleted_process: {completed}\nstdout: {completed.stdout}\nstderr: {completed.stderr}") + # index is now built inside of temporary_file_path diff --git a/packages/leann-backend-diskann/third_party/DiskANN/apps/range_search_disk_index.cpp b/packages/leann-backend-diskann/third_party/DiskANN/apps/range_search_disk_index.cpp new file mode 100644 index 0000000..3167572 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/apps/range_search_disk_index.cpp @@ -0,0 +1,379 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
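+// Example invocation (a sketch only: the index prefix, query file, and range value
+// are illustrative, while the flags are the ones registered in main() below):
+//   ./range_search_disk_index --data_type float --dist_fn l2 \
+//       --index_path_prefix index/disk_r64 --query_file queries.fbin \
+//       -K 2.0 -L 20 40 80 -W 4 --num_nodes_to_cache 10000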
+ +#include +#include +#include +#include +#include +#include + +#include "index.h" +#include "disk_utils.h" +#include "math_utils.h" +#include "memory_mapper.h" +#include "pq_flash_index.h" +#include "partition.h" +#include "timer.h" +#include "program_options_utils.hpp" + +#ifndef _WINDOWS +#include +#include +#include +#include "linux_aligned_file_reader.h" +#else +#ifdef USE_BING_INFRA +#include "bing_aligned_file_reader.h" +#else +#include "windows_aligned_file_reader.h" +#endif +#endif + +namespace po = boost::program_options; + +#define WARMUP false + +void print_stats(std::string category, std::vector percentiles, std::vector results) +{ + diskann::cout << std::setw(20) << category << ": " << std::flush; + for (uint32_t s = 0; s < percentiles.size(); s++) + { + diskann::cout << std::setw(8) << percentiles[s] << "%"; + } + diskann::cout << std::endl; + diskann::cout << std::setw(22) << " " << std::flush; + for (uint32_t s = 0; s < percentiles.size(); s++) + { + diskann::cout << std::setw(9) << results[s]; + } + diskann::cout << std::endl; +} + +template +int search_disk_index(diskann::Metric &metric, const std::string &index_path_prefix, const std::string &query_file, + std::string >_file, const uint32_t num_threads, const float search_range, + const uint32_t beamwidth, const uint32_t num_nodes_to_cache, const std::vector &Lvec) +{ + std::string pq_prefix = index_path_prefix + "_pq"; + std::string disk_index_file = index_path_prefix + "_disk.index"; + std::string warmup_query_file = index_path_prefix + "_sample_data.bin"; + + diskann::cout << "Search parameters: #threads: " << num_threads << ", "; + if (beamwidth <= 0) + diskann::cout << "beamwidth to be optimized for each L value" << std::endl; + else + diskann::cout << " beamwidth: " << beamwidth << std::endl; + + // load query bin + T *query = nullptr; + std::vector> groundtruth_ids; + size_t query_num, query_dim, query_aligned_dim, gt_num; + diskann::load_aligned_bin(query_file, query, query_num, query_dim, query_aligned_dim); + + bool calc_recall_flag = false; + if (gt_file != std::string("null") && file_exists(gt_file)) + { + diskann::load_range_truthset(gt_file, groundtruth_ids, + gt_num); // use for range search type of truthset + // diskann::prune_truthset_for_range(gt_file, search_range, + // groundtruth_ids, gt_num); // use for traditional truthset + if (gt_num != query_num) + { + diskann::cout << "Error. 
Mismatch in number of queries and ground truth data" << std::endl; + return -1; + } + calc_recall_flag = true; + } + + std::shared_ptr reader = nullptr; +#ifdef _WINDOWS +#ifndef USE_BING_INFRA + reader.reset(new WindowsAlignedFileReader()); +#else + reader.reset(new diskann::BingAlignedFileReader()); +#endif +#else + reader.reset(new LinuxAlignedFileReader()); +#endif + + std::unique_ptr> _pFlashIndex( + new diskann::PQFlashIndex(reader, metric)); + + int res = _pFlashIndex->load(num_threads, index_path_prefix.c_str()); + + if (res != 0) + { + return res; + } + // cache bfs levels + std::vector node_list; + diskann::cout << "Caching " << num_nodes_to_cache << " BFS nodes around medoid(s)" << std::endl; + _pFlashIndex->cache_bfs_levels(num_nodes_to_cache, node_list); + // _pFlashIndex->generate_cache_list_from_sample_queries( + // warmup_query_file, 15, 6, num_nodes_to_cache, num_threads, + // node_list); + _pFlashIndex->load_cache_list(node_list); + node_list.clear(); + node_list.shrink_to_fit(); + + omp_set_num_threads(num_threads); + + uint64_t warmup_L = 20; + uint64_t warmup_num = 0, warmup_dim = 0, warmup_aligned_dim = 0; + T *warmup = nullptr; + + if (WARMUP) + { + if (file_exists(warmup_query_file)) + { + diskann::load_aligned_bin(warmup_query_file, warmup, warmup_num, warmup_dim, warmup_aligned_dim); + } + else + { + warmup_num = (std::min)((uint32_t)150000, (uint32_t)15000 * num_threads); + warmup_dim = query_dim; + warmup_aligned_dim = query_aligned_dim; + diskann::alloc_aligned(((void **)&warmup), warmup_num * warmup_aligned_dim * sizeof(T), 8 * sizeof(T)); + std::memset(warmup, 0, warmup_num * warmup_aligned_dim * sizeof(T)); + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<> dis(-128, 127); + for (uint32_t i = 0; i < warmup_num; i++) + { + for (uint32_t d = 0; d < warmup_dim; d++) + { + warmup[i * warmup_aligned_dim + d] = (T)dis(gen); + } + } + } + diskann::cout << "Warming up index... 
" << std::flush; + std::vector warmup_result_ids_64(warmup_num, 0); + std::vector warmup_result_dists(warmup_num, 0); + +#pragma omp parallel for schedule(dynamic, 1) + for (int64_t i = 0; i < (int64_t)warmup_num; i++) + { + _pFlashIndex->cached_beam_search(warmup + (i * warmup_aligned_dim), 1, warmup_L, + warmup_result_ids_64.data() + (i * 1), + warmup_result_dists.data() + (i * 1), 4); + } + diskann::cout << "..done" << std::endl; + } + + diskann::cout.setf(std::ios_base::fixed, std::ios_base::floatfield); + diskann::cout.precision(2); + + std::string recall_string = "Recall@rng=" + std::to_string(search_range); + diskann::cout << std::setw(6) << "L" << std::setw(12) << "Beamwidth" << std::setw(16) << "QPS" << std::setw(16) + << "Mean Latency" << std::setw(16) << "99.9 Latency" << std::setw(16) << "Mean IOs" << std::setw(16) + << "CPU (s)"; + if (calc_recall_flag) + { + diskann::cout << std::setw(16) << recall_string << std::endl; + } + else + diskann::cout << std::endl; + diskann::cout << "===============================================================" + "===========================================" + << std::endl; + + std::vector>> query_result_ids(Lvec.size()); + + uint32_t optimized_beamwidth = 2; + uint32_t max_list_size = 10000; + + for (uint32_t test_id = 0; test_id < Lvec.size(); test_id++) + { + uint32_t L = Lvec[test_id]; + + if (beamwidth <= 0) + { + optimized_beamwidth = + optimize_beamwidth(_pFlashIndex, warmup, warmup_num, warmup_aligned_dim, L, optimized_beamwidth); + } + else + optimized_beamwidth = beamwidth; + + query_result_ids[test_id].clear(); + query_result_ids[test_id].resize(query_num); + + diskann::QueryStats *stats = new diskann::QueryStats[query_num]; + + auto s = std::chrono::high_resolution_clock::now(); +#pragma omp parallel for schedule(dynamic, 1) + for (int64_t i = 0; i < (int64_t)query_num; i++) + { + std::vector indices; + std::vector distances; + uint32_t res_count = + _pFlashIndex->range_search(query + (i * query_aligned_dim), search_range, L, max_list_size, indices, + distances, optimized_beamwidth, stats + i); + query_result_ids[test_id][i].reserve(res_count); + query_result_ids[test_id][i].resize(res_count); + for (uint32_t idx = 0; idx < res_count; idx++) + query_result_ids[test_id][i][idx] = (uint32_t)indices[idx]; + } + auto e = std::chrono::high_resolution_clock::now(); + std::chrono::duration diff = e - s; + auto qps = (1.0 * query_num) / (1.0 * diff.count()); + + auto mean_latency = diskann::get_mean_stats( + stats, query_num, [](const diskann::QueryStats &stats) { return stats.total_us; }); + + auto latency_999 = diskann::get_percentile_stats( + stats, query_num, 0.999, [](const diskann::QueryStats &stats) { return stats.total_us; }); + + auto mean_ios = diskann::get_mean_stats(stats, query_num, + [](const diskann::QueryStats &stats) { return stats.n_ios; }); + + double mean_cpuus = diskann::get_mean_stats( + stats, query_num, [](const diskann::QueryStats &stats) { return stats.cpu_us; }); + + double recall = 0; + double ratio_of_sums = 0; + if (calc_recall_flag) + { + recall = + diskann::calculate_range_search_recall((uint32_t)query_num, groundtruth_ids, query_result_ids[test_id]); + + uint32_t total_true_positive = 0; + uint32_t total_positive = 0; + for (uint32_t i = 0; i < query_num; i++) + { + total_true_positive += (uint32_t)query_result_ids[test_id][i].size(); + total_positive += (uint32_t)groundtruth_ids[i].size(); + } + + ratio_of_sums = (1.0 * total_true_positive) / (1.0 * total_positive); + } + + diskann::cout << std::setw(6) << 
L << std::setw(12) << optimized_beamwidth << std::setw(16) << qps + << std::setw(16) << mean_latency << std::setw(16) << latency_999 << std::setw(16) << mean_ios + << std::setw(16) << mean_cpuus; + if (calc_recall_flag) + { + diskann::cout << std::setw(16) << recall << "," << ratio_of_sums << std::endl; + } + else + diskann::cout << std::endl; + } + + diskann::cout << "Done searching. " << std::endl; + + diskann::aligned_free(query); + if (warmup != nullptr) + diskann::aligned_free(warmup); + return 0; +} + +int main(int argc, char **argv) +{ + std::string data_type, dist_fn, index_path_prefix, result_path_prefix, query_file, gt_file; + uint32_t num_threads, W, num_nodes_to_cache; + std::vector Lvec; + float range; + + po::options_description desc{program_options_utils::make_program_description( + "range_search_disk_index", "Searches disk DiskANN indexes using ranges")}; + try + { + desc.add_options()("help,h", "Print information on arguments"); + + // Required parameters + po::options_description required_configs("Required"); + required_configs.add_options()("data_type", po::value(&data_type)->required(), + program_options_utils::DATA_TYPE_DESCRIPTION); + required_configs.add_options()("dist_fn", po::value(&dist_fn)->required(), + program_options_utils::DISTANCE_FUNCTION_DESCRIPTION); + required_configs.add_options()("index_path_prefix", po::value(&index_path_prefix)->required(), + program_options_utils::INDEX_PATH_PREFIX_DESCRIPTION); + required_configs.add_options()("query_file", po::value(&query_file)->required(), + program_options_utils::QUERY_FILE_DESCRIPTION); + required_configs.add_options()("search_list,L", + po::value>(&Lvec)->multitoken()->required(), + program_options_utils::SEARCH_LIST_DESCRIPTION); + required_configs.add_options()("range_threshold,K", po::value(&range)->required(), + "Number of neighbors to be returned"); + + // Optional parameters + po::options_description optional_configs("Optional"); + optional_configs.add_options()("num_threads,T", + po::value(&num_threads)->default_value(omp_get_num_procs()), + program_options_utils::NUMBER_THREADS_DESCRIPTION); + optional_configs.add_options()("gt_file", po::value(>_file)->default_value(std::string("null")), + program_options_utils::GROUND_TRUTH_FILE_DESCRIPTION); + optional_configs.add_options()("num_nodes_to_cache", po::value(&num_nodes_to_cache)->default_value(0), + program_options_utils::NUMBER_OF_NODES_TO_CACHE); + optional_configs.add_options()("beamwidth,W", po::value(&W)->default_value(2), + program_options_utils::BEAMWIDTH); + + // Merge required and optional parameters + desc.add(required_configs).add(optional_configs); + + po::variables_map vm; + po::store(po::parse_command_line(argc, argv, desc), vm); + if (vm.count("help")) + { + std::cout << desc; + return 0; + } + po::notify(vm); + } + catch (const std::exception &ex) + { + std::cerr << ex.what() << '\n'; + return -1; + } + + diskann::Metric metric; + if (dist_fn == std::string("mips")) + { + metric = diskann::Metric::INNER_PRODUCT; + } + else if (dist_fn == std::string("l2")) + { + metric = diskann::Metric::L2; + } + else if (dist_fn == std::string("cosine")) + { + metric = diskann::Metric::COSINE; + } + else + { + std::cout << "Unsupported distance function. Currently only L2/ Inner " + "Product/Cosine are supported." + << std::endl; + return -1; + } + + if ((data_type != std::string("float")) && (metric == diskann::Metric::INNER_PRODUCT)) + { + std::cout << "Currently support only floating point data for Inner Product." 
<< std::endl; + return -1; + } + + try + { + if (data_type == std::string("float")) + return search_disk_index(metric, index_path_prefix, query_file, gt_file, num_threads, range, W, + num_nodes_to_cache, Lvec); + else if (data_type == std::string("int8")) + return search_disk_index(metric, index_path_prefix, query_file, gt_file, num_threads, range, W, + num_nodes_to_cache, Lvec); + else if (data_type == std::string("uint8")) + return search_disk_index(metric, index_path_prefix, query_file, gt_file, num_threads, range, W, + num_nodes_to_cache, Lvec); + else + { + std::cerr << "Unsupported data type. Use float or int8 or uint8" << std::endl; + return -1; + } + } + catch (const std::exception &e) + { + std::cout << std::string(e.what()) << std::endl; + diskann::cerr << "Index search failed." << std::endl; + return -1; + } +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/apps/restapi/CMakeLists.txt b/packages/leann-backend-diskann/third_party/DiskANN/apps/restapi/CMakeLists.txt new file mode 100644 index 0000000..c73b427 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/apps/restapi/CMakeLists.txt @@ -0,0 +1,40 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT license. + +set(CMAKE_CXX_STANDARD 17) + +add_executable(inmem_server inmem_server.cpp) +if(MSVC) + target_link_options(inmem_server PRIVATE /MACHINE:x64) + target_link_libraries(inmem_server debug ${CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG}/diskann_dll.lib Boost::program_options) + target_link_libraries(inmem_server optimized ${CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELEASE}/diskann_dll.lib Boost::program_options) +else() + target_link_libraries(inmem_server ${PROJECT_NAME} aio -ltcmalloc -lboost_system -lcrypto -lssl -lcpprest Boost::program_options) +endif() + +add_executable(ssd_server ssd_server.cpp) +if(MSVC) + target_link_options(ssd_server PRIVATE /MACHINE:x64) + target_link_libraries(ssd_server debug ${CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG}/diskann_dll.lib Boost::program_options) + target_link_libraries(ssd_server optimized ${CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELEASE}/diskann_dll.lib Boost::program_options) +else() + target_link_libraries(ssd_server ${PROJECT_NAME} aio -ltcmalloc -lboost_system -lcrypto -lssl -lcpprest Boost::program_options) +endif() + +add_executable(multiple_ssdindex_server multiple_ssdindex_server.cpp) +if(MSVC) + target_link_options(multiple_ssdindex_server PRIVATE /MACHINE:x64) + target_link_libraries(multiple_ssdindex_server debug ${CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG}/diskann_dll.lib Boost::program_options) + target_link_libraries(multiple_ssdindex_server optimized ${CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELEASE}/diskann_dll.lib Boost::program_options) +else() + target_link_libraries(multiple_ssdindex_server ${PROJECT_NAME} aio -ltcmalloc -lboost_system -lcrypto -lssl -lcpprest Boost::program_options) +endif() + +add_executable(client client.cpp) +if(MSVC) + target_link_options(client PRIVATE /MACHINE:x64) + target_link_libraries(client debug ${CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG}/diskann_dll.lib Boost::program_options) + target_link_libraries(client optimized ${CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELEASE}/diskann_dll.lib Boost::program_options) +else() + target_link_libraries(client ${PROJECT_NAME} -lboost_system -lcrypto -lssl -lcpprest Boost::program_options) +endif() \ No newline at end of file diff --git a/packages/leann-backend-diskann/third_party/DiskANN/apps/restapi/client.cpp 
b/packages/leann-backend-diskann/third_party/DiskANN/apps/restapi/client.cpp new file mode 100644 index 0000000..fdf4414 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/apps/restapi/client.cpp @@ -0,0 +1,124 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#include +#include +#include +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#include +#include +#include +#include + +#include +#include + +using namespace web; +using namespace web::http; +using namespace web::http::client; + +using namespace diskann; +namespace po = boost::program_options; + +template +void query_loop(const std::string &ip_addr_port, const std::string &query_file, const unsigned nq, const unsigned Ls, + const unsigned k_value) +{ + web::http::client::http_client client(U(ip_addr_port)); + + T *data; + size_t npts = 1, ndims = 128, rounded_dim = 128; + diskann::load_aligned_bin(query_file, data, npts, ndims, rounded_dim); + + for (unsigned i = 0; i < nq; ++i) + { + T *vec = data + i * rounded_dim; + web::http::http_request http_query(methods::POST); + web::json::value queryJson = web::json::value::object(); + queryJson[QUERY_ID_KEY] = i; + queryJson[K_KEY] = k_value; + queryJson[L_KEY] = Ls; + for (size_t i = 0; i < ndims; ++i) + { + queryJson[VECTOR_KEY][i] = web::json::value::number(vec[i]); + } + http_query.set_body(queryJson); + + client.request(http_query) + .then([](web::http::http_response response) -> pplx::task { + if (response.status_code() == status_codes::OK) + { + return response.extract_string(); + } + std::cerr << "Query failed" << std::endl; + return pplx::task_from_result(utility::string_t()); + }) + .then([](pplx::task previousTask) { + try + { + std::cout << previousTask.get() << std::endl; + } + catch (http_exception const &e) + { + std::wcout << e.what() << std::endl; + } + }) + .wait(); + } +} + +int main(int argc, char *argv[]) +{ + std::string data_type, query_file, address; + uint32_t num_queries; + uint32_t l_search, k_value; + + po::options_description desc{"Arguments"}; + try + { + desc.add_options()("help,h", "Print information on arguments"); + desc.add_options()("data_type", po::value(&data_type)->required(), "data type "); + desc.add_options()("address", po::value(&address)->required(), "Web server address"); + desc.add_options()("query_file", po::value(&query_file)->required(), + "File containing the queries to search"); + desc.add_options()("num_queries,Q", po::value(&num_queries)->required(), + "Number of queries to search"); + desc.add_options()("l_search", po::value(&l_search)->required(), "Value of L"); + desc.add_options()("k_value,K", po::value(&k_value)->default_value(10), "Value of K (default 10)"); + po::variables_map vm; + po::store(po::parse_command_line(argc, argv, desc), vm); + if (vm.count("help")) + { + std::cout << desc; + return 0; + } + po::notify(vm); + } + catch (const std::exception &ex) + { + std::cerr << ex.what() << std::endl; + return -1; + } + + if (data_type == std::string("float")) + { + query_loop(address, query_file, num_queries, l_search, k_value); + } + else if (data_type == std::string("int8")) + { + query_loop(address, query_file, num_queries, l_search, k_value); + } + else if (data_type == std::string("uint8")) + { + query_loop(address, query_file, num_queries, l_search, k_value); + } + else + { + std::cerr << "Unsupported type " << argv[2] << std::endl; + return -1; + } + + return 0; +} \ No newline at end of file diff 
--git a/packages/leann-backend-diskann/third_party/DiskANN/apps/restapi/inmem_server.cpp b/packages/leann-backend-diskann/third_party/DiskANN/apps/restapi/inmem_server.cpp new file mode 100644 index 0000000..11da541 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/apps/restapi/inmem_server.cpp @@ -0,0 +1,138 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#include +#include +#include +#include +#include +#include +#include + +#include + +using namespace diskann; +namespace po = boost::program_options; + +std::unique_ptr g_httpServer(nullptr); +std::vector> g_inMemorySearch; + +void setup(const utility::string_t &address, const std::string &typestring) +{ + web::http::uri_builder uriBldr(address); + auto uri = uriBldr.to_uri(); + + std::cout << "Attempting to start server on " << uri.to_string() << std::endl; + + g_httpServer = std::unique_ptr(new Server(uri, g_inMemorySearch, typestring)); + std::cout << "Created a server object" << std::endl; + + g_httpServer->open().wait(); + ucout << U"Listening for requests on: " << address << std::endl; +} + +void teardown(const utility::string_t &address) +{ + g_httpServer->close().wait(); +} + +int main(int argc, char *argv[]) +{ + std::string data_type, index_file, data_file, address, dist_fn, tags_file; + uint32_t num_threads; + uint32_t l_search; + + po::options_description desc{"Arguments"}; + try + { + desc.add_options()("help,h", "Print information on arguments"); + desc.add_options()("data_type", po::value(&data_type)->required(), "data type "); + desc.add_options()("address", po::value(&address)->required(), "Web server address"); + desc.add_options()("data_file", po::value(&data_file)->required(), + "File containing the data found in the index"); + desc.add_options()("index_path_prefix", po::value(&index_file)->required(), + "Path prefix for saving index file components"); + desc.add_options()("num_threads,T", po::value(&num_threads)->required(), + "Number of threads used for building index"); + desc.add_options()("l_search", po::value(&l_search)->required(), "Value of L"); + desc.add_options()("dist_fn", po::value(&dist_fn)->default_value("l2"), + "distance function "); + desc.add_options()("tags_file", po::value(&tags_file)->default_value(std::string()), + "Tags file location"); + po::variables_map vm; + po::store(po::parse_command_line(argc, argv, desc), vm); + if (vm.count("help")) + { + std::cout << desc; + return 0; + } + po::notify(vm); + } + catch (const std::exception &ex) + { + std::cerr << ex.what() << std::endl; + return -1; + } + diskann::Metric metric; + if (dist_fn == std::string("l2")) + metric = diskann::Metric::L2; + else if (dist_fn == std::string("mips")) + metric = diskann::Metric::INNER_PRODUCT; + else + { + std::cout << "Error. 
Only l2 and mips distance functions are supported" << std::endl; + return -1; + } + + if (data_type == std::string("float")) + { + auto searcher = std::unique_ptr( + new diskann::InMemorySearch(data_file, index_file, tags_file, metric, num_threads, l_search)); + g_inMemorySearch.push_back(std::move(searcher)); + } + else if (data_type == std::string("int8")) + { + auto searcher = std::unique_ptr( + new diskann::InMemorySearch(data_file, index_file, tags_file, metric, num_threads, l_search)); + g_inMemorySearch.push_back(std::move(searcher)); + } + else if (data_type == std::string("uint8")) + { + auto searcher = std::unique_ptr( + new diskann::InMemorySearch(data_file, index_file, tags_file, metric, num_threads, l_search)); + g_inMemorySearch.push_back(std::move(searcher)); + } + else + { + std::cerr << "Unsupported data type " << argv[2] << std::endl; + } + + while (1) + { + try + { + setup(address, data_type); + std::cout << "Type 'exit' (case-sensitive) to exit" << std::endl; + std::string line; + std::getline(std::cin, line); + if (line == "exit") + { + teardown(address); + g_httpServer->close().wait(); + exit(0); + } + } + catch (const std::exception &ex) + { + std::cerr << "Exception occurred: " << ex.what() << std::endl; + std::cerr << "Restarting HTTP server"; + teardown(address); + } + catch (...) + { + std::cerr << "Unknown exception occurreed" << std::endl; + std::cerr << "Restarting HTTP server"; + teardown(address); + } + } +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/apps/restapi/main.cpp b/packages/leann-backend-diskann/third_party/DiskANN/apps/restapi/main.cpp new file mode 100644 index 0000000..cb48d67 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/apps/restapi/main.cpp @@ -0,0 +1,83 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
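The restapi client shown earlier posts each query as a JSON object carrying the query id, K, L, and the raw query vector (through the QUERY_ID_KEY / K_KEY / L_KEY / VECTOR_KEY constants). Below is a minimal, self-contained cpprest sketch of one such request; the literal key names, the server address, and the query vector are illustrative assumptions, not the exact constants DiskANN defines.

// Hedged sketch: POST a single query to a restapi-style server with cpprest.
// The key strings stand in for QUERY_ID_KEY / K_KEY / L_KEY / VECTOR_KEY;
// address, dimension, and values are made up for illustration.
#include <cpprest/http_client.h>
#include <cpprest/json.h>
#include <iostream>
#include <vector>

int main()
{
    std::vector<float> query_vec(128, 0.0f); // illustrative 128-d query

    web::json::value body = web::json::value::object();
    body[U("query_id")] = web::json::value::number(0);
    body[U("k")] = web::json::value::number(10);
    body[U("Ls")] = web::json::value::number(32);
    body[U("query")] = web::json::value::array(query_vec.size());
    for (size_t d = 0; d < query_vec.size(); ++d)
        body[U("query")][d] = web::json::value::number(query_vec[d]);

    web::http::client::http_client client(U("http://127.0.0.1:8080"));
    // Synchronous round trip; the response body is printed as-is.
    auto response = client.request(web::http::methods::POST, U("/"), body).get();
    ucout << response.extract_string().get() << std::endl;
    return 0;
}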
+ +#include +#include +#include +#include + +std::unique_ptr g_httpServer(nullptr); +std::unique_ptr g_inMemorySearch(nullptr); + +void setup(const utility::string_t &address) +{ + web::http::uri_builder uriBldr(address); + auto uri = uriBldr.to_uri(); + + std::wcout << L"Attempting to start server on " << uri.to_string() << std::endl; + + g_httpServer = std::unique_ptr(new Server(uri, g_inMemorySearch)); + g_httpServer->open().wait(); + + ucout << U"Listening for requests on: " << address << std::endl; +} + +void teardown(const utility::string_t &address) +{ + g_httpServer->close().wait(); +} + +void loadIndex(const char *indexFile, const char *baseFile, const char *idsFile) +{ + auto nsgSearch = new diskann::InMemorySearch(baseFile, indexFile, idsFile, diskann::L2); + g_inMemorySearch = std::unique_ptr(nsgSearch); +} + +std::wstring getHostingAddress(const char *hostNameAndPort) +{ + wchar_t buffer[4096]; + mbstowcs_s(nullptr, buffer, sizeof(buffer) / sizeof(buffer[0]), hostNameAndPort, + sizeof(buffer) / sizeof(buffer[0])); + return std::wstring(buffer); +} + +int main(int argc, char *argv[]) +{ + if (argc != 5) + { + std::cout << "Usage: nsg_server " + " " + << std::endl; + exit(1); + } + + auto address = getHostingAddress(argv[1]); + loadIndex(argv[2], argv[3], argv[4]); + while (1) + { + try + { + setup(address); + std::cout << "Type 'exit' (case-sensitive) to exit" << std::endl; + std::string line; + std::getline(std::cin, line); + if (line == "exit") + { + teardown(address); + exit(0); + } + } + catch (const std::exception &ex) + { + std::cerr << "Exception occurred: " << ex.what() << std::endl; + std::cerr << "Restarting HTTP server"; + teardown(address); + } + catch (...) + { + std::cerr << "Unknown exception occurreed" << std::endl; + std::cerr << "Restarting HTTP server"; + teardown(address); + } + } +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/apps/restapi/multiple_ssdindex_server.cpp b/packages/leann-backend-diskann/third_party/DiskANN/apps/restapi/multiple_ssdindex_server.cpp new file mode 100644 index 0000000..89cb06f --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/apps/restapi/multiple_ssdindex_server.cpp @@ -0,0 +1,182 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
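The multiple_ssdindex_server that follows reads two plain-text list files: one on-disk index path prefix per line (--index_prefix_paths) and, on the matching line of --tags_file, the tags file for that index; it exits if the line counts differ. A small sketch that writes such a line-aligned pair of list files (all paths are made-up placeholders):

// Hedged sketch: generate the two line-aligned list files consumed by
// multiple_ssdindex_server. Paths below are illustrative only.
#include <fstream>
#include <string>
#include <vector>

int main()
{
    std::vector<std::string> index_prefixes = {"/data/wiki_shard0_disk.index",
                                               "/data/wiki_shard1_disk.index"};
    std::vector<std::string> tag_files = {"/data/wiki_shard0.tags",
                                          "/data/wiki_shard1.tags"};

    std::ofstream prefixes_out("index_prefix_paths.txt");
    std::ofstream tags_out("tags_file.txt");
    for (size_t i = 0; i < index_prefixes.size(); ++i)
    {
        prefixes_out << index_prefixes[i] << "\n"; // line i pairs with ...
        tags_out << tag_files[i] << "\n";          // ... the tags on the same line
    }
    return 0;
}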
+ +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +using namespace diskann; +namespace po = boost::program_options; + +std::unique_ptr g_httpServer(nullptr); +std::vector> g_ssdSearch; + +void setup(const utility::string_t &address, const std::string &typestring) +{ + web::http::uri_builder uriBldr(address); + auto uri = uriBldr.to_uri(); + + std::cout << "Attempting to start server on " << uri.to_string() << std::endl; + + g_httpServer = std::unique_ptr(new Server(uri, g_ssdSearch, typestring)); + std::cout << "Created a server object" << std::endl; + + g_httpServer->open().wait(); + ucout << U"Listening for requests on: " << address << std::endl; +} + +void teardown(const utility::string_t &address) +{ + g_httpServer->close().wait(); +} + +int main(int argc, char *argv[]) +{ + std::string data_type, index_prefix_paths, address, dist_fn, tags_file; + uint32_t num_nodes_to_cache; + uint32_t num_threads; + + po::options_description desc{"Arguments"}; + try + { + desc.add_options()("help,h", "Print information on arguments"); + desc.add_options()("address", po::value(&address)->required(), "Web server address"); + desc.add_options()("data_type", po::value(&data_type)->required(), "data type "); + desc.add_options()("index_prefix_paths", po::value(&index_prefix_paths)->required(), + "Path prefix for loading index file components"); + desc.add_options()("num_nodes_to_cache", po::value(&num_nodes_to_cache)->default_value(0), + "Number of nodes to cache during search"); + desc.add_options()("num_threads,T", po::value(&num_threads)->default_value(omp_get_num_procs()), + "Number of threads used for building index (defaults to " + "omp_get_num_procs())"); + desc.add_options()("dist_fn", po::value(&dist_fn)->default_value("l2"), + "distance function "); + desc.add_options()("tags_file", po::value(&tags_file)->default_value(std::string()), + "Tags file location"); + + po::variables_map vm; + po::store(po::parse_command_line(argc, argv, desc), vm); + if (vm.count("help")) + { + std::cout << desc; + return 0; + } + po::notify(vm); + } + catch (const std::exception &ex) + { + std::cerr << ex.what() << std::endl; + return -1; + } + + diskann::Metric metric; + if (dist_fn == std::string("l2")) + metric = diskann::Metric::L2; + else if (dist_fn == std::string("mips")) + metric = diskann::Metric::INNER_PRODUCT; + else + { + std::cout << "Error. 
Only l2 and mips distance functions are supported" << std::endl; + return -1; + } + + std::vector> index_tag_paths; + std::ifstream index_in(index_prefix_paths); + if (!index_in.is_open()) + { + std::cerr << "Could not open " << index_prefix_paths << std::endl; + exit(-1); + } + std::ifstream tags_in(tags_file); + if (!tags_in.is_open()) + { + std::cerr << "Could not open " << tags_file << std::endl; + exit(-1); + } + std::string prefix, tagfile; + while (std::getline(index_in, prefix)) + { + if (std::getline(tags_in, tagfile)) + { + index_tag_paths.push_back(std::make_pair(prefix, tagfile)); + } + else + { + std::cerr << "The number of tags specified does not match the number of " + "indices specified" + << std::endl; + exit(-1); + } + } + index_in.close(); + tags_in.close(); + + if (data_type == std::string("float")) + { + for (auto &index_tag : index_tag_paths) + { + auto searcher = std::unique_ptr(new diskann::PQFlashSearch( + index_tag.first.c_str(), num_nodes_to_cache, num_threads, index_tag.second.c_str(), metric)); + g_ssdSearch.push_back(std::move(searcher)); + } + } + else if (data_type == std::string("int8")) + { + for (auto &index_tag : index_tag_paths) + { + auto searcher = std::unique_ptr(new diskann::PQFlashSearch( + index_tag.first.c_str(), num_nodes_to_cache, num_threads, index_tag.second.c_str(), metric)); + g_ssdSearch.push_back(std::move(searcher)); + } + } + else if (data_type == std::string("uint8")) + { + for (auto &index_tag : index_tag_paths) + { + auto searcher = std::unique_ptr(new diskann::PQFlashSearch( + index_tag.first.c_str(), num_nodes_to_cache, num_threads, index_tag.second.c_str(), metric)); + g_ssdSearch.push_back(std::move(searcher)); + } + } + else + { + std::cerr << "Unsupported data type " << data_type << std::endl; + exit(-1); + } + + while (1) + { + try + { + setup(address, data_type); + std::cout << "Type 'exit' (case-sensitive) to exit" << std::endl; + std::string line; + std::getline(std::cin, line); + if (line == "exit") + { + teardown(address); + g_httpServer->close().wait(); + exit(0); + } + } + catch (const std::exception &ex) + { + std::cerr << "Exception occurred: " << ex.what() << std::endl; + std::cerr << "Restarting HTTP server"; + teardown(address); + } + catch (...) + { + std::cerr << "Unknown exception occurreed" << std::endl; + std::cerr << "Restarting HTTP server"; + teardown(address); + } + } +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/apps/restapi/ssd_server.cpp b/packages/leann-backend-diskann/third_party/DiskANN/apps/restapi/ssd_server.cpp new file mode 100644 index 0000000..d179973 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/apps/restapi/ssd_server.cpp @@ -0,0 +1,141 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
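These apps load queries and base data with load_aligned_bin, which expects a binary file that begins with two 32-bit integers (number of points, dimension) followed by the points stored row-major, as the file-size check in test_insert_deletes_consolidate.cpp later in this diff spells out. A minimal sketch that writes a float file in that layout (file name and values are illustrative):

// Hedged sketch: write a DiskANN-style .bin file — a 2 x int32 header
// (npts, dim) followed by npts * dim row-major values of the element type.
#include <cstdint>
#include <fstream>
#include <vector>

int main()
{
    const int32_t npts = 4, dim = 8;
    std::vector<float> data(static_cast<size_t>(npts) * dim, 0.5f); // dummy vectors

    std::ofstream writer("example_queries.bin", std::ios::binary);
    writer.write(reinterpret_cast<const char *>(&npts), sizeof(int32_t));
    writer.write(reinterpret_cast<const char *>(&dim), sizeof(int32_t));
    writer.write(reinterpret_cast<const char *>(data.data()), data.size() * sizeof(float));
    return 0;
}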
+ +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +using namespace diskann; +namespace po = boost::program_options; + +std::unique_ptr g_httpServer(nullptr); +std::vector> g_ssdSearch; + +void setup(const utility::string_t &address, const std::string &typestring) +{ + web::http::uri_builder uriBldr(address); + auto uri = uriBldr.to_uri(); + + std::cout << "Attempting to start server on " << uri.to_string() << std::endl; + + g_httpServer = std::unique_ptr(new Server(uri, g_ssdSearch, typestring)); + std::cout << "Created a server object" << std::endl; + + g_httpServer->open().wait(); + ucout << U"Listening for requests on: " << address << std::endl; +} + +void teardown(const utility::string_t &address) +{ + g_httpServer->close().wait(); +} + +int main(int argc, char *argv[]) +{ + std::string data_type, index_path_prefix, address, dist_fn, tags_file; + uint32_t num_nodes_to_cache; + uint32_t num_threads; + + po::options_description desc{"Arguments"}; + try + { + desc.add_options()("help,h", "Print information on arguments"); + desc.add_options()("data_type", po::value(&data_type)->required(), "data type "); + desc.add_options()("address", po::value(&address)->required(), "Web server address"); + desc.add_options()("index_path_prefix", po::value(&index_path_prefix)->required(), + "Path prefix for loading index file components"); + desc.add_options()("num_nodes_to_cache", po::value(&num_nodes_to_cache)->default_value(0), + "Number of nodes to cache during search"); + desc.add_options()("num_threads,T", po::value(&num_threads)->default_value(omp_get_num_procs()), + "Number of threads used for building index (defaults to " + "omp_get_num_procs())"); + desc.add_options()("dist_fn", po::value(&dist_fn)->default_value("l2"), + "distance function "); + desc.add_options()("tags_file", po::value(&tags_file)->default_value(std::string()), + "Tags file location"); + po::variables_map vm; + po::store(po::parse_command_line(argc, argv, desc), vm); + if (vm.count("help")) + { + std::cout << desc; + return 0; + } + po::notify(vm); + } + catch (const std::exception &ex) + { + std::cerr << ex.what() << std::endl; + return -1; + } + + diskann::Metric metric; + if (dist_fn == std::string("l2")) + metric = diskann::Metric::L2; + else if (dist_fn == std::string("mips")) + metric = diskann::Metric::INNER_PRODUCT; + else + { + std::cout << "Error. 
Only l2 and mips distance functions are supported" << std::endl; + return -1; + } + + if (data_type == std::string("float")) + { + auto searcher = std::unique_ptr( + new diskann::PQFlashSearch(index_path_prefix, num_nodes_to_cache, num_threads, tags_file, metric)); + g_ssdSearch.push_back(std::move(searcher)); + } + else if (data_type == std::string("int8")) + { + auto searcher = std::unique_ptr( + new diskann::PQFlashSearch(index_path_prefix, num_nodes_to_cache, num_threads, tags_file, metric)); + g_ssdSearch.push_back(std::move(searcher)); + } + else if (data_type == std::string("uint8")) + { + auto searcher = std::unique_ptr( + new diskann::PQFlashSearch(index_path_prefix, num_nodes_to_cache, num_threads, tags_file, metric)); + g_ssdSearch.push_back(std::move(searcher)); + } + else + { + std::cerr << "Unsupported data type " << argv[2] << std::endl; + exit(-1); + } + + while (1) + { + try + { + setup(address, data_type); + std::cout << "Type 'exit' (case-sensitive) to exit" << std::endl; + std::string line; + std::getline(std::cin, line); + if (line == "exit") + { + teardown(address); + g_httpServer->close().wait(); + exit(0); + } + } + catch (const std::exception &ex) + { + std::cerr << "Exception occurred: " << ex.what() << std::endl; + std::cerr << "Restarting HTTP server"; + teardown(address); + } + catch (...) + { + std::cerr << "Unknown exception occurreed" << std::endl; + std::cerr << "Restarting HTTP server"; + teardown(address); + } + } +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/apps/search_disk_index.cpp b/packages/leann-backend-diskann/third_party/DiskANN/apps/search_disk_index.cpp new file mode 100644 index 0000000..6b0793d --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/apps/search_disk_index.cpp @@ -0,0 +1,499 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
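search_disk_index.cpp below reports, for each L, QPS computed from the wall-clock time of the whole query batch, plus the mean and 99.9th-percentile latency aggregated from per-query QueryStats in microseconds. A standalone sketch of the same aggregation over a plain latency vector (all numbers are made-up inputs):

// Hedged sketch: turn per-query latencies (microseconds) and the batch
// wall-clock time into the QPS / mean / 99.9th-percentile figures the
// search apps print.
#include <algorithm>
#include <cstdio>
#include <numeric>
#include <vector>

int main()
{
    std::vector<double> latency_us = {850.0, 910.0, 1020.0, 4300.0, 880.0};
    double wall_clock_seconds = 0.002; // time to run the whole batch in parallel

    double qps = latency_us.size() / wall_clock_seconds;
    double mean_us = std::accumulate(latency_us.begin(), latency_us.end(), 0.0) / latency_us.size();

    std::vector<double> sorted = latency_us;
    std::sort(sorted.begin(), sorted.end());
    // Index of the 99.9th percentile, clamped to the last element.
    size_t idx = std::min(sorted.size() - 1, static_cast<size_t>(0.999 * sorted.size()));
    double p999_us = sorted[idx];

    std::printf("QPS %.2f  mean %.2f us  99.9%% %.2f us\n", qps, mean_us, p999_us);
    return 0;
}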
+ +#include "common_includes.h" +#include + +#include "index.h" +#include "disk_utils.h" +#include "math_utils.h" +#include "memory_mapper.h" +#include "partition.h" +#include "pq_flash_index.h" +#include "timer.h" +#include "percentile_stats.h" +#include "program_options_utils.hpp" + +#ifndef _WINDOWS +#include +#include +#include +#include "linux_aligned_file_reader.h" +#else +#ifdef USE_BING_INFRA +#include "bing_aligned_file_reader.h" +#else +#include "windows_aligned_file_reader.h" +#endif +#endif + +#define WARMUP false + +namespace po = boost::program_options; + +void print_stats(std::string category, std::vector percentiles, std::vector results) +{ + diskann::cout << std::setw(20) << category << ": " << std::flush; + for (uint32_t s = 0; s < percentiles.size(); s++) + { + diskann::cout << std::setw(8) << percentiles[s] << "%"; + } + diskann::cout << std::endl; + diskann::cout << std::setw(22) << " " << std::flush; + for (uint32_t s = 0; s < percentiles.size(); s++) + { + diskann::cout << std::setw(9) << results[s]; + } + diskann::cout << std::endl; +} + +template +int search_disk_index(diskann::Metric &metric, const std::string &index_path_prefix, + const std::string &result_output_prefix, const std::string &query_file, std::string >_file, + const uint32_t num_threads, const uint32_t recall_at, const uint32_t beamwidth, + const uint32_t num_nodes_to_cache, const uint32_t search_io_limit, + const std::vector &Lvec, const float fail_if_recall_below, + const std::vector &query_filters, const bool use_reorder_data = false) +{ + diskann::cout << "Search parameters: #threads: " << num_threads << ", "; + if (beamwidth <= 0) + diskann::cout << "beamwidth to be optimized for each L value" << std::flush; + else + diskann::cout << " beamwidth: " << beamwidth << std::flush; + if (search_io_limit == std::numeric_limits::max()) + diskann::cout << "." << std::endl; + else + diskann::cout << ", io_limit: " << search_io_limit << "." << std::endl; + + std::string warmup_query_file = index_path_prefix + "_sample_data.bin"; + + // load query bin + T *query = nullptr; + uint32_t *gt_ids = nullptr; + float *gt_dists = nullptr; + size_t query_num, query_dim, query_aligned_dim, gt_num, gt_dim; + diskann::load_aligned_bin(query_file, query, query_num, query_dim, query_aligned_dim); + + bool filtered_search = false; + if (!query_filters.empty()) + { + filtered_search = true; + if (query_filters.size() != 1 && query_filters.size() != query_num) + { + std::cout << "Error. Mismatch in number of queries and size of query " + "filters file" + << std::endl; + return -1; // To return -1 or some other error handling? + } + } + + bool calc_recall_flag = false; + if (gt_file != std::string("null") && gt_file != std::string("NULL") && file_exists(gt_file)) + { + diskann::load_truthset(gt_file, gt_ids, gt_dists, gt_num, gt_dim); + if (gt_num != query_num) + { + diskann::cout << "Error. 
Mismatch in number of queries and ground truth data" << std::endl; + } + calc_recall_flag = true; + } + + std::shared_ptr reader = nullptr; +#ifdef _WINDOWS +#ifndef USE_BING_INFRA + reader.reset(new WindowsAlignedFileReader()); +#else + reader.reset(new diskann::BingAlignedFileReader()); +#endif +#else + reader.reset(new LinuxAlignedFileReader()); +#endif + + std::unique_ptr> _pFlashIndex( + new diskann::PQFlashIndex(reader, metric)); + + int res = _pFlashIndex->load(num_threads, index_path_prefix.c_str()); + + if (res != 0) + { + return res; + } + + std::vector node_list; + diskann::cout << "Caching " << num_nodes_to_cache << " nodes around medoid(s)" << std::endl; + _pFlashIndex->cache_bfs_levels(num_nodes_to_cache, node_list); + // if (num_nodes_to_cache > 0) + // _pFlashIndex->generate_cache_list_from_sample_queries(warmup_query_file, 15, 6, num_nodes_to_cache, + // num_threads, node_list); + _pFlashIndex->load_cache_list(node_list); + node_list.clear(); + node_list.shrink_to_fit(); + + omp_set_num_threads(num_threads); + + uint64_t warmup_L = 20; + uint64_t warmup_num = 0, warmup_dim = 0, warmup_aligned_dim = 0; + T *warmup = nullptr; + + if (WARMUP) + { + if (file_exists(warmup_query_file)) + { + diskann::load_aligned_bin(warmup_query_file, warmup, warmup_num, warmup_dim, warmup_aligned_dim); + } + else + { + warmup_num = (std::min)((uint32_t)150000, (uint32_t)15000 * num_threads); + warmup_dim = query_dim; + warmup_aligned_dim = query_aligned_dim; + diskann::alloc_aligned(((void **)&warmup), warmup_num * warmup_aligned_dim * sizeof(T), 8 * sizeof(T)); + std::memset(warmup, 0, warmup_num * warmup_aligned_dim * sizeof(T)); + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<> dis(-128, 127); + for (uint32_t i = 0; i < warmup_num; i++) + { + for (uint32_t d = 0; d < warmup_dim; d++) + { + warmup[i * warmup_aligned_dim + d] = (T)dis(gen); + } + } + } + diskann::cout << "Warming up index... 
" << std::flush; + std::vector warmup_result_ids_64(warmup_num, 0); + std::vector warmup_result_dists(warmup_num, 0); + +#pragma omp parallel for schedule(dynamic, 1) + for (int64_t i = 0; i < (int64_t)warmup_num; i++) + { + _pFlashIndex->cached_beam_search(warmup + (i * warmup_aligned_dim), 1, warmup_L, + warmup_result_ids_64.data() + (i * 1), + warmup_result_dists.data() + (i * 1), 4); + } + diskann::cout << "..done" << std::endl; + } + + diskann::cout.setf(std::ios_base::fixed, std::ios_base::floatfield); + diskann::cout.precision(2); + + std::string recall_string = "Recall@" + std::to_string(recall_at); + diskann::cout << std::setw(6) << "L" << std::setw(12) << "Beamwidth" << std::setw(16) << "QPS" << std::setw(16) + << "Mean Latency" << std::setw(16) << "99.9 Latency" << std::setw(16) << "Mean IOs" << std::setw(16) + << "Mean IO (us)" << std::setw(16) << "CPU (s)"; + if (calc_recall_flag) + { + diskann::cout << std::setw(16) << recall_string << std::endl; + } + else + diskann::cout << std::endl; + diskann::cout << "==================================================================" + "=================================================================" + << std::endl; + + std::vector> query_result_ids(Lvec.size()); + std::vector> query_result_dists(Lvec.size()); + + uint32_t optimized_beamwidth = 2; + + double best_recall = 0.0; + + for (uint32_t test_id = 0; test_id < Lvec.size(); test_id++) + { + uint32_t L = Lvec[test_id]; + + if (L < recall_at) + { + diskann::cout << "Ignoring search with L:" << L << " since it's smaller than K:" << recall_at << std::endl; + continue; + } + + if (beamwidth <= 0) + { + diskann::cout << "Tuning beamwidth.." << std::endl; + optimized_beamwidth = + optimize_beamwidth(_pFlashIndex, warmup, warmup_num, warmup_aligned_dim, L, optimized_beamwidth); + } + else + optimized_beamwidth = beamwidth; + + query_result_ids[test_id].resize(recall_at * query_num); + query_result_dists[test_id].resize(recall_at * query_num); + + auto stats = new diskann::QueryStats[query_num]; + + std::vector query_result_ids_64(recall_at * query_num); + auto s = std::chrono::high_resolution_clock::now(); + +#pragma omp parallel for schedule(dynamic, 1) + for (int64_t i = 0; i < (int64_t)query_num; i++) + { + if (!filtered_search) + { + _pFlashIndex->cached_beam_search(query + (i * query_aligned_dim), recall_at, L, + query_result_ids_64.data() + (i * recall_at), + query_result_dists[test_id].data() + (i * recall_at), + optimized_beamwidth, use_reorder_data, stats + i); + } + else + { + LabelT label_for_search; + if (query_filters.size() == 1) + { // one label for all queries + label_for_search = _pFlashIndex->get_converted_label(query_filters[0]); + } + else + { // one label for each query + label_for_search = _pFlashIndex->get_converted_label(query_filters[i]); + } + _pFlashIndex->cached_beam_search( + query + (i * query_aligned_dim), recall_at, L, query_result_ids_64.data() + (i * recall_at), + query_result_dists[test_id].data() + (i * recall_at), optimized_beamwidth, true, label_for_search, + use_reorder_data, stats + i); + } + } + auto e = std::chrono::high_resolution_clock::now(); + std::chrono::duration diff = e - s; + double qps = (1.0 * query_num) / (1.0 * diff.count()); + + diskann::convert_types(query_result_ids_64.data(), query_result_ids[test_id].data(), + query_num, recall_at); + + auto mean_latency = diskann::get_mean_stats( + stats, query_num, [](const diskann::QueryStats &stats) { return stats.total_us; }); + + auto latency_999 = diskann::get_percentile_stats( + stats, 
query_num, 0.999, [](const diskann::QueryStats &stats) { return stats.total_us; }); + + auto mean_ios = diskann::get_mean_stats(stats, query_num, + [](const diskann::QueryStats &stats) { return stats.n_ios; }); + + auto mean_cpuus = diskann::get_mean_stats(stats, query_num, + [](const diskann::QueryStats &stats) { return stats.cpu_us; }); + + auto mean_io_us = diskann::get_mean_stats(stats, query_num, + [](const diskann::QueryStats &stats) { return stats.io_us; }); + + double recall = 0; + if (calc_recall_flag) + { + recall = diskann::calculate_recall((uint32_t)query_num, gt_ids, gt_dists, (uint32_t)gt_dim, + query_result_ids[test_id].data(), recall_at, recall_at); + best_recall = std::max(recall, best_recall); + } + + diskann::cout << std::setw(6) << L << std::setw(12) << optimized_beamwidth << std::setw(16) << qps + << std::setw(16) << mean_latency << std::setw(16) << latency_999 << std::setw(16) << mean_ios + << std::setw(16) << mean_io_us << std::setw(16) << mean_cpuus; + if (calc_recall_flag) + { + diskann::cout << std::setw(16) << recall << std::endl; + } + else + diskann::cout << std::endl; + delete[] stats; + } + + diskann::cout << "Done searching. Now saving results " << std::endl; + uint64_t test_id = 0; + for (auto L : Lvec) + { + if (L < recall_at) + continue; + + std::string cur_result_path = result_output_prefix + "_" + std::to_string(L) + "_idx_uint32.bin"; + diskann::save_bin(cur_result_path, query_result_ids[test_id].data(), query_num, recall_at); + + cur_result_path = result_output_prefix + "_" + std::to_string(L) + "_dists_float.bin"; + diskann::save_bin(cur_result_path, query_result_dists[test_id++].data(), query_num, recall_at); + } + + diskann::aligned_free(query); + if (warmup != nullptr) + diskann::aligned_free(warmup); + return best_recall >= fail_if_recall_below ? 
0 : -1; +} + +int main(int argc, char **argv) +{ + std::string data_type, dist_fn, index_path_prefix, result_path_prefix, query_file, gt_file, filter_label, + label_type, query_filters_file; + uint32_t num_threads, K, W, num_nodes_to_cache, search_io_limit; + std::vector Lvec; + bool use_reorder_data = false; + float fail_if_recall_below = 0.0f; + + po::options_description desc{ + program_options_utils::make_program_description("search_disk_index", "Searches on-disk DiskANN indexes")}; + try + { + desc.add_options()("help,h", "Print information on arguments"); + + // Required parameters + po::options_description required_configs("Required"); + required_configs.add_options()("data_type", po::value(&data_type)->required(), + program_options_utils::DATA_TYPE_DESCRIPTION); + required_configs.add_options()("dist_fn", po::value(&dist_fn)->required(), + program_options_utils::DISTANCE_FUNCTION_DESCRIPTION); + required_configs.add_options()("index_path_prefix", po::value(&index_path_prefix)->required(), + program_options_utils::INDEX_PATH_PREFIX_DESCRIPTION); + required_configs.add_options()("result_path", po::value(&result_path_prefix)->required(), + program_options_utils::RESULT_PATH_DESCRIPTION); + required_configs.add_options()("query_file", po::value(&query_file)->required(), + program_options_utils::QUERY_FILE_DESCRIPTION); + required_configs.add_options()("recall_at,K", po::value(&K)->required(), + program_options_utils::NUMBER_OF_RESULTS_DESCRIPTION); + required_configs.add_options()("search_list,L", + po::value>(&Lvec)->multitoken()->required(), + program_options_utils::SEARCH_LIST_DESCRIPTION); + + // Optional parameters + po::options_description optional_configs("Optional"); + optional_configs.add_options()("gt_file", po::value(>_file)->default_value(std::string("null")), + program_options_utils::GROUND_TRUTH_FILE_DESCRIPTION); + optional_configs.add_options()("beamwidth,W", po::value(&W)->default_value(2), + program_options_utils::BEAMWIDTH); + optional_configs.add_options()("num_nodes_to_cache", po::value(&num_nodes_to_cache)->default_value(0), + program_options_utils::NUMBER_OF_NODES_TO_CACHE); + optional_configs.add_options()( + "search_io_limit", + po::value(&search_io_limit)->default_value(std::numeric_limits::max()), + "Max #IOs for search. Default value: uint32::max()"); + optional_configs.add_options()("num_threads,T", + po::value(&num_threads)->default_value(omp_get_num_procs()), + program_options_utils::NUMBER_THREADS_DESCRIPTION); + optional_configs.add_options()("use_reorder_data", po::bool_switch()->default_value(false), + "Include full precision data in the index. Use only in " + "conjuction with compressed data on SSD. 
Default value: false"); + optional_configs.add_options()("filter_label", + po::value(&filter_label)->default_value(std::string("")), + program_options_utils::FILTER_LABEL_DESCRIPTION); + optional_configs.add_options()("query_filters_file", + po::value(&query_filters_file)->default_value(std::string("")), + program_options_utils::FILTERS_FILE_DESCRIPTION); + optional_configs.add_options()("label_type", po::value(&label_type)->default_value("uint"), + program_options_utils::LABEL_TYPE_DESCRIPTION); + optional_configs.add_options()("fail_if_recall_below", + po::value(&fail_if_recall_below)->default_value(0.0f), + program_options_utils::FAIL_IF_RECALL_BELOW); + + // Merge required and optional parameters + desc.add(required_configs).add(optional_configs); + + po::variables_map vm; + po::store(po::parse_command_line(argc, argv, desc), vm); + if (vm.count("help")) + { + std::cout << desc; + return 0; + } + po::notify(vm); + if (vm["use_reorder_data"].as()) + use_reorder_data = true; + } + catch (const std::exception &ex) + { + std::cerr << ex.what() << '\n'; + return -1; + } + + diskann::Metric metric; + if (dist_fn == std::string("mips")) + { + metric = diskann::Metric::INNER_PRODUCT; + } + else if (dist_fn == std::string("l2")) + { + metric = diskann::Metric::L2; + } + else if (dist_fn == std::string("cosine")) + { + metric = diskann::Metric::COSINE; + } + else + { + std::cout << "Unsupported distance function. Currently only L2/ Inner " + "Product/Cosine are supported." + << std::endl; + return -1; + } + + if ((data_type != std::string("float")) && (metric == diskann::Metric::INNER_PRODUCT)) + { + std::cout << "Currently support only floating point data for Inner Product." << std::endl; + return -1; + } + + if (use_reorder_data && data_type != std::string("float")) + { + std::cout << "Error: Reorder data for reordering currently only " + "supported for float data type." + << std::endl; + return -1; + } + + if (filter_label != "" && query_filters_file != "") + { + std::cerr << "Only one of filter_label and query_filters_file should be provided" << std::endl; + return -1; + } + + std::vector query_filters; + if (filter_label != "") + { + query_filters.push_back(filter_label); + } + else if (query_filters_file != "") + { + query_filters = read_file_to_vector_of_strings(query_filters_file); + } + + try + { + if (!query_filters.empty() && label_type == "ushort") + { + if (data_type == std::string("float")) + return search_disk_index( + metric, index_path_prefix, result_path_prefix, query_file, gt_file, num_threads, K, W, + num_nodes_to_cache, search_io_limit, Lvec, fail_if_recall_below, query_filters, use_reorder_data); + else if (data_type == std::string("int8")) + return search_disk_index( + metric, index_path_prefix, result_path_prefix, query_file, gt_file, num_threads, K, W, + num_nodes_to_cache, search_io_limit, Lvec, fail_if_recall_below, query_filters, use_reorder_data); + else if (data_type == std::string("uint8")) + return search_disk_index( + metric, index_path_prefix, result_path_prefix, query_file, gt_file, num_threads, K, W, + num_nodes_to_cache, search_io_limit, Lvec, fail_if_recall_below, query_filters, use_reorder_data); + else + { + std::cerr << "Unsupported data type. 
Use float or int8 or uint8" << std::endl; + return -1; + } + } + else + { + if (data_type == std::string("float")) + return search_disk_index(metric, index_path_prefix, result_path_prefix, query_file, gt_file, + num_threads, K, W, num_nodes_to_cache, search_io_limit, Lvec, + fail_if_recall_below, query_filters, use_reorder_data); + else if (data_type == std::string("int8")) + return search_disk_index(metric, index_path_prefix, result_path_prefix, query_file, gt_file, + num_threads, K, W, num_nodes_to_cache, search_io_limit, Lvec, + fail_if_recall_below, query_filters, use_reorder_data); + else if (data_type == std::string("uint8")) + return search_disk_index(metric, index_path_prefix, result_path_prefix, query_file, gt_file, + num_threads, K, W, num_nodes_to_cache, search_io_limit, Lvec, + fail_if_recall_below, query_filters, use_reorder_data); + else + { + std::cerr << "Unsupported data type. Use float or int8 or uint8" << std::endl; + return -1; + } + } + } + catch (const std::exception &e) + { + std::cout << std::string(e.what()) << std::endl; + diskann::cerr << "Index search failed." << std::endl; + return -1; + } +} \ No newline at end of file diff --git a/packages/leann-backend-diskann/third_party/DiskANN/apps/search_memory_index.cpp b/packages/leann-backend-diskann/third_party/DiskANN/apps/search_memory_index.cpp new file mode 100644 index 0000000..1a9acc2 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/apps/search_memory_index.cpp @@ -0,0 +1,477 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef _WINDOWS +#include +#include +#include +#include +#endif + +#include "index.h" +#include "memory_mapper.h" +#include "utils.h" +#include "program_options_utils.hpp" +#include "index_factory.h" + +namespace po = boost::program_options; + +template +int search_memory_index(diskann::Metric &metric, const std::string &index_path, const std::string &result_path_prefix, + const std::string &query_file, const std::string &truthset_file, const uint32_t num_threads, + const uint32_t recall_at, const bool print_all_recalls, const std::vector &Lvec, + const bool dynamic, const bool tags, const bool show_qps_per_thread, + const std::vector &query_filters, const float fail_if_recall_below) +{ + using TagT = uint32_t; + // Load the query file + T *query = nullptr; + uint32_t *gt_ids = nullptr; + float *gt_dists = nullptr; + size_t query_num, query_dim, query_aligned_dim, gt_num, gt_dim; + diskann::load_aligned_bin(query_file, query, query_num, query_dim, query_aligned_dim); + + bool calc_recall_flag = false; + if (truthset_file != std::string("null") && file_exists(truthset_file)) + { + diskann::load_truthset(truthset_file, gt_ids, gt_dists, gt_num, gt_dim); + if (gt_num != query_num) + { + std::cout << "Error. Mismatch in number of queries and ground truth data" << std::endl; + } + calc_recall_flag = true; + } + else + { + diskann::cout << " Truthset file " << truthset_file << " not found. Not computing recall." << std::endl; + } + + bool filtered_search = false; + if (!query_filters.empty()) + { + filtered_search = true; + if (query_filters.size() != 1 && query_filters.size() != query_num) + { + std::cout << "Error. Mismatch in number of queries and size of query " + "filters file" + << std::endl; + return -1; // To return -1 or some other error handling? 
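// query_filters holds either a single label taken from --filter_label (applied to
// every query) or one label per query read from --query_filters_file, so any size
// other than 1 or query_num is rejected as a usage error.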
+ } + } + + const size_t num_frozen_pts = diskann::get_graph_num_frozen_points(index_path); + + auto config = diskann::IndexConfigBuilder() + .with_metric(metric) + .with_dimension(query_dim) + .with_max_points(0) + .with_data_load_store_strategy(diskann::DataStoreStrategy::MEMORY) + .with_graph_load_store_strategy(diskann::GraphStoreStrategy::MEMORY) + .with_data_type(diskann_type_to_name()) + .with_label_type(diskann_type_to_name()) + .with_tag_type(diskann_type_to_name()) + .is_dynamic_index(dynamic) + .is_enable_tags(tags) + .is_concurrent_consolidate(false) + .is_pq_dist_build(false) + .is_use_opq(false) + .with_num_pq_chunks(0) + .with_num_frozen_pts(num_frozen_pts) + .build(); + + auto index_factory = diskann::IndexFactory(config); + auto index = index_factory.create_instance(); + index->load(index_path.c_str(), num_threads, *(std::max_element(Lvec.begin(), Lvec.end()))); + std::cout << "Index loaded" << std::endl; + + if (metric == diskann::FAST_L2) + index->optimize_index_layout(); + + std::cout << "Using " << num_threads << " threads to search" << std::endl; + std::cout.setf(std::ios_base::fixed, std::ios_base::floatfield); + std::cout.precision(2); + const std::string qps_title = show_qps_per_thread ? "QPS/thread" : "QPS"; + uint32_t table_width = 0; + if (tags) + { + std::cout << std::setw(4) << "Ls" << std::setw(12) << qps_title << std::setw(20) << "Mean Latency (mus)" + << std::setw(15) << "99.9 Latency"; + table_width += 4 + 12 + 20 + 15; + } + else + { + std::cout << std::setw(4) << "Ls" << std::setw(12) << qps_title << std::setw(18) << "Avg dist cmps" + << std::setw(20) << "Mean Latency (mus)" << std::setw(15) << "99.9 Latency"; + table_width += 4 + 12 + 18 + 20 + 15; + } + uint32_t recalls_to_print = 0; + const uint32_t first_recall = print_all_recalls ? 1 : recall_at; + if (calc_recall_flag) + { + for (uint32_t curr_recall = first_recall; curr_recall <= recall_at; curr_recall++) + { + std::cout << std::setw(12) << ("Recall@" + std::to_string(curr_recall)); + } + recalls_to_print = recall_at + 1 - first_recall; + table_width += recalls_to_print * 12; + } + std::cout << std::endl; + std::cout << std::string(table_width, '=') << std::endl; + + std::vector> query_result_ids(Lvec.size()); + std::vector> query_result_dists(Lvec.size()); + std::vector latency_stats(query_num, 0); + std::vector cmp_stats; + if (not tags || filtered_search) + { + cmp_stats = std::vector(query_num, 0); + } + + std::vector query_result_tags; + if (tags) + { + query_result_tags.resize(recall_at * query_num); + } + + double best_recall = 0.0; + + for (uint32_t test_id = 0; test_id < Lvec.size(); test_id++) + { + uint32_t L = Lvec[test_id]; + if (L < recall_at) + { + diskann::cout << "Ignoring search with L:" << L << " since it's smaller than K:" << recall_at << std::endl; + continue; + } + + query_result_ids[test_id].resize(recall_at * query_num); + query_result_dists[test_id].resize(recall_at * query_num); + std::vector res = std::vector(); + + auto s = std::chrono::high_resolution_clock::now(); + omp_set_num_threads(num_threads); +#pragma omp parallel for schedule(dynamic, 1) + for (int64_t i = 0; i < (int64_t)query_num; i++) + { + auto qs = std::chrono::high_resolution_clock::now(); + if (filtered_search && !tags) + { + std::string raw_filter = query_filters.size() == 1 ? 
query_filters[0] : query_filters[i]; + + auto retval = index->search_with_filters(query + i * query_aligned_dim, raw_filter, recall_at, L, + query_result_ids[test_id].data() + i * recall_at, + query_result_dists[test_id].data() + i * recall_at); + cmp_stats[i] = retval.second; + } + else if (metric == diskann::FAST_L2) + { + index->search_with_optimized_layout(query + i * query_aligned_dim, recall_at, L, + query_result_ids[test_id].data() + i * recall_at); + } + else if (tags) + { + if (!filtered_search) + { + index->search_with_tags(query + i * query_aligned_dim, recall_at, L, + query_result_tags.data() + i * recall_at, nullptr, res); + } + else + { + std::string raw_filter = query_filters.size() == 1 ? query_filters[0] : query_filters[i]; + + index->search_with_tags(query + i * query_aligned_dim, recall_at, L, + query_result_tags.data() + i * recall_at, nullptr, res, true, raw_filter); + } + + for (int64_t r = 0; r < (int64_t)recall_at; r++) + { + query_result_ids[test_id][recall_at * i + r] = query_result_tags[recall_at * i + r]; + } + } + else + { + cmp_stats[i] = index + ->search(query + i * query_aligned_dim, recall_at, L, + query_result_ids[test_id].data() + i * recall_at) + .second; + } + auto qe = std::chrono::high_resolution_clock::now(); + std::chrono::duration diff = qe - qs; + latency_stats[i] = (float)(diff.count() * 1000000); + } + std::chrono::duration diff = std::chrono::high_resolution_clock::now() - s; + + double displayed_qps = query_num / diff.count(); + + if (show_qps_per_thread) + displayed_qps /= num_threads; + + std::vector recalls; + if (calc_recall_flag) + { + recalls.reserve(recalls_to_print); + for (uint32_t curr_recall = first_recall; curr_recall <= recall_at; curr_recall++) + { + recalls.push_back(diskann::calculate_recall((uint32_t)query_num, gt_ids, gt_dists, (uint32_t)gt_dim, + query_result_ids[test_id].data(), recall_at, curr_recall)); + } + } + + std::sort(latency_stats.begin(), latency_stats.end()); + double mean_latency = + std::accumulate(latency_stats.begin(), latency_stats.end(), 0.0) / static_cast(query_num); + + float avg_cmps = (float)std::accumulate(cmp_stats.begin(), cmp_stats.end(), 0) / (float)query_num; + + if (tags && !filtered_search) + { + std::cout << std::setw(4) << L << std::setw(12) << displayed_qps << std::setw(20) << (float)mean_latency + << std::setw(15) << (float)latency_stats[(uint64_t)(0.999 * query_num)]; + } + else + { + std::cout << std::setw(4) << L << std::setw(12) << displayed_qps << std::setw(18) << avg_cmps + << std::setw(20) << (float)mean_latency << std::setw(15) + << (float)latency_stats[(uint64_t)(0.999 * query_num)]; + } + for (double recall : recalls) + { + std::cout << std::setw(12) << recall; + best_recall = std::max(recall, best_recall); + } + std::cout << std::endl; + } + + std::cout << "Done searching. 
Now saving results " << std::endl; + uint64_t test_id = 0; + for (auto L : Lvec) + { + if (L < recall_at) + { + diskann::cout << "Ignoring search with L:" << L << " since it's smaller than K:" << recall_at << std::endl; + continue; + } + std::string cur_result_path_prefix = result_path_prefix + "_" + std::to_string(L); + + std::string cur_result_path = cur_result_path_prefix + "_idx_uint32.bin"; + diskann::save_bin(cur_result_path, query_result_ids[test_id].data(), query_num, recall_at); + + cur_result_path = cur_result_path_prefix + "_dists_float.bin"; + diskann::save_bin(cur_result_path, query_result_dists[test_id].data(), query_num, recall_at); + + test_id++; + } + + diskann::aligned_free(query); + return best_recall >= fail_if_recall_below ? 0 : -1; +} + +int main(int argc, char **argv) +{ + std::string data_type, dist_fn, index_path_prefix, result_path, query_file, gt_file, filter_label, label_type, + query_filters_file; + uint32_t num_threads, K; + std::vector Lvec; + bool print_all_recalls, dynamic, tags, show_qps_per_thread; + float fail_if_recall_below = 0.0f; + + po::options_description desc{ + program_options_utils::make_program_description("search_memory_index", "Searches in-memory DiskANN indexes")}; + try + { + desc.add_options()("help,h", "Print this information on arguments"); + + // Required parameters + po::options_description required_configs("Required"); + required_configs.add_options()("data_type", po::value(&data_type)->required(), + program_options_utils::DATA_TYPE_DESCRIPTION); + required_configs.add_options()("dist_fn", po::value(&dist_fn)->required(), + program_options_utils::DISTANCE_FUNCTION_DESCRIPTION); + required_configs.add_options()("index_path_prefix", po::value(&index_path_prefix)->required(), + program_options_utils::INDEX_PATH_PREFIX_DESCRIPTION); + required_configs.add_options()("result_path", po::value(&result_path)->required(), + program_options_utils::RESULT_PATH_DESCRIPTION); + required_configs.add_options()("query_file", po::value(&query_file)->required(), + program_options_utils::QUERY_FILE_DESCRIPTION); + required_configs.add_options()("recall_at,K", po::value(&K)->required(), + program_options_utils::NUMBER_OF_RESULTS_DESCRIPTION); + required_configs.add_options()("search_list,L", + po::value>(&Lvec)->multitoken()->required(), + program_options_utils::SEARCH_LIST_DESCRIPTION); + + // Optional parameters + po::options_description optional_configs("Optional"); + optional_configs.add_options()("filter_label", + po::value(&filter_label)->default_value(std::string("")), + program_options_utils::FILTER_LABEL_DESCRIPTION); + optional_configs.add_options()("query_filters_file", + po::value(&query_filters_file)->default_value(std::string("")), + program_options_utils::FILTERS_FILE_DESCRIPTION); + optional_configs.add_options()("label_type", po::value(&label_type)->default_value("uint"), + program_options_utils::LABEL_TYPE_DESCRIPTION); + optional_configs.add_options()("gt_file", po::value(>_file)->default_value(std::string("null")), + program_options_utils::GROUND_TRUTH_FILE_DESCRIPTION); + optional_configs.add_options()("num_threads,T", + po::value(&num_threads)->default_value(omp_get_num_procs()), + program_options_utils::NUMBER_THREADS_DESCRIPTION); + optional_configs.add_options()( + "dynamic", po::value(&dynamic)->default_value(false), + "Whether the index is dynamic. Dynamic indices must have associated tags. 
Default false."); + optional_configs.add_options()("tags", po::value(&tags)->default_value(false), + "Whether to search with external identifiers (tags). Default false."); + optional_configs.add_options()("fail_if_recall_below", + po::value(&fail_if_recall_below)->default_value(0.0f), + program_options_utils::FAIL_IF_RECALL_BELOW); + + // Output controls + po::options_description output_controls("Output controls"); + output_controls.add_options()("print_all_recalls", po::bool_switch(&print_all_recalls), + "Print recalls at all positions, from 1 up to specified " + "recall_at value"); + output_controls.add_options()("print_qps_per_thread", po::bool_switch(&show_qps_per_thread), + "Print overall QPS divided by the number of threads in " + "the output table"); + + // Merge required and optional parameters + desc.add(required_configs).add(optional_configs).add(output_controls); + + po::variables_map vm; + po::store(po::parse_command_line(argc, argv, desc), vm); + if (vm.count("help")) + { + std::cout << desc; + return 0; + } + po::notify(vm); + } + catch (const std::exception &ex) + { + std::cerr << ex.what() << '\n'; + return -1; + } + + diskann::Metric metric; + if ((dist_fn == std::string("mips")) && (data_type == std::string("float"))) + { + metric = diskann::Metric::INNER_PRODUCT; + } + else if (dist_fn == std::string("l2")) + { + metric = diskann::Metric::L2; + } + else if (dist_fn == std::string("cosine")) + { + metric = diskann::Metric::COSINE; + } + else if ((dist_fn == std::string("fast_l2")) && (data_type == std::string("float"))) + { + metric = diskann::Metric::FAST_L2; + } + else + { + std::cout << "Unsupported distance function. Currently only l2/ cosine are " + "supported in general, and mips/fast_l2 only for floating " + "point data." + << std::endl; + return -1; + } + + if (dynamic && not tags) + { + std::cerr << "Tags must be enabled while searching dynamically built indices" << std::endl; + return -1; + } + + if (fail_if_recall_below < 0.0 || fail_if_recall_below >= 100.0) + { + std::cerr << "fail_if_recall_below parameter must be between 0 and 100%" << std::endl; + return -1; + } + + if (filter_label != "" && query_filters_file != "") + { + std::cerr << "Only one of filter_label and query_filters_file should be provided" << std::endl; + return -1; + } + + std::vector query_filters; + if (filter_label != "") + { + query_filters.push_back(filter_label); + } + else if (query_filters_file != "") + { + query_filters = read_file_to_vector_of_strings(query_filters_file); + } + + try + { + if (!query_filters.empty() && label_type == "ushort") + { + if (data_type == std::string("int8")) + { + return search_memory_index( + metric, index_path_prefix, result_path, query_file, gt_file, num_threads, K, print_all_recalls, + Lvec, dynamic, tags, show_qps_per_thread, query_filters, fail_if_recall_below); + } + else if (data_type == std::string("uint8")) + { + return search_memory_index( + metric, index_path_prefix, result_path, query_file, gt_file, num_threads, K, print_all_recalls, + Lvec, dynamic, tags, show_qps_per_thread, query_filters, fail_if_recall_below); + } + else if (data_type == std::string("float")) + { + return search_memory_index(metric, index_path_prefix, result_path, query_file, gt_file, + num_threads, K, print_all_recalls, Lvec, dynamic, tags, + show_qps_per_thread, query_filters, fail_if_recall_below); + } + else + { + std::cout << "Unsupported type. 
Use float/int8/uint8" << std::endl; + return -1; + } + } + else + { + if (data_type == std::string("int8")) + { + return search_memory_index(metric, index_path_prefix, result_path, query_file, gt_file, + num_threads, K, print_all_recalls, Lvec, dynamic, tags, + show_qps_per_thread, query_filters, fail_if_recall_below); + } + else if (data_type == std::string("uint8")) + { + return search_memory_index(metric, index_path_prefix, result_path, query_file, gt_file, + num_threads, K, print_all_recalls, Lvec, dynamic, tags, + show_qps_per_thread, query_filters, fail_if_recall_below); + } + else if (data_type == std::string("float")) + { + return search_memory_index(metric, index_path_prefix, result_path, query_file, gt_file, + num_threads, K, print_all_recalls, Lvec, dynamic, tags, + show_qps_per_thread, query_filters, fail_if_recall_below); + } + else + { + std::cout << "Unsupported type. Use float/int8/uint8" << std::endl; + return -1; + } + } + } + catch (std::exception &e) + { + std::cout << std::string(e.what()) << std::endl; + diskann::cerr << "Index search failed." << std::endl; + return -1; + } +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/apps/test_insert_deletes_consolidate.cpp b/packages/leann-backend-diskann/third_party/DiskANN/apps/test_insert_deletes_consolidate.cpp new file mode 100644 index 0000000..97aed18 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/apps/test_insert_deletes_consolidate.cpp @@ -0,0 +1,536 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "utils.h" +#include "filter_utils.h" +#include "program_options_utils.hpp" +#include "index_factory.h" + +#ifndef _WINDOWS +#include +#include +#include +#endif + +#include "memory_mapper.h" + +namespace po = boost::program_options; + +// load_aligned_bin modified to read pieces of the file, but using ifstream +// instead of cached_ifstream. +template +inline void load_aligned_bin_part(const std::string &bin_file, T *data, size_t offset_points, size_t points_to_read) +{ + diskann::Timer timer; + std::ifstream reader; + reader.exceptions(std::ios::failbit | std::ios::badbit); + reader.open(bin_file, std::ios::binary | std::ios::ate); + size_t actual_file_size = reader.tellg(); + reader.seekg(0, std::ios::beg); + + int npts_i32, dim_i32; + reader.read((char *)&npts_i32, sizeof(int)); + reader.read((char *)&dim_i32, sizeof(int)); + size_t npts = (uint32_t)npts_i32; + size_t dim = (uint32_t)dim_i32; + + size_t expected_actual_file_size = npts * dim * sizeof(T) + 2 * sizeof(uint32_t); + if (actual_file_size != expected_actual_file_size) + { + std::stringstream stream; + stream << "Error. File size mismatch. Actual size is " << actual_file_size << " while expected size is " + << expected_actual_file_size << " npts = " << npts << " dim = " << dim << " size of = " << sizeof(T) + << std::endl; + std::cout << stream.str(); + throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); + } + + if (offset_points + points_to_read > npts) + { + std::stringstream stream; + stream << "Error. Not enough points in file. 
Requested " << offset_points << " offset and " << points_to_read + << " points, but have only " << npts << " points" << std::endl; + std::cout << stream.str(); + throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); + } + + reader.seekg(2 * sizeof(uint32_t) + offset_points * dim * sizeof(T)); + + const size_t rounded_dim = ROUND_UP(dim, 8); + + for (size_t i = 0; i < points_to_read; i++) + { + reader.read((char *)(data + i * rounded_dim), dim * sizeof(T)); + memset(data + i * rounded_dim + dim, 0, (rounded_dim - dim) * sizeof(T)); + } + reader.close(); + + const double elapsedSeconds = timer.elapsed() / 1000000.0; + std::cout << "Read " << points_to_read << " points using non-cached reads in " << elapsedSeconds << std::endl; +} + +std::string get_save_filename(const std::string &save_path, size_t points_to_skip, size_t points_deleted, + size_t last_point_threshold) +{ + std::string final_path = save_path; + if (points_to_skip > 0) + { + final_path += "skip" + std::to_string(points_to_skip) + "-"; + } + + final_path += "del" + std::to_string(points_deleted) + "-"; + final_path += std::to_string(last_point_threshold); + return final_path; +} + +template +void insert_till_next_checkpoint(diskann::AbstractIndex &index, size_t start, size_t end, int32_t thread_count, T *data, + size_t aligned_dim, std::vector> &location_to_labels) +{ + diskann::Timer insert_timer; +#pragma omp parallel for num_threads(thread_count) schedule(dynamic) + for (int64_t j = start; j < (int64_t)end; j++) + { + if (!location_to_labels.empty()) + { + index.insert_point(&data[(j - start) * aligned_dim], 1 + static_cast(j), + location_to_labels[j - start]); + } + else + { + index.insert_point(&data[(j - start) * aligned_dim], 1 + static_cast(j)); + } + } + const double elapsedSeconds = insert_timer.elapsed() / 1000000.0; + std::cout << "Insertion time " << elapsedSeconds << " seconds (" << (end - start) / elapsedSeconds + << " points/second overall, " << (end - start) / elapsedSeconds / thread_count << " per thread)\n "; +} + +template +void delete_from_beginning(diskann::AbstractIndex &index, diskann::IndexWriteParameters &delete_params, + size_t points_to_skip, size_t points_to_delete_from_beginning) +{ + try + { + std::cout << std::endl + << "Lazy deleting points " << points_to_skip << " to " + << points_to_skip + points_to_delete_from_beginning << "... "; + for (size_t i = points_to_skip; i < points_to_skip + points_to_delete_from_beginning; ++i) + index.lazy_delete(static_cast(i + 1)); // Since tags are data location + 1 + std::cout << "done." 
<< std::endl; + + auto report = index.consolidate_deletes(delete_params); + std::cout << "#active points: " << report._active_points << std::endl + << "max points: " << report._max_points << std::endl + << "empty slots: " << report._empty_slots << std::endl + << "deletes processed: " << report._slots_released << std::endl + << "latest delete size: " << report._delete_set_size << std::endl + << "rate: (" << points_to_delete_from_beginning / report._time << " points/second overall, " + << points_to_delete_from_beginning / report._time / delete_params.num_threads << " per thread)" + << std::endl; + } + catch (std::system_error &e) + { + std::cout << "Exception caught in deletion thread: " << e.what() << std::endl; + } +} + +template +void build_incremental_index(const std::string &data_path, diskann::IndexWriteParameters ¶ms, size_t points_to_skip, + size_t max_points_to_insert, size_t beginning_index_size, float start_point_norm, + uint32_t num_start_pts, size_t points_per_checkpoint, size_t checkpoints_per_snapshot, + const std::string &save_path, size_t points_to_delete_from_beginning, + size_t start_deletes_after, bool concurrent, const std::string &label_file, + const std::string &universal_label) +{ + size_t dim, aligned_dim; + size_t num_points; + diskann::get_bin_metadata(data_path, num_points, dim); + aligned_dim = ROUND_UP(dim, 8); + bool has_labels = label_file != ""; + using TagT = uint32_t; + using LabelT = uint32_t; + + size_t current_point_offset = points_to_skip; + const size_t last_point_threshold = points_to_skip + max_points_to_insert; + + bool enable_tags = true; + using TagT = uint32_t; + auto index_search_params = diskann::IndexSearchParams(params.search_list_size, params.num_threads); + diskann::IndexConfig index_config = diskann::IndexConfigBuilder() + .with_metric(diskann::L2) + .with_dimension(dim) + .with_max_points(max_points_to_insert) + .is_dynamic_index(true) + .with_index_write_params(params) + .with_index_search_params(index_search_params) + .with_data_type(diskann_type_to_name()) + .with_tag_type(diskann_type_to_name()) + .with_label_type(diskann_type_to_name()) + .with_data_load_store_strategy(diskann::DataStoreStrategy::MEMORY) + .with_graph_load_store_strategy(diskann::GraphStoreStrategy::MEMORY) + .is_enable_tags(enable_tags) + .is_filtered(has_labels) + .with_num_frozen_pts(num_start_pts) + .is_concurrent_consolidate(concurrent) + .build(); + + diskann::IndexFactory index_factory = diskann::IndexFactory(index_config); + auto index = index_factory.create_instance(); + + if (universal_label != "") + { + LabelT u_label = 0; + index->set_universal_label(u_label); + } + + if (points_to_skip > num_points) + { + throw diskann::ANNException("Asked to skip more points than in data file", -1, __FUNCSIG__, __FILE__, __LINE__); + } + + if (max_points_to_insert == 0) + { + max_points_to_insert = num_points; + } + + if (points_to_skip + max_points_to_insert > num_points) + { + max_points_to_insert = num_points - points_to_skip; + std::cerr << "WARNING: Reducing max_points_to_insert to " << max_points_to_insert + << " points since the data file has only that many" << std::endl; + } + + if (beginning_index_size > max_points_to_insert) + { + beginning_index_size = max_points_to_insert; + std::cerr << "WARNING: Reducing beginning index size to " << beginning_index_size + << " points since the data file has only that many" << std::endl; + } + if (checkpoints_per_snapshot > 0 && beginning_index_size > points_per_checkpoint) + { + beginning_index_size = 
points_per_checkpoint; + std::cerr << "WARNING: Reducing beginning index size to " << beginning_index_size << std::endl; + } + + T *data = nullptr; + diskann::alloc_aligned( + (void **)&data, std::max(points_per_checkpoint, beginning_index_size) * aligned_dim * sizeof(T), 8 * sizeof(T)); + + std::vector tags(beginning_index_size); + std::iota(tags.begin(), tags.end(), 1 + static_cast(current_point_offset)); + + load_aligned_bin_part(data_path, data, current_point_offset, beginning_index_size); + std::cout << "load aligned bin succeeded" << std::endl; + diskann::Timer timer; + + if (beginning_index_size > 0) + { + index->build(data, beginning_index_size, tags); + } + else + { + index->set_start_points_at_random(static_cast(start_point_norm)); + } + + const double elapsedSeconds = timer.elapsed() / 1000000.0; + std::cout << "Initial non-incremental index build time for " << beginning_index_size << " points took " + << elapsedSeconds << " seconds (" << beginning_index_size / elapsedSeconds << " points/second)\n "; + + current_point_offset += beginning_index_size; + + if (points_to_delete_from_beginning > max_points_to_insert) + { + points_to_delete_from_beginning = static_cast(max_points_to_insert); + std::cerr << "WARNING: Reducing points to delete from beginning to " << points_to_delete_from_beginning + << " points since the data file has only that many" << std::endl; + } + + std::vector> location_to_labels; + if (concurrent) + { + // handle labels + const auto save_path_inc = get_save_filename(save_path + ".after-concurrent-delete-", points_to_skip, + points_to_delete_from_beginning, last_point_threshold); + std::string labels_file_to_use = save_path_inc + "_label_formatted.txt"; + std::string mem_labels_int_map_file = save_path_inc + "_labels_map.txt"; + if (has_labels) + { + convert_labels_string_to_int(label_file, labels_file_to_use, mem_labels_int_map_file, universal_label); + auto parse_result = diskann::parse_formatted_label_file(labels_file_to_use); + location_to_labels = std::get<0>(parse_result); + } + + int32_t sub_threads = (params.num_threads + 1) / 2; + bool delete_launched = false; + std::future delete_task; + + diskann::Timer timer; + + for (size_t start = current_point_offset; start < last_point_threshold; + start += points_per_checkpoint, current_point_offset += points_per_checkpoint) + { + const size_t end = std::min(start + points_per_checkpoint, last_point_threshold); + std::cout << std::endl << "Inserting from " << start << " to " << end << std::endl; + + auto insert_task = std::async(std::launch::async, [&]() { + load_aligned_bin_part(data_path, data, start, end - start); + insert_till_next_checkpoint(*index, start, end, sub_threads, data, aligned_dim, + location_to_labels); + }); + insert_task.wait(); + + if (!delete_launched && end >= start_deletes_after && + end >= points_to_skip + points_to_delete_from_beginning) + { + delete_launched = true; + diskann::IndexWriteParameters delete_params = + diskann::IndexWriteParametersBuilder(params).with_num_threads(sub_threads).build(); + + delete_task = std::async(std::launch::async, [&]() { + delete_from_beginning(*index, delete_params, points_to_skip, + points_to_delete_from_beginning); + }); + } + } + delete_task.wait(); + + std::cout << "Time Elapsed " << timer.elapsed() / 1000 << "ms\n"; + index->save(save_path_inc.c_str(), true); + } + else + { + const auto save_path_inc = get_save_filename(save_path + ".after-delete-", points_to_skip, + points_to_delete_from_beginning, last_point_threshold); + std::string 
labels_file_to_use = save_path_inc + "_label_formatted.txt"; + std::string mem_labels_int_map_file = save_path_inc + "_labels_map.txt"; + if (has_labels) + { + convert_labels_string_to_int(label_file, labels_file_to_use, mem_labels_int_map_file, universal_label); + auto parse_result = diskann::parse_formatted_label_file(labels_file_to_use); + location_to_labels = std::get<0>(parse_result); + } + + size_t last_snapshot_points_threshold = 0; + size_t num_checkpoints_till_snapshot = checkpoints_per_snapshot; + + for (size_t start = current_point_offset; start < last_point_threshold; + start += points_per_checkpoint, current_point_offset += points_per_checkpoint) + { + const size_t end = std::min(start + points_per_checkpoint, last_point_threshold); + std::cout << std::endl << "Inserting from " << start << " to " << end << std::endl; + + load_aligned_bin_part(data_path, data, start, end - start); + insert_till_next_checkpoint(*index, start, end, (int32_t)params.num_threads, data, + aligned_dim, location_to_labels); + + if (checkpoints_per_snapshot > 0 && --num_checkpoints_till_snapshot == 0) + { + diskann::Timer save_timer; + + const auto save_path_inc = + get_save_filename(save_path + ".inc-", points_to_skip, points_to_delete_from_beginning, end); + index->save(save_path_inc.c_str(), false); + const double elapsedSeconds = save_timer.elapsed() / 1000000.0; + const size_t points_saved = end - points_to_skip; + + std::cout << "Saved " << points_saved << " points in " << elapsedSeconds << " seconds (" + << points_saved / elapsedSeconds << " points/second)\n"; + + num_checkpoints_till_snapshot = checkpoints_per_snapshot; + last_snapshot_points_threshold = end; + } + + std::cout << "Number of points in the index post insertion " << end << std::endl; + } + + if (checkpoints_per_snapshot > 0 && last_snapshot_points_threshold != last_point_threshold) + { + const auto save_path_inc = get_save_filename(save_path + ".inc-", points_to_skip, + points_to_delete_from_beginning, last_point_threshold); + // index.save(save_path_inc.c_str(), false); + } + + if (points_to_delete_from_beginning > 0) + { + delete_from_beginning(*index, params, points_to_skip, points_to_delete_from_beginning); + } + + index->save(save_path_inc.c_str(), true); + } + + diskann::aligned_free(data); +} + +int main(int argc, char **argv) +{ + std::string data_type, dist_fn, data_path, index_path_prefix; + uint32_t num_threads, R, L, num_start_pts; + float alpha, start_point_norm; + size_t points_to_skip, max_points_to_insert, beginning_index_size, points_per_checkpoint, checkpoints_per_snapshot, + points_to_delete_from_beginning, start_deletes_after; + bool concurrent; + + // label options + std::string label_file, label_type, universal_label; + std::uint32_t Lf, unique_labels_supported; + + po::options_description desc{program_options_utils::make_program_description("test_insert_deletes_consolidate", + "Test insert deletes & consolidate")}; + try + { + desc.add_options()("help,h", "Print information on arguments"); + + // Required parameters + po::options_description required_configs("Required"); + required_configs.add_options()("data_type", po::value(&data_type)->required(), + program_options_utils::DATA_TYPE_DESCRIPTION); + required_configs.add_options()("dist_fn", po::value(&dist_fn)->required(), + program_options_utils::DISTANCE_FUNCTION_DESCRIPTION); + required_configs.add_options()("index_path_prefix", po::value(&index_path_prefix)->required(), + program_options_utils::INDEX_PATH_PREFIX_DESCRIPTION); + 
required_configs.add_options()("data_path", po::value(&data_path)->required(), + program_options_utils::INPUT_DATA_PATH); + required_configs.add_options()("points_to_skip", po::value(&points_to_skip)->required(), + "Skip these first set of points from file"); + required_configs.add_options()("beginning_index_size", po::value(&beginning_index_size)->required(), + "Batch build will be called on these set of points"); + required_configs.add_options()("points_per_checkpoint", po::value(&points_per_checkpoint)->required(), + "Insertions are done in batches of points_per_checkpoint"); + required_configs.add_options()("checkpoints_per_snapshot", + po::value(&checkpoints_per_snapshot)->required(), + "Save the index to disk every few checkpoints"); + required_configs.add_options()("points_to_delete_from_beginning", + po::value(&points_to_delete_from_beginning)->required(), ""); + + // Optional parameters + po::options_description optional_configs("Optional"); + optional_configs.add_options()("num_threads,T", + po::value(&num_threads)->default_value(omp_get_num_procs()), + program_options_utils::NUMBER_THREADS_DESCRIPTION); + optional_configs.add_options()("max_degree,R", po::value(&R)->default_value(64), + program_options_utils::MAX_BUILD_DEGREE); + optional_configs.add_options()("Lbuild,L", po::value(&L)->default_value(100), + program_options_utils::GRAPH_BUILD_COMPLEXITY); + optional_configs.add_options()("alpha", po::value(&alpha)->default_value(1.2f), + program_options_utils::GRAPH_BUILD_ALPHA); + optional_configs.add_options()("max_points_to_insert", + po::value(&max_points_to_insert)->default_value(0), + "These number of points from the file are inserted after " + "points_to_skip"); + optional_configs.add_options()("do_concurrent", po::value(&concurrent)->default_value(false), ""); + optional_configs.add_options()("start_deletes_after", + po::value(&start_deletes_after)->default_value(0), ""); + optional_configs.add_options()("start_point_norm", po::value(&start_point_norm)->default_value(0), + "Set the start point to a random point on a sphere of this radius"); + + // optional params for filters + optional_configs.add_options()("label_file", po::value(&label_file)->default_value(""), + "Input label file in txt format for Filtered Index search. 
" + "The file should contain comma separated filters for each node " + "with each line corresponding to a graph node"); + optional_configs.add_options()("universal_label", po::value(&universal_label)->default_value(""), + "Universal label, if using it, only in conjunction with labels_file"); + optional_configs.add_options()("FilteredLbuild,Lf", po::value(&Lf)->default_value(0), + "Build complexity for filtered points, higher value " + "results in better graphs"); + optional_configs.add_options()("label_type", po::value(&label_type)->default_value("uint"), + "Storage type of Labels , default value is uint which " + "will consume memory 4 bytes per filter"); + optional_configs.add_options()("unique_labels_supported", + po::value(&unique_labels_supported)->default_value(0), + "Number of unique labels supported by the dynamic index."); + + optional_configs.add_options()( + "num_start_points", + po::value(&num_start_pts)->default_value(diskann::defaults::NUM_FROZEN_POINTS_DYNAMIC), + "Set the number of random start (frozen) points to use when " + "inserting and searching"); + + // Merge required and optional parameters + desc.add(required_configs).add(optional_configs); + + po::variables_map vm; + po::store(po::parse_command_line(argc, argv, desc), vm); + if (vm.count("help")) + { + std::cout << desc; + return 0; + } + po::notify(vm); + if (beginning_index_size == 0) + if (start_point_norm == 0) + { + std::cout << "When beginning_index_size is 0, use a start " + "point with " + "appropriate norm" + << std::endl; + return -1; + } + } + catch (const std::exception &ex) + { + std::cerr << ex.what() << '\n'; + return -1; + } + + bool has_labels = false; + if (!label_file.empty() || label_file != "") + { + has_labels = true; + } + + if (num_start_pts < unique_labels_supported) + { + num_start_pts = unique_labels_supported; + } + + try + { + diskann::IndexWriteParameters params = diskann::IndexWriteParametersBuilder(L, R) + .with_max_occlusion_size(500) + .with_alpha(alpha) + .with_num_threads(num_threads) + .with_filter_list_size(Lf) + .build(); + + if (data_type == std::string("int8")) + build_incremental_index( + data_path, params, points_to_skip, max_points_to_insert, beginning_index_size, start_point_norm, + num_start_pts, points_per_checkpoint, checkpoints_per_snapshot, index_path_prefix, + points_to_delete_from_beginning, start_deletes_after, concurrent, label_file, universal_label); + else if (data_type == std::string("uint8")) + build_incremental_index( + data_path, params, points_to_skip, max_points_to_insert, beginning_index_size, start_point_norm, + num_start_pts, points_per_checkpoint, checkpoints_per_snapshot, index_path_prefix, + points_to_delete_from_beginning, start_deletes_after, concurrent, label_file, universal_label); + else if (data_type == std::string("float")) + build_incremental_index(data_path, params, points_to_skip, max_points_to_insert, + beginning_index_size, start_point_norm, num_start_pts, points_per_checkpoint, + checkpoints_per_snapshot, index_path_prefix, points_to_delete_from_beginning, + start_deletes_after, concurrent, label_file, universal_label); + else + std::cout << "Unsupported type. Use float/int8/uint8" << std::endl; + } + catch (const std::exception &e) + { + std::cerr << "Caught exception: " << e.what() << std::endl; + exit(-1); + } + catch (...) 
+ { + std::cerr << "Caught unknown exception" << std::endl; + exit(-1); + } + + return 0; +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/apps/test_streaming_scenario.cpp b/packages/leann-backend-diskann/third_party/DiskANN/apps/test_streaming_scenario.cpp new file mode 100644 index 0000000..5a43a69 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/apps/test_streaming_scenario.cpp @@ -0,0 +1,523 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "utils.h" +#include "filter_utils.h" +#include "program_options_utils.hpp" + +#ifndef _WINDOWS +#include +#include +#include +#endif + +#include "memory_mapper.h" + +namespace po = boost::program_options; + +// load_aligned_bin modified to read pieces of the file, but using ifstream +// instead of cached_ifstream. +template +inline void load_aligned_bin_part(const std::string &bin_file, T *data, size_t offset_points, size_t points_to_read) +{ + std::ifstream reader; + reader.exceptions(std::ios::failbit | std::ios::badbit); + reader.open(bin_file, std::ios::binary | std::ios::ate); + size_t actual_file_size = reader.tellg(); + reader.seekg(0, std::ios::beg); + + int npts_i32, dim_i32; + reader.read((char *)&npts_i32, sizeof(int)); + reader.read((char *)&dim_i32, sizeof(int)); + size_t npts = (uint32_t)npts_i32; + size_t dim = (uint32_t)dim_i32; + + size_t expected_actual_file_size = npts * dim * sizeof(T) + 2 * sizeof(uint32_t); + if (actual_file_size != expected_actual_file_size) + { + std::stringstream stream; + stream << "Error. File size mismatch. Actual size is " << actual_file_size << " while expected size is " + << expected_actual_file_size << " npts = " << npts << " dim = " << dim << " size of = " << sizeof(T) + << std::endl; + std::cout << stream.str(); + throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); + } + + if (offset_points + points_to_read > npts) + { + std::stringstream stream; + stream << "Error. Not enough points in file. 
Requested " << offset_points << " offset and " << points_to_read + << " points, but have only " << npts << " points" << std::endl; + std::cout << stream.str(); + throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); + } + + reader.seekg(2 * sizeof(uint32_t) + offset_points * dim * sizeof(T)); + + const size_t rounded_dim = ROUND_UP(dim, 8); + + for (size_t i = 0; i < points_to_read; i++) + { + reader.read((char *)(data + i * rounded_dim), dim * sizeof(T)); + memset(data + i * rounded_dim + dim, 0, (rounded_dim - dim) * sizeof(T)); + } + reader.close(); +} + +std::string get_save_filename(const std::string &save_path, size_t active_window, size_t consolidate_interval, + size_t max_points_to_insert) +{ + std::string final_path = save_path; + final_path += "act" + std::to_string(active_window) + "-"; + final_path += "cons" + std::to_string(consolidate_interval) + "-"; + final_path += "max" + std::to_string(max_points_to_insert); + return final_path; +} + +template +void insert_next_batch(diskann::AbstractIndex &index, size_t start, size_t end, size_t insert_threads, T *data, + size_t aligned_dim, std::vector> &pts_to_labels) +{ + try + { + diskann::Timer insert_timer; + std::cout << std::endl << "Inserting from " << start << " to " << end << std::endl; + + size_t num_failed = 0; +#pragma omp parallel for num_threads((int32_t)insert_threads) schedule(dynamic) reduction(+ : num_failed) + for (int64_t j = start; j < (int64_t)end; j++) + { + int insert_result = -1; + if (pts_to_labels.size() > 0) + { + insert_result = index.insert_point(&data[(j - start) * aligned_dim], 1 + static_cast(j), + pts_to_labels[j - start]); + } + else + { + insert_result = index.insert_point(&data[(j - start) * aligned_dim], 1 + static_cast(j)); + } + + if (insert_result != 0) + { + std::cerr << "Insert failed " << j << std::endl; + num_failed++; + } + } + const double elapsedSeconds = insert_timer.elapsed() / 1000000.0; + std::cout << "Insertion time " << elapsedSeconds << " seconds (" << (end - start) / elapsedSeconds + << " points/second overall, " << (end - start) / elapsedSeconds / insert_threads << " per thread)" + << std::endl; + if (num_failed > 0) + std::cout << num_failed << " of " << end - start << "inserts failed" << std::endl; + } + catch (std::system_error &e) + { + std::cout << "Exiting after catching exception in insertion task: " << e.what() << std::endl; + exit(-1); + } +} + +template +void delete_and_consolidate(diskann::AbstractIndex &index, diskann::IndexWriteParameters &delete_params, size_t start, + size_t end) +{ + try + { + std::cout << std::endl << "Lazy deleting points " << start << " to " << end << "... "; + for (size_t i = start; i < end; ++i) + index.lazy_delete(static_cast(1 + i)); + std::cout << "lazy delete done." << std::endl; + + auto report = index.consolidate_deletes(delete_params); + while (report._status != diskann::consolidation_report::status_code::SUCCESS) + { + int wait_time = 5; + if (report._status == diskann::consolidation_report::status_code::LOCK_FAIL) + { + diskann::cerr << "Unable to acquire consolidate delete lock after " + << "deleting points " << start << " to " << end << ". Will retry in " << wait_time + << "seconds." << std::endl; + } + else if (report._status == diskann::consolidation_report::status_code::INCONSISTENT_COUNT_ERROR) + { + diskann::cerr << "Inconsistent counts in data structure. " + << "Will retry in " << wait_time << "seconds." 
<< std::endl; + } + else + { + std::cerr << "Exiting after unknown error in consolidate delete" << std::endl; + exit(-1); + } + std::this_thread::sleep_for(std::chrono::seconds(wait_time)); + report = index.consolidate_deletes(delete_params); + } + auto points_processed = report._active_points + report._slots_released; + auto deletion_rate = points_processed / report._time; + std::cout << "#active points: " << report._active_points << std::endl + << "max points: " << report._max_points << std::endl + << "empty slots: " << report._empty_slots << std::endl + << "deletes processed: " << report._slots_released << std::endl + << "latest delete size: " << report._delete_set_size << std::endl + << "Deletion rate: " << deletion_rate << "/sec " + << "Deletion rate: " << deletion_rate / delete_params.num_threads << "/thread/sec " << std::endl; + } + catch (std::system_error &e) + { + std::cerr << "Exiting after catching exception in deletion task: " << e.what() << std::endl; + exit(-1); + } +} + +template +void build_incremental_index(const std::string &data_path, const uint32_t L, const uint32_t R, const float alpha, + const uint32_t insert_threads, const uint32_t consolidate_threads, + size_t max_points_to_insert, size_t active_window, size_t consolidate_interval, + const float start_point_norm, uint32_t num_start_pts, const std::string &save_path, + const std::string &label_file, const std::string &universal_label, const uint32_t Lf) +{ + const uint32_t C = 500; + const bool saturate_graph = false; + bool has_labels = label_file != ""; + + diskann::IndexWriteParameters params = diskann::IndexWriteParametersBuilder(L, R) + .with_max_occlusion_size(C) + .with_alpha(alpha) + .with_saturate_graph(saturate_graph) + .with_num_threads(insert_threads) + .with_filter_list_size(Lf) + .build(); + + auto index_search_params = diskann::IndexSearchParams(L, insert_threads); + diskann::IndexWriteParameters delete_params = diskann::IndexWriteParametersBuilder(L, R) + .with_max_occlusion_size(C) + .with_alpha(alpha) + .with_saturate_graph(saturate_graph) + .with_num_threads(consolidate_threads) + .with_filter_list_size(Lf) + .build(); + + size_t dim, aligned_dim; + size_t num_points; + + std::vector> pts_to_labels; + + const auto save_path_inc = + get_save_filename(save_path + ".after-streaming-", active_window, consolidate_interval, max_points_to_insert); + std::string labels_file_to_use = save_path_inc + "_label_formatted.txt"; + std::string mem_labels_int_map_file = save_path_inc + "_labels_map.txt"; + if (has_labels) + { + convert_labels_string_to_int(label_file, labels_file_to_use, mem_labels_int_map_file, universal_label); + auto parse_result = diskann::parse_formatted_label_file(labels_file_to_use); + pts_to_labels = std::get<0>(parse_result); + } + + diskann::get_bin_metadata(data_path, num_points, dim); + diskann::cout << "metadata: file " << data_path << " has " << num_points << " points in " << dim << " dims" + << std::endl; + aligned_dim = ROUND_UP(dim, 8); + auto index_config = diskann::IndexConfigBuilder() + .with_metric(diskann::L2) + .with_dimension(dim) + .with_max_points(active_window + 4 * consolidate_interval) + .is_dynamic_index(true) + .is_enable_tags(true) + .is_use_opq(false) + .is_filtered(has_labels) + .with_num_pq_chunks(0) + .is_pq_dist_build(false) + .with_num_frozen_pts(num_start_pts) + .with_tag_type(diskann_type_to_name()) + .with_label_type(diskann_type_to_name()) + .with_data_type(diskann_type_to_name()) + .with_index_write_params(params) + 
.with_index_search_params(index_search_params) + .with_data_load_store_strategy(diskann::DataStoreStrategy::MEMORY) + .with_graph_load_store_strategy(diskann::GraphStoreStrategy::MEMORY) + .build(); + + diskann::IndexFactory index_factory = diskann::IndexFactory(index_config); + auto index = index_factory.create_instance(); + + if (universal_label != "") + { + LabelT u_label = 0; + index->set_universal_label(u_label); + } + + if (max_points_to_insert == 0) + { + max_points_to_insert = num_points; + } + + if (num_points < max_points_to_insert) + throw diskann::ANNException(std::string("num_points(") + std::to_string(num_points) + + ") < max_points_to_insert(" + std::to_string(max_points_to_insert) + ")", + -1, __FUNCSIG__, __FILE__, __LINE__); + + if (max_points_to_insert < active_window + consolidate_interval) + throw diskann::ANNException("ERROR: max_points_to_insert < " + "active_window + consolidate_interval", + -1, __FUNCSIG__, __FILE__, __LINE__); + + if (consolidate_interval < max_points_to_insert / 1000) + throw diskann::ANNException("ERROR: consolidate_interval is too small", -1, __FUNCSIG__, __FILE__, __LINE__); + + index->set_start_points_at_random(static_cast(start_point_norm)); + + T *data = nullptr; + diskann::alloc_aligned((void **)&data, std::max(consolidate_interval, active_window) * aligned_dim * sizeof(T), + 8 * sizeof(T)); + + std::vector tags(max_points_to_insert); + std::iota(tags.begin(), tags.end(), static_cast(0)); + + diskann::Timer timer; + + std::vector> delete_tasks; + + auto insert_task = std::async(std::launch::async, [&]() { + load_aligned_bin_part(data_path, data, 0, active_window); + insert_next_batch(*index, (size_t)0, active_window, params.num_threads, data, aligned_dim, + pts_to_labels); + }); + insert_task.wait(); + + for (size_t start = active_window; start + consolidate_interval <= max_points_to_insert; + start += consolidate_interval) + { + auto end = std::min(start + consolidate_interval, max_points_to_insert); + auto insert_task = std::async(std::launch::async, [&]() { + load_aligned_bin_part(data_path, data, start, end - start); + insert_next_batch(*index, start, end, params.num_threads, data, aligned_dim, + pts_to_labels); + }); + insert_task.wait(); + + if (delete_tasks.size() > 0) + delete_tasks[delete_tasks.size() - 1].wait(); + if (start >= active_window + consolidate_interval) + { + auto start_del = start - active_window - consolidate_interval; + auto end_del = start - active_window; + + delete_tasks.emplace_back(std::async(std::launch::async, [&]() { + delete_and_consolidate(*index, delete_params, (size_t)start_del, (size_t)end_del); + })); + } + } + if (delete_tasks.size() > 0) + delete_tasks[delete_tasks.size() - 1].wait(); + + std::cout << "Time Elapsed " << timer.elapsed() / 1000 << "ms\n"; + + index->save(save_path_inc.c_str(), true); + + diskann::aligned_free(data); +} + +int main(int argc, char **argv) +{ + std::string data_type, dist_fn, data_path, index_path_prefix, label_file, universal_label, label_type; + uint32_t insert_threads, consolidate_threads, R, L, num_start_pts, Lf, unique_labels_supported; + float alpha, start_point_norm; + size_t max_points_to_insert, active_window, consolidate_interval; + + po::options_description desc{program_options_utils::make_program_description("test_streaming_scenario", + "Test insert deletes & consolidate")}; + try + { + desc.add_options()("help,h", "Print information on arguments"); + + // Required parameters + po::options_description required_configs("Required"); + 
required_configs.add_options()("data_type", po::value(&data_type)->required(), + program_options_utils::DATA_TYPE_DESCRIPTION); + required_configs.add_options()("dist_fn", po::value(&dist_fn)->required(), + program_options_utils::DISTANCE_FUNCTION_DESCRIPTION); + required_configs.add_options()("index_path_prefix", po::value(&index_path_prefix)->required(), + program_options_utils::INDEX_PATH_PREFIX_DESCRIPTION); + required_configs.add_options()("data_path", po::value(&data_path)->required(), + program_options_utils::INPUT_DATA_PATH); + required_configs.add_options()("active_window", po::value(&active_window)->required(), + "Program maintains an index over an active window of " + "this size that slides through the data"); + required_configs.add_options()("consolidate_interval", po::value(&consolidate_interval)->required(), + "The program simultaneously adds this number of points to the " + "right of " + "the window while deleting the same number from the left"); + required_configs.add_options()("start_point_norm", po::value(&start_point_norm)->required(), + "Set the start point to a random point on a sphere of this radius"); + + // Optional parameters + po::options_description optional_configs("Optional"); + optional_configs.add_options()("max_degree,R", po::value(&R)->default_value(64), + program_options_utils::MAX_BUILD_DEGREE); + optional_configs.add_options()("Lbuild,L", po::value(&L)->default_value(100), + program_options_utils::GRAPH_BUILD_COMPLEXITY); + optional_configs.add_options()("alpha", po::value(&alpha)->default_value(1.2f), + program_options_utils::GRAPH_BUILD_ALPHA); + optional_configs.add_options()("insert_threads", + po::value(&insert_threads)->default_value(omp_get_num_procs() / 2), + "Number of threads used for inserting into the index (defaults to " + "omp_get_num_procs()/2)"); + optional_configs.add_options()( + "consolidate_threads", po::value(&consolidate_threads)->default_value(omp_get_num_procs() / 2), + "Number of threads used for consolidating deletes to " + "the index (defaults to omp_get_num_procs()/2)"); + optional_configs.add_options()("max_points_to_insert", + po::value(&max_points_to_insert)->default_value(0), + "The number of points from the file that the program streams " + "over "); + optional_configs.add_options()( + "num_start_points", + po::value(&num_start_pts)->default_value(diskann::defaults::NUM_FROZEN_POINTS_DYNAMIC), + "Set the number of random start (frozen) points to use when " + "inserting and searching"); + + optional_configs.add_options()("label_file", po::value(&label_file)->default_value(""), + "Input label file in txt format for Filtered Index search. 
" + "The file should contain comma separated filters for each node " + "with each line corresponding to a graph node"); + optional_configs.add_options()("universal_label", po::value(&universal_label)->default_value(""), + "Universal label, if using it, only in conjunction with labels_file"); + optional_configs.add_options()("FilteredLbuild,Lf", po::value(&Lf)->default_value(0), + "Build complexity for filtered points, higher value " + "results in better graphs"); + optional_configs.add_options()("label_type", po::value(&label_type)->default_value("uint"), + "Storage type of Labels , default value is uint which " + "will consume memory 4 bytes per filter"); + optional_configs.add_options()("unique_labels_supported", + po::value(&unique_labels_supported)->default_value(0), + "Number of unique labels supported by the dynamic index."); + + // Merge required and optional parameters + desc.add(required_configs).add(optional_configs); + + po::variables_map vm; + po::store(po::parse_command_line(argc, argv, desc), vm); + if (vm.count("help")) + { + std::cout << desc; + return 0; + } + po::notify(vm); + } + catch (const std::exception &ex) + { + std::cerr << ex.what() << '\n'; + return -1; + } + + // Validate arguments + if (start_point_norm == 0) + { + std::cout << "When beginning_index_size is 0, use a start point with " + "appropriate norm" + << std::endl; + return -1; + } + + if (label_type != std::string("ushort") && label_type != std::string("uint")) + { + std::cerr << "Invalid label type. Supported types are uint and ushort" << std::endl; + return -1; + } + + if (data_type != std::string("int8") && data_type != std::string("uint8") && data_type != std::string("float")) + { + std::cerr << "Invalid data type. Supported types are int8, uint8 and float" << std::endl; + return -1; + } + + // TODO: Are additional distance functions supported? + if (dist_fn != std::string("l2") && dist_fn != std::string("mips")) + { + std::cerr << "Invalid distance function. 
Supported functions are l2 and mips" << std::endl; + return -1; + } + + if (num_start_pts < unique_labels_supported) + { + num_start_pts = unique_labels_supported; + } + + try + { + if (data_type == std::string("uint8")) + { + if (label_type == std::string("ushort")) + { + build_incremental_index( + data_path, L, R, alpha, insert_threads, consolidate_threads, max_points_to_insert, active_window, + consolidate_interval, start_point_norm, num_start_pts, index_path_prefix, label_file, + universal_label, Lf); + } + else if (label_type == std::string("uint")) + { + build_incremental_index( + data_path, L, R, alpha, insert_threads, consolidate_threads, max_points_to_insert, active_window, + consolidate_interval, start_point_norm, num_start_pts, index_path_prefix, label_file, + universal_label, Lf); + } + } + else if (data_type == std::string("int8")) + { + if (label_type == std::string("ushort")) + { + build_incremental_index( + data_path, L, R, alpha, insert_threads, consolidate_threads, max_points_to_insert, active_window, + consolidate_interval, start_point_norm, num_start_pts, index_path_prefix, label_file, + universal_label, Lf); + } + else if (label_type == std::string("uint")) + { + build_incremental_index( + data_path, L, R, alpha, insert_threads, consolidate_threads, max_points_to_insert, active_window, + consolidate_interval, start_point_norm, num_start_pts, index_path_prefix, label_file, + universal_label, Lf); + } + } + else if (data_type == std::string("float")) + { + if (label_type == std::string("ushort")) + { + build_incremental_index( + data_path, L, R, alpha, insert_threads, consolidate_threads, max_points_to_insert, active_window, + consolidate_interval, start_point_norm, num_start_pts, index_path_prefix, label_file, + universal_label, Lf); + } + else if (label_type == std::string("uint")) + { + build_incremental_index( + data_path, L, R, alpha, insert_threads, consolidate_threads, max_points_to_insert, active_window, + consolidate_interval, start_point_norm, num_start_pts, index_path_prefix, label_file, + universal_label, Lf); + } + } + } + catch (const std::exception &e) + { + std::cerr << "Caught exception: " << e.what() << std::endl; + exit(-1); + } + catch (...) + { + std::cerr << "Caught unknown exception" << std::endl; + exit(-1); + } + + return 0; +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/CMakeLists.txt b/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/CMakeLists.txt new file mode 100644 index 0000000..3b8cf22 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/CMakeLists.txt @@ -0,0 +1,110 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT license. 
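// A small sketch of the sliding-window schedule that build_incremental_index in
// test_streaming_scenario.cpp follows: after an initial batch of active_window
// points, each step inserts consolidate_interval new points on the right and,
// once enough points have accumulated, deletes the same number from the left so
// that roughly active_window points stay live. The parameter values below are
// arbitrary and for illustration only.
#include <algorithm>
#include <cstddef>
#include <iostream>

int main()
{
    const std::size_t active_window = 10000, consolidate_interval = 2000, max_points_to_insert = 20000;
    for (std::size_t start = active_window; start + consolidate_interval <= max_points_to_insert;
         start += consolidate_interval)
    {
        const std::size_t end = std::min(start + consolidate_interval, max_points_to_insert);
        std::cout << "insert [" << start << ", " << end << ")";
        if (start >= active_window + consolidate_interval)
        {
            // Delete the batch that has just slid out of the active window.
            std::cout << "  delete [" << start - active_window - consolidate_interval << ", "
                      << start - active_window << ")";
        }
        std::cout << std::endl;
    }
    return 0;
}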
+ +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_COMPILE_WARNING_AS_ERROR ON) + + +add_executable(fvecs_to_bin fvecs_to_bin.cpp) + +add_executable(fvecs_to_bvecs fvecs_to_bvecs.cpp) + +add_executable(rand_data_gen rand_data_gen.cpp) +target_link_libraries(rand_data_gen ${PROJECT_NAME} Boost::program_options) + +add_executable(float_bin_to_int8 float_bin_to_int8.cpp) + +add_executable(ivecs_to_bin ivecs_to_bin.cpp) + +add_executable(count_bfs_levels count_bfs_levels.cpp) +target_link_libraries(count_bfs_levels ${PROJECT_NAME} Boost::program_options) + +add_executable(tsv_to_bin tsv_to_bin.cpp) + +add_executable(bin_to_tsv bin_to_tsv.cpp) + +add_executable(int8_to_float int8_to_float.cpp) +target_link_libraries(int8_to_float ${PROJECT_NAME}) + +add_executable(int8_to_float_scale int8_to_float_scale.cpp) +target_link_libraries(int8_to_float_scale ${PROJECT_NAME}) + +add_executable(uint8_to_float uint8_to_float.cpp) +target_link_libraries(uint8_to_float ${PROJECT_NAME}) + +add_executable(uint32_to_uint8 uint32_to_uint8.cpp) +target_link_libraries(uint32_to_uint8 ${PROJECT_NAME}) + +add_executable(vector_analysis vector_analysis.cpp) +target_link_libraries(vector_analysis ${PROJECT_NAME} ${DISKANN_TOOLS_TCMALLOC_LINK_OPTIONS}) + +add_executable(gen_random_slice gen_random_slice.cpp) +target_link_libraries(gen_random_slice ${PROJECT_NAME} ${DISKANN_TOOLS_TCMALLOC_LINK_OPTIONS}) + +add_executable(simulate_aggregate_recall simulate_aggregate_recall.cpp) + +add_executable(calculate_recall calculate_recall.cpp) +target_link_libraries(calculate_recall ${PROJECT_NAME} ${DISKANN_ASYNC_LIB} ${DISKANN_TOOLS_TCMALLOC_LINK_OPTIONS}) + +# Compute ground truth thing outside of DiskANN main source that depends on MKL. +add_executable(compute_groundtruth compute_groundtruth.cpp) +target_include_directories(compute_groundtruth PRIVATE ${DISKANN_MKL_INCLUDE_DIRECTORIES}) +target_link_libraries(compute_groundtruth ${PROJECT_NAME} ${DISKANN_MKL_LINK_LIBRARIES} ${DISKANN_ASYNC_LIB} Boost::program_options) + +add_executable(compute_groundtruth_for_filters compute_groundtruth_for_filters.cpp) +target_include_directories(compute_groundtruth_for_filters PRIVATE ${DISKANN_MKL_INCLUDE_DIRECTORIES}) +target_link_libraries(compute_groundtruth_for_filters ${PROJECT_NAME} ${DISKANN_MKL_LINK_LIBRARIES} ${DISKANN_ASYNC_LIB} Boost::program_options) + + +add_executable(generate_pq generate_pq.cpp) +target_link_libraries(generate_pq ${PROJECT_NAME} ${DISKANN_TOOLS_TCMALLOC_LINK_OPTIONS}) + + +add_executable(partition_data partition_data.cpp) +target_link_libraries(partition_data ${PROJECT_NAME} ${DISKANN_TOOLS_TCMALLOC_LINK_OPTIONS}) + +add_executable(partition_with_ram_budget partition_with_ram_budget.cpp) +target_link_libraries(partition_with_ram_budget ${PROJECT_NAME} ${DISKANN_TOOLS_TCMALLOC_LINK_OPTIONS}) + +add_executable(merge_shards merge_shards.cpp) +target_link_libraries(merge_shards ${PROJECT_NAME} ${DISKANN_TOOLS_TCMALLOC_LINK_OPTIONS} ${DISKANN_ASYNC_LIB}) + +add_executable(create_disk_layout create_disk_layout.cpp) +target_link_libraries(create_disk_layout ${PROJECT_NAME} ${DISKANN_ASYNC_LIB} ${DISKANN_TOOLS_TCMALLOC_LINK_OPTIONS}) + +add_executable(generate_synthetic_labels generate_synthetic_labels.cpp) +target_link_libraries(generate_synthetic_labels ${PROJECT_NAME} Boost::program_options) + +add_executable(stats_label_data stats_label_data.cpp) +target_link_libraries(stats_label_data ${PROJECT_NAME} Boost::program_options) + +if (NOT MSVC) + include(GNUInstallDirs) + install(TARGETS fvecs_to_bin + fvecs_to_bvecs + 
rand_data_gen + float_bin_to_int8 + ivecs_to_bin + count_bfs_levels + tsv_to_bin + bin_to_tsv + int8_to_float + int8_to_float_scale + uint8_to_float + uint32_to_uint8 + vector_analysis + gen_random_slice + simulate_aggregate_recall + calculate_recall + compute_groundtruth + compute_groundtruth_for_filters + generate_pq + partition_data + partition_with_ram_budget + merge_shards + create_disk_layout + generate_synthetic_labels + stats_label_data + RUNTIME + ) +endif() \ No newline at end of file diff --git a/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/bin_to_fvecs.cpp b/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/bin_to_fvecs.cpp new file mode 100644 index 0000000..e9a6a8e --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/bin_to_fvecs.cpp @@ -0,0 +1,63 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#include +#include "util.h" + +void block_convert(std::ifstream &writr, std::ofstream &readr, float *read_buf, float *write_buf, uint64_t npts, + uint64_t ndims) +{ + writr.write((char *)read_buf, npts * (ndims * sizeof(float) + sizeof(unsigned))); +#pragma omp parallel for + for (uint64_t i = 0; i < npts; i++) + { + memcpy(write_buf + i * ndims, (read_buf + i * (ndims + 1)) + 1, ndims * sizeof(float)); + } + readr.read((char *)write_buf, npts * ndims * sizeof(float)); +} + +int main(int argc, char **argv) +{ + if (argc != 3) + { + std::cout << argv[0] << " input_bin output_fvecs" << std::endl; + exit(-1); + } + std::ifstream readr(argv[1], std::ios::binary); + int npts_s32; + int ndims_s32; + readr.read((char *)&npts_s32, sizeof(int32_t)); + readr.read((char *)&ndims_s32, sizeof(int32_t)); + size_t npts = npts_s32; + size_t ndims = ndims_s32; + uint32_t ndims_u32 = (uint32_t)ndims_s32; + // uint64_t fsize = writr.tellg(); + readr.seekg(0, std::ios::beg); + + unsigned ndims_u32; + writr.write((char *)&ndims_u32, sizeof(unsigned)); + writr.seekg(0, std::ios::beg); + uint64_t ndims = (uint64_t)ndims_u32; + uint64_t npts = fsize / ((ndims + 1) * sizeof(float)); + std::cout << "Dataset: #pts = " << npts << ", # dims = " << ndims << std::endl; + + uint64_t blk_size = 131072; + uint64_t nblks = ROUND_UP(npts, blk_size) / blk_size; + std::cout << "# blks: " << nblks << std::endl; + + std::ofstream writr(argv[2], std::ios::binary); + float *read_buf = new float[npts * (ndims + 1)]; + float *write_buf = new float[npts * ndims]; + for (uint64_t i = 0; i < nblks; i++) + { + uint64_t cblk_size = std::min(npts - i * blk_size, blk_size); + block_convert(writr, readr, read_buf, write_buf, cblk_size, ndims); + std::cout << "Block #" << i << " written" << std::endl; + } + + delete[] read_buf; + delete[] write_buf; + + writr.close(); + readr.close(); +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/bin_to_tsv.cpp b/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/bin_to_tsv.cpp new file mode 100644 index 0000000..7851bef --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/bin_to_tsv.cpp @@ -0,0 +1,69 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
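// The bin_to_fvecs.cpp listing above appears garbled: several variables (npts,
// ndims, ndims_u32) are declared twice, fsize is never computed, and the output
// stream is written before it is opened. A minimal sketch of the intended
// conversion, assuming the usual fvecs layout in which every vector is preceded
// by a 4-byte integer dimension; this is not a drop-in replacement for the tool.
#include <cstdint>
#include <fstream>
#include <string>
#include <vector>

int bin_to_fvecs(const std::string &bin_path, const std::string &fvecs_path)
{
    std::ifstream in(bin_path, std::ios::binary);
    if (!in)
        return -1;
    int32_t npts = 0, ndims = 0;
    in.read(reinterpret_cast<char *>(&npts), sizeof(int32_t));  // bin header: point count
    in.read(reinterpret_cast<char *>(&ndims), sizeof(int32_t)); // bin header: dimension

    std::ofstream out(fvecs_path, std::ios::binary);
    std::vector<float> vec(static_cast<std::size_t>(ndims));
    for (int32_t i = 0; i < npts; ++i)
    {
        in.read(reinterpret_cast<char *>(vec.data()), sizeof(float) * ndims);
        out.write(reinterpret_cast<const char *>(&ndims), sizeof(int32_t)); // per-vector dim prefix
        out.write(reinterpret_cast<const char *>(vec.data()), sizeof(float) * ndims);
    }
    return 0;
}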
+ +#include +#include "utils.h" + +template +void block_convert(std::ofstream &writer, std::ifstream &reader, T *read_buf, size_t npts, size_t ndims) +{ + reader.read((char *)read_buf, npts * ndims * sizeof(float)); + + for (size_t i = 0; i < npts; i++) + { + for (size_t d = 0; d < ndims; d++) + { + writer << read_buf[d + i * ndims]; + if (d < ndims - 1) + writer << "\t"; + else + writer << "\n"; + } + } +} + +int main(int argc, char **argv) +{ + if (argc != 4) + { + std::cout << argv[0] << " input_bin output_tsv" << std::endl; + exit(-1); + } + std::string type_string(argv[1]); + if ((type_string != std::string("float")) && (type_string != std::string("int8")) && + (type_string != std::string("uin8"))) + { + std::cerr << "Error: type not supported. Use float/int8/uint8" << std::endl; + } + + std::ifstream reader(argv[2], std::ios::binary); + uint32_t npts_u32; + uint32_t ndims_u32; + reader.read((char *)&npts_u32, sizeof(uint32_t)); + reader.read((char *)&ndims_u32, sizeof(uint32_t)); + size_t npts = npts_u32; + size_t ndims = ndims_u32; + std::cout << "Dataset: #pts = " << npts << ", # dims = " << ndims << std::endl; + + size_t blk_size = 131072; + size_t nblks = ROUND_UP(npts, blk_size) / blk_size; + + std::ofstream writer(argv[3]); + char *read_buf = new char[blk_size * ndims * 4]; + for (size_t i = 0; i < nblks; i++) + { + size_t cblk_size = std::min(npts - i * blk_size, blk_size); + if (type_string == std::string("float")) + block_convert(writer, reader, (float *)read_buf, cblk_size, ndims); + else if (type_string == std::string("int8")) + block_convert(writer, reader, (int8_t *)read_buf, cblk_size, ndims); + else if (type_string == std::string("uint8")) + block_convert(writer, reader, (uint8_t *)read_buf, cblk_size, ndims); + std::cout << "Block #" << i << " written" << std::endl; + } + + delete[] read_buf; + + writer.close(); + reader.close(); +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/calculate_recall.cpp b/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/calculate_recall.cpp new file mode 100644 index 0000000..dc76252 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/calculate_recall.cpp @@ -0,0 +1,55 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#include +#include +#include +#include +#include +#include +#include + +#include "utils.h" +#include "disk_utils.h" + +int main(int argc, char **argv) +{ + if (argc != 4) + { + std::cout << argv[0] << " " << std::endl; + return -1; + } + uint32_t *gold_std = NULL; + float *gs_dist = nullptr; + uint32_t *our_results = NULL; + float *or_dist = nullptr; + size_t points_num, points_num_gs, points_num_or; + size_t dim_gs; + size_t dim_or; + diskann::load_truthset(argv[1], gold_std, gs_dist, points_num_gs, dim_gs); + diskann::load_truthset(argv[2], our_results, or_dist, points_num_or, dim_or); + + if (points_num_gs != points_num_or) + { + std::cout << "Error. Number of queries mismatch in ground truth and " + "our results" + << std::endl; + return -1; + } + points_num = points_num_gs; + + uint32_t recall_at = std::atoi(argv[3]); + + if ((dim_or < recall_at) || (recall_at > dim_gs)) + { + std::cout << "ground truth has size " << dim_gs << "; our set has " << dim_or << " points. 
Asking for recall " + << recall_at << std::endl; + return -1; + } + std::cout << "Calculating recall@" << recall_at << std::endl; + double recall_val = diskann::calculate_recall((uint32_t)points_num, gold_std, gs_dist, (uint32_t)dim_gs, + our_results, (uint32_t)dim_or, (uint32_t)recall_at); + + // double avg_recall = (recall*1.0)/(points_num*1.0); + std::cout << "Avg. recall@" << recall_at << " is " << recall_val << "\n"; +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/compute_groundtruth.cpp b/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/compute_groundtruth.cpp new file mode 100644 index 0000000..da32fd7 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/compute_groundtruth.cpp @@ -0,0 +1,574 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef _WINDOWS +#include +#else +#include +#endif +#include "filter_utils.h" +#include "utils.h" + +// WORKS FOR UPTO 2 BILLION POINTS (as we use INT INSTEAD OF UNSIGNED) + +#define PARTSIZE 10000000 +#define ALIGNMENT 512 + +// custom types (for readability) +typedef tsl::robin_set label_set; +typedef std::string path; + +namespace po = boost::program_options; + +template T div_round_up(const T numerator, const T denominator) +{ + return (numerator % denominator == 0) ? (numerator / denominator) : 1 + (numerator / denominator); +} + +using pairIF = std::pair; +struct cmpmaxstruct +{ + bool operator()(const pairIF &l, const pairIF &r) + { + return l.second < r.second; + }; +}; + +using maxPQIFCS = std::priority_queue, cmpmaxstruct>; + +template T *aligned_malloc(const size_t n, const size_t alignment) +{ +#ifdef _WINDOWS + return (T *)_aligned_malloc(sizeof(T) * n, alignment); +#else + return static_cast(aligned_alloc(alignment, sizeof(T) * n)); +#endif +} + +inline bool custom_dist(const std::pair &a, const std::pair &b) +{ + return a.second < b.second; +} + +void compute_l2sq(float *const points_l2sq, const float *const matrix, const int64_t num_points, const uint64_t dim) +{ + assert(points_l2sq != NULL); +#pragma omp parallel for schedule(static, 65536) + for (int64_t d = 0; d < num_points; ++d) + points_l2sq[d] = cblas_sdot((int64_t)dim, matrix + (ptrdiff_t)d * (ptrdiff_t)dim, 1, + matrix + (ptrdiff_t)d * (ptrdiff_t)dim, 1); +} + +void distsq_to_points(const size_t dim, + float *dist_matrix, // Col Major, cols are queries, rows are points + size_t npoints, const float *const points, + const float *const points_l2sq, // points in Col major + size_t nqueries, const float *const queries, + const float *const queries_l2sq, // queries in Col major + float *ones_vec = NULL) // Scratchspace of num_data size and init to 1.0 +{ + bool ones_vec_alloc = false; + if (ones_vec == NULL) + { + ones_vec = new float[nqueries > npoints ? nqueries : npoints]; + std::fill_n(ones_vec, nqueries > npoints ? 
nqueries : npoints, (float)1.0); + ones_vec_alloc = true; + } + cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans, npoints, nqueries, dim, (float)-2.0, points, dim, queries, dim, + (float)0.0, dist_matrix, npoints); + cblas_sgemm(CblasColMajor, CblasNoTrans, CblasTrans, npoints, nqueries, 1, (float)1.0, points_l2sq, npoints, + ones_vec, nqueries, (float)1.0, dist_matrix, npoints); + cblas_sgemm(CblasColMajor, CblasNoTrans, CblasTrans, npoints, nqueries, 1, (float)1.0, ones_vec, npoints, + queries_l2sq, nqueries, (float)1.0, dist_matrix, npoints); + if (ones_vec_alloc) + delete[] ones_vec; +} + +void inner_prod_to_points(const size_t dim, + float *dist_matrix, // Col Major, cols are queries, rows are points + size_t npoints, const float *const points, size_t nqueries, const float *const queries, + float *ones_vec = NULL) // Scratchspace of num_data size and init to 1.0 +{ + bool ones_vec_alloc = false; + if (ones_vec == NULL) + { + ones_vec = new float[nqueries > npoints ? nqueries : npoints]; + std::fill_n(ones_vec, nqueries > npoints ? nqueries : npoints, (float)1.0); + ones_vec_alloc = true; + } + cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans, npoints, nqueries, dim, (float)-1.0, points, dim, queries, dim, + (float)0.0, dist_matrix, npoints); + + if (ones_vec_alloc) + delete[] ones_vec; +} + +void exact_knn(const size_t dim, const size_t k, + size_t *const closest_points, // k * num_queries preallocated, col + // major, queries columns + float *const dist_closest_points, // k * num_queries + // preallocated, Dist to + // corresponding closes_points + size_t npoints, + float *points_in, // points in Col major + size_t nqueries, float *queries_in, + diskann::Metric metric = diskann::Metric::L2) // queries in Col major +{ + float *points_l2sq = new float[npoints]; + float *queries_l2sq = new float[nqueries]; + compute_l2sq(points_l2sq, points_in, npoints, dim); + compute_l2sq(queries_l2sq, queries_in, nqueries, dim); + + float *points = points_in; + float *queries = queries_in; + + if (metric == diskann::Metric::COSINE) + { // we convert cosine distance as + // normalized L2 distnace + points = new float[npoints * dim]; + queries = new float[nqueries * dim]; +#pragma omp parallel for schedule(static, 4096) + for (int64_t i = 0; i < (int64_t)npoints; i++) + { + float norm = std::sqrt(points_l2sq[i]); + if (norm == 0) + { + norm = std::numeric_limits::epsilon(); + } + for (uint32_t j = 0; j < dim; j++) + { + points[i * dim + j] = points_in[i * dim + j] / norm; + } + } + +#pragma omp parallel for schedule(static, 4096) + for (int64_t i = 0; i < (int64_t)nqueries; i++) + { + float norm = std::sqrt(queries_l2sq[i]); + if (norm == 0) + { + norm = std::numeric_limits::epsilon(); + } + for (uint32_t j = 0; j < dim; j++) + { + queries[i * dim + j] = queries_in[i * dim + j] / norm; + } + } + // recalculate norms after normalizing, they should all be one. + compute_l2sq(points_l2sq, points, npoints, dim); + compute_l2sq(queries_l2sq, queries, nqueries, dim); + } + + std::cout << "Going to compute " << k << " NNs for " << nqueries << " queries over " << npoints << " points in " + << dim << " dimensions using"; + if (metric == diskann::Metric::INNER_PRODUCT) + std::cout << " MIPS "; + else if (metric == diskann::Metric::COSINE) + std::cout << " Cosine "; + else + std::cout << " L2 "; + std::cout << "distance fn. 
" << std::endl; + + size_t q_batch_size = (1 << 9); + float *dist_matrix = new float[(size_t)q_batch_size * (size_t)npoints]; + + for (size_t b = 0; b < div_round_up(nqueries, q_batch_size); ++b) + { + int64_t q_b = b * q_batch_size; + int64_t q_e = ((b + 1) * q_batch_size > nqueries) ? nqueries : (b + 1) * q_batch_size; + + if (metric == diskann::Metric::L2 || metric == diskann::Metric::COSINE) + { + distsq_to_points(dim, dist_matrix, npoints, points, points_l2sq, q_e - q_b, + queries + (ptrdiff_t)q_b * (ptrdiff_t)dim, queries_l2sq + q_b); + } + else + { + inner_prod_to_points(dim, dist_matrix, npoints, points, q_e - q_b, + queries + (ptrdiff_t)q_b * (ptrdiff_t)dim); + } + std::cout << "Computed distances for queries: [" << q_b << "," << q_e << ")" << std::endl; + +#pragma omp parallel for schedule(dynamic, 16) + for (long long q = q_b; q < q_e; q++) + { + maxPQIFCS point_dist; + for (size_t p = 0; p < k; p++) + point_dist.emplace(p, dist_matrix[(ptrdiff_t)p + (ptrdiff_t)(q - q_b) * (ptrdiff_t)npoints]); + for (size_t p = k; p < npoints; p++) + { + if (point_dist.top().second > dist_matrix[(ptrdiff_t)p + (ptrdiff_t)(q - q_b) * (ptrdiff_t)npoints]) + point_dist.emplace(p, dist_matrix[(ptrdiff_t)p + (ptrdiff_t)(q - q_b) * (ptrdiff_t)npoints]); + if (point_dist.size() > k) + point_dist.pop(); + } + for (ptrdiff_t l = 0; l < (ptrdiff_t)k; ++l) + { + closest_points[(ptrdiff_t)(k - 1 - l) + (ptrdiff_t)q * (ptrdiff_t)k] = point_dist.top().first; + dist_closest_points[(ptrdiff_t)(k - 1 - l) + (ptrdiff_t)q * (ptrdiff_t)k] = point_dist.top().second; + point_dist.pop(); + } + assert(std::is_sorted(dist_closest_points + (ptrdiff_t)q * (ptrdiff_t)k, + dist_closest_points + (ptrdiff_t)(q + 1) * (ptrdiff_t)k)); + } + std::cout << "Computed exact k-NN for queries: [" << q_b << "," << q_e << ")" << std::endl; + } + + delete[] dist_matrix; + + delete[] points_l2sq; + delete[] queries_l2sq; + + if (metric == diskann::Metric::COSINE) + { + delete[] points; + delete[] queries; + } +} + +template inline int get_num_parts(const char *filename) +{ + std::ifstream reader; + reader.exceptions(std::ios::failbit | std::ios::badbit); + reader.open(filename, std::ios::binary); + std::cout << "Reading bin file " << filename << " ...\n"; + int npts_i32, ndims_i32; + reader.read((char *)&npts_i32, sizeof(int)); + reader.read((char *)&ndims_i32, sizeof(int)); + std::cout << "#pts = " << npts_i32 << ", #dims = " << ndims_i32 << std::endl; + reader.close(); + uint32_t num_parts = + (npts_i32 % PARTSIZE) == 0 ? 
npts_i32 / PARTSIZE : (uint32_t)std::floor(npts_i32 / PARTSIZE) + 1; + std::cout << "Number of parts: " << num_parts << std::endl; + return num_parts; +} + +template +inline void load_bin_as_float(const char *filename, float *&data, size_t &npts, size_t &ndims, int part_num) +{ + std::ifstream reader; + reader.exceptions(std::ios::failbit | std::ios::badbit); + reader.open(filename, std::ios::binary); + std::cout << "Reading bin file " << filename << " ...\n"; + int npts_i32, ndims_i32; + reader.read((char *)&npts_i32, sizeof(int)); + reader.read((char *)&ndims_i32, sizeof(int)); + uint64_t start_id = part_num * PARTSIZE; + uint64_t end_id = (std::min)(start_id + PARTSIZE, (uint64_t)npts_i32); + npts = end_id - start_id; + ndims = (uint64_t)ndims_i32; + std::cout << "#pts in part = " << npts << ", #dims = " << ndims << ", size = " << npts * ndims * sizeof(T) << "B" + << std::endl; + + reader.seekg(start_id * ndims * sizeof(T) + 2 * sizeof(uint32_t), std::ios::beg); + T *data_T = new T[npts * ndims]; + reader.read((char *)data_T, sizeof(T) * npts * ndims); + std::cout << "Finished reading part of the bin file." << std::endl; + reader.close(); + data = aligned_malloc(npts * ndims, ALIGNMENT); +#pragma omp parallel for schedule(dynamic, 32768) + for (int64_t i = 0; i < (int64_t)npts; i++) + { + for (int64_t j = 0; j < (int64_t)ndims; j++) + { + float cur_val_float = (float)data_T[i * ndims + j]; + std::memcpy((char *)(data + i * ndims + j), (char *)&cur_val_float, sizeof(float)); + } + } + delete[] data_T; + std::cout << "Finished converting part data to float." << std::endl; +} + +template inline void save_bin(const std::string filename, T *data, size_t npts, size_t ndims) +{ + std::ofstream writer; + writer.exceptions(std::ios::failbit | std::ios::badbit); + writer.open(filename, std::ios::binary | std::ios::out); + std::cout << "Writing bin: " << filename << "\n"; + int npts_i32 = (int)npts, ndims_i32 = (int)ndims; + writer.write((char *)&npts_i32, sizeof(int)); + writer.write((char *)&ndims_i32, sizeof(int)); + std::cout << "bin: #pts = " << npts << ", #dims = " << ndims + << ", size = " << npts * ndims * sizeof(T) + 2 * sizeof(int) << "B" << std::endl; + + writer.write((char *)data, npts * ndims * sizeof(T)); + writer.close(); + std::cout << "Finished writing bin" << std::endl; +} + +inline void save_groundtruth_as_one_file(const std::string filename, int32_t *data, float *distances, size_t npts, + size_t ndims) +{ + std::ofstream writer(filename, std::ios::binary | std::ios::out); + int npts_i32 = (int)npts, ndims_i32 = (int)ndims; + writer.write((char *)&npts_i32, sizeof(int)); + writer.write((char *)&ndims_i32, sizeof(int)); + std::cout << "Saving truthset in one file (npts, dim, npts*dim id-matrix, " + "npts*dim dist-matrix) with npts = " + << npts << ", dim = " << ndims << ", size = " << 2 * npts * ndims * sizeof(uint32_t) + 2 * sizeof(int) + << "B" << std::endl; + + writer.write((char *)data, npts * ndims * sizeof(uint32_t)); + writer.write((char *)distances, npts * ndims * sizeof(float)); + writer.close(); + std::cout << "Finished writing truthset" << std::endl; +} + +template +std::vector>> processUnfilteredParts(const std::string &base_file, + size_t &nqueries, size_t &npoints, + size_t &dim, size_t &k, float *query_data, + const diskann::Metric &metric, + std::vector &location_to_tag) +{ + float *base_data = nullptr; + int num_parts = get_num_parts(base_file.c_str()); + std::vector>> res(nqueries); + for (int p = 0; p < num_parts; p++) + { + size_t start_id = p * PARTSIZE; + 
load_bin_as_float(base_file.c_str(), base_data, npoints, dim, p); + + size_t *closest_points_part = new size_t[nqueries * k]; + float *dist_closest_points_part = new float[nqueries * k]; + + auto part_k = k < npoints ? k : npoints; + exact_knn(dim, part_k, closest_points_part, dist_closest_points_part, npoints, base_data, nqueries, query_data, + metric); + + for (size_t i = 0; i < nqueries; i++) + { + for (size_t j = 0; j < part_k; j++) + { + if (!location_to_tag.empty()) + if (location_to_tag[closest_points_part[i * k + j] + start_id] == 0) + continue; + + res[i].push_back(std::make_pair((uint32_t)(closest_points_part[i * part_k + j] + start_id), + dist_closest_points_part[i * part_k + j])); + } + } + + delete[] closest_points_part; + delete[] dist_closest_points_part; + + diskann::aligned_free(base_data); + } + return res; +}; + +template +int aux_main(const std::string &base_file, const std::string &query_file, const std::string >_file, size_t k, + const diskann::Metric &metric, const std::string &tags_file = std::string("")) +{ + size_t npoints, nqueries, dim; + + float *query_data; + + load_bin_as_float(query_file.c_str(), query_data, nqueries, dim, 0); + if (nqueries > PARTSIZE) + std::cerr << "WARNING: #Queries provided (" << nqueries << ") is greater than " << PARTSIZE + << ". Computing GT only for the first " << PARTSIZE << " queries." << std::endl; + + // load tags + const bool tags_enabled = tags_file.empty() ? false : true; + std::vector location_to_tag = diskann::loadTags(tags_file, base_file); + + int *closest_points = new int[nqueries * k]; + float *dist_closest_points = new float[nqueries * k]; + + std::vector>> results = + processUnfilteredParts(base_file, nqueries, npoints, dim, k, query_data, metric, location_to_tag); + + for (size_t i = 0; i < nqueries; i++) + { + std::vector> &cur_res = results[i]; + std::sort(cur_res.begin(), cur_res.end(), custom_dist); + size_t j = 0; + for (auto iter : cur_res) + { + if (j == k) + break; + if (tags_enabled) + { + std::uint32_t index_with_tag = location_to_tag[iter.first]; + closest_points[i * k + j] = (int32_t)index_with_tag; + } + else + { + closest_points[i * k + j] = (int32_t)iter.first; + } + + if (metric == diskann::Metric::INNER_PRODUCT) + dist_closest_points[i * k + j] = -iter.second; + else + dist_closest_points[i * k + j] = iter.second; + + ++j; + } + if (j < k) + std::cout << "WARNING: found less than k GT entries for query " << i << std::endl; + } + + save_groundtruth_as_one_file(gt_file, closest_points, dist_closest_points, nqueries, k); + delete[] closest_points; + delete[] dist_closest_points; + diskann::aligned_free(query_data); + + return 0; +} + +void load_truthset(const std::string &bin_file, uint32_t *&ids, float *&dists, size_t &npts, size_t &dim) +{ + size_t read_blk_size = 64 * 1024 * 1024; + cached_ifstream reader(bin_file, read_blk_size); + diskann::cout << "Reading truthset file " << bin_file.c_str() << " ..." << std::endl; + size_t actual_file_size = reader.get_file_size(); + + int npts_i32, dim_i32; + reader.read((char *)&npts_i32, sizeof(int)); + reader.read((char *)&dim_i32, sizeof(int)); + npts = (uint32_t)npts_i32; + dim = (uint32_t)dim_i32; + + diskann::cout << "Metadata: #pts = " << npts << ", #dims = " << dim << "... 
" << std::endl; + + int truthset_type = -1; // 1 means truthset has ids and distances, 2 means + // only ids, -1 is error + size_t expected_file_size_with_dists = 2 * npts * dim * sizeof(uint32_t) + 2 * sizeof(uint32_t); + + if (actual_file_size == expected_file_size_with_dists) + truthset_type = 1; + + size_t expected_file_size_just_ids = npts * dim * sizeof(uint32_t) + 2 * sizeof(uint32_t); + + if (actual_file_size == expected_file_size_just_ids) + truthset_type = 2; + + if (truthset_type == -1) + { + std::stringstream stream; + stream << "Error. File size mismatch. File should have bin format, with " + "npts followed by ngt followed by npts*ngt ids and optionally " + "followed by npts*ngt distance values; actual size: " + << actual_file_size << ", expected: " << expected_file_size_with_dists << " or " + << expected_file_size_just_ids; + diskann::cout << stream.str(); + throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); + } + + ids = new uint32_t[npts * dim]; + reader.read((char *)ids, npts * dim * sizeof(uint32_t)); + + if (truthset_type == 1) + { + dists = new float[npts * dim]; + reader.read((char *)dists, npts * dim * sizeof(float)); + } +} + +int main(int argc, char **argv) +{ + std::string data_type, dist_fn, base_file, query_file, gt_file, tags_file; + uint64_t K; + + try + { + po::options_description desc{"Arguments"}; + + desc.add_options()("help,h", "Print information on arguments"); + + desc.add_options()("data_type", po::value(&data_type)->required(), "data type "); + desc.add_options()("dist_fn", po::value(&dist_fn)->required(), + "distance function "); + desc.add_options()("base_file", po::value(&base_file)->required(), + "File containing the base vectors in binary format"); + desc.add_options()("query_file", po::value(&query_file)->required(), + "File containing the query vectors in binary format"); + desc.add_options()("gt_file", po::value(>_file)->required(), + "File name for the writing ground truth in binary " + "format, please don' append .bin at end if " + "no filter_label or filter_label_file is provided it " + "will save the file with '.bin' at end." + "else it will save the file as filename_label.bin"); + desc.add_options()("K", po::value(&K)->required(), + "Number of ground truth nearest neighbors to compute"); + desc.add_options()("tags_file", po::value(&tags_file)->default_value(std::string()), + "File containing the tags in binary format"); + + po::variables_map vm; + po::store(po::parse_command_line(argc, argv, desc), vm); + if (vm.count("help")) + { + std::cout << desc; + return 0; + } + po::notify(vm); + } + catch (const std::exception &ex) + { + std::cerr << ex.what() << '\n'; + return -1; + } + + if (data_type != std::string("float") && data_type != std::string("int8") && data_type != std::string("uint8")) + { + std::cout << "Unsupported type. float, int8 and uint8 types are supported." << std::endl; + return -1; + } + + diskann::Metric metric; + if (dist_fn == std::string("l2")) + { + metric = diskann::Metric::L2; + } + else if (dist_fn == std::string("mips")) + { + metric = diskann::Metric::INNER_PRODUCT; + } + else if (dist_fn == std::string("cosine")) + { + metric = diskann::Metric::COSINE; + } + else + { + std::cerr << "Unsupported distance function. Use l2/mips/cosine." 
<< std::endl; + return -1; + } + + try + { + if (data_type == std::string("float")) + aux_main(base_file, query_file, gt_file, K, metric, tags_file); + if (data_type == std::string("int8")) + aux_main(base_file, query_file, gt_file, K, metric, tags_file); + if (data_type == std::string("uint8")) + aux_main(base_file, query_file, gt_file, K, metric, tags_file); + } + catch (const std::exception &e) + { + std::cout << std::string(e.what()) << std::endl; + diskann::cerr << "Compute GT failed." << std::endl; + return -1; + } +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/compute_groundtruth_for_filters.cpp b/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/compute_groundtruth_for_filters.cpp new file mode 100644 index 0000000..52e5864 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/compute_groundtruth_for_filters.cpp @@ -0,0 +1,919 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef _WINDOWS +#include +#else +#include +#endif + +#include "filter_utils.h" +#include "utils.h" + +// WORKS FOR UPTO 2 BILLION POINTS (as we use INT INSTEAD OF UNSIGNED) + +#define PARTSIZE 10000000 +#define ALIGNMENT 512 + +// custom types (for readability) +typedef tsl::robin_set label_set; +typedef std::string path; + +namespace po = boost::program_options; + +template T div_round_up(const T numerator, const T denominator) +{ + return (numerator % denominator == 0) ? (numerator / denominator) : 1 + (numerator / denominator); +} + +using pairIF = std::pair; +struct cmpmaxstruct +{ + bool operator()(const pairIF &l, const pairIF &r) + { + return l.second < r.second; + }; +}; + +using maxPQIFCS = std::priority_queue, cmpmaxstruct>; + +template T *aligned_malloc(const size_t n, const size_t alignment) +{ +#ifdef _WINDOWS + return (T *)_aligned_malloc(sizeof(T) * n, alignment); +#else + return static_cast(aligned_alloc(alignment, sizeof(T) * n)); +#endif +} + +inline bool custom_dist(const std::pair &a, const std::pair &b) +{ + return a.second < b.second; +} + +void compute_l2sq(float *const points_l2sq, const float *const matrix, const int64_t num_points, const uint64_t dim) +{ + assert(points_l2sq != NULL); +#pragma omp parallel for schedule(static, 65536) + for (int64_t d = 0; d < num_points; ++d) + points_l2sq[d] = cblas_sdot((int64_t)dim, matrix + (ptrdiff_t)d * (ptrdiff_t)dim, 1, + matrix + (ptrdiff_t)d * (ptrdiff_t)dim, 1); +} + +void distsq_to_points(const size_t dim, + float *dist_matrix, // Col Major, cols are queries, rows are points + size_t npoints, const float *const points, + const float *const points_l2sq, // points in Col major + size_t nqueries, const float *const queries, + const float *const queries_l2sq, // queries in Col major + float *ones_vec = NULL) // Scratchspace of num_data size and init to 1.0 +{ + bool ones_vec_alloc = false; + if (ones_vec == NULL) + { + ones_vec = new float[nqueries > npoints ? nqueries : npoints]; + std::fill_n(ones_vec, nqueries > npoints ? 
nqueries : npoints, (float)1.0); + ones_vec_alloc = true; + } + cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans, npoints, nqueries, dim, (float)-2.0, points, dim, queries, dim, + (float)0.0, dist_matrix, npoints); + cblas_sgemm(CblasColMajor, CblasNoTrans, CblasTrans, npoints, nqueries, 1, (float)1.0, points_l2sq, npoints, + ones_vec, nqueries, (float)1.0, dist_matrix, npoints); + cblas_sgemm(CblasColMajor, CblasNoTrans, CblasTrans, npoints, nqueries, 1, (float)1.0, ones_vec, npoints, + queries_l2sq, nqueries, (float)1.0, dist_matrix, npoints); + if (ones_vec_alloc) + delete[] ones_vec; +} + +void inner_prod_to_points(const size_t dim, + float *dist_matrix, // Col Major, cols are queries, rows are points + size_t npoints, const float *const points, size_t nqueries, const float *const queries, + float *ones_vec = NULL) // Scratchspace of num_data size and init to 1.0 +{ + bool ones_vec_alloc = false; + if (ones_vec == NULL) + { + ones_vec = new float[nqueries > npoints ? nqueries : npoints]; + std::fill_n(ones_vec, nqueries > npoints ? nqueries : npoints, (float)1.0); + ones_vec_alloc = true; + } + cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans, npoints, nqueries, dim, (float)-1.0, points, dim, queries, dim, + (float)0.0, dist_matrix, npoints); + + if (ones_vec_alloc) + delete[] ones_vec; +} + +void exact_knn(const size_t dim, const size_t k, + size_t *const closest_points, // k * num_queries preallocated, col + // major, queries columns + float *const dist_closest_points, // k * num_queries + // preallocated, Dist to + // corresponding closes_points + size_t npoints, + float *points_in, // points in Col major + size_t nqueries, float *queries_in, + diskann::Metric metric = diskann::Metric::L2) // queries in Col major +{ + float *points_l2sq = new float[npoints]; + float *queries_l2sq = new float[nqueries]; + compute_l2sq(points_l2sq, points_in, npoints, dim); + compute_l2sq(queries_l2sq, queries_in, nqueries, dim); + + float *points = points_in; + float *queries = queries_in; + + if (metric == diskann::Metric::COSINE) + { // we convert cosine distance as + // normalized L2 distnace + points = new float[npoints * dim]; + queries = new float[nqueries * dim]; +#pragma omp parallel for schedule(static, 4096) + for (int64_t i = 0; i < (int64_t)npoints; i++) + { + float norm = std::sqrt(points_l2sq[i]); + if (norm == 0) + { + norm = std::numeric_limits::epsilon(); + } + for (uint32_t j = 0; j < dim; j++) + { + points[i * dim + j] = points_in[i * dim + j] / norm; + } + } + +#pragma omp parallel for schedule(static, 4096) + for (int64_t i = 0; i < (int64_t)nqueries; i++) + { + float norm = std::sqrt(queries_l2sq[i]); + if (norm == 0) + { + norm = std::numeric_limits::epsilon(); + } + for (uint32_t j = 0; j < dim; j++) + { + queries[i * dim + j] = queries_in[i * dim + j] / norm; + } + } + // recalculate norms after normalizing, they should all be one. + compute_l2sq(points_l2sq, points, npoints, dim); + compute_l2sq(queries_l2sq, queries, nqueries, dim); + } + + std::cout << "Going to compute " << k << " NNs for " << nqueries << " queries over " << npoints << " points in " + << dim << " dimensions using"; + if (metric == diskann::Metric::INNER_PRODUCT) + std::cout << " MIPS "; + else if (metric == diskann::Metric::COSINE) + std::cout << " Cosine "; + else + std::cout << " L2 "; + std::cout << "distance fn. 
" << std::endl; + + size_t q_batch_size = (1 << 9); + float *dist_matrix = new float[(size_t)q_batch_size * (size_t)npoints]; + + for (uint64_t b = 0; b < div_round_up(nqueries, q_batch_size); ++b) + { + int64_t q_b = b * q_batch_size; + int64_t q_e = ((b + 1) * q_batch_size > nqueries) ? nqueries : (b + 1) * q_batch_size; + + if (metric == diskann::Metric::L2 || metric == diskann::Metric::COSINE) + { + distsq_to_points(dim, dist_matrix, npoints, points, points_l2sq, q_e - q_b, + queries + (ptrdiff_t)q_b * (ptrdiff_t)dim, queries_l2sq + q_b); + } + else + { + inner_prod_to_points(dim, dist_matrix, npoints, points, q_e - q_b, + queries + (ptrdiff_t)q_b * (ptrdiff_t)dim); + } + std::cout << "Computed distances for queries: [" << q_b << "," << q_e << ")" << std::endl; + +#pragma omp parallel for schedule(dynamic, 16) + for (long long q = q_b; q < q_e; q++) + { + maxPQIFCS point_dist; + for (size_t p = 0; p < k; p++) + point_dist.emplace(p, dist_matrix[(ptrdiff_t)p + (ptrdiff_t)(q - q_b) * (ptrdiff_t)npoints]); + for (size_t p = k; p < npoints; p++) + { + if (point_dist.top().second > dist_matrix[(ptrdiff_t)p + (ptrdiff_t)(q - q_b) * (ptrdiff_t)npoints]) + point_dist.emplace(p, dist_matrix[(ptrdiff_t)p + (ptrdiff_t)(q - q_b) * (ptrdiff_t)npoints]); + if (point_dist.size() > k) + point_dist.pop(); + } + for (ptrdiff_t l = 0; l < (ptrdiff_t)k; ++l) + { + closest_points[(ptrdiff_t)(k - 1 - l) + (ptrdiff_t)q * (ptrdiff_t)k] = point_dist.top().first; + dist_closest_points[(ptrdiff_t)(k - 1 - l) + (ptrdiff_t)q * (ptrdiff_t)k] = point_dist.top().second; + point_dist.pop(); + } + assert(std::is_sorted(dist_closest_points + (ptrdiff_t)q * (ptrdiff_t)k, + dist_closest_points + (ptrdiff_t)(q + 1) * (ptrdiff_t)k)); + } + std::cout << "Computed exact k-NN for queries: [" << q_b << "," << q_e << ")" << std::endl; + } + + delete[] dist_matrix; + + delete[] points_l2sq; + delete[] queries_l2sq; + + if (metric == diskann::Metric::COSINE) + { + delete[] points; + delete[] queries; + } +} + +template inline int get_num_parts(const char *filename) +{ + std::ifstream reader; + reader.exceptions(std::ios::failbit | std::ios::badbit); + reader.open(filename, std::ios::binary); + std::cout << "Reading bin file " << filename << " ...\n"; + int npts_i32, ndims_i32; + reader.read((char *)&npts_i32, sizeof(int)); + reader.read((char *)&ndims_i32, sizeof(int)); + std::cout << "#pts = " << npts_i32 << ", #dims = " << ndims_i32 << std::endl; + reader.close(); + int num_parts = (npts_i32 % PARTSIZE) == 0 ? 
npts_i32 / PARTSIZE : (uint32_t)std::floor(npts_i32 / PARTSIZE) + 1; + std::cout << "Number of parts: " << num_parts << std::endl; + return num_parts; +} + +template +inline void load_bin_as_float(const char *filename, float *&data, size_t &npts_u64, size_t &ndims_u64, int part_num) +{ + std::ifstream reader; + reader.exceptions(std::ios::failbit | std::ios::badbit); + reader.open(filename, std::ios::binary); + std::cout << "Reading bin file " << filename << " ...\n"; + int npts_i32, ndims_i32; + reader.read((char *)&npts_i32, sizeof(int)); + reader.read((char *)&ndims_i32, sizeof(int)); + uint64_t start_id = part_num * PARTSIZE; + uint64_t end_id = (std::min)(start_id + PARTSIZE, (uint64_t)npts_i32); + npts_u64 = end_id - start_id; + ndims_u64 = (uint64_t)ndims_i32; + std::cout << "#pts in part = " << npts_u64 << ", #dims = " << ndims_u64 + << ", size = " << npts_u64 * ndims_u64 * sizeof(T) << "B" << std::endl; + + reader.seekg(start_id * ndims_u64 * sizeof(T) + 2 * sizeof(uint32_t), std::ios::beg); + T *data_T = new T[npts_u64 * ndims_u64]; + reader.read((char *)data_T, sizeof(T) * npts_u64 * ndims_u64); + std::cout << "Finished reading part of the bin file." << std::endl; + reader.close(); + data = aligned_malloc(npts_u64 * ndims_u64, ALIGNMENT); +#pragma omp parallel for schedule(dynamic, 32768) + for (int64_t i = 0; i < (int64_t)npts_u64; i++) + { + for (int64_t j = 0; j < (int64_t)ndims_u64; j++) + { + float cur_val_float = (float)data_T[i * ndims_u64 + j]; + std::memcpy((char *)(data + i * ndims_u64 + j), (char *)&cur_val_float, sizeof(float)); + } + } + delete[] data_T; + std::cout << "Finished converting part data to float." << std::endl; +} + +template +inline std::vector load_filtered_bin_as_float(const char *filename, float *&data, size_t &npts, size_t &ndims, + int part_num, const char *label_file, + const std::string &filter_label, + const std::string &universal_label, size_t &npoints_filt, + std::vector> &pts_to_labels) +{ + std::ifstream reader(filename, std::ios::binary); + if (reader.fail()) + { + throw diskann::ANNException(std::string("Failed to open file ") + filename, -1); + } + + std::cout << "Reading bin file " << filename << " ...\n"; + int npts_i32, ndims_i32; + std::vector rev_map; + reader.read((char *)&npts_i32, sizeof(int)); + reader.read((char *)&ndims_i32, sizeof(int)); + uint64_t start_id = part_num * PARTSIZE; + uint64_t end_id = (std::min)(start_id + PARTSIZE, (uint64_t)npts_i32); + npts = end_id - start_id; + ndims = (uint32_t)ndims_i32; + uint64_t nptsuint64_t = (uint64_t)npts; + uint64_t ndimsuint64_t = (uint64_t)ndims; + npoints_filt = 0; + std::cout << "#pts in part = " << npts << ", #dims = " << ndims + << ", size = " << nptsuint64_t * ndimsuint64_t * sizeof(T) << "B" << std::endl; + std::cout << "start and end ids: " << start_id << ", " << end_id << std::endl; + reader.seekg(start_id * ndims * sizeof(T) + 2 * sizeof(uint32_t), std::ios::beg); + + T *data_T = new T[nptsuint64_t * ndimsuint64_t]; + reader.read((char *)data_T, sizeof(T) * nptsuint64_t * ndimsuint64_t); + std::cout << "Finished reading part of the bin file." 
<< std::endl; + reader.close(); + + data = aligned_malloc(nptsuint64_t * ndimsuint64_t, ALIGNMENT); + + for (int64_t i = 0; i < (int64_t)nptsuint64_t; i++) + { + if (std::find(pts_to_labels[start_id + i].begin(), pts_to_labels[start_id + i].end(), filter_label) != + pts_to_labels[start_id + i].end() || + std::find(pts_to_labels[start_id + i].begin(), pts_to_labels[start_id + i].end(), universal_label) != + pts_to_labels[start_id + i].end()) + { + rev_map.push_back(start_id + i); + for (int64_t j = 0; j < (int64_t)ndimsuint64_t; j++) + { + float cur_val_float = (float)data_T[i * ndimsuint64_t + j]; + std::memcpy((char *)(data + npoints_filt * ndimsuint64_t + j), (char *)&cur_val_float, sizeof(float)); + } + npoints_filt++; + } + } + delete[] data_T; + std::cout << "Finished converting part data to float.. identified " << npoints_filt + << " points matching the filter." << std::endl; + return rev_map; +} + +template inline void save_bin(const std::string filename, T *data, size_t npts, size_t ndims) +{ + std::ofstream writer; + writer.exceptions(std::ios::failbit | std::ios::badbit); + writer.open(filename, std::ios::binary | std::ios::out); + std::cout << "Writing bin: " << filename << "\n"; + int npts_i32 = (int)npts, ndims_i32 = (int)ndims; + writer.write((char *)&npts_i32, sizeof(int)); + writer.write((char *)&ndims_i32, sizeof(int)); + std::cout << "bin: #pts = " << npts << ", #dims = " << ndims + << ", size = " << npts * ndims * sizeof(T) + 2 * sizeof(int) << "B" << std::endl; + + writer.write((char *)data, npts * ndims * sizeof(T)); + writer.close(); + std::cout << "Finished writing bin" << std::endl; +} + +inline void save_groundtruth_as_one_file(const std::string filename, int32_t *data, float *distances, size_t npts, + size_t ndims) +{ + std::ofstream writer(filename, std::ios::binary | std::ios::out); + int npts_i32 = (int)npts, ndims_i32 = (int)ndims; + writer.write((char *)&npts_i32, sizeof(int)); + writer.write((char *)&ndims_i32, sizeof(int)); + std::cout << "Saving truthset in one file (npts, dim, npts*dim id-matrix, " + "npts*dim dist-matrix) with npts = " + << npts << ", dim = " << ndims << ", size = " << 2 * npts * ndims * sizeof(uint32_t) + 2 * sizeof(int) + << "B" << std::endl; + + writer.write((char *)data, npts * ndims * sizeof(uint32_t)); + writer.write((char *)distances, npts * ndims * sizeof(float)); + writer.close(); + std::cout << "Finished writing truthset" << std::endl; +} + +inline void parse_label_file_into_vec(size_t &line_cnt, const std::string &map_file, + std::vector> &pts_to_labels) +{ + std::ifstream infile(map_file); + std::string line, token; + std::set labels; + infile.clear(); + infile.seekg(0, std::ios::beg); + while (std::getline(infile, line)) + { + std::istringstream iss(line); + std::vector lbls(0); + + getline(iss, token, '\t'); + std::istringstream new_iss(token); + while (getline(new_iss, token, ',')) + { + token.erase(std::remove(token.begin(), token.end(), '\n'), token.end()); + token.erase(std::remove(token.begin(), token.end(), '\r'), token.end()); + lbls.push_back(token); + labels.insert(token); + } + std::sort(lbls.begin(), lbls.end()); + pts_to_labels.push_back(lbls); + } + std::cout << "Identified " << labels.size() << " distinct label(s), and populated labels for " + << pts_to_labels.size() << " points" << std::endl; +} + +template +std::vector>> processUnfilteredParts(const std::string &base_file, + size_t &nqueries, size_t &npoints, + size_t &dim, size_t &k, float *query_data, + const diskann::Metric &metric, + std::vector 
&location_to_tag) +{ + float *base_data = nullptr; + int num_parts = get_num_parts(base_file.c_str()); + std::vector>> res(nqueries); + for (int p = 0; p < num_parts; p++) + { + size_t start_id = p * PARTSIZE; + load_bin_as_float(base_file.c_str(), base_data, npoints, dim, p); + + size_t *closest_points_part = new size_t[nqueries * k]; + float *dist_closest_points_part = new float[nqueries * k]; + + auto part_k = k < npoints ? k : npoints; + exact_knn(dim, part_k, closest_points_part, dist_closest_points_part, npoints, base_data, nqueries, query_data, + metric); + + for (size_t i = 0; i < nqueries; i++) + { + for (uint64_t j = 0; j < part_k; j++) + { + if (!location_to_tag.empty()) + if (location_to_tag[closest_points_part[i * k + j] + start_id] == 0) + continue; + + res[i].push_back(std::make_pair((uint32_t)(closest_points_part[i * part_k + j] + start_id), + dist_closest_points_part[i * part_k + j])); + } + } + + delete[] closest_points_part; + delete[] dist_closest_points_part; + + diskann::aligned_free(base_data); + } + return res; +}; + +template +std::vector>> processFilteredParts( + const std::string &base_file, const std::string &label_file, const std::string &filter_label, + const std::string &universal_label, size_t &nqueries, size_t &npoints, size_t &dim, size_t &k, float *query_data, + const diskann::Metric &metric, std::vector &location_to_tag) +{ + size_t npoints_filt = 0; + float *base_data = nullptr; + std::vector>> res(nqueries); + int num_parts = get_num_parts(base_file.c_str()); + + std::vector> pts_to_labels; + if (filter_label != "") + parse_label_file_into_vec(npoints, label_file, pts_to_labels); + + for (int p = 0; p < num_parts; p++) + { + size_t start_id = p * PARTSIZE; + std::vector rev_map; + if (filter_label != "") + rev_map = load_filtered_bin_as_float(base_file.c_str(), base_data, npoints, dim, p, label_file.c_str(), + filter_label, universal_label, npoints_filt, pts_to_labels); + size_t *closest_points_part = new size_t[nqueries * k]; + float *dist_closest_points_part = new float[nqueries * k]; + + auto part_k = k < npoints_filt ? k : npoints_filt; + if (npoints_filt > 0) + { + exact_knn(dim, part_k, closest_points_part, dist_closest_points_part, npoints_filt, base_data, nqueries, + query_data, metric); + } + + for (size_t i = 0; i < nqueries; i++) + { + for (uint64_t j = 0; j < part_k; j++) + { + if (!location_to_tag.empty()) + if (location_to_tag[closest_points_part[i * k + j] + start_id] == 0) + continue; + + res[i].push_back(std::make_pair((uint32_t)(rev_map[closest_points_part[i * part_k + j]]), + dist_closest_points_part[i * part_k + j])); + } + } + + delete[] closest_points_part; + delete[] dist_closest_points_part; + + diskann::aligned_free(base_data); + } + return res; +}; + +template +int aux_main(const std::string &base_file, const std::string &label_file, const std::string &query_file, + const std::string >_file, size_t k, const std::string &universal_label, const diskann::Metric &metric, + const std::string &filter_label, const std::string &tags_file = std::string("")) +{ + size_t npoints, nqueries, dim; + + float *query_data = nullptr; + + load_bin_as_float(query_file.c_str(), query_data, nqueries, dim, 0); + if (nqueries > PARTSIZE) + std::cerr << "WARNING: #Queries provided (" << nqueries << ") is greater than " << PARTSIZE + << ". Computing GT only for the first " << PARTSIZE << " queries." << std::endl; + + // load tags + const bool tags_enabled = tags_file.empty() ? 
false : true; + std::vector location_to_tag = diskann::loadTags(tags_file, base_file); + + int *closest_points = new int[nqueries * k]; + float *dist_closest_points = new float[nqueries * k]; + + std::vector>> results; + if (filter_label == "") + { + results = processUnfilteredParts(base_file, nqueries, npoints, dim, k, query_data, metric, location_to_tag); + } + else + { + results = processFilteredParts(base_file, label_file, filter_label, universal_label, nqueries, npoints, dim, + k, query_data, metric, location_to_tag); + } + + for (size_t i = 0; i < nqueries; i++) + { + std::vector> &cur_res = results[i]; + std::sort(cur_res.begin(), cur_res.end(), custom_dist); + size_t j = 0; + for (auto iter : cur_res) + { + if (j == k) + break; + if (tags_enabled) + { + std::uint32_t index_with_tag = location_to_tag[iter.first]; + closest_points[i * k + j] = (int32_t)index_with_tag; + } + else + { + closest_points[i * k + j] = (int32_t)iter.first; + } + + if (metric == diskann::Metric::INNER_PRODUCT) + dist_closest_points[i * k + j] = -iter.second; + else + dist_closest_points[i * k + j] = iter.second; + + ++j; + } + if (j < k) + std::cout << "WARNING: found less than k GT entries for query " << i << std::endl; + } + + save_groundtruth_as_one_file(gt_file, closest_points, dist_closest_points, nqueries, k); + delete[] closest_points; + delete[] dist_closest_points; + diskann::aligned_free(query_data); + + return 0; +} + +void load_truthset(const std::string &bin_file, uint32_t *&ids, float *&dists, size_t &npts, size_t &dim) +{ + size_t read_blk_size = 64 * 1024 * 1024; + cached_ifstream reader(bin_file, read_blk_size); + diskann::cout << "Reading truthset file " << bin_file.c_str() << " ..." << std::endl; + size_t actual_file_size = reader.get_file_size(); + + int npts_i32, dim_i32; + reader.read((char *)&npts_i32, sizeof(int)); + reader.read((char *)&dim_i32, sizeof(int)); + npts = (uint32_t)npts_i32; + dim = (uint32_t)dim_i32; + + diskann::cout << "Metadata: #pts = " << npts << ", #dims = " << dim << "... " << std::endl; + + int truthset_type = -1; // 1 means truthset has ids and distances, 2 means + // only ids, -1 is error + size_t expected_file_size_with_dists = 2 * npts * dim * sizeof(uint32_t) + 2 * sizeof(uint32_t); + + if (actual_file_size == expected_file_size_with_dists) + truthset_type = 1; + + size_t expected_file_size_just_ids = npts * dim * sizeof(uint32_t) + 2 * sizeof(uint32_t); + + if (actual_file_size == expected_file_size_just_ids) + truthset_type = 2; + + if (truthset_type == -1) + { + std::stringstream stream; + stream << "Error. File size mismatch. 
File should have bin format, with " + "npts followed by ngt followed by npts*ngt ids and optionally " + "followed by npts*ngt distance values; actual size: " + << actual_file_size << ", expected: " << expected_file_size_with_dists << " or " + << expected_file_size_just_ids; + diskann::cout << stream.str(); + throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); + } + + ids = new uint32_t[npts * dim]; + reader.read((char *)ids, npts * dim * sizeof(uint32_t)); + + if (truthset_type == 1) + { + dists = new float[npts * dim]; + reader.read((char *)dists, npts * dim * sizeof(float)); + } +} + +int main(int argc, char **argv) +{ + std::string data_type, dist_fn, base_file, query_file, gt_file, tags_file, label_file, filter_label, + universal_label, filter_label_file; + uint64_t K; + + try + { + po::options_description desc{"Arguments"}; + + desc.add_options()("help,h", "Print information on arguments"); + + desc.add_options()("data_type", po::value(&data_type)->required(), "data type "); + desc.add_options()("dist_fn", po::value(&dist_fn)->required(), "distance function "); + desc.add_options()("base_file", po::value(&base_file)->required(), + "File containing the base vectors in binary format"); + desc.add_options()("query_file", po::value(&query_file)->required(), + "File containing the query vectors in binary format"); + desc.add_options()("label_file", po::value(&label_file)->default_value(""), + "Input labels file in txt format if present"); + desc.add_options()("filter_label", po::value(&filter_label)->default_value(""), + "Input filter label if doing filtered groundtruth"); + desc.add_options()("universal_label", po::value(&universal_label)->default_value(""), + "Universal label, if using it, only in conjunction with label_file"); + desc.add_options()("gt_file", po::value(>_file)->required(), + "File name for the writing ground truth in binary " + "format, please don' append .bin at end if " + "no filter_label or filter_label_file is provided it " + "will save the file with '.bin' at end." + "else it will save the file as filename_label.bin"); + desc.add_options()("K", po::value(&K)->required(), + "Number of ground truth nearest neighbors to compute"); + desc.add_options()("tags_file", po::value(&tags_file)->default_value(std::string()), + "File containing the tags in binary format"); + desc.add_options()("filter_label_file", + po::value(&filter_label_file)->default_value(std::string("")), + "Filter file for Queries for Filtered Search "); + + po::variables_map vm; + po::store(po::parse_command_line(argc, argv, desc), vm); + if (vm.count("help")) + { + std::cout << desc; + return 0; + } + po::notify(vm); + } + catch (const std::exception &ex) + { + std::cerr << ex.what() << '\n'; + return -1; + } + + if (data_type != std::string("float") && data_type != std::string("int8") && data_type != std::string("uint8")) + { + std::cout << "Unsupported type. float, int8 and uint8 types are supported." << std::endl; + return -1; + } + + if (filter_label != "" && filter_label_file != "") + { + std::cerr << "Only one of filter_label and query_filters_file should be provided" << std::endl; + return -1; + } + + diskann::Metric metric; + if (dist_fn == std::string("l2")) + { + metric = diskann::Metric::L2; + } + else if (dist_fn == std::string("mips")) + { + metric = diskann::Metric::INNER_PRODUCT; + } + else if (dist_fn == std::string("cosine")) + { + metric = diskann::Metric::COSINE; + } + else + { + std::cerr << "Unsupported distance function. Use l2/mips/cosine." 
<< std::endl; + return -1; + } + + std::vector filter_labels; + if (filter_label != "") + { + filter_labels.push_back(filter_label); + } + else if (filter_label_file != "") + { + filter_labels = read_file_to_vector_of_strings(filter_label_file, false); + } + + // only when there is no filter label or 1 filter label for all queries + if (filter_labels.size() == 1) + { + try + { + if (data_type == std::string("float")) + aux_main(base_file, label_file, query_file, gt_file, K, universal_label, metric, + filter_labels[0], tags_file); + if (data_type == std::string("int8")) + aux_main(base_file, label_file, query_file, gt_file, K, universal_label, metric, + filter_labels[0], tags_file); + if (data_type == std::string("uint8")) + aux_main(base_file, label_file, query_file, gt_file, K, universal_label, metric, + filter_labels[0], tags_file); + } + catch (const std::exception &e) + { + std::cout << std::string(e.what()) << std::endl; + diskann::cerr << "Compute GT failed." << std::endl; + return -1; + } + } + else + { // Each query has its own filter label + // Split up data and query bins into label specific ones + tsl::robin_map labels_to_number_of_points; + tsl::robin_map labels_to_number_of_queries; + + label_set all_labels; + for (size_t i = 0; i < filter_labels.size(); i++) + { + std::string label = filter_labels[i]; + all_labels.insert(label); + + if (labels_to_number_of_queries.find(label) == labels_to_number_of_queries.end()) + { + labels_to_number_of_queries[label] = 0; + } + labels_to_number_of_queries[label] += 1; + } + + size_t npoints; + std::vector> point_to_labels; + parse_label_file_into_vec(npoints, label_file, point_to_labels); + std::vector point_ids_to_labels(point_to_labels.size()); + std::vector query_ids_to_labels(filter_labels.size()); + + for (size_t i = 0; i < point_to_labels.size(); i++) + { + for (size_t j = 0; j < point_to_labels[i].size(); j++) + { + std::string label = point_to_labels[i][j]; + if (all_labels.find(label) != all_labels.end()) + { + point_ids_to_labels[i].insert(point_to_labels[i][j]); + if (labels_to_number_of_points.find(label) == labels_to_number_of_points.end()) + { + labels_to_number_of_points[label] = 0; + } + labels_to_number_of_points[label] += 1; + } + } + } + + for (size_t i = 0; i < filter_labels.size(); i++) + { + query_ids_to_labels[i].insert(filter_labels[i]); + } + + tsl::robin_map> label_id_to_orig_id; + tsl::robin_map> label_query_id_to_orig_id; + + if (data_type == std::string("float")) + { + label_id_to_orig_id = diskann::generate_label_specific_vector_files_compat( + base_file, labels_to_number_of_points, point_ids_to_labels, all_labels); + + label_query_id_to_orig_id = diskann::generate_label_specific_vector_files_compat( + query_file, labels_to_number_of_queries, query_ids_to_labels, + all_labels); // query_filters acts like query_ids_to_labels + } + else if (data_type == std::string("int8")) + { + label_id_to_orig_id = diskann::generate_label_specific_vector_files_compat( + base_file, labels_to_number_of_points, point_ids_to_labels, all_labels); + + label_query_id_to_orig_id = diskann::generate_label_specific_vector_files_compat( + query_file, labels_to_number_of_queries, query_ids_to_labels, + all_labels); // query_filters acts like query_ids_to_labels + } + else if (data_type == std::string("uint8")) + { + label_id_to_orig_id = diskann::generate_label_specific_vector_files_compat( + base_file, labels_to_number_of_points, point_ids_to_labels, all_labels); + + label_query_id_to_orig_id = 
diskann::generate_label_specific_vector_files_compat( + query_file, labels_to_number_of_queries, query_ids_to_labels, + all_labels); // query_filters acts like query_ids_to_labels + } + else + { + diskann::cerr << "Invalid data type" << std::endl; + return -1; + } + + // Generate label specific ground truths + + try + { + for (const auto &label : all_labels) + { + std::string filtered_base_file = base_file + "_" + label; + std::string filtered_query_file = query_file + "_" + label; + std::string filtered_gt_file = gt_file + "_" + label; + if (data_type == std::string("float")) + aux_main(filtered_base_file, "", filtered_query_file, filtered_gt_file, K, "", metric, ""); + if (data_type == std::string("int8")) + aux_main(filtered_base_file, "", filtered_query_file, filtered_gt_file, K, "", metric, ""); + if (data_type == std::string("uint8")) + aux_main(filtered_base_file, "", filtered_query_file, filtered_gt_file, K, "", metric, ""); + } + } + catch (const std::exception &e) + { + std::cout << std::string(e.what()) << std::endl; + diskann::cerr << "Compute GT failed." << std::endl; + return -1; + } + + // Combine the label specific ground truths to produce a single GT file + + uint32_t *gt_ids = nullptr; + float *gt_dists = nullptr; + size_t gt_num, gt_dim; + + std::vector> final_gt_ids; + std::vector> final_gt_dists; + + uint32_t query_num = 0; + for (const auto &lbl : all_labels) + { + query_num += labels_to_number_of_queries[lbl]; + } + + for (uint32_t i = 0; i < query_num; i++) + { + final_gt_ids.push_back(std::vector(K)); + final_gt_dists.push_back(std::vector(K)); + } + + for (const auto &lbl : all_labels) + { + std::string filtered_gt_file = gt_file + "_" + lbl; + load_truthset(filtered_gt_file, gt_ids, gt_dists, gt_num, gt_dim); + + for (uint32_t i = 0; i < labels_to_number_of_queries[lbl]; i++) + { + uint32_t orig_query_id = label_query_id_to_orig_id[lbl][i]; + for (uint64_t j = 0; j < K; j++) + { + final_gt_ids[orig_query_id][j] = label_id_to_orig_id[lbl][gt_ids[i * K + j]]; + final_gt_dists[orig_query_id][j] = gt_dists[i * K + j]; + } + } + } + + int32_t *closest_points = new int32_t[query_num * K]; + float *dist_closest_points = new float[query_num * K]; + + for (uint32_t i = 0; i < query_num; i++) + { + for (uint32_t j = 0; j < K; j++) + { + closest_points[i * K + j] = final_gt_ids[i][j]; + dist_closest_points[i * K + j] = final_gt_dists[i][j]; + } + } + + save_groundtruth_as_one_file(gt_file, closest_points, dist_closest_points, query_num, K); + + // cleanup artifacts + std::cout << "Cleaning up artifacts..." << std::endl; + tsl::robin_set paths_to_clean{gt_file, base_file, query_file}; + clean_up_artifacts(paths_to_clean, all_labels); + } +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/count_bfs_levels.cpp b/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/count_bfs_levels.cpp new file mode 100644 index 0000000..6dd2d62 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/count_bfs_levels.cpp @@ -0,0 +1,82 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
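// ---------------------------------------------------------------------------
// A minimal, standalone sketch (not part of the DiskANN sources) of the
// truthset layout that save_groundtruth_as_one_file() above writes and
// load_truthset() reads back: a 4-byte query count, a 4-byte width K,
// npts*K uint32 neighbor ids, and optionally npts*K float distances.
// Whether distances are present is inferred from the file size, exactly as
// in load_truthset(); the path argument is a placeholder.
#include <cstdint>
#include <fstream>
#include <iostream>
#include <vector>

inline int read_truthset_sketch(const char *path)
{
    std::ifstream in(path, std::ios::binary | std::ios::ate);
    if (!in)
        return -1;
    const long long file_size = (long long)in.tellg();
    in.seekg(0, std::ios::beg);

    int32_t npts = 0, k = 0;
    in.read((char *)&npts, sizeof(int32_t));
    in.read((char *)&k, sizeof(int32_t));

    std::vector<uint32_t> ids((size_t)npts * (size_t)k);
    in.read((char *)ids.data(), ids.size() * sizeof(uint32_t));

    // ids-only layout: 8-byte header + npts*K*4 bytes of ids.
    // ids+dists layout: the same, plus npts*K*4 bytes of float distances.
    const long long with_dists = 8 + 2LL * npts * k * 4;
    std::vector<float> dists;
    if (file_size == with_dists)
    {
        dists.resize(ids.size());
        in.read((char *)dists.data(), dists.size() * sizeof(float));
    }
    std::cout << "Read GT for " << npts << " queries, K = " << k
              << (dists.empty() ? " (ids only)" : " (ids + distances)") << std::endl;
    return 0;
}
// ---------------------------------------------------------------------------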
+ +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef _WINDOWS +#include +#include +#include +#include +#endif + +#include "utils.h" +#include "index.h" +#include "memory_mapper.h" + +namespace po = boost::program_options; + +template void bfs_count(const std::string &index_path, uint32_t data_dims) +{ + using TagT = uint32_t; + using LabelT = uint32_t; + diskann::Index index(diskann::Metric::L2, data_dims, 0, nullptr, nullptr, 0, false, false, false, + false, 0, false); + std::cout << "Index class instantiated" << std::endl; + index.load(index_path.c_str(), 1, 100); + std::cout << "Index loaded" << std::endl; + index.count_nodes_at_bfs_levels(); +} + +int main(int argc, char **argv) +{ + std::string data_type, index_path_prefix; + uint32_t data_dims; + + po::options_description desc{"Arguments"}; + try + { + desc.add_options()("help,h", "Print information on arguments"); + desc.add_options()("data_type", po::value(&data_type)->required(), "data type "); + desc.add_options()("index_path_prefix", po::value(&index_path_prefix)->required(), + "Path prefix to the index"); + desc.add_options()("data_dims", po::value(&data_dims)->required(), "Dimensionality of the data"); + + po::variables_map vm; + po::store(po::parse_command_line(argc, argv, desc), vm); + if (vm.count("help")) + { + std::cout << desc; + return 0; + } + po::notify(vm); + } + catch (const std::exception &ex) + { + std::cerr << ex.what() << '\n'; + return -1; + } + + try + { + if (data_type == std::string("int8")) + bfs_count(index_path_prefix, data_dims); + else if (data_type == std::string("uint8")) + bfs_count(index_path_prefix, data_dims); + if (data_type == std::string("float")) + bfs_count(index_path_prefix, data_dims); + } + catch (std::exception &e) + { + std::cout << std::string(e.what()) << std::endl; + diskann::cerr << "Index BFS failed." << std::endl; + return -1; + } +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/create_disk_layout.cpp b/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/create_disk_layout.cpp new file mode 100644 index 0000000..f494c12 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/create_disk_layout.cpp @@ -0,0 +1,48 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#include +#include +#include +#include +#include +#include + +#include "utils.h" +#include "disk_utils.h" +#include "cached_io.h" + +template int create_disk_layout(char **argv) +{ + std::string base_file(argv[2]); + std::string vamana_file(argv[3]); + std::string output_file(argv[4]); + diskann::create_disk_layout(base_file, vamana_file, output_file); + return 0; +} + +int main(int argc, char **argv) +{ + if (argc != 5) + { + std::cout << argv[0] + << " data_type data_bin " + "vamana_index_file output_diskann_index_file" + << std::endl; + exit(-1); + } + + int ret_val = -1; + if (std::string(argv[1]) == std::string("float")) + ret_val = create_disk_layout(argv); + else if (std::string(argv[1]) == std::string("int8")) + ret_val = create_disk_layout(argv); + else if (std::string(argv[1]) == std::string("uint8")) + ret_val = create_disk_layout(argv); + else + { + std::cout << "unsupported type. 
use int8/uint8/float " << std::endl; + ret_val = -2; + } + return ret_val; +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/float_bin_to_int8.cpp b/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/float_bin_to_int8.cpp new file mode 100644 index 0000000..1982005 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/float_bin_to_int8.cpp @@ -0,0 +1,63 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#include +#include "utils.h" + +void block_convert(std::ofstream &writer, int8_t *write_buf, std::ifstream &reader, float *read_buf, size_t npts, + size_t ndims, float bias, float scale) +{ + reader.read((char *)read_buf, npts * ndims * sizeof(float)); + + for (size_t i = 0; i < npts; i++) + { + for (size_t d = 0; d < ndims; d++) + { + write_buf[d + i * ndims] = (int8_t)((read_buf[d + i * ndims] - bias) * (254.0 / scale)); + } + } + writer.write((char *)write_buf, npts * ndims); +} + +int main(int argc, char **argv) +{ + if (argc != 5) + { + std::cout << "Usage: " << argv[0] << " input_bin output_tsv bias scale" << std::endl; + exit(-1); + } + + std::ifstream reader(argv[1], std::ios::binary); + uint32_t npts_u32; + uint32_t ndims_u32; + reader.read((char *)&npts_u32, sizeof(uint32_t)); + reader.read((char *)&ndims_u32, sizeof(uint32_t)); + size_t npts = npts_u32; + size_t ndims = ndims_u32; + std::cout << "Dataset: #pts = " << npts << ", # dims = " << ndims << std::endl; + + size_t blk_size = 131072; + size_t nblks = ROUND_UP(npts, blk_size) / blk_size; + + std::ofstream writer(argv[2], std::ios::binary); + auto read_buf = new float[blk_size * ndims]; + auto write_buf = new int8_t[blk_size * ndims]; + float bias = (float)atof(argv[3]); + float scale = (float)atof(argv[4]); + + writer.write((char *)(&npts_u32), sizeof(uint32_t)); + writer.write((char *)(&ndims_u32), sizeof(uint32_t)); + + for (size_t i = 0; i < nblks; i++) + { + size_t cblk_size = std::min(npts - i * blk_size, blk_size); + block_convert(writer, write_buf, reader, read_buf, cblk_size, ndims, bias, scale); + std::cout << "Block #" << i << " written" << std::endl; + } + + delete[] read_buf; + delete[] write_buf; + + writer.close(); + reader.close(); +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/fvecs_to_bin.cpp b/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/fvecs_to_bin.cpp new file mode 100644 index 0000000..873ad3b --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/fvecs_to_bin.cpp @@ -0,0 +1,95 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
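// ---------------------------------------------------------------------------
// The float_bin_to_int8 converter above maps each float x to
//     q = (int8_t)((x - bias) * (254.0 / scale)),
// so a value can be approximately recovered as x ~= q * (scale / 254.0) + bias.
// A minimal round-trip sketch (illustrative only; bias and scale below are
// arbitrary example parameters, not values taken from the sources):
#include <cmath>
#include <cstdint>
#include <cstdio>

inline void int8_round_trip_sketch()
{
    const float bias = 0.0f, scale = 2.0f; // assumed example parameters
    const float x = 0.73f;

    const int8_t q = (int8_t)((x - bias) * (254.0f / scale)); // forward map, as above
    const float x_hat = q * (scale / 254.0f) + bias;          // approximate inverse

    // Because the cast truncates, the error stays below one step, scale / 254.
    std::printf("x = %f  q = %d  x_hat = %f  |err| = %f\n", x, (int)q, x_hat,
                std::fabs(x - x_hat));
}
// ---------------------------------------------------------------------------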
+ +#include +#include "utils.h" + +// Convert float types +void block_convert_float(std::ifstream &reader, std::ofstream &writer, float *read_buf, float *write_buf, size_t npts, + size_t ndims) +{ + reader.read((char *)read_buf, npts * (ndims * sizeof(float) + sizeof(uint32_t))); + for (size_t i = 0; i < npts; i++) + { + memcpy(write_buf + i * ndims, (read_buf + i * (ndims + 1)) + 1, ndims * sizeof(float)); + } + writer.write((char *)write_buf, npts * ndims * sizeof(float)); +} + +// Convert byte types +void block_convert_byte(std::ifstream &reader, std::ofstream &writer, uint8_t *read_buf, uint8_t *write_buf, + size_t npts, size_t ndims) +{ + reader.read((char *)read_buf, npts * (ndims * sizeof(uint8_t) + sizeof(uint32_t))); + for (size_t i = 0; i < npts; i++) + { + memcpy(write_buf + i * ndims, (read_buf + i * (ndims + sizeof(uint32_t))) + sizeof(uint32_t), + ndims * sizeof(uint8_t)); + } + writer.write((char *)write_buf, npts * ndims * sizeof(uint8_t)); +} + +int main(int argc, char **argv) +{ + if (argc != 4) + { + std::cout << argv[0] << " input_vecs output_bin" << std::endl; + exit(-1); + } + + int datasize = sizeof(float); + + if (strcmp(argv[1], "uint8") == 0 || strcmp(argv[1], "int8") == 0) + { + datasize = sizeof(uint8_t); + } + else if (strcmp(argv[1], "float") != 0) + { + std::cout << "Error: type not supported. Use float/int8/uint8" << std::endl; + exit(-1); + } + + std::ifstream reader(argv[2], std::ios::binary | std::ios::ate); + size_t fsize = reader.tellg(); + reader.seekg(0, std::ios::beg); + + uint32_t ndims_u32; + reader.read((char *)&ndims_u32, sizeof(uint32_t)); + reader.seekg(0, std::ios::beg); + size_t ndims = (size_t)ndims_u32; + size_t npts = fsize / ((ndims * datasize) + sizeof(uint32_t)); + std::cout << "Dataset: #pts = " << npts << ", # dims = " << ndims << std::endl; + + size_t blk_size = 131072; + size_t nblks = ROUND_UP(npts, blk_size) / blk_size; + std::cout << "# blks: " << nblks << std::endl; + std::ofstream writer(argv[3], std::ios::binary); + int32_t npts_s32 = (int32_t)npts; + int32_t ndims_s32 = (int32_t)ndims; + writer.write((char *)&npts_s32, sizeof(int32_t)); + writer.write((char *)&ndims_s32, sizeof(int32_t)); + + size_t chunknpts = std::min(npts, blk_size); + uint8_t *read_buf = new uint8_t[chunknpts * ((ndims * datasize) + sizeof(uint32_t))]; + uint8_t *write_buf = new uint8_t[chunknpts * ndims * datasize]; + + for (size_t i = 0; i < nblks; i++) + { + size_t cblk_size = std::min(npts - i * blk_size, blk_size); + if (datasize == sizeof(float)) + { + block_convert_float(reader, writer, (float *)read_buf, (float *)write_buf, cblk_size, ndims); + } + else + { + block_convert_byte(reader, writer, read_buf, write_buf, cblk_size, ndims); + } + std::cout << "Block #" << i << " written" << std::endl; + } + + delete[] read_buf; + delete[] write_buf; + + reader.close(); + writer.close(); +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/fvecs_to_bvecs.cpp b/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/fvecs_to_bvecs.cpp new file mode 100644 index 0000000..f9c2aa7 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/fvecs_to_bvecs.cpp @@ -0,0 +1,56 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
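// ---------------------------------------------------------------------------
// Size bookkeeping behind the fvecs_to_bin conversion above (illustrative
// only): a *.fvecs / *.bvecs file stores a 4-byte dimension count in front
// of every vector, while the DiskANN *.bin format stores a single 8-byte
// (npts, ndims) header followed by the flat payload. This is how the tool
// above infers npts from the input file size.
#include <cstddef>
#include <cstdint>
#include <cstdio>

inline void vecs_vs_bin_size_sketch(size_t npts, size_t ndims, size_t datasize /* bytes/elem */)
{
    const size_t vecs_bytes = npts * (ndims * datasize + sizeof(uint32_t));
    const size_t bin_bytes = 2 * sizeof(int32_t) + npts * ndims * datasize;
    std::printf("%zu pts x %zu dims: vecs file = %zu B, bin file = %zu B\n",
                npts, ndims, vecs_bytes, bin_bytes);
}
// e.g. vecs_vs_bin_size_sketch(1000000, 128, sizeof(float));
// ---------------------------------------------------------------------------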
+ +#include +#include "utils.h" + +void block_convert(std::ifstream &reader, std::ofstream &writer, float *read_buf, uint8_t *write_buf, size_t npts, + size_t ndims) +{ + reader.read((char *)read_buf, npts * (ndims * sizeof(float) + sizeof(uint32_t))); + for (size_t i = 0; i < npts; i++) + { + memcpy(write_buf + i * (ndims + 4), read_buf + i * (ndims + 1), sizeof(uint32_t)); + for (size_t d = 0; d < ndims; d++) + write_buf[i * (ndims + 4) + 4 + d] = (uint8_t)read_buf[i * (ndims + 1) + 1 + d]; + } + writer.write((char *)write_buf, npts * (ndims * 1 + 4)); +} + +int main(int argc, char **argv) +{ + if (argc != 3) + { + std::cout << argv[0] << " input_fvecs output_bvecs(uint8)" << std::endl; + exit(-1); + } + std::ifstream reader(argv[1], std::ios::binary | std::ios::ate); + size_t fsize = reader.tellg(); + reader.seekg(0, std::ios::beg); + + uint32_t ndims_u32; + reader.read((char *)&ndims_u32, sizeof(uint32_t)); + reader.seekg(0, std::ios::beg); + size_t ndims = (size_t)ndims_u32; + size_t npts = fsize / ((ndims + 1) * sizeof(float)); + std::cout << "Dataset: #pts = " << npts << ", # dims = " << ndims << std::endl; + + size_t blk_size = 131072; + size_t nblks = ROUND_UP(npts, blk_size) / blk_size; + std::cout << "# blks: " << nblks << std::endl; + std::ofstream writer(argv[2], std::ios::binary); + auto read_buf = new float[npts * (ndims + 1)]; + auto write_buf = new uint8_t[npts * (ndims + 4)]; + for (size_t i = 0; i < nblks; i++) + { + size_t cblk_size = std::min(npts - i * blk_size, blk_size); + block_convert(reader, writer, read_buf, write_buf, cblk_size, ndims); + std::cout << "Block #" << i << " written" << std::endl; + } + + delete[] read_buf; + delete[] write_buf; + + reader.close(); + writer.close(); +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/gen_random_slice.cpp b/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/gen_random_slice.cpp new file mode 100644 index 0000000..a4cd96e --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/gen_random_slice.cpp @@ -0,0 +1,58 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "partition.h" +#include "utils.h" + +#include +#include +#include +#include + +template int aux_main(char **argv) +{ + std::string base_file(argv[2]); + std::string output_prefix(argv[3]); + float sampling_rate = (float)(std::atof(argv[4])); + gen_random_slice(base_file, output_prefix, sampling_rate); + return 0; +} + +int main(int argc, char **argv) +{ + if (argc != 5) + { + std::cout << argv[0] + << " data_type [float/int8/uint8] base_bin_file " + "sample_output_prefix sampling_probability" + << std::endl; + exit(-1); + } + + if (std::string(argv[1]) == std::string("float")) + { + aux_main(argv); + } + else if (std::string(argv[1]) == std::string("int8")) + { + aux_main(argv); + } + else if (std::string(argv[1]) == std::string("uint8")) + { + aux_main(argv); + } + else + std::cout << "Unsupported type. Use float/int8/uint8." << std::endl; + return 0; +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/generate_pq.cpp b/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/generate_pq.cpp new file mode 100644 index 0000000..a881b11 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/generate_pq.cpp @@ -0,0 +1,70 @@ +// Copyright (c) Microsoft Corporation. 
All rights reserved. +// Licensed under the MIT license. + +#include "math_utils.h" +#include "pq.h" +#include "partition.h" + +#define KMEANS_ITERS_FOR_PQ 15 + +template +bool generate_pq(const std::string &data_path, const std::string &index_prefix_path, const size_t num_pq_centers, + const size_t num_pq_chunks, const float sampling_rate, const bool opq) +{ + std::string pq_pivots_path = index_prefix_path + "_pq_pivots.bin"; + std::string pq_compressed_vectors_path = index_prefix_path + "_pq_compressed.bin"; + + // generates random sample and sets it to train_data and updates train_size + size_t train_size, train_dim; + float *train_data; + gen_random_slice(data_path, sampling_rate, train_data, train_size, train_dim); + std::cout << "For computing pivots, loaded sample data of size " << train_size << std::endl; + + if (opq) + { + diskann::generate_opq_pivots(train_data, train_size, (uint32_t)train_dim, (uint32_t)num_pq_centers, + (uint32_t)num_pq_chunks, pq_pivots_path, true); + } + else + { + diskann::generate_pq_pivots(train_data, train_size, (uint32_t)train_dim, (uint32_t)num_pq_centers, + (uint32_t)num_pq_chunks, KMEANS_ITERS_FOR_PQ, pq_pivots_path); + } + diskann::generate_pq_data_from_pivots(data_path, (uint32_t)num_pq_centers, (uint32_t)num_pq_chunks, + pq_pivots_path, pq_compressed_vectors_path, true); + + delete[] train_data; + + return 0; +} + +int main(int argc, char **argv) +{ + if (argc != 7) + { + std::cout << "Usage: \n" + << argv[0] + << " " + " " + " " + << std::endl; + } + else + { + const std::string data_path(argv[2]); + const std::string index_prefix_path(argv[3]); + const size_t num_pq_centers = 256; + const size_t num_pq_chunks = (size_t)atoi(argv[4]); + const float sampling_rate = (float)atof(argv[5]); + const bool opq = atoi(argv[6]) == 0 ? false : true; + + if (std::string(argv[1]) == std::string("float")) + generate_pq(data_path, index_prefix_path, num_pq_centers, num_pq_chunks, sampling_rate, opq); + else if (std::string(argv[1]) == std::string("int8")) + generate_pq(data_path, index_prefix_path, num_pq_centers, num_pq_chunks, sampling_rate, opq); + else if (std::string(argv[1]) == std::string("uint8")) + generate_pq(data_path, index_prefix_path, num_pq_centers, num_pq_chunks, sampling_rate, opq); + else + std::cout << "Error. wrong file type" << std::endl; + } +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/generate_synthetic_labels.cpp b/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/generate_synthetic_labels.cpp new file mode 100644 index 0000000..6741760 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/generate_synthetic_labels.cpp @@ -0,0 +1,204 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
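// Illustrative note, not part of the upstream file: the ZipfDistribution class
// below caps label i at roughly ceil(num_points * 0.7 / i) occurrences and
// offers it to each point with probability 0.7 / i while that budget lasts, so
// label 1 covers about 70% of the points, label 2 about 35%, and so on. A
// small helper (the name is ours) that prints those caps:
#include <cmath>
#include <cstdint>
#include <cstdio>

static void print_zipf_caps(uint64_t num_points, uint32_t num_labels, double factor = 0.7)
{
    double primary = std::ceil((double)num_points * factor); // budget for label 1
    for (uint32_t i = 1; i <= num_labels; i++)
        std::printf("label %u: at most %u points\n", i, (uint32_t)std::ceil(primary / i));
}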
+ +#include +#include +#include +#include +#include +#include "utils.h" + +namespace po = boost::program_options; +class ZipfDistribution +{ + public: + ZipfDistribution(uint64_t num_points, uint32_t num_labels) + : num_labels(num_labels), num_points(num_points), + uniform_zero_to_one(std::uniform_real_distribution<>(0.0, 1.0)) + { + } + + std::unordered_map createDistributionMap() + { + std::unordered_map map; + uint32_t primary_label_freq = (uint32_t)ceil(num_points * distribution_factor); + for (uint32_t i{1}; i < num_labels + 1; i++) + { + map[i] = (uint32_t)ceil(primary_label_freq / i); + } + return map; + } + + int writeDistribution(std::ofstream &outfile) + { + auto distribution_map = createDistributionMap(); + for (uint32_t i{0}; i < num_points; i++) + { + bool label_written = false; + for (auto it = distribution_map.cbegin(); it != distribution_map.cend(); it++) + { + auto label_selection_probability = std::bernoulli_distribution(distribution_factor / (double)it->first); + if (label_selection_probability(rand_engine) && distribution_map[it->first] > 0) + { + if (label_written) + { + outfile << ','; + } + outfile << it->first; + label_written = true; + // remove label from map if we have used all labels + distribution_map[it->first] -= 1; + } + } + if (!label_written) + { + outfile << 0; + } + if (i < num_points - 1) + { + outfile << '\n'; + } + } + return 0; + } + + int writeDistribution(std::string filename) + { + std::ofstream outfile(filename); + if (!outfile.is_open()) + { + std::cerr << "Error: could not open output file " << filename << '\n'; + return -1; + } + writeDistribution(outfile); + outfile.close(); + } + + private: + const uint32_t num_labels; + const uint64_t num_points; + const double distribution_factor = 0.7; + std::knuth_b rand_engine; + const std::uniform_real_distribution uniform_zero_to_one; +}; + +int main(int argc, char **argv) +{ + std::string output_file, distribution_type; + uint32_t num_labels; + uint64_t num_points; + + try + { + po::options_description desc{"Arguments"}; + + desc.add_options()("help,h", "Print information on arguments"); + desc.add_options()("output_file,O", po::value(&output_file)->required(), + "Filename for saving the label file"); + desc.add_options()("num_points,N", po::value(&num_points)->required(), "Number of points in dataset"); + desc.add_options()("num_labels,L", po::value(&num_labels)->required(), + "Number of unique labels, up to 5000"); + desc.add_options()("distribution_type,DT", po::value(&distribution_type)->default_value("random"), + "Distribution function for labels defaults " + "to random"); + + po::variables_map vm; + po::store(po::parse_command_line(argc, argv, desc), vm); + if (vm.count("help")) + { + std::cout << desc; + return 0; + } + po::notify(vm); + } + catch (const std::exception &ex) + { + std::cerr << ex.what() << '\n'; + return -1; + } + + if (num_labels > 5000) + { + std::cerr << "Error: num_labels must be 5000 or less" << '\n'; + return -1; + } + + if (num_points <= 0) + { + std::cerr << "Error: num_points must be greater than 0" << '\n'; + return -1; + } + + std::cout << "Generating synthetic labels for " << num_points << " points with " << num_labels << " unique labels" + << '\n'; + + try + { + std::ofstream outfile(output_file); + if (!outfile.is_open()) + { + std::cerr << "Error: could not open output file " << output_file << '\n'; + return -1; + } + + if (distribution_type == "zipf") + { + ZipfDistribution zipf(num_points, num_labels); + zipf.writeDistribution(outfile); + } + else if 
(distribution_type == "random") + { + for (size_t i = 0; i < num_points; i++) + { + bool label_written = false; + for (size_t j = 1; j <= num_labels; j++) + { + // 50% chance to assign each label + if (rand() > (RAND_MAX / 2)) + { + if (label_written) + { + outfile << ','; + } + outfile << j; + label_written = true; + } + } + if (!label_written) + { + outfile << 0; + } + if (i < num_points - 1) + { + outfile << '\n'; + } + } + } + else if (distribution_type == "one_per_point") + { + std::random_device rd; // obtain a random number from hardware + std::mt19937 gen(rd()); // seed the generator + std::uniform_int_distribution<> distr(0, num_labels); // define the range + + for (size_t i = 0; i < num_points; i++) + { + outfile << distr(gen); + if (i != num_points - 1) + outfile << '\n'; + } + } + if (outfile.is_open()) + { + outfile.close(); + } + + std::cout << "Labels written to " << output_file << '\n'; + } + catch (const std::exception &ex) + { + std::cerr << "Label generation failed: " << ex.what() << '\n'; + return -1; + } + + return 0; +} \ No newline at end of file diff --git a/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/int8_to_float.cpp b/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/int8_to_float.cpp new file mode 100644 index 0000000..dcdfddc --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/int8_to_float.cpp @@ -0,0 +1,23 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#include +#include "utils.h" + +int main(int argc, char **argv) +{ + if (argc != 3) + { + std::cout << argv[0] << " input_int8_bin output_float_bin" << std::endl; + exit(-1); + } + + int8_t *input; + size_t npts, nd; + diskann::load_bin(argv[1], input, npts, nd); + float *output = new float[npts * nd]; + diskann::convert_types(input, output, npts, nd); + diskann::save_bin(argv[2], output, npts, nd); + delete[] output; + delete[] input; +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/int8_to_float_scale.cpp b/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/int8_to_float_scale.cpp new file mode 100644 index 0000000..19fbc6c --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/int8_to_float_scale.cpp @@ -0,0 +1,63 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
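// Illustrative note, not taken from the upstream sources: this tool applies a
// per-component affine de-quantization, out = (in - bias) * scale, and copies
// the 8-byte .bin header (uint32 npts, uint32 ndims) through unchanged. For
// example, int8 data that was quantized from [0, 1] floats with bias = -128
// and scale = 1/255 maps -128 back to 0.0 and 127 back to 1.0:
#include <cstdint>

static float dequantize(int8_t v, float bias, float scale)
{
    return ((float)v - bias) * scale; // same formula as block_convert below
}

// dequantize(-128, -128.0f, 1.0f / 255.0f) == 0.0f
// dequantize( 127, -128.0f, 1.0f / 255.0f) == 1.0f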
+ +#include +#include "utils.h" + +void block_convert(std::ofstream &writer, float *write_buf, std::ifstream &reader, int8_t *read_buf, size_t npts, + size_t ndims, float bias, float scale) +{ + reader.read((char *)read_buf, npts * ndims * sizeof(int8_t)); + + for (size_t i = 0; i < npts; i++) + { + for (size_t d = 0; d < ndims; d++) + { + write_buf[d + i * ndims] = (((float)read_buf[d + i * ndims] - bias) * scale); + } + } + writer.write((char *)write_buf, npts * ndims * sizeof(float)); +} + +int main(int argc, char **argv) +{ + if (argc != 5) + { + std::cout << "Usage: " << argv[0] << " input-int8.bin output-float.bin bias scale" << std::endl; + exit(-1); + } + + std::ifstream reader(argv[1], std::ios::binary); + uint32_t npts_u32; + uint32_t ndims_u32; + reader.read((char *)&npts_u32, sizeof(uint32_t)); + reader.read((char *)&ndims_u32, sizeof(uint32_t)); + size_t npts = npts_u32; + size_t ndims = ndims_u32; + std::cout << "Dataset: #pts = " << npts << ", # dims = " << ndims << std::endl; + + size_t blk_size = 131072; + size_t nblks = ROUND_UP(npts, blk_size) / blk_size; + + std::ofstream writer(argv[2], std::ios::binary); + auto read_buf = new int8_t[blk_size * ndims]; + auto write_buf = new float[blk_size * ndims]; + float bias = (float)atof(argv[3]); + float scale = (float)atof(argv[4]); + + writer.write((char *)(&npts_u32), sizeof(uint32_t)); + writer.write((char *)(&ndims_u32), sizeof(uint32_t)); + + for (size_t i = 0; i < nblks; i++) + { + size_t cblk_size = std::min(npts - i * blk_size, blk_size); + block_convert(writer, write_buf, reader, read_buf, cblk_size, ndims, bias, scale); + std::cout << "Block #" << i << " written" << std::endl; + } + + delete[] read_buf; + delete[] write_buf; + + writer.close(); + reader.close(); +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/ivecs_to_bin.cpp b/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/ivecs_to_bin.cpp new file mode 100644 index 0000000..ea8a4a3 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/ivecs_to_bin.cpp @@ -0,0 +1,58 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
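// Illustrative sketch, inferred from the converters in this directory rather
// than a normative spec: a DiskANN *.bin file is an 8-byte header
// (uint32 npts, uint32 ndims) followed by npts * ndims values stored row-major
// with no per-row headers, whereas *.ivecs repeats the dimension word before
// every row. Expected file sizes for uint32 data (helper names are ours):
#include <cstddef>
#include <cstdint>

static size_t ivecs_bytes(size_t npts, size_t ndims)
{
    return npts * (ndims + 1) * sizeof(uint32_t); // per-row dim word + payload
}

static size_t bin_bytes(size_t npts, size_t ndims)
{
    return 2 * sizeof(uint32_t) + npts * ndims * sizeof(uint32_t); // header + payload
}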
+ +#include +#include "utils.h" + +void block_convert(std::ifstream &reader, std::ofstream &writer, uint32_t *read_buf, uint32_t *write_buf, size_t npts, + size_t ndims) +{ + reader.read((char *)read_buf, npts * (ndims * sizeof(uint32_t) + sizeof(uint32_t))); + for (size_t i = 0; i < npts; i++) + { + memcpy(write_buf + i * ndims, (read_buf + i * (ndims + 1)) + 1, ndims * sizeof(uint32_t)); + } + writer.write((char *)write_buf, npts * ndims * sizeof(uint32_t)); +} + +int main(int argc, char **argv) +{ + if (argc != 3) + { + std::cout << argv[0] << " input_ivecs output_bin" << std::endl; + exit(-1); + } + std::ifstream reader(argv[1], std::ios::binary | std::ios::ate); + size_t fsize = reader.tellg(); + reader.seekg(0, std::ios::beg); + + uint32_t ndims_u32; + reader.read((char *)&ndims_u32, sizeof(uint32_t)); + reader.seekg(0, std::ios::beg); + size_t ndims = (size_t)ndims_u32; + size_t npts = fsize / ((ndims + 1) * sizeof(uint32_t)); + std::cout << "Dataset: #pts = " << npts << ", # dims = " << ndims << std::endl; + + size_t blk_size = 131072; + size_t nblks = ROUND_UP(npts, blk_size) / blk_size; + std::cout << "# blks: " << nblks << std::endl; + std::ofstream writer(argv[2], std::ios::binary); + int npts_s32 = (int)npts; + int ndims_s32 = (int)ndims; + writer.write((char *)&npts_s32, sizeof(int)); + writer.write((char *)&ndims_s32, sizeof(int)); + uint32_t *read_buf = new uint32_t[npts * (ndims + 1)]; + uint32_t *write_buf = new uint32_t[npts * ndims]; + for (size_t i = 0; i < nblks; i++) + { + size_t cblk_size = std::min(npts - i * blk_size, blk_size); + block_convert(reader, writer, read_buf, write_buf, cblk_size, ndims); + std::cout << "Block #" << i << " written" << std::endl; + } + + delete[] read_buf; + delete[] write_buf; + + reader.close(); + writer.close(); +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/merge_shards.cpp b/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/merge_shards.cpp new file mode 100644 index 0000000..106c15e --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/merge_shards.cpp @@ -0,0 +1,42 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "disk_utils.h" +#include "cached_io.h" +#include "utils.h" + +int main(int argc, char **argv) +{ + if (argc != 9) + { + std::cout << argv[0] + << " vamana_index_prefix[1] vamana_index_suffix[2] " + "idmaps_prefix[3] " + "idmaps_suffix[4] n_shards[5] max_degree[6] " + "output_vamana_path[7] " + "output_medoids_path[8]" + << std::endl; + exit(-1); + } + + std::string vamana_prefix(argv[1]); + std::string vamana_suffix(argv[2]); + std::string idmaps_prefix(argv[3]); + std::string idmaps_suffix(argv[4]); + uint64_t nshards = (uint64_t)std::atoi(argv[5]); + uint32_t max_degree = (uint64_t)std::atoi(argv[6]); + std::string output_index(argv[7]); + std::string output_medoids(argv[8]); + + return diskann::merge_shards(vamana_prefix, vamana_suffix, idmaps_prefix, idmaps_suffix, nshards, max_degree, + output_index, output_medoids); +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/partition_data.cpp b/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/partition_data.cpp new file mode 100644 index 0000000..2520f3f --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/partition_data.cpp @@ -0,0 +1,39 @@ +// Copyright (c) Microsoft Corporation. 
All rights reserved. +// Licensed under the MIT license. + +#include +#include +#include "cached_io.h" +#include "partition.h" + +// DEPRECATED: NEED TO REPROGRAM + +int main(int argc, char **argv) +{ + if (argc != 7) + { + std::cout << "Usage:\n" + << argv[0] + << " datatype " + " " + " " + << std::endl; + exit(-1); + } + + const std::string data_path(argv[2]); + const std::string prefix_path(argv[3]); + const float sampling_rate = (float)atof(argv[4]); + const size_t num_partitions = (size_t)std::atoi(argv[5]); + const size_t max_reps = 15; + const size_t k_index = (size_t)std::atoi(argv[6]); + + if (std::string(argv[1]) == std::string("float")) + partition(data_path, sampling_rate, num_partitions, max_reps, prefix_path, k_index); + else if (std::string(argv[1]) == std::string("int8")) + partition(data_path, sampling_rate, num_partitions, max_reps, prefix_path, k_index); + else if (std::string(argv[1]) == std::string("uint8")) + partition(data_path, sampling_rate, num_partitions, max_reps, prefix_path, k_index); + else + std::cout << "unsupported data format. use float/int8/uint8" << std::endl; +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/partition_with_ram_budget.cpp b/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/partition_with_ram_budget.cpp new file mode 100644 index 0000000..937b68d --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/partition_with_ram_budget.cpp @@ -0,0 +1,39 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#include +#include +#include "cached_io.h" +#include "partition.h" + +// DEPRECATED: NEED TO REPROGRAM + +int main(int argc, char **argv) +{ + if (argc != 8) + { + std::cout << "Usage:\n" + << argv[0] + << " datatype " + " " + " " + << std::endl; + exit(-1); + } + + const std::string data_path(argv[2]); + const std::string prefix_path(argv[3]); + const float sampling_rate = (float)atof(argv[4]); + const double ram_budget = (double)std::atof(argv[5]); + const size_t graph_degree = (size_t)std::atoi(argv[6]); + const size_t k_index = (size_t)std::atoi(argv[7]); + + if (std::string(argv[1]) == std::string("float")) + partition_with_ram_budget(data_path, sampling_rate, ram_budget, graph_degree, prefix_path, k_index); + else if (std::string(argv[1]) == std::string("int8")) + partition_with_ram_budget(data_path, sampling_rate, ram_budget, graph_degree, prefix_path, k_index); + else if (std::string(argv[1]) == std::string("uint8")) + partition_with_ram_budget(data_path, sampling_rate, ram_budget, graph_degree, prefix_path, k_index); + else + std::cout << "unsupported data format. use float/int8/uint8" << std::endl; +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/rand_data_gen.cpp b/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/rand_data_gen.cpp new file mode 100644 index 0000000..e89ede8 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/rand_data_gen.cpp @@ -0,0 +1,237 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
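// Illustrative note, not part of the upstream file: this generator draws i.i.d.
// Gaussian components and, when --norm is given, rescales every vector to that
// L2 norm, v <- v * norm / ||v||, before writing it (the int8/uint8 paths then
// round, and the uint8 path shifts by 128, which is why norm must stay <= 127).
// A minimal version of the rescaling step (the helper name is ours):
#include <cmath>
#include <cstddef>

static void rescale_to_norm(float *vec, size_t ndims, float target_norm)
{
    float sum = 0.0f;
    for (size_t d = 0; d < ndims; d++)
        sum += vec[d] * vec[d];
    float inv = target_norm / std::sqrt(sum); // assumes a non-zero vector
    for (size_t d = 0; d < ndims; d++)
        vec[d] *= inv;
}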
+ +#include +#include +#include +#include +#include + +#include "utils.h" + +namespace po = boost::program_options; + +int block_write_float(std::ofstream &writer, size_t ndims, size_t npts, bool normalization, float norm, + float rand_scale) +{ + auto vec = new float[ndims]; + + std::random_device rd{}; + std::mt19937 gen{rd()}; + std::normal_distribution<> normal_rand{0, 1}; + std::uniform_real_distribution<> unif_dis(1.0, rand_scale); + + for (size_t i = 0; i < npts; i++) + { + float sum = 0; + float scale = 1.0f; + if (rand_scale > 1.0f) + scale = (float)unif_dis(gen); + for (size_t d = 0; d < ndims; ++d) + vec[d] = scale * (float)normal_rand(gen); + if (normalization) + { + for (size_t d = 0; d < ndims; ++d) + sum += vec[d] * vec[d]; + for (size_t d = 0; d < ndims; ++d) + vec[d] = vec[d] * norm / std::sqrt(sum); + } + + writer.write((char *)vec, ndims * sizeof(float)); + } + + delete[] vec; + return 0; +} + +int block_write_int8(std::ofstream &writer, size_t ndims, size_t npts, float norm) +{ + auto vec = new float[ndims]; + auto vec_T = new int8_t[ndims]; + + std::random_device rd{}; + std::mt19937 gen{rd()}; + std::normal_distribution<> normal_rand{0, 1}; + + for (size_t i = 0; i < npts; i++) + { + float sum = 0; + for (size_t d = 0; d < ndims; ++d) + vec[d] = (float)normal_rand(gen); + for (size_t d = 0; d < ndims; ++d) + sum += vec[d] * vec[d]; + for (size_t d = 0; d < ndims; ++d) + vec[d] = vec[d] * norm / std::sqrt(sum); + + for (size_t d = 0; d < ndims; ++d) + { + vec_T[d] = (int8_t)std::round(vec[d]); + } + + writer.write((char *)vec_T, ndims * sizeof(int8_t)); + } + + delete[] vec; + delete[] vec_T; + return 0; +} + +int block_write_uint8(std::ofstream &writer, size_t ndims, size_t npts, float norm) +{ + auto vec = new float[ndims]; + auto vec_T = new int8_t[ndims]; + + std::random_device rd{}; + std::mt19937 gen{rd()}; + std::normal_distribution<> normal_rand{0, 1}; + + for (size_t i = 0; i < npts; i++) + { + float sum = 0; + for (size_t d = 0; d < ndims; ++d) + vec[d] = (float)normal_rand(gen); + for (size_t d = 0; d < ndims; ++d) + sum += vec[d] * vec[d]; + for (size_t d = 0; d < ndims; ++d) + vec[d] = vec[d] * norm / std::sqrt(sum); + + for (size_t d = 0; d < ndims; ++d) + { + vec_T[d] = 128 + (int8_t)std::round(vec[d]); + } + + writer.write((char *)vec_T, ndims * sizeof(uint8_t)); + } + + delete[] vec; + delete[] vec_T; + return 0; +} + +int main(int argc, char **argv) +{ + std::string data_type, output_file; + size_t ndims, npts; + float norm, rand_scaling; + bool normalization = false; + try + { + po::options_description desc{"Arguments"}; + + desc.add_options()("help,h", "Print information on arguments"); + + desc.add_options()("data_type", po::value(&data_type)->required(), "data type "); + desc.add_options()("output_file", po::value(&output_file)->required(), + "File name for saving the random vectors"); + desc.add_options()("ndims,D", po::value(&ndims)->required(), "Dimensoinality of the vector"); + desc.add_options()("npts,N", po::value(&npts)->required(), "Number of vectors"); + desc.add_options()("norm", po::value(&norm)->default_value(-1.0f), + "Norm of the vectors (if not specified, vectors are not normalized)"); + desc.add_options()("rand_scaling", po::value(&rand_scaling)->default_value(1.0f), + "Each vector will be scaled (if not explicitly normalized) by a factor randomly chosen from " + "[1, rand_scale]. 
Only applicable for floating point data"); + po::variables_map vm; + po::store(po::parse_command_line(argc, argv, desc), vm); + if (vm.count("help")) + { + std::cout << desc; + return 0; + } + po::notify(vm); + } + catch (const std::exception &ex) + { + std::cerr << ex.what() << '\n'; + return -1; + } + + if (data_type != std::string("float") && data_type != std::string("int8") && data_type != std::string("uint8")) + { + std::cout << "Unsupported type. float, int8 and uint8 types are supported." << std::endl; + return -1; + } + + if (norm > 0.0) + { + normalization = true; + } + + if (rand_scaling < 1.0) + { + std::cout << "We will only scale the vector norms randomly in [1, value], so value must be >= 1." << std::endl; + return -1; + } + + if ((rand_scaling > 1.0) && (normalization == true)) + { + std::cout << "Data cannot be normalized and randomly scaled at same time. Use one or the other." << std::endl; + return -1; + } + + if (data_type == std::string("int8") || data_type == std::string("uint8")) + { + if (norm > 127) + { + std::cerr << "Error: for int8/uint8 datatypes, L2 norm can not be " + "greater " + "than 127" + << std::endl; + return -1; + } + if (rand_scaling > 1.0) + { + std::cout << "Data scaling only supported for floating point data." << std::endl; + return -1; + } + } + + try + { + std::ofstream writer; + writer.exceptions(std::ofstream::failbit | std::ofstream::badbit); + writer.open(output_file, std::ios::binary); + auto npts_u32 = (uint32_t)npts; + auto ndims_u32 = (uint32_t)ndims; + writer.write((char *)&npts_u32, sizeof(uint32_t)); + writer.write((char *)&ndims_u32, sizeof(uint32_t)); + + size_t blk_size = 131072; + size_t nblks = ROUND_UP(npts, blk_size) / blk_size; + std::cout << "# blks: " << nblks << std::endl; + + int ret = 0; + for (size_t i = 0; i < nblks; i++) + { + size_t cblk_size = std::min(npts - i * blk_size, blk_size); + if (data_type == std::string("float")) + { + ret = block_write_float(writer, ndims, cblk_size, normalization, norm, rand_scaling); + } + else if (data_type == std::string("int8")) + { + ret = block_write_int8(writer, ndims, cblk_size, norm); + } + else if (data_type == std::string("uint8")) + { + ret = block_write_uint8(writer, ndims, cblk_size, norm); + } + if (ret == 0) + std::cout << "Block #" << i << " written" << std::endl; + else + { + writer.close(); + std::cout << "failed to write" << std::endl; + return -1; + } + } + writer.close(); + } + catch (const std::exception &e) + { + std::cout << std::string(e.what()) << std::endl; + diskann::cerr << "Index build failed." << std::endl; + return -1; + } + + return 0; +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/simulate_aggregate_recall.cpp b/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/simulate_aggregate_recall.cpp new file mode 100644 index 0000000..73c4ea0 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/simulate_aggregate_recall.cpp @@ -0,0 +1,85 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
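// Illustrative note, our reading of the simulation below rather than upstream
// documentation: each trial scatters k_aggr result slots uniformly over npart
// shards; a shard that receives c slots is assumed to return recall@min(c, k)
// of them, contributing recalls[min(c, k) - 1] * min(c, k) expected hits, and
// the trial's aggregate recall is (total hits) / k_aggr. For example, with
// k = 10 and recall@10 = 0.95, a shard that received 12 slots contributes
// 0.95 * 10 = 9.5 expected hits. A per-shard helper (the name is ours) that
// also guards the empty-shard case:
#include <algorithm>
#include <cstdint>
#include <vector>

static float shard_hits(uint32_t slots, uint32_t k, const std::vector<float> &recalls)
{
    uint32_t used = std::min(slots, k); // a shard returns at most k results
    return used == 0 ? 0.0f : recalls[used - 1] * (float)used;
}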
+ +#include +#include +#include +#include + +inline float aggregate_recall(const uint32_t k_aggr, const uint32_t k, const uint32_t npart, uint32_t *count, + const std::vector &recalls) +{ + float found = 0; + for (uint32_t i = 0; i < npart; ++i) + { + size_t max_found = std::min(count[i], k); + found += recalls[max_found - 1] * max_found; + } + return found / (float)k_aggr; +} + +void simulate(const uint32_t k_aggr, const uint32_t k, const uint32_t npart, const uint32_t nsim, + const std::vector &recalls) +{ + std::random_device r; + std::default_random_engine randeng(r()); + std::uniform_int_distribution uniform_dist(0, npart - 1); + + uint32_t *count = new uint32_t[npart]; + double aggr_recall = 0; + + for (uint32_t i = 0; i < nsim; ++i) + { + for (uint32_t p = 0; p < npart; ++p) + { + count[p] = 0; + } + for (uint32_t t = 0; t < k_aggr; ++t) + { + count[uniform_dist(randeng)]++; + } + aggr_recall += aggregate_recall(k_aggr, k, npart, count, recalls); + } + + std::cout << "Aggregate recall is " << aggr_recall / (double)nsim << std::endl; + delete[] count; +} + +int main(int argc, char **argv) +{ + if (argc < 6) + { + std::cout << argv[0] << " k_aggregate k_out npart nsim recall@1 recall@2 ... recall@k" << std::endl; + exit(-1); + } + + const uint32_t k_aggr = atoi(argv[1]); + const uint32_t k = atoi(argv[2]); + const uint32_t npart = atoi(argv[3]); + const uint32_t nsim = atoi(argv[4]); + + std::vector recalls; + for (int ctr = 5; ctr < argc; ctr++) + { + recalls.push_back((float)atof(argv[ctr])); + } + + if (recalls.size() != k) + { + std::cerr << "Please input k numbers for recall@1, recall@2 .. recall@k" << std::endl; + } + if (k_aggr > npart * k) + { + std::cerr << "k_aggr must be <= k * npart" << std::endl; + exit(-1); + } + if (nsim <= npart * k_aggr) + { + std::cerr << "Choose nsim > npart*k_aggr" << std::endl; + exit(-1); + } + + simulate(k_aggr, k, npart, nsim, recalls); + + return 0; +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/stats_label_data.cpp b/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/stats_label_data.cpp new file mode 100644 index 0000000..3342672 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/stats_label_data.cpp @@ -0,0 +1,147 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
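// Illustrative note, not part of the upstream file: the percentile report below
// sorts labels by ascending count and prints the label at index
// floor(p * num_labels) for p = 0, 0.05, ..., 0.95, i.e. an empirical quantile
// of the label-size distribution. A minimal version of that lookup (the helper
// name is ours):
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

static uint32_t label_size_quantile(std::vector<uint32_t> counts, double p)
{
    std::sort(counts.begin(), counts.end());          // ascending label sizes
    size_t idx = (size_t)(p * (double)counts.size()); // same indexing as the report
    return counts[std::min(idx, counts.size() - 1)];  // clamp for p == 1.0
}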
+ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "utils.h" + +#ifndef _WINDOWS +#include +#include +#include +#include +#else +#include +#endif +namespace po = boost::program_options; + +void stats_analysis(const std::string labels_file, std::string univeral_label, uint32_t density = 10) +{ + std::string token, line; + std::ifstream labels_stream(labels_file); + std::unordered_map label_counts; + std::string label_with_max_points; + uint32_t max_points = 0; + long long sum = 0; + long long point_cnt = 0; + float avg_labels_per_pt, mean_label_size; + + std::vector labels_per_point; + uint32_t dense_pts = 0; + if (labels_stream.is_open()) + { + while (getline(labels_stream, line)) + { + point_cnt++; + std::stringstream iss(line); + uint32_t lbl_cnt = 0; + while (getline(iss, token, ',')) + { + lbl_cnt++; + token.erase(std::remove(token.begin(), token.end(), '\n'), token.end()); + token.erase(std::remove(token.begin(), token.end(), '\r'), token.end()); + if (label_counts.find(token) == label_counts.end()) + label_counts[token] = 0; + label_counts[token]++; + } + if (lbl_cnt >= density) + { + dense_pts++; + } + labels_per_point.emplace_back(lbl_cnt); + } + } + + std::cout << "fraction of dense points with >= " << density + << " labels = " << (float)dense_pts / (float)labels_per_point.size() << std::endl; + std::sort(labels_per_point.begin(), labels_per_point.end()); + + std::vector> label_count_vec; + + for (auto it = label_counts.begin(); it != label_counts.end(); it++) + { + auto &lbl = *it; + label_count_vec.emplace_back(std::make_pair(lbl.first, lbl.second)); + if (lbl.second > max_points) + { + max_points = lbl.second; + label_with_max_points = lbl.first; + } + sum += lbl.second; + } + + sort(label_count_vec.begin(), label_count_vec.end(), + [](const std::pair &lhs, const std::pair &rhs) { + return lhs.second < rhs.second; + }); + + for (float p = 0; p < 1; p += 0.05) + { + std::cout << "Percentile " << (100 * p) << "\t" << label_count_vec[(size_t)(p * label_count_vec.size())].first + << " with count=" << label_count_vec[(size_t)(p * label_count_vec.size())].second << std::endl; + } + + std::cout << "Most common label " + << "\t" << label_count_vec[label_count_vec.size() - 1].first + << " with count=" << label_count_vec[label_count_vec.size() - 1].second << std::endl; + if (label_count_vec.size() > 1) + std::cout << "Second common label " + << "\t" << label_count_vec[label_count_vec.size() - 2].first + << " with count=" << label_count_vec[label_count_vec.size() - 2].second << std::endl; + if (label_count_vec.size() > 2) + std::cout << "Third common label " + << "\t" << label_count_vec[label_count_vec.size() - 3].first + << " with count=" << label_count_vec[label_count_vec.size() - 3].second << std::endl; + avg_labels_per_pt = sum / (float)point_cnt; + mean_label_size = sum / (float)label_counts.size(); + std::cout << "Total number of points = " << point_cnt << ", number of labels = " << label_counts.size() + << std::endl; + std::cout << "Average number of labels per point = " << avg_labels_per_pt << std::endl; + std::cout << "Mean label size excluding 0 = " << mean_label_size << std::endl; + std::cout << "Most popular label is " << label_with_max_points << " with " << max_points << " pts" << std::endl; +} + +int main(int argc, char **argv) +{ + std::string labels_file, universal_label; + uint32_t density; + + po::options_description desc{"Arguments"}; + try + { + 
desc.add_options()("help,h", "Print information on arguments"); + desc.add_options()("labels_file", po::value(&labels_file)->required(), + "path to labels data file."); + desc.add_options()("universal_label", po::value(&universal_label)->required(), + "Universal label used in labels file."); + desc.add_options()("density", po::value(&density)->default_value(1), + "Number of labels each point in labels file, defaults to 1"); + po::variables_map vm; + po::store(po::parse_command_line(argc, argv, desc), vm); + if (vm.count("help")) + { + std::cout << desc; + return 0; + } + po::notify(vm); + } + catch (const std::exception &e) + { + std::cerr << e.what() << '\n'; + return -1; + } + stats_analysis(labels_file, universal_label, density); +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/tsv_to_bin.cpp b/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/tsv_to_bin.cpp new file mode 100644 index 0000000..c590a8f --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/tsv_to_bin.cpp @@ -0,0 +1,121 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#include +#include "utils.h" + +void block_convert_float(std::ifstream &reader, std::ofstream &writer, size_t npts, size_t ndims) +{ + auto read_buf = new float[npts * (ndims + 1)]; + + auto cursor = read_buf; + float val; + + for (size_t i = 0; i < npts; i++) + { + for (size_t d = 0; d < ndims; ++d) + { + reader >> val; + *cursor = val; + cursor++; + } + } + writer.write((char *)read_buf, npts * ndims * sizeof(float)); + delete[] read_buf; +} + +void block_convert_int8(std::ifstream &reader, std::ofstream &writer, size_t npts, size_t ndims) +{ + auto read_buf = new int8_t[npts * (ndims + 1)]; + + auto cursor = read_buf; + int val; + + for (size_t i = 0; i < npts; i++) + { + for (size_t d = 0; d < ndims; ++d) + { + reader >> val; + *cursor = (int8_t)val; + cursor++; + } + } + writer.write((char *)read_buf, npts * ndims * sizeof(uint8_t)); + delete[] read_buf; +} + +void block_convert_uint8(std::ifstream &reader, std::ofstream &writer, size_t npts, size_t ndims) +{ + auto read_buf = new uint8_t[npts * (ndims + 1)]; + + auto cursor = read_buf; + int val; + + for (size_t i = 0; i < npts; i++) + { + for (size_t d = 0; d < ndims; ++d) + { + reader >> val; + *cursor = (uint8_t)val; + cursor++; + } + } + writer.write((char *)read_buf, npts * ndims * sizeof(uint8_t)); + delete[] read_buf; +} + +int main(int argc, char **argv) +{ + if (argc != 6) + { + std::cout << argv[0] + << " input_filename.tsv output_filename.bin " + "dim num_pts>" + << std::endl; + exit(-1); + } + + if (std::string(argv[1]) != std::string("float") && std::string(argv[1]) != std::string("int8") && + std::string(argv[1]) != std::string("uint8")) + { + std::cout << "Unsupported type. float, int8 and uint8 types are supported." 
<< std::endl; + } + + size_t ndims = atoi(argv[4]); + size_t npts = atoi(argv[5]); + + std::ifstream reader(argv[2], std::ios::binary | std::ios::ate); + // size_t fsize = reader.tellg(); + reader.seekg(0, std::ios::beg); + reader.seekg(0, std::ios::beg); + + size_t blk_size = 131072; + size_t nblks = ROUND_UP(npts, blk_size) / blk_size; + std::cout << "# blks: " << nblks << std::endl; + std::ofstream writer(argv[3], std::ios::binary); + auto npts_u32 = (uint32_t)npts; + auto ndims_u32 = (uint32_t)ndims; + writer.write((char *)&npts_u32, sizeof(uint32_t)); + writer.write((char *)&ndims_u32, sizeof(uint32_t)); + + for (size_t i = 0; i < nblks; i++) + { + size_t cblk_size = std::min(npts - i * blk_size, blk_size); + if (std::string(argv[1]) == std::string("float")) + { + block_convert_float(reader, writer, cblk_size, ndims); + } + else if (std::string(argv[1]) == std::string("int8")) + { + block_convert_int8(reader, writer, cblk_size, ndims); + } + else if (std::string(argv[1]) == std::string("uint8")) + { + block_convert_uint8(reader, writer, cblk_size, ndims); + } + std::cout << "Block #" << i << " written" << std::endl; + } + + reader.close(); + writer.close(); +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/uint32_to_uint8.cpp b/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/uint32_to_uint8.cpp new file mode 100644 index 0000000..87b6fb8 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/uint32_to_uint8.cpp @@ -0,0 +1,23 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#include +#include "utils.h" + +int main(int argc, char **argv) +{ + if (argc != 3) + { + std::cout << argv[0] << " input_uint32_bin output_int8_bin" << std::endl; + exit(-1); + } + + uint32_t *input; + size_t npts, nd; + diskann::load_bin(argv[1], input, npts, nd); + uint8_t *output = new uint8_t[npts * nd]; + diskann::convert_types(input, output, npts, nd); + diskann::save_bin(argv[2], output, npts, nd); + delete[] output; + delete[] input; +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/uint8_to_float.cpp b/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/uint8_to_float.cpp new file mode 100644 index 0000000..6415b7c --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/uint8_to_float.cpp @@ -0,0 +1,23 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#include +#include "utils.h" + +int main(int argc, char **argv) +{ + if (argc != 3) + { + std::cout << argv[0] << " input_uint8_bin output_float_bin" << std::endl; + exit(-1); + } + + uint8_t *input; + size_t npts, nd; + diskann::load_bin(argv[1], input, npts, nd); + float *output = new float[npts * nd]; + diskann::convert_types(input, output, npts, nd); + diskann::save_bin(argv[2], output, npts, nd); + delete[] output; + delete[] input; +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/vector_analysis.cpp b/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/vector_analysis.cpp new file mode 100644 index 0000000..009df6d --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/apps/utils/vector_analysis.cpp @@ -0,0 +1,163 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
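// Illustrative sketch, our summary rather than upstream documentation:
// augment_base() below implements the standard MIPS-to-L2 reduction. With
// M = max base norm, each base vector x becomes [x / M, sqrt(1 - ||x||^2 / M^2)]
// and each query q becomes [q / ||q||, 0]; the squared L2 distance between the
// augmented pair is then 2 - 2 * <x, q> / (M * ||q||), so nearest-neighbour
// search on the augmented vectors ranks points by inner product with q. A
// float-only version of the base-side transform (the helper name is ours):
#include <cmath>
#include <cstddef>
#include <vector>

static std::vector<float> augment_base_vector(const float *x, size_t dim, float max_norm)
{
    std::vector<float> out(dim + 1);
    float sq = 0.0f;
    for (size_t d = 0; d < dim; d++)
    {
        out[d] = x[d] / max_norm;
        sq += x[d] * x[d];
    }
    float rem = 1.0f - sq / (max_norm * max_norm);
    out[dim] = rem > 0.0f ? std::sqrt(rem) : 0.0f; // extra coordinate
    return out;
}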
+ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "partition.h" +#include "utils.h" + +template int analyze_norm(std::string base_file) +{ + std::cout << "Analyzing data norms" << std::endl; + T *data; + size_t npts, ndims; + diskann::load_bin(base_file, data, npts, ndims); + std::vector norms(npts, 0); +#pragma omp parallel for schedule(dynamic) + for (int64_t i = 0; i < (int64_t)npts; i++) + { + for (size_t d = 0; d < ndims; d++) + norms[i] += data[i * ndims + d] * data[i * ndims + d]; + norms[i] = std::sqrt(norms[i]); + } + std::sort(norms.begin(), norms.end()); + for (int p = 0; p < 100; p += 5) + std::cout << "percentile " << p << ": " << norms[(uint64_t)(std::floor((p / 100.0) * npts))] << std::endl; + std::cout << "percentile 100" + << ": " << norms[npts - 1] << std::endl; + delete[] data; + return 0; +} + +template int normalize_base(std::string base_file, std::string out_file) +{ + std::cout << "Normalizing base" << std::endl; + T *data; + size_t npts, ndims; + diskann::load_bin(base_file, data, npts, ndims); + // std::vector norms(npts, 0); +#pragma omp parallel for schedule(dynamic) + for (int64_t i = 0; i < (int64_t)npts; i++) + { + float pt_norm = 0; + for (size_t d = 0; d < ndims; d++) + pt_norm += data[i * ndims + d] * data[i * ndims + d]; + pt_norm = std::sqrt(pt_norm); + for (size_t d = 0; d < ndims; d++) + data[i * ndims + d] = static_cast(data[i * ndims + d] / pt_norm); + } + diskann::save_bin(out_file, data, npts, ndims); + delete[] data; + return 0; +} + +template int augment_base(std::string base_file, std::string out_file, bool prep_base = true) +{ + std::cout << "Analyzing data norms" << std::endl; + T *data; + size_t npts, ndims; + diskann::load_bin(base_file, data, npts, ndims); + std::vector norms(npts, 0); + float max_norm = 0; +#pragma omp parallel for schedule(dynamic) + for (int64_t i = 0; i < (int64_t)npts; i++) + { + for (size_t d = 0; d < ndims; d++) + norms[i] += data[i * ndims + d] * data[i * ndims + d]; + max_norm = norms[i] > max_norm ? norms[i] : max_norm; + } + // std::sort(norms.begin(), norms.end()); + max_norm = std::sqrt(max_norm); + std::cout << "Max norm: " << max_norm << std::endl; + T *new_data; + size_t newdims = ndims + 1; + new_data = new T[npts * newdims]; + for (size_t i = 0; i < npts; i++) + { + if (prep_base) + { + for (size_t j = 0; j < ndims; j++) + { + new_data[i * newdims + j] = static_cast(data[i * ndims + j] / max_norm); + } + float diff = 1 - (norms[i] / (max_norm * max_norm)); + diff = diff <= 0 ? 0 : std::sqrt(diff); + new_data[i * newdims + ndims] = static_cast(diff); + if (diff <= 0) + { + std::cout << i << " has large max norm, investigate if needed. 
diff = " << diff << std::endl; + } + } + else + { + for (size_t j = 0; j < ndims; j++) + { + new_data[i * newdims + j] = static_cast(data[i * ndims + j] / std::sqrt(norms[i])); + } + new_data[i * newdims + ndims] = 0; + } + } + diskann::save_bin(out_file, new_data, npts, newdims); + delete[] new_data; + delete[] data; + return 0; +} + +template int aux_main(char **argv) +{ + std::string base_file(argv[2]); + uint32_t option = atoi(argv[3]); + if (option == 1) + analyze_norm(base_file); + else if (option == 2) + augment_base(base_file, std::string(argv[4]), true); + else if (option == 3) + augment_base(base_file, std::string(argv[4]), false); + else if (option == 4) + normalize_base(base_file, std::string(argv[4])); + return 0; +} + +int main(int argc, char **argv) +{ + if (argc < 4) + { + std::cout << argv[0] + << " data_type [float/int8/uint8] base_bin_file " + "[option: 1-norm analysis, 2-prep_base_for_mip, " + "3-prep_query_for_mip, 4-normalize-vecs] [out_file for " + "options 2/3/4]" + << std::endl; + exit(-1); + } + + if (std::string(argv[1]) == std::string("float")) + { + aux_main(argv); + } + else if (std::string(argv[1]) == std::string("int8")) + { + aux_main(argv); + } + else if (std::string(argv[1]) == std::string("uint8")) + { + aux_main(argv); + } + else + std::cout << "Unsupported type. Use float/int8/uint8." << std::endl; + return 0; +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/clang-format.cmake b/packages/leann-backend-diskann/third_party/DiskANN/clang-format.cmake new file mode 100644 index 0000000..19bb3a8 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/clang-format.cmake @@ -0,0 +1,22 @@ +if (NOT MSVC) + message(STATUS "Setting up `make format` and `make checkformat`") + # additional target to perform clang-format run, requires clang-format + # get all project files + file(GLOB_RECURSE ALL_SOURCE_FILES include/*.h include/*.hpp python/src/*.cpp src/*.cpp src/*.hpp apps/*.cpp apps/*.hpp) + + message(status ${ALL_SOURCE_FILES}) + + add_custom_target( + format + COMMAND /usr/bin/clang-format + -i + ${ALL_SOURCE_FILES} + ) + add_custom_target( + checkformat + COMMAND /usr/bin/clang-format + --Werror + --dry-run + ${ALL_SOURCE_FILES} + ) +endif() diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/abstract_data_store.h b/packages/leann-backend-diskann/third_party/DiskANN/include/abstract_data_store.h new file mode 100644 index 0000000..89856f1 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/abstract_data_store.h @@ -0,0 +1,127 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include +#include + +#include "types.h" +#include "windows_customizations.h" +#include "distance.h" + +namespace diskann +{ + +template class AbstractScratch; + +template class AbstractDataStore +{ + public: + AbstractDataStore(const location_t capacity, const size_t dim); + + virtual ~AbstractDataStore() = default; + + // Return number of points returned + virtual location_t load(const std::string &filename) = 0; + + // Why does store take num_pts? Since store only has capacity, but we allow + // resizing we can end up in a situation where the store has spare capacity. + // To optimize disk utilization, we pass the number of points that are "true" + // points, so that the store can discard the empty locations before saving. 
+ virtual size_t save(const std::string &filename, const location_t num_pts) = 0; + + DISKANN_DLLEXPORT virtual location_t capacity() const; + + DISKANN_DLLEXPORT virtual size_t get_dims() const; + + // Implementers can choose to return _dim if they are not + // concerned about memory alignment. + // Some distance metrics (like l2) need data vectors to be aligned, so we + // align the dimension by padding zeros. + virtual size_t get_aligned_dim() const = 0; + + // populate the store with vectors (either from a pointer or bin file), + // potentially after pre-processing the vectors if the metric deems so + // e.g., normalizing vectors for cosine distance over floating-point vectors + // useful for bulk or static index building. + virtual void populate_data(const data_t *vectors, const location_t num_pts) = 0; + virtual void populate_data(const std::string &filename, const size_t offset) = 0; + + // save the first num_pts many vectors back to bin file + // note: cannot undo the pre-processing done in populate data + virtual void extract_data_to_bin(const std::string &filename, const location_t num_pts) = 0; + + // Returns the updated capacity of the datastore. Clients should check + // if resize actually changed the capacity to new_num_points before + // proceeding with operations. See the code below: + // auto new_capcity = data_store->resize(new_num_points); + // if ( new_capacity >= new_num_points) { + // //PROCEED + // else + // //ERROR. + virtual location_t resize(const location_t new_num_points); + + // operations on vectors + // like populate_data function, but over one vector at a time useful for + // streaming setting + virtual void get_vector(const location_t i, data_t *dest) const = 0; + virtual void set_vector(const location_t i, const data_t *const vector) = 0; + virtual void prefetch_vector(const location_t loc) = 0; + + // internal shuffle operations to move around vectors + // will bulk-move all the vectors in [old_start_loc, old_start_loc + + // num_points) to [new_start_loc, new_start_loc + num_points) and set the old + // positions to zero vectors. + virtual void move_vectors(const location_t old_start_loc, const location_t new_start_loc, + const location_t num_points) = 0; + + // same as above, without resetting the vectors in [from_loc, from_loc + + // num_points) to zero + virtual void copy_vectors(const location_t from_loc, const location_t to_loc, const location_t num_points) = 0; + + // With the PQ Data Store PR, we have also changed iterate_to_fixed_point to NOT take the query + // from the scratch object. Therefore every data store has to implement preprocess_query which + // at the least will be to copy the query into the scratch object. So making this pure virtual. + virtual void preprocess_query(const data_t *aligned_query, + AbstractScratch *query_scratch = nullptr) const = 0; + // distance functions. + virtual float get_distance(const data_t *query, const location_t loc) const = 0; + virtual void get_distance(const data_t *query, const location_t *locations, const uint32_t location_count, + float *distances, AbstractScratch *scratch_space = nullptr) const = 0; + // Specific overload for index.cpp. 
+ virtual void get_distance(const data_t *preprocessed_query, const std::vector &ids, + std::vector &distances, AbstractScratch *scratch_space) const = 0; + virtual float get_distance(const location_t loc1, const location_t loc2) const = 0; + + // stats of the data stored in store + // Returns the point in the dataset that is closest to the mean of all points + // in the dataset + virtual location_t calculate_medoid() const = 0; + + // REFACTOR PQ TODO: Each data store knows about its distance function, so this is + // redundant. However, we don't have an OptmizedDataStore yet, and to preserve code + // compability, we are exposing this function. + virtual Distance *get_dist_fn() const = 0; + + // search helpers + // if the base data is aligned per the request of the metric, this will tell + // how to align the query vector in a consistent manner + virtual size_t get_alignment_factor() const = 0; + + protected: + // Expand the datastore to new_num_points. Returns the new capacity created, + // which should be == new_num_points in the normal case. Implementers can also + // return _capacity to indicate that there are not implementing this method. + virtual location_t expand(const location_t new_num_points) = 0; + + // Shrink the datastore to new_num_points. It is NOT an error if shrink + // doesn't reduce the capacity so callers need to check this correctly. See + // also for "default" implementation + virtual location_t shrink(const location_t new_num_points) = 0; + + location_t _capacity; + size_t _dim; +}; + +} // namespace diskann diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/abstract_graph_store.h b/packages/leann-backend-diskann/third_party/DiskANN/include/abstract_graph_store.h new file mode 100644 index 0000000..4d6906c --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/abstract_graph_store.h @@ -0,0 +1,68 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include +#include +#include "types.h" + +namespace diskann +{ + +class AbstractGraphStore +{ + public: + AbstractGraphStore(const size_t total_pts, const size_t reserve_graph_degree) + : _capacity(total_pts), _reserve_graph_degree(reserve_graph_degree) + { + } + + virtual ~AbstractGraphStore() = default; + + // returns tuple of + virtual std::tuple load(const std::string &index_path_prefix, + const size_t num_points) = 0; + virtual int store(const std::string &index_path_prefix, const size_t num_points, const size_t num_fz_points, + const uint32_t start) = 0; + + // not synchronised, user should use lock when necvessary. + virtual const std::vector &get_neighbours(const location_t i) const = 0; + virtual void add_neighbour(const location_t i, location_t neighbour_id) = 0; + virtual void clear_neighbours(const location_t i) = 0; + virtual void swap_neighbours(const location_t a, location_t b) = 0; + + virtual void set_neighbours(const location_t i, std::vector &neighbours) = 0; + + virtual size_t resize_graph(const size_t new_size) = 0; + virtual void clear_graph() = 0; + + virtual uint32_t get_max_observed_degree() = 0; + + // set during load + virtual size_t get_max_range_of_graph() = 0; + + // Total internal points _max_points + _num_frozen_points + size_t get_total_points() + { + return _capacity; + } + + protected: + // Internal function, changes total points when resize_graph is called. 
+ void set_total_points(size_t new_capacity) + { + _capacity = new_capacity; + } + + size_t get_reserve_graph_degree() + { + return _reserve_graph_degree; + } + + private: + size_t _capacity; + size_t _reserve_graph_degree; +}; + +} // namespace diskann \ No newline at end of file diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/abstract_index.h b/packages/leann-backend-diskann/third_party/DiskANN/include/abstract_index.h new file mode 100644 index 0000000..059866f --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/abstract_index.h @@ -0,0 +1,129 @@ +#pragma once +#include "distance.h" +#include "parameters.h" +#include "utils.h" +#include "types.h" +#include "index_config.h" +#include "index_build_params.h" +#include + +namespace diskann +{ +struct consolidation_report +{ + enum status_code + { + SUCCESS = 0, + FAIL = 1, + LOCK_FAIL = 2, + INCONSISTENT_COUNT_ERROR = 3 + }; + status_code _status; + size_t _active_points, _max_points, _empty_slots, _slots_released, _delete_set_size, _num_calls_to_process_delete; + double _time; + + consolidation_report(status_code status, size_t active_points, size_t max_points, size_t empty_slots, + size_t slots_released, size_t delete_set_size, size_t num_calls_to_process_delete, + double time_secs) + : _status(status), _active_points(active_points), _max_points(max_points), _empty_slots(empty_slots), + _slots_released(slots_released), _delete_set_size(delete_set_size), + _num_calls_to_process_delete(num_calls_to_process_delete), _time(time_secs) + { + } +}; + +/* A templated independent class for intercation with Index. Uses Type Erasure to add virtual implemetation of methods +that can take any type(using std::any) and Provides a clean API that can be inherited by different type of Index. +*/ +class AbstractIndex +{ + public: + AbstractIndex() = default; + virtual ~AbstractIndex() = default; + + virtual void build(const std::string &data_file, const size_t num_points_to_load, + IndexFilterParams &build_params) = 0; + + template + void build(const data_type *data, const size_t num_points_to_load, const std::vector &tags); + + virtual void save(const char *filename, bool compact_before_save = false) = 0; + +#ifdef EXEC_ENV_OLS + virtual void load(AlignedFileReader &reader, uint32_t num_threads, uint32_t search_l) = 0; +#else + virtual void load(const char *index_file, uint32_t num_threads, uint32_t search_l) = 0; +#endif + + // For FastL2 search on optimized layout + template + void search_with_optimized_layout(const data_type *query, size_t K, size_t L, uint32_t *indices); + + // Initialize space for res_vectors before calling. 
+ template + size_t search_with_tags(const data_type *query, const uint64_t K, const uint32_t L, tag_type *tags, + float *distances, std::vector &res_vectors, bool use_filters = false, + const std::string filter_label = ""); + + // Added search overload that takes L as parameter, so that we + // can customize L on a per-query basis without tampering with "Parameters" + // IDtype is either uint32_t or uint64_t + template + std::pair search(const data_type *query, const size_t K, const uint32_t L, IDType *indices, + float *distances = nullptr); + + // Filter support search + // IndexType is either uint32_t or uint64_t + template + std::pair search_with_filters(const DataType &query, const std::string &raw_label, + const size_t K, const uint32_t L, IndexType *indices, + float *distances); + + // insert points with labels, labels should be present for filtered index + template + int insert_point(const data_type *point, const tag_type tag, const std::vector &labels); + + // insert point for unfiltered index build. do not use with filtered index + template int insert_point(const data_type *point, const tag_type tag); + + // delete point with tag, or return -1 if point can not be deleted + template int lazy_delete(const tag_type &tag); + + // batch delete tags and populates failed tags if unabke to delete given tags. + template + void lazy_delete(const std::vector &tags, std::vector &failed_tags); + + template void get_active_tags(tsl::robin_set &active_tags); + + template void set_start_points_at_random(data_type radius, uint32_t random_seed = 0); + + virtual consolidation_report consolidate_deletes(const IndexWriteParameters ¶meters) = 0; + + virtual void optimize_index_layout() = 0; + + // memory should be allocated for vec before calling this function + template int get_vector_by_tag(tag_type &tag, data_type *vec); + + template void set_universal_label(const label_type universal_label); + + private: + virtual void _build(const DataType &data, const size_t num_points_to_load, TagVector &tags) = 0; + virtual std::pair _search(const DataType &query, const size_t K, const uint32_t L, + std::any &indices, float *distances = nullptr) = 0; + virtual std::pair _search_with_filters(const DataType &query, const std::string &filter_label, + const size_t K, const uint32_t L, std::any &indices, + float *distances) = 0; + virtual int _insert_point(const DataType &data_point, const TagType tag, Labelvector &labels) = 0; + virtual int _insert_point(const DataType &data_point, const TagType tag) = 0; + virtual int _lazy_delete(const TagType &tag) = 0; + virtual void _lazy_delete(TagVector &tags, TagVector &failed_tags) = 0; + virtual void _get_active_tags(TagRobinSet &active_tags) = 0; + virtual void _set_start_points_at_random(DataType radius, uint32_t random_seed = 0) = 0; + virtual int _get_vector_by_tag(TagType &tag, DataType &vec) = 0; + virtual size_t _search_with_tags(const DataType &query, const uint64_t K, const uint32_t L, const TagType &tags, + float *distances, DataVector &res_vectors, bool use_filters = false, + const std::string filter_label = "") = 0; + virtual void _search_with_optimized_layout(const DataType &query, size_t K, size_t L, uint32_t *indices) = 0; + virtual void _set_universal_label(const LabelType universal_label) = 0; +}; +} // namespace diskann diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/abstract_scratch.h b/packages/leann-backend-diskann/third_party/DiskANN/include/abstract_scratch.h new file mode 100644 index 0000000..b42a836 --- /dev/null +++ 
b/packages/leann-backend-diskann/third_party/DiskANN/include/abstract_scratch.h @@ -0,0 +1,35 @@ +#pragma once +namespace diskann +{ + +template class PQScratch; + +// By somewhat more than a coincidence, it seems that both InMemQueryScratch +// and SSDQueryScratch have the aligned query and PQScratch objects. So we +// can put them in a neat hierarchy and keep PQScratch as a standalone class. +template class AbstractScratch +{ + public: + AbstractScratch() = default; + // This class does not take any responsibilty for memory management of + // its members. It is the responsibility of the derived classes to do so. + virtual ~AbstractScratch() = default; + + // Scratch objects should not be copied + AbstractScratch(const AbstractScratch &) = delete; + AbstractScratch &operator=(const AbstractScratch &) = delete; + + data_t *aligned_query_T() + { + return _aligned_query_T; + } + PQScratch *pq_scratch() + { + return _pq_scratch; + } + + protected: + data_t *_aligned_query_T = nullptr; + PQScratch *_pq_scratch = nullptr; +}; +} // namespace diskann diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/aligned_file_reader.h b/packages/leann-backend-diskann/third_party/DiskANN/include/aligned_file_reader.h new file mode 100644 index 0000000..2e2716a --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/aligned_file_reader.h @@ -0,0 +1,138 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#define MAX_IO_DEPTH 128 + +#include +#include + +#ifdef __linux__ +#include +#include +#include +#include +typedef io_context_t IOContext; +#elif __APPLE__ +#include +#include +#include + +struct IOContext +{ + int fd; + dispatch_io_t channel; + dispatch_queue_t queue; + dispatch_group_t grp; +}; +#elif _WINDOWS +#include +#include +#include + +#ifndef USE_BING_INFRA +struct IOContext +{ + HANDLE fhandle = NULL; + HANDLE iocp = NULL; + std::vector reqs; +}; +#else +#include "IDiskPriorityIO.h" +#include +// TODO: Caller code is very callous about copying IOContext objects +// all over the place. MUST verify that it won't cause leaks/logical +// errors. +// Because of such callous copying, we have to use ptr->atomic instead +// of atomic, as atomic is not copyable. +struct IOContext +{ + enum Status + { + READ_WAIT = 0, + READ_SUCCESS, + READ_FAILED, + PROCESS_COMPLETE + }; + + std::shared_ptr m_pDiskIO = nullptr; + std::shared_ptr> m_pRequests; + std::shared_ptr> m_pRequestsStatus; + + // waitonaddress on this memory to wait for IO completion signal + // reader should signal this memory after IO completion + // TODO: WindowsAlignedFileReader can be modified to take advantage of this + // and can largely share code with the file reader for Bing. 
+ mutable volatile long m_completeCount = 0; + + IOContext() + : m_pRequestsStatus(new std::vector()), m_pRequests(new std::vector()) + { + (*m_pRequestsStatus).reserve(MAX_IO_DEPTH); + (*m_pRequests).reserve(MAX_IO_DEPTH); + } +}; +#endif + +#endif + +#include +#include +#include +#include "tsl/robin_map.h" +#include "utils.h" + +// NOTE :: all 3 fields must be 512-aligned +struct AlignedRead +{ + uint64_t offset; // where to read from + uint64_t len; // how much to read + void *buf; // where to read into + + AlignedRead() : offset(0), len(0), buf(nullptr) + { + } + + AlignedRead(uint64_t offset, uint64_t len, void *buf) : offset(offset), len(len), buf(buf) + { + assert(IS_512_ALIGNED(offset)); + assert(IS_512_ALIGNED(len)); + assert(IS_512_ALIGNED(buf)); + // assert(malloc_usable_size(buf) >= len); + } +}; + +class AlignedFileReader +{ + protected: + tsl::robin_map ctx_map; + std::mutex ctx_mut; + + public: + // returns the thread-specific context + // returns (io_context_t)(-1) if thread is not registered + virtual IOContext &get_ctx() = 0; + + virtual ~AlignedFileReader(){}; + + // register thread-id for a context + virtual void register_thread() = 0; + // de-register thread-id for a context + virtual void deregister_thread() = 0; + virtual void deregister_all_threads() = 0; + + // Open & close ops + // Blocking calls + virtual void open(const std::string &fname) = 0; + virtual void close() = 0; + + // process batch of aligned requests in parallel + // NOTE :: blocking call + virtual void read(std::vector &read_reqs, IOContext &ctx, bool async = false) = 0; + +#ifdef USE_BING_INFRA + // wait for completion of one request in a batch of requests + virtual void wait(IOContext &ctx, int &completedIndex) = 0; +#endif +}; diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/ann_exception.h b/packages/leann-backend-diskann/third_party/DiskANN/include/ann_exception.h new file mode 100644 index 0000000..55f069b --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/ann_exception.h @@ -0,0 +1,35 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once +#include +#include +#include +#include "windows_customizations.h" +#include + +#ifndef _WINDOWS +#define __FUNCSIG__ __PRETTY_FUNCTION__ +#endif + +namespace diskann +{ + +class ANNException : public std::runtime_error +{ + public: + DISKANN_DLLEXPORT ANNException(const std::string &message, int errorCode); + DISKANN_DLLEXPORT ANNException(const std::string &message, int errorCode, const std::string &funcSig, + const std::string &fileName, uint32_t lineNum); + + private: + int _errorCode; +}; + +class FileException : public ANNException +{ + public: + DISKANN_DLLEXPORT FileException(const std::string &filename, std::system_error &e, const std::string &funcSig, + const std::string &fileName, uint32_t lineNum); +}; +} // namespace diskann diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/any_wrappers.h b/packages/leann-backend-diskann/third_party/DiskANN/include/any_wrappers.h new file mode 100644 index 0000000..da9005c --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/any_wrappers.h @@ -0,0 +1,53 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include +#include +#include +#include +#include "tsl/robin_set.h" + +namespace AnyWrapper +{ + +/* + * Base Struct to hold refrence to the data. 
+ * Note: No memory mamagement, caller need to keep object alive. + */ +struct AnyReference +{ + template AnyReference(Ty &reference) : _data(&reference) + { + } + + template Ty &get() + { + auto ptr = std::any_cast(_data); + return *ptr; + } + + private: + std::any _data; +}; +struct AnyRobinSet : public AnyReference +{ + template AnyRobinSet(const tsl::robin_set &robin_set) : AnyReference(robin_set) + { + } + template AnyRobinSet(tsl::robin_set &robin_set) : AnyReference(robin_set) + { + } +}; + +struct AnyVector : public AnyReference +{ + template AnyVector(const std::vector &vector) : AnyReference(vector) + { + } + template AnyVector(std::vector &vector) : AnyReference(vector) + { + } +}; +} // namespace AnyWrapper diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/apple_aligned_file_reader.h b/packages/leann-backend-diskann/third_party/DiskANN/include/apple_aligned_file_reader.h new file mode 100644 index 0000000..160e1ea --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/apple_aligned_file_reader.h @@ -0,0 +1,26 @@ +#pragma once +#ifdef __APPLE__ +#include "aligned_file_reader.h" + +class AppleAlignedFileReader : public AlignedFileReader +{ + private: + uint64_t file_sz; + FileHandle file_desc; + + public: + AppleAlignedFileReader(); + ~AppleAlignedFileReader(); + + IOContext &get_ctx(); + + void register_thread(); + void deregister_thread(); + void deregister_all_threads(); + + void open(const std::string &fname); + void close(); + + void read(std::vector &read_reqs, IOContext &ctx, bool async = false); +}; +#endif diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/boost_dynamic_bitset_fwd.h b/packages/leann-backend-diskann/third_party/DiskANN/include/boost_dynamic_bitset_fwd.h new file mode 100644 index 0000000..5aebb2b --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/boost_dynamic_bitset_fwd.h @@ -0,0 +1,11 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +namespace boost +{ +#ifndef BOOST_DYNAMIC_BITSET_FWD_HPP +template > class dynamic_bitset; +#endif +} // namespace boost diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/cached_io.h b/packages/leann-backend-diskann/third_party/DiskANN/include/cached_io.h new file mode 100644 index 0000000..daef2f2 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/cached_io.h @@ -0,0 +1,217 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
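The AlignedFileReader interface and the AlignedRead request struct declared in aligned_file_reader.h above are easiest to read from the caller's side. The sketch below is illustrative only: it takes any concrete reader by reference (for example the AppleAlignedFileReader declared above, on macOS), assumes register_thread() should precede get_ctx() as the get_ctx() comment implies, and uses a 4096-byte request purely because it satisfies the 512-byte alignment asserts on offset, length, and buffer.

#include <cstdlib>
#include <string>
#include <vector>
#include "aligned_file_reader.h"

// Issue one blocking, sector-aligned read through any AlignedFileReader backend.
void read_first_sector(AlignedFileReader &reader, const std::string &path)
{
    reader.open(path);
    reader.register_thread();                    // each calling thread needs its own IOContext

    void *buf = std::aligned_alloc(4096, 4096);  // offset, len and buf must all be 512-aligned
    std::vector<AlignedRead> reqs;
    reqs.emplace_back(/*offset=*/0, /*len=*/4096, buf);

    IOContext &ctx = reader.get_ctx();
    reader.read(reqs, ctx);                      // blocking batch read (async defaults to false)

    reader.deregister_thread();
    reader.close();
    std::free(buf);
}

On Linux the IOContext behind get_ctx() is an io_context_t; the struct definitions above show how the same call shape is backed by GCD channels on macOS and by IOCP or the Bing I/O layer on Windows.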
+ +#pragma once +#include +#include +#include +#include + +#include "logger.h" +#include "ann_exception.h" + +// sequential cached reads +class cached_ifstream +{ + public: + cached_ifstream() + { + } + cached_ifstream(const std::string &filename, uint64_t cacheSize) : cache_size(cacheSize), cur_off(0) + { + reader.exceptions(std::ifstream::failbit | std::ifstream::badbit); + this->open(filename, cache_size); + } + ~cached_ifstream() + { + delete[] cache_buf; + reader.close(); + } + + void open(const std::string &filename, uint64_t cacheSize) + { + this->cur_off = 0; + + try + { + reader.open(filename, std::ios::binary | std::ios::ate); + fsize = reader.tellg(); + reader.seekg(0, std::ios::beg); + assert(reader.is_open()); + assert(cacheSize > 0); + cacheSize = (std::min)(cacheSize, fsize); + this->cache_size = cacheSize; + cache_buf = new char[cacheSize]; + reader.read(cache_buf, cacheSize); + diskann::cout << "Opened: " << filename.c_str() << ", size: " << fsize << ", cache_size: " << cacheSize + << std::endl; + } + catch (std::system_error &e) + { + throw diskann::FileException(filename, e, __FUNCSIG__, __FILE__, __LINE__); + } + } + + size_t get_file_size() + { + return fsize; + } + + void read(char *read_buf, uint64_t n_bytes) + { + assert(cache_buf != nullptr); + assert(read_buf != nullptr); + + if (n_bytes <= (cache_size - cur_off)) + { + // case 1: cache contains all data + memcpy(read_buf, cache_buf + cur_off, n_bytes); + cur_off += n_bytes; + } + else + { + // case 2: cache contains some data + uint64_t cached_bytes = cache_size - cur_off; + if (n_bytes - cached_bytes > fsize - reader.tellg()) + { + std::stringstream stream; + stream << "Reading beyond end of file" << std::endl; + stream << "n_bytes: " << n_bytes << " cached_bytes: " << cached_bytes << " fsize: " << fsize + << " current pos:" << reader.tellg() << std::endl; + diskann::cout << stream.str() << std::endl; + throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); + } + memcpy(read_buf, cache_buf + cur_off, cached_bytes); + + // go to disk and fetch more data + reader.read(read_buf + cached_bytes, n_bytes - cached_bytes); + // reset cur off + cur_off = cache_size; + + uint64_t size_left = fsize - reader.tellg(); + + if (size_left >= cache_size) + { + reader.read(cache_buf, cache_size); + cur_off = 0; + } + // note that if size_left < cache_size, then cur_off = cache_size, + // so subsequent reads will all be directly from file + } + } + + private: + // underlying ifstream + std::ifstream reader; + // # bytes to cache in one shot read + uint64_t cache_size = 0; + // underlying buf for cache + char *cache_buf = nullptr; + // offset into cache_buf for cur_pos + uint64_t cur_off = 0; + // file size + uint64_t fsize = 0; +}; + +// sequential cached writes +class cached_ofstream +{ + public: + cached_ofstream(const std::string &filename, uint64_t cache_size) : cache_size(cache_size), cur_off(0) + { + writer.exceptions(std::ifstream::failbit | std::ifstream::badbit); + try + { + writer.open(filename, std::ios::binary); + assert(writer.is_open()); + assert(cache_size > 0); + cache_buf = new char[cache_size]; + diskann::cout << "Opened: " << filename.c_str() << ", cache_size: " << cache_size << std::endl; + } + catch (std::system_error &e) + { + throw diskann::FileException(filename, e, __FUNCSIG__, __FILE__, __LINE__); + } + } + + ~cached_ofstream() + { + this->close(); + } + + void close() + { + // dump any remaining data in memory + if (cur_off > 0) + { + this->flush_cache(); + } + + if (cache_buf != 
nullptr) + { + delete[] cache_buf; + cache_buf = nullptr; + } + + if (writer.is_open()) + writer.close(); + diskann::cout << "Finished writing " << fsize << "B" << std::endl; + } + + size_t get_file_size() + { + return fsize; + } + // writes n_bytes from write_buf to the underlying ofstream/cache + void write(char *write_buf, uint64_t n_bytes) + { + assert(cache_buf != nullptr); + if (n_bytes <= (cache_size - cur_off)) + { + // case 1: cache can take all data + memcpy(cache_buf + cur_off, write_buf, n_bytes); + cur_off += n_bytes; + } + else + { + // case 2: cache cant take all data + // go to disk and write existing cache data + writer.write(cache_buf, cur_off); + fsize += cur_off; + // write the new data to disk + writer.write(write_buf, n_bytes); + fsize += n_bytes; + // memset all cache data and reset cur_off + memset(cache_buf, 0, cache_size); + cur_off = 0; + } + } + + void flush_cache() + { + assert(cache_buf != nullptr); + writer.write(cache_buf, cur_off); + fsize += cur_off; + memset(cache_buf, 0, cache_size); + cur_off = 0; + } + + void reset() + { + flush_cache(); + writer.seekp(0); + } + + private: + // underlying ofstream + std::ofstream writer; + // # bytes to cache for one shot write + uint64_t cache_size = 0; + // underlying buf for cache + char *cache_buf = nullptr; + // offset into cache_buf for cur_pos + uint64_t cur_off = 0; + + // file size + uint64_t fsize = 0; +}; diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/common_includes.h b/packages/leann-backend-diskann/third_party/DiskANN/include/common_includes.h new file mode 100644 index 0000000..e1a51bd --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/common_includes.h @@ -0,0 +1,27 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/concurrent_queue.h b/packages/leann-backend-diskann/third_party/DiskANN/include/concurrent_queue.h new file mode 100644 index 0000000..1e57bbf --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/concurrent_queue.h @@ -0,0 +1,132 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
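The cached_ifstream and cached_ofstream classes in cached_io.h above wrap a std::fstream with one in-memory staging buffer so that many small sequential reads or writes become a few large ones. A minimal round-trip looks like the sketch below; the file names, the 64 MB cache size, and the <npts, dim> header layout are illustrative assumptions, not requirements of the classes.

#include <cstdint>
#include <string>
#include "cached_io.h"

// Copy a small fixed-size header from one file to another through the caches.
void copy_header(const std::string &src, const std::string &dst)
{
    const uint64_t cache_bytes = 64 * 1024 * 1024;

    cached_ifstream in(src, cache_bytes);   // opens the file and eagerly fills the read cache
    cached_ofstream out(dst, cache_bytes);

    uint32_t npts = 0, dim = 0;
    in.read(reinterpret_cast<char *>(&npts), sizeof(npts));
    in.read(reinterpret_cast<char *>(&dim), sizeof(dim));

    out.write(reinterpret_cast<char *>(&npts), sizeof(npts));
    out.write(reinterpret_cast<char *>(&dim), sizeof(dim));
}   // cached_ofstream flushes its cache and closes the file in its destructor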
+ +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include + +namespace diskann +{ + +template class ConcurrentQueue +{ + typedef std::chrono::microseconds chrono_us_t; + typedef std::unique_lock mutex_locker; + + std::queue q; + std::mutex mut; + std::mutex push_mut; + std::mutex pop_mut; + std::condition_variable push_cv; + std::condition_variable pop_cv; + T null_T; + + public: + ConcurrentQueue() + { + } + + ConcurrentQueue(T nullT) + { + this->null_T = nullT; + } + + ~ConcurrentQueue() + { + this->push_cv.notify_all(); + this->pop_cv.notify_all(); + } + + // queue stats + uint64_t size() + { + mutex_locker lk(this->mut); + uint64_t ret = q.size(); + lk.unlock(); + return ret; + } + + bool empty() + { + return (this->size() == 0); + } + + // PUSH BACK + void push(T &new_val) + { + mutex_locker lk(this->mut); + this->q.push(new_val); + lk.unlock(); + } + + template void insert(Iterator iter_begin, Iterator iter_end) + { + mutex_locker lk(this->mut); + for (Iterator it = iter_begin; it != iter_end; it++) + { + this->q.push(*it); + } + lk.unlock(); + } + + // POP FRONT + T pop() + { + mutex_locker lk(this->mut); + if (this->q.empty()) + { + lk.unlock(); + return this->null_T; + } + else + { + T ret = this->q.front(); + this->q.pop(); + // diskann::cout << "thread_id: " << std::this_thread::get_id() << + // ", ctx: " + // << ret.ctx << "\n"; + lk.unlock(); + return ret; + } + } + + // register for notifications + void wait_for_push_notify(chrono_us_t wait_time = chrono_us_t{10}) + { + mutex_locker lk(this->push_mut); + this->push_cv.wait_for(lk, wait_time); + lk.unlock(); + } + + void wait_for_pop_notify(chrono_us_t wait_time = chrono_us_t{10}) + { + mutex_locker lk(this->pop_mut); + this->pop_cv.wait_for(lk, wait_time); + lk.unlock(); + } + + // just notify functions + void push_notify_one() + { + this->push_cv.notify_one(); + } + void push_notify_all() + { + this->push_cv.notify_all(); + } + void pop_notify_one() + { + this->pop_cv.notify_one(); + } + void pop_notify_all() + { + this->pop_cv.notify_all(); + } +}; +} // namespace diskann diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/cosine_similarity.h b/packages/leann-backend-diskann/third_party/DiskANN/include/cosine_similarity.h new file mode 100644 index 0000000..539a8b0 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/cosine_similarity.h @@ -0,0 +1,285 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#ifndef __APPLE__ +#include +#include +#include +#include "simd_utils.h" +#endif + +extern bool Avx2SupportedCPU; + +#ifdef _WINDOWS +// SIMD implementation of Cosine similarity. Taken from hnsw library. + +/** + * Non-metric Space Library + * + * Authors: Bilegsaikhan Naidan (https://github.com/bileg), Leonid Boytsov + * (http://boytsov.info). With contributions from Lawrence Cayton + * (http://lcayton.com/) and others. + * + * For the complete list of contributors and further details see: + * https://github.com/searchivarius/NonMetricSpaceLib + * + * Copyright (c) 2014 + * + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. 
+ * + */ + +namespace diskann +{ + +using namespace std; + +#define PORTABLE_ALIGN16 __declspec(align(16)) + +static float NormScalarProductSIMD2(const int8_t *pVect1, const int8_t *pVect2, uint32_t qty) +{ + if (Avx2SupportedCPU) + { + __m256 cos, p1Len, p2Len; + cos = p1Len = p2Len = _mm256_setzero_ps(); + while (qty >= 32) + { + __m256i rx = _mm256_load_si256((__m256i *)pVect1), ry = _mm256_load_si256((__m256i *)pVect2); + cos = _mm256_add_ps(cos, _mm256_mul_epi8(rx, ry)); + p1Len = _mm256_add_ps(p1Len, _mm256_mul_epi8(rx, rx)); + p2Len = _mm256_add_ps(p2Len, _mm256_mul_epi8(ry, ry)); + pVect1 += 32; + pVect2 += 32; + qty -= 32; + } + while (qty > 0) + { + __m128i rx = _mm_load_si128((__m128i *)pVect1), ry = _mm_load_si128((__m128i *)pVect2); + cos = _mm256_add_ps(cos, _mm256_mul32_pi8(rx, ry)); + p1Len = _mm256_add_ps(p1Len, _mm256_mul32_pi8(rx, rx)); + p2Len = _mm256_add_ps(p2Len, _mm256_mul32_pi8(ry, ry)); + pVect1 += 4; + pVect2 += 4; + qty -= 4; + } + cos = _mm256_hadd_ps(_mm256_hadd_ps(cos, cos), cos); + p1Len = _mm256_hadd_ps(_mm256_hadd_ps(p1Len, p1Len), p1Len); + p2Len = _mm256_hadd_ps(_mm256_hadd_ps(p2Len, p2Len), p2Len); + float denominator = max(numeric_limits::min() * 2, sqrt(p1Len.m256_f32[0] + p1Len.m256_f32[4]) * + sqrt(p2Len.m256_f32[0] + p2Len.m256_f32[4])); + float cosine = (cos.m256_f32[0] + cos.m256_f32[4]) / denominator; + + return max(float(-1), min(float(1), cosine)); + } + + __m128 cos, p1Len, p2Len; + cos = p1Len = p2Len = _mm_setzero_ps(); + __m128i rx, ry; + while (qty >= 16) + { + rx = _mm_load_si128((__m128i *)pVect1); + ry = _mm_load_si128((__m128i *)pVect2); + cos = _mm_add_ps(cos, _mm_mul_epi8(rx, ry)); + p1Len = _mm_add_ps(p1Len, _mm_mul_epi8(rx, rx)); + p2Len = _mm_add_ps(p2Len, _mm_mul_epi8(ry, ry)); + pVect1 += 16; + pVect2 += 16; + qty -= 16; + } + while (qty > 0) + { + rx = _mm_load_si128((__m128i *)pVect1); + ry = _mm_load_si128((__m128i *)pVect2); + cos = _mm_add_ps(cos, _mm_mul32_pi8(rx, ry)); + p1Len = _mm_add_ps(p1Len, _mm_mul32_pi8(rx, rx)); + p2Len = _mm_add_ps(p2Len, _mm_mul32_pi8(ry, ry)); + pVect1 += 4; + pVect2 += 4; + qty -= 4; + } + cos = _mm_hadd_ps(_mm_hadd_ps(cos, cos), cos); + p1Len = _mm_hadd_ps(_mm_hadd_ps(p1Len, p1Len), p1Len); + p2Len = _mm_hadd_ps(_mm_hadd_ps(p2Len, p2Len), p2Len); + float norm1 = p1Len.m128_f32[0]; + float norm2 = p2Len.m128_f32[0]; + + static const float eps = numeric_limits::min() * 2; + + if (norm1 < eps) + { /* + * This shouldn't normally happen for this space, but + * if it does, we don't want to get NANs + */ + if (norm2 < eps) + { + return 1; + } + return 0; + } + /* + * Sometimes due to rounding errors, we get values > 1 or < -1. + * This throws off other functions that use scalar product, e.g., acos + */ + return max(float(-1), min(float(1), cos.m128_f32[0] / sqrt(norm1) / sqrt(norm2))); +} + +static float NormScalarProductSIMD(const float *pVect1, const float *pVect2, uint32_t qty) +{ + // Didn't get significant performance gain compared with 128bit version. 
+ static const float eps = numeric_limits::min() * 2; + + if (Avx2SupportedCPU) + { + uint32_t qty8 = qty / 8; + + const float *pEnd1 = pVect1 + 8 * qty8; + const float *pEnd2 = pVect1 + qty; + + __m256 v1, v2; + __m256 sum_prod = _mm256_set_ps(0, 0, 0, 0, 0, 0, 0, 0); + __m256 sum_square1 = sum_prod; + __m256 sum_square2 = sum_prod; + + while (pVect1 < pEnd1) + { + v1 = _mm256_loadu_ps(pVect1); + pVect1 += 8; + v2 = _mm256_loadu_ps(pVect2); + pVect2 += 8; + sum_prod = _mm256_add_ps(sum_prod, _mm256_mul_ps(v1, v2)); + sum_square1 = _mm256_add_ps(sum_square1, _mm256_mul_ps(v1, v1)); + sum_square2 = _mm256_add_ps(sum_square2, _mm256_mul_ps(v2, v2)); + } + + float PORTABLE_ALIGN16 TmpResProd[8]; + float PORTABLE_ALIGN16 TmpResSquare1[8]; + float PORTABLE_ALIGN16 TmpResSquare2[8]; + + _mm256_store_ps(TmpResProd, sum_prod); + _mm256_store_ps(TmpResSquare1, sum_square1); + _mm256_store_ps(TmpResSquare2, sum_square2); + + float sum = 0.0f; + float norm1 = 0.0f; + float norm2 = 0.0f; + for (uint32_t i = 0; i < 8; ++i) + { + sum += TmpResProd[i]; + norm1 += TmpResSquare1[i]; + norm2 += TmpResSquare2[i]; + } + + while (pVect1 < pEnd2) + { + sum += (*pVect1) * (*pVect2); + norm1 += (*pVect1) * (*pVect1); + norm2 += (*pVect2) * (*pVect2); + + ++pVect1; + ++pVect2; + } + + if (norm1 < eps) + { + return norm2 < eps ? 1.0f : 0.0f; + } + + return max(float(-1), min(float(1), sum / sqrt(norm1) / sqrt(norm2))); + } + + __m128 v1, v2; + __m128 sum_prod = _mm_set1_ps(0); + __m128 sum_square1 = sum_prod; + __m128 sum_square2 = sum_prod; + + while (qty >= 4) + { + v1 = _mm_loadu_ps(pVect1); + pVect1 += 4; + v2 = _mm_loadu_ps(pVect2); + pVect2 += 4; + sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2)); + sum_square1 = _mm_add_ps(sum_square1, _mm_mul_ps(v1, v1)); + sum_square2 = _mm_add_ps(sum_square2, _mm_mul_ps(v2, v2)); + + qty -= 4; + } + + float sum = sum_prod.m128_f32[0] + sum_prod.m128_f32[1] + sum_prod.m128_f32[2] + sum_prod.m128_f32[3]; + float norm1 = sum_square1.m128_f32[0] + sum_square1.m128_f32[1] + sum_square1.m128_f32[2] + sum_square1.m128_f32[3]; + float norm2 = sum_square2.m128_f32[0] + sum_square2.m128_f32[1] + sum_square2.m128_f32[2] + sum_square2.m128_f32[3]; + + if (norm1 < eps) + { + return norm2 < eps ? 
1.0f : 0.0f; + } + + return max(float(-1), min(float(1), sum / sqrt(norm1) / sqrt(norm2))); +} + +static float NormScalarProductSIMD2(const float *pVect1, const float *pVect2, uint32_t qty) +{ + return NormScalarProductSIMD(pVect1, pVect2, qty); +} + +template static float CosineSimilarity2(const T *p1, const T *p2, uint32_t qty) +{ + return std::max(0.0f, 1.0f - NormScalarProductSIMD2(p1, p2, qty)); +} + +// static template float CosineSimilarity2<__int8>(const __int8* pVect1, +// const __int8* pVect2, size_t qty); + +// static template float CosineSimilarity2(const float* pVect1, +// const float* pVect2, size_t qty); + +template static void CosineSimilarityNormalize(T *pVector, uint32_t qty) +{ + T sum = 0; + for (uint32_t i = 0; i < qty; ++i) + { + sum += pVector[i] * pVector[i]; + } + sum = 1 / sqrt(sum); + if (sum == 0) + { + sum = numeric_limits::min(); + } + for (uint32_t i = 0; i < qty; ++i) + { + pVector[i] *= sum; + } +} + +// template static void CosineSimilarityNormalize(float* pVector, +// size_t qty); +// template static void CosineSimilarityNormalize(double* pVector, +// size_t qty); + +template <> void CosineSimilarityNormalize(__int8 * /*pVector*/, uint32_t /*qty*/) +{ + throw std::runtime_error("For int8 type vector, you can not use cosine distance!"); +} + +template <> void CosineSimilarityNormalize(__int16 * /*pVector*/, uint32_t /*qty*/) +{ + throw std::runtime_error("For int16 type vector, you can not use cosine distance!"); +} + +template <> void CosineSimilarityNormalize(int * /*pVector*/, uint32_t /*qty*/) +{ + throw std::runtime_error("For int type vector, you can not use cosine distance!"); +} +} // namespace diskann +#endif diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/defaults.h b/packages/leann-backend-diskann/third_party/DiskANN/include/defaults.h new file mode 100644 index 0000000..ef1750f --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/defaults.h @@ -0,0 +1,34 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once +#include + +namespace diskann +{ +namespace defaults +{ +const float ALPHA = 1.2f; +const uint32_t NUM_THREADS = 0; +const uint32_t MAX_OCCLUSION_SIZE = 750; +const bool HAS_LABELS = false; +const uint32_t FILTER_LIST_SIZE = 0; +const uint32_t NUM_FROZEN_POINTS_STATIC = 0; +const uint32_t NUM_FROZEN_POINTS_DYNAMIC = 1; + +// In-mem index related limits +const float GRAPH_SLACK_FACTOR = 1.3f; + +// SSD Index related limits +const uint64_t MAX_GRAPH_DEGREE = 512; +const uint64_t SECTOR_LEN = 4096; +const uint64_t MAX_N_SECTOR_READS = 128; + +// following constants should always be specified, but are useful as a +// sensible default at cli / python boundaries +const uint32_t MAX_DEGREE = 64; +const uint32_t BUILD_LIST_SIZE = 100; +const uint32_t SATURATE_GRAPH = false; +const uint32_t SEARCH_LIST_SIZE = 100; +} // namespace defaults +} // namespace diskann diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/disk_utils.h b/packages/leann-backend-diskann/third_party/DiskANN/include/disk_utils.h new file mode 100644 index 0000000..08f046d --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/disk_utils.h @@ -0,0 +1,108 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
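The Windows-only SIMD kernels in cosine_similarity.h above all compute the same quantity, so a scalar reference makes the intent easier to check. The sketch below mirrors the float path: accumulate the dot product and both squared norms, handle near-zero vectors with the same epsilon convention, and clamp the ratio to [-1, 1]; CosineSimilarity2 then turns the result into a distance as max(0, 1 - similarity).

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>

// Scalar equivalent of NormScalarProductSIMD for float inputs.
static float norm_scalar_product_ref(const float *a, const float *b, uint32_t qty)
{
    static const float eps = std::numeric_limits<float>::min() * 2;
    float dot = 0.0f, norm1 = 0.0f, norm2 = 0.0f;
    for (uint32_t i = 0; i < qty; ++i)
    {
        dot += a[i] * b[i];
        norm1 += a[i] * a[i];
        norm2 += b[i] * b[i];
    }
    if (norm1 < eps)                    // degenerate vectors: 1 if both are ~zero, else 0
        return norm2 < eps ? 1.0f : 0.0f;
    return std::max(-1.0f, std::min(1.0f, dot / std::sqrt(norm1) / std::sqrt(norm2)));
}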
+ +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef __APPLE__ +#else +#include +#endif + +#ifdef _WINDOWS +#include +typedef HANDLE FileHandle; +#else +#include +typedef int FileHandle; +#endif + +#include "cached_io.h" +#include "common_includes.h" + +#include "utils.h" +#include "windows_customizations.h" + +namespace diskann +{ +const size_t MAX_SAMPLE_POINTS_FOR_WARMUP = 100000; +const double PQ_TRAINING_SET_FRACTION = 0.1; +const double SPACE_FOR_CACHED_NODES_IN_GB = 0.25; +const double THRESHOLD_FOR_CACHING_IN_GB = 1.0; +const uint32_t NUM_NODES_TO_CACHE = 250000; +const uint32_t WARMUP_L = 20; +const uint32_t NUM_KMEANS_REPS = 12; + +template class PQFlashIndex; + +DISKANN_DLLEXPORT double get_memory_budget(const std::string &mem_budget_str); +DISKANN_DLLEXPORT double get_memory_budget(double search_ram_budget_in_gb); +DISKANN_DLLEXPORT void add_new_file_to_single_index(std::string index_file, std::string new_file); + +DISKANN_DLLEXPORT size_t calculate_num_pq_chunks(double final_index_ram_limit, size_t points_num, uint32_t dim); + +DISKANN_DLLEXPORT void read_idmap(const std::string &fname, std::vector &ivecs); + +#ifdef EXEC_ENV_OLS +template +DISKANN_DLLEXPORT T *load_warmup(MemoryMappedFiles &files, const std::string &cache_warmup_file, uint64_t &warmup_num, + uint64_t warmup_dim, uint64_t warmup_aligned_dim); +#else +template +DISKANN_DLLEXPORT T *load_warmup(const std::string &cache_warmup_file, uint64_t &warmup_num, uint64_t warmup_dim, + uint64_t warmup_aligned_dim); +#endif + +DISKANN_DLLEXPORT int merge_shards(const std::string &vamana_prefix, const std::string &vamana_suffix, + const std::string &idmaps_prefix, const std::string &idmaps_suffix, + const uint64_t nshards, uint32_t max_degree, const std::string &output_vamana, + const std::string &medoids_file, bool use_filters = false, + const std::string &labels_to_medoids_file = std::string("")); + +DISKANN_DLLEXPORT void extract_shard_labels(const std::string &in_label_file, const std::string &shard_ids_bin, + const std::string &shard_label_file); + +template +DISKANN_DLLEXPORT std::string preprocess_base_file(const std::string &infile, const std::string &indexPrefix, + diskann::Metric &distMetric); + +template +DISKANN_DLLEXPORT int build_merged_vamana_index(std::string base_file, diskann::Metric _compareMetric, uint32_t L, + uint32_t R, double sampling_rate, double ram_budget, + std::string mem_index_path, std::string medoids_file, + std::string centroids_file, size_t build_pq_bytes, bool use_opq, + uint32_t num_threads, bool use_filters = false, + const std::string &label_file = std::string(""), + const std::string &labels_to_medoids_file = std::string(""), + const std::string &universal_label = "", const uint32_t Lf = 0); + +template +DISKANN_DLLEXPORT uint32_t optimize_beamwidth(std::unique_ptr> &_pFlashIndex, + T *tuning_sample, uint64_t tuning_sample_num, + uint64_t tuning_sample_aligned_dim, uint32_t L, uint32_t nthreads, + uint32_t start_bw = 2); + +template +DISKANN_DLLEXPORT int build_disk_index( + const char *dataFilePath, const char *indexFilePath, const char *indexBuildParameters, + diskann::Metric _compareMetric, bool use_opq = false, + const std::string &codebook_prefix = "", // default is empty for no codebook pass in + bool use_filters = false, + const std::string &label_file = std::string(""), // default is empty string for no label_file + const std::string &universal_label = "", const uint32_t filter_threshold = 0, + const 
uint32_t Lf = 0); // default is empty string for no universal label + +template +DISKANN_DLLEXPORT void create_disk_layout(const std::string base_file, const std::string mem_index_file, + const std::string output_file, + const std::string reorder_data_file = std::string("")); + +} // namespace diskann diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/distance.h b/packages/leann-backend-diskann/third_party/DiskANN/include/distance.h new file mode 100644 index 0000000..7198308 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/distance.h @@ -0,0 +1,236 @@ +#pragma once +#include "windows_customizations.h" +#include +#include + +namespace diskann +{ +enum Metric +{ + L2 = 0, + INNER_PRODUCT = 1, + COSINE = 2, + FAST_L2 = 3 +}; + +template class Distance +{ + public: + DISKANN_DLLEXPORT Distance(diskann::Metric dist_metric) : _distance_metric(dist_metric) + { + } + + // distance comparison function + DISKANN_DLLEXPORT virtual float compare(const T *a, const T *b, uint32_t length) const = 0; + + // Needed only for COSINE-BYTE and INNER_PRODUCT-BYTE + DISKANN_DLLEXPORT virtual float compare(const T *a, const T *b, const float normA, const float normB, + uint32_t length) const; + + // For MIPS, normalization adds an extra dimension to the vectors. + // This function lets callers know if the normalization process + // changes the dimension. + DISKANN_DLLEXPORT virtual uint32_t post_normalization_dimension(uint32_t orig_dimension) const; + + DISKANN_DLLEXPORT virtual diskann::Metric get_metric() const; + + // This is for efficiency. If no normalization is required, the callers + // can simply ignore the normalize_data_for_build() function. + DISKANN_DLLEXPORT virtual bool preprocessing_required() const; + + // Check the preprocessing_required() function before calling this. + // Clients can call the function like this: + // + // if (metric->preprocessing_required()){ + // T* normalized_data_batch; + // Split data into batches of batch_size and for each, call: + // metric->preprocess_base_points(data_batch, batch_size); + // + // TODO: This does not take into account the case for SSD inner product + // where the dimensions change after normalization. + DISKANN_DLLEXPORT virtual void preprocess_base_points(T *original_data, const size_t orig_dim, + const size_t num_points); + + // Invokes normalization for a single vector during search. The scratch space + // has to be created by the caller keeping track of the fact that + // normalization might change the dimension of the query vector. + DISKANN_DLLEXPORT virtual void preprocess_query(const T *query_vec, const size_t query_dim, T *scratch_query); + + // If an algorithm has a requirement that some data be aligned to a certain + // boundary it can use this function to indicate that requirement. Currently, + // we are setting it to 8 because that works well for AVX2. If we have AVX512 + // implementations of distance algos, they might have to set this to 16 + // (depending on how they are implemented) + DISKANN_DLLEXPORT virtual size_t get_required_alignment() const; + + // Providing a default implementation for the virtual destructor because we + // don't expect most metric implementations to need it. 
+ DISKANN_DLLEXPORT virtual ~Distance() = default; + + protected: + diskann::Metric _distance_metric; + size_t _alignment_factor = 8; +}; + +class DistanceCosineInt8 : public Distance +{ + public: + DistanceCosineInt8() : Distance(diskann::Metric::COSINE) + { + } + DISKANN_DLLEXPORT virtual float compare(const int8_t *a, const int8_t *b, uint32_t length) const; +}; + +class DistanceL2Int8 : public Distance +{ + public: + DistanceL2Int8() : Distance(diskann::Metric::L2) + { + } + DISKANN_DLLEXPORT virtual float compare(const int8_t *a, const int8_t *b, uint32_t size) const; +}; + +// AVX implementations. Borrowed from HNSW code. +class AVXDistanceL2Int8 : public Distance +{ + public: + AVXDistanceL2Int8() : Distance(diskann::Metric::L2) + { + } + DISKANN_DLLEXPORT virtual float compare(const int8_t *a, const int8_t *b, uint32_t length) const; +}; + +class DistanceCosineFloat : public Distance +{ + public: + DistanceCosineFloat() : Distance(diskann::Metric::COSINE) + { + } + DISKANN_DLLEXPORT virtual float compare(const float *a, const float *b, uint32_t length) const; +}; + +class DistanceL2Float : public Distance +{ + public: + DistanceL2Float() : Distance(diskann::Metric::L2) + { + } + +#ifdef _WINDOWS + DISKANN_DLLEXPORT virtual float compare(const float *a, const float *b, uint32_t size) const; +#else + DISKANN_DLLEXPORT virtual float compare(const float *a, const float *b, uint32_t size) const __attribute__((hot)); +#endif +}; + +class AVXDistanceL2Float : public Distance +{ + public: + AVXDistanceL2Float() : Distance(diskann::Metric::L2) + { + } + DISKANN_DLLEXPORT virtual float compare(const float *a, const float *b, uint32_t length) const; +}; + +template class SlowDistanceL2 : public Distance +{ + public: + SlowDistanceL2() : Distance(diskann::Metric::L2) + { + } + DISKANN_DLLEXPORT virtual float compare(const T *a, const T *b, uint32_t length) const; +}; + +class SlowDistanceCosineUInt8 : public Distance +{ + public: + SlowDistanceCosineUInt8() : Distance(diskann::Metric::COSINE) + { + } + DISKANN_DLLEXPORT virtual float compare(const uint8_t *a, const uint8_t *b, uint32_t length) const; +}; + +class DistanceL2UInt8 : public Distance +{ + public: + DistanceL2UInt8() : Distance(diskann::Metric::L2) + { + } + DISKANN_DLLEXPORT virtual float compare(const uint8_t *a, const uint8_t *b, uint32_t size) const; +}; + +template class DistanceInnerProduct : public Distance +{ + public: + DistanceInnerProduct() : Distance(diskann::Metric::INNER_PRODUCT) + { + } + + DistanceInnerProduct(diskann::Metric metric) : Distance(metric) + { + } + inline float inner_product(const T *a, const T *b, unsigned size) const; + + inline float compare(const T *a, const T *b, unsigned size) const + { + float result = inner_product(a, b, size); + // if (result < 0) + // return std::numeric_limits::max(); + // else + return -result; + } +}; + +template class DistanceFastL2 : public DistanceInnerProduct +{ + // currently defined only for float. + // templated for future use. 
+ public: + DistanceFastL2() : DistanceInnerProduct(diskann::Metric::FAST_L2) + { + } + float norm(const T *a, unsigned size) const; + float compare(const T *a, const T *b, float norm, unsigned size) const; +}; + +class AVXDistanceInnerProductFloat : public Distance +{ + public: + AVXDistanceInnerProductFloat() : Distance(diskann::Metric::INNER_PRODUCT) + { + } + DISKANN_DLLEXPORT virtual float compare(const float *a, const float *b, uint32_t length) const; +}; + +class AVXNormalizedCosineDistanceFloat : public Distance +{ + private: + AVXDistanceInnerProductFloat _innerProduct; + + protected: + void normalize_and_copy(const float *a, uint32_t length, float *a_norm) const; + + public: + AVXNormalizedCosineDistanceFloat() : Distance(diskann::Metric::COSINE) + { + } + DISKANN_DLLEXPORT virtual float compare(const float *a, const float *b, uint32_t length) const override + { + // Inner product returns negative values to indicate distance. + // This will ensure that cosine is between -1 and 1. + return 1.0f + _innerProduct.compare(a, b, length); + } + DISKANN_DLLEXPORT virtual uint32_t post_normalization_dimension(uint32_t orig_dimension) const override; + + DISKANN_DLLEXPORT virtual bool preprocessing_required() const override; + + DISKANN_DLLEXPORT virtual void preprocess_base_points(float *original_data, const size_t orig_dim, + const size_t num_points) override; + + DISKANN_DLLEXPORT virtual void preprocess_query(const float *query_vec, const size_t query_dim, + float *scratch_query_vector) override; +}; + +template Distance *get_distance_function(Metric m); + +} // namespace diskann diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/embedding.pb.h b/packages/leann-backend-diskann/third_party/DiskANN/include/embedding.pb.h new file mode 100644 index 0000000..9f5c2b7 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/embedding.pb.h @@ -0,0 +1,675 @@ +// Generated by the protocol buffer compiler. DO NOT EDIT! +// source: embedding.proto + +#ifndef GOOGLE_PROTOBUF_INCLUDED_embedding_2eproto +#define GOOGLE_PROTOBUF_INCLUDED_embedding_2eproto + +#include +#include + +#include +#if PROTOBUF_VERSION < 3012000 +#error This file was generated by a newer version of protoc which is +#error incompatible with your Protocol Buffer headers. Please update +#error your headers. +#endif +#if 3012004 < PROTOBUF_MIN_PROTOC_VERSION +#error This file was generated by an older version of protoc which is +#error incompatible with your Protocol Buffer headers. Please +#error regenerate this file with a newer version of protoc. +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include // IWYU pragma: export +#include // IWYU pragma: export +#include +// @@protoc_insertion_point(includes) +#include +#define PROTOBUF_INTERNAL_EXPORT_embedding_2eproto +PROTOBUF_NAMESPACE_OPEN +namespace internal { +class AnyMetadata; +} // namespace internal +PROTOBUF_NAMESPACE_CLOSE + +// Internal implementation detail -- do not use these members. 
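Stepping back to distance.h above: the preprocessing_required() comment describes the client-side contract only in pseudo-code, so a concrete sketch follows. The <float> instantiation, the COSINE metric, and the assumption that the pointer returned by get_distance_function can simply be used by the caller are illustrative choices, not guarantees made by the header.

#include <cstdint>
#include <vector>
#include "distance.h"

// Preprocess base points once at build time and each query at search time,
// as the Distance<T> comments prescribe. Ownership of `metric` is not
// specified in the header shown above.
float cosine_distance_sketch(float *base, size_t num_points, const float *query, size_t dim)
{
    auto *metric = diskann::get_distance_function<float>(diskann::Metric::COSINE);

    if (!metric->preprocessing_required())
        return metric->compare(query, base, (uint32_t)dim);

    metric->preprocess_base_points(base, dim, num_points);   // e.g. L2-normalize in place

    // Normalization may change the query dimension (relevant for inner product on SSD).
    std::vector<float> scratch(metric->post_normalization_dimension((uint32_t)dim));
    metric->preprocess_query(query, dim, scratch.data());
    return metric->compare(scratch.data(), base, (uint32_t)dim);
}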
+struct TableStruct_embedding_2eproto { + static const ::PROTOBUF_NAMESPACE_ID::internal::ParseTableField entries[] + PROTOBUF_SECTION_VARIABLE(protodesc_cold); + static const ::PROTOBUF_NAMESPACE_ID::internal::AuxillaryParseTableField aux[] + PROTOBUF_SECTION_VARIABLE(protodesc_cold); + static const ::PROTOBUF_NAMESPACE_ID::internal::ParseTable schema[2] + PROTOBUF_SECTION_VARIABLE(protodesc_cold); + static const ::PROTOBUF_NAMESPACE_ID::internal::FieldMetadata field_metadata[]; + static const ::PROTOBUF_NAMESPACE_ID::internal::SerializationTable serialization_table[]; + static const ::PROTOBUF_NAMESPACE_ID::uint32 offsets[]; +}; +extern const ::PROTOBUF_NAMESPACE_ID::internal::DescriptorTable descriptor_table_embedding_2eproto; +namespace protoembedding { +class NodeEmbeddingRequest; +class NodeEmbeddingRequestDefaultTypeInternal; +extern NodeEmbeddingRequestDefaultTypeInternal _NodeEmbeddingRequest_default_instance_; +class NodeEmbeddingResponse; +class NodeEmbeddingResponseDefaultTypeInternal; +extern NodeEmbeddingResponseDefaultTypeInternal _NodeEmbeddingResponse_default_instance_; +} // namespace protoembedding +PROTOBUF_NAMESPACE_OPEN +template<> ::protoembedding::NodeEmbeddingRequest* Arena::CreateMaybeMessage<::protoembedding::NodeEmbeddingRequest>(Arena*); +template<> ::protoembedding::NodeEmbeddingResponse* Arena::CreateMaybeMessage<::protoembedding::NodeEmbeddingResponse>(Arena*); +PROTOBUF_NAMESPACE_CLOSE +namespace protoembedding { + +// =================================================================== + +class NodeEmbeddingRequest PROTOBUF_FINAL : + public ::PROTOBUF_NAMESPACE_ID::Message /* @@protoc_insertion_point(class_definition:protoembedding.NodeEmbeddingRequest) */ { + public: + inline NodeEmbeddingRequest() : NodeEmbeddingRequest(nullptr) {}; + virtual ~NodeEmbeddingRequest(); + + NodeEmbeddingRequest(const NodeEmbeddingRequest& from); + NodeEmbeddingRequest(NodeEmbeddingRequest&& from) noexcept + : NodeEmbeddingRequest() { + *this = ::std::move(from); + } + + inline NodeEmbeddingRequest& operator=(const NodeEmbeddingRequest& from) { + CopyFrom(from); + return *this; + } + inline NodeEmbeddingRequest& operator=(NodeEmbeddingRequest&& from) noexcept { + if (GetArena() == from.GetArena()) { + if (this != &from) InternalSwap(&from); + } else { + CopyFrom(from); + } + return *this; + } + + static const ::PROTOBUF_NAMESPACE_ID::Descriptor* descriptor() { + return GetDescriptor(); + } + static const ::PROTOBUF_NAMESPACE_ID::Descriptor* GetDescriptor() { + return GetMetadataStatic().descriptor; + } + static const ::PROTOBUF_NAMESPACE_ID::Reflection* GetReflection() { + return GetMetadataStatic().reflection; + } + static const NodeEmbeddingRequest& default_instance(); + + static void InitAsDefaultInstance(); // FOR INTERNAL USE ONLY + static inline const NodeEmbeddingRequest* internal_default_instance() { + return reinterpret_cast( + &_NodeEmbeddingRequest_default_instance_); + } + static constexpr int kIndexInFileMessages = + 0; + + friend void swap(NodeEmbeddingRequest& a, NodeEmbeddingRequest& b) { + a.Swap(&b); + } + inline void Swap(NodeEmbeddingRequest* other) { + if (other == this) return; + if (GetArena() == other->GetArena()) { + InternalSwap(other); + } else { + ::PROTOBUF_NAMESPACE_ID::internal::GenericSwap(this, other); + } + } + void UnsafeArenaSwap(NodeEmbeddingRequest* other) { + if (other == this) return; + GOOGLE_DCHECK(GetArena() == other->GetArena()); + InternalSwap(other); + } + + // implements Message ---------------------------------------------- + + 
inline NodeEmbeddingRequest* New() const final { + return CreateMaybeMessage(nullptr); + } + + NodeEmbeddingRequest* New(::PROTOBUF_NAMESPACE_ID::Arena* arena) const final { + return CreateMaybeMessage(arena); + } + void CopyFrom(const ::PROTOBUF_NAMESPACE_ID::Message& from) final; + void MergeFrom(const ::PROTOBUF_NAMESPACE_ID::Message& from) final; + void CopyFrom(const NodeEmbeddingRequest& from); + void MergeFrom(const NodeEmbeddingRequest& from); + PROTOBUF_ATTRIBUTE_REINITIALIZES void Clear() final; + bool IsInitialized() const final; + + size_t ByteSizeLong() const final; + const char* _InternalParse(const char* ptr, ::PROTOBUF_NAMESPACE_ID::internal::ParseContext* ctx) final; + ::PROTOBUF_NAMESPACE_ID::uint8* _InternalSerialize( + ::PROTOBUF_NAMESPACE_ID::uint8* target, ::PROTOBUF_NAMESPACE_ID::io::EpsCopyOutputStream* stream) const final; + int GetCachedSize() const final { return _cached_size_.Get(); } + + private: + inline void SharedCtor(); + inline void SharedDtor(); + void SetCachedSize(int size) const final; + void InternalSwap(NodeEmbeddingRequest* other); + friend class ::PROTOBUF_NAMESPACE_ID::internal::AnyMetadata; + static ::PROTOBUF_NAMESPACE_ID::StringPiece FullMessageName() { + return "protoembedding.NodeEmbeddingRequest"; + } + protected: + explicit NodeEmbeddingRequest(::PROTOBUF_NAMESPACE_ID::Arena* arena); + private: + static void ArenaDtor(void* object); + inline void RegisterArenaDtor(::PROTOBUF_NAMESPACE_ID::Arena* arena); + public: + + ::PROTOBUF_NAMESPACE_ID::Metadata GetMetadata() const final; + private: + static ::PROTOBUF_NAMESPACE_ID::Metadata GetMetadataStatic() { + ::PROTOBUF_NAMESPACE_ID::internal::AssignDescriptors(&::descriptor_table_embedding_2eproto); + return ::descriptor_table_embedding_2eproto.file_level_metadata[kIndexInFileMessages]; + } + + public: + + // nested types ---------------------------------------------------- + + // accessors ------------------------------------------------------- + + enum : int { + kNodeIdsFieldNumber = 1, + }; + // repeated uint32 node_ids = 1; + int node_ids_size() const; + private: + int _internal_node_ids_size() const; + public: + void clear_node_ids(); + private: + ::PROTOBUF_NAMESPACE_ID::uint32 _internal_node_ids(int index) const; + const ::PROTOBUF_NAMESPACE_ID::RepeatedField< ::PROTOBUF_NAMESPACE_ID::uint32 >& + _internal_node_ids() const; + void _internal_add_node_ids(::PROTOBUF_NAMESPACE_ID::uint32 value); + ::PROTOBUF_NAMESPACE_ID::RepeatedField< ::PROTOBUF_NAMESPACE_ID::uint32 >* + _internal_mutable_node_ids(); + public: + ::PROTOBUF_NAMESPACE_ID::uint32 node_ids(int index) const; + void set_node_ids(int index, ::PROTOBUF_NAMESPACE_ID::uint32 value); + void add_node_ids(::PROTOBUF_NAMESPACE_ID::uint32 value); + const ::PROTOBUF_NAMESPACE_ID::RepeatedField< ::PROTOBUF_NAMESPACE_ID::uint32 >& + node_ids() const; + ::PROTOBUF_NAMESPACE_ID::RepeatedField< ::PROTOBUF_NAMESPACE_ID::uint32 >* + mutable_node_ids(); + + // @@protoc_insertion_point(class_scope:protoembedding.NodeEmbeddingRequest) + private: + class _Internal; + + template friend class ::PROTOBUF_NAMESPACE_ID::Arena::InternalHelper; + typedef void InternalArenaConstructable_; + typedef void DestructorSkippable_; + ::PROTOBUF_NAMESPACE_ID::RepeatedField< ::PROTOBUF_NAMESPACE_ID::uint32 > node_ids_; + mutable std::atomic _node_ids_cached_byte_size_; + mutable ::PROTOBUF_NAMESPACE_ID::internal::CachedSize _cached_size_; + friend struct ::TableStruct_embedding_2eproto; +}; +// ------------------------------------------------------------------- + 
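NodeEmbeddingRequest above is ordinary protoc-generated code for a message whose only field is `repeated uint32 node_ids = 1`, so producing a request on the wire uses the generated accessors plus the standard protobuf serialization API. The helper name below is made up for illustration; the response type declared next carries the raw embeddings_data bytes together with dimensions and missing_ids.

#include <cstdint>
#include <string>
#include <vector>
#include "embedding.pb.h"

// Pack the node ids whose embeddings we want into a wire-format request.
std::string make_embedding_request(const std::vector<uint32_t> &node_ids)
{
    protoembedding::NodeEmbeddingRequest req;
    for (uint32_t id : node_ids)
        req.add_node_ids(id);          // generated accessor for `repeated uint32 node_ids = 1`

    std::string wire;
    req.SerializeToString(&wire);      // inherited from the protobuf Message base class
    return wire;
}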
+class NodeEmbeddingResponse PROTOBUF_FINAL : + public ::PROTOBUF_NAMESPACE_ID::Message /* @@protoc_insertion_point(class_definition:protoembedding.NodeEmbeddingResponse) */ { + public: + inline NodeEmbeddingResponse() : NodeEmbeddingResponse(nullptr) {}; + virtual ~NodeEmbeddingResponse(); + + NodeEmbeddingResponse(const NodeEmbeddingResponse& from); + NodeEmbeddingResponse(NodeEmbeddingResponse&& from) noexcept + : NodeEmbeddingResponse() { + *this = ::std::move(from); + } + + inline NodeEmbeddingResponse& operator=(const NodeEmbeddingResponse& from) { + CopyFrom(from); + return *this; + } + inline NodeEmbeddingResponse& operator=(NodeEmbeddingResponse&& from) noexcept { + if (GetArena() == from.GetArena()) { + if (this != &from) InternalSwap(&from); + } else { + CopyFrom(from); + } + return *this; + } + + static const ::PROTOBUF_NAMESPACE_ID::Descriptor* descriptor() { + return GetDescriptor(); + } + static const ::PROTOBUF_NAMESPACE_ID::Descriptor* GetDescriptor() { + return GetMetadataStatic().descriptor; + } + static const ::PROTOBUF_NAMESPACE_ID::Reflection* GetReflection() { + return GetMetadataStatic().reflection; + } + static const NodeEmbeddingResponse& default_instance(); + + static void InitAsDefaultInstance(); // FOR INTERNAL USE ONLY + static inline const NodeEmbeddingResponse* internal_default_instance() { + return reinterpret_cast( + &_NodeEmbeddingResponse_default_instance_); + } + static constexpr int kIndexInFileMessages = + 1; + + friend void swap(NodeEmbeddingResponse& a, NodeEmbeddingResponse& b) { + a.Swap(&b); + } + inline void Swap(NodeEmbeddingResponse* other) { + if (other == this) return; + if (GetArena() == other->GetArena()) { + InternalSwap(other); + } else { + ::PROTOBUF_NAMESPACE_ID::internal::GenericSwap(this, other); + } + } + void UnsafeArenaSwap(NodeEmbeddingResponse* other) { + if (other == this) return; + GOOGLE_DCHECK(GetArena() == other->GetArena()); + InternalSwap(other); + } + + // implements Message ---------------------------------------------- + + inline NodeEmbeddingResponse* New() const final { + return CreateMaybeMessage(nullptr); + } + + NodeEmbeddingResponse* New(::PROTOBUF_NAMESPACE_ID::Arena* arena) const final { + return CreateMaybeMessage(arena); + } + void CopyFrom(const ::PROTOBUF_NAMESPACE_ID::Message& from) final; + void MergeFrom(const ::PROTOBUF_NAMESPACE_ID::Message& from) final; + void CopyFrom(const NodeEmbeddingResponse& from); + void MergeFrom(const NodeEmbeddingResponse& from); + PROTOBUF_ATTRIBUTE_REINITIALIZES void Clear() final; + bool IsInitialized() const final; + + size_t ByteSizeLong() const final; + const char* _InternalParse(const char* ptr, ::PROTOBUF_NAMESPACE_ID::internal::ParseContext* ctx) final; + ::PROTOBUF_NAMESPACE_ID::uint8* _InternalSerialize( + ::PROTOBUF_NAMESPACE_ID::uint8* target, ::PROTOBUF_NAMESPACE_ID::io::EpsCopyOutputStream* stream) const final; + int GetCachedSize() const final { return _cached_size_.Get(); } + + private: + inline void SharedCtor(); + inline void SharedDtor(); + void SetCachedSize(int size) const final; + void InternalSwap(NodeEmbeddingResponse* other); + friend class ::PROTOBUF_NAMESPACE_ID::internal::AnyMetadata; + static ::PROTOBUF_NAMESPACE_ID::StringPiece FullMessageName() { + return "protoembedding.NodeEmbeddingResponse"; + } + protected: + explicit NodeEmbeddingResponse(::PROTOBUF_NAMESPACE_ID::Arena* arena); + private: + static void ArenaDtor(void* object); + inline void RegisterArenaDtor(::PROTOBUF_NAMESPACE_ID::Arena* arena); + public: + + 
::PROTOBUF_NAMESPACE_ID::Metadata GetMetadata() const final; + private: + static ::PROTOBUF_NAMESPACE_ID::Metadata GetMetadataStatic() { + ::PROTOBUF_NAMESPACE_ID::internal::AssignDescriptors(&::descriptor_table_embedding_2eproto); + return ::descriptor_table_embedding_2eproto.file_level_metadata[kIndexInFileMessages]; + } + + public: + + // nested types ---------------------------------------------------- + + // accessors ------------------------------------------------------- + + enum : int { + kDimensionsFieldNumber = 2, + kMissingIdsFieldNumber = 3, + kEmbeddingsDataFieldNumber = 1, + }; + // repeated int32 dimensions = 2; + int dimensions_size() const; + private: + int _internal_dimensions_size() const; + public: + void clear_dimensions(); + private: + ::PROTOBUF_NAMESPACE_ID::int32 _internal_dimensions(int index) const; + const ::PROTOBUF_NAMESPACE_ID::RepeatedField< ::PROTOBUF_NAMESPACE_ID::int32 >& + _internal_dimensions() const; + void _internal_add_dimensions(::PROTOBUF_NAMESPACE_ID::int32 value); + ::PROTOBUF_NAMESPACE_ID::RepeatedField< ::PROTOBUF_NAMESPACE_ID::int32 >* + _internal_mutable_dimensions(); + public: + ::PROTOBUF_NAMESPACE_ID::int32 dimensions(int index) const; + void set_dimensions(int index, ::PROTOBUF_NAMESPACE_ID::int32 value); + void add_dimensions(::PROTOBUF_NAMESPACE_ID::int32 value); + const ::PROTOBUF_NAMESPACE_ID::RepeatedField< ::PROTOBUF_NAMESPACE_ID::int32 >& + dimensions() const; + ::PROTOBUF_NAMESPACE_ID::RepeatedField< ::PROTOBUF_NAMESPACE_ID::int32 >* + mutable_dimensions(); + + // repeated uint32 missing_ids = 3; + int missing_ids_size() const; + private: + int _internal_missing_ids_size() const; + public: + void clear_missing_ids(); + private: + ::PROTOBUF_NAMESPACE_ID::uint32 _internal_missing_ids(int index) const; + const ::PROTOBUF_NAMESPACE_ID::RepeatedField< ::PROTOBUF_NAMESPACE_ID::uint32 >& + _internal_missing_ids() const; + void _internal_add_missing_ids(::PROTOBUF_NAMESPACE_ID::uint32 value); + ::PROTOBUF_NAMESPACE_ID::RepeatedField< ::PROTOBUF_NAMESPACE_ID::uint32 >* + _internal_mutable_missing_ids(); + public: + ::PROTOBUF_NAMESPACE_ID::uint32 missing_ids(int index) const; + void set_missing_ids(int index, ::PROTOBUF_NAMESPACE_ID::uint32 value); + void add_missing_ids(::PROTOBUF_NAMESPACE_ID::uint32 value); + const ::PROTOBUF_NAMESPACE_ID::RepeatedField< ::PROTOBUF_NAMESPACE_ID::uint32 >& + missing_ids() const; + ::PROTOBUF_NAMESPACE_ID::RepeatedField< ::PROTOBUF_NAMESPACE_ID::uint32 >* + mutable_missing_ids(); + + // bytes embeddings_data = 1; + void clear_embeddings_data(); + const std::string& embeddings_data() const; + void set_embeddings_data(const std::string& value); + void set_embeddings_data(std::string&& value); + void set_embeddings_data(const char* value); + void set_embeddings_data(const void* value, size_t size); + std::string* mutable_embeddings_data(); + std::string* release_embeddings_data(); + void set_allocated_embeddings_data(std::string* embeddings_data); + GOOGLE_PROTOBUF_RUNTIME_DEPRECATED("The unsafe_arena_ accessors for" + " string fields are deprecated and will be removed in a" + " future release.") + std::string* unsafe_arena_release_embeddings_data(); + GOOGLE_PROTOBUF_RUNTIME_DEPRECATED("The unsafe_arena_ accessors for" + " string fields are deprecated and will be removed in a" + " future release.") + void unsafe_arena_set_allocated_embeddings_data( + std::string* embeddings_data); + private: + const std::string& _internal_embeddings_data() const; + void _internal_set_embeddings_data(const std::string& 
value); + std::string* _internal_mutable_embeddings_data(); + public: + + // @@protoc_insertion_point(class_scope:protoembedding.NodeEmbeddingResponse) + private: + class _Internal; + + template friend class ::PROTOBUF_NAMESPACE_ID::Arena::InternalHelper; + typedef void InternalArenaConstructable_; + typedef void DestructorSkippable_; + ::PROTOBUF_NAMESPACE_ID::RepeatedField< ::PROTOBUF_NAMESPACE_ID::int32 > dimensions_; + mutable std::atomic _dimensions_cached_byte_size_; + ::PROTOBUF_NAMESPACE_ID::RepeatedField< ::PROTOBUF_NAMESPACE_ID::uint32 > missing_ids_; + mutable std::atomic _missing_ids_cached_byte_size_; + ::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr embeddings_data_; + mutable ::PROTOBUF_NAMESPACE_ID::internal::CachedSize _cached_size_; + friend struct ::TableStruct_embedding_2eproto; +}; +// =================================================================== + + +// =================================================================== + +#ifdef __GNUC__ + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wstrict-aliasing" +#endif // __GNUC__ +// NodeEmbeddingRequest + +// repeated uint32 node_ids = 1; +inline int NodeEmbeddingRequest::_internal_node_ids_size() const { + return node_ids_.size(); +} +inline int NodeEmbeddingRequest::node_ids_size() const { + return _internal_node_ids_size(); +} +inline void NodeEmbeddingRequest::clear_node_ids() { + node_ids_.Clear(); +} +inline ::PROTOBUF_NAMESPACE_ID::uint32 NodeEmbeddingRequest::_internal_node_ids(int index) const { + return node_ids_.Get(index); +} +inline ::PROTOBUF_NAMESPACE_ID::uint32 NodeEmbeddingRequest::node_ids(int index) const { + // @@protoc_insertion_point(field_get:protoembedding.NodeEmbeddingRequest.node_ids) + return _internal_node_ids(index); +} +inline void NodeEmbeddingRequest::set_node_ids(int index, ::PROTOBUF_NAMESPACE_ID::uint32 value) { + node_ids_.Set(index, value); + // @@protoc_insertion_point(field_set:protoembedding.NodeEmbeddingRequest.node_ids) +} +inline void NodeEmbeddingRequest::_internal_add_node_ids(::PROTOBUF_NAMESPACE_ID::uint32 value) { + node_ids_.Add(value); +} +inline void NodeEmbeddingRequest::add_node_ids(::PROTOBUF_NAMESPACE_ID::uint32 value) { + _internal_add_node_ids(value); + // @@protoc_insertion_point(field_add:protoembedding.NodeEmbeddingRequest.node_ids) +} +inline const ::PROTOBUF_NAMESPACE_ID::RepeatedField< ::PROTOBUF_NAMESPACE_ID::uint32 >& +NodeEmbeddingRequest::_internal_node_ids() const { + return node_ids_; +} +inline const ::PROTOBUF_NAMESPACE_ID::RepeatedField< ::PROTOBUF_NAMESPACE_ID::uint32 >& +NodeEmbeddingRequest::node_ids() const { + // @@protoc_insertion_point(field_list:protoembedding.NodeEmbeddingRequest.node_ids) + return _internal_node_ids(); +} +inline ::PROTOBUF_NAMESPACE_ID::RepeatedField< ::PROTOBUF_NAMESPACE_ID::uint32 >* +NodeEmbeddingRequest::_internal_mutable_node_ids() { + return &node_ids_; +} +inline ::PROTOBUF_NAMESPACE_ID::RepeatedField< ::PROTOBUF_NAMESPACE_ID::uint32 >* +NodeEmbeddingRequest::mutable_node_ids() { + // @@protoc_insertion_point(field_mutable_list:protoembedding.NodeEmbeddingRequest.node_ids) + return _internal_mutable_node_ids(); +} + +// ------------------------------------------------------------------- + +// NodeEmbeddingResponse + +// bytes embeddings_data = 1; +inline void NodeEmbeddingResponse::clear_embeddings_data() { + embeddings_data_.ClearToEmpty(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited(), GetArena()); +} +inline const std::string& NodeEmbeddingResponse::embeddings_data() const { 
+ // @@protoc_insertion_point(field_get:protoembedding.NodeEmbeddingResponse.embeddings_data) + return _internal_embeddings_data(); +} +inline void NodeEmbeddingResponse::set_embeddings_data(const std::string& value) { + _internal_set_embeddings_data(value); + // @@protoc_insertion_point(field_set:protoembedding.NodeEmbeddingResponse.embeddings_data) +} +inline std::string* NodeEmbeddingResponse::mutable_embeddings_data() { + // @@protoc_insertion_point(field_mutable:protoembedding.NodeEmbeddingResponse.embeddings_data) + return _internal_mutable_embeddings_data(); +} +inline const std::string& NodeEmbeddingResponse::_internal_embeddings_data() const { + return embeddings_data_.Get(); +} +inline void NodeEmbeddingResponse::_internal_set_embeddings_data(const std::string& value) { + + embeddings_data_.Set(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited(), value, GetArena()); +} +inline void NodeEmbeddingResponse::set_embeddings_data(std::string&& value) { + + embeddings_data_.Set( + &::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited(), ::std::move(value), GetArena()); + // @@protoc_insertion_point(field_set_rvalue:protoembedding.NodeEmbeddingResponse.embeddings_data) +} +inline void NodeEmbeddingResponse::set_embeddings_data(const char* value) { + GOOGLE_DCHECK(value != nullptr); + + embeddings_data_.Set(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited(), ::std::string(value), + GetArena()); + // @@protoc_insertion_point(field_set_char:protoembedding.NodeEmbeddingResponse.embeddings_data) +} +inline void NodeEmbeddingResponse::set_embeddings_data(const void* value, + size_t size) { + + embeddings_data_.Set(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited(), ::std::string( + reinterpret_cast(value), size), GetArena()); + // @@protoc_insertion_point(field_set_pointer:protoembedding.NodeEmbeddingResponse.embeddings_data) +} +inline std::string* NodeEmbeddingResponse::_internal_mutable_embeddings_data() { + + return embeddings_data_.Mutable(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited(), GetArena()); +} +inline std::string* NodeEmbeddingResponse::release_embeddings_data() { + // @@protoc_insertion_point(field_release:protoembedding.NodeEmbeddingResponse.embeddings_data) + return embeddings_data_.Release(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited(), GetArena()); +} +inline void NodeEmbeddingResponse::set_allocated_embeddings_data(std::string* embeddings_data) { + if (embeddings_data != nullptr) { + + } else { + + } + embeddings_data_.SetAllocated(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited(), embeddings_data, + GetArena()); + // @@protoc_insertion_point(field_set_allocated:protoembedding.NodeEmbeddingResponse.embeddings_data) +} +inline std::string* NodeEmbeddingResponse::unsafe_arena_release_embeddings_data() { + // @@protoc_insertion_point(field_unsafe_arena_release:protoembedding.NodeEmbeddingResponse.embeddings_data) + GOOGLE_DCHECK(GetArena() != nullptr); + + return embeddings_data_.UnsafeArenaRelease(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited(), + GetArena()); +} +inline void NodeEmbeddingResponse::unsafe_arena_set_allocated_embeddings_data( + std::string* embeddings_data) { + GOOGLE_DCHECK(GetArena() != nullptr); + if (embeddings_data != nullptr) { + + } else { + + } + embeddings_data_.UnsafeArenaSetAllocated(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited(), + embeddings_data, GetArena()); + // 
@@protoc_insertion_point(field_unsafe_arena_set_allocated:protoembedding.NodeEmbeddingResponse.embeddings_data) +} + +// repeated int32 dimensions = 2; +inline int NodeEmbeddingResponse::_internal_dimensions_size() const { + return dimensions_.size(); +} +inline int NodeEmbeddingResponse::dimensions_size() const { + return _internal_dimensions_size(); +} +inline void NodeEmbeddingResponse::clear_dimensions() { + dimensions_.Clear(); +} +inline ::PROTOBUF_NAMESPACE_ID::int32 NodeEmbeddingResponse::_internal_dimensions(int index) const { + return dimensions_.Get(index); +} +inline ::PROTOBUF_NAMESPACE_ID::int32 NodeEmbeddingResponse::dimensions(int index) const { + // @@protoc_insertion_point(field_get:protoembedding.NodeEmbeddingResponse.dimensions) + return _internal_dimensions(index); +} +inline void NodeEmbeddingResponse::set_dimensions(int index, ::PROTOBUF_NAMESPACE_ID::int32 value) { + dimensions_.Set(index, value); + // @@protoc_insertion_point(field_set:protoembedding.NodeEmbeddingResponse.dimensions) +} +inline void NodeEmbeddingResponse::_internal_add_dimensions(::PROTOBUF_NAMESPACE_ID::int32 value) { + dimensions_.Add(value); +} +inline void NodeEmbeddingResponse::add_dimensions(::PROTOBUF_NAMESPACE_ID::int32 value) { + _internal_add_dimensions(value); + // @@protoc_insertion_point(field_add:protoembedding.NodeEmbeddingResponse.dimensions) +} +inline const ::PROTOBUF_NAMESPACE_ID::RepeatedField< ::PROTOBUF_NAMESPACE_ID::int32 >& +NodeEmbeddingResponse::_internal_dimensions() const { + return dimensions_; +} +inline const ::PROTOBUF_NAMESPACE_ID::RepeatedField< ::PROTOBUF_NAMESPACE_ID::int32 >& +NodeEmbeddingResponse::dimensions() const { + // @@protoc_insertion_point(field_list:protoembedding.NodeEmbeddingResponse.dimensions) + return _internal_dimensions(); +} +inline ::PROTOBUF_NAMESPACE_ID::RepeatedField< ::PROTOBUF_NAMESPACE_ID::int32 >* +NodeEmbeddingResponse::_internal_mutable_dimensions() { + return &dimensions_; +} +inline ::PROTOBUF_NAMESPACE_ID::RepeatedField< ::PROTOBUF_NAMESPACE_ID::int32 >* +NodeEmbeddingResponse::mutable_dimensions() { + // @@protoc_insertion_point(field_mutable_list:protoembedding.NodeEmbeddingResponse.dimensions) + return _internal_mutable_dimensions(); +} + +// repeated uint32 missing_ids = 3; +inline int NodeEmbeddingResponse::_internal_missing_ids_size() const { + return missing_ids_.size(); +} +inline int NodeEmbeddingResponse::missing_ids_size() const { + return _internal_missing_ids_size(); +} +inline void NodeEmbeddingResponse::clear_missing_ids() { + missing_ids_.Clear(); +} +inline ::PROTOBUF_NAMESPACE_ID::uint32 NodeEmbeddingResponse::_internal_missing_ids(int index) const { + return missing_ids_.Get(index); +} +inline ::PROTOBUF_NAMESPACE_ID::uint32 NodeEmbeddingResponse::missing_ids(int index) const { + // @@protoc_insertion_point(field_get:protoembedding.NodeEmbeddingResponse.missing_ids) + return _internal_missing_ids(index); +} +inline void NodeEmbeddingResponse::set_missing_ids(int index, ::PROTOBUF_NAMESPACE_ID::uint32 value) { + missing_ids_.Set(index, value); + // @@protoc_insertion_point(field_set:protoembedding.NodeEmbeddingResponse.missing_ids) +} +inline void NodeEmbeddingResponse::_internal_add_missing_ids(::PROTOBUF_NAMESPACE_ID::uint32 value) { + missing_ids_.Add(value); +} +inline void NodeEmbeddingResponse::add_missing_ids(::PROTOBUF_NAMESPACE_ID::uint32 value) { + _internal_add_missing_ids(value); + // @@protoc_insertion_point(field_add:protoembedding.NodeEmbeddingResponse.missing_ids) +} +inline const 
::PROTOBUF_NAMESPACE_ID::RepeatedField< ::PROTOBUF_NAMESPACE_ID::uint32 >& +NodeEmbeddingResponse::_internal_missing_ids() const { + return missing_ids_; +} +inline const ::PROTOBUF_NAMESPACE_ID::RepeatedField< ::PROTOBUF_NAMESPACE_ID::uint32 >& +NodeEmbeddingResponse::missing_ids() const { + // @@protoc_insertion_point(field_list:protoembedding.NodeEmbeddingResponse.missing_ids) + return _internal_missing_ids(); +} +inline ::PROTOBUF_NAMESPACE_ID::RepeatedField< ::PROTOBUF_NAMESPACE_ID::uint32 >* +NodeEmbeddingResponse::_internal_mutable_missing_ids() { + return &missing_ids_; +} +inline ::PROTOBUF_NAMESPACE_ID::RepeatedField< ::PROTOBUF_NAMESPACE_ID::uint32 >* +NodeEmbeddingResponse::mutable_missing_ids() { + // @@protoc_insertion_point(field_mutable_list:protoembedding.NodeEmbeddingResponse.missing_ids) + return _internal_mutable_missing_ids(); +} + +#ifdef __GNUC__ + #pragma GCC diagnostic pop +#endif // __GNUC__ +// ------------------------------------------------------------------- + + +// @@protoc_insertion_point(namespace_scope) + +} // namespace protoembedding + +// @@protoc_insertion_point(global_scope) + +#include +#endif // GOOGLE_PROTOBUF_INCLUDED_GOOGLE_PROTOBUF_INCLUDED_embedding_2eproto diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/embedding_compute.h b/packages/leann-backend-diskann/third_party/DiskANN/include/embedding_compute.h new file mode 100644 index 0000000..354c9c5 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/embedding_compute.h @@ -0,0 +1,118 @@ +#pragma once + +#include +#include + +#ifdef PYBIND11_EMBEDDED +#include +#else +#include +#endif +#include +#include + +namespace py = pybind11; + +namespace diskann +{ + +class PYBIND11_EXPORT EmbeddingComputer +{ + public: + static EmbeddingComputer &getInstance() + { + static EmbeddingComputer instance; + return instance; + } + + void initialize(const std::string &model_path) + { + try + { + py::module_ sys = py::module_::import("sys"); + py::module_ os = py::module_::import("os"); + + // Add the directory containing embedd_micro.py to Python path + std::string micro_dir = "micro"; + sys.attr("path").attr("append")(micro_dir); + + // Import our module + py::module_ embedd = py::module_::import("embedd_micro"); + + // Create benchmark config + py::object config = embedd.attr("BenchmarkConfig")(model_path, // model_path + py::list(), // empty batch_sizes + 256, // seq_length + 1, // num_runs + true, // use_fp16 + false, // use_cuda_graphs + false // use_flash_attention + ); + + // Create benchmark instance + benchmark = embedd.attr("Benchmark")(config); + } + catch (const std::exception &e) + { + throw std::runtime_error("Failed to initialize Python embedding computer: " + std::string(e.what())); + } + } + + template + std::vector computeEmbeddings(const std::vector &points, size_t dim, size_t batch_size = 32) + { + try + { + // Convert points to numpy array + std::vector flattened_points; + flattened_points.reserve(points.size() * dim); + + for (const auto &point : points) + { + flattened_points.insert(flattened_points.end(), point, point + dim); + } + + py::array_t points_array({static_cast(points.size()), static_cast(dim)}, + flattened_points.data()); + + // Call compute_embeddings + py::object result = benchmark.attr("compute_embeddings")(points_array, batch_size); + + // Convert result back to C++ + py::array_t np_result = result.cast>(); + py::buffer_info buf = np_result.request(); + float *ptr = static_cast(buf.ptr); + + return std::vector(ptr, ptr + 
buf.size); + } + catch (const std::exception &e) + { + throw std::runtime_error("Failed to compute embeddings: " + std::string(e.what())); + } + } + + private: + EmbeddingComputer() + { +#ifdef PYBIND11_EMBEDDED + if (!Py_IsInitialized()) + { + py::initialize_interpreter(); + } +#endif + } + + ~EmbeddingComputer() + { +#ifdef PYBIND11_EMBEDDED + if (Py_IsInitialized()) + { + py::finalize_interpreter(); + } +#endif + } + + py::object benchmark; +}; + +} // namespace diskann \ No newline at end of file diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/exceptions.h b/packages/leann-backend-diskann/third_party/DiskANN/include/exceptions.h new file mode 100644 index 0000000..99e4e73 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/exceptions.h @@ -0,0 +1,17 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once +#include + +namespace diskann +{ + +class NotImplementedException : public std::logic_error +{ + public: + NotImplementedException() : std::logic_error("Function not yet implemented.") + { + } +}; +} // namespace diskann diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/filter_utils.h b/packages/leann-backend-diskann/third_party/DiskANN/include/filter_utils.h new file mode 100644 index 0000000..55f7aed --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/filter_utils.h @@ -0,0 +1,221 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef __APPLE__ +#else +#include +#endif + +#ifdef _WINDOWS +#include +typedef HANDLE FileHandle; +#else +#include +typedef int FileHandle; +#endif + +#ifndef _WINDOWS +#include +#endif + +#include "cached_io.h" +#include "common_includes.h" +#include "memory_mapper.h" +#include "utils.h" +#include "windows_customizations.h" + +// custom types (for readability) +typedef tsl::robin_set label_set; +typedef std::string path; + +// structs for returning multiple items from a function +typedef std::tuple, tsl::robin_map, tsl::robin_set> + parse_label_file_return_values; +typedef std::tuple>, uint64_t> load_label_index_return_values; + +namespace diskann +{ +template +DISKANN_DLLEXPORT void generate_label_indices(path input_data_path, path final_index_path_prefix, label_set all_labels, + unsigned R, unsigned L, float alpha, unsigned num_threads); + +DISKANN_DLLEXPORT load_label_index_return_values load_label_index(path label_index_path, + uint32_t label_number_of_points); + +template +DISKANN_DLLEXPORT std::tuple>, tsl::robin_set> parse_formatted_label_file( + path label_file); + +DISKANN_DLLEXPORT parse_label_file_return_values parse_label_file(path label_data_path, std::string universal_label); + +template +DISKANN_DLLEXPORT tsl::robin_map> generate_label_specific_vector_files_compat( + path input_data_path, tsl::robin_map labels_to_number_of_points, + std::vector point_ids_to_labels, label_set all_labels); + +/* + * For each label, generates a file containing all vectors that have said label. + * Also copies data from original bin file to new dimension-aligned file. + * + * Utilizes POSIX functions mmap and writev in order to minimize memory + * overhead, so we include an STL version as well. 
+ * + * Each data file is saved under the following format: + * input_data_path + "_" + label + */ +#ifndef _WINDOWS +template +inline tsl::robin_map> generate_label_specific_vector_files( + path input_data_path, tsl::robin_map labels_to_number_of_points, + std::vector point_ids_to_labels, label_set all_labels) +{ +#ifndef _WINDOWS + auto file_writing_timer = std::chrono::high_resolution_clock::now(); + diskann::MemoryMapper input_data(input_data_path); + char *input_start = input_data.getBuf(); + + uint32_t number_of_points, dimension; + std::memcpy(&number_of_points, input_start, sizeof(uint32_t)); + std::memcpy(&dimension, input_start + sizeof(uint32_t), sizeof(uint32_t)); + const uint32_t VECTOR_SIZE = dimension * sizeof(T); + const size_t METADATA = 2 * sizeof(uint32_t); + if (number_of_points != point_ids_to_labels.size()) + { + std::cerr << "Error: number of points in labels file and data file differ." << std::endl; + throw; + } + + tsl::robin_map label_to_iovec_map; + tsl::robin_map label_to_curr_iovec; + tsl::robin_map> label_id_to_orig_id; + + // setup iovec list for each label + for (const auto &lbl : all_labels) + { + iovec *label_iovecs = (iovec *)malloc(labels_to_number_of_points[lbl] * sizeof(iovec)); + if (label_iovecs == nullptr) + { + throw; + } + label_to_iovec_map[lbl] = label_iovecs; + label_to_curr_iovec[lbl] = 0; + label_id_to_orig_id[lbl].reserve(labels_to_number_of_points[lbl]); + } + + // each point added to corresponding per-label iovec list + for (uint32_t point_id = 0; point_id < number_of_points; point_id++) + { + char *curr_point = input_start + METADATA + (VECTOR_SIZE * point_id); + iovec curr_iovec; + + curr_iovec.iov_base = curr_point; + curr_iovec.iov_len = VECTOR_SIZE; + for (const auto &lbl : point_ids_to_labels[point_id]) + { + *(label_to_iovec_map[lbl] + label_to_curr_iovec[lbl]) = curr_iovec; + label_to_curr_iovec[lbl]++; + label_id_to_orig_id[lbl].push_back(point_id); + } + } + + // write each label iovec to resp. 
file + for (const auto &lbl : all_labels) + { + int label_input_data_fd; + path curr_label_input_data_path(input_data_path + "_" + lbl); + uint32_t curr_num_pts = labels_to_number_of_points[lbl]; + + label_input_data_fd = + open(curr_label_input_data_path.c_str(), O_CREAT | O_WRONLY | O_TRUNC | O_APPEND, (mode_t)0644); + if (label_input_data_fd == -1) + throw; + + // write metadata + uint32_t metadata[2] = {curr_num_pts, dimension}; + int return_value = write(label_input_data_fd, metadata, sizeof(uint32_t) * 2); + if (return_value == -1) + { + throw; + } + + // limits on number of iovec structs per writev means we need to perform + // multiple writevs + size_t i = 0; + while (curr_num_pts > IOV_MAX) + { + return_value = writev(label_input_data_fd, (label_to_iovec_map[lbl] + (IOV_MAX * i)), IOV_MAX); + if (return_value == -1) + { + close(label_input_data_fd); + throw; + } + curr_num_pts -= IOV_MAX; + i += 1; + } + return_value = writev(label_input_data_fd, (label_to_iovec_map[lbl] + (IOV_MAX * i)), curr_num_pts); + if (return_value == -1) + { + close(label_input_data_fd); + throw; + } + + free(label_to_iovec_map[lbl]); + close(label_input_data_fd); + } + + std::chrono::duration file_writing_time = std::chrono::high_resolution_clock::now() - file_writing_timer; + std::cout << "generated " << all_labels.size() << " label-specific vector files for index building in time " + << file_writing_time.count() << "\n" + << std::endl; + + return label_id_to_orig_id; +#endif +} +#endif + +inline std::vector loadTags(const std::string &tags_file, const std::string &base_file) +{ + const bool tags_enabled = tags_file.empty() ? false : true; + std::vector location_to_tag; + if (tags_enabled) + { + size_t tag_file_ndims, tag_file_npts; + std::uint32_t *tag_data; + diskann::load_bin(tags_file, tag_data, tag_file_npts, tag_file_ndims); + if (tag_file_ndims != 1) + { + diskann::cerr << "tags file error" << std::endl; + throw diskann::ANNException("tag file error", -1, __FUNCSIG__, __FILE__, __LINE__); + } + + // check if the point count match + size_t base_file_npts, base_file_ndims; + diskann::get_bin_metadata(base_file, base_file_npts, base_file_ndims); + if (base_file_npts != tag_file_npts) + { + diskann::cerr << "point num in tags file mismatch" << std::endl; + throw diskann::ANNException("point num in tags file mismatch", -1, __FUNCSIG__, __FILE__, __LINE__); + } + + location_to_tag.assign(tag_data, tag_data + tag_file_npts); + delete[] tag_data; + } + return location_to_tag; +} + +} // namespace diskann diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/in_mem_data_store.h b/packages/leann-backend-diskann/third_party/DiskANN/include/in_mem_data_store.h new file mode 100644 index 0000000..0a0a617 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/in_mem_data_store.h @@ -0,0 +1,89 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
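// Minimal illustrative sketch, not part of the DiskANN headers: reading back the header of one
// per-label vector file produced by generate_label_specific_vector_files() in filter_utils.h
// above. The layout assumed here is exactly what that function writes: the file
// "<input_data_path>_<label>" starts with two uint32 values (point count, dimension) followed by
// the raw vectors. The helper name below is hypothetical.
#include <cstdint>
#include <fstream>
#include <string>
#include <utility>

inline std::pair<uint32_t, uint32_t> read_label_file_header(const std::string &input_data_path,
                                                            const std::string &label)
{
    // naming convention documented in the comment block above
    std::ifstream in(input_data_path + "_" + label, std::ios::binary);
    uint32_t num_pts = 0, dim = 0;
    in.read(reinterpret_cast<char *>(&num_pts), sizeof(uint32_t));
    in.read(reinterpret_cast<char *>(&dim), sizeof(uint32_t));
    return {num_pts, dim};
}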
+#pragma once + +#include +#include + +#include "tsl/robin_map.h" +#include "tsl/robin_set.h" +#include "tsl/sparse_map.h" +// #include "boost/dynamic_bitset.hpp" + +#include "abstract_data_store.h" + +#include "distance.h" +#include "natural_number_map.h" +#include "natural_number_set.h" +#include "aligned_file_reader.h" + +namespace diskann +{ +template class InMemDataStore : public AbstractDataStore +{ + public: + InMemDataStore(const location_t capacity, const size_t dim, std::unique_ptr> distance_fn); + virtual ~InMemDataStore(); + + virtual location_t load(const std::string &filename) override; + virtual size_t save(const std::string &filename, const location_t num_points) override; + + virtual size_t get_aligned_dim() const override; + + // Populate internal data from unaligned data while doing alignment and any + // normalization that is required. + virtual void populate_data(const data_t *vectors, const location_t num_pts) override; + virtual void populate_data(const std::string &filename, const size_t offset) override; + + virtual void extract_data_to_bin(const std::string &filename, const location_t num_pts) override; + + virtual void get_vector(const location_t i, data_t *target) const override; + virtual void set_vector(const location_t i, const data_t *const vector) override; + virtual void prefetch_vector(const location_t loc) override; + + virtual void move_vectors(const location_t old_location_start, const location_t new_location_start, + const location_t num_points) override; + virtual void copy_vectors(const location_t from_loc, const location_t to_loc, const location_t num_points) override; + + virtual void preprocess_query(const data_t *query, AbstractScratch *query_scratch) const override; + + virtual float get_distance(const data_t *preprocessed_query, const location_t loc) const override; + virtual float get_distance(const location_t loc1, const location_t loc2) const override; + + virtual void get_distance(const data_t *preprocessed_query, const location_t *locations, + const uint32_t location_count, float *distances, + AbstractScratch *scratch) const override; + virtual void get_distance(const data_t *preprocessed_query, const std::vector &ids, + std::vector &distances, AbstractScratch *scratch_space) const override; + + virtual location_t calculate_medoid() const override; + + virtual Distance *get_dist_fn() const override; + + virtual size_t get_alignment_factor() const override; + + protected: + virtual location_t expand(const location_t new_size) override; + virtual location_t shrink(const location_t new_size) override; + + virtual location_t load_impl(const std::string &filename); +#ifdef EXEC_ENV_OLS + virtual location_t load_impl(AlignedFileReader &reader); +#endif + + private: + data_t *_data = nullptr; + + size_t _aligned_dim; + + // It may seem weird to put distance metric along with the data store class, + // but this gives us perf benefits as the datastore can do distance + // computations during search and compute norms of vectors internally without + // have to copy data back and forth. 
+ std::unique_ptr> _distance_fn; + + // in case we need to save vector norms for optimization + std::shared_ptr _pre_computed_norms; +}; + +} // namespace diskann \ No newline at end of file diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/in_mem_graph_store.h b/packages/leann-backend-diskann/third_party/DiskANN/include/in_mem_graph_store.h new file mode 100644 index 0000000..d0206a7 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/in_mem_graph_store.h @@ -0,0 +1,51 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include "abstract_graph_store.h" + +namespace diskann +{ + +class InMemGraphStore : public AbstractGraphStore +{ + public: + InMemGraphStore(const size_t total_pts, const size_t reserve_graph_degree); + + // returns tuple of + virtual std::tuple load(const std::string &index_path_prefix, + const size_t num_points) override; + virtual int store(const std::string &index_path_prefix, const size_t num_points, const size_t num_frozen_points, + const uint32_t start) override; + + virtual const std::vector &get_neighbours(const location_t i) const override; + virtual void add_neighbour(const location_t i, location_t neighbour_id) override; + virtual void clear_neighbours(const location_t i) override; + virtual void swap_neighbours(const location_t a, location_t b) override; + + virtual void set_neighbours(const location_t i, std::vector &neighbors) override; + + virtual size_t resize_graph(const size_t new_size) override; + virtual void clear_graph() override; + + virtual size_t get_max_range_of_graph() override; + virtual uint32_t get_max_observed_degree() override; + + protected: + virtual std::tuple load_impl(const std::string &filename, size_t expected_num_points); +#ifdef EXEC_ENV_OLS + virtual std::tuple load_impl(AlignedFileReader &reader, size_t expected_num_points); +#endif + + int save_graph(const std::string &index_path_prefix, const size_t active_points, const size_t num_frozen_points, + const uint32_t start); + + private: + size_t _max_range_of_graph = 0; + uint32_t _max_observed_degree = 0; + + std::vector> _graph; +}; + +} // namespace diskann diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/index.h b/packages/leann-backend-diskann/third_party/DiskANN/include/index.h new file mode 100644 index 0000000..c4303a1 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/index.h @@ -0,0 +1,452 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
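// Minimal illustrative sketch, not part of the DiskANN headers: InMemGraphStore above keeps the
// index graph as one adjacency list per point (its _graph member), which is also what
// Index::get_degree_stats() further below summarises. A toy routine over the same representation,
// here computing the average out-degree, assuming location_t is uint32_t.
#include <cstddef>
#include <cstdint>
#include <vector>

inline double average_out_degree(const std::vector<std::vector<uint32_t>> &graph)
{
    if (graph.empty())
        return 0.0;
    std::size_t total_edges = 0;
    for (const auto &neighbours : graph)
        total_edges += neighbours.size(); // one adjacency list per node
    return static_cast<double>(total_edges) / static_cast<double>(graph.size());
}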
+ +#pragma once + +#include "common_includes.h" + +#ifdef EXEC_ENV_OLS +#include "aligned_file_reader.h" +#endif + +#include "distance.h" +#include "locking.h" +#include "natural_number_map.h" +#include "natural_number_set.h" +#include "neighbor.h" +#include "parameters.h" +#include "utils.h" +#include "windows_customizations.h" +#include "scratch.h" +#include "in_mem_data_store.h" +#include "in_mem_graph_store.h" +#include "abstract_index.h" + +#include "quantized_distance.h" +#include "pq_data_store.h" + +#define OVERHEAD_FACTOR 1.1 +#define EXPAND_IF_FULL 0 +#define DEFAULT_MAXC 750 + +namespace diskann +{ + +inline double estimate_ram_usage(size_t size, uint32_t dim, uint32_t datasize, uint32_t degree) +{ + double size_of_data = ((double)size) * ROUND_UP(dim, 8) * datasize; + double size_of_graph = ((double)size) * degree * sizeof(uint32_t) * defaults::GRAPH_SLACK_FACTOR; + double size_of_locks = ((double)size) * sizeof(non_recursive_mutex); + double size_of_outer_vector = ((double)size) * sizeof(ptrdiff_t); + + return OVERHEAD_FACTOR * (size_of_data + size_of_graph + size_of_locks + size_of_outer_vector); +} + +template class Index : public AbstractIndex +{ + /************************************************************************** + * + * Public functions acquire one or more of _update_lock, _consolidate_lock, + * _tag_lock, _delete_lock before calling protected functions which DO NOT + * acquire these locks. They might acquire locks on _locks[i] + * + **************************************************************************/ + + public: + // Constructor for Bulk operations and for creating the index object solely + // for loading a prexisting index. + DISKANN_DLLEXPORT Index(const IndexConfig &index_config, std::shared_ptr> data_store, + std::unique_ptr graph_store, + std::shared_ptr> pq_data_store = nullptr); + + // Constructor for incremental index + DISKANN_DLLEXPORT Index(Metric m, const size_t dim, const size_t max_points, + const std::shared_ptr index_parameters, + const std::shared_ptr index_search_params, + const size_t num_frozen_pts = 0, const bool dynamic_index = false, + const bool enable_tags = false, const bool concurrent_consolidate = false, + const bool pq_dist_build = false, const size_t num_pq_chunks = 0, + const bool use_opq = false, const bool filtered_index = false); + + DISKANN_DLLEXPORT ~Index(); + + // Saves graph, data, metadata and associated tags. + DISKANN_DLLEXPORT void save(const char *filename, bool compact_before_save = false) override; + + // Load functions +#ifdef EXEC_ENV_OLS + DISKANN_DLLEXPORT void load(AlignedFileReader &reader, uint32_t num_threads, uint32_t search_l); +#else + // Reads the number of frozen points from graph's metadata file section. + DISKANN_DLLEXPORT static size_t get_graph_num_frozen_points(const std::string &graph_file); + + DISKANN_DLLEXPORT void load(const char *index_file, uint32_t num_threads, uint32_t search_l) override; +#endif + + // get some private variables + DISKANN_DLLEXPORT size_t get_num_points(); + DISKANN_DLLEXPORT size_t get_max_points(); + + DISKANN_DLLEXPORT bool detect_common_filters(uint32_t point_id, bool search_invocation, + const std::vector &incoming_labels); + + // Batch build from a file. Optionally pass tags vector. + DISKANN_DLLEXPORT void build(const char *filename, const size_t num_points_to_load, + const std::vector &tags = std::vector()); + + // Batch build from a file. Optionally pass tags file. 
+ DISKANN_DLLEXPORT void build(const char *filename, const size_t num_points_to_load, const char *tag_filename); + + // Batch build from a data array, which must pad vectors to aligned_dim + DISKANN_DLLEXPORT void build(const T *data, const size_t num_points_to_load, const std::vector &tags); + + // Based on filter params builds a filtered or unfiltered index + DISKANN_DLLEXPORT void build(const std::string &data_file, const size_t num_points_to_load, + IndexFilterParams &filter_params) override; + + // Filtered Support + DISKANN_DLLEXPORT void build_filtered_index(const char *filename, const std::string &label_file, + const size_t num_points_to_load, + const std::vector &tags = std::vector()); + + DISKANN_DLLEXPORT void set_universal_label(const LabelT &label); + + // Get converted integer label from string to int map (_label_map) + DISKANN_DLLEXPORT LabelT get_converted_label(const std::string &raw_label); + + // Set starting point of an index before inserting any points incrementally. + // The data count should be equal to _num_frozen_pts * _aligned_dim. + DISKANN_DLLEXPORT void set_start_points(const T *data, size_t data_count); + // Set starting points to random points on a sphere of certain radius. + // A fixed random seed can be specified for scenarios where it's important + // to have higher consistency between index builds. + DISKANN_DLLEXPORT void set_start_points_at_random(T radius, uint32_t random_seed = 0); + + // For FastL2 search on a static index, we interleave the data with graph + DISKANN_DLLEXPORT void optimize_index_layout() override; + + // For FastL2 search on optimized layout + DISKANN_DLLEXPORT void search_with_optimized_layout(const T *query, size_t K, size_t L, uint32_t *indices); + + // Added search overload that takes L as parameter, so that we + // can customize L on a per-query basis without tampering with "Parameters" + template + DISKANN_DLLEXPORT std::pair search(const T *query, const size_t K, const uint32_t L, + IDType *indices, float *distances = nullptr); + + // Initialize space for res_vectors before calling. + DISKANN_DLLEXPORT size_t search_with_tags(const T *query, const uint64_t K, const uint32_t L, TagT *tags, + float *distances, std::vector &res_vectors, bool use_filters = false, + const std::string filter_label = ""); + + // Filter support search + template + DISKANN_DLLEXPORT std::pair search_with_filters(const T *query, const LabelT &filter_label, + const size_t K, const uint32_t L, + IndexType *indices, float *distances); + + // Will fail if tag already in the index or if tag=0. + DISKANN_DLLEXPORT int insert_point(const T *point, const TagT tag); + + // Will fail if tag already in the index or if tag=0. + DISKANN_DLLEXPORT int insert_point(const T *point, const TagT tag, const std::vector &label); + + // call this before issuing deletions to sets relevant flags + DISKANN_DLLEXPORT int enable_delete(); + + // Record deleted point now and restructure graph later. Return -1 if tag + // not found, 0 if OK. + DISKANN_DLLEXPORT int lazy_delete(const TagT &tag); + + // Record deleted points now and restructure graph later. Add to failed_tags + // if tag not found. 
+ DISKANN_DLLEXPORT void lazy_delete(const std::vector &tags, std::vector &failed_tags); + + // Call after a series of lazy deletions + // Returns number of live points left after consolidation + // If _conc_consolidates is set in the ctor, then this call can be invoked + // alongside inserts and lazy deletes, else it acquires _update_lock + DISKANN_DLLEXPORT consolidation_report consolidate_deletes(const IndexWriteParameters ¶meters) override; + + DISKANN_DLLEXPORT void prune_all_neighbors(const uint32_t max_degree, const uint32_t max_occlusion, + const float alpha); + + DISKANN_DLLEXPORT bool is_index_saved(); + + // repositions frozen points to the end of _data - if they have been moved + // during deletion + DISKANN_DLLEXPORT void reposition_frozen_point_to_end(); + DISKANN_DLLEXPORT void reposition_points(uint32_t old_location_start, uint32_t new_location_start, + uint32_t num_locations); + + // DISKANN_DLLEXPORT void save_index_as_one_file(bool flag); + + DISKANN_DLLEXPORT void get_active_tags(tsl::robin_set &active_tags); + + // memory should be allocated for vec before calling this function + DISKANN_DLLEXPORT int get_vector_by_tag(TagT &tag, T *vec); + + DISKANN_DLLEXPORT void print_status(); + + DISKANN_DLLEXPORT void count_nodes_at_bfs_levels(); + + // This variable MUST be updated if the number of entries in the metadata + // change. + DISKANN_DLLEXPORT static const int METADATA_ROWS = 5; + + DISKANN_DLLEXPORT void get_degree_stats(size_t &max_deg, size_t &min_deg, size_t &avg_deg, size_t &cnt_deg); + + DISKANN_DLLEXPORT void dump_degree_stats(std::string filename); + + // ******************************** + // + // Internals of the library + // + // ******************************** + + protected: + // overload of abstract index virtual methods + virtual void _build(const DataType &data, const size_t num_points_to_load, TagVector &tags) override; + + virtual std::pair _search(const DataType &query, const size_t K, const uint32_t L, + std::any &indices, float *distances = nullptr) override; + virtual std::pair _search_with_filters(const DataType &query, + const std::string &filter_label_raw, const size_t K, + const uint32_t L, std::any &indices, + float *distances) override; + + virtual int _insert_point(const DataType &data_point, const TagType tag) override; + virtual int _insert_point(const DataType &data_point, const TagType tag, Labelvector &labels) override; + + virtual int _lazy_delete(const TagType &tag) override; + + virtual void _lazy_delete(TagVector &tags, TagVector &failed_tags) override; + + virtual void _get_active_tags(TagRobinSet &active_tags) override; + + virtual void _set_start_points_at_random(DataType radius, uint32_t random_seed = 0) override; + + virtual int _get_vector_by_tag(TagType &tag, DataType &vec) override; + + virtual void _search_with_optimized_layout(const DataType &query, size_t K, size_t L, uint32_t *indices) override; + + virtual size_t _search_with_tags(const DataType &query, const uint64_t K, const uint32_t L, const TagType &tags, + float *distances, DataVector &res_vectors, bool use_filters = false, + const std::string filter_label = "") override; + + virtual void _set_universal_label(const LabelType universal_label) override; + + // No copy/assign. 
+ Index(const Index &) = delete; + Index &operator=(const Index &) = delete; + + // Use after _data and _nd have been populated + // Acquire exclusive _update_lock before calling + void build_with_data_populated(const std::vector &tags); + + // generates 1 frozen point that will never be deleted from the graph + // This is not visible to the user + void generate_frozen_point(); + + // determines navigating node of the graph by calculating medoid of datafopt + uint32_t calculate_entry_point(); + + void parse_label_file(const std::string &label_file, size_t &num_pts_labels); + + std::unordered_map load_label_map(const std::string &map_file); + + // Returns the locations of start point and frozen points suitable for use + // with iterate_to_fixed_point. + std::vector get_init_ids(); + + // The query to use is placed in scratch->aligned_query + std::pair iterate_to_fixed_point(InMemQueryScratch *scratch, const uint32_t Lindex, + const std::vector &init_ids, bool use_filter, + const std::vector &filters, bool search_invocation); + + void search_for_point_and_prune(int location, uint32_t Lindex, std::vector &pruned_list, + InMemQueryScratch *scratch, bool use_filter = false, + uint32_t filteredLindex = 0); + + void prune_neighbors(const uint32_t location, std::vector &pool, std::vector &pruned_list, + InMemQueryScratch *scratch); + + void prune_neighbors(const uint32_t location, std::vector &pool, const uint32_t range, + const uint32_t max_candidate_size, const float alpha, std::vector &pruned_list, + InMemQueryScratch *scratch); + + // Prunes candidates in @pool to a shorter list @result + // @pool must be sorted before calling + void occlude_list(const uint32_t location, std::vector &pool, const float alpha, const uint32_t degree, + const uint32_t maxc, std::vector &result, InMemQueryScratch *scratch, + const tsl::robin_set *const delete_set_ptr = nullptr); + + // add reverse links from all the visited nodes to node n. + void inter_insert(uint32_t n, std::vector &pruned_list, const uint32_t range, + InMemQueryScratch *scratch); + + void inter_insert(uint32_t n, std::vector &pruned_list, InMemQueryScratch *scratch); + + // Acquire exclusive _update_lock before calling + void link(); + + // Acquire exclusive _tag_lock and _delete_lock before calling + int reserve_location(); + + // Acquire exclusive _tag_lock before calling + size_t release_location(int location); + size_t release_locations(const tsl::robin_set &locations); + + // Resize the index when no slots are left for insertion. + // Acquire exclusive _update_lock and _tag_lock before calling. + void resize(size_t new_max_points); + + // Acquire unique lock on _update_lock, _consolidate_lock, _tag_lock + // and _delete_lock before calling these functions. + // Renumber nodes, update tag and location maps and compact the + // graph, mode = _consolidated_order in case of lazy deletion and + // _compacted_order in case of eager deletion + DISKANN_DLLEXPORT void compact_data(); + DISKANN_DLLEXPORT void compact_frozen_point(); + + // Remove deleted nodes from adjacency list of node loc + // Replace removed neighbors with second order neighbors. + // Also acquires _locks[i] for i = loc and out-neighbors of loc. 
+ void process_delete(const tsl::robin_set &old_delete_set, size_t loc, const uint32_t range, + const uint32_t maxc, const float alpha, InMemQueryScratch *scratch); + + void initialize_query_scratch(uint32_t num_threads, uint32_t search_l, uint32_t indexing_l, uint32_t r, + uint32_t maxc, size_t dim); + + // Do not call without acquiring appropriate locks + // call public member functions save and load to invoke these. + DISKANN_DLLEXPORT size_t save_graph(std::string filename); + DISKANN_DLLEXPORT size_t save_data(std::string filename); + DISKANN_DLLEXPORT size_t save_tags(std::string filename); + DISKANN_DLLEXPORT size_t save_delete_list(const std::string &filename); +#ifdef EXEC_ENV_OLS + DISKANN_DLLEXPORT size_t load_graph(AlignedFileReader &reader, size_t expected_num_points); + DISKANN_DLLEXPORT size_t load_data(AlignedFileReader &reader); + DISKANN_DLLEXPORT size_t load_tags(AlignedFileReader &reader); + DISKANN_DLLEXPORT size_t load_delete_set(AlignedFileReader &reader); +#else + DISKANN_DLLEXPORT size_t load_graph(const std::string filename, size_t expected_num_points); + DISKANN_DLLEXPORT size_t load_data(std::string filename0); + DISKANN_DLLEXPORT size_t load_tags(const std::string tag_file_name); + DISKANN_DLLEXPORT size_t load_delete_set(const std::string &filename); +#endif + + private: + // Distance functions + Metric _dist_metric = diskann::L2; + + // Data + std::shared_ptr> _data_store; + + // Graph related data structures + std::unique_ptr _graph_store; + + char *_opt_graph = nullptr; + + // Dimensions + size_t _dim = 0; + size_t _nd = 0; // number of active points i.e. existing in the graph + size_t _max_points = 0; // total number of points in given data set + + // _num_frozen_pts is the number of points which are used as initial + // candidates when iterating to closest point(s). These are not visible + // externally and won't be returned by search. At least 1 frozen point is + // needed for a dynamic index. The frozen points have consecutive locations. + // See also _start below. + size_t _num_frozen_pts = 0; + size_t _frozen_pts_used = 0; + size_t _node_size; + size_t _data_len; + size_t _neighbor_len; + + // Start point of the search. When _num_frozen_pts is greater than zero, + // this is the location of the first frozen point. Otherwise, this is a + // location of one of the points in index. + uint32_t _start = 0; + + bool _has_built = false; + bool _saturate_graph = false; + bool _save_as_one_file = false; // plan to support in next version + bool _dynamic_index = false; + bool _enable_tags = false; + bool _normalize_vecs = false; // Using normalied L2 for cosine. 
+ bool _deletes_enabled = false; + + // Filter Support + + bool _filtered_index = false; + // Location to label is only updated during insert_point(), all other reads are protected by + // default as a location can only be released at end of consolidate deletes + std::vector> _location_to_labels; + tsl::robin_set _labels; + std::string _labels_file; + std::unordered_map _label_to_start_id; + std::unordered_map _medoid_counts; + + bool _use_universal_label = false; + LabelT _universal_label = 0; + uint32_t _filterIndexingQueueSize; + std::unordered_map _label_map; + + // Indexing parameters + uint32_t _indexingQueueSize; + uint32_t _indexingRange; + uint32_t _indexingMaxC; + float _indexingAlpha; + uint32_t _indexingThreads; + + // Query scratch data structures + ConcurrentQueue *> _query_scratch; + + // Flags for PQ based distance calculation + bool _pq_dist = false; + bool _use_opq = false; + size_t _num_pq_chunks = 0; + // REFACTOR + // uint8_t *_pq_data = nullptr; + std::shared_ptr> _pq_distance_fn = nullptr; + std::shared_ptr> _pq_data_store = nullptr; + bool _pq_generated = false; + FixedChunkPQTable _pq_table; + + // + // Data structures, locks and flags for dynamic indexing and tags + // + + // lazy_delete removes entry from _location_to_tag and _tag_to_location. If + // _location_to_tag does not resolve a location, infer that it was deleted. + tsl::sparse_map _tag_to_location; + natural_number_map _location_to_tag; + + // _empty_slots has unallocated slots and those freed by consolidate_delete. + // _delete_set has locations marked deleted by lazy_delete. Will not be + // immediately available for insert. consolidate_delete will release these + // slots to _empty_slots. + natural_number_set _empty_slots; + std::unique_ptr> _delete_set; + + bool _data_compacted = true; // true if data has been compacted + bool _is_saved = false; // Checking if the index is already saved. 
+ bool _conc_consolidate = false; // use _lock while searching + + // Acquire locks in the order below when acquiring multiple locks + std::shared_timed_mutex // RW mutex between save/load (exclusive lock) and + _update_lock; // search/inserts/deletes/consolidate (shared lock) + std::shared_timed_mutex // Ensure only one consolidate or compact_data is + _consolidate_lock; // ever active + std::shared_timed_mutex // RW lock for _tag_to_location, + _tag_lock; // _location_to_tag, _empty_slots, _nd, _max_points, _label_to_start_id + std::shared_timed_mutex // RW Lock on _delete_set and _data_compacted + _delete_lock; // variable + + // Per node lock, cardinality=_max_points + _num_frozen_points + std::vector _locks; + + static const float INDEX_GROWTH_FACTOR; +}; +} // namespace diskann diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/index_build_params.h b/packages/leann-backend-diskann/third_party/DiskANN/include/index_build_params.h new file mode 100644 index 0000000..d4f4548 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/index_build_params.h @@ -0,0 +1,73 @@ +#pragma once + +#include "common_includes.h" +#include "parameters.h" + +namespace diskann +{ +struct IndexFilterParams +{ + public: + std::string save_path_prefix; + std::string label_file; + std::string tags_file; + std::string universal_label; + uint32_t filter_threshold = 0; + + private: + IndexFilterParams(const std::string &save_path_prefix, const std::string &label_file, + const std::string &universal_label, uint32_t filter_threshold) + : save_path_prefix(save_path_prefix), label_file(label_file), universal_label(universal_label), + filter_threshold(filter_threshold) + { + } + + friend class IndexFilterParamsBuilder; +}; +class IndexFilterParamsBuilder +{ + public: + IndexFilterParamsBuilder() = default; + + IndexFilterParamsBuilder &with_save_path_prefix(const std::string &save_path_prefix) + { + if (save_path_prefix.empty() || save_path_prefix == "") + throw ANNException("Error: save_path_prefix can't be empty", -1); + this->_save_path_prefix = save_path_prefix; + return *this; + } + + IndexFilterParamsBuilder &with_label_file(const std::string &label_file) + { + this->_label_file = label_file; + return *this; + } + + IndexFilterParamsBuilder &with_universal_label(const std::string &univeral_label) + { + this->_universal_label = univeral_label; + return *this; + } + + IndexFilterParamsBuilder &with_filter_threshold(const std::uint32_t &filter_threshold) + { + this->_filter_threshold = filter_threshold; + return *this; + } + + IndexFilterParams build() + { + return IndexFilterParams(_save_path_prefix, _label_file, _universal_label, _filter_threshold); + } + + IndexFilterParamsBuilder(const IndexFilterParamsBuilder &) = delete; + IndexFilterParamsBuilder &operator=(const IndexFilterParamsBuilder &) = delete; + + private: + std::string _save_path_prefix; + std::string _label_file; + std::string _tags_file; + std::string _universal_label; + uint32_t _filter_threshold = 0; +}; +} // namespace diskann diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/index_config.h b/packages/leann-backend-diskann/third_party/DiskANN/include/index_config.h new file mode 100644 index 0000000..a8e64d0 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/index_config.h @@ -0,0 +1,256 @@ +#pragma once + +#include "common_includes.h" +#include "parameters.h" + +namespace diskann +{ +enum class DataStoreStrategy +{ + MEMORY +}; + +enum class 
GraphStoreStrategy +{ + MEMORY +}; + +struct IndexConfig +{ + DataStoreStrategy data_strategy; + GraphStoreStrategy graph_strategy; + + Metric metric; + size_t dimension; + size_t max_points; + + bool dynamic_index; + bool enable_tags; + bool pq_dist_build; + bool concurrent_consolidate; + bool use_opq; + bool filtered_index; + + size_t num_pq_chunks; + size_t num_frozen_pts; + + std::string label_type; + std::string tag_type; + std::string data_type; + + // Params for building index + std::shared_ptr index_write_params; + // Params for searching index + std::shared_ptr index_search_params; + + private: + IndexConfig(DataStoreStrategy data_strategy, GraphStoreStrategy graph_strategy, Metric metric, size_t dimension, + size_t max_points, size_t num_pq_chunks, size_t num_frozen_points, bool dynamic_index, bool enable_tags, + bool pq_dist_build, bool concurrent_consolidate, bool use_opq, bool filtered_index, + std::string &data_type, const std::string &tag_type, const std::string &label_type, + std::shared_ptr index_write_params, + std::shared_ptr index_search_params) + : data_strategy(data_strategy), graph_strategy(graph_strategy), metric(metric), dimension(dimension), + max_points(max_points), dynamic_index(dynamic_index), enable_tags(enable_tags), pq_dist_build(pq_dist_build), + concurrent_consolidate(concurrent_consolidate), use_opq(use_opq), filtered_index(filtered_index), + num_pq_chunks(num_pq_chunks), num_frozen_pts(num_frozen_points), label_type(label_type), tag_type(tag_type), + data_type(data_type), index_write_params(index_write_params), index_search_params(index_search_params) + { + } + + friend class IndexConfigBuilder; +}; + +class IndexConfigBuilder +{ + public: + IndexConfigBuilder() = default; + + IndexConfigBuilder &with_metric(Metric m) + { + this->_metric = m; + return *this; + } + + IndexConfigBuilder &with_graph_load_store_strategy(GraphStoreStrategy graph_strategy) + { + this->_graph_strategy = graph_strategy; + return *this; + } + + IndexConfigBuilder &with_data_load_store_strategy(DataStoreStrategy data_strategy) + { + this->_data_strategy = data_strategy; + return *this; + } + + IndexConfigBuilder &with_dimension(size_t dimension) + { + this->_dimension = dimension; + return *this; + } + + IndexConfigBuilder &with_max_points(size_t max_points) + { + this->_max_points = max_points; + return *this; + } + + IndexConfigBuilder &is_dynamic_index(bool dynamic_index) + { + this->_dynamic_index = dynamic_index; + return *this; + } + + IndexConfigBuilder &is_enable_tags(bool enable_tags) + { + this->_enable_tags = enable_tags; + return *this; + } + + IndexConfigBuilder &is_pq_dist_build(bool pq_dist_build) + { + this->_pq_dist_build = pq_dist_build; + return *this; + } + + IndexConfigBuilder &is_concurrent_consolidate(bool concurrent_consolidate) + { + this->_concurrent_consolidate = concurrent_consolidate; + return *this; + } + + IndexConfigBuilder &is_use_opq(bool use_opq) + { + this->_use_opq = use_opq; + return *this; + } + + IndexConfigBuilder &is_filtered(bool is_filtered) + { + this->_filtered_index = is_filtered; + return *this; + } + + IndexConfigBuilder &with_num_pq_chunks(size_t num_pq_chunks) + { + this->_num_pq_chunks = num_pq_chunks; + return *this; + } + + IndexConfigBuilder &with_num_frozen_pts(size_t num_frozen_pts) + { + this->_num_frozen_pts = num_frozen_pts; + return *this; + } + + IndexConfigBuilder &with_label_type(const std::string &label_type) + { + this->_label_type = label_type; + return *this; + } + + IndexConfigBuilder &with_tag_type(const 
std::string &tag_type) + { + this->_tag_type = tag_type; + return *this; + } + + IndexConfigBuilder &with_data_type(const std::string &data_type) + { + this->_data_type = data_type; + return *this; + } + + IndexConfigBuilder &with_index_write_params(IndexWriteParameters &index_write_params) + { + this->_index_write_params = std::make_shared(index_write_params); + return *this; + } + + IndexConfigBuilder &with_index_write_params(std::shared_ptr index_write_params_ptr) + { + if (index_write_params_ptr == nullptr) + { + diskann::cout << "Passed, empty build_params while creating index config" << std::endl; + return *this; + } + this->_index_write_params = index_write_params_ptr; + return *this; + } + + IndexConfigBuilder &with_index_search_params(IndexSearchParams &search_params) + { + this->_index_search_params = std::make_shared(search_params); + return *this; + } + + IndexConfigBuilder &with_index_search_params(std::shared_ptr search_params_ptr) + { + if (search_params_ptr == nullptr) + { + diskann::cout << "Passed, empty search_params while creating index config" << std::endl; + return *this; + } + this->_index_search_params = search_params_ptr; + return *this; + } + + IndexConfig build() + { + if (_data_type == "" || _data_type.empty()) + throw ANNException("Error: data_type can not be empty", -1); + + if (_dynamic_index && _num_frozen_pts == 0) + { + _num_frozen_pts = 1; + } + + if (_dynamic_index) + { + if (_index_search_params != nullptr && _index_search_params->initial_search_list_size == 0) + throw ANNException("Error: please pass initial_search_list_size for building dynamic index.", -1); + } + + // sanity check + if (_dynamic_index && _num_frozen_pts == 0) + { + diskann::cout << "_num_frozen_pts passed as 0 for dynamic_index. Setting it to 1 for safety." 
<< std::endl; + _num_frozen_pts = 1; + } + + return IndexConfig(_data_strategy, _graph_strategy, _metric, _dimension, _max_points, _num_pq_chunks, + _num_frozen_pts, _dynamic_index, _enable_tags, _pq_dist_build, _concurrent_consolidate, + _use_opq, _filtered_index, _data_type, _tag_type, _label_type, _index_write_params, + _index_search_params); + } + + IndexConfigBuilder(const IndexConfigBuilder &) = delete; + IndexConfigBuilder &operator=(const IndexConfigBuilder &) = delete; + + private: + DataStoreStrategy _data_strategy; + GraphStoreStrategy _graph_strategy; + + Metric _metric; + size_t _dimension; + size_t _max_points; + + bool _dynamic_index = false; + bool _enable_tags = false; + bool _pq_dist_build = false; + bool _concurrent_consolidate = false; + bool _use_opq = false; + bool _filtered_index{defaults::HAS_LABELS}; + + size_t _num_pq_chunks = 0; + size_t _num_frozen_pts{defaults::NUM_FROZEN_POINTS_STATIC}; + + std::string _label_type{"uint32"}; + std::string _tag_type{"uint32"}; + std::string _data_type; + + std::shared_ptr _index_write_params; + std::shared_ptr _index_search_params; +}; +} // namespace diskann diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/index_factory.h b/packages/leann-backend-diskann/third_party/DiskANN/include/index_factory.h new file mode 100644 index 0000000..76fb0b9 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/index_factory.h @@ -0,0 +1,51 @@ +#pragma once + +#include "index.h" +#include "abstract_graph_store.h" +#include "in_mem_graph_store.h" +#include "pq_data_store.h" + +namespace diskann +{ +class IndexFactory +{ + public: + DISKANN_DLLEXPORT explicit IndexFactory(const IndexConfig &config); + DISKANN_DLLEXPORT std::unique_ptr create_instance(); + + DISKANN_DLLEXPORT static std::unique_ptr construct_graphstore( + const GraphStoreStrategy stratagy, const size_t size, const size_t reserve_graph_degree); + + template + DISKANN_DLLEXPORT static std::shared_ptr> construct_datastore(DataStoreStrategy stratagy, + size_t num_points, + size_t dimension, Metric m); + // For now PQDataStore incorporates within itself all variants of quantization that we support. In the + // future it may be necessary to introduce an AbstractPQDataStore class to spearate various quantization + // flavours. + template + DISKANN_DLLEXPORT static std::shared_ptr> construct_pq_datastore(DataStoreStrategy strategy, + size_t num_points, size_t dimension, + Metric m, size_t num_pq_chunks, + bool use_opq); + template static Distance *construct_inmem_distance_fn(Metric m); + + private: + void check_config(); + + template + std::unique_ptr create_instance(); + + std::unique_ptr create_instance(const std::string &data_type, const std::string &tag_type, + const std::string &label_type); + + template + std::unique_ptr create_instance(const std::string &tag_type, const std::string &label_type); + + template + std::unique_ptr create_instance(const std::string &label_type); + + std::unique_ptr _config; +}; + +} // namespace diskann diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/linux_aligned_file_reader.h b/packages/leann-backend-diskann/third_party/DiskANN/include/linux_aligned_file_reader.h new file mode 100644 index 0000000..d1d1e74 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/linux_aligned_file_reader.h @@ -0,0 +1,41 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
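// Minimal illustrative sketch, not part of the DiskANN headers: wiring the IndexConfigBuilder
// from index_config.h to the IndexFactory declared in index_factory.h above. All values are
// placeholders; a real build would normally also attach write/search parameters via
// with_index_write_params() / with_index_search_params(), and the create_instance() return type
// (its template argument is stripped in this dump) is assumed to be AbstractIndex.
#include <memory>

inline std::unique_ptr<diskann::AbstractIndex> make_in_memory_index_sketch()
{
    diskann::IndexConfig config = diskann::IndexConfigBuilder()
                                      .with_metric(diskann::L2)
                                      .with_dimension(128)     // placeholder vector dimension
                                      .with_max_points(10000)  // placeholder capacity
                                      .with_data_type("float") // build() rejects an empty data_type
                                      .with_data_load_store_strategy(diskann::DataStoreStrategy::MEMORY)
                                      .with_graph_load_store_strategy(diskann::GraphStoreStrategy::MEMORY)
                                      .build();

    diskann::IndexFactory factory(config);
    return factory.create_instance();
}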
+ +#pragma once +#ifndef _WINDOWS +#ifndef __APPLE__ + +#include "aligned_file_reader.h" + +class LinuxAlignedFileReader : public AlignedFileReader +{ + private: + uint64_t file_sz; + FileHandle file_desc; + io_context_t bad_ctx = (io_context_t)-1; + + public: + LinuxAlignedFileReader(); + ~LinuxAlignedFileReader(); + + IOContext &get_ctx(); + + // register thread-id for a context + void register_thread(); + + // de-register thread-id for a context + void deregister_thread(); + void deregister_all_threads(); + + // Open & close ops + // Blocking calls + void open(const std::string &fname); + void close(); + + // process batch of aligned requests in parallel + // NOTE :: blocking call + void read(std::vector &read_reqs, IOContext &ctx, bool async = false); +}; + +#endif +#endif diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/locking.h b/packages/leann-backend-diskann/third_party/DiskANN/include/locking.h new file mode 100644 index 0000000..890c24a --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/locking.h @@ -0,0 +1,20 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. +#pragma once + +#include + +#ifdef _WINDOWS +#include "windows_slim_lock.h" +#endif + +namespace diskann +{ +#ifdef _WINDOWS +using non_recursive_mutex = windows_exclusive_slim_lock; +using LockGuard = windows_exclusive_slim_lock_guard; +#else +using non_recursive_mutex = std::mutex; +using LockGuard = std::lock_guard; +#endif +} // namespace diskann diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/logger.h b/packages/leann-backend-diskann/third_party/DiskANN/include/logger.h new file mode 100644 index 0000000..0b17807 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/logger.h @@ -0,0 +1,35 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. +#pragma once + +#include +#include +#include "windows_customizations.h" + +#ifdef EXEC_ENV_OLS +#ifndef ENABLE_CUSTOM_LOGGER +#define ENABLE_CUSTOM_LOGGER +#endif // !ENABLE_CUSTOM_LOGGER +#endif // EXEC_ENV_OLS + +namespace diskann +{ +#ifdef ENABLE_CUSTOM_LOGGER +DISKANN_DLLEXPORT extern std::basic_ostream cout; +DISKANN_DLLEXPORT extern std::basic_ostream cerr; +#else +using std::cerr; +using std::cout; +#endif + +enum class DISKANN_DLLEXPORT LogLevel +{ + LL_Info = 0, + LL_Error, + LL_Count +}; + +#ifdef ENABLE_CUSTOM_LOGGER +DISKANN_DLLEXPORT void SetCustomLogger(std::function logger); +#endif +} // namespace diskann diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/logger_impl.h b/packages/leann-backend-diskann/third_party/DiskANN/include/logger_impl.h new file mode 100644 index 0000000..03c65e0 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/logger_impl.h @@ -0,0 +1,61 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include +#include + +#include "ann_exception.h" +#include "logger.h" + +namespace diskann +{ +#ifdef ENABLE_CUSTOM_LOGGER +class ANNStreamBuf : public std::basic_streambuf +{ + public: + DISKANN_DLLEXPORT explicit ANNStreamBuf(FILE *fp); + DISKANN_DLLEXPORT ~ANNStreamBuf(); + + DISKANN_DLLEXPORT bool is_open() const + { + return true; // because stdout and stderr are always open. 
+ } + DISKANN_DLLEXPORT void close(); + DISKANN_DLLEXPORT virtual int underflow(); + DISKANN_DLLEXPORT virtual int overflow(int c); + DISKANN_DLLEXPORT virtual int sync(); + + private: + FILE *_fp; + char *_buf; + int _bufIndex; + std::mutex _mutex; + LogLevel _logLevel; + + int flush(); + void logImpl(char *str, int numchars); + + // Why the two buffer-sizes? If we are running normally, we are basically + // interacting with a character output system, so we short-circuit the + // output process by keeping an empty buffer and writing each character + // to stdout/stderr. But if we are running in OLS, we have to take all + // the text that is written to diskann::cout/diskann:cerr, consolidate it + // and push it out in one-shot, because the OLS infra does not give us + // character based output. Therefore, we use a larger buffer that is large + // enough to store the longest message, and continuously add characters + // to it. When the calling code outputs a std::endl or std::flush, sync() + // will be called and will output a log level, component name, and the text + // that has been collected. (sync() is also called if the buffer is full, so + // overflows/missing text are not a concern). + // This implies calling code _must_ either print std::endl or std::flush + // to ensure that the message is written immediately. + + static const int BUFFER_SIZE = 1024; + + ANNStreamBuf(const ANNStreamBuf &); + ANNStreamBuf &operator=(const ANNStreamBuf &); +}; +#endif +} // namespace diskann diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/math_utils.h b/packages/leann-backend-diskann/third_party/DiskANN/include/math_utils.h new file mode 100644 index 0000000..83d189f --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/math_utils.h @@ -0,0 +1,87 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
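// Minimal illustrative sketch, not part of the DiskANN headers: as the ANNStreamBuf comment above
// stresses, text sent to diskann::cout / diskann::cerr is only guaranteed to be emitted once
// std::endl or std::flush is written, so log statements should always end with one of them. The
// helper below is hypothetical and assumes logger.h is in scope.
#include <cstddef>
#include <ostream>

inline void log_progress(std::size_t done, std::size_t total)
{
    diskann::cout << "processed " << done << " / " << total << " points" << std::endl; // endl flushes
}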
+ +#pragma once + +#include "common_includes.h" +#include "utils.h" + +namespace math_utils +{ + +float calc_distance(float *vec_1, float *vec_2, size_t dim); + +// compute l2-squared norms of data stored in row major num_points * dim, +// needs +// to be pre-allocated +void compute_vecs_l2sq(float *vecs_l2sq, float *data, const size_t num_points, const size_t dim); + +void rotate_data_randomly(float *data, size_t num_points, size_t dim, float *rot_mat, float *&new_mat, + bool transpose_rot = false); + +// calculate closest center to data of num_points * dim (row major) +// centers is num_centers * dim (row major) +// data_l2sq has pre-computed squared norms of data +// centers_l2sq has pre-computed squared norms of centers +// pre-allocated center_index will contain id of k nearest centers +// pre-allocated dist_matrix shound be num_points * num_centers and contain +// squared distances + +// Ideally used only by compute_closest_centers +void compute_closest_centers_in_block(const float *const data, const size_t num_points, const size_t dim, + const float *const centers, const size_t num_centers, + const float *const docs_l2sq, const float *const centers_l2sq, + uint32_t *center_index, float *const dist_matrix, size_t k = 1); + +// Given data in num_points * new_dim row major +// Pivots stored in full_pivot_data as k * new_dim row major +// Calculate the closest pivot for each point and store it in vector +// closest_centers_ivf (which needs to be allocated outside) +// Additionally, if inverted index is not null (and pre-allocated), it will +// return inverted index for each center Additionally, if pts_norms_squared is +// not null, then it will assume that point norms are pre-computed and use +// those +// values + +void compute_closest_centers(float *data, size_t num_points, size_t dim, float *pivot_data, size_t num_centers, + size_t k, uint32_t *closest_centers_ivf, std::vector *inverted_index = NULL, + float *pts_norms_squared = NULL); + +// if to_subtract is 1, will subtract nearest center from each row. Else will +// add. Output will be in data_load iself. +// Nearest centers need to be provided in closst_centers. + +void process_residuals(float *data_load, size_t num_points, size_t dim, float *cur_pivot_data, size_t num_centers, + uint32_t *closest_centers, bool to_subtract); + +} // namespace math_utils + +namespace kmeans +{ + +// run Lloyds one iteration +// Given data in row major num_points * dim, and centers in row major +// num_centers * dim +// And squared lengths of data points, output the closest center to each data +// point, update centers, and also return inverted index. +// If closest_centers == NULL, will allocate memory and return. +// Similarly, if closest_docs == NULL, will allocate memory and return. 
+ +float lloyds_iter(float *data, size_t num_points, size_t dim, float *centers, size_t num_centers, float *docs_l2sq, + std::vector *closest_docs, uint32_t *&closest_center); + +// Run Lloyds until max_reps or stopping criterion +// If you pass NULL for closest_docs and closest_center, it will NOT return +// the results, else it will assume appriate allocation as closest_docs = new +// vector [num_centers], and closest_center = new size_t[num_points] +// Final centers are output in centers as row major num_centers * dim +// +float run_lloyds(float *data, size_t num_points, size_t dim, float *centers, const size_t num_centers, + const size_t max_reps, std::vector *closest_docs, uint32_t *closest_center); + +// assumes already memory allocated for pivot_data as new +// float[num_centers*dim] and select randomly num_centers points as pivots +void selecting_pivots(float *data, size_t num_points, size_t dim, float *pivot_data, size_t num_centers); + +void kmeanspp_selecting_pivots(float *data, size_t num_points, size_t dim, float *pivot_data, size_t num_centers); +} // namespace kmeans diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/memory_mapper.h b/packages/leann-backend-diskann/third_party/DiskANN/include/memory_mapper.h new file mode 100644 index 0000000..75faca1 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/memory_mapper.h @@ -0,0 +1,43 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#ifndef _WINDOWS +#include +#include +#include +#include +#include + +#else +#include +#endif +#include + +namespace diskann +{ +class MemoryMapper +{ + private: +#ifndef _WINDOWS + int _fd; +#else + HANDLE _bareFile; + HANDLE _fd; + +#endif + char *_buf; + size_t _fileSize; + const char *_fileName; + + public: + MemoryMapper(const char *filename); + MemoryMapper(const std::string &filename); + + char *getBuf(); + size_t getFileSize(); + + ~MemoryMapper(); +}; +} // namespace diskann diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/natural_number_map.h b/packages/leann-backend-diskann/third_party/DiskANN/include/natural_number_map.h new file mode 100644 index 0000000..e846882 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/natural_number_map.h @@ -0,0 +1,86 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include +#include +#include + +#include + +namespace diskann +{ +// A map whose key is a natural number (from 0 onwards) and maps to a value. +// Made as both memory and performance efficient map for scenario such as +// DiskANN location-to-tag map. There, the pool of numbers is consecutive from +// zero to some max value, and it's expected that most if not all keys from 0 +// up to some current maximum will be present in the map. The memory usage of +// the map is determined by the largest inserted key since it uses vector as a +// backing store and bitset for presence indication. +// +// Thread-safety: this class is not thread-safe in general. +// Exception: multiple read-only operations are safe on the object only if +// there are no writers to it in parallel. +template class natural_number_map +{ + public: + static_assert(std::is_trivial::value, "Key must be a trivial type"); + + // Represents a reference to a element in the map. Used while iterating + // over map entries. 
+ struct position + { + size_t _key; + // The number of keys that were enumerated when iterating through the + // map so far. Used to early-terminate enumeration when ithere are no + // more entries in the map. + size_t _keys_already_enumerated; + + // Returns whether it's valid to access the element at this position in + // the map. + bool is_valid() const; + }; + + natural_number_map(); + + void reserve(size_t count); + size_t size() const; + + void set(Key key, Value value); + void erase(Key key); + + bool contains(Key key) const; + bool try_get(Key key, Value &value) const; + + // Returns the value at the specified position. Prerequisite: position is + // valid. + Value get(const position &pos) const; + + // Finds the first element in the map, if any. Invalidated by changes in the + // map. + position find_first() const; + + // Finds the next element in the map after the specified position. + // Invalidated by changes in the map. + position find_next(const position &after_position) const; + + void clear(); + + private: + // Number of entries in the map. Not the same as size() of the + // _values_vector below. + size_t _size; + + // Array of values. The key is the index of the value. + std::vector _values_vector; + + // Values that are in the set have the corresponding bit index set + // to 1. + // + // Use a pointer here to allow for forward declaration of dynamic_bitset + // in public headers to avoid making boost a dependency for clients + // of DiskANN. + std::unique_ptr> _values_bitset; +}; +} // namespace diskann diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/natural_number_set.h b/packages/leann-backend-diskann/third_party/DiskANN/include/natural_number_set.h new file mode 100644 index 0000000..ec5b827 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/natural_number_set.h @@ -0,0 +1,50 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include +#include + +#include "boost_dynamic_bitset_fwd.h" + +namespace diskann +{ +// A set of natural numbers (from 0 onwards). Made for scenario where the +// pool of numbers is consecutive from zero to some max value and very +// efficient methods for "add to set", "get any value from set", "is in set" +// are needed. The memory usage of the set is determined by the largest +// number of inserted entries (uses a vector as a backing store) as well as +// the largest value to be placed in it (uses bitset as well). +// +// Thread-safety: this class is not thread-safe in general. +// Exception: multiple read-only operations (e.g. is_in_set, empty, size) are +// safe on the object only if there are no writers to it in parallel. +template class natural_number_set +{ + public: + static_assert(std::is_trivial::value, "Identifier must be a trivial type"); + + natural_number_set(); + + bool is_empty() const; + void reserve(size_t count); + void insert(T id); + T pop_any(); + void clear(); + size_t size() const; + bool is_in_set(T id) const; + + private: + // Values that are currently in set. + std::vector _values_vector; + + // Values that are in the set have the corresponding bit index set + // to 1. + // + // Use a pointer here to allow for forward declaration of dynamic_bitset + // in public headers to avoid making boost a dependency for clients + // of DiskANN. 
+ std::unique_ptr> _values_bitset; +}; +} // namespace diskann diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/neighbor.h b/packages/leann-backend-diskann/third_party/DiskANN/include/neighbor.h new file mode 100644 index 0000000..d7c0c25 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/neighbor.h @@ -0,0 +1,152 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include +#include +#include +#include "utils.h" + +namespace diskann +{ + +struct Neighbor +{ + unsigned id; + float distance; + bool expanded; + + Neighbor() = default; + + Neighbor(unsigned id, float distance) : id{id}, distance{distance}, expanded(false) + { + } + + inline bool operator<(const Neighbor &other) const + { + return distance < other.distance || (distance == other.distance && id < other.id); + } + + inline bool operator==(const Neighbor &other) const + { + return (id == other.id); + } +}; + +// Invariant: after every `insert` and `closest_unexpanded()`, `_cur` points to +// the first Neighbor which is unexpanded. +class NeighborPriorityQueue +{ + public: + NeighborPriorityQueue() : _size(0), _capacity(0), _cur(0) + { + } + + explicit NeighborPriorityQueue(size_t capacity) : _size(0), _capacity(capacity), _cur(0), _data(capacity + 1) + { + } + + // Inserts the item ordered into the set up to the sets capacity. + // The item will be dropped if it is the same id as an exiting + // set item or it has a greated distance than the final + // item in the set. The set cursor that is used to pop() the + // next item will be set to the lowest index of an uncheck item + void insert(const Neighbor &nbr) + { + if (_size == _capacity && _data[_size - 1] < nbr) + { + return; + } + + size_t lo = 0, hi = _size; + while (lo < hi) + { + size_t mid = (lo + hi) >> 1; + if (nbr < _data[mid]) + { + hi = mid; + // Make sure the same id isn't inserted into the set + } + else if (_data[mid].id == nbr.id) + { + return; + } + else + { + lo = mid + 1; + } + } + + if (lo < _capacity) + { + std::memmove(&_data[lo + 1], &_data[lo], (_size - lo) * sizeof(Neighbor)); + } + _data[lo] = {nbr.id, nbr.distance}; + if (_size < _capacity) + { + _size++; + } + if (lo < _cur) + { + _cur = lo; + } + } + + Neighbor closest_unexpanded() + { + _data[_cur].expanded = true; + size_t pre = _cur; + while (_cur < _size && _data[_cur].expanded) + { + _cur++; + } + return _data[pre]; + } + + bool has_unexpanded_node() const + { + return _cur < _size; + } + + size_t size() const + { + return _size; + } + + size_t capacity() const + { + return _capacity; + } + + void reserve(size_t capacity) + { + if (capacity + 1 > _data.size()) + { + _data.resize(capacity + 1); + } + _capacity = capacity; + } + + Neighbor &operator[](size_t i) + { + return _data[i]; + } + + Neighbor operator[](size_t i) const + { + return _data[i]; + } + + void clear() + { + _size = 0; + _cur = 0; + } + + private: + size_t _size, _capacity, _cur; + std::vector _data; +}; + +} // namespace diskann diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/parameters.h b/packages/leann-backend-diskann/third_party/DiskANN/include/parameters.h new file mode 100644 index 0000000..0206814 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/parameters.h @@ -0,0 +1,119 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
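The NeighborPriorityQueue in neighbor.h above keeps its cursor on the best unexpanded candidate, which reduces the usual best-first graph walk to a simple loop. A minimal sketch of that loop; the fetch_neighbors / distance_to_query callbacks are hypothetical stand-ins for the index's adjacency list and distance code, not part of this header.

#include <cstdint>
#include <vector>
#include "neighbor.h"  // NeighborPriorityQueue, Neighbor

// Hypothetical callbacks, not part of neighbor.h.
using FetchNeighbors = std::vector<uint32_t> (*)(uint32_t node_id);
using DistanceToQuery = float (*)(uint32_t node_id);

void greedy_search_sketch(uint32_t entry_point, size_t L,
                          FetchNeighbors fetch_neighbors,
                          DistanceToQuery distance_to_query)
{
    diskann::NeighborPriorityQueue frontier(L);  // keeps at most the L best candidates
    frontier.insert({entry_point, distance_to_query(entry_point)});

    while (frontier.has_unexpanded_node())
    {
        // Marks the candidate as expanded and advances the internal cursor.
        diskann::Neighbor best = frontier.closest_unexpanded();
        for (uint32_t nbr : fetch_neighbors(best.id))
            frontier.insert({nbr, distance_to_query(nbr)});  // duplicates and far nodes are dropped
    }
    // frontier[0] .. frontier[frontier.size() - 1] now hold the best candidates in sorted order.
}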
+ +#pragma once +#include +#include +#include + +#include "omp.h" +#include "defaults.h" + +namespace diskann +{ + +class IndexWriteParameters + +{ + public: + const uint32_t search_list_size; // L + const uint32_t max_degree; // R + const bool saturate_graph; + const uint32_t max_occlusion_size; // C + const float alpha; + const uint32_t num_threads; + const uint32_t filter_list_size; // Lf + + IndexWriteParameters(const uint32_t search_list_size, const uint32_t max_degree, const bool saturate_graph, + const uint32_t max_occlusion_size, const float alpha, const uint32_t num_threads, + const uint32_t filter_list_size) + : search_list_size(search_list_size), max_degree(max_degree), saturate_graph(saturate_graph), + max_occlusion_size(max_occlusion_size), alpha(alpha), num_threads(num_threads), + filter_list_size(filter_list_size) + { + } + + friend class IndexWriteParametersBuilder; +}; + +class IndexSearchParams +{ + public: + IndexSearchParams(const uint32_t initial_search_list_size, const uint32_t num_search_threads) + : initial_search_list_size(initial_search_list_size), num_search_threads(num_search_threads) + { + } + const uint32_t initial_search_list_size; // search L + const uint32_t num_search_threads; // search threads +}; + +class IndexWriteParametersBuilder +{ + /** + * Fluent builder pattern to keep track of the 7 non-default properties + * and their order. The basic ctor was getting unwieldy. + */ + public: + IndexWriteParametersBuilder(const uint32_t search_list_size, // L + const uint32_t max_degree // R + ) + : _search_list_size(search_list_size), _max_degree(max_degree) + { + } + + IndexWriteParametersBuilder &with_max_occlusion_size(const uint32_t max_occlusion_size) + { + _max_occlusion_size = max_occlusion_size; + return *this; + } + + IndexWriteParametersBuilder &with_saturate_graph(const bool saturate_graph) + { + _saturate_graph = saturate_graph; + return *this; + } + + IndexWriteParametersBuilder &with_alpha(const float alpha) + { + _alpha = alpha; + return *this; + } + + IndexWriteParametersBuilder &with_num_threads(const uint32_t num_threads) + { + _num_threads = num_threads == 0 ? omp_get_num_procs() : num_threads; + return *this; + } + + IndexWriteParametersBuilder &with_filter_list_size(const uint32_t filter_list_size) + { + _filter_list_size = filter_list_size == 0 ? 
_search_list_size : filter_list_size; + return *this; + } + + IndexWriteParameters build() const + { + return IndexWriteParameters(_search_list_size, _max_degree, _saturate_graph, _max_occlusion_size, _alpha, + _num_threads, _filter_list_size); + } + + IndexWriteParametersBuilder(const IndexWriteParameters &wp) + : _search_list_size(wp.search_list_size), _max_degree(wp.max_degree), + _max_occlusion_size(wp.max_occlusion_size), _saturate_graph(wp.saturate_graph), _alpha(wp.alpha), + _filter_list_size(wp.filter_list_size) + { + } + IndexWriteParametersBuilder(const IndexWriteParametersBuilder &) = delete; + IndexWriteParametersBuilder &operator=(const IndexWriteParametersBuilder &) = delete; + + private: + uint32_t _search_list_size{}; + uint32_t _max_degree{}; + uint32_t _max_occlusion_size{defaults::MAX_OCCLUSION_SIZE}; + bool _saturate_graph{defaults::SATURATE_GRAPH}; + float _alpha{defaults::ALPHA}; + uint32_t _num_threads{defaults::NUM_THREADS}; + uint32_t _filter_list_size{defaults::FILTER_LIST_SIZE}; +}; + +} // namespace diskann diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/partition.h b/packages/leann-backend-diskann/third_party/DiskANN/include/partition.h new file mode 100644 index 0000000..c2c4c76 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/partition.h @@ -0,0 +1,49 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once +#include +#include +#include +#include +#include + +#include "neighbor.h" +#include "parameters.h" +#include "tsl/robin_set.h" +#include "utils.h" + +#include "windows_customizations.h" + +template +void gen_random_slice(const std::string base_file, const std::string output_prefix, double sampling_rate); + +template +void gen_random_slice(const std::string data_file, double p_val, float *&sampled_data, size_t &slice_size, + size_t &ndims); + +template +void gen_random_slice(const T *inputdata, size_t npts, size_t ndims, double p_val, float *&sampled_data, + size_t &slice_size); + +int estimate_cluster_sizes(float *test_data_float, size_t num_test, float *pivots, const size_t num_centers, + const size_t dim, const size_t k_base, std::vector &cluster_sizes); + +template +int shard_data_into_clusters(const std::string data_file, float *pivots, const size_t num_centers, const size_t dim, + const size_t k_base, std::string prefix_path); + +template +int shard_data_into_clusters_only_ids(const std::string data_file, float *pivots, const size_t num_centers, + const size_t dim, const size_t k_base, std::string prefix_path); + +template +int retrieve_shard_data_from_ids(const std::string data_file, std::string idmap_filename, std::string data_filename); + +template +int partition(const std::string data_file, const float sampling_rate, size_t num_centers, size_t max_k_means_reps, + const std::string prefix_path, size_t k_base); + +template +int partition_with_ram_budget(const std::string data_file, const double sampling_rate, double ram_budget, + size_t graph_degree, const std::string prefix_path, size_t k_base); diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/percentile_stats.h b/packages/leann-backend-diskann/third_party/DiskANN/include/percentile_stats.h new file mode 100644 index 0000000..7932575 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/percentile_stats.h @@ -0,0 +1,65 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
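The IndexWriteParametersBuilder declared in parameters.h above keeps only L and R mandatory, so call sites typically chain the remaining knobs through the fluent setters. A minimal sketch; the concrete values here are placeholders, and anything not set falls back to the defaults::* constants referenced above.

#include "parameters.h"

diskann::IndexWriteParameters make_build_params()
{
    // L = 100 (build-time search list), R = 64 (max degree); the rest uses defaults
    // unless overridden via the fluent setters.
    return diskann::IndexWriteParametersBuilder(/*search_list_size=*/100, /*max_degree=*/64)
        .with_alpha(1.2f)
        .with_saturate_graph(false)
        .with_num_threads(0)  // 0 => omp_get_num_procs(), per with_num_threads() above
        .build();
}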
+ +#pragma once + +#include +#include +#include +#include +#ifdef _WINDOWS +#include +#endif +#include +#include + +#include "distance.h" +#include "parameters.h" + +namespace diskann +{ +struct QueryStats +{ + float total_us = 0; // total time to process query in micros + float io_us = 0; // total time spent in IO + float cpu_us = 0; // total time spent in CPU + + unsigned n_4k = 0; // # of 4kB reads + unsigned n_8k = 0; // # of 8kB reads + unsigned n_12k = 0; // # of 12kB reads + unsigned n_ios = 0; // total # of IOs issued + unsigned read_size = 0; // total # of bytes read + unsigned n_cmps_saved = 0; // # cmps saved + unsigned n_cmps = 0; // # cmps + unsigned n_cache_hits = 0; // # cache_hits + unsigned n_hops = 0; // # search hops +}; + +template +inline T get_percentile_stats(QueryStats *stats, uint64_t len, float percentile, + const std::function &member_fn) +{ + std::vector vals(len); + for (uint64_t i = 0; i < len; i++) + { + vals[i] = member_fn(stats[i]); + } + + std::sort(vals.begin(), vals.end(), [](const T &left, const T &right) { return left < right; }); + + auto retval = vals[(uint64_t)(percentile * len)]; + vals.clear(); + return retval; +} + +template +inline double get_mean_stats(QueryStats *stats, uint64_t len, const std::function &member_fn) +{ + double avg = 0; + for (uint64_t i = 0; i < len; i++) + { + avg += (double)member_fn(stats[i]); + } + return avg / len; +} +} // namespace diskann diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/pq.h b/packages/leann-backend-diskann/third_party/DiskANN/include/pq.h new file mode 100644 index 0000000..3e6119f --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/pq.h @@ -0,0 +1,93 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
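Given an array of per-query QueryStats filled in during search, the two helpers in percentile_stats.h above reduce it to aggregate numbers, with the member to aggregate selected by a callable. A minimal sketch; the callable's signature is inferred from how member_fn is applied in the bodies above.

#include <cstdint>
#include <functional>
#include <iostream>
#include "percentile_stats.h"

void summarize_latency(diskann::QueryStats *stats, uint64_t num_queries)
{
    auto total_us = [](const diskann::QueryStats &s) { return s.total_us; };

    double mean_us = diskann::get_mean_stats<float>(stats, num_queries, total_us);
    float p99_us = diskann::get_percentile_stats<float>(stats, num_queries, 0.99f, total_us);

    std::cout << "mean latency: " << mean_us << " us, p99: " << p99_us << " us\n";
}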
+ +#pragma once + +#include "utils.h" +#include "pq_common.h" + +namespace diskann +{ +class FixedChunkPQTable +{ + float *tables = nullptr; // pq_tables = float array of size [256 * ndims] + uint64_t ndims = 0; // ndims = true dimension of vectors + uint64_t n_chunks = 0; + bool use_rotation = false; + uint32_t *chunk_offsets = nullptr; + float *centroid = nullptr; + float *tables_tr = nullptr; // same as pq_tables, but col-major + float *rotmat_tr = nullptr; + + public: + FixedChunkPQTable(); + + virtual ~FixedChunkPQTable(); + +#ifdef EXEC_ENV_OLS + void load_pq_centroid_bin(MemoryMappedFiles &files, const char *pq_table_file, size_t num_chunks); +#else + void load_pq_centroid_bin(const char *pq_table_file, size_t num_chunks); +#endif + + uint32_t get_num_chunks(); + + void preprocess_query(float *query_vec); + + // assumes pre-processed query + void populate_chunk_distances(const float *query_vec, float *dist_vec); + + float l2_distance(const float *query_vec, uint8_t *base_vec); + + float inner_product(const float *query_vec, uint8_t *base_vec); + + // assumes no rotation is involved + void inflate_vector(uint8_t *base_vec, float *out_vec); + + void populate_chunk_inner_products(const float *query_vec, float *dist_vec); +}; + +void aggregate_coords(const std::vector &ids, const uint8_t *all_coords, const uint64_t ndims, uint8_t *out); + +void pq_dist_lookup(const uint8_t *pq_ids, const size_t n_pts, const size_t pq_nchunks, const float *pq_dists, + std::vector &dists_out); + +// Need to replace calls to these with calls to vector& based functions above +void aggregate_coords(const unsigned *ids, const uint64_t n_ids, const uint8_t *all_coords, const uint64_t ndims, + uint8_t *out); + +void pq_dist_lookup(const uint8_t *pq_ids, const size_t n_pts, const size_t pq_nchunks, const float *pq_dists, + float *dists_out); + +DISKANN_DLLEXPORT int generate_pq_pivots(const float *const train_data, size_t num_train, unsigned dim, + unsigned num_centers, unsigned num_pq_chunks, unsigned max_k_means_reps, + std::string pq_pivots_path, bool make_zero_mean = false); + +DISKANN_DLLEXPORT int generate_opq_pivots(const float *train_data, size_t num_train, unsigned dim, unsigned num_centers, + unsigned num_pq_chunks, std::string opq_pivots_path, + bool make_zero_mean = false); + +DISKANN_DLLEXPORT int generate_pq_pivots_simplified(const float *train_data, size_t num_train, size_t dim, + size_t num_pq_chunks, std::vector &pivot_data_vector); + +template +int generate_pq_data_from_pivots(const std::string &data_file, unsigned num_centers, unsigned num_pq_chunks, + const std::string &pq_pivots_path, const std::string &pq_compressed_vectors_path, + bool use_opq = false); + +DISKANN_DLLEXPORT int generate_pq_data_from_pivots_simplified(const float *data, const size_t num, + const float *pivot_data, const size_t pivots_num, + const size_t dim, const size_t num_pq_chunks, + std::vector &pq); + +template +void generate_disk_quantized_data(const std::string &data_file_to_use, const std::string &disk_pq_pivots_path, + const std::string &disk_pq_compressed_vectors_path, + const diskann::Metric compareMetric, const double p_val, size_t &disk_pq_dims); + +template +void generate_quantized_data(const std::string &data_file_to_use, const std::string &pq_pivots_path, + const std::string &pq_compressed_vectors_path, const diskann::Metric compareMetric, + const double p_val, const uint64_t num_pq_chunks, const bool use_opq, + const std::string &codebook_prefix = ""); +} // namespace diskann diff --git 
a/packages/leann-backend-diskann/third_party/DiskANN/include/pq_common.h b/packages/leann-backend-diskann/third_party/DiskANN/include/pq_common.h new file mode 100644 index 0000000..c6a3a57 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/pq_common.h @@ -0,0 +1,30 @@ +#pragma once + +#include +#include + +#define NUM_PQ_BITS 8 +#define NUM_PQ_CENTROIDS (1 << NUM_PQ_BITS) +#define MAX_OPQ_ITERS 20 +#define NUM_KMEANS_REPS_PQ 12 +#define MAX_PQ_TRAINING_SET_SIZE 256000 +#define MAX_PQ_CHUNKS 512 + +namespace diskann +{ +inline std::string get_quantized_vectors_filename(const std::string &prefix, bool use_opq, uint32_t num_chunks) +{ + return prefix + (use_opq ? "_opq" : "pq") + std::to_string(num_chunks) + "_compressed.bin"; +} + +inline std::string get_pivot_data_filename(const std::string &prefix, bool use_opq, uint32_t num_chunks) +{ + return prefix + (use_opq ? "_opq" : "pq") + std::to_string(num_chunks) + "_pivots.bin"; +} + +inline std::string get_rotation_matrix_suffix(const std::string &pivot_data_filename) +{ + return pivot_data_filename + "_rotation_matrix.bin"; +} + +} // namespace diskann diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/pq_data_store.h b/packages/leann-backend-diskann/third_party/DiskANN/include/pq_data_store.h new file mode 100644 index 0000000..7c0cb5f --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/pq_data_store.h @@ -0,0 +1,97 @@ +#pragma once +#include +#include "distance.h" +#include "quantized_distance.h" +#include "pq.h" +#include "abstract_data_store.h" + +namespace diskann +{ +// REFACTOR TODO: By default, the PQDataStore is an in-memory datastore because both Vamana and +// DiskANN treat it the same way. But with DiskPQ, that may need to change. +template class PQDataStore : public AbstractDataStore +{ + + public: + PQDataStore(size_t dim, location_t num_points, size_t num_pq_chunks, std::unique_ptr> distance_fn, + std::unique_ptr> pq_distance_fn); + PQDataStore(const PQDataStore &) = delete; + PQDataStore &operator=(const PQDataStore &) = delete; + ~PQDataStore(); + + // Load quantized vectors from a set of files. Here filename is treated + // as a prefix and the files are assumed to be named with DiskANN + // conventions. + virtual location_t load(const std::string &file_prefix) override; + + // Save quantized vectors to a set of files whose names start with + // file_prefix. + // Currently, the plan is to save the quantized vectors to the quantized + // vectors file. + virtual size_t save(const std::string &file_prefix, const location_t num_points) override; + + // Since base class function is pure virtual, we need to declare it here, even though alignent concept is not needed + // for Quantized data stores. 
+ virtual size_t get_aligned_dim() const override; + + // Populate quantized data from unaligned data using PQ functionality + virtual void populate_data(const data_t *vectors, const location_t num_pts) override; + virtual void populate_data(const std::string &filename, const size_t offset) override; + + virtual void extract_data_to_bin(const std::string &filename, const location_t num_pts) override; + + virtual void get_vector(const location_t i, data_t *target) const override; + virtual void set_vector(const location_t i, const data_t *const vector) override; + virtual void prefetch_vector(const location_t loc) override; + + virtual void move_vectors(const location_t old_location_start, const location_t new_location_start, + const location_t num_points) override; + virtual void copy_vectors(const location_t from_loc, const location_t to_loc, const location_t num_points) override; + + virtual void preprocess_query(const data_t *query, AbstractScratch *scratch) const override; + + virtual float get_distance(const data_t *query, const location_t loc) const override; + virtual float get_distance(const location_t loc1, const location_t loc2) const override; + + // NOTE: Caller must invoke "PQDistance->preprocess_query" ONCE before calling + // this function. + virtual void get_distance(const data_t *preprocessed_query, const location_t *locations, + const uint32_t location_count, float *distances, + AbstractScratch *scratch_space) const override; + + // NOTE: Caller must invoke "PQDistance->preprocess_query" ONCE before calling + // this function. + virtual void get_distance(const data_t *preprocessed_query, const std::vector &ids, + std::vector &distances, AbstractScratch *scratch_space) const override; + + // We are returning the distance function that is used for full precision + // vectors here, not the PQ distance function. This is because the callers + // all are expecting a Distance not QuantizedDistance. + virtual Distance *get_dist_fn() const override; + + virtual location_t calculate_medoid() const override; + + virtual size_t get_alignment_factor() const override; + + protected: + virtual location_t expand(const location_t new_size) override; + virtual location_t shrink(const location_t new_size) override; + + virtual location_t load_impl(const std::string &filename); +#ifdef EXEC_ENV_OLS + virtual location_t load_impl(AlignedFileReader &reader); +#endif + + private: + uint8_t *_quantized_data = nullptr; + size_t _num_chunks = 0; + + // REFACTOR TODO: Doing this temporarily before refactoring OPQ into + // its own class. Remove later. + bool _use_opq = false; + + Metric _distance_metric; + std::unique_ptr> _distance_fn = nullptr; + std::unique_ptr> _pq_distance_fn = nullptr; +}; +} // namespace diskann diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/pq_flash_index.h b/packages/leann-backend-diskann/third_party/DiskANN/include/pq_flash_index.h new file mode 100644 index 0000000..174df5c --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/pq_flash_index.h @@ -0,0 +1,286 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
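During graph traversal, the PQ machinery declared in pq.h above is used in a fixed sequence: preprocess the query once, expand it into per-chunk distance tables, then score batches of neighbor PQ codes against those tables. A minimal sketch of that sequence; the vector element types and buffer layout are assumptions, while the 256 centroids per chunk follows NUM_PQ_CENTROIDS in pq_common.h.

#include <cstdint>
#include <vector>
#include "pq.h"

// Sketch: pq_table is assumed already loaded via load_pq_centroid_bin(), and
// all_pq_codes holds the n_chunks-byte PQ code of every point, row major.
void score_candidates(diskann::FixedChunkPQTable &pq_table, float *query,
                      const uint8_t *all_pq_codes,
                      const std::vector<unsigned> &candidate_ids,
                      std::vector<float> &dists_out)
{
    const size_t n_chunks = pq_table.get_num_chunks();

    pq_table.preprocess_query(query);  // once per query

    // One partial distance per (chunk, centroid) pair: 256 centroids per chunk.
    std::vector<float> chunk_dists(256 * n_chunks);
    pq_table.populate_chunk_distances(query, chunk_dists.data());

    // Gather the candidates' PQ codes, then sum the pre-computed chunk distances per candidate.
    std::vector<uint8_t> gathered(candidate_ids.size() * n_chunks);
    diskann::aggregate_coords(candidate_ids, all_pq_codes, n_chunks, gathered.data());
    diskann::pq_dist_lookup(gathered.data(), candidate_ids.size(), n_chunks,
                            chunk_dists.data(), dists_out);
}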
+ +#pragma once +#include "common_includes.h" + +#include "aligned_file_reader.h" +#include "concurrent_queue.h" +#include "neighbor.h" +#include "parameters.h" +#include "percentile_stats.h" +#include "pq.h" +#include "utils.h" +#include "windows_customizations.h" +#include "scratch.h" +#include "tsl/robin_map.h" +#include "tsl/robin_set.h" + +#define FULL_PRECISION_REORDER_MULTIPLIER 3 + +namespace diskann +{ + +template class PQFlashIndex +{ + public: + DISKANN_DLLEXPORT PQFlashIndex(std::shared_ptr &fileReader, + std::shared_ptr &graphReader, + diskann::Metric metric = diskann::Metric::L2); + DISKANN_DLLEXPORT ~PQFlashIndex(); + +#ifdef EXEC_ENV_OLS + DISKANN_DLLEXPORT int load(diskann::MemoryMappedFiles &files, uint32_t num_threads, const char *index_prefix, + const char *pq_prefix = nullptr); +#else + // load compressed data, and obtains the handle to the disk-resident index + DISKANN_DLLEXPORT int load(uint32_t num_threads, const char *index_prefix, const char *pq_prefix = nullptr, + const char *partition_prefix = nullptr); +#endif + +#ifdef EXEC_ENV_OLS + DISKANN_DLLEXPORT int load_from_separate_paths(diskann::MemoryMappedFiles &files, uint32_t num_threads, + const char *index_filepath, const char *pivots_filepath, + const char *compressed_filepath, const char *graph_file); +#else + DISKANN_DLLEXPORT int load_from_separate_paths(uint32_t num_threads, const char *index_filepath, + const char *pivots_filepath, const char *compressed_filepath, + const char *graph_file, const char *partition_file); +#endif + + DISKANN_DLLEXPORT void load_cache_list(std::vector &node_list); + +#ifdef EXEC_ENV_OLS + DISKANN_DLLEXPORT void generate_cache_list_from_sample_queries(MemoryMappedFiles &files, std::string sample_bin, + uint64_t l_search, uint64_t beamwidth, + uint64_t num_nodes_to_cache, uint32_t nthreads, + std::vector &node_list); +#else + DISKANN_DLLEXPORT void generate_cache_list_from_sample_queries(std::string sample_bin, uint64_t l_search, + uint64_t beamwidth, uint64_t num_nodes_to_cache, + uint32_t num_threads, + std::vector &node_list); +#endif + + DISKANN_DLLEXPORT void cache_bfs_levels(uint64_t num_nodes_to_cache, std::vector &node_list, + const bool shuffle = false); + + DISKANN_DLLEXPORT void cached_beam_search(const T *query, const uint64_t k_search, const uint64_t l_search, + uint64_t *res_ids, float *res_dists, const uint64_t beam_width, + const bool use_reorder_data = false, QueryStats *stats = nullptr, + const bool USE_DEFERRED_FETCH = false, + const bool skip_search_reorder = false, + const bool recompute_beighbor_embeddings = false, + const bool dedup_node_dis = false, float prune_ratio = 0, + const bool batch_recompute = false, bool global_pruning = false); + + DISKANN_DLLEXPORT void cached_beam_search(const T *query, const uint64_t k_search, const uint64_t l_search, + uint64_t *res_ids, float *res_dists, const uint64_t beam_width, + const bool use_filter, const LabelT &filter_label, + const bool use_reorder_data = false, QueryStats *stats = nullptr, + const bool USE_DEFERRED_FETCH = false, + const bool skip_search_reorder = false, + const bool recompute_beighbor_embeddings = false, + const bool dedup_node_dis = false, float prune_ratio = 0, + const bool batch_recompute = false, bool global_pruning = false); + + DISKANN_DLLEXPORT void cached_beam_search(const T *query, const uint64_t k_search, const uint64_t l_search, + uint64_t *res_ids, float *res_dists, const uint64_t beam_width, + const uint32_t io_limit, const bool use_reorder_data = false, + QueryStats *stats = 
nullptr, const bool USE_DEFERRED_FETCH = false, + const bool skip_search_reorder = false, + const bool recompute_beighbor_embeddings = false, + const bool dedup_node_dis = false, float prune_ratio = 0, + const bool batch_recompute = false, bool global_pruning = false); + + DISKANN_DLLEXPORT void cached_beam_search(const T *query, const uint64_t k_search, const uint64_t l_search, + uint64_t *res_ids, float *res_dists, const uint64_t beam_width, + const bool use_filter, const LabelT &filter_label, + const uint32_t io_limit, const bool use_reorder_data = false, + QueryStats *stats = nullptr, const bool USE_DEFERRED_FETCH = false, + const bool skip_search_reorder = false, + const bool recompute_beighbor_embeddings = false, + const bool dedup_node_dis = false, float prune_ratio = 0, + const bool batch_recompute = false, bool global_pruning = false); + + DISKANN_DLLEXPORT LabelT get_converted_label(const std::string &filter_label); + + DISKANN_DLLEXPORT uint32_t range_search(const T *query1, const double range, const uint64_t min_l_search, + const uint64_t max_l_search, std::vector &indices, + std::vector &distances, const uint64_t min_beam_width, + QueryStats *stats = nullptr); + + DISKANN_DLLEXPORT uint64_t get_data_dim(); + + std::shared_ptr &reader; + + DISKANN_DLLEXPORT diskann::Metric get_metric(); + + // + // node_ids: input list of node_ids to be read + // coord_buffers: pointers to pre-allocated buffers that coords need to copied to. If null, dont copy. + // nbr_buffers: pre-allocated buffers to copy neighbors into + // + // returns a vector of bool one for each node_id: true if read is success, else false + // + DISKANN_DLLEXPORT std::vector read_nodes(const std::vector &node_ids, + std::vector &coord_buffers, + std::vector> &nbr_buffers); + + DISKANN_DLLEXPORT std::vector get_pq_vector(std::uint64_t vid); + DISKANN_DLLEXPORT uint64_t get_num_points(); + + protected: + DISKANN_DLLEXPORT void use_medoids_data_as_centroids(); + DISKANN_DLLEXPORT void setup_thread_data(uint64_t nthreads, uint64_t visited_reserve = 4096); + + DISKANN_DLLEXPORT void set_universal_label(const LabelT &label); + + private: + DISKANN_DLLEXPORT inline bool point_has_label(uint32_t point_id, LabelT label_id); + std::unordered_map load_label_map(std::basic_istream &infile); + DISKANN_DLLEXPORT void parse_label_file(std::basic_istream &infile, size_t &num_pts_labels); + DISKANN_DLLEXPORT void get_label_file_metadata(const std::string &fileContent, uint32_t &num_pts, + uint32_t &num_total_labels); + DISKANN_DLLEXPORT void generate_random_labels(std::vector &labels, const uint32_t num_labels, + const uint32_t nthreads); + void reset_stream_for_reading(std::basic_istream &infile); + + // sector # on disk where node_id is present with in the graph part + DISKANN_DLLEXPORT uint64_t get_node_sector(uint64_t node_id); + + // ptr to start of the node + DISKANN_DLLEXPORT char *offset_to_node(char *sector_buf, uint64_t node_id); + + // returns region of `node_buf` containing [NNBRS][NBR_ID(uint32_t)] + DISKANN_DLLEXPORT uint32_t *offset_to_node_nhood(char *node_buf); + + // returns region of `node_buf` containing [COORD(T)] + DISKANN_DLLEXPORT T *offset_to_node_coords(char *node_buf); + + DISKANN_DLLEXPORT int load_graph_index(const std::string &graph_index_file); + + DISKANN_DLLEXPORT int read_partition_info(const std::string &partition_bin); + + DISKANN_DLLEXPORT int read_neighbors(const std::string &graph_index_file, uint64_t target_node_id); + + // index info for multi-node sectors + // nhood of node `i` is in sector: [i 
/ nnodes_per_sector] + // offset in sector: [(i % nnodes_per_sector) * max_node_len] + // + // index info for multi-sector nodes + // nhood of node `i` is in sector: [i * DIV_ROUND_UP(_max_node_len, SECTOR_LEN)] + // offset in sector: [0] + // + // Common info + // coords start at ofsset + // #nbrs of node `i`: *(unsigned*) (offset + disk_bytes_per_point) + // nbrs of node `i` : (unsigned*) (offset + disk_bytes_per_point + 1) + + uint64_t _max_node_len = 0; + uint64_t _nnodes_per_sector = 0; // 0 for multi-sector nodes, >0 for multi-node sectors + uint64_t _max_degree = 0; + uint64_t _C = 0; + // Data used for searching with re-order vectors + uint64_t _ndims_reorder_vecs = 0; + uint64_t _reorder_data_start_sector = 0; + uint64_t _nvecs_per_sector = 0; + + diskann::Metric metric = diskann::Metric::L2; + + // used only for inner product search to re-scale the result value + // (due to the pre-processing of base during index build) + float _max_base_norm = 0.0f; + + // data info + uint64_t _num_points = 0; + uint64_t _num_frozen_points = 0; + uint64_t _frozen_location = 0; + uint64_t _data_dim = 0; + uint64_t _aligned_dim = 0; + uint64_t _disk_bytes_per_point = 0; // Number of bytes + + std::string _disk_index_file; + std::vector> _node_visit_counter; + + // PQ data + // _n_chunks = # of chunks ndims is split into + // data: char * _n_chunks + // chunk_size = chunk size of each dimension chunk + // pq_tables = float* [[2^8 * [chunk_size]] * _n_chunks] + uint8_t *data = nullptr; + uint64_t _n_chunks; + FixedChunkPQTable _pq_table; + + // distance comparator + std::shared_ptr> _dist_cmp; + std::shared_ptr> _dist_cmp_float; + + // for very large datasets: we use PQ even for the disk resident index + bool _use_disk_index_pq = false; + uint64_t _disk_pq_n_chunks = 0; + FixedChunkPQTable _disk_pq_table; + + // medoid/start info + + // graph has one entry point by default, + // we can optionally have multiple starting points + uint32_t *_medoids = nullptr; + // defaults to 1 + size_t _num_medoids; + // by default, it is empty. 
If there are multiple + // centroids, we pick the medoid corresponding to the + // closest centroid as the starting point of search + float *_centroid_data = nullptr; + + // nhood_cache; the uint32_t in nhood_Cache are offsets into nhood_cache_buf + unsigned *_nhood_cache_buf = nullptr; + tsl::robin_map> _nhood_cache; + + // coord_cache; The T* in coord_cache are offsets into coord_cache_buf + T *_coord_cache_buf = nullptr; + tsl::robin_map _coord_cache; + + // thread-specific scratch + ConcurrentQueue *> _thread_data; + uint64_t _max_nthreads; + bool _load_flag = false; + bool _count_visited_nodes = false; + bool _reorder_data_exists = false; + uint64_t _reoreder_data_offset = 0; + + // filter support + uint32_t *_pts_to_label_offsets = nullptr; + uint32_t *_pts_to_label_counts = nullptr; + LabelT *_pts_to_labels = nullptr; + std::unordered_map> _filter_to_medoid_ids; + bool _use_universal_label = false; + LabelT _universal_filter_label; + tsl::robin_set _dummy_pts; + tsl::robin_set _has_dummy_pts; + tsl::robin_map _dummy_to_real_map; + tsl::robin_map> _real_to_dummy_map; + std::unordered_map _label_map; + + private: + bool _use_partition = false; + + std::shared_ptr graph_reader; // Graph file reader + std::string _graph_index_file; // Graph file path + uint64_t _graph_node_len; // Graph node length + uint64_t _emb_node_len; // Embedding node length + + // Partition related data structures + uint64_t _num_partitions; // Number of partitions + std::vector> _graph_partitions; // Partition information + std::vector _id2partition; // ID to partition mapping + +#ifdef EXEC_ENV_OLS + // Set to a larger value than the actual header to accommodate + // any additions we make to the header. This is an outer limit + // on how big the header can be. + static const int HEADER_SIZE = defaults::SECTOR_LEN; + char *getHeaderBytes(); +#endif +}; +} // namespace diskann diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/pq_l2_distance.h b/packages/leann-backend-diskann/third_party/DiskANN/include/pq_l2_distance.h new file mode 100644 index 0000000..e6fc6e4 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/pq_l2_distance.h @@ -0,0 +1,87 @@ +#pragma once +#include "quantized_distance.h" + +namespace diskann +{ +template class PQL2Distance : public QuantizedDistance +{ + public: + // REFACTOR TODO: We could take a file prefix here and load the + // PQ pivots file, so that the distance object is initialized + // immediately after construction. But this would not work well + // with our data store concept where the store is created first + // and data populated after. + // REFACTOR TODO: Ideally, we should only read the num_chunks from + // the pivots file. However, we read the pivots file only later, but + // clients can call functions like get__filename without calling + // load_pivot_data. Hence this. The TODO is whether we should check + // that the num_chunks from the file is the same as this one. 
+ + PQL2Distance(uint32_t num_chunks, bool use_opq = false); + + virtual ~PQL2Distance() override; + + virtual bool is_opq() const override; + + virtual std::string get_quantized_vectors_filename(const std::string &prefix) const override; + virtual std::string get_pivot_data_filename(const std::string &prefix) const override; + virtual std::string get_rotation_matrix_suffix(const std::string &pq_pivots_filename) const override; + +#ifdef EXEC_ENV_OLS + virtual void load_pivot_data(MemoryMappedFiles &files, const std::string &pq_table_file, + size_t num_chunks) override; +#else + virtual void load_pivot_data(const std::string &pq_table_file, size_t num_chunks) override; +#endif + + // Number of chunks in the PQ table. Depends on the compression level used. + // Has to be < ndim + virtual uint32_t get_num_chunks() const override; + + // Preprocess the query by computing chunk distances from the query vector to + // various centroids. Since we don't want this class to do scratch management, + // we will take a PQScratch object which can come either from Index class or + // PQFlashIndex class. + virtual void preprocess_query(const data_t *aligned_query, uint32_t original_dim, + PQScratch &pq_scratch) override; + + // Distance function used for graph traversal. This function must be called + // after + // preprocess_query. The reason we do not call preprocess ourselves is because + // that function has to be called once per query, while this function is + // called at each iteration of the graph walk. NOTE: This function expects + // 1. the query to be preprocessed using preprocess_query() + // 2. the scratch object to contain the quantized vectors corresponding to ids + // in aligned_pq_coord_scratch. Done by calling aggregate_coords() + // + virtual void preprocessed_distance(PQScratch &pq_scratch, const uint32_t id_count, + float *dists_out) override; + + // Same as above, but returns the distances in a vector instead of an array. + // Convenience function for index.cpp. + virtual void preprocessed_distance(PQScratch &pq_scratch, const uint32_t n_ids, + std::vector &dists_out) override; + + // Currently this function is required for DiskPQ. 
However, it too can be + // subsumed under preprocessed_distance if we add the appropriate scratch + // variables to PQScratch and initialize them in + // pq_flash_index.cpp::disk_iterate_to_fixed_point() + virtual float brute_force_distance(const float *query_vec, uint8_t *base_vec) override; + + protected: + // assumes pre-processed query + virtual void prepopulate_chunkwise_distances(const float *query_vec, float *dist_vec); + + // assumes no rotation is involved + // virtual void inflate_vector(uint8_t *base_vec, float *out_vec); + + float *_tables = nullptr; // pq_tables = float array of size [256 * ndims] + uint64_t _ndims = 0; // ndims = true dimension of vectors + uint64_t _num_chunks = 0; + bool _is_opq = false; + uint32_t *_chunk_offsets = nullptr; + float *_centroid = nullptr; + float *_tables_tr = nullptr; // same as pq_tables, but col-major + float *_rotmat_tr = nullptr; +}; +} // namespace diskann diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/pq_scratch.h b/packages/leann-backend-diskann/third_party/DiskANN/include/pq_scratch.h new file mode 100644 index 0000000..95f1b13 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/pq_scratch.h @@ -0,0 +1,23 @@ +#pragma once +#include +#include "pq_common.h" +#include "utils.h" + +namespace diskann +{ + +template class PQScratch +{ + public: + float *aligned_pqtable_dist_scratch = nullptr; // MUST BE AT LEAST [256 * NCHUNKS] + float *aligned_dist_scratch = nullptr; // MUST BE AT LEAST diskann MAX_DEGREE + uint8_t *aligned_pq_coord_scratch = nullptr; // AT LEAST [N_CHUNKS * MAX_DEGREE] + float *rotated_query = nullptr; + float *aligned_query_float = nullptr; + + PQScratch(size_t graph_degree, size_t aligned_dim); + void initialize(size_t dim, const T *query, const float norm = 1.0f); + virtual ~PQScratch(); +}; + +} // namespace diskann \ No newline at end of file diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/program_options_utils.hpp b/packages/leann-backend-diskann/third_party/DiskANN/include/program_options_utils.hpp new file mode 100644 index 0000000..2be6059 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/program_options_utils.hpp @@ -0,0 +1,81 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include + +namespace program_options_utils +{ +const std::string make_program_description(const char *executable_name, const char *description) +{ + return std::string("\n") + .append(description) + .append("\n\n") + .append("Usage: ") + .append(executable_name) + .append(" [OPTIONS]"); +} + +// Required parameters +const char *DATA_TYPE_DESCRIPTION = "data type, one of {int8, uint8, float} - float is single precision (32 bit)"; +const char *DISTANCE_FUNCTION_DESCRIPTION = + "distance function {l2, mips, fast_l2, cosine}. 'fast l2' and 'mips' only support data_type float"; +const char *INDEX_PATH_PREFIX_DESCRIPTION = "Path prefix to the index, e.g. '/mnt/data/my_ann_index'"; +const char *RESULT_PATH_DESCRIPTION = + "Path prefix for saving results of the queries, e.g. '/mnt/data/query_file_X.bin'"; +const char *QUERY_FILE_DESCRIPTION = "Query file in binary format, e.g. '/mnt/data/query_file_X.bin'"; +const char *NUMBER_OF_RESULTS_DESCRIPTION = "Number of neighbors to be returned (K in the DiskANN white paper)"; +const char *SEARCH_LIST_DESCRIPTION = + "Size of search list to use. 
This value is the number of neighbor/distance pairs to keep in memory at the same " + "time while performing a query. This can also be described as the size of the working set at query time. This " + "must be greater than or equal to the number of results/neighbors to return (K in the white paper). Corresponds " + "to L in the DiskANN white paper."; +const char *INPUT_DATA_PATH = "Input data file in bin format. This is the file you want to build the index over. " + "File format: Shape of the vector followed by the vector of embeddings as binary data."; + +// Optional parameters +const char *FILTER_LABEL_DESCRIPTION = + "Filter to use when running a query. 'filter_label' and 'query_filters_file' are mutually exclusive."; +const char *FILTERS_FILE_DESCRIPTION = + "Filter file for Queries for Filtered Search. File format is text with one filter per line. File must " + "have exactly one filter OR the same number of filters as there are queries in the 'query_file'."; +const char *LABEL_TYPE_DESCRIPTION = + "Storage type of Labels {uint/uint32, ushort/uint16}, default value is uint which will consume memory 4 bytes per " + "filter. 'uint' is an alias for 'uint32' and 'ushort' is an alias for 'uint16'."; +const char *GROUND_TRUTH_FILE_DESCRIPTION = + "ground truth file for the queryset"; // what's the format, what's the requirements? does it need to include an + // entry for every item or just a small subset? I have so many questions about + // this file +const char *NUMBER_THREADS_DESCRIPTION = "Number of threads used for building index. Defaults to number of logical " + "processor cores on your this machine returned by omp_get_num_procs()"; +const char *FAIL_IF_RECALL_BELOW = + "Value between 0 (inclusive) and 100 (exclusive) indicating the recall tolerance percentage threshold before " + "program fails with a non-zero exit code. The default value of 0 means that the program will complete " + "successfully with any recall value. A non-zero value indicates the floor for acceptable recall values. If the " + "calculated recall value is below this threshold then the program will write out the results but return a non-zero " + "exit code as a signal that the recall was not acceptable."; // does it continue running or die immediately? Will I + // still get my results even if the return code is -1? + +const char *NUMBER_OF_NODES_TO_CACHE = "Number of BFS nodes around medoid(s) to cache. Default value: 0"; +const char *BEAMWIDTH = "Beamwidth for search. Set 0 to optimize internally. Default value: 2"; +const char *MAX_BUILD_DEGREE = "Maximum graph degree"; +const char *GRAPH_BUILD_COMPLEXITY = + "Size of the search working set during build time. This is the numer of neighbor/distance pairs to keep in memory " + "while building the index. Higher value results in a higher quality graph but it will take more time to build the " + "graph."; +const char *GRAPH_BUILD_ALPHA = "Alpha controls density and diameter of graph, set 1 for sparse graph, 1.2 or 1.4 for " + "denser graphs with lower diameter"; +const char *BUIlD_GRAPH_PQ_BYTES = "Number of PQ bytes to build the index; 0 for full precision build"; +const char *USE_OPQ = "Use Optimized Product Quantization (OPQ)."; +const char *LABEL_FILE = "Input label file in txt format for Filtered Index build. The file should contain comma " + "separated filters for each node with each line corresponding to a graph node"; +const char *UNIVERSAL_LABEL = + "Universal label, Use only in conjunction with label file for filtered index build. 
If a " + "graph node has all the labels against it, we can assign a special universal filter to the " + "point instead of comma separated filters for that point. The universal label should be assigned to nodes " + "in the labels file instead of listing all labels for a node. DiskANN will not automatically assign a " + "universal label to a node."; +const char *FILTERED_LBUILD = "Build complexity for filtered points, higher value results in better graphs"; + +} // namespace program_options_utils diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/proto_embedding.h b/packages/leann-backend-diskann/third_party/DiskANN/include/proto_embedding.h new file mode 100644 index 0000000..f17e225 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/proto_embedding.h @@ -0,0 +1,9 @@ +#pragma once + +#include "embedding.pb.h" + +// This header ensures that the protobuf files are included correctly +// and provides a namespace alias for convenience +namespace diskann { + namespace proto = protoembedding; +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/quantized_distance.h b/packages/leann-backend-diskann/third_party/DiskANN/include/quantized_distance.h new file mode 100644 index 0000000..cc4aea9 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/quantized_distance.h @@ -0,0 +1,56 @@ +#pragma once +#include +#include +#include +#include "abstract_scratch.h" + +namespace diskann +{ +template class PQScratch; + +template class QuantizedDistance +{ + public: + QuantizedDistance() = default; + QuantizedDistance(const QuantizedDistance &) = delete; + QuantizedDistance &operator=(const QuantizedDistance &) = delete; + virtual ~QuantizedDistance() = default; + + virtual bool is_opq() const = 0; + virtual std::string get_quantized_vectors_filename(const std::string &prefix) const = 0; + virtual std::string get_pivot_data_filename(const std::string &prefix) const = 0; + virtual std::string get_rotation_matrix_suffix(const std::string &pq_pivots_filename) const = 0; + + // Loading the PQ centroid table need not be part of the abstract class. + // However, we want to indicate that this function will change once we have a + // file reader hierarchy, so leave it here as-is. +#ifdef EXEC_ENV_OLS + virtual void load_pivot_data(MemoryMappedFiles &files, const std::String &pq_table_file, size_t num_chunks) = 0; +#else + virtual void load_pivot_data(const std::string &pq_table_file, size_t num_chunks) = 0; +#endif + + // Number of chunks in the PQ table. Depends on the compression level used. + // Has to be < ndim + virtual uint32_t get_num_chunks() const = 0; + + // Preprocess the query by computing chunk distances from the query vector to + // various centroids. Since we don't want this class to do scratch management, + // we will take a PQScratch object which can come either from Index class or + // PQFlashIndex class. + virtual void preprocess_query(const data_t *query_vec, uint32_t query_dim, PQScratch &pq_scratch) = 0; + + // Workhorse + // This function must be called after preprocess_query + virtual void preprocessed_distance(PQScratch &pq_scratch, const uint32_t id_count, float *dists_out) = 0; + + // Same as above, but convenience function for index.cpp. + virtual void preprocessed_distance(PQScratch &pq_scratch, const uint32_t n_ids, + std::vector &dists_out) = 0; + + // Currently this function is required for DiskPQ. 
However, it too can be subsumed + // under preprocessed_distance if we add the appropriate scratch variables to + // PQScratch and initialize them in pq_flash_index.cpp::disk_iterate_to_fixed_point() + virtual float brute_force_distance(const float *query_vec, uint8_t *base_vec) = 0; +}; +} // namespace diskann diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/restapi/common.h b/packages/leann-backend-diskann/third_party/DiskANN/include/restapi/common.h new file mode 100644 index 0000000..b833963 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/restapi/common.h @@ -0,0 +1,18 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include +#include + +namespace diskann +{ +// Constants +static const std::string VECTOR_KEY = "query", K_KEY = "k", INDICES_KEY = "indices", DISTANCES_KEY = "distances", + TAGS_KEY = "tags", QUERY_ID_KEY = "query_id", ERROR_MESSAGE_KEY = "error", L_KEY = "Ls", + TIME_TAKEN_KEY = "time_taken_in_us", PARTITION_KEY = "partition", + UNKNOWN_ERROR = "unknown_error"; +const unsigned int DEFAULT_L = 100; + +} // namespace diskann \ No newline at end of file diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/restapi/search_wrapper.h b/packages/leann-backend-diskann/third_party/DiskANN/include/restapi/search_wrapper.h new file mode 100644 index 0000000..ebd067d --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/restapi/search_wrapper.h @@ -0,0 +1,140 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include +#include +#include + +#include +#include + +namespace diskann +{ +class SearchResult +{ + public: + SearchResult(unsigned int K, unsigned int elapsed_time_in_ms, const unsigned *const indices, + const float *const distances, const std::string *const tags = nullptr, + const unsigned *const partitions = nullptr); + + const std::vector &get_indices() const + { + return _indices; + } + const std::vector &get_distances() const + { + return _distances; + } + bool tags_enabled() const + { + return _tags_enabled; + } + const std::vector &get_tags() const + { + return _tags; + } + bool partitions_enabled() const + { + return _partitions_enabled; + } + const std::vector &get_partitions() const + { + return _partitions; + } + unsigned get_time() const + { + return _search_time_in_ms; + } + + private: + unsigned int _K; + unsigned int _search_time_in_ms; + std::vector _indices; + std::vector _distances; + + bool _tags_enabled; + std::vector _tags; + + bool _partitions_enabled; + std::vector _partitions; +}; + +class SearchNotImplementedException : public std::logic_error +{ + private: + std::string _errormsg; + + public: + SearchNotImplementedException(const char *type) : std::logic_error("Not Implemented") + { + _errormsg = "Search with data type "; + _errormsg += std::string(type); + _errormsg += " not implemented : "; + _errormsg += __FUNCTION__; + } + + virtual const char *what() const throw() + { + return _errormsg.c_str(); + } +}; + +class BaseSearch +{ + public: + BaseSearch(const std::string &tagsFile = nullptr); + virtual SearchResult search(const float *query, const unsigned int dimensions, const unsigned int K, + const unsigned int Ls) + { + throw SearchNotImplementedException("float"); + } + virtual SearchResult search(const int8_t *query, const unsigned int dimensions, const unsigned int K, + const unsigned int Ls) + { + throw 
SearchNotImplementedException("int8_t"); + } + + virtual SearchResult search(const uint8_t *query, const unsigned int dimensions, const unsigned int K, + const unsigned int Ls) + { + throw SearchNotImplementedException("uint8_t"); + } + + void lookup_tags(const unsigned K, const unsigned *indices, std::string *ret_tags); + + protected: + bool _tags_enabled; + std::vector _tags_str; +}; + +template class InMemorySearch : public BaseSearch +{ + public: + InMemorySearch(const std::string &baseFile, const std::string &indexFile, const std::string &tagsFile, Metric m, + uint32_t num_threads, uint32_t search_l); + virtual ~InMemorySearch(); + + SearchResult search(const T *query, const unsigned int dimensions, const unsigned int K, const unsigned int Ls); + + private: + unsigned int _dimensions, _numPoints; + std::unique_ptr> _index; +}; + +template class PQFlashSearch : public BaseSearch +{ + public: + PQFlashSearch(const std::string &indexPrefix, const unsigned num_nodes_to_cache, const unsigned num_threads, + const std::string &tagsFile, Metric m); + virtual ~PQFlashSearch(); + + SearchResult search(const T *query, const unsigned int dimensions, const unsigned int K, const unsigned int Ls); + + private: + unsigned int _dimensions, _numPoints; + std::unique_ptr> _index; + std::shared_ptr reader; +}; +} // namespace diskann diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/restapi/server.h b/packages/leann-backend-diskann/third_party/DiskANN/include/restapi/server.h new file mode 100644 index 0000000..1d75847 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/restapi/server.h @@ -0,0 +1,45 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include +#include + +namespace diskann +{ +class Server +{ + public: + Server(web::uri &url, std::vector> &multi_searcher, + const std::string &typestring); + virtual ~Server(); + + pplx::task open(); + pplx::task close(); + + protected: + template void handle_post(web::http::http_request message); + + template + web::json::value toJsonArray(const std::vector &v, std::function valConverter); + web::json::value prepareResponse(const int64_t &queryId, const int k); + + template + void parseJson(const utility::string_t &body, unsigned int &k, int64_t &queryId, T *&queryVector, + unsigned int &dimensions, unsigned &Ls); + + web::json::value idsToJsonArray(const diskann::SearchResult &result); + web::json::value distancesToJsonArray(const diskann::SearchResult &result); + web::json::value tagsToJsonArray(const diskann::SearchResult &result); + web::json::value partitionsToJsonArray(const diskann::SearchResult &result); + + SearchResult aggregate_results(const unsigned K, const std::vector &results); + + private: + bool _isDebug; + std::unique_ptr _listener; + const bool _multi_search; + std::vector> _multi_searcher; +}; +} // namespace diskann diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/scratch.h b/packages/leann-backend-diskann/third_party/DiskANN/include/scratch.h new file mode 100644 index 0000000..2f43e33 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/scratch.h @@ -0,0 +1,216 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
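The REST layer above funnels every request through a BaseSearch implementation from search_wrapper.h and hands back a SearchResult. A minimal sketch of driving InMemorySearch directly; the include path, file paths, metric, and parameter values are placeholders, and the result vectors' element types are assumptions.

#include <cstdint>
#include <iostream>
#include "restapi/search_wrapper.h"  // assumed include path

void run_one_query(const float *query, unsigned dims)
{
    // Placeholders: base vectors, a prebuilt in-memory index, and a tags file.
    diskann::InMemorySearch<float> searcher("base.bin", "index", "tags.txt",
                                            diskann::Metric::L2,
                                            /*num_threads=*/8, /*search_l=*/100);

    const unsigned K = 10, Ls = 100;
    diskann::SearchResult result = searcher.search(query, dims, K, Ls);

    const auto &ids = result.get_indices();
    const auto &dists = result.get_distances();
    for (unsigned i = 0; i < K; i++)
        std::cout << ids[i] << " (" << dists[i] << ")\n";
}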
+ +#pragma once + +#include + +#include "boost_dynamic_bitset_fwd.h" +// #include "boost/dynamic_bitset.hpp" +#include "tsl/robin_set.h" +#include "tsl/robin_map.h" +#include "tsl/sparse_map.h" + +#include "aligned_file_reader.h" +#include "abstract_scratch.h" +#include "neighbor.h" +#include "defaults.h" +#include "concurrent_queue.h" + +namespace diskann +{ +template class PQScratch; + +// +// AbstractScratch space for in-memory index based search +// +template class InMemQueryScratch : public AbstractScratch +{ + public: + ~InMemQueryScratch(); + InMemQueryScratch(uint32_t search_l, uint32_t indexing_l, uint32_t r, uint32_t maxc, size_t dim, size_t aligned_dim, + size_t alignment_factor, bool init_pq_scratch = false); + void resize_for_new_L(uint32_t new_search_l); + void clear(); + + inline uint32_t get_L() + { + return _L; + } + inline uint32_t get_R() + { + return _R; + } + inline uint32_t get_maxc() + { + return _maxc; + } + inline T *aligned_query() + { + return this->_aligned_query_T; + } + inline PQScratch *pq_scratch() + { + return this->_pq_scratch; + } + inline std::vector &pool() + { + return _pool; + } + inline NeighborPriorityQueue &best_l_nodes() + { + return _best_l_nodes; + } + inline std::vector &occlude_factor() + { + return _occlude_factor; + } + inline tsl::robin_set &inserted_into_pool_rs() + { + return _inserted_into_pool_rs; + } + inline boost::dynamic_bitset<> &inserted_into_pool_bs() + { + return *_inserted_into_pool_bs; + } + inline std::vector &id_scratch() + { + return _id_scratch; + } + inline std::vector &dist_scratch() + { + return _dist_scratch; + } + inline tsl::robin_set &expanded_nodes_set() + { + return _expanded_nodes_set; + } + inline std::vector &expanded_nodes_vec() + { + return _expanded_nghrs_vec; + } + inline std::vector &occlude_list_output() + { + return _occlude_list_output; + } + + private: + uint32_t _L; + uint32_t _R; + uint32_t _maxc; + + // _pool stores all neighbors explored from best_L_nodes. + // Usually around L+R, but could be higher. + // Initialized to 3L+R for some slack, expands as needed. + std::vector _pool; + + // _best_l_nodes is reserved for storing best L entries + // Underlying storage is L+1 to support inserts + NeighborPriorityQueue _best_l_nodes; + + // _occlude_factor.size() >= pool.size() in occlude_list function + // _pool is clipped to maxc in occlude_list before affecting _occlude_factor + // _occlude_factor is initialized to maxc size + std::vector _occlude_factor; + + // Capacity initialized to 20L + tsl::robin_set _inserted_into_pool_rs; + + // Use a pointer here to allow for forward declaration of dynamic_bitset + // in public headers to avoid making boost a dependency for clients + // of DiskANN. 
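+    // Note: both a robin_set (_inserted_into_pool_rs above) and this bitset
+    // track which nodes have already been inserted into the pool; keeping both
+    // presumably lets the search code pick whichever representation is cheaper
+    // for a given index size.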
+ boost::dynamic_bitset<> *_inserted_into_pool_bs; + + // _id_scratch.size() must be > R*GRAPH_SLACK_FACTOR for iterate_to_fp + std::vector _id_scratch; + + // _dist_scratch must be > R*GRAPH_SLACK_FACTOR for iterate_to_fp + // _dist_scratch should be at least the size of id_scratch + std::vector _dist_scratch; + + // Buffers used in process delete, capacity increases as needed + tsl::robin_set _expanded_nodes_set; + std::vector _expanded_nghrs_vec; + std::vector _occlude_list_output; +}; + +// +// AbstractScratch space for SSD index based search +// + +template class SSDQueryScratch : public AbstractScratch +{ + public: + T *coord_scratch = nullptr; // MUST BE AT LEAST [sizeof(T) * data_dim] + + char *sector_scratch = nullptr; // MUST BE AT LEAST [MAX_N_SECTOR_READS * SECTOR_LEN] + size_t sector_idx = 0; // index of next [SECTOR_LEN] scratch to use + + tsl::robin_set visited; + NeighborPriorityQueue retset; + std::vector full_retset; + + SSDQueryScratch(size_t aligned_dim, size_t visited_reserve); + ~SSDQueryScratch(); + + void reset(); +}; + +template class SSDThreadData +{ + public: + SSDQueryScratch scratch; + IOContext ctx; + + SSDThreadData(size_t aligned_dim, size_t visited_reserve); + void clear(); +}; + +// +// Class to avoid the hassle of pushing and popping the query scratch. +// +template class ScratchStoreManager +{ + public: + ScratchStoreManager(ConcurrentQueue &query_scratch) : _scratch_pool(query_scratch) + { + _scratch = query_scratch.pop(); + while (_scratch == nullptr) + { + query_scratch.wait_for_push_notify(); + _scratch = query_scratch.pop(); + } + } + T *scratch_space() + { + return _scratch; + } + + ~ScratchStoreManager() + { + _scratch->clear(); + _scratch_pool.push(_scratch); + _scratch_pool.push_notify_all(); + } + + void destroy() + { + while (!_scratch_pool.empty()) + { + auto scratch = _scratch_pool.pop(); + while (scratch == nullptr) + { + _scratch_pool.wait_for_push_notify(); + scratch = _scratch_pool.pop(); + } + delete scratch; + } + } + + private: + T *_scratch; + ConcurrentQueue &_scratch_pool; + ScratchStoreManager(const ScratchStoreManager &); + ScratchStoreManager &operator=(const ScratchStoreManager &); +}; +} // namespace diskann diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/simd_utils.h b/packages/leann-backend-diskann/third_party/DiskANN/include/simd_utils.h new file mode 100644 index 0000000..4b07369 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/simd_utils.h @@ -0,0 +1,106 @@ +#pragma once + +#ifdef _WINDOWS +#include +#include +#include +#include +#else +#include +#endif + +namespace diskann +{ +static inline __m256 _mm256_mul_epi8(__m256i X) +{ + __m256i zero = _mm256_setzero_si256(); + + __m256i sign_x = _mm256_cmpgt_epi8(zero, X); + + __m256i xlo = _mm256_unpacklo_epi8(X, sign_x); + __m256i xhi = _mm256_unpackhi_epi8(X, sign_x); + + return _mm256_cvtepi32_ps(_mm256_add_epi32(_mm256_madd_epi16(xlo, xlo), _mm256_madd_epi16(xhi, xhi))); +} + +static inline __m128 _mm_mulhi_epi8(__m128i X) +{ + __m128i zero = _mm_setzero_si128(); + __m128i sign_x = _mm_cmplt_epi8(X, zero); + __m128i xhi = _mm_unpackhi_epi8(X, sign_x); + + return _mm_cvtepi32_ps(_mm_add_epi32(_mm_setzero_si128(), _mm_madd_epi16(xhi, xhi))); +} + +static inline __m128 _mm_mulhi_epi8_shift32(__m128i X) +{ + __m128i zero = _mm_setzero_si128(); + X = _mm_srli_epi64(X, 32); + __m128i sign_x = _mm_cmplt_epi8(X, zero); + __m128i xhi = _mm_unpackhi_epi8(X, sign_x); + + return _mm_cvtepi32_ps(_mm_add_epi32(_mm_setzero_si128(), 
_mm_madd_epi16(xhi, xhi))); +} +static inline __m128 _mm_mul_epi8(__m128i X, __m128i Y) +{ + __m128i zero = _mm_setzero_si128(); + + __m128i sign_x = _mm_cmplt_epi8(X, zero); + __m128i sign_y = _mm_cmplt_epi8(Y, zero); + + __m128i xlo = _mm_unpacklo_epi8(X, sign_x); + __m128i xhi = _mm_unpackhi_epi8(X, sign_x); + __m128i ylo = _mm_unpacklo_epi8(Y, sign_y); + __m128i yhi = _mm_unpackhi_epi8(Y, sign_y); + + return _mm_cvtepi32_ps(_mm_add_epi32(_mm_madd_epi16(xlo, ylo), _mm_madd_epi16(xhi, yhi))); +} +static inline __m128 _mm_mul_epi8(__m128i X) +{ + __m128i zero = _mm_setzero_si128(); + __m128i sign_x = _mm_cmplt_epi8(X, zero); + __m128i xlo = _mm_unpacklo_epi8(X, sign_x); + __m128i xhi = _mm_unpackhi_epi8(X, sign_x); + + return _mm_cvtepi32_ps(_mm_add_epi32(_mm_madd_epi16(xlo, xlo), _mm_madd_epi16(xhi, xhi))); +} + +static inline __m128 _mm_mul32_pi8(__m128i X, __m128i Y) +{ + __m128i xlo = _mm_cvtepi8_epi16(X), ylo = _mm_cvtepi8_epi16(Y); + return _mm_cvtepi32_ps(_mm_unpacklo_epi32(_mm_madd_epi16(xlo, ylo), _mm_setzero_si128())); +} + +static inline __m256 _mm256_mul_epi8(__m256i X, __m256i Y) +{ + __m256i zero = _mm256_setzero_si256(); + + __m256i sign_x = _mm256_cmpgt_epi8(zero, X); + __m256i sign_y = _mm256_cmpgt_epi8(zero, Y); + + __m256i xlo = _mm256_unpacklo_epi8(X, sign_x); + __m256i xhi = _mm256_unpackhi_epi8(X, sign_x); + __m256i ylo = _mm256_unpacklo_epi8(Y, sign_y); + __m256i yhi = _mm256_unpackhi_epi8(Y, sign_y); + + return _mm256_cvtepi32_ps(_mm256_add_epi32(_mm256_madd_epi16(xlo, ylo), _mm256_madd_epi16(xhi, yhi))); +} + +static inline __m256 _mm256_mul32_pi8(__m128i X, __m128i Y) +{ + __m256i xlo = _mm256_cvtepi8_epi16(X), ylo = _mm256_cvtepi8_epi16(Y); + return _mm256_blend_ps(_mm256_cvtepi32_ps(_mm256_madd_epi16(xlo, ylo)), _mm256_setzero_ps(), 252); +} + +static inline float _mm256_reduce_add_ps(__m256 x) +{ + /* ( x3+x7, x2+x6, x1+x5, x0+x4 ) */ + const __m128 x128 = _mm_add_ps(_mm256_extractf128_ps(x, 1), _mm256_castps256_ps128(x)); + /* ( -, -, x1+x3+x5+x7, x0+x2+x4+x6 ) */ + const __m128 x64 = _mm_add_ps(x128, _mm_movehl_ps(x128, x128)); + /* ( -, -, -, x0+x1+x2+x3+x4+x5+x6+x7 ) */ + const __m128 x32 = _mm_add_ss(x64, _mm_shuffle_ps(x64, x64, 0x55)); + /* Conversion to float is a no-op on x86-64 */ + return _mm_cvtss_f32(x32); +} +} // namespace diskann diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/tag_uint128.h b/packages/leann-backend-diskann/third_party/DiskANN/include/tag_uint128.h new file mode 100644 index 0000000..642de31 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/tag_uint128.h @@ -0,0 +1,68 @@ +#pragma once +#include +#include + +namespace diskann +{ +#pragma pack(push, 1) + +struct tag_uint128 +{ + std::uint64_t _data1 = 0; + std::uint64_t _data2 = 0; + + bool operator==(const tag_uint128 &other) const + { + return _data1 == other._data1 && _data2 == other._data2; + } + + bool operator==(std::uint64_t other) const + { + return _data1 == other && _data2 == 0; + } + + tag_uint128 &operator=(const tag_uint128 &other) + { + _data1 = other._data1; + _data2 = other._data2; + + return *this; + } + + tag_uint128 &operator=(std::uint64_t other) + { + _data1 = other; + _data2 = 0; + + return *this; + } +}; + +#pragma pack(pop) +} // namespace diskann + +namespace std +{ +// Hash 128 input bits down to 64 bits of output. +// This is intended to be a reasonably good hash function. +inline std::uint64_t Hash128to64(const std::uint64_t &low, const std::uint64_t &high) +{ + // Murmur-inspired hashing. 
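+    // Each step multiplies by the large odd constant kMul and then xors in the
+    // value shifted right by 47 bits, so every bit of (low, high) can influence
+    // the final 64-bit result.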
+ const std::uint64_t kMul = 0x9ddfea08eb382d69ULL; + std::uint64_t a = (low ^ high) * kMul; + a ^= (a >> 47); + std::uint64_t b = (high ^ a) * kMul; + b ^= (b >> 47); + b *= kMul; + return b; +} + +template <> struct hash +{ + size_t operator()(const diskann::tag_uint128 &key) const noexcept + { + return Hash128to64(key._data1, key._data2); // map -0 to 0 + } +}; + +} // namespace std \ No newline at end of file diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/timer.h b/packages/leann-backend-diskann/third_party/DiskANN/include/timer.h new file mode 100644 index 0000000..325edf3 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/timer.h @@ -0,0 +1,40 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. +#pragma once + +#include + +namespace diskann +{ +class Timer +{ + typedef std::chrono::high_resolution_clock _clock; + std::chrono::time_point<_clock> check_point; + + public: + Timer() : check_point(_clock::now()) + { + } + + void reset() + { + check_point = _clock::now(); + } + + long long elapsed() const + { + return std::chrono::duration_cast(_clock::now() - check_point).count(); + } + + float elapsed_seconds() const + { + return (float)elapsed() / 1000000.0f; + } + + std::string elapsed_seconds_for_step(const std::string &step) const + { + return std::string("Time for ") + step + std::string(": ") + std::to_string(elapsed_seconds()) + + std::string(" seconds"); + } +}; +} // namespace diskann diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/tsl/.clang-format b/packages/leann-backend-diskann/third_party/DiskANN/include/tsl/.clang-format new file mode 100644 index 0000000..9d15924 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/tsl/.clang-format @@ -0,0 +1,2 @@ +DisableFormat: true +SortIncludes: false diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/tsl/robin_growth_policy.h b/packages/leann-backend-diskann/third_party/DiskANN/include/tsl/robin_growth_policy.h new file mode 100644 index 0000000..6bfa9e5 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/tsl/robin_growth_policy.h @@ -0,0 +1,330 @@ +/** + * MIT License + * + * Copyright (c) 2017 Tessil + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef TSL_ROBIN_GROWTH_POLICY_H +#define TSL_ROBIN_GROWTH_POLICY_H + + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#ifndef tsl_assert +# ifdef TSL_DEBUG +# define tsl_assert(expr) assert(expr) +# else +# define tsl_assert(expr) (static_cast(0)) +# endif +#endif + + +/** + * If exceptions are enabled, throw the exception passed in parameter, otherwise call std::terminate. + */ +#ifndef TSL_THROW_OR_TERMINATE +# if (defined(__cpp_exceptions) || defined(__EXCEPTIONS) || (defined (_MSC_VER) && defined (_CPPUNWIND))) && !defined(TSL_NO_EXCEPTIONS) +# define TSL_THROW_OR_TERMINATE(ex, msg) throw ex(msg) +# else +# ifdef NDEBUG +# define TSL_THROW_OR_TERMINATE(ex, msg) std::terminate() +# else +# include +# define TSL_THROW_OR_TERMINATE(ex, msg) do { std::fprintf(stderr, msg); std::terminate(); } while(0) +# endif +# endif +#endif + + +#ifndef TSL_LIKELY +# if defined(__GNUC__) || defined(__clang__) +# define TSL_LIKELY(exp) (__builtin_expect(!!(exp), true)) +# else +# define TSL_LIKELY(exp) (exp) +# endif +#endif + + +namespace tsl { +namespace rh { + +/** + * Grow the hash table by a factor of GrowthFactor keeping the bucket count to a power of two. It allows + * the table to use a mask operation instead of a modulo operation to map a hash to a bucket. + * + * GrowthFactor must be a power of two >= 2. + */ +template +class power_of_two_growth_policy { +public: + /** + * Called on the hash table creation and on rehash. The number of buckets for the table is passed in parameter. + * This number is a minimum, the policy may update this value with a higher value if needed (but not lower). + * + * If 0 is given, min_bucket_count_in_out must still be 0 after the policy creation and + * bucket_for_hash must always return 0 in this case. + */ + explicit power_of_two_growth_policy(std::size_t& min_bucket_count_in_out) { + if(min_bucket_count_in_out > max_bucket_count()) { + TSL_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size."); + } + + if(min_bucket_count_in_out > 0) { + min_bucket_count_in_out = round_up_to_power_of_two(min_bucket_count_in_out); + m_mask = min_bucket_count_in_out - 1; + } + else { + m_mask = 0; + } + } + + /** + * Return the bucket [0, bucket_count()) to which the hash belongs. + * If bucket_count() is 0, it must always return 0. + */ + std::size_t bucket_for_hash(std::size_t hash) const noexcept { + return hash & m_mask; + } + + /** + * Return the number of buckets that should be used on next growth. + */ + std::size_t next_bucket_count() const { + if((m_mask + 1) > max_bucket_count() / GrowthFactor) { + TSL_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size."); + } + + return (m_mask + 1) * GrowthFactor; + } + + /** + * Return the maximum number of buckets supported by the policy. + */ + std::size_t max_bucket_count() const { + // Largest power of two. + return ((std::numeric_limits::max)() / 2) + 1; + } + + /** + * Reset the growth policy as if it was created with a bucket count of 0. + * After a clear, the policy must always return 0 when bucket_for_hash is called. 
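+     * (This is guaranteed here because clear() resets m_mask to 0 and
+     * bucket_for_hash computes hash & m_mask.)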
+ */ + void clear() noexcept { + m_mask = 0; + } + +private: + static std::size_t round_up_to_power_of_two(std::size_t value) { + if(is_power_of_two(value)) { + return value; + } + + if(value == 0) { + return 1; + } + + --value; + for(std::size_t i = 1; i < sizeof(std::size_t) * CHAR_BIT; i *= 2) { + value |= value >> i; + } + + return value + 1; + } + + static constexpr bool is_power_of_two(std::size_t value) { + return value != 0 && (value & (value - 1)) == 0; + } + +protected: + static_assert(is_power_of_two(GrowthFactor) && GrowthFactor >= 2, "GrowthFactor must be a power of two >= 2."); + + std::size_t m_mask; +}; + + +/** + * Grow the hash table by GrowthFactor::num / GrowthFactor::den and use a modulo to map a hash + * to a bucket. Slower but it can be useful if you want a slower growth. + */ +template> +class mod_growth_policy { +public: + explicit mod_growth_policy(std::size_t& min_bucket_count_in_out) { + if(min_bucket_count_in_out > max_bucket_count()) { + TSL_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size."); + } + + if(min_bucket_count_in_out > 0) { + m_mod = min_bucket_count_in_out; + } + else { + m_mod = 1; + } + } + + std::size_t bucket_for_hash(std::size_t hash) const noexcept { + return hash % m_mod; + } + + std::size_t next_bucket_count() const { + if(m_mod == max_bucket_count()) { + TSL_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size."); + } + + const double next_bucket_count = std::ceil(double(m_mod) * REHASH_SIZE_MULTIPLICATION_FACTOR); + if(!std::isnormal(next_bucket_count)) { + TSL_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size."); + } + + if(next_bucket_count > double(max_bucket_count())) { + return max_bucket_count(); + } + else { + return std::size_t(next_bucket_count); + } + } + + std::size_t max_bucket_count() const { + return MAX_BUCKET_COUNT; + } + + void clear() noexcept { + m_mod = 1; + } + +private: + static constexpr double REHASH_SIZE_MULTIPLICATION_FACTOR = 1.0 * GrowthFactor::num / GrowthFactor::den; + static const std::size_t MAX_BUCKET_COUNT = + std::size_t(double( + (std::numeric_limits::max)() / REHASH_SIZE_MULTIPLICATION_FACTOR + )); + + static_assert(REHASH_SIZE_MULTIPLICATION_FACTOR >= 1.1, "Growth factor should be >= 1.1."); + + std::size_t m_mod; +}; + + + +namespace detail { + +static constexpr const std::array PRIMES = {{ + 1ul, 5ul, 17ul, 29ul, 37ul, 53ul, 67ul, 79ul, 97ul, 131ul, 193ul, 257ul, 389ul, 521ul, 769ul, 1031ul, + 1543ul, 2053ul, 3079ul, 6151ul, 12289ul, 24593ul, 49157ul, 98317ul, 196613ul, 393241ul, 786433ul, + 1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul, 50331653ul, 100663319ul, 201326611ul, + 402653189ul, 805306457ul, 1610612741ul, 3221225473ul, 4294967291ul +}}; + +template +static constexpr std::size_t mod(std::size_t hash) { return hash % PRIMES[IPrime]; } + +// MOD_PRIME[iprime](hash) returns hash % PRIMES[iprime]. This table allows for faster modulo as the +// compiler can optimize the modulo code better with a constant known at the compilation. 
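+// For example, MOD_PRIME[2](hash) computes hash % 17 through the instantiation
+// mod<2>, since PRIMES[2] == 17.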
+static constexpr const std::array MOD_PRIME = {{ + &mod<0>, &mod<1>, &mod<2>, &mod<3>, &mod<4>, &mod<5>, &mod<6>, &mod<7>, &mod<8>, &mod<9>, &mod<10>, + &mod<11>, &mod<12>, &mod<13>, &mod<14>, &mod<15>, &mod<16>, &mod<17>, &mod<18>, &mod<19>, &mod<20>, + &mod<21>, &mod<22>, &mod<23>, &mod<24>, &mod<25>, &mod<26>, &mod<27>, &mod<28>, &mod<29>, &mod<30>, + &mod<31>, &mod<32>, &mod<33>, &mod<34>, &mod<35>, &mod<36>, &mod<37> , &mod<38>, &mod<39> +}}; + +} + +/** + * Grow the hash table by using prime numbers as bucket count. Slower than tsl::rh::power_of_two_growth_policy in + * general but will probably distribute the values around better in the buckets with a poor hash function. + * + * To allow the compiler to optimize the modulo operation, a lookup table is used with constant primes numbers. + * + * With a switch the code would look like: + * \code + * switch(iprime) { // iprime is the current prime of the hash table + * case 0: hash % 5ul; + * break; + * case 1: hash % 17ul; + * break; + * case 2: hash % 29ul; + * break; + * ... + * } + * \endcode + * + * Due to the constant variable in the modulo the compiler is able to optimize the operation + * by a series of multiplications, substractions and shifts. + * + * The 'hash % 5' could become something like 'hash - (hash * 0xCCCCCCCD) >> 34) * 5' in a 64 bits environement. + */ +class prime_growth_policy { +public: + explicit prime_growth_policy(std::size_t& min_bucket_count_in_out) { + auto it_prime = std::lower_bound(detail::PRIMES.begin(), + detail::PRIMES.end(), min_bucket_count_in_out); + if(it_prime == detail::PRIMES.end()) { + TSL_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size."); + } + + m_iprime = static_cast(std::distance(detail::PRIMES.begin(), it_prime)); + if(min_bucket_count_in_out > 0) { + min_bucket_count_in_out = *it_prime; + } + else { + min_bucket_count_in_out = 0; + } + } + + std::size_t bucket_for_hash(std::size_t hash) const noexcept { + return detail::MOD_PRIME[m_iprime](hash); + } + + std::size_t next_bucket_count() const { + if(m_iprime + 1 >= detail::PRIMES.size()) { + TSL_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size."); + } + + return detail::PRIMES[m_iprime + 1]; + } + + std::size_t max_bucket_count() const { + return detail::PRIMES.back(); + } + + void clear() noexcept { + m_iprime = 0; + } + +private: + unsigned int m_iprime; + + static_assert((std::numeric_limits::max)() >= detail::PRIMES.size(), + "The type of m_iprime is not big enough."); +}; + +} +} + +#endif diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/tsl/robin_hash.h b/packages/leann-backend-diskann/third_party/DiskANN/include/tsl/robin_hash.h new file mode 100644 index 0000000..5ecc962 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/tsl/robin_hash.h @@ -0,0 +1,1285 @@ +/** + * MIT License + * + * Copyright (c) 2017 Tessil + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef TSL_ROBIN_HASH_H +#define TSL_ROBIN_HASH_H + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "robin_growth_policy.h" + + +namespace tsl { + +namespace detail_robin_hash { + +template +struct make_void { + using type = void; +}; + +template +struct has_is_transparent: std::false_type { +}; + +template +struct has_is_transparent::type>: std::true_type { +}; + +template +struct is_power_of_two_policy: std::false_type { +}; + +template +struct is_power_of_two_policy>: std::true_type { +}; + + + +using truncated_hash_type = std::uint_least32_t; + +/** + * Helper class that store a truncated hash if StoreHash is true and nothing otherwise. + */ +template +class bucket_entry_hash { +public: + bool bucket_hash_equal(std::size_t /*hash*/) const noexcept { + return true; + } + + truncated_hash_type truncated_hash() const noexcept { + return 0; + } + +protected: + void set_hash(truncated_hash_type /*hash*/) noexcept { + } +}; + +template<> +class bucket_entry_hash { +public: + bool bucket_hash_equal(std::size_t hash) const noexcept { + return m_hash == truncated_hash_type(hash); + } + + truncated_hash_type truncated_hash() const noexcept { + return m_hash; + } + +protected: + void set_hash(truncated_hash_type hash) noexcept { + m_hash = truncated_hash_type(hash); + } + +private: + truncated_hash_type m_hash; +}; + + +/** + * Each bucket entry has: + * - A value of type `ValueType`. + * - An integer to store how far the value of the bucket, if any, is from its ideal bucket + * (ex: if the current bucket 5 has the value 'foo' and `hash('foo') % nb_buckets` == 3, + * `dist_from_ideal_bucket()` will return 2 as the current value of the bucket is two + * buckets away from its ideal bucket) + * If there is no value in the bucket (i.e. `empty()` is true) `dist_from_ideal_bucket()` will be < 0. + * - A marker which tells us if the bucket is the last bucket of the bucket array (useful for the + * iterator of the hash table). + * - If `StoreHash` is true, 32 bits of the hash of the value, if any, are also stored in the bucket. + * If the size of the hash is more than 32 bits, it is truncated. We don't store the full hash + * as storing the hash is a potential opportunity to use the unused space due to the alignement + * of the bucket_entry structure. We can thus potentially store the hash without any extra space + * (which would not be possible with 64 bits of the hash). 
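+ * An empty bucket is marked by dist_from_ideal_bucket() ==
+ * EMPTY_MARKER_DIST_FROM_IDEAL_BUCKET (-1); the value itself lives in raw
+ * aligned storage and is only constructed (via placement new) while the
+ * bucket is occupied.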
+ */ +template +class bucket_entry: public bucket_entry_hash { + using bucket_hash = bucket_entry_hash; + +public: + using value_type = ValueType; + using distance_type = std::int_least16_t; + + + bucket_entry() noexcept: bucket_hash(), m_dist_from_ideal_bucket(EMPTY_MARKER_DIST_FROM_IDEAL_BUCKET), + m_last_bucket(false) + { + tsl_assert(empty()); + } + + bucket_entry(bool last_bucket) noexcept: bucket_hash(), m_dist_from_ideal_bucket(EMPTY_MARKER_DIST_FROM_IDEAL_BUCKET), + m_last_bucket(last_bucket) + { + tsl_assert(empty()); + } + + bucket_entry(const bucket_entry& other) noexcept(std::is_nothrow_copy_constructible::value): + bucket_hash(other), + m_dist_from_ideal_bucket(EMPTY_MARKER_DIST_FROM_IDEAL_BUCKET), + m_last_bucket(other.m_last_bucket) + { + if(!other.empty()) { + ::new (static_cast(std::addressof(m_value))) value_type(other.value()); + m_dist_from_ideal_bucket = other.m_dist_from_ideal_bucket; + } + } + + /** + * Never really used, but still necessary as we must call resize on an empty `std::vector`. + * and we need to support move-only types. See robin_hash constructor for details. + */ + bucket_entry(bucket_entry&& other) noexcept(std::is_nothrow_move_constructible::value): + bucket_hash(std::move(other)), + m_dist_from_ideal_bucket(EMPTY_MARKER_DIST_FROM_IDEAL_BUCKET), + m_last_bucket(other.m_last_bucket) + { + if(!other.empty()) { + ::new (static_cast(std::addressof(m_value))) value_type(std::move(other.value())); + m_dist_from_ideal_bucket = other.m_dist_from_ideal_bucket; + } + } + + bucket_entry& operator=(const bucket_entry& other) + noexcept(std::is_nothrow_copy_constructible::value) + { + if(this != &other) { + clear(); + + bucket_hash::operator=(other); + if(!other.empty()) { + ::new (static_cast(std::addressof(m_value))) value_type(other.value()); + } + + m_dist_from_ideal_bucket = other.m_dist_from_ideal_bucket; + m_last_bucket = other.m_last_bucket; + } + + return *this; + } + + bucket_entry& operator=(bucket_entry&& ) = delete; + + ~bucket_entry() noexcept { + clear(); + } + + void clear() noexcept { + if(!empty()) { + destroy_value(); + m_dist_from_ideal_bucket = EMPTY_MARKER_DIST_FROM_IDEAL_BUCKET; + } + } + + bool empty() const noexcept { + return m_dist_from_ideal_bucket == EMPTY_MARKER_DIST_FROM_IDEAL_BUCKET; + } + + value_type& value() noexcept { + tsl_assert(!empty()); + return *reinterpret_cast(std::addressof(m_value)); + } + + const value_type& value() const noexcept { + tsl_assert(!empty()); + return *reinterpret_cast(std::addressof(m_value)); + } + + distance_type dist_from_ideal_bucket() const noexcept { + return m_dist_from_ideal_bucket; + } + + bool last_bucket() const noexcept { + return m_last_bucket; + } + + void set_as_last_bucket() noexcept { + m_last_bucket = true; + } + + template + void set_value_of_empty_bucket(distance_type dist_from_ideal_bucket, + truncated_hash_type hash, Args&&... 
value_type_args) + { + tsl_assert(dist_from_ideal_bucket >= 0); + tsl_assert(empty()); + + ::new (static_cast(std::addressof(m_value))) value_type(std::forward(value_type_args)...); + this->set_hash(hash); + m_dist_from_ideal_bucket = dist_from_ideal_bucket; + + tsl_assert(!empty()); + } + + void swap_with_value_in_bucket(distance_type& dist_from_ideal_bucket, + truncated_hash_type& hash, value_type& value) + { + tsl_assert(!empty()); + + using std::swap; + swap(value, this->value()); + swap(dist_from_ideal_bucket, m_dist_from_ideal_bucket); + + // Avoid warning of unused variable if StoreHash is false + (void) hash; + if(StoreHash) { + const truncated_hash_type tmp_hash = this->truncated_hash(); + this->set_hash(hash); + hash = tmp_hash; + } + } + + static truncated_hash_type truncate_hash(std::size_t hash) noexcept { + return truncated_hash_type(hash); + } + +private: + void destroy_value() noexcept { + tsl_assert(!empty()); + value().~value_type(); + } + +private: + using storage = typename std::aligned_storage::type; + + static const distance_type EMPTY_MARKER_DIST_FROM_IDEAL_BUCKET = -1; + + distance_type m_dist_from_ideal_bucket; + bool m_last_bucket; + storage m_value; +}; + + + +/** + * Internal common class used by `robin_map` and `robin_set`. + * + * ValueType is what will be stored by `robin_hash` (usually `std::pair` for map and `Key` for set). + * + * `KeySelect` should be a `FunctionObject` which takes a `ValueType` in parameter and returns a + * reference to the key. + * + * `ValueSelect` should be a `FunctionObject` which takes a `ValueType` in parameter and returns a + * reference to the value. `ValueSelect` should be void if there is no value (in a set for example). + * + * The strong exception guarantee only holds if the expression + * `std::is_nothrow_swappable::value && std::is_nothrow_move_constructible::value` is true. + * + * Behaviour is undefined if the destructor of `ValueType` throws. + */ +template +class robin_hash: private Hash, private KeyEqual, private GrowthPolicy { +private: + template + using has_mapped_type = typename std::integral_constant::value>; + + static_assert(noexcept(std::declval().bucket_for_hash(std::size_t(0))), "GrowthPolicy::bucket_for_hash must be noexcept."); + static_assert(noexcept(std::declval().clear()), "GrowthPolicy::clear must be noexcept."); + +public: + template + class robin_iterator; + + using key_type = typename KeySelect::key_type; + using value_type = ValueType; + using size_type = std::size_t; + using difference_type = std::ptrdiff_t; + using hasher = Hash; + using key_equal = KeyEqual; + using allocator_type = Allocator; + using reference = value_type&; + using const_reference = const value_type&; + using pointer = value_type*; + using const_pointer = const value_type*; + using iterator = robin_iterator; + using const_iterator = robin_iterator; + + +private: + /** + * Either store the hash because we are asked by the `StoreHash` template parameter + * or store the hash because it doesn't cost us anything in size and can be used to speed up rehash. + */ + static constexpr bool STORE_HASH = StoreHash || + ( + (sizeof(tsl::detail_robin_hash::bucket_entry) == + sizeof(tsl::detail_robin_hash::bucket_entry)) + && + (sizeof(std::size_t) == sizeof(truncated_hash_type) || + is_power_of_two_policy::value) + && + // Don't store the hash for primitive types with default hash. + (!std::is_arithmetic::value || + !std::is_same>::value) + ); + + /** + * Only use the stored hash on lookup if we are explictly asked. 
We are not sure how slow + * the KeyEqual operation is. An extra comparison may slow things down with a fast KeyEqual. + */ + static constexpr bool USE_STORED_HASH_ON_LOOKUP = StoreHash; + + /** + * We can only use the hash on rehash if the size of the hash type is the same as the stored one or + * if we use a power of two modulo. In the case of the power of two modulo, we just mask + * the least significant bytes, we just have to check that the truncated_hash_type didn't truncated + * more bytes. + */ + static bool USE_STORED_HASH_ON_REHASH(size_type bucket_count) { + (void) bucket_count; + if(STORE_HASH && sizeof(std::size_t) == sizeof(truncated_hash_type)) { + return true; + } + else if(STORE_HASH && is_power_of_two_policy::value) { + tsl_assert(bucket_count > 0); + return (bucket_count - 1) <= (std::numeric_limits::max)(); + } + else { + return false; + } + } + + using bucket_entry = tsl::detail_robin_hash::bucket_entry; + using distance_type = typename bucket_entry::distance_type; + + using buckets_allocator = typename std::allocator_traits::template rebind_alloc; + using buckets_container_type = std::vector; + + +public: + /** + * The 'operator*()' and 'operator->()' methods return a const reference and const pointer respectively to the + * stored value type. + * + * In case of a map, to get a mutable reference to the value associated to a key (the '.second' in the + * stored pair), you have to call 'value()'. + * + * The main reason for this is that if we returned a `std::pair&` instead + * of a `const std::pair&`, the user may modify the key which will put the map in a undefined state. + */ + template + class robin_iterator { + friend class robin_hash; + + private: + using iterator_bucket = typename std::conditional::type; + + + robin_iterator(iterator_bucket it) noexcept: m_iterator(it) { + } + + public: + using iterator_category = std::forward_iterator_tag; + using value_type = const typename robin_hash::value_type; + using difference_type = std::ptrdiff_t; + using reference = value_type&; + using pointer = value_type*; + + + robin_iterator() noexcept { + } + + robin_iterator(const robin_iterator& other) noexcept: m_iterator(other.m_iterator) { + } + + const typename robin_hash::key_type& key() const { + return KeySelect()(m_iterator->value()); + } + + template::value && IsConst>::type* = nullptr> + const typename U::value_type& value() const { + return U()(m_iterator->value()); + } + + template::value && !IsConst>::type* = nullptr> + typename U::value_type& value() { + return U()(m_iterator->value()); + } + + reference operator*() const { + return m_iterator->value(); + } + + pointer operator->() const { + return std::addressof(m_iterator->value()); + } + + robin_iterator& operator++() { + while(true) { + if(m_iterator->last_bucket()) { + ++m_iterator; + return *this; + } + + ++m_iterator; + if(!m_iterator->empty()) { + return *this; + } + } + } + + robin_iterator operator++(int) { + robin_iterator tmp(*this); + ++*this; + + return tmp; + } + + friend bool operator==(const robin_iterator& lhs, const robin_iterator& rhs) { + return lhs.m_iterator == rhs.m_iterator; + } + + friend bool operator!=(const robin_iterator& lhs, const robin_iterator& rhs) { + return !(lhs == rhs); + } + + private: + iterator_bucket m_iterator; + }; + + +public: + robin_hash(size_type bucket_count, + const Hash& hash, + const KeyEqual& equal, + const Allocator& alloc, + float max_load_factor): Hash(hash), + KeyEqual(equal), + GrowthPolicy(bucket_count), + m_buckets(alloc), + 
m_first_or_empty_bucket(static_empty_bucket_ptr()), + m_bucket_count(bucket_count), + m_nb_elements(0), + m_grow_on_next_insert(false) + { + if(bucket_count > max_bucket_count()) { + TSL_THROW_OR_TERMINATE(std::length_error, "The map exceeds its maxmimum size."); + } + + if(m_bucket_count > 0) { + /* + * We can't use the `vector(size_type count, const Allocator& alloc)` constructor + * as it's only available in C++14 and we need to support C++11. We thus must resize after using + * the `vector(const Allocator& alloc)` constructor. + * + * We can't use `vector(size_type count, const T& value, const Allocator& alloc)` as it requires the + * value T to be copyable. + */ + m_buckets.resize(m_bucket_count); + m_first_or_empty_bucket = m_buckets.data(); + + tsl_assert(!m_buckets.empty()); + m_buckets.back().set_as_last_bucket(); + } + + + this->max_load_factor(max_load_factor); + } + + robin_hash(const robin_hash& other): Hash(other), + KeyEqual(other), + GrowthPolicy(other), + m_buckets(other.m_buckets), + m_first_or_empty_bucket(m_buckets.empty()?static_empty_bucket_ptr():m_buckets.data()), + m_bucket_count(other.m_bucket_count), + m_nb_elements(other.m_nb_elements), + m_load_threshold(other.m_load_threshold), + m_max_load_factor(other.m_max_load_factor), + m_grow_on_next_insert(other.m_grow_on_next_insert) + { + } + + robin_hash(robin_hash&& other) noexcept(std::is_nothrow_move_constructible::value && + std::is_nothrow_move_constructible::value && + std::is_nothrow_move_constructible::value && + std::is_nothrow_move_constructible::value) + : Hash(std::move(static_cast(other))), + KeyEqual(std::move(static_cast(other))), + GrowthPolicy(std::move(static_cast(other))), + m_buckets(std::move(other.m_buckets)), + m_first_or_empty_bucket(m_buckets.empty()?static_empty_bucket_ptr():m_buckets.data()), + m_bucket_count(other.m_bucket_count), + m_nb_elements(other.m_nb_elements), + m_load_threshold(other.m_load_threshold), + m_max_load_factor(other.m_max_load_factor), + m_grow_on_next_insert(other.m_grow_on_next_insert) + { + other.GrowthPolicy::clear(); + other.m_buckets.clear(); + other.m_first_or_empty_bucket = static_empty_bucket_ptr(); + other.m_bucket_count = 0; + other.m_nb_elements = 0; + other.m_load_threshold = 0; + other.m_grow_on_next_insert = false; + } + + robin_hash& operator=(const robin_hash& other) { + if(&other != this) { + Hash::operator=(other); + KeyEqual::operator=(other); + GrowthPolicy::operator=(other); + + m_buckets = other.m_buckets; + m_first_or_empty_bucket = m_buckets.empty()?static_empty_bucket_ptr(): + m_buckets.data(); + m_bucket_count = other.m_bucket_count; + m_nb_elements = other.m_nb_elements; + m_load_threshold = other.m_load_threshold; + m_max_load_factor = other.m_max_load_factor; + m_grow_on_next_insert = other.m_grow_on_next_insert; + } + + return *this; + } + + robin_hash& operator=(robin_hash&& other) { + other.swap(*this); + other.clear(); + + return *this; + } + + allocator_type get_allocator() const { + return m_buckets.get_allocator(); + } + + + /* + * Iterators + */ + iterator begin() noexcept { + auto begin = m_buckets.begin(); + while(begin != m_buckets.end() && begin->empty()) { + ++begin; + } + + return iterator(begin); + } + + const_iterator begin() const noexcept { + return cbegin(); + } + + const_iterator cbegin() const noexcept { + auto begin = m_buckets.cbegin(); + while(begin != m_buckets.cend() && begin->empty()) { + ++begin; + } + + return const_iterator(begin); + } + + iterator end() noexcept { + return iterator(m_buckets.end()); + } + + 
const_iterator end() const noexcept { + return cend(); + } + + const_iterator cend() const noexcept { + return const_iterator(m_buckets.cend()); + } + + + /* + * Capacity + */ + bool empty() const noexcept { + return m_nb_elements == 0; + } + + size_type size() const noexcept { + return m_nb_elements; + } + + size_type max_size() const noexcept { + return m_buckets.max_size(); + } + + /* + * Modifiers + */ + void clear() noexcept { + for(auto& bucket: m_buckets) { + bucket.clear(); + } + + m_nb_elements = 0; + m_grow_on_next_insert = false; + } + + + + template + std::pair insert(P&& value) { + return insert_impl(KeySelect()(value), std::forward
<P>
(value)); + } + + template + iterator insert(const_iterator hint, P&& value) { + if(hint != cend() && compare_keys(KeySelect()(*hint), KeySelect()(value))) { + return mutable_iterator(hint); + } + + return insert(std::forward
<P>
(value)).first; + } + + template + void insert(InputIt first, InputIt last) { + if(std::is_base_of::iterator_category>::value) + { + const auto nb_elements_insert = std::distance(first, last); + const size_type nb_free_buckets = m_load_threshold - size(); + tsl_assert(m_load_threshold >= size()); + + if(nb_elements_insert > 0 && nb_free_buckets < size_type(nb_elements_insert)) { + reserve(size() + size_type(nb_elements_insert)); + } + } + + for(; first != last; ++first) { + insert(*first); + } + } + + + + template + std::pair insert_or_assign(K&& key, M&& obj) { + auto it = try_emplace(std::forward(key), std::forward(obj)); + if(!it.second) { + it.first.value() = std::forward(obj); + } + + return it; + } + + template + iterator insert_or_assign(const_iterator hint, K&& key, M&& obj) { + if(hint != cend() && compare_keys(KeySelect()(*hint), key)) { + auto it = mutable_iterator(hint); + it.value() = std::forward(obj); + + return it; + } + + return insert_or_assign(std::forward(key), std::forward(obj)).first; + } + + + template + std::pair emplace(Args&&... args) { + return insert(value_type(std::forward(args)...)); + } + + template + iterator emplace_hint(const_iterator hint, Args&&... args) { + return insert(hint, value_type(std::forward(args)...)); + } + + + + template + std::pair try_emplace(K&& key, Args&&... args) { + return insert_impl(key, std::piecewise_construct, + std::forward_as_tuple(std::forward(key)), + std::forward_as_tuple(std::forward(args)...)); + } + + template + iterator try_emplace(const_iterator hint, K&& key, Args&&... args) { + if(hint != cend() && compare_keys(KeySelect()(*hint), key)) { + return mutable_iterator(hint); + } + + return try_emplace(std::forward(key), std::forward(args)...).first; + } + + /** + * Here to avoid `template size_type erase(const K& key)` being used when + * we use an `iterator` instead of a `const_iterator`. + */ + iterator erase(iterator pos) { + erase_from_bucket(pos); + + /** + * Erase bucket used a backward shift after clearing the bucket. + * Check if there is a new value in the bucket, if not get the next non-empty. + */ + if(pos.m_iterator->empty()) { + ++pos; + } + + return pos; + } + + iterator erase(const_iterator pos) { + return erase(mutable_iterator(pos)); + } + + iterator erase(const_iterator first, const_iterator last) { + if(first == last) { + return mutable_iterator(first); + } + + auto first_mutable = mutable_iterator(first); + auto last_mutable = mutable_iterator(last); + for(auto it = first_mutable.m_iterator; it != last_mutable.m_iterator; ++it) { + if(!it->empty()) { + it->clear(); + m_nb_elements--; + } + } + + if(last_mutable == end()) { + return end(); + } + + + /* + * Backward shift on the values which come after the deleted values. + * We try to move the values closer to their ideal bucket. 
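+     * Each surviving value to the right of the erased range is moved left by
+     * at most its own dist_from_ideal_bucket(), so it never moves past its
+     * ideal bucket; the shift stops at the first empty bucket or at a value
+     * that already sits in its ideal bucket.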
+ */ + std::size_t icloser_bucket = std::size_t(std::distance(m_buckets.begin(), first_mutable.m_iterator)); + std::size_t ito_move_closer_value = std::size_t(std::distance(m_buckets.begin(), last_mutable.m_iterator)); + tsl_assert(ito_move_closer_value > icloser_bucket); + + const std::size_t ireturn_bucket = ito_move_closer_value - + (std::min)(ito_move_closer_value - icloser_bucket, + std::size_t(m_buckets[ito_move_closer_value].dist_from_ideal_bucket())); + + while(ito_move_closer_value < m_buckets.size() && m_buckets[ito_move_closer_value].dist_from_ideal_bucket() > 0) { + icloser_bucket = ito_move_closer_value - + (std::min)(ito_move_closer_value - icloser_bucket, + std::size_t(m_buckets[ito_move_closer_value].dist_from_ideal_bucket())); + + + tsl_assert(m_buckets[icloser_bucket].empty()); + const distance_type new_distance = distance_type(m_buckets[ito_move_closer_value].dist_from_ideal_bucket() - + (ito_move_closer_value - icloser_bucket)); + m_buckets[icloser_bucket].set_value_of_empty_bucket(new_distance, + m_buckets[ito_move_closer_value].truncated_hash(), + std::move(m_buckets[ito_move_closer_value].value())); + m_buckets[ito_move_closer_value].clear(); + + + ++icloser_bucket; + ++ito_move_closer_value; + } + + + return iterator(m_buckets.begin() + ireturn_bucket); + } + + + template + size_type erase(const K& key) { + return erase(key, hash_key(key)); + } + + template + size_type erase(const K& key, std::size_t hash) { + auto it = find(key, hash); + if(it != end()) { + erase_from_bucket(it); + + return 1; + } + else { + return 0; + } + } + + + + + + void swap(robin_hash& other) { + using std::swap; + + swap(static_cast(*this), static_cast(other)); + swap(static_cast(*this), static_cast(other)); + swap(static_cast(*this), static_cast(other)); + swap(m_buckets, other.m_buckets); + swap(m_first_or_empty_bucket, other.m_first_or_empty_bucket); + swap(m_bucket_count, other.m_bucket_count); + swap(m_nb_elements, other.m_nb_elements); + swap(m_load_threshold, other.m_load_threshold); + swap(m_max_load_factor, other.m_max_load_factor); + swap(m_grow_on_next_insert, other.m_grow_on_next_insert); + } + + + /* + * Lookup + */ + template::value>::type* = nullptr> + typename U::value_type& at(const K& key) { + return at(key, hash_key(key)); + } + + template::value>::type* = nullptr> + typename U::value_type& at(const K& key, std::size_t hash) { + return const_cast(static_cast(this)->at(key, hash)); + } + + + template::value>::type* = nullptr> + const typename U::value_type& at(const K& key) const { + return at(key, hash_key(key)); + } + + template::value>::type* = nullptr> + const typename U::value_type& at(const K& key, std::size_t hash) const { + auto it = find(key, hash); + if(it != cend()) { + return it.value(); + } + else { + TSL_THROW_OR_TERMINATE(std::out_of_range, "Couldn't find key."); + } + } + + template::value>::type* = nullptr> + typename U::value_type& operator[](K&& key) { + return try_emplace(std::forward(key)).first.value(); + } + + + template + size_type count(const K& key) const { + return count(key, hash_key(key)); + } + + template + size_type count(const K& key, std::size_t hash) const { + if(find(key, hash) != cend()) { + return 1; + } + else { + return 0; + } + } + + + template + iterator find(const K& key) { + return find_impl(key, hash_key(key)); + } + + template + iterator find(const K& key, std::size_t hash) { + return find_impl(key, hash); + } + + + template + const_iterator find(const K& key) const { + return find_impl(key, hash_key(key)); + } + + template + 
const_iterator find(const K& key, std::size_t hash) const { + return find_impl(key, hash); + } + + + template + std::pair equal_range(const K& key) { + return equal_range(key, hash_key(key)); + } + + template + std::pair equal_range(const K& key, std::size_t hash) { + iterator it = find(key, hash); + return std::make_pair(it, (it == end())?it:std::next(it)); + } + + + template + std::pair equal_range(const K& key) const { + return equal_range(key, hash_key(key)); + } + + template + std::pair equal_range(const K& key, std::size_t hash) const { + const_iterator it = find(key, hash); + return std::make_pair(it, (it == cend())?it:std::next(it)); + } + + /* + * Bucket interface + */ + size_type bucket_count() const { + return m_bucket_count; + } + + size_type max_bucket_count() const { + return (std::min)(GrowthPolicy::max_bucket_count(), m_buckets.max_size()); + } + + /* + * Hash policy + */ + float load_factor() const { + if(bucket_count() == 0) { + return 0; + } + + return float(m_nb_elements)/float(bucket_count()); + } + + float max_load_factor() const { + return m_max_load_factor; + } + + void max_load_factor(float ml) { + m_max_load_factor = (std::max)(0.1f, (std::min)(ml, 0.95f)); + m_load_threshold = size_type(float(bucket_count())*m_max_load_factor); + } + + void rehash(size_type count) { + count = (std::max)(count, size_type(std::ceil(float(size())/max_load_factor()))); + rehash_impl(count); + } + + void reserve(size_type count) { + rehash(size_type(std::ceil(float(count)/max_load_factor()))); + } + + /* + * Observers + */ + hasher hash_function() const { + return static_cast(*this); + } + + key_equal key_eq() const { + return static_cast(*this); + } + + + /* + * Other + */ + iterator mutable_iterator(const_iterator pos) { + return iterator(m_buckets.begin() + std::distance(m_buckets.cbegin(), pos.m_iterator)); + } + +private: + template + std::size_t hash_key(const K& key) const { + return Hash::operator()(key); + } + + template + bool compare_keys(const K1& key1, const K2& key2) const { + return KeyEqual::operator()(key1, key2); + } + + std::size_t bucket_for_hash(std::size_t hash) const { + const std::size_t bucket = GrowthPolicy::bucket_for_hash(hash); + tsl_assert(bucket < m_buckets.size() || (bucket == 0 && m_buckets.empty())); + + return bucket; + } + + template::value>::type* = nullptr> + std::size_t next_bucket(std::size_t index) const noexcept { + tsl_assert(index < bucket_count()); + + return (index + 1) & this->m_mask; + } + + template::value>::type* = nullptr> + std::size_t next_bucket(std::size_t index) const noexcept { + tsl_assert(index < bucket_count()); + + index++; + return (index != bucket_count())?index:0; + } + + + + template + iterator find_impl(const K& key, std::size_t hash) { + return mutable_iterator(static_cast(this)->find(key, hash)); + } + + template + const_iterator find_impl(const K& key, std::size_t hash) const { + std::size_t ibucket = bucket_for_hash(hash); + distance_type dist_from_ideal_bucket = 0; + + while(dist_from_ideal_bucket <= (m_first_or_empty_bucket + ibucket)->dist_from_ideal_bucket()) { + if(TSL_LIKELY((!USE_STORED_HASH_ON_LOOKUP || (m_first_or_empty_bucket + ibucket)->bucket_hash_equal(hash)) && + compare_keys(KeySelect()((m_first_or_empty_bucket + ibucket)->value()), key))) + { + return const_iterator(m_buckets.begin() + ibucket); + } + + ibucket = next_bucket(ibucket); + dist_from_ideal_bucket++; + } + + return cend(); + } + + void erase_from_bucket(iterator pos) { + pos.m_iterator->clear(); + m_nb_elements--; + + /** + * Backward 
shift, swap the empty bucket, previous_ibucket, with the values on its right, ibucket, + * until we cross another empty bucket or if the other bucket has a distance_from_ideal_bucket == 0. + * + * We try to move the values closer to their ideal bucket. + */ + std::size_t previous_ibucket = std::size_t(std::distance(m_buckets.begin(), pos.m_iterator)); + std::size_t ibucket = next_bucket(previous_ibucket); + + while(m_buckets[ibucket].dist_from_ideal_bucket() > 0) { + tsl_assert(m_buckets[previous_ibucket].empty()); + + const distance_type new_distance = distance_type(m_buckets[ibucket].dist_from_ideal_bucket() - 1); + m_buckets[previous_ibucket].set_value_of_empty_bucket(new_distance, m_buckets[ibucket].truncated_hash(), + std::move(m_buckets[ibucket].value())); + m_buckets[ibucket].clear(); + + previous_ibucket = ibucket; + ibucket = next_bucket(ibucket); + } + } + + template + std::pair insert_impl(const K& key, Args&&... value_type_args) { + const std::size_t hash = hash_key(key); + + std::size_t ibucket = bucket_for_hash(hash); + distance_type dist_from_ideal_bucket = 0; + + while(dist_from_ideal_bucket <= (m_first_or_empty_bucket + ibucket)->dist_from_ideal_bucket()) { + if((!USE_STORED_HASH_ON_LOOKUP || (m_first_or_empty_bucket + ibucket)->bucket_hash_equal(hash)) && + compare_keys(KeySelect()((m_first_or_empty_bucket + ibucket)->value()), key)) + { + return std::make_pair(iterator(m_buckets.begin() + ibucket), false); + } + + ibucket = next_bucket(ibucket); + dist_from_ideal_bucket++; + } + + if(grow_on_high_load()) { + ibucket = bucket_for_hash(hash); + dist_from_ideal_bucket = 0; + + while(dist_from_ideal_bucket <= (m_first_or_empty_bucket + ibucket)->dist_from_ideal_bucket()) { + ibucket = next_bucket(ibucket); + dist_from_ideal_bucket++; + } + } + + + if((m_first_or_empty_bucket + ibucket)->empty()) { + (m_first_or_empty_bucket + ibucket)->set_value_of_empty_bucket(dist_from_ideal_bucket, bucket_entry::truncate_hash(hash), + std::forward(value_type_args)...); + } + else { + insert_value(ibucket, dist_from_ideal_bucket, bucket_entry::truncate_hash(hash), + std::forward(value_type_args)...); + } + + + m_nb_elements++; + /* + * The value will be inserted in ibucket in any case, either because it was + * empty or by stealing the bucket (robin hood). + */ + return std::make_pair(iterator(m_buckets.begin() + ibucket), true); + } + + + template + void insert_value(std::size_t ibucket, distance_type dist_from_ideal_bucket, + truncated_hash_type hash, Args&&... value_type_args) + { + insert_value(ibucket, dist_from_ideal_bucket, hash, value_type(std::forward(value_type_args)...)); + } + + void insert_value(std::size_t ibucket, distance_type dist_from_ideal_bucket, + truncated_hash_type hash, value_type&& value) + { + m_buckets[ibucket].swap_with_value_in_bucket(dist_from_ideal_bucket, hash, value); + ibucket = next_bucket(ibucket); + dist_from_ideal_bucket++; + + while(!m_buckets[ibucket].empty()) { + if(dist_from_ideal_bucket > m_buckets[ibucket].dist_from_ideal_bucket()) { + if(dist_from_ideal_bucket >= REHASH_ON_HIGH_NB_PROBES__NPROBES && + load_factor() >= REHASH_ON_HIGH_NB_PROBES__MIN_LOAD_FACTOR) + { + /** + * The number of probes is really high, rehash the map on the next insert. + * Difficult to do now as rehash may throw an exception. 
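+                     * (Triggered once a probe sequence reaches
+                     * REHASH_ON_HIGH_NB_PROBES__NPROBES (128) while the load factor is at
+                     * least REHASH_ON_HIGH_NB_PROBES__MIN_LOAD_FACTOR (0.15);
+                     * grow_on_high_load() then rehashes at the start of the next insert.)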
+ */ + m_grow_on_next_insert = true; + } + + m_buckets[ibucket].swap_with_value_in_bucket(dist_from_ideal_bucket, hash, value); + } + + ibucket = next_bucket(ibucket); + dist_from_ideal_bucket++; + } + + m_buckets[ibucket].set_value_of_empty_bucket(dist_from_ideal_bucket, hash, std::move(value)); + } + + + void rehash_impl(size_type count) { + robin_hash new_table(count, static_cast(*this), static_cast(*this), + get_allocator(), m_max_load_factor); + + const bool use_stored_hash = USE_STORED_HASH_ON_REHASH(new_table.bucket_count()); + for(auto& bucket: m_buckets) { + if(bucket.empty()) { + continue; + } + + const std::size_t hash = use_stored_hash?bucket.truncated_hash(): + new_table.hash_key(KeySelect()(bucket.value())); + + new_table.insert_value_on_rehash(new_table.bucket_for_hash(hash), 0, + bucket_entry::truncate_hash(hash), std::move(bucket.value())); + } + + new_table.m_nb_elements = m_nb_elements; + new_table.swap(*this); + } + + void insert_value_on_rehash(std::size_t ibucket, distance_type dist_from_ideal_bucket, + truncated_hash_type hash, value_type&& value) + { + while(true) { + if(dist_from_ideal_bucket > m_buckets[ibucket].dist_from_ideal_bucket()) { + if(m_buckets[ibucket].empty()) { + m_buckets[ibucket].set_value_of_empty_bucket(dist_from_ideal_bucket, hash, std::move(value)); + return; + } + else { + m_buckets[ibucket].swap_with_value_in_bucket(dist_from_ideal_bucket, hash, value); + } + } + + dist_from_ideal_bucket++; + ibucket = next_bucket(ibucket); + } + } + + + + /** + * Return true if the map has been rehashed. + */ + bool grow_on_high_load() { + if(m_grow_on_next_insert || size() >= m_load_threshold) { + rehash_impl(GrowthPolicy::next_bucket_count()); + m_grow_on_next_insert = false; + + return true; + } + + return false; + } + + +public: + static const size_type DEFAULT_INIT_BUCKETS_SIZE = 16; + static constexpr float DEFAULT_MAX_LOAD_FACTOR = 0.5f; + +private: + static const distance_type REHASH_ON_HIGH_NB_PROBES__NPROBES = 128; + static constexpr float REHASH_ON_HIGH_NB_PROBES__MIN_LOAD_FACTOR = 0.15f; + + + /** + * Return an always valid pointer to an static empty bucket_entry with last_bucket() == true. + */ + bucket_entry* static_empty_bucket_ptr() { + static bucket_entry empty_bucket(true); + return &empty_bucket; + } + +private: + buckets_container_type m_buckets; + + /** + * Points to m_buckets.data() if !m_buckets.empty() otherwise points to static_empty_bucket_ptr. + * This variable is useful to avoid the cost of checking if m_buckets is empty when trying + * to find an element. + */ + bucket_entry* m_first_or_empty_bucket; + + /** + * Used a lot in find, avoid the call to m_buckets.size() which is a bit slower. 
+ */ + size_type m_bucket_count; + + size_type m_nb_elements; + + size_type m_load_threshold; + float m_max_load_factor; + + bool m_grow_on_next_insert; +}; + +} + +} + +#endif diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/tsl/robin_map.h b/packages/leann-backend-diskann/third_party/DiskANN/include/tsl/robin_map.h new file mode 100644 index 0000000..5958e70 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/tsl/robin_map.h @@ -0,0 +1,668 @@ +/** + * MIT License + * + * Copyright (c) 2017 Tessil + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef TSL_ROBIN_MAP_H +#define TSL_ROBIN_MAP_H + + +#include +#include +#include +#include +#include +#include +#include "robin_hash.h" + + +namespace tsl { + + +/** + * Implementation of a hash map using open-adressing and the robin hood hashing algorithm with backward shift deletion. + * + * For operations modifying the hash map (insert, erase, rehash, ...), the strong exception guarantee + * is only guaranteed when the expression `std::is_nothrow_swappable>::value && + * std::is_nothrow_move_constructible>::value` is true, otherwise if an exception + * is thrown during the swap or the move, the hash map may end up in a undefined state. Per the standard + * a `Key` or `T` with a noexcept copy constructor and no move constructor also satisfies the + * `std::is_nothrow_move_constructible>::value` criterion (and will thus guarantee the + * strong exception for the map). + * + * When `StoreHash` is true, 32 bits of the hash are stored alongside the values. It can improve + * the performance during lookups if the `KeyEqual` function takes time (if it engenders a cache-miss for example) + * as we then compare the stored hashes before comparing the keys. When `tsl::rh::power_of_two_growth_policy` is used + * as `GrowthPolicy`, it may also speed-up the rehash process as we can avoid to recalculate the hash. + * When it is detected that storing the hash will not incur any memory penality due to alignement (i.e. + * `sizeof(tsl::detail_robin_hash::bucket_entry) == + * sizeof(tsl::detail_robin_hash::bucket_entry)`) and `tsl::rh::power_of_two_growth_policy` is + * used, the hash will be stored even if `StoreHash` is false so that we can speed-up the rehash (but it will + * not be used on lookups unless `StoreHash` is true). + * + * `GrowthPolicy` defines how the map grows and consequently how a hash value is mapped to a bucket. 
+ * By default the map uses `tsl::rh::power_of_two_growth_policy`. This policy keeps the number of buckets + * to a power of two and uses a mask to map the hash to a bucket instead of the slow modulo. + * Other growth policies are available and you may define your own growth policy, + * check `tsl::rh::power_of_two_growth_policy` for the interface. + * + * If the destructor of `Key` or `T` throws an exception, the behaviour of the class is undefined. + * + * Iterators invalidation: + * - clear, operator=, reserve, rehash: always invalidate the iterators. + * - insert, emplace, emplace_hint, operator[]: if there is an effective insert, invalidate the iterators. + * - erase: always invalidate the iterators. + */ +template, + class KeyEqual = std::equal_to, + class Allocator = std::allocator>, + bool StoreHash = false, + class GrowthPolicy = tsl::rh::power_of_two_growth_policy<2>> +class robin_map { +private: + template + using has_is_transparent = tsl::detail_robin_hash::has_is_transparent; + + class KeySelect { + public: + using key_type = Key; + + const key_type& operator()(const std::pair& key_value) const noexcept { + return key_value.first; + } + + key_type& operator()(std::pair& key_value) noexcept { + return key_value.first; + } + }; + + class ValueSelect { + public: + using value_type = T; + + const value_type& operator()(const std::pair& key_value) const noexcept { + return key_value.second; + } + + value_type& operator()(std::pair& key_value) noexcept { + return key_value.second; + } + }; + + using ht = detail_robin_hash::robin_hash, KeySelect, ValueSelect, + Hash, KeyEqual, Allocator, StoreHash, GrowthPolicy>; + +public: + using key_type = typename ht::key_type; + using mapped_type = T; + using value_type = typename ht::value_type; + using size_type = typename ht::size_type; + using difference_type = typename ht::difference_type; + using hasher = typename ht::hasher; + using key_equal = typename ht::key_equal; + using allocator_type = typename ht::allocator_type; + using reference = typename ht::reference; + using const_reference = typename ht::const_reference; + using pointer = typename ht::pointer; + using const_pointer = typename ht::const_pointer; + using iterator = typename ht::iterator; + using const_iterator = typename ht::const_iterator; + + +public: + /* + * Constructors + */ + robin_map(): robin_map(ht::DEFAULT_INIT_BUCKETS_SIZE) { + } + + explicit robin_map(size_type bucket_count, + const Hash& hash = Hash(), + const KeyEqual& equal = KeyEqual(), + const Allocator& alloc = Allocator()): + m_ht(bucket_count, hash, equal, alloc, ht::DEFAULT_MAX_LOAD_FACTOR) + { + } + + robin_map(size_type bucket_count, + const Allocator& alloc): robin_map(bucket_count, Hash(), KeyEqual(), alloc) + { + } + + robin_map(size_type bucket_count, + const Hash& hash, + const Allocator& alloc): robin_map(bucket_count, hash, KeyEqual(), alloc) + { + } + + explicit robin_map(const Allocator& alloc): robin_map(ht::DEFAULT_INIT_BUCKETS_SIZE, alloc) { + } + + template + robin_map(InputIt first, InputIt last, + size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE, + const Hash& hash = Hash(), + const KeyEqual& equal = KeyEqual(), + const Allocator& alloc = Allocator()): robin_map(bucket_count, hash, equal, alloc) + { + insert(first, last); + } + + template + robin_map(InputIt first, InputIt last, + size_type bucket_count, + const Allocator& alloc): robin_map(first, last, bucket_count, Hash(), KeyEqual(), alloc) + { + } + + template + robin_map(InputIt first, InputIt last, + size_type bucket_count, + 
const Hash& hash, + const Allocator& alloc): robin_map(first, last, bucket_count, hash, KeyEqual(), alloc) + { + } + + robin_map(std::initializer_list init, + size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE, + const Hash& hash = Hash(), + const KeyEqual& equal = KeyEqual(), + const Allocator& alloc = Allocator()): + robin_map(init.begin(), init.end(), bucket_count, hash, equal, alloc) + { + } + + robin_map(std::initializer_list init, + size_type bucket_count, + const Allocator& alloc): + robin_map(init.begin(), init.end(), bucket_count, Hash(), KeyEqual(), alloc) + { + } + + robin_map(std::initializer_list init, + size_type bucket_count, + const Hash& hash, + const Allocator& alloc): + robin_map(init.begin(), init.end(), bucket_count, hash, KeyEqual(), alloc) + { + } + + robin_map& operator=(std::initializer_list ilist) { + m_ht.clear(); + + m_ht.reserve(ilist.size()); + m_ht.insert(ilist.begin(), ilist.end()); + + return *this; + } + + allocator_type get_allocator() const { return m_ht.get_allocator(); } + + + /* + * Iterators + */ + iterator begin() noexcept { return m_ht.begin(); } + const_iterator begin() const noexcept { return m_ht.begin(); } + const_iterator cbegin() const noexcept { return m_ht.cbegin(); } + + iterator end() noexcept { return m_ht.end(); } + const_iterator end() const noexcept { return m_ht.end(); } + const_iterator cend() const noexcept { return m_ht.cend(); } + + + /* + * Capacity + */ + bool empty() const noexcept { return m_ht.empty(); } + size_type size() const noexcept { return m_ht.size(); } + size_type max_size() const noexcept { return m_ht.max_size(); } + + /* + * Modifiers + */ + void clear() noexcept { m_ht.clear(); } + + + + std::pair insert(const value_type& value) { + return m_ht.insert(value); + } + + template::value>::type* = nullptr> + std::pair insert(P&& value) { + return m_ht.emplace(std::forward
<P>
(value)); + } + + std::pair insert(value_type&& value) { + return m_ht.insert(std::move(value)); + } + + + iterator insert(const_iterator hint, const value_type& value) { + return m_ht.insert(hint, value); + } + + template::value>::type* = nullptr> + iterator insert(const_iterator hint, P&& value) { + return m_ht.emplace_hint(hint, std::forward
<P>
(value)); + } + + iterator insert(const_iterator hint, value_type&& value) { + return m_ht.insert(hint, std::move(value)); + } + + + template + void insert(InputIt first, InputIt last) { + m_ht.insert(first, last); + } + + void insert(std::initializer_list ilist) { + m_ht.insert(ilist.begin(), ilist.end()); + } + + + + + template + std::pair insert_or_assign(const key_type& k, M&& obj) { + return m_ht.insert_or_assign(k, std::forward(obj)); + } + + template + std::pair insert_or_assign(key_type&& k, M&& obj) { + return m_ht.insert_or_assign(std::move(k), std::forward(obj)); + } + + template + iterator insert_or_assign(const_iterator hint, const key_type& k, M&& obj) { + return m_ht.insert_or_assign(hint, k, std::forward(obj)); + } + + template + iterator insert_or_assign(const_iterator hint, key_type&& k, M&& obj) { + return m_ht.insert_or_assign(hint, std::move(k), std::forward(obj)); + } + + + + /** + * Due to the way elements are stored, emplace will need to move or copy the key-value once. + * The method is equivalent to insert(value_type(std::forward(args)...)); + * + * Mainly here for compatibility with the std::unordered_map interface. + */ + template + std::pair emplace(Args&&... args) { + return m_ht.emplace(std::forward(args)...); + } + + + + /** + * Due to the way elements are stored, emplace_hint will need to move or copy the key-value once. + * The method is equivalent to insert(hint, value_type(std::forward(args)...)); + * + * Mainly here for compatibility with the std::unordered_map interface. + */ + template + iterator emplace_hint(const_iterator hint, Args&&... args) { + return m_ht.emplace_hint(hint, std::forward(args)...); + } + + + + + template + std::pair try_emplace(const key_type& k, Args&&... args) { + return m_ht.try_emplace(k, std::forward(args)...); + } + + template + std::pair try_emplace(key_type&& k, Args&&... args) { + return m_ht.try_emplace(std::move(k), std::forward(args)...); + } + + template + iterator try_emplace(const_iterator hint, const key_type& k, Args&&... args) { + return m_ht.try_emplace(hint, k, std::forward(args)...); + } + + template + iterator try_emplace(const_iterator hint, key_type&& k, Args&&... args) { + return m_ht.try_emplace(hint, std::move(k), std::forward(args)...); + } + + + + + iterator erase(iterator pos) { return m_ht.erase(pos); } + iterator erase(const_iterator pos) { return m_ht.erase(pos); } + iterator erase(const_iterator first, const_iterator last) { return m_ht.erase(first, last); } + size_type erase(const key_type& key) { return m_ht.erase(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup to the value if you already have the hash. + */ + size_type erase(const key_type& key, std::size_t precalculated_hash) { + return m_ht.erase(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. + * If so, K must be hashable and comparable to Key. + */ + template::value>::type* = nullptr> + size_type erase(const K& key) { return m_ht.erase(key); } + + /** + * @copydoc erase(const K& key) + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup to the value if you already have the hash. 
+ */ + template::value>::type* = nullptr> + size_type erase(const K& key, std::size_t precalculated_hash) { + return m_ht.erase(key, precalculated_hash); + } + + + + void swap(robin_map& other) { other.m_ht.swap(m_ht); } + + + + /* + * Lookup + */ + T& at(const Key& key) { return m_ht.at(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. + */ + T& at(const Key& key, std::size_t precalculated_hash) { return m_ht.at(key, precalculated_hash); } + + + const T& at(const Key& key) const { return m_ht.at(key); } + + /** + * @copydoc at(const Key& key, std::size_t precalculated_hash) + */ + const T& at(const Key& key, std::size_t precalculated_hash) const { return m_ht.at(key, precalculated_hash); } + + + /** + * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. + * If so, K must be hashable and comparable to Key. + */ + template::value>::type* = nullptr> + T& at(const K& key) { return m_ht.at(key); } + + /** + * @copydoc at(const K& key) + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. + */ + template::value>::type* = nullptr> + T& at(const K& key, std::size_t precalculated_hash) { return m_ht.at(key, precalculated_hash); } + + + /** + * @copydoc at(const K& key) + */ + template::value>::type* = nullptr> + const T& at(const K& key) const { return m_ht.at(key); } + + /** + * @copydoc at(const K& key, std::size_t precalculated_hash) + */ + template::value>::type* = nullptr> + const T& at(const K& key, std::size_t precalculated_hash) const { return m_ht.at(key, precalculated_hash); } + + + + + T& operator[](const Key& key) { return m_ht[key]; } + T& operator[](Key&& key) { return m_ht[std::move(key)]; } + + + + + size_type count(const Key& key) const { return m_ht.count(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. + */ + size_type count(const Key& key, std::size_t precalculated_hash) const { + return m_ht.count(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. + * If so, K must be hashable and comparable to Key. + */ + template::value>::type* = nullptr> + size_type count(const K& key) const { return m_ht.count(key); } + + /** + * @copydoc count(const K& key) const + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. + */ + template::value>::type* = nullptr> + size_type count(const K& key, std::size_t precalculated_hash) const { return m_ht.count(key, precalculated_hash); } + + + + + iterator find(const Key& key) { return m_ht.find(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. 
+ */ + iterator find(const Key& key, std::size_t precalculated_hash) { return m_ht.find(key, precalculated_hash); } + + const_iterator find(const Key& key) const { return m_ht.find(key); } + + /** + * @copydoc find(const Key& key, std::size_t precalculated_hash) + */ + const_iterator find(const Key& key, std::size_t precalculated_hash) const { + return m_ht.find(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. + * If so, K must be hashable and comparable to Key. + */ + template::value>::type* = nullptr> + iterator find(const K& key) { return m_ht.find(key); } + + /** + * @copydoc find(const K& key) + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. + */ + template::value>::type* = nullptr> + iterator find(const K& key, std::size_t precalculated_hash) { return m_ht.find(key, precalculated_hash); } + + /** + * @copydoc find(const K& key) + */ + template::value>::type* = nullptr> + const_iterator find(const K& key) const { return m_ht.find(key); } + + /** + * @copydoc find(const K& key) + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. + */ + template::value>::type* = nullptr> + const_iterator find(const K& key, std::size_t precalculated_hash) const { + return m_ht.find(key, precalculated_hash); + } + + + + + std::pair equal_range(const Key& key) { return m_ht.equal_range(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. + */ + std::pair equal_range(const Key& key, std::size_t precalculated_hash) { + return m_ht.equal_range(key, precalculated_hash); + } + + std::pair equal_range(const Key& key) const { return m_ht.equal_range(key); } + + /** + * @copydoc equal_range(const Key& key, std::size_t precalculated_hash) + */ + std::pair equal_range(const Key& key, std::size_t precalculated_hash) const { + return m_ht.equal_range(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. + * If so, K must be hashable and comparable to Key. + */ + template::value>::type* = nullptr> + std::pair equal_range(const K& key) { return m_ht.equal_range(key); } + + + /** + * @copydoc equal_range(const K& key) + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. 
+ */ + template::value>::type* = nullptr> + std::pair equal_range(const K& key, std::size_t precalculated_hash) { + return m_ht.equal_range(key, precalculated_hash); + } + + /** + * @copydoc equal_range(const K& key) + */ + template::value>::type* = nullptr> + std::pair equal_range(const K& key) const { return m_ht.equal_range(key); } + + /** + * @copydoc equal_range(const K& key, std::size_t precalculated_hash) + */ + template::value>::type* = nullptr> + std::pair equal_range(const K& key, std::size_t precalculated_hash) const { + return m_ht.equal_range(key, precalculated_hash); + } + + + + + /* + * Bucket interface + */ + size_type bucket_count() const { return m_ht.bucket_count(); } + size_type max_bucket_count() const { return m_ht.max_bucket_count(); } + + + /* + * Hash policy + */ + float load_factor() const { return m_ht.load_factor(); } + float max_load_factor() const { return m_ht.max_load_factor(); } + void max_load_factor(float ml) { m_ht.max_load_factor(ml); } + + void rehash(size_type count) { m_ht.rehash(count); } + void reserve(size_type count) { m_ht.reserve(count); } + + + /* + * Observers + */ + hasher hash_function() const { return m_ht.hash_function(); } + key_equal key_eq() const { return m_ht.key_eq(); } + + /* + * Other + */ + + /** + * Convert a const_iterator to an iterator. + */ + iterator mutable_iterator(const_iterator pos) { + return m_ht.mutable_iterator(pos); + } + + friend bool operator==(const robin_map& lhs, const robin_map& rhs) { + if(lhs.size() != rhs.size()) { + return false; + } + + for(const auto& element_lhs: lhs) { + const auto it_element_rhs = rhs.find(element_lhs.first); + if(it_element_rhs == rhs.cend() || element_lhs.second != it_element_rhs->second) { + return false; + } + } + + return true; + } + + friend bool operator!=(const robin_map& lhs, const robin_map& rhs) { + return !operator==(lhs, rhs); + } + + friend void swap(robin_map& lhs, robin_map& rhs) { + lhs.swap(rhs); + } + +private: + ht m_ht; +}; + + +/** + * Same as `tsl::robin_map`. + */ +template, + class KeyEqual = std::equal_to, + class Allocator = std::allocator>, + bool StoreHash = false> +using robin_pg_map = robin_map; + +} // end namespace tsl + +#endif diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/tsl/robin_set.h b/packages/leann-backend-diskann/third_party/DiskANN/include/tsl/robin_set.h new file mode 100644 index 0000000..4e4667e --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/tsl/robin_set.h @@ -0,0 +1,535 @@ +/** + * MIT License + * + * Copyright (c) 2017 Tessil + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef TSL_ROBIN_SET_H +#define TSL_ROBIN_SET_H + + +#include +#include +#include +#include +#include +#include +#include "robin_hash.h" + + +namespace tsl { + + +/** + * Implementation of a hash set using open-adressing and the robin hood hashing algorithm with backward shift deletion. + * + * For operations modifying the hash set (insert, erase, rehash, ...), the strong exception guarantee + * is only guaranteed when the expression `std::is_nothrow_swappable::value && + * std::is_nothrow_move_constructible::value` is true, otherwise if an exception + * is thrown during the swap or the move, the hash set may end up in a undefined state. Per the standard + * a `Key` with a noexcept copy constructor and no move constructor also satisfies the + * `std::is_nothrow_move_constructible::value` criterion (and will thus guarantee the + * strong exception for the set). + * + * When `StoreHash` is true, 32 bits of the hash are stored alongside the values. It can improve + * the performance during lookups if the `KeyEqual` function takes time (or engenders a cache-miss for example) + * as we then compare the stored hashes before comparing the keys. When `tsl::rh::power_of_two_growth_policy` is used + * as `GrowthPolicy`, it may also speed-up the rehash process as we can avoid to recalculate the hash. + * When it is detected that storing the hash will not incur any memory penality due to alignement (i.e. + * `sizeof(tsl::detail_robin_hash::bucket_entry) == + * sizeof(tsl::detail_robin_hash::bucket_entry)`) and `tsl::rh::power_of_two_growth_policy` is + * used, the hash will be stored even if `StoreHash` is false so that we can speed-up the rehash (but it will + * not be used on lookups unless `StoreHash` is true). + * + * `GrowthPolicy` defines how the set grows and consequently how a hash value is mapped to a bucket. + * By default the set uses `tsl::rh::power_of_two_growth_policy`. This policy keeps the number of buckets + * to a power of two and uses a mask to set the hash to a bucket instead of the slow modulo. + * Other growth policies are available and you may define your own growth policy, + * check `tsl::rh::power_of_two_growth_policy` for the interface. + * + * If the destructor of `Key` throws an exception, the behaviour of the class is undefined. + * + * Iterators invalidation: + * - clear, operator=, reserve, rehash: always invalidate the iterators. + * - insert, emplace, emplace_hint, operator[]: if there is an effective insert, invalidate the iterators. + * - erase: always invalidate the iterators. 
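A minimal sketch of the container documented above (sizes and keys are arbitrary); reserving up front avoids the rehashes that would otherwise invalidate iterators during a bulk insert:

#include <cstdint>
#include <tsl/robin_set.h>

int main() {
    tsl::robin_set<std::int64_t> ids;
    ids.reserve(1000);                  // size the table once before the inserts below

    for (std::int64_t i = 0; i < 1000; ++i) {
        ids.insert(i * i);
    }

    return ids.count(49) == 1 ? 0 : 1;  // 49 == 7*7 was inserted above
}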
+ */ +template, + class KeyEqual = std::equal_to, + class Allocator = std::allocator, + bool StoreHash = false, + class GrowthPolicy = tsl::rh::power_of_two_growth_policy<2>> +class robin_set { +private: + template + using has_is_transparent = tsl::detail_robin_hash::has_is_transparent; + + class KeySelect { + public: + using key_type = Key; + + const key_type& operator()(const Key& key) const noexcept { + return key; + } + + key_type& operator()(Key& key) noexcept { + return key; + } + }; + + using ht = detail_robin_hash::robin_hash; + +public: + using key_type = typename ht::key_type; + using value_type = typename ht::value_type; + using size_type = typename ht::size_type; + using difference_type = typename ht::difference_type; + using hasher = typename ht::hasher; + using key_equal = typename ht::key_equal; + using allocator_type = typename ht::allocator_type; + using reference = typename ht::reference; + using const_reference = typename ht::const_reference; + using pointer = typename ht::pointer; + using const_pointer = typename ht::const_pointer; + using iterator = typename ht::iterator; + using const_iterator = typename ht::const_iterator; + + + /* + * Constructors + */ + robin_set(): robin_set(ht::DEFAULT_INIT_BUCKETS_SIZE) { + } + + explicit robin_set(size_type bucket_count, + const Hash& hash = Hash(), + const KeyEqual& equal = KeyEqual(), + const Allocator& alloc = Allocator()): + m_ht(bucket_count, hash, equal, alloc, ht::DEFAULT_MAX_LOAD_FACTOR) + { + } + + robin_set(size_type bucket_count, + const Allocator& alloc): robin_set(bucket_count, Hash(), KeyEqual(), alloc) + { + } + + robin_set(size_type bucket_count, + const Hash& hash, + const Allocator& alloc): robin_set(bucket_count, hash, KeyEqual(), alloc) + { + } + + explicit robin_set(const Allocator& alloc): robin_set(ht::DEFAULT_INIT_BUCKETS_SIZE, alloc) { + } + + template + robin_set(InputIt first, InputIt last, + size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE, + const Hash& hash = Hash(), + const KeyEqual& equal = KeyEqual(), + const Allocator& alloc = Allocator()): robin_set(bucket_count, hash, equal, alloc) + { + insert(first, last); + } + + template + robin_set(InputIt first, InputIt last, + size_type bucket_count, + const Allocator& alloc): robin_set(first, last, bucket_count, Hash(), KeyEqual(), alloc) + { + } + + template + robin_set(InputIt first, InputIt last, + size_type bucket_count, + const Hash& hash, + const Allocator& alloc): robin_set(first, last, bucket_count, hash, KeyEqual(), alloc) + { + } + + robin_set(std::initializer_list init, + size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE, + const Hash& hash = Hash(), + const KeyEqual& equal = KeyEqual(), + const Allocator& alloc = Allocator()): + robin_set(init.begin(), init.end(), bucket_count, hash, equal, alloc) + { + } + + robin_set(std::initializer_list init, + size_type bucket_count, + const Allocator& alloc): + robin_set(init.begin(), init.end(), bucket_count, Hash(), KeyEqual(), alloc) + { + } + + robin_set(std::initializer_list init, + size_type bucket_count, + const Hash& hash, + const Allocator& alloc): + robin_set(init.begin(), init.end(), bucket_count, hash, KeyEqual(), alloc) + { + } + + + robin_set& operator=(std::initializer_list ilist) { + m_ht.clear(); + + m_ht.reserve(ilist.size()); + m_ht.insert(ilist.begin(), ilist.end()); + + return *this; + } + + allocator_type get_allocator() const { return m_ht.get_allocator(); } + + + /* + * Iterators + */ + iterator begin() noexcept { return m_ht.begin(); } + const_iterator begin() 
const noexcept { return m_ht.begin(); } + const_iterator cbegin() const noexcept { return m_ht.cbegin(); } + + iterator end() noexcept { return m_ht.end(); } + const_iterator end() const noexcept { return m_ht.end(); } + const_iterator cend() const noexcept { return m_ht.cend(); } + + + /* + * Capacity + */ + bool empty() const noexcept { return m_ht.empty(); } + size_type size() const noexcept { return m_ht.size(); } + size_type max_size() const noexcept { return m_ht.max_size(); } + + /* + * Modifiers + */ + void clear() noexcept { m_ht.clear(); } + + + + + std::pair insert(const value_type& value) { + return m_ht.insert(value); + } + + std::pair insert(value_type&& value) { + return m_ht.insert(std::move(value)); + } + + iterator insert(const_iterator hint, const value_type& value) { + return m_ht.insert(hint, value); + } + + iterator insert(const_iterator hint, value_type&& value) { + return m_ht.insert(hint, std::move(value)); + } + + template + void insert(InputIt first, InputIt last) { + m_ht.insert(first, last); + } + + void insert(std::initializer_list ilist) { + m_ht.insert(ilist.begin(), ilist.end()); + } + + + + + /** + * Due to the way elements are stored, emplace will need to move or copy the key-value once. + * The method is equivalent to insert(value_type(std::forward(args)...)); + * + * Mainly here for compatibility with the std::unordered_map interface. + */ + template + std::pair emplace(Args&&... args) { + return m_ht.emplace(std::forward(args)...); + } + + + + /** + * Due to the way elements are stored, emplace_hint will need to move or copy the key-value once. + * The method is equivalent to insert(hint, value_type(std::forward(args)...)); + * + * Mainly here for compatibility with the std::unordered_map interface. + */ + template + iterator emplace_hint(const_iterator hint, Args&&... args) { + return m_ht.emplace_hint(hint, std::forward(args)...); + } + + + + iterator erase(iterator pos) { return m_ht.erase(pos); } + iterator erase(const_iterator pos) { return m_ht.erase(pos); } + iterator erase(const_iterator first, const_iterator last) { return m_ht.erase(first, last); } + size_type erase(const key_type& key) { return m_ht.erase(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup to the value if you already have the hash. + */ + size_type erase(const key_type& key, std::size_t precalculated_hash) { + return m_ht.erase(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. + * If so, K must be hashable and comparable to Key. + */ + template::value>::type* = nullptr> + size_type erase(const K& key) { return m_ht.erase(key); } + + /** + * @copydoc erase(const K& key) + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup to the value if you already have the hash. + */ + template::value>::type* = nullptr> + size_type erase(const K& key, std::size_t precalculated_hash) { + return m_ht.erase(key, precalculated_hash); + } + + + + void swap(robin_set& other) { other.m_ht.swap(m_ht); } + + + + /* + * Lookup + */ + size_type count(const Key& key) const { return m_ht.count(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). 
Usefull to speed-up the lookup if you already have the hash. + */ + size_type count(const Key& key, std::size_t precalculated_hash) const { return m_ht.count(key, precalculated_hash); } + + /** + * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. + * If so, K must be hashable and comparable to Key. + */ + template::value>::type* = nullptr> + size_type count(const K& key) const { return m_ht.count(key); } + + /** + * @copydoc count(const K& key) const + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. + */ + template::value>::type* = nullptr> + size_type count(const K& key, std::size_t precalculated_hash) const { return m_ht.count(key, precalculated_hash); } + + + + + iterator find(const Key& key) { return m_ht.find(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. + */ + iterator find(const Key& key, std::size_t precalculated_hash) { return m_ht.find(key, precalculated_hash); } + + const_iterator find(const Key& key) const { return m_ht.find(key); } + + /** + * @copydoc find(const Key& key, std::size_t precalculated_hash) + */ + const_iterator find(const Key& key, std::size_t precalculated_hash) const { return m_ht.find(key, precalculated_hash); } + + /** + * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. + * If so, K must be hashable and comparable to Key. + */ + template::value>::type* = nullptr> + iterator find(const K& key) { return m_ht.find(key); } + + /** + * @copydoc find(const K& key) + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. + */ + template::value>::type* = nullptr> + iterator find(const K& key, std::size_t precalculated_hash) { return m_ht.find(key, precalculated_hash); } + + /** + * @copydoc find(const K& key) + */ + template::value>::type* = nullptr> + const_iterator find(const K& key) const { return m_ht.find(key); } + + /** + * @copydoc find(const K& key) + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. + */ + template::value>::type* = nullptr> + const_iterator find(const K& key, std::size_t precalculated_hash) const { return m_ht.find(key, precalculated_hash); } + + + + + std::pair equal_range(const Key& key) { return m_ht.equal_range(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. 
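A sketch of the KeyEqual::is_transparent overloads described above, assuming C++17 for std::string_view; the functor names sv_hash and sv_equal are illustrative, not part of the library:

#include <cstddef>
#include <functional>
#include <string>
#include <string_view>
#include <tsl/robin_set.h>

struct sv_hash {
    std::size_t operator()(std::string_view sv) const {
        return std::hash<std::string_view>{}(sv);
    }
};

struct sv_equal {
    using is_transparent = void;  // opts in to the template find/count/erase overloads
    bool operator()(std::string_view lhs, std::string_view rhs) const {
        return lhs == rhs;
    }
};

int main() {
    tsl::robin_set<std::string, sv_hash, sv_equal> words{"alpha", "beta"};
    // The const char* literal is hashed and compared through std::string_view,
    // so no temporary std::string is built for the lookup.
    return words.count("alpha") == 1 ? 0 : 1;
}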
+ */ + std::pair equal_range(const Key& key, std::size_t precalculated_hash) { + return m_ht.equal_range(key, precalculated_hash); + } + + std::pair equal_range(const Key& key) const { return m_ht.equal_range(key); } + + /** + * @copydoc equal_range(const Key& key, std::size_t precalculated_hash) + */ + std::pair equal_range(const Key& key, std::size_t precalculated_hash) const { + return m_ht.equal_range(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. + * If so, K must be hashable and comparable to Key. + */ + template::value>::type* = nullptr> + std::pair equal_range(const K& key) { return m_ht.equal_range(key); } + + /** + * @copydoc equal_range(const K& key) + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. + */ + template::value>::type* = nullptr> + std::pair equal_range(const K& key, std::size_t precalculated_hash) { + return m_ht.equal_range(key, precalculated_hash); + } + + /** + * @copydoc equal_range(const K& key) + */ + template::value>::type* = nullptr> + std::pair equal_range(const K& key) const { return m_ht.equal_range(key); } + + /** + * @copydoc equal_range(const K& key, std::size_t precalculated_hash) + */ + template::value>::type* = nullptr> + std::pair equal_range(const K& key, std::size_t precalculated_hash) const { + return m_ht.equal_range(key, precalculated_hash); + } + + + + + /* + * Bucket interface + */ + size_type bucket_count() const { return m_ht.bucket_count(); } + size_type max_bucket_count() const { return m_ht.max_bucket_count(); } + + + /* + * Hash policy + */ + float load_factor() const { return m_ht.load_factor(); } + float max_load_factor() const { return m_ht.max_load_factor(); } + void max_load_factor(float ml) { m_ht.max_load_factor(ml); } + + void rehash(size_type count) { m_ht.rehash(count); } + void reserve(size_type count) { m_ht.reserve(count); } + + + /* + * Observers + */ + hasher hash_function() const { return m_ht.hash_function(); } + key_equal key_eq() const { return m_ht.key_eq(); } + + + /* + * Other + */ + + /** + * Convert a const_iterator to an iterator. + */ + iterator mutable_iterator(const_iterator pos) { + return m_ht.mutable_iterator(pos); + } + + friend bool operator==(const robin_set& lhs, const robin_set& rhs) { + if(lhs.size() != rhs.size()) { + return false; + } + + for(const auto& element_lhs: lhs) { + const auto it_element_rhs = rhs.find(element_lhs); + if(it_element_rhs == rhs.cend()) { + return false; + } + } + + return true; + } + + friend bool operator!=(const robin_set& lhs, const robin_set& rhs) { + return !operator==(lhs, rhs); + } + + friend void swap(robin_set& lhs, robin_set& rhs) { + lhs.swap(rhs); + } + +private: + ht m_ht; +}; + + +/** + * Same as `tsl::robin_set`. 
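For illustration, a sketch contrasting the two aliases: robin_set maps hashes with the power-of-two mask policy, while robin_pg_set uses prime bucket counts and a modulo, which tends to distribute values better under weak hash functions:

#include <tsl/robin_set.h>

int main() {
    tsl::robin_set<int>    masked;   // default tsl::rh::power_of_two_growth_policy
    tsl::robin_pg_set<int> primed;   // prime-number bucket counts, modulo-based mapping

    masked.insert(42);
    primed.insert(42);
    return (masked.count(42) + primed.count(42) == 2) ? 0 : 1;
}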
+ */ +template, + class KeyEqual = std::equal_to, + class Allocator = std::allocator, + bool StoreHash = false> +using robin_pg_set = robin_set; + +} // end namespace tsl + +#endif + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/tsl/sparse_growth_policy.h b/packages/leann-backend-diskann/third_party/DiskANN/include/tsl/sparse_growth_policy.h new file mode 100644 index 0000000..d73aaaf --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/tsl/sparse_growth_policy.h @@ -0,0 +1,301 @@ +/** + * MIT License + * + * Copyright (c) 2017 Thibaut Goetghebuer-Planchon + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef TSL_SPARSE_GROWTH_POLICY_H +#define TSL_SPARSE_GROWTH_POLICY_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace tsl { +namespace sh { + +/** + * Grow the hash table by a factor of GrowthFactor keeping the bucket count to a + * power of two. It allows the table to use a mask operation instead of a modulo + * operation to map a hash to a bucket. + * + * GrowthFactor must be a power of two >= 2. + */ +template +class power_of_two_growth_policy { + public: + /** + * Called on the hash table creation and on rehash. The number of buckets for + * the table is passed in parameter. This number is a minimum, the policy may + * update this value with a higher value if needed (but not lower). + * + * If 0 is given, min_bucket_count_in_out must still be 0 after the policy + * creation and bucket_for_hash must always return 0 in this case. + */ + explicit power_of_two_growth_policy(std::size_t &min_bucket_count_in_out) { + if (min_bucket_count_in_out > max_bucket_count()) { + throw std::length_error("The hash table exceeds its maximum size."); + } + + if (min_bucket_count_in_out > 0) { + min_bucket_count_in_out = + round_up_to_power_of_two(min_bucket_count_in_out); + m_mask = min_bucket_count_in_out - 1; + } else { + m_mask = 0; + } + } + + /** + * Return the bucket [0, bucket_count()) to which the hash belongs. + * If bucket_count() is 0, it must always return 0. + */ + std::size_t bucket_for_hash(std::size_t hash) const noexcept { + return hash & m_mask; + } + + /** + * Return the number of buckets that should be used on next growth. 
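An illustrative check (values arbitrary) of the two properties described above: with a power-of-two bucket count, masking with bucket_count - 1 gives the same bucket as the modulo it replaces, and the next bucket count is simply the current count times the growth factor:

#include <cassert>
#include <cstddef>

int main() {
    const std::size_t bucket_count = 1024;      // power of two, as the policy guarantees
    const std::size_t mask = bucket_count - 1;  // 0x3FF

    // bucket_for_hash: a mask instead of the slower modulo.
    for (std::size_t hash = 0; hash < 100000; ++hash) {
        assert((hash & mask) == (hash % bucket_count));
    }

    // next_bucket_count with the default GrowthFactor of 2 doubles the table.
    assert((mask + 1) * 2 == 2048);
    return 0;
}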
+ */ + std::size_t next_bucket_count() const { + if ((m_mask + 1) > max_bucket_count() / GrowthFactor) { + throw std::length_error("The hash table exceeds its maximum size."); + } + + return (m_mask + 1) * GrowthFactor; + } + + /** + * Return the maximum number of buckets supported by the policy. + */ + std::size_t max_bucket_count() const { + // Largest power of two. + return (std::numeric_limits::max() / 2) + 1; + } + + /** + * Reset the growth policy as if it was created with a bucket count of 0. + * After a clear, the policy must always return 0 when bucket_for_hash is + * called. + */ + void clear() noexcept { m_mask = 0; } + + private: + static std::size_t round_up_to_power_of_two(std::size_t value) { + if (is_power_of_two(value)) { + return value; + } + + if (value == 0) { + return 1; + } + + --value; + for (std::size_t i = 1; i < sizeof(std::size_t) * CHAR_BIT; i *= 2) { + value |= value >> i; + } + + return value + 1; + } + + static constexpr bool is_power_of_two(std::size_t value) { + return value != 0 && (value & (value - 1)) == 0; + } + + protected: + static_assert(is_power_of_two(GrowthFactor) && GrowthFactor >= 2, + "GrowthFactor must be a power of two >= 2."); + + std::size_t m_mask; +}; + +/** + * Grow the hash table by GrowthFactor::num / GrowthFactor::den and use a modulo + * to map a hash to a bucket. Slower but it can be useful if you want a slower + * growth. + */ +template > +class mod_growth_policy { + public: + explicit mod_growth_policy(std::size_t &min_bucket_count_in_out) { + if (min_bucket_count_in_out > max_bucket_count()) { + throw std::length_error("The hash table exceeds its maximum size."); + } + + if (min_bucket_count_in_out > 0) { + m_mod = min_bucket_count_in_out; + } else { + m_mod = 1; + } + } + + std::size_t bucket_for_hash(std::size_t hash) const noexcept { + return hash % m_mod; + } + + std::size_t next_bucket_count() const { + if (m_mod == max_bucket_count()) { + throw std::length_error("The hash table exceeds its maximum size."); + } + + const double next_bucket_count = + std::ceil(double(m_mod) * REHASH_SIZE_MULTIPLICATION_FACTOR); + if (!std::isnormal(next_bucket_count)) { + throw std::length_error("The hash table exceeds its maximum size."); + } + + if (next_bucket_count > double(max_bucket_count())) { + return max_bucket_count(); + } else { + return std::size_t(next_bucket_count); + } + } + + std::size_t max_bucket_count() const { return MAX_BUCKET_COUNT; } + + void clear() noexcept { m_mod = 1; } + + private: + static constexpr double REHASH_SIZE_MULTIPLICATION_FACTOR = + 1.0 * GrowthFactor::num / GrowthFactor::den; + static const std::size_t MAX_BUCKET_COUNT = + std::size_t(double(std::numeric_limits::max() / + REHASH_SIZE_MULTIPLICATION_FACTOR)); + + static_assert(REHASH_SIZE_MULTIPLICATION_FACTOR >= 1.1, + "Growth factor should be >= 1.1."); + + std::size_t m_mod; +}; + +/** + * Grow the hash table by using prime numbers as bucket count. Slower than + * tsl::sh::power_of_two_growth_policy in general but will probably distribute + * the values around better in the buckets with a poor hash function. + * + * To allow the compiler to optimize the modulo operation, a lookup table is + * used with constant primes numbers. + * + * With a switch the code would look like: + * \code + * switch(iprime) { // iprime is the current prime of the hash table + * case 0: hash % 5ul; + * break; + * case 1: hash % 17ul; + * break; + * case 2: hash % 29ul; + * break; + * ... 
+ * } + * \endcode + * + * Due to the constant variable in the modulo the compiler is able to optimize + * the operation by a series of multiplications, substractions and shifts. + * + * The 'hash % 5' could become something like 'hash - (hash * 0xCCCCCCCD) >> 34) + * * 5' in a 64 bits environment. + */ +class prime_growth_policy { + public: + explicit prime_growth_policy(std::size_t &min_bucket_count_in_out) { + auto it_prime = std::lower_bound(primes().begin(), primes().end(), + min_bucket_count_in_out); + if (it_prime == primes().end()) { + throw std::length_error("The hash table exceeds its maximum size."); + } + + m_iprime = + static_cast(std::distance(primes().begin(), it_prime)); + if (min_bucket_count_in_out > 0) { + min_bucket_count_in_out = *it_prime; + } else { + min_bucket_count_in_out = 0; + } + } + + std::size_t bucket_for_hash(std::size_t hash) const noexcept { + return mod_prime()[m_iprime](hash); + } + + std::size_t next_bucket_count() const { + if (m_iprime + 1 >= primes().size()) { + throw std::length_error("The hash table exceeds its maximum size."); + } + + return primes()[m_iprime + 1]; + } + + std::size_t max_bucket_count() const { return primes().back(); } + + void clear() noexcept { m_iprime = 0; } + + private: + static const std::array &primes() { + static const std::array PRIMES = { + {1ul, 5ul, 17ul, 29ul, 37ul, + 53ul, 67ul, 79ul, 97ul, 131ul, + 193ul, 257ul, 389ul, 521ul, 769ul, + 1031ul, 1543ul, 2053ul, 3079ul, 6151ul, + 12289ul, 24593ul, 49157ul, 98317ul, 196613ul, + 393241ul, 786433ul, 1572869ul, 3145739ul, 6291469ul, + 12582917ul, 25165843ul, 50331653ul, 100663319ul, 201326611ul, + 402653189ul, 805306457ul, 1610612741ul, 3221225473ul, 4294967291ul}}; + + static_assert( + std::numeric_limits::max() >= PRIMES.size(), + "The type of m_iprime is not big enough."); + + return PRIMES; + } + + static const std::array &mod_prime() { + // MOD_PRIME[iprime](hash) returns hash % PRIMES[iprime]. This table allows + // for faster modulo as the compiler can optimize the modulo code better + // with a constant known at the compilation. 
+ static const std::array MOD_PRIME = { + {&mod<0>, &mod<1>, &mod<2>, &mod<3>, &mod<4>, &mod<5>, &mod<6>, + &mod<7>, &mod<8>, &mod<9>, &mod<10>, &mod<11>, &mod<12>, &mod<13>, + &mod<14>, &mod<15>, &mod<16>, &mod<17>, &mod<18>, &mod<19>, &mod<20>, + &mod<21>, &mod<22>, &mod<23>, &mod<24>, &mod<25>, &mod<26>, &mod<27>, + &mod<28>, &mod<29>, &mod<30>, &mod<31>, &mod<32>, &mod<33>, &mod<34>, + &mod<35>, &mod<36>, &mod<37>, &mod<38>, &mod<39>}}; + + return MOD_PRIME; + } + + template + static std::size_t mod(std::size_t hash) { + return hash % primes()[IPrime]; + } + + private: + unsigned int m_iprime; +}; + +} // namespace sh +} // namespace tsl + +#endif diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/tsl/sparse_hash.h b/packages/leann-backend-diskann/third_party/DiskANN/include/tsl/sparse_hash.h new file mode 100644 index 0000000..e2115b4 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/tsl/sparse_hash.h @@ -0,0 +1,2215 @@ +/** + * MIT License + * + * Copyright (c) 2017 Thibaut Goetghebuer-Planchon + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef TSL_SPARSE_HASH_H +#define TSL_SPARSE_HASH_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "sparse_growth_policy.h" + +#ifdef __INTEL_COMPILER +#include // For _popcnt32 and _popcnt64 +#endif + +#ifdef _MSC_VER +#include // For __cpuid, __popcnt and __popcnt64 +#endif + +#ifdef TSL_DEBUG +#define tsl_sh_assert(expr) assert(expr) +#else +#define tsl_sh_assert(expr) (static_cast(0)) +#endif + +namespace tsl { + +namespace sh { +enum class probing { linear, quadratic }; + +enum class exception_safety { basic, strong }; + +enum class sparsity { high, medium, low }; +} // namespace sh + +namespace detail_popcount { +/** + * Define the popcount(ll) methods and pick-up the best depending on the + * compiler. + */ + +// From Wikipedia: https://en.wikipedia.org/wiki/Hamming_weight +inline int fallback_popcountll(unsigned long long int x) { + static_assert( + sizeof(unsigned long long int) == sizeof(std::uint64_t), + "sizeof(unsigned long long int) must be equal to sizeof(std::uint64_t). 
" + "Open a feature request if you need support for a platform where it " + "isn't the case."); + + const std::uint64_t m1 = 0x5555555555555555ull; + const std::uint64_t m2 = 0x3333333333333333ull; + const std::uint64_t m4 = 0x0f0f0f0f0f0f0f0full; + const std::uint64_t h01 = 0x0101010101010101ull; + + x -= (x >> 1ull) & m1; + x = (x & m2) + ((x >> 2ull) & m2); + x = (x + (x >> 4ull)) & m4; + return static_cast((x * h01) >> (64ull - 8ull)); +} + +inline int fallback_popcount(unsigned int x) { + static_assert(sizeof(unsigned int) == sizeof(std::uint32_t) || + sizeof(unsigned int) == sizeof(std::uint64_t), + "sizeof(unsigned int) must be equal to sizeof(std::uint32_t) " + "or sizeof(std::uint64_t). " + "Open a feature request if you need support for a platform " + "where it isn't the case."); + + if (sizeof(unsigned int) == sizeof(std::uint32_t)) { + const std::uint32_t m1 = 0x55555555; + const std::uint32_t m2 = 0x33333333; + const std::uint32_t m4 = 0x0f0f0f0f; + const std::uint32_t h01 = 0x01010101; + + x -= (x >> 1) & m1; + x = (x & m2) + ((x >> 2) & m2); + x = (x + (x >> 4)) & m4; + return static_cast((x * h01) >> (32 - 8)); + } else { + return fallback_popcountll(x); + } +} + +#if defined(__clang__) || defined(__GNUC__) +inline int popcountll(unsigned long long int value) { + return __builtin_popcountll(value); +} + +inline int popcount(unsigned int value) { return __builtin_popcount(value); } + +#elif defined(_MSC_VER) +/** + * We need to check for popcount support at runtime on Windows with __cpuid + * See https://msdn.microsoft.com/en-us/library/bb385231.aspx + */ +inline bool has_popcount_support() { + int cpu_infos[4]; + __cpuid(cpu_infos, 1); + return (cpu_infos[2] & (1 << 23)) != 0; +} + +inline int popcountll(unsigned long long int value) { +#ifdef _WIN64 + static_assert( + sizeof(unsigned long long int) == sizeof(std::int64_t), + "sizeof(unsigned long long int) must be equal to sizeof(std::int64_t). "); + + static const bool has_popcount = has_popcount_support(); + return has_popcount + ? static_cast(__popcnt64(static_cast(value))) + : fallback_popcountll(value); +#else + return fallback_popcountll(value); +#endif +} + +inline int popcount(unsigned int value) { + static_assert(sizeof(unsigned int) == sizeof(std::int32_t), + "sizeof(unsigned int) must be equal to sizeof(std::int32_t). "); + + static const bool has_popcount = has_popcount_support(); + return has_popcount + ? 
static_cast(__popcnt(static_cast(value))) + : fallback_popcount(value); +} + +#elif defined(__INTEL_COMPILER) +inline int popcountll(unsigned long long int value) { + static_assert(sizeof(unsigned long long int) == sizeof(__int64), ""); + return _popcnt64(static_cast<__int64>(value)); +} + +inline int popcount(unsigned int value) { + return _popcnt32(static_cast(value)); +} + +#else +inline int popcountll(unsigned long long int x) { + return fallback_popcountll(x); +} + +inline int popcount(unsigned int x) { return fallback_popcount(x); } + +#endif +} // namespace detail_popcount + +namespace detail_sparse_hash { + +template +struct make_void { + using type = void; +}; + +template +struct has_is_transparent : std::false_type {}; + +template +struct has_is_transparent::type> + : std::true_type {}; + +template +struct is_power_of_two_policy : std::false_type {}; + +template +struct is_power_of_two_policy> + : std::true_type {}; + +inline constexpr bool is_power_of_two(std::size_t value) { + return value != 0 && (value & (value - 1)) == 0; +} + +inline std::size_t round_up_to_power_of_two(std::size_t value) { + if (is_power_of_two(value)) { + return value; + } + + if (value == 0) { + return 1; + } + + --value; + for (std::size_t i = 1; i < sizeof(std::size_t) * CHAR_BIT; i *= 2) { + value |= value >> i; + } + + return value + 1; +} + +template +static T numeric_cast(U value, + const char *error_message = "numeric_cast() failed.") { + T ret = static_cast(value); + if (static_cast(ret) != value) { + throw std::runtime_error(error_message); + } + + const bool is_same_signedness = + (std::is_unsigned::value && std::is_unsigned::value) || + (std::is_signed::value && std::is_signed::value); + if (!is_same_signedness && (ret < T{}) != (value < U{})) { + throw std::runtime_error(error_message); + } + + return ret; +} + +/** + * Fixed size type used to represent size_type values on serialization. Need to + * be big enough to represent a std::size_t on 32 and 64 bits platforms, and + * must be the same size on both platforms. + */ +using slz_size_type = std::uint64_t; +static_assert(std::numeric_limits::max() >= + std::numeric_limits::max(), + "slz_size_type must be >= std::size_t"); + +template +static T deserialize_value(Deserializer &deserializer) { + // MSVC < 2017 is not conformant, circumvent the problem by removing the + // template keyword +#if defined(_MSC_VER) && _MSC_VER < 1910 + return deserializer.Deserializer::operator()(); +#else + return deserializer.Deserializer::template operator()(); +#endif +} + +/** + * WARNING: the sparse_array class doesn't free the ressources allocated through + * the allocator passed in parameter in each method. You have to manually call + * `clear(Allocator&)` when you don't need a sparse_array object anymore. + * + * The reason is that the sparse_array doesn't store the allocator to avoid + * wasting space in each sparse_array when the allocator has a size > 0. It only + * allocates/deallocates objects with the allocator that is passed in parameter. + * + * + * + * Index denotes a value between [0, BITMAP_NB_BITS), it is an index similar to + * std::vector. Offset denotes the real position in `m_values` corresponding to + * an index. + * + * We are using raw pointers instead of std::vector to avoid loosing + * 2*sizeof(size_t) bytes to store the capacity and size of the vector in each + * sparse_array. We know we can only store up to BITMAP_NB_BITS elements in the + * array, we don't need such big types. 
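A worked illustration of the index/offset mapping described above (bit pattern chosen arbitrarily): the offset of an index is the number of set bits strictly below it in the bitmap, which is what the index_to_offset helper later in this file computes with a popcount. The snippet uses the GCC/Clang builtin that the header itself falls back on:

#include <cassert>
#include <cstdint>

int main() {
    // Bits set at indices 0, 2, 3 and 5: four values are stored in m_values.
    const std::uint64_t bitmap_vals = 0b101101;

    // Offset of index 5 = popcount of the bits strictly below index 5.
    const std::uint64_t below = bitmap_vals & ((std::uint64_t(1) << 5) - 1);  // 0b001101
    assert(__builtin_popcountll(below) == 3);  // the value for index 5 lives at m_values[3]
    return 0;
}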
+ * + * + * T must be nothrow move constructible and/or copy constructible. + * Behaviour is undefined if the destructor of T throws an exception. + * + * See https://smerity.com/articles/2015/google_sparsehash.html for details on + * the idea behinds the implementation. + * + * TODO Check to use std::realloc and std::memmove when possible + */ +template +class sparse_array { + public: + using value_type = T; + using size_type = std::uint_least8_t; + using allocator_type = Allocator; + using iterator = value_type *; + using const_iterator = const value_type *; + + private: + static const size_type CAPACITY_GROWTH_STEP = + (Sparsity == tsl::sh::sparsity::high) ? 2 + : (Sparsity == tsl::sh::sparsity::medium) + ? 4 + : 8; // (Sparsity == tsl::sh::sparsity::low) + + /** + * Bitmap size configuration. + * Use 32 bits for the bitmap on 32-bits or less environnement as popcount on + * 64 bits numbers is slow on these environnement. Use 64 bits bitmap + * otherwise. + */ +#if SIZE_MAX <= UINT32_MAX + using bitmap_type = std::uint_least32_t; + static const std::size_t BITMAP_NB_BITS = 32; + static const std::size_t BUCKET_SHIFT = 5; +#else + using bitmap_type = std::uint_least64_t; + static const std::size_t BITMAP_NB_BITS = 64; + static const std::size_t BUCKET_SHIFT = 6; +#endif + + static const std::size_t BUCKET_MASK = BITMAP_NB_BITS - 1; + + static_assert(is_power_of_two(BITMAP_NB_BITS), + "BITMAP_NB_BITS must be a power of two."); + static_assert(std::numeric_limits::digits >= BITMAP_NB_BITS, + "bitmap_type must be able to hold at least BITMAP_NB_BITS."); + static_assert((std::size_t(1) << BUCKET_SHIFT) == BITMAP_NB_BITS, + "(1 << BUCKET_SHIFT) must be equal to BITMAP_NB_BITS."); + static_assert(std::numeric_limits::max() >= BITMAP_NB_BITS, + "size_type must be big enough to hold BITMAP_NB_BITS."); + static_assert(std::is_unsigned::value, + "bitmap_type must be unsigned."); + static_assert((std::numeric_limits::max() & BUCKET_MASK) == + BITMAP_NB_BITS - 1, + ""); + + public: + /** + * Map an ibucket [0, bucket_count) in the hash table to a sparse_ibucket + * (a sparse_array holds multiple buckets, so there is less sparse_array than + * bucket_count). + * + * The bucket ibucket is in + * m_sparse_buckets[sparse_ibucket(ibucket)][index_in_sparse_bucket(ibucket)] + * instead of something like m_buckets[ibucket] in a classical hash table. + */ + static std::size_t sparse_ibucket(std::size_t ibucket) { + return ibucket >> BUCKET_SHIFT; + } + + /** + * Map an ibucket [0, bucket_count) in the hash table to an index in the + * sparse_array which corresponds to the bucket. + * + * The bucket ibucket is in + * m_sparse_buckets[sparse_ibucket(ibucket)][index_in_sparse_bucket(ibucket)] + * instead of something like m_buckets[ibucket] in a classical hash table. 
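A small sketch of that split, assuming the 64-bit configuration (BITMAP_NB_BITS == 64, BUCKET_SHIFT == 6, BUCKET_MASK == 63):

#include <cassert>
#include <cstddef>

int main() {
    const std::size_t bucket_shift = 6;   // 64-bit bitmap_type
    const std::size_t bucket_mask = 63;

    const std::size_t ibucket = 1000;
    const std::size_t sparse_ibucket = ibucket >> bucket_shift;          // 15
    const std::size_t index_in_sparse_bucket = ibucket & bucket_mask;    // 40

    // The two coordinates recombine to the original bucket index.
    assert(sparse_ibucket * 64 + index_in_sparse_bucket == ibucket);
    return 0;
}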
+ */ + static typename sparse_array::size_type index_in_sparse_bucket( + std::size_t ibucket) { + return static_cast( + ibucket & sparse_array::BUCKET_MASK); + } + + static std::size_t nb_sparse_buckets(std::size_t bucket_count) noexcept { + if (bucket_count == 0) { + return 0; + } + + return std::max( + 1, sparse_ibucket(tsl::detail_sparse_hash::round_up_to_power_of_two( + bucket_count))); + } + + public: + sparse_array() noexcept + : m_values(nullptr), + m_bitmap_vals(0), + m_bitmap_deleted_vals(0), + m_nb_elements(0), + m_capacity(0), + m_last_array(false) {} + + explicit sparse_array(bool last_bucket) noexcept + : m_values(nullptr), + m_bitmap_vals(0), + m_bitmap_deleted_vals(0), + m_nb_elements(0), + m_capacity(0), + m_last_array(last_bucket) {} + + sparse_array(size_type capacity, Allocator &alloc) + : m_values(nullptr), + m_bitmap_vals(0), + m_bitmap_deleted_vals(0), + m_nb_elements(0), + m_capacity(capacity), + m_last_array(false) { + if (m_capacity > 0) { + m_values = alloc.allocate(m_capacity); + tsl_sh_assert(m_values != + nullptr); // allocate should throw if there is a failure + } + } + + sparse_array(const sparse_array &other, Allocator &alloc) + : m_values(nullptr), + m_bitmap_vals(other.m_bitmap_vals), + m_bitmap_deleted_vals(other.m_bitmap_deleted_vals), + m_nb_elements(0), + m_capacity(other.m_capacity), + m_last_array(other.m_last_array) { + tsl_sh_assert(other.m_capacity >= other.m_nb_elements); + if (m_capacity == 0) { + return; + } + + m_values = alloc.allocate(m_capacity); + tsl_sh_assert(m_values != + nullptr); // allocate should throw if there is a failure + try { + for (size_type i = 0; i < other.m_nb_elements; i++) { + construct_value(alloc, m_values + i, other.m_values[i]); + m_nb_elements++; + } + } catch (...) { + clear(alloc); + throw; + } + } + + sparse_array(sparse_array &&other) noexcept + : m_values(other.m_values), + m_bitmap_vals(other.m_bitmap_vals), + m_bitmap_deleted_vals(other.m_bitmap_deleted_vals), + m_nb_elements(other.m_nb_elements), + m_capacity(other.m_capacity), + m_last_array(other.m_last_array) { + other.m_values = nullptr; + other.m_bitmap_vals = 0; + other.m_bitmap_deleted_vals = 0; + other.m_nb_elements = 0; + other.m_capacity = 0; + } + + sparse_array(sparse_array &&other, Allocator &alloc) + : m_values(nullptr), + m_bitmap_vals(other.m_bitmap_vals), + m_bitmap_deleted_vals(other.m_bitmap_deleted_vals), + m_nb_elements(0), + m_capacity(other.m_capacity), + m_last_array(other.m_last_array) { + tsl_sh_assert(other.m_capacity >= other.m_nb_elements); + if (m_capacity == 0) { + return; + } + + m_values = alloc.allocate(m_capacity); + tsl_sh_assert(m_values != + nullptr); // allocate should throw if there is a failure + try { + for (size_type i = 0; i < other.m_nb_elements; i++) { + construct_value(alloc, m_values + i, std::move(other.m_values[i])); + m_nb_elements++; + } + } catch (...) { + clear(alloc); + throw; + } + } + + sparse_array &operator=(const sparse_array &) = delete; + sparse_array &operator=(sparse_array &&) = delete; + + ~sparse_array() noexcept { + // The code that manages the sparse_array must have called clear before + // destruction. See documentation of sparse_array for more details. 
+ tsl_sh_assert(m_capacity == 0 && m_nb_elements == 0 && m_values == nullptr); + } + + iterator begin() noexcept { return m_values; } + iterator end() noexcept { return m_values + m_nb_elements; } + const_iterator begin() const noexcept { return cbegin(); } + const_iterator end() const noexcept { return cend(); } + const_iterator cbegin() const noexcept { return m_values; } + const_iterator cend() const noexcept { return m_values + m_nb_elements; } + + bool empty() const noexcept { return m_nb_elements == 0; } + + size_type size() const noexcept { return m_nb_elements; } + + void clear(allocator_type &alloc) noexcept { + destroy_and_deallocate_values(alloc, m_values, m_nb_elements, m_capacity); + + m_values = nullptr; + m_bitmap_vals = 0; + m_bitmap_deleted_vals = 0; + m_nb_elements = 0; + m_capacity = 0; + } + + bool last() const noexcept { return m_last_array; } + + void set_as_last() noexcept { m_last_array = true; } + + bool has_value(size_type index) const noexcept { + tsl_sh_assert(index < BITMAP_NB_BITS); + return (m_bitmap_vals & (bitmap_type(1) << index)) != 0; + } + + bool has_deleted_value(size_type index) const noexcept { + tsl_sh_assert(index < BITMAP_NB_BITS); + return (m_bitmap_deleted_vals & (bitmap_type(1) << index)) != 0; + } + + iterator value(size_type index) noexcept { + tsl_sh_assert(has_value(index)); + return m_values + index_to_offset(index); + } + + const_iterator value(size_type index) const noexcept { + tsl_sh_assert(has_value(index)); + return m_values + index_to_offset(index); + } + + /** + * Return iterator to set value. + */ + template + iterator set(allocator_type &alloc, size_type index, Args &&...value_args) { + tsl_sh_assert(!has_value(index)); + + const size_type offset = index_to_offset(index); + insert_at_offset(alloc, offset, std::forward(value_args)...); + + m_bitmap_vals = (m_bitmap_vals | (bitmap_type(1) << index)); + m_bitmap_deleted_vals = + (m_bitmap_deleted_vals & ~(bitmap_type(1) << index)); + + m_nb_elements++; + + tsl_sh_assert(has_value(index)); + tsl_sh_assert(!has_deleted_value(index)); + + return m_values + offset; + } + + iterator erase(allocator_type &alloc, iterator position) { + const size_type offset = + static_cast(std::distance(begin(), position)); + return erase(alloc, position, offset_to_index(offset)); + } + + // Return the next value or end if no next value + iterator erase(allocator_type &alloc, iterator position, size_type index) { + tsl_sh_assert(has_value(index)); + tsl_sh_assert(!has_deleted_value(index)); + + const size_type offset = + static_cast(std::distance(begin(), position)); + erase_at_offset(alloc, offset); + + m_bitmap_vals = (m_bitmap_vals & ~(bitmap_type(1) << index)); + m_bitmap_deleted_vals = (m_bitmap_deleted_vals | (bitmap_type(1) << index)); + + m_nb_elements--; + + tsl_sh_assert(!has_value(index)); + tsl_sh_assert(has_deleted_value(index)); + + return m_values + offset; + } + + void swap(sparse_array &other) { + using std::swap; + + swap(m_values, other.m_values); + swap(m_bitmap_vals, other.m_bitmap_vals); + swap(m_bitmap_deleted_vals, other.m_bitmap_deleted_vals); + swap(m_nb_elements, other.m_nb_elements); + swap(m_capacity, other.m_capacity); + swap(m_last_array, other.m_last_array); + } + + static iterator mutable_iterator(const_iterator pos) { + return const_cast(pos); + } + + template + void serialize(Serializer &serializer) const { + const slz_size_type sparse_bucket_size = m_nb_elements; + serializer(sparse_bucket_size); + + const slz_size_type bitmap_vals = m_bitmap_vals; + 
serializer(bitmap_vals); + + const slz_size_type bitmap_deleted_vals = m_bitmap_deleted_vals; + serializer(bitmap_deleted_vals); + + for (const value_type &value : *this) { + serializer(value); + } + } + + template + static sparse_array deserialize_hash_compatible(Deserializer &deserializer, + Allocator &alloc) { + const slz_size_type sparse_bucket_size = + deserialize_value(deserializer); + const slz_size_type bitmap_vals = + deserialize_value(deserializer); + const slz_size_type bitmap_deleted_vals = + deserialize_value(deserializer); + + if (sparse_bucket_size > BITMAP_NB_BITS) { + throw std::runtime_error( + "Deserialized sparse_bucket_size is too big for the platform. " + "Maximum should be BITMAP_NB_BITS."); + } + + sparse_array sarray; + if (sparse_bucket_size == 0) { + return sarray; + } + + sarray.m_bitmap_vals = numeric_cast( + bitmap_vals, "Deserialized bitmap_vals is too big."); + sarray.m_bitmap_deleted_vals = numeric_cast( + bitmap_deleted_vals, "Deserialized bitmap_deleted_vals is too big."); + + sarray.m_capacity = numeric_cast( + sparse_bucket_size, "Deserialized sparse_bucket_size is too big."); + sarray.m_values = alloc.allocate(sarray.m_capacity); + + try { + for (size_type ivalue = 0; ivalue < sarray.m_capacity; ivalue++) { + construct_value(alloc, sarray.m_values + ivalue, + deserialize_value(deserializer)); + sarray.m_nb_elements++; + } + } catch (...) { + sarray.clear(alloc); + throw; + } + + return sarray; + } + + /** + * Deserialize the values of the bucket and insert them all in sparse_hash + * through sparse_hash.insert(...). + */ + template + static void deserialize_values_into_sparse_hash(Deserializer &deserializer, + SparseHash &sparse_hash) { + const slz_size_type sparse_bucket_size = + deserialize_value(deserializer); + + const slz_size_type bitmap_vals = + deserialize_value(deserializer); + static_cast(bitmap_vals); // Ignore, not needed + + const slz_size_type bitmap_deleted_vals = + deserialize_value(deserializer); + static_cast(bitmap_deleted_vals); // Ignore, not needed + + for (slz_size_type ivalue = 0; ivalue < sparse_bucket_size; ivalue++) { + sparse_hash.insert(deserialize_value(deserializer)); + } + } + + private: + template + static void construct_value(allocator_type &alloc, value_type *value, + Args &&...value_args) { + std::allocator_traits::construct( + alloc, value, std::forward(value_args)...); + } + + static void destroy_value(allocator_type &alloc, value_type *value) noexcept { + std::allocator_traits::destroy(alloc, value); + } + + static void destroy_and_deallocate_values( + allocator_type &alloc, value_type *values, size_type nb_values, + size_type capacity_values) noexcept { + for (size_type i = 0; i < nb_values; i++) { + destroy_value(alloc, values + i); + } + + alloc.deallocate(values, capacity_values); + } + + static size_type popcount(bitmap_type val) noexcept { + if (sizeof(bitmap_type) <= sizeof(unsigned int)) { + return static_cast( + tsl::detail_popcount::popcount(static_cast(val))); + } else { + return static_cast(tsl::detail_popcount::popcountll(val)); + } + } + + size_type index_to_offset(size_type index) const noexcept { + tsl_sh_assert(index < BITMAP_NB_BITS); + return popcount(m_bitmap_vals & + ((bitmap_type(1) << index) - bitmap_type(1))); + } + + // TODO optimize + size_type offset_to_index(size_type offset) const noexcept { + tsl_sh_assert(offset < m_nb_elements); + + bitmap_type bitmap_vals = m_bitmap_vals; + size_type index = 0; + size_type nb_ones = 0; + + while (bitmap_vals != 0) { + if ((bitmap_vals & 0x1) == 1) 
{ + if (nb_ones == offset) { + break; + } + + nb_ones++; + } + + index++; + bitmap_vals = bitmap_vals >> 1; + } + + return index; + } + + size_type next_capacity() const noexcept { + return static_cast(m_capacity + CAPACITY_GROWTH_STEP); + } + + /** + * Insertion + * + * Two situations: + * - Either we are in a situation where + * std::is_nothrow_move_constructible::value is true. In this + * case, on insertion we just reallocate m_values when we reach its capacity + * (i.e. m_nb_elements == m_capacity), otherwise we just put the new value at + * its appropriate place. We can easily keep the strong exception guarantee as + * moving the values around is safe. + * - Otherwise we are in a situation where + * std::is_nothrow_move_constructible::value is false. In this + * case on EACH insertion we allocate a new area of m_nb_elements + 1 where we + * copy the values of m_values into it and put the new value there. On + * success, we set m_values to this new area. Even if slower, it's the only + * way to preserve to strong exception guarantee. + */ + template ::value>::type * = nullptr> + void insert_at_offset(allocator_type &alloc, size_type offset, + Args &&...value_args) { + if (m_nb_elements < m_capacity) { + insert_at_offset_no_realloc(alloc, offset, + std::forward(value_args)...); + } else { + insert_at_offset_realloc(alloc, offset, next_capacity(), + std::forward(value_args)...); + } + } + + template ::value>::type * = nullptr> + void insert_at_offset(allocator_type &alloc, size_type offset, + Args &&...value_args) { + insert_at_offset_realloc(alloc, offset, m_nb_elements + 1, + std::forward(value_args)...); + } + + template ::value>::type * = nullptr> + void insert_at_offset_no_realloc(allocator_type &alloc, size_type offset, + Args &&...value_args) { + tsl_sh_assert(offset <= m_nb_elements); + tsl_sh_assert(m_nb_elements < m_capacity); + + for (size_type i = m_nb_elements; i > offset; i--) { + construct_value(alloc, m_values + i, std::move(m_values[i - 1])); + destroy_value(alloc, m_values + i - 1); + } + + try { + construct_value(alloc, m_values + offset, + std::forward(value_args)...); + } catch (...) { + for (size_type i = offset; i < m_nb_elements; i++) { + construct_value(alloc, m_values + i, std::move(m_values[i + 1])); + destroy_value(alloc, m_values + i + 1); + } + throw; + } + } + + template ::value>::type * = nullptr> + void insert_at_offset_realloc(allocator_type &alloc, size_type offset, + size_type new_capacity, Args &&...value_args) { + tsl_sh_assert(new_capacity > m_nb_elements); + + value_type *new_values = alloc.allocate(new_capacity); + // Allocate should throw if there is a failure + tsl_sh_assert(new_values != nullptr); + + try { + construct_value(alloc, new_values + offset, + std::forward(value_args)...); + } catch (...) 
{ + alloc.deallocate(new_values, new_capacity); + throw; + } + + // Should not throw from here + for (size_type i = 0; i < offset; i++) { + construct_value(alloc, new_values + i, std::move(m_values[i])); + } + + for (size_type i = offset; i < m_nb_elements; i++) { + construct_value(alloc, new_values + i + 1, std::move(m_values[i])); + } + + destroy_and_deallocate_values(alloc, m_values, m_nb_elements, m_capacity); + + m_values = new_values; + m_capacity = new_capacity; + } + + template ::value>::type * = nullptr> + void insert_at_offset_realloc(allocator_type &alloc, size_type offset, + size_type new_capacity, Args &&...value_args) { + tsl_sh_assert(new_capacity > m_nb_elements); + + value_type *new_values = alloc.allocate(new_capacity); + // Allocate should throw if there is a failure + tsl_sh_assert(new_values != nullptr); + + size_type nb_new_values = 0; + try { + for (size_type i = 0; i < offset; i++) { + construct_value(alloc, new_values + i, m_values[i]); + nb_new_values++; + } + + construct_value(alloc, new_values + offset, + std::forward(value_args)...); + nb_new_values++; + + for (size_type i = offset; i < m_nb_elements; i++) { + construct_value(alloc, new_values + i + 1, m_values[i]); + nb_new_values++; + } + } catch (...) { + destroy_and_deallocate_values(alloc, new_values, nb_new_values, + new_capacity); + throw; + } + + tsl_sh_assert(nb_new_values == m_nb_elements + 1); + + destroy_and_deallocate_values(alloc, m_values, m_nb_elements, m_capacity); + + m_values = new_values; + m_capacity = new_capacity; + } + + /** + * Erasure + * + * Two situations: + * - Either we are in a situation where + * std::is_nothrow_move_constructible::value is true. Simply + * destroy the value and left-shift move the value on the right of offset. + * - Otherwise we are in a situation where + * std::is_nothrow_move_constructible::value is false. Copy all + * the values except the one at offset into a new heap area. On success, we + * set m_values to this new area. Even if slower, it's the only way to + * preserve to strong exception guarantee. + */ + template ::value>::type * = nullptr> + void erase_at_offset(allocator_type &alloc, size_type offset) noexcept { + tsl_sh_assert(offset < m_nb_elements); + + destroy_value(alloc, m_values + offset); + + for (size_type i = offset + 1; i < m_nb_elements; i++) { + construct_value(alloc, m_values + i - 1, std::move(m_values[i])); + destroy_value(alloc, m_values + i); + } + } + + template ::value>::type * = nullptr> + void erase_at_offset(allocator_type &alloc, size_type offset) { + tsl_sh_assert(offset < m_nb_elements); + + // Erasing the last element, don't need to reallocate. We keep the capacity. + if (offset + 1 == m_nb_elements) { + destroy_value(alloc, m_values + offset); + return; + } + + tsl_sh_assert(m_nb_elements > 1); + const size_type new_capacity = m_nb_elements - 1; + + value_type *new_values = alloc.allocate(new_capacity); + // Allocate should throw if there is a failure + tsl_sh_assert(new_values != nullptr); + + size_type nb_new_values = 0; + try { + for (size_type i = 0; i < m_nb_elements; i++) { + if (i != offset) { + construct_value(alloc, new_values + nb_new_values, m_values[i]); + nb_new_values++; + } + } + } catch (...) 
{ + destroy_and_deallocate_values(alloc, new_values, nb_new_values, + new_capacity); + throw; + } + + tsl_sh_assert(nb_new_values == m_nb_elements - 1); + + destroy_and_deallocate_values(alloc, m_values, m_nb_elements, m_capacity); + + m_values = new_values; + m_capacity = new_capacity; + } + + private: + value_type *m_values; + + bitmap_type m_bitmap_vals; + bitmap_type m_bitmap_deleted_vals; + + size_type m_nb_elements; + size_type m_capacity; + bool m_last_array; +}; + +/** + * Internal common class used by `sparse_map` and `sparse_set`. + * + * `ValueType` is what will be stored by `sparse_hash` (usually `std::pair` for map and `Key` for set). + * + * `KeySelect` should be a `FunctionObject` which takes a `ValueType` in + * parameter and returns a reference to the key. + * + * `ValueSelect` should be a `FunctionObject` which takes a `ValueType` in + * parameter and returns a reference to the value. `ValueSelect` should be void + * if there is no value (in a set for example). + * + * The strong exception guarantee only holds if `ExceptionSafety` is set to + * `tsl::sh::exception_safety::strong`. + * + * `ValueType` must be nothrow move constructible and/or copy constructible. + * Behaviour is undefined if the destructor of `ValueType` throws. + * + * + * The class holds its buckets in a 2-dimensional fashion. Instead of having a + * linear `std::vector` for [0, bucket_count) where each bucket stores + * one value, we have a `std::vector` (m_sparse_buckets_data) + * where each `sparse_array` stores multiple values (up to + * `sparse_array::BITMAP_NB_BITS`). To convert a one dimensional `ibucket` + * position to a position in `std::vector` and a position in + * `sparse_array`, use respectively the methods + * `sparse_array::sparse_ibucket(ibucket)` and + * `sparse_array::index_in_sparse_bucket(ibucket)`. + */ +template +class sparse_hash : private Allocator, + private Hash, + private KeyEqual, + private GrowthPolicy { + private: + template + using has_mapped_type = + typename std::integral_constant::value>; + + static_assert( + noexcept(std::declval().bucket_for_hash(std::size_t(0))), + "GrowthPolicy::bucket_for_hash must be noexcept."); + static_assert(noexcept(std::declval().clear()), + "GrowthPolicy::clear must be noexcept."); + + public: + template + class sparse_iterator; + + using key_type = typename KeySelect::key_type; + using value_type = ValueType; + using size_type = std::size_t; + using difference_type = std::ptrdiff_t; + using hasher = Hash; + using key_equal = KeyEqual; + using allocator_type = Allocator; + using reference = value_type &; + using const_reference = const value_type &; + using pointer = value_type *; + using const_pointer = const value_type *; + using iterator = sparse_iterator; + using const_iterator = sparse_iterator; + + private: + using sparse_array = + tsl::detail_sparse_hash::sparse_array; + + using sparse_buckets_allocator = typename std::allocator_traits< + allocator_type>::template rebind_alloc; + using sparse_buckets_container = + std::vector; + + public: + /** + * The `operator*()` and `operator->()` methods return a const reference and + * const pointer respectively to the stored value type (`Key` for a set, + * `std::pair` for a map). + * + * In case of a map, to get a mutable reference to the value `T` associated to + * a key (the `.second` in the stored pair), you have to call `value()`. 
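// Illustrative usage sketch (editor's example): what the comment above means
// in practice, using the public tsl::sparse_map wrapper added later in this
// diff. operator*() and operator->() expose a const std::pair, so mutating the
// mapped value goes through it.value().
#include <cstdio>
#include <string>

#include "tsl/sparse_map.h"

namespace sparse_hash_example {

inline void sketch_iterator_value() {
  tsl::sparse_map<std::string, int> counts{{"apple", 1}, {"pear", 2}};
  for (auto it = counts.begin(); it != counts.end(); ++it) {
    it.value() += 10;  // mutable access to the mapped value
    // *it and it-> only give read access to the stored pair.
    std::printf("%s -> %d\n", it->first.c_str(), it->second);
  }
}

}  // namespace sparse_hash_example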
+ */ + template + class sparse_iterator { + friend class sparse_hash; + + private: + using sparse_bucket_iterator = typename std::conditional< + IsConst, typename sparse_buckets_container::const_iterator, + typename sparse_buckets_container::iterator>::type; + + using sparse_array_iterator = + typename std::conditional::type; + + /** + * sparse_array_it should be nullptr if sparse_bucket_it == + * m_sparse_buckets_data.end(). (TODO better way?) + */ + sparse_iterator(sparse_bucket_iterator sparse_bucket_it, + sparse_array_iterator sparse_array_it) + : m_sparse_buckets_it(sparse_bucket_it), + m_sparse_array_it(sparse_array_it) {} + + public: + using iterator_category = std::forward_iterator_tag; + using value_type = const typename sparse_hash::value_type; + using difference_type = std::ptrdiff_t; + using reference = value_type &; + using pointer = value_type *; + + sparse_iterator() noexcept {} + + // Copy constructor from iterator to const_iterator. + template ::type * = nullptr> + sparse_iterator(const sparse_iterator &other) noexcept + : m_sparse_buckets_it(other.m_sparse_buckets_it), + m_sparse_array_it(other.m_sparse_array_it) {} + + sparse_iterator(const sparse_iterator &other) = default; + sparse_iterator(sparse_iterator &&other) = default; + sparse_iterator &operator=(const sparse_iterator &other) = default; + sparse_iterator &operator=(sparse_iterator &&other) = default; + + const typename sparse_hash::key_type &key() const { + return KeySelect()(*m_sparse_array_it); + } + + template ::value && + IsConst>::type * = nullptr> + const typename U::value_type &value() const { + return U()(*m_sparse_array_it); + } + + template ::value && + !IsConst>::type * = nullptr> + typename U::value_type &value() { + return U()(*m_sparse_array_it); + } + + reference operator*() const { return *m_sparse_array_it; } + + pointer operator->() const { return std::addressof(*m_sparse_array_it); } + + sparse_iterator &operator++() { + tsl_sh_assert(m_sparse_array_it != nullptr); + ++m_sparse_array_it; + + if (m_sparse_array_it == m_sparse_buckets_it->end()) { + do { + if (m_sparse_buckets_it->last()) { + ++m_sparse_buckets_it; + m_sparse_array_it = nullptr; + return *this; + } + + ++m_sparse_buckets_it; + } while (m_sparse_buckets_it->empty()); + + m_sparse_array_it = m_sparse_buckets_it->begin(); + } + + return *this; + } + + sparse_iterator operator++(int) { + sparse_iterator tmp(*this); + ++*this; + + return tmp; + } + + friend bool operator==(const sparse_iterator &lhs, + const sparse_iterator &rhs) { + return lhs.m_sparse_buckets_it == rhs.m_sparse_buckets_it && + lhs.m_sparse_array_it == rhs.m_sparse_array_it; + } + + friend bool operator!=(const sparse_iterator &lhs, + const sparse_iterator &rhs) { + return !(lhs == rhs); + } + + private: + sparse_bucket_iterator m_sparse_buckets_it; + sparse_array_iterator m_sparse_array_it; + }; + + public: + sparse_hash(size_type bucket_count, const Hash &hash, const KeyEqual &equal, + const Allocator &alloc, float max_load_factor) + : Allocator(alloc), + Hash(hash), + KeyEqual(equal), + GrowthPolicy(bucket_count), + m_sparse_buckets_data(alloc), + m_sparse_buckets(static_empty_sparse_bucket_ptr()), + m_bucket_count(bucket_count), + m_nb_elements(0), + m_nb_deleted_buckets(0) { + if (m_bucket_count > max_bucket_count()) { + throw std::length_error("The map exceeds its maximum size."); + } + + if (m_bucket_count > 0) { + /* + * We can't use the `vector(size_type count, const Allocator& alloc)` + * constructor as it's only available in C++14 and we need to support 
+ * C++11. We thus must resize after using the `vector(const Allocator& + * alloc)` constructor. + * + * We can't use `vector(size_type count, const T& value, const Allocator& + * alloc)` as it requires the value T to be copyable. + */ + m_sparse_buckets_data.resize( + sparse_array::nb_sparse_buckets(bucket_count)); + m_sparse_buckets = m_sparse_buckets_data.data(); + + tsl_sh_assert(!m_sparse_buckets_data.empty()); + m_sparse_buckets_data.back().set_as_last(); + } + + this->max_load_factor(max_load_factor); + + // Check in the constructor instead of outside of a function to avoid + // compilation issues when value_type is not complete. + static_assert(std::is_nothrow_move_constructible::value || + std::is_copy_constructible::value, + "Key, and T if present, must be nothrow move constructible " + "and/or copy constructible."); + } + + ~sparse_hash() { clear(); } + + sparse_hash(const sparse_hash &other) + : Allocator(std::allocator_traits< + Allocator>::select_on_container_copy_construction(other)), + Hash(other), + KeyEqual(other), + GrowthPolicy(other), + m_sparse_buckets_data( + std::allocator_traits< + Allocator>::select_on_container_copy_construction(other)), + m_bucket_count(other.m_bucket_count), + m_nb_elements(other.m_nb_elements), + m_nb_deleted_buckets(other.m_nb_deleted_buckets), + m_load_threshold_rehash(other.m_load_threshold_rehash), + m_load_threshold_clear_deleted(other.m_load_threshold_clear_deleted), + m_max_load_factor(other.m_max_load_factor) { + copy_buckets_from(other), + m_sparse_buckets = m_sparse_buckets_data.empty() + ? static_empty_sparse_bucket_ptr() + : m_sparse_buckets_data.data(); + } + + sparse_hash(sparse_hash &&other) noexcept( + std::is_nothrow_move_constructible::value + &&std::is_nothrow_move_constructible::value + &&std::is_nothrow_move_constructible::value + &&std::is_nothrow_move_constructible::value + &&std::is_nothrow_move_constructible< + sparse_buckets_container>::value) + : Allocator(std::move(other)), + Hash(std::move(other)), + KeyEqual(std::move(other)), + GrowthPolicy(std::move(other)), + m_sparse_buckets_data(std::move(other.m_sparse_buckets_data)), + m_sparse_buckets(m_sparse_buckets_data.empty() + ? 
static_empty_sparse_bucket_ptr() + : m_sparse_buckets_data.data()), + m_bucket_count(other.m_bucket_count), + m_nb_elements(other.m_nb_elements), + m_nb_deleted_buckets(other.m_nb_deleted_buckets), + m_load_threshold_rehash(other.m_load_threshold_rehash), + m_load_threshold_clear_deleted(other.m_load_threshold_clear_deleted), + m_max_load_factor(other.m_max_load_factor) { + other.GrowthPolicy::clear(); + other.m_sparse_buckets_data.clear(); + other.m_sparse_buckets = static_empty_sparse_bucket_ptr(); + other.m_bucket_count = 0; + other.m_nb_elements = 0; + other.m_nb_deleted_buckets = 0; + other.m_load_threshold_rehash = 0; + other.m_load_threshold_clear_deleted = 0; + } + + sparse_hash &operator=(const sparse_hash &other) { + if (this != &other) { + clear(); + + if (std::allocator_traits< + Allocator>::propagate_on_container_copy_assignment::value) { + Allocator::operator=(other); + } + + Hash::operator=(other); + KeyEqual::operator=(other); + GrowthPolicy::operator=(other); + + if (std::allocator_traits< + Allocator>::propagate_on_container_copy_assignment::value) { + m_sparse_buckets_data = + sparse_buckets_container(static_cast(other)); + } else { + if (m_sparse_buckets_data.size() != + other.m_sparse_buckets_data.size()) { + m_sparse_buckets_data = + sparse_buckets_container(static_cast(*this)); + } else { + m_sparse_buckets_data.clear(); + } + } + + copy_buckets_from(other); + m_sparse_buckets = m_sparse_buckets_data.empty() + ? static_empty_sparse_bucket_ptr() + : m_sparse_buckets_data.data(); + + m_bucket_count = other.m_bucket_count; + m_nb_elements = other.m_nb_elements; + m_nb_deleted_buckets = other.m_nb_deleted_buckets; + m_load_threshold_rehash = other.m_load_threshold_rehash; + m_load_threshold_clear_deleted = other.m_load_threshold_clear_deleted; + m_max_load_factor = other.m_max_load_factor; + } + + return *this; + } + + sparse_hash &operator=(sparse_hash &&other) { + clear(); + + if (std::allocator_traits< + Allocator>::propagate_on_container_move_assignment::value) { + static_cast(*this) = + std::move(static_cast(other)); + m_sparse_buckets_data = std::move(other.m_sparse_buckets_data); + } else if (static_cast(*this) != + static_cast(other)) { + move_buckets_from(std::move(other)); + } else { + static_cast(*this) = + std::move(static_cast(other)); + m_sparse_buckets_data = std::move(other.m_sparse_buckets_data); + } + + m_sparse_buckets = m_sparse_buckets_data.empty() + ? 
static_empty_sparse_bucket_ptr() + : m_sparse_buckets_data.data(); + + static_cast(*this) = std::move(static_cast(other)); + static_cast(*this) = std::move(static_cast(other)); + static_cast(*this) = + std::move(static_cast(other)); + m_bucket_count = other.m_bucket_count; + m_nb_elements = other.m_nb_elements; + m_nb_deleted_buckets = other.m_nb_deleted_buckets; + m_load_threshold_rehash = other.m_load_threshold_rehash; + m_load_threshold_clear_deleted = other.m_load_threshold_clear_deleted; + m_max_load_factor = other.m_max_load_factor; + + other.GrowthPolicy::clear(); + other.m_sparse_buckets_data.clear(); + other.m_sparse_buckets = static_empty_sparse_bucket_ptr(); + other.m_bucket_count = 0; + other.m_nb_elements = 0; + other.m_nb_deleted_buckets = 0; + other.m_load_threshold_rehash = 0; + other.m_load_threshold_clear_deleted = 0; + + return *this; + } + + allocator_type get_allocator() const { + return static_cast(*this); + } + + /* + * Iterators + */ + iterator begin() noexcept { + auto begin = m_sparse_buckets_data.begin(); + while (begin != m_sparse_buckets_data.end() && begin->empty()) { + ++begin; + } + + return iterator(begin, (begin != m_sparse_buckets_data.end()) + ? begin->begin() + : nullptr); + } + + const_iterator begin() const noexcept { return cbegin(); } + + const_iterator cbegin() const noexcept { + auto begin = m_sparse_buckets_data.cbegin(); + while (begin != m_sparse_buckets_data.cend() && begin->empty()) { + ++begin; + } + + return const_iterator(begin, (begin != m_sparse_buckets_data.cend()) + ? begin->cbegin() + : nullptr); + } + + iterator end() noexcept { + return iterator(m_sparse_buckets_data.end(), nullptr); + } + + const_iterator end() const noexcept { return cend(); } + + const_iterator cend() const noexcept { + return const_iterator(m_sparse_buckets_data.cend(), nullptr); + } + + /* + * Capacity + */ + bool empty() const noexcept { return m_nb_elements == 0; } + + size_type size() const noexcept { return m_nb_elements; } + + size_type max_size() const noexcept { + return std::min(std::allocator_traits::max_size(), + m_sparse_buckets_data.max_size()); + } + + /* + * Modifiers + */ + void clear() noexcept { + for (auto &bucket : m_sparse_buckets_data) { + bucket.clear(*this); + } + + m_nb_elements = 0; + m_nb_deleted_buckets = 0; + } + + template + std::pair insert(P &&value) { + return insert_impl(KeySelect()(value), std::forward

<P>(value));
  }

  template <class P>
  iterator insert_hint(const_iterator hint, P &&value) {
    if (hint != cend() &&
        compare_keys(KeySelect()(*hint), KeySelect()(value))) {
      return mutable_iterator(hint);
    }

    return insert(std::forward<P>
(value)).first; + } + + template + void insert(InputIt first, InputIt last) { + if (std::is_base_of< + std::forward_iterator_tag, + typename std::iterator_traits::iterator_category>::value) { + const auto nb_elements_insert = std::distance(first, last); + const size_type nb_free_buckets = m_load_threshold_rehash - size(); + tsl_sh_assert(m_load_threshold_rehash >= size()); + + if (nb_elements_insert > 0 && + nb_free_buckets < size_type(nb_elements_insert)) { + reserve(size() + size_type(nb_elements_insert)); + } + } + + for (; first != last; ++first) { + insert(*first); + } + } + + template + std::pair insert_or_assign(K &&key, M &&obj) { + auto it = try_emplace(std::forward(key), std::forward(obj)); + if (!it.second) { + it.first.value() = std::forward(obj); + } + + return it; + } + + template + iterator insert_or_assign(const_iterator hint, K &&key, M &&obj) { + if (hint != cend() && compare_keys(KeySelect()(*hint), key)) { + auto it = mutable_iterator(hint); + it.value() = std::forward(obj); + + return it; + } + + return insert_or_assign(std::forward(key), std::forward(obj)).first; + } + + template + std::pair emplace(Args &&...args) { + return insert(value_type(std::forward(args)...)); + } + + template + iterator emplace_hint(const_iterator hint, Args &&...args) { + return insert_hint(hint, value_type(std::forward(args)...)); + } + + template + std::pair try_emplace(K &&key, Args &&...args) { + return insert_impl(key, std::piecewise_construct, + std::forward_as_tuple(std::forward(key)), + std::forward_as_tuple(std::forward(args)...)); + } + + template + iterator try_emplace_hint(const_iterator hint, K &&key, Args &&...args) { + if (hint != cend() && compare_keys(KeySelect()(*hint), key)) { + return mutable_iterator(hint); + } + + return try_emplace(std::forward(key), std::forward(args)...).first; + } + + /** + * Here to avoid `template size_type erase(const K& key)` being used + * when we use an iterator instead of a const_iterator. + */ + iterator erase(iterator pos) { + tsl_sh_assert(pos != end() && m_nb_elements > 0); + auto it_sparse_array_next = + pos.m_sparse_buckets_it->erase(*this, pos.m_sparse_array_it); + m_nb_elements--; + m_nb_deleted_buckets++; + + if (it_sparse_array_next == pos.m_sparse_buckets_it->end()) { + auto it_sparse_buckets_next = pos.m_sparse_buckets_it; + do { + ++it_sparse_buckets_next; + } while (it_sparse_buckets_next != m_sparse_buckets_data.end() && + it_sparse_buckets_next->empty()); + + if (it_sparse_buckets_next == m_sparse_buckets_data.end()) { + return end(); + } else { + return iterator(it_sparse_buckets_next, + it_sparse_buckets_next->begin()); + } + } else { + return iterator(pos.m_sparse_buckets_it, it_sparse_array_next); + } + } + + iterator erase(const_iterator pos) { return erase(mutable_iterator(pos)); } + + iterator erase(const_iterator first, const_iterator last) { + if (first == last) { + return mutable_iterator(first); + } + + // TODO Optimize, could avoid the call to std::distance. 
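// Illustrative usage sketch (editor's example) of the insert_or_assign /
// try_emplace semantics implemented above, via the public tsl::sparse_map
// wrapper added later in this diff: insert_or_assign overwrites the mapped
// value of an existing key, try_emplace leaves an existing entry untouched.
#include <cassert>
#include <string>

#include "tsl/sparse_map.h"

namespace sparse_hash_example {

inline void sketch_insert_or_assign_vs_try_emplace() {
  tsl::sparse_map<std::string, int> m;

  m.try_emplace("k", 1);   // inserts ("k", 1)
  m.try_emplace("k", 99);  // "k" already present: no change
  assert(m.at("k") == 1);

  m.insert_or_assign("k", 42);  // overwrites the mapped value
  assert(m.at("k") == 42);
}

}  // namespace sparse_hash_example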
+ const size_type nb_elements_to_erase = + static_cast(std::distance(first, last)); + auto to_delete = mutable_iterator(first); + for (size_type i = 0; i < nb_elements_to_erase; i++) { + to_delete = erase(to_delete); + } + + return to_delete; + } + + template + size_type erase(const K &key) { + return erase(key, hash_key(key)); + } + + template + size_type erase(const K &key, std::size_t hash) { + return erase_impl(key, hash); + } + + void swap(sparse_hash &other) { + using std::swap; + + if (std::allocator_traits::propagate_on_container_swap::value) { + swap(static_cast(*this), static_cast(other)); + } else { + tsl_sh_assert(static_cast(*this) == + static_cast(other)); + } + + swap(static_cast(*this), static_cast(other)); + swap(static_cast(*this), static_cast(other)); + swap(static_cast(*this), + static_cast(other)); + swap(m_sparse_buckets_data, other.m_sparse_buckets_data); + swap(m_sparse_buckets, other.m_sparse_buckets); + swap(m_bucket_count, other.m_bucket_count); + swap(m_nb_elements, other.m_nb_elements); + swap(m_nb_deleted_buckets, other.m_nb_deleted_buckets); + swap(m_load_threshold_rehash, other.m_load_threshold_rehash); + swap(m_load_threshold_clear_deleted, other.m_load_threshold_clear_deleted); + swap(m_max_load_factor, other.m_max_load_factor); + } + + /* + * Lookup + */ + template < + class K, class U = ValueSelect, + typename std::enable_if::value>::type * = nullptr> + typename U::value_type &at(const K &key) { + return at(key, hash_key(key)); + } + + template < + class K, class U = ValueSelect, + typename std::enable_if::value>::type * = nullptr> + typename U::value_type &at(const K &key, std::size_t hash) { + return const_cast( + static_cast(this)->at(key, hash)); + } + + template < + class K, class U = ValueSelect, + typename std::enable_if::value>::type * = nullptr> + const typename U::value_type &at(const K &key) const { + return at(key, hash_key(key)); + } + + template < + class K, class U = ValueSelect, + typename std::enable_if::value>::type * = nullptr> + const typename U::value_type &at(const K &key, std::size_t hash) const { + auto it = find(key, hash); + if (it != cend()) { + return it.value(); + } else { + throw std::out_of_range("Couldn't find key."); + } + } + + template < + class K, class U = ValueSelect, + typename std::enable_if::value>::type * = nullptr> + typename U::value_type &operator[](K &&key) { + return try_emplace(std::forward(key)).first.value(); + } + + template + bool contains(const K &key) const { + return contains(key, hash_key(key)); + } + + template + bool contains(const K &key, std::size_t hash) const { + return count(key, hash) != 0; + } + + template + size_type count(const K &key) const { + return count(key, hash_key(key)); + } + + template + size_type count(const K &key, std::size_t hash) const { + if (find(key, hash) != cend()) { + return 1; + } else { + return 0; + } + } + + template + iterator find(const K &key) { + return find_impl(key, hash_key(key)); + } + + template + iterator find(const K &key, std::size_t hash) { + return find_impl(key, hash); + } + + template + const_iterator find(const K &key) const { + return find_impl(key, hash_key(key)); + } + + template + const_iterator find(const K &key, std::size_t hash) const { + return find_impl(key, hash); + } + + template + std::pair equal_range(const K &key) { + return equal_range(key, hash_key(key)); + } + + template + std::pair equal_range(const K &key, std::size_t hash) { + iterator it = find(key, hash); + return std::make_pair(it, (it == end()) ? 
it : std::next(it)); + } + + template + std::pair equal_range(const K &key) const { + return equal_range(key, hash_key(key)); + } + + template + std::pair equal_range( + const K &key, std::size_t hash) const { + const_iterator it = find(key, hash); + return std::make_pair(it, (it == cend()) ? it : std::next(it)); + } + + /* + * Bucket interface + */ + size_type bucket_count() const { return m_bucket_count; } + + size_type max_bucket_count() const { + return m_sparse_buckets_data.max_size(); + } + + /* + * Hash policy + */ + float load_factor() const { + if (bucket_count() == 0) { + return 0; + } + + return float(m_nb_elements) / float(bucket_count()); + } + + float max_load_factor() const { return m_max_load_factor; } + + void max_load_factor(float ml) { + m_max_load_factor = std::max(0.1f, std::min(ml, 0.8f)); + m_load_threshold_rehash = + size_type(float(bucket_count()) * m_max_load_factor); + + const float max_load_factor_with_deleted_buckets = + m_max_load_factor + 0.5f * (1.0f - m_max_load_factor); + tsl_sh_assert(max_load_factor_with_deleted_buckets > 0.0f && + max_load_factor_with_deleted_buckets <= 1.0f); + m_load_threshold_clear_deleted = + size_type(float(bucket_count()) * max_load_factor_with_deleted_buckets); + } + + void rehash(size_type count) { + count = std::max(count, + size_type(std::ceil(float(size()) / max_load_factor()))); + rehash_impl(count); + } + + void reserve(size_type count) { + rehash(size_type(std::ceil(float(count) / max_load_factor()))); + } + + /* + * Observers + */ + hasher hash_function() const { return static_cast(*this); } + + key_equal key_eq() const { return static_cast(*this); } + + /* + * Other + */ + iterator mutable_iterator(const_iterator pos) { + auto it_sparse_buckets = + m_sparse_buckets_data.begin() + + std::distance(m_sparse_buckets_data.cbegin(), pos.m_sparse_buckets_it); + + return iterator(it_sparse_buckets, + sparse_array::mutable_iterator(pos.m_sparse_array_it)); + } + + template + void serialize(Serializer &serializer) const { + serialize_impl(serializer); + } + + template + void deserialize(Deserializer &deserializer, bool hash_compatible) { + deserialize_impl(deserializer, hash_compatible); + } + + private: + template + std::size_t hash_key(const K &key) const { + return Hash::operator()(key); + } + + template + bool compare_keys(const K1 &key1, const K2 &key2) const { + return KeyEqual::operator()(key1, key2); + } + + size_type bucket_for_hash(std::size_t hash) const { + const std::size_t bucket = GrowthPolicy::bucket_for_hash(hash); + tsl_sh_assert(sparse_array::sparse_ibucket(bucket) < + m_sparse_buckets_data.size() || + (bucket == 0 && m_sparse_buckets_data.empty())); + + return bucket; + } + + template ::value>::type * = + nullptr> + size_type next_bucket(size_type ibucket, size_type iprobe) const { + (void)iprobe; + if (Probing == tsl::sh::probing::linear) { + return (ibucket + 1) & this->m_mask; + } else { + tsl_sh_assert(Probing == tsl::sh::probing::quadratic); + return (ibucket + iprobe) & this->m_mask; + } + } + + template ::value>::type * = + nullptr> + size_type next_bucket(size_type ibucket, size_type iprobe) const { + (void)iprobe; + if (Probing == tsl::sh::probing::linear) { + ibucket++; + return (ibucket != bucket_count()) ? ibucket : 0; + } else { + tsl_sh_assert(Probing == tsl::sh::probing::quadratic); + ibucket += iprobe; + return (ibucket < bucket_count()) ? 
ibucket : ibucket % bucket_count(); + } + } + + // TODO encapsulate m_sparse_buckets_data to avoid the managing the allocator + void copy_buckets_from(const sparse_hash &other) { + m_sparse_buckets_data.reserve(other.m_sparse_buckets_data.size()); + + try { + for (const auto &bucket : other.m_sparse_buckets_data) { + m_sparse_buckets_data.emplace_back(bucket, + static_cast(*this)); + } + } catch (...) { + clear(); + throw; + } + + tsl_sh_assert(m_sparse_buckets_data.empty() || + m_sparse_buckets_data.back().last()); + } + + void move_buckets_from(sparse_hash &&other) { + m_sparse_buckets_data.reserve(other.m_sparse_buckets_data.size()); + + try { + for (auto &&bucket : other.m_sparse_buckets_data) { + m_sparse_buckets_data.emplace_back(std::move(bucket), + static_cast(*this)); + } + } catch (...) { + clear(); + throw; + } + + tsl_sh_assert(m_sparse_buckets_data.empty() || + m_sparse_buckets_data.back().last()); + } + + template + std::pair insert_impl(const K &key, + Args &&...value_type_args) { + if (size() >= m_load_threshold_rehash) { + rehash_impl(GrowthPolicy::next_bucket_count()); + } else if (size() + m_nb_deleted_buckets >= + m_load_threshold_clear_deleted) { + clear_deleted_buckets(); + } + tsl_sh_assert(!m_sparse_buckets_data.empty()); + + /** + * We must insert the value in the first empty or deleted bucket we find. If + * we first find a deleted bucket, we still have to continue the search + * until we find an empty bucket or until we have searched all the buckets + * to be sure that the value is not in the hash table. We thus remember the + * position, if any, of the first deleted bucket we have encountered so we + * can insert it there if needed. + */ + bool found_first_deleted_bucket = false; + std::size_t sparse_ibucket_first_deleted = 0; + typename sparse_array::size_type index_in_sparse_bucket_first_deleted = 0; + + const std::size_t hash = hash_key(key); + std::size_t ibucket = bucket_for_hash(hash); + + std::size_t probe = 0; + while (true) { + std::size_t sparse_ibucket = sparse_array::sparse_ibucket(ibucket); + auto index_in_sparse_bucket = + sparse_array::index_in_sparse_bucket(ibucket); + + if (m_sparse_buckets[sparse_ibucket].has_value(index_in_sparse_bucket)) { + auto value_it = + m_sparse_buckets[sparse_ibucket].value(index_in_sparse_bucket); + if (compare_keys(key, KeySelect()(*value_it))) { + return std::make_pair( + iterator(m_sparse_buckets_data.begin() + sparse_ibucket, + value_it), + false); + } + } else if (m_sparse_buckets[sparse_ibucket].has_deleted_value( + index_in_sparse_bucket) && + probe < m_bucket_count) { + if (!found_first_deleted_bucket) { + found_first_deleted_bucket = true; + sparse_ibucket_first_deleted = sparse_ibucket; + index_in_sparse_bucket_first_deleted = index_in_sparse_bucket; + } + } else if (found_first_deleted_bucket) { + auto it = insert_in_bucket(sparse_ibucket_first_deleted, + index_in_sparse_bucket_first_deleted, + std::forward(value_type_args)...); + m_nb_deleted_buckets--; + + return it; + } else { + return insert_in_bucket(sparse_ibucket, index_in_sparse_bucket, + std::forward(value_type_args)...); + } + + probe++; + ibucket = next_bucket(ibucket, probe); + } + } + + template + std::pair insert_in_bucket( + std::size_t sparse_ibucket, + typename sparse_array::size_type index_in_sparse_bucket, + Args &&...value_type_args) { + auto value_it = m_sparse_buckets[sparse_ibucket].set( + *this, index_in_sparse_bucket, std::forward(value_type_args)...); + m_nb_elements++; + + return std::make_pair( + 
iterator(m_sparse_buckets_data.begin() + sparse_ibucket, value_it), + true); + } + + template + size_type erase_impl(const K &key, std::size_t hash) { + std::size_t ibucket = bucket_for_hash(hash); + + std::size_t probe = 0; + while (true) { + const std::size_t sparse_ibucket = sparse_array::sparse_ibucket(ibucket); + const auto index_in_sparse_bucket = + sparse_array::index_in_sparse_bucket(ibucket); + + if (m_sparse_buckets[sparse_ibucket].has_value(index_in_sparse_bucket)) { + auto value_it = + m_sparse_buckets[sparse_ibucket].value(index_in_sparse_bucket); + if (compare_keys(key, KeySelect()(*value_it))) { + m_sparse_buckets[sparse_ibucket].erase(*this, value_it, + index_in_sparse_bucket); + m_nb_elements--; + m_nb_deleted_buckets++; + + return 1; + } + } else if (!m_sparse_buckets[sparse_ibucket].has_deleted_value( + index_in_sparse_bucket) || + probe >= m_bucket_count) { + return 0; + } + + probe++; + ibucket = next_bucket(ibucket, probe); + } + } + + template + iterator find_impl(const K &key, std::size_t hash) { + return mutable_iterator( + static_cast(this)->find(key, hash)); + } + + template + const_iterator find_impl(const K &key, std::size_t hash) const { + std::size_t ibucket = bucket_for_hash(hash); + + std::size_t probe = 0; + while (true) { + const std::size_t sparse_ibucket = sparse_array::sparse_ibucket(ibucket); + const auto index_in_sparse_bucket = + sparse_array::index_in_sparse_bucket(ibucket); + + if (m_sparse_buckets[sparse_ibucket].has_value(index_in_sparse_bucket)) { + auto value_it = + m_sparse_buckets[sparse_ibucket].value(index_in_sparse_bucket); + if (compare_keys(key, KeySelect()(*value_it))) { + return const_iterator(m_sparse_buckets_data.cbegin() + sparse_ibucket, + value_it); + } + } else if (!m_sparse_buckets[sparse_ibucket].has_deleted_value( + index_in_sparse_bucket) || + probe >= m_bucket_count) { + return cend(); + } + + probe++; + ibucket = next_bucket(ibucket, probe); + } + } + + void clear_deleted_buckets() { + // TODO could be optimized, we could do it in-place instead of allocating a + // new bucket array. + rehash_impl(m_bucket_count); + tsl_sh_assert(m_nb_deleted_buckets == 0); + } + + template ::type + * = nullptr> + void rehash_impl(size_type count) { + sparse_hash new_table(count, static_cast(*this), + static_cast(*this), + static_cast(*this), m_max_load_factor); + + for (auto &bucket : m_sparse_buckets_data) { + for (auto &val : bucket) { + new_table.insert_on_rehash(std::move(val)); + } + + // TODO try to reuse some of the memory + bucket.clear(*this); + } + + new_table.swap(*this); + } + + /** + * TODO: For now we copy each element into the new map. We could move + * them if they are nothrow_move_constructible without triggering + * any exception if we reserve enough space in the sparse arrays beforehand. 
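// Illustrative sketch (editor's example): the "move if relocation can't throw,
// otherwise copy" dispatch used throughout this header (see the
// insert_at_offset / erase_at_offset overloads above and the TODO just before
// this). Names are invented for the sketch.
#include <type_traits>
#include <utility>
#include <vector>

namespace sparse_hash_example {

// Selected when relocating T can never throw: moving is safe because no
// exception can fire mid-relocation.
template <typename T,
          typename std::enable_if<
              std::is_nothrow_move_constructible<T>::value>::type * = nullptr>
void sketch_relocate(std::vector<T> &from, std::vector<T> &to) {
  for (T &v : from) {
    to.push_back(std::move(v));
  }
}

// Selected otherwise: copy so that an exception part-way through leaves `from`
// intact, preserving the strong exception guarantee at extra cost.
template <typename T,
          typename std::enable_if<
              !std::is_nothrow_move_constructible<T>::value>::type * = nullptr>
void sketch_relocate(std::vector<T> &from, std::vector<T> &to) {
  for (const T &v : from) {
    to.push_back(v);
  }
}

}  // namespace sparse_hash_example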
+ */ + template ::type * = nullptr> + void rehash_impl(size_type count) { + sparse_hash new_table(count, static_cast(*this), + static_cast(*this), + static_cast(*this), m_max_load_factor); + + for (const auto &bucket : m_sparse_buckets_data) { + for (const auto &val : bucket) { + new_table.insert_on_rehash(val); + } + } + + new_table.swap(*this); + } + + template + void insert_on_rehash(K &&key_value) { + const key_type &key = KeySelect()(key_value); + + const std::size_t hash = hash_key(key); + std::size_t ibucket = bucket_for_hash(hash); + + std::size_t probe = 0; + while (true) { + std::size_t sparse_ibucket = sparse_array::sparse_ibucket(ibucket); + auto index_in_sparse_bucket = + sparse_array::index_in_sparse_bucket(ibucket); + + if (!m_sparse_buckets[sparse_ibucket].has_value(index_in_sparse_bucket)) { + m_sparse_buckets[sparse_ibucket].set(*this, index_in_sparse_bucket, + std::forward(key_value)); + m_nb_elements++; + + return; + } else { + tsl_sh_assert(!compare_keys( + key, KeySelect()(*m_sparse_buckets[sparse_ibucket].value( + index_in_sparse_bucket)))); + } + + probe++; + ibucket = next_bucket(ibucket, probe); + } + } + + template + void serialize_impl(Serializer &serializer) const { + const slz_size_type version = SERIALIZATION_PROTOCOL_VERSION; + serializer(version); + + const slz_size_type bucket_count = m_bucket_count; + serializer(bucket_count); + + const slz_size_type nb_sparse_buckets = m_sparse_buckets_data.size(); + serializer(nb_sparse_buckets); + + const slz_size_type nb_elements = m_nb_elements; + serializer(nb_elements); + + const slz_size_type nb_deleted_buckets = m_nb_deleted_buckets; + serializer(nb_deleted_buckets); + + const float max_load_factor = m_max_load_factor; + serializer(max_load_factor); + + for (const auto &bucket : m_sparse_buckets_data) { + bucket.serialize(serializer); + } + } + + template + void deserialize_impl(Deserializer &deserializer, bool hash_compatible) { + tsl_sh_assert( + m_bucket_count == 0 && + m_sparse_buckets_data.empty()); // Current hash table must be empty + + const slz_size_type version = + deserialize_value(deserializer); + // For now we only have one version of the serialization protocol. + // If it doesn't match there is a problem with the file. + if (version != SERIALIZATION_PROTOCOL_VERSION) { + throw std::runtime_error( + "Can't deserialize the sparse_map/set. 
The " + "protocol version header is invalid."); + } + + const slz_size_type bucket_count_ds = + deserialize_value(deserializer); + const slz_size_type nb_sparse_buckets = + deserialize_value(deserializer); + const slz_size_type nb_elements = + deserialize_value(deserializer); + const slz_size_type nb_deleted_buckets = + deserialize_value(deserializer); + const float max_load_factor = deserialize_value(deserializer); + + if (!hash_compatible) { + this->max_load_factor(max_load_factor); + reserve(numeric_cast(nb_elements, + "Deserialized nb_elements is too big.")); + for (slz_size_type ibucket = 0; ibucket < nb_sparse_buckets; ibucket++) { + sparse_array::deserialize_values_into_sparse_hash(deserializer, *this); + } + } else { + m_bucket_count = numeric_cast( + bucket_count_ds, "Deserialized bucket_count is too big."); + + GrowthPolicy::operator=(GrowthPolicy(m_bucket_count)); + // GrowthPolicy should not modify the bucket count we got from + // deserialization + if (m_bucket_count != bucket_count_ds) { + throw std::runtime_error( + "The GrowthPolicy is not the same even though " + "hash_compatible is true."); + } + + if (nb_sparse_buckets != + sparse_array::nb_sparse_buckets(m_bucket_count)) { + throw std::runtime_error("Deserialized nb_sparse_buckets is invalid."); + } + + m_nb_elements = numeric_cast( + nb_elements, "Deserialized nb_elements is too big."); + m_nb_deleted_buckets = numeric_cast( + nb_deleted_buckets, "Deserialized nb_deleted_buckets is too big."); + + m_sparse_buckets_data.reserve(numeric_cast( + nb_sparse_buckets, "Deserialized nb_sparse_buckets is too big.")); + for (slz_size_type ibucket = 0; ibucket < nb_sparse_buckets; ibucket++) { + m_sparse_buckets_data.emplace_back( + sparse_array::deserialize_hash_compatible( + deserializer, static_cast(*this))); + } + + if (!m_sparse_buckets_data.empty()) { + m_sparse_buckets_data.back().set_as_last(); + m_sparse_buckets = m_sparse_buckets_data.data(); + } + + this->max_load_factor(max_load_factor); + if (load_factor() > this->max_load_factor()) { + throw std::runtime_error( + "Invalid max_load_factor. Check that the serializer and " + "deserializer support " + "floats correctly as they can be converted implicitely to ints."); + } + } + } + + public: + static const size_type DEFAULT_INIT_BUCKET_COUNT = 0; + static constexpr float DEFAULT_MAX_LOAD_FACTOR = 0.5f; + + /** + * Protocol version currenlty used for serialization. + */ + static const slz_size_type SERIALIZATION_PROTOCOL_VERSION = 1; + + /** + * Return an always valid pointer to an static empty bucket_entry with + * last_bucket() == true. + */ + sparse_array *static_empty_sparse_bucket_ptr() { + static sparse_array empty_sparse_bucket(true); + return &empty_sparse_bucket; + } + + private: + sparse_buckets_container m_sparse_buckets_data; + + /** + * Points to m_sparse_buckets_data.data() if !m_sparse_buckets_data.empty() + * otherwise points to static_empty_sparse_bucket_ptr. This variable is useful + * to avoid the cost of checking if m_sparse_buckets_data is empty when trying + * to find an element. + * + * TODO Remove m_sparse_buckets_data and only use a pointer instead of a + * pointer+vector to save some space in the sparse_hash object. + */ + sparse_array *m_sparse_buckets; + + size_type m_bucket_count; + size_type m_nb_elements; + size_type m_nb_deleted_buckets; + + /** + * Maximum that m_nb_elements can reach before a rehash occurs automatically + * to grow the hash table. 
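// Illustrative sketch (editor's example): how the two thresholds kept in the
// members just below are derived from bucket_count() and max_load_factor()
// (see max_load_factor(float) above). The factor is clamped to [0.1, 0.8];
// deleted buckets get an extra margin of half the remaining head room before a
// cleanup rehash is triggered.
#include <algorithm>
#include <cstddef>

namespace sparse_hash_example {

struct sketch_thresholds {
  std::size_t rehash;         // m_load_threshold_rehash
  std::size_t clear_deleted;  // m_load_threshold_clear_deleted
};

inline sketch_thresholds sketch_compute_thresholds(std::size_t bucket_count,
                                                   float max_load_factor) {
  const float mlf = std::max(0.1f, std::min(max_load_factor, 0.8f));
  const float mlf_with_deleted = mlf + 0.5f * (1.0f - mlf);
  return {static_cast<std::size_t>(float(bucket_count) * mlf),
          static_cast<std::size_t>(float(bucket_count) * mlf_with_deleted)};
}

// Example: 1024 buckets at the default max load factor of 0.5 rehash at 512
// live elements, and clean up tombstones once live + deleted reaches 768
// (0.5 + 0.5 * 0.5 = 0.75; 1024 * 0.75 == 768).

}  // namespace sparse_hash_example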
+ */ + size_type m_load_threshold_rehash; + + /** + * Maximum that m_nb_elements + m_nb_deleted_buckets can reach before cleaning + * up the buckets marked as deleted. + */ + size_type m_load_threshold_clear_deleted; + float m_max_load_factor; +}; + +} // namespace detail_sparse_hash +} // namespace tsl + +#endif diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/tsl/sparse_map.h b/packages/leann-backend-diskann/third_party/DiskANN/include/tsl/sparse_map.h new file mode 100644 index 0000000..601742d --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/tsl/sparse_map.h @@ -0,0 +1,800 @@ +/** + * MIT License + * + * Copyright (c) 2017 Thibaut Goetghebuer-Planchon + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef TSL_SPARSE_MAP_H +#define TSL_SPARSE_MAP_H + +#include +#include +#include +#include +#include +#include + +#include "sparse_hash.h" + +namespace tsl { + +/** + * Implementation of a sparse hash map using open-addressing with quadratic + * probing. The goal on the hash map is to be the most memory efficient + * possible, even at low load factor, while keeping reasonable performances. + * + * `GrowthPolicy` defines how the map grows and consequently how a hash value is + * mapped to a bucket. By default the map uses + * `tsl::sh::power_of_two_growth_policy`. This policy keeps the number of + * buckets to a power of two and uses a mask to map the hash to a bucket instead + * of the slow modulo. Other growth policies are available and you may define + * your own growth policy, check `tsl::sh::power_of_two_growth_policy` for the + * interface. + * + * `ExceptionSafety` defines the exception guarantee provided by the class. By + * default only the basic exception safety is guaranteed which mean that all + * resources used by the hash map will be freed (no memory leaks) but the hash + * map may end-up in an undefined state if an exception is thrown (undefined + * here means that some elements may be missing). This can ONLY happen on rehash + * (either on insert or if `rehash` is called explicitly) and will occur if the + * Allocator can't allocate memory (`std::bad_alloc`) or if the copy constructor + * (when a nothrow move constructor is not available) throws an exception. This + * can be avoided by calling `reserve` beforehand. This basic guarantee is + * similar to the one of `google::sparse_hash_map` and `spp::sparse_hash_map`. 
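// Illustrative sketch (editor's example): why the default
// power_of_two_growth_policy can use a mask instead of modulo, as described
// above. When bucket_count is a power of two, (hash & (bucket_count - 1))
// selects the same bucket as hash % bucket_count, without the division.
#include <cassert>
#include <cstddef>

namespace sparse_hash_example {

inline std::size_t sketch_bucket_for_hash(std::size_t hash,
                                          std::size_t pow2_bucket_count) {
  return hash & (pow2_bucket_count - 1);
}

inline void sketch_mask_vs_modulo() {
  const std::size_t bucket_count = 1024;  // power of two
  for (std::size_t hash :
       {std::size_t(7), std::size_t(1025), std::size_t(999983)}) {
    assert(sketch_bucket_for_hash(hash, bucket_count) == hash % bucket_count);
  }
}

}  // namespace sparse_hash_example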
+ * It is possible to ask for the strong exception guarantee with + * `tsl::sh::exception_safety::strong`, the drawback is that the map will be + * slower on rehashes and will also need more memory on rehashes. + * + * `Sparsity` defines how much the hash set will compromise between insertion + * speed and memory usage. A high sparsity means less memory usage but longer + * insertion times, and vice-versa for low sparsity. The default + * `tsl::sh::sparsity::medium` sparsity offers a good compromise. It doesn't + * change the lookup speed. + * + * `Key` and `T` must be nothrow move constructible and/or copy constructible. + * + * If the destructor of `Key` or `T` throws an exception, the behaviour of the + * class is undefined. + * + * Iterators invalidation: + * - clear, operator=, reserve, rehash: always invalidate the iterators. + * - insert, emplace, emplace_hint, operator[]: if there is an effective + * insert, invalidate the iterators. + * - erase: always invalidate the iterators. + */ +template , + class KeyEqual = std::equal_to, + class Allocator = std::allocator>, + class GrowthPolicy = tsl::sh::power_of_two_growth_policy<2>, + tsl::sh::exception_safety ExceptionSafety = + tsl::sh::exception_safety::basic, + tsl::sh::sparsity Sparsity = tsl::sh::sparsity::medium> +class sparse_map { + private: + template + using has_is_transparent = tsl::detail_sparse_hash::has_is_transparent; + + class KeySelect { + public: + using key_type = Key; + + const key_type &operator()( + const std::pair &key_value) const noexcept { + return key_value.first; + } + + key_type &operator()(std::pair &key_value) noexcept { + return key_value.first; + } + }; + + class ValueSelect { + public: + using value_type = T; + + const value_type &operator()( + const std::pair &key_value) const noexcept { + return key_value.second; + } + + value_type &operator()(std::pair &key_value) noexcept { + return key_value.second; + } + }; + + using ht = detail_sparse_hash::sparse_hash< + std::pair, KeySelect, ValueSelect, Hash, KeyEqual, Allocator, + GrowthPolicy, ExceptionSafety, Sparsity, tsl::sh::probing::quadratic>; + + public: + using key_type = typename ht::key_type; + using mapped_type = T; + using value_type = typename ht::value_type; + using size_type = typename ht::size_type; + using difference_type = typename ht::difference_type; + using hasher = typename ht::hasher; + using key_equal = typename ht::key_equal; + using allocator_type = typename ht::allocator_type; + using reference = typename ht::reference; + using const_reference = typename ht::const_reference; + using pointer = typename ht::pointer; + using const_pointer = typename ht::const_pointer; + using iterator = typename ht::iterator; + using const_iterator = typename ht::const_iterator; + + public: + /* + * Constructors + */ + sparse_map() : sparse_map(ht::DEFAULT_INIT_BUCKET_COUNT) {} + + explicit sparse_map(size_type bucket_count, const Hash &hash = Hash(), + const KeyEqual &equal = KeyEqual(), + const Allocator &alloc = Allocator()) + : m_ht(bucket_count, hash, equal, alloc, ht::DEFAULT_MAX_LOAD_FACTOR) {} + + sparse_map(size_type bucket_count, const Allocator &alloc) + : sparse_map(bucket_count, Hash(), KeyEqual(), alloc) {} + + sparse_map(size_type bucket_count, const Hash &hash, const Allocator &alloc) + : sparse_map(bucket_count, hash, KeyEqual(), alloc) {} + + explicit sparse_map(const Allocator &alloc) + : sparse_map(ht::DEFAULT_INIT_BUCKET_COUNT, alloc) {} + + template + sparse_map(InputIt first, InputIt last, + size_type bucket_count = 
ht::DEFAULT_INIT_BUCKET_COUNT, + const Hash &hash = Hash(), const KeyEqual &equal = KeyEqual(), + const Allocator &alloc = Allocator()) + : sparse_map(bucket_count, hash, equal, alloc) { + insert(first, last); + } + + template + sparse_map(InputIt first, InputIt last, size_type bucket_count, + const Allocator &alloc) + : sparse_map(first, last, bucket_count, Hash(), KeyEqual(), alloc) {} + + template + sparse_map(InputIt first, InputIt last, size_type bucket_count, + const Hash &hash, const Allocator &alloc) + : sparse_map(first, last, bucket_count, hash, KeyEqual(), alloc) {} + + sparse_map(std::initializer_list init, + size_type bucket_count = ht::DEFAULT_INIT_BUCKET_COUNT, + const Hash &hash = Hash(), const KeyEqual &equal = KeyEqual(), + const Allocator &alloc = Allocator()) + : sparse_map(init.begin(), init.end(), bucket_count, hash, equal, alloc) { + } + + sparse_map(std::initializer_list init, size_type bucket_count, + const Allocator &alloc) + : sparse_map(init.begin(), init.end(), bucket_count, Hash(), KeyEqual(), + alloc) {} + + sparse_map(std::initializer_list init, size_type bucket_count, + const Hash &hash, const Allocator &alloc) + : sparse_map(init.begin(), init.end(), bucket_count, hash, KeyEqual(), + alloc) {} + + sparse_map &operator=(std::initializer_list ilist) { + m_ht.clear(); + + m_ht.reserve(ilist.size()); + m_ht.insert(ilist.begin(), ilist.end()); + + return *this; + } + + allocator_type get_allocator() const { return m_ht.get_allocator(); } + + /* + * Iterators + */ + iterator begin() noexcept { return m_ht.begin(); } + const_iterator begin() const noexcept { return m_ht.begin(); } + const_iterator cbegin() const noexcept { return m_ht.cbegin(); } + + iterator end() noexcept { return m_ht.end(); } + const_iterator end() const noexcept { return m_ht.end(); } + const_iterator cend() const noexcept { return m_ht.cend(); } + + /* + * Capacity + */ + bool empty() const noexcept { return m_ht.empty(); } + size_type size() const noexcept { return m_ht.size(); } + size_type max_size() const noexcept { return m_ht.max_size(); } + + /* + * Modifiers + */ + void clear() noexcept { m_ht.clear(); } + + std::pair insert(const value_type &value) { + return m_ht.insert(value); + } + + template ::value>::type * = nullptr> + std::pair insert(P &&value) { + return m_ht.emplace(std::forward
<P>
(value)); + } + + std::pair insert(value_type &&value) { + return m_ht.insert(std::move(value)); + } + + iterator insert(const_iterator hint, const value_type &value) { + return m_ht.insert_hint(hint, value); + } + + template ::value>::type * = nullptr> + iterator insert(const_iterator hint, P &&value) { + return m_ht.emplace_hint(hint, std::forward
<P>
(value)); + } + + iterator insert(const_iterator hint, value_type &&value) { + return m_ht.insert_hint(hint, std::move(value)); + } + + template + void insert(InputIt first, InputIt last) { + m_ht.insert(first, last); + } + + void insert(std::initializer_list ilist) { + m_ht.insert(ilist.begin(), ilist.end()); + } + + template + std::pair insert_or_assign(const key_type &k, M &&obj) { + return m_ht.insert_or_assign(k, std::forward(obj)); + } + + template + std::pair insert_or_assign(key_type &&k, M &&obj) { + return m_ht.insert_or_assign(std::move(k), std::forward(obj)); + } + + template + iterator insert_or_assign(const_iterator hint, const key_type &k, M &&obj) { + return m_ht.insert_or_assign(hint, k, std::forward(obj)); + } + + template + iterator insert_or_assign(const_iterator hint, key_type &&k, M &&obj) { + return m_ht.insert_or_assign(hint, std::move(k), std::forward(obj)); + } + + /** + * Due to the way elements are stored, emplace will need to move or copy the + * key-value once. The method is equivalent to + * `insert(value_type(std::forward(args)...));`. + * + * Mainly here for compatibility with the `std::unordered_map` interface. + */ + template + std::pair emplace(Args &&...args) { + return m_ht.emplace(std::forward(args)...); + } + + /** + * Due to the way elements are stored, emplace_hint will need to move or copy + * the key-value once. The method is equivalent to `insert(hint, + * value_type(std::forward(args)...));`. + * + * Mainly here for compatibility with the `std::unordered_map` interface. + */ + template + iterator emplace_hint(const_iterator hint, Args &&...args) { + return m_ht.emplace_hint(hint, std::forward(args)...); + } + + template + std::pair try_emplace(const key_type &k, Args &&...args) { + return m_ht.try_emplace(k, std::forward(args)...); + } + + template + std::pair try_emplace(key_type &&k, Args &&...args) { + return m_ht.try_emplace(std::move(k), std::forward(args)...); + } + + template + iterator try_emplace(const_iterator hint, const key_type &k, Args &&...args) { + return m_ht.try_emplace_hint(hint, k, std::forward(args)...); + } + + template + iterator try_emplace(const_iterator hint, key_type &&k, Args &&...args) { + return m_ht.try_emplace_hint(hint, std::move(k), + std::forward(args)...); + } + + iterator erase(iterator pos) { return m_ht.erase(pos); } + iterator erase(const_iterator pos) { return m_ht.erase(pos); } + iterator erase(const_iterator first, const_iterator last) { + return m_ht.erase(first, last); + } + size_type erase(const key_type &key) { return m_ht.erase(key); } + + /** + * Use the hash value `precalculated_hash` instead of hashing the key. The + * hash value should be the same as `hash_function()(key)`, otherwise the + * behaviour is undefined. Useful to speed-up the lookup if you already have + * the hash. + */ + size_type erase(const key_type &key, std::size_t precalculated_hash) { + return m_ht.erase(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef + * `KeyEqual::is_transparent` exists. If so, `K` must be hashable and + * comparable to `Key`. + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type * = nullptr> + size_type erase(const K &key) { + return m_ht.erase(key); + } + + /** + * @copydoc erase(const K& key) + * + * Use the hash value `precalculated_hash` instead of hashing the key. The + * hash value should be the same as `hash_function()(key)`, otherwise the + * behaviour is undefined. 
Useful to speed-up the lookup if you already have + * the hash. + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type * = nullptr> + size_type erase(const K &key, std::size_t precalculated_hash) { + return m_ht.erase(key, precalculated_hash); + } + + void swap(sparse_map &other) { other.m_ht.swap(m_ht); } + + /* + * Lookup + */ + T &at(const Key &key) { return m_ht.at(key); } + + /** + * Use the hash value `precalculated_hash` instead of hashing the key. The + * hash value should be the same as `hash_function()(key)`, otherwise the + * behaviour is undefined. Useful to speed-up the lookup if you already have + * the hash. + */ + T &at(const Key &key, std::size_t precalculated_hash) { + return m_ht.at(key, precalculated_hash); + } + + const T &at(const Key &key) const { return m_ht.at(key); } + + /** + * @copydoc at(const Key& key, std::size_t precalculated_hash) + */ + const T &at(const Key &key, std::size_t precalculated_hash) const { + return m_ht.at(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef + * `KeyEqual::is_transparent` exists. If so, `K` must be hashable and + * comparable to `Key`. + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type * = nullptr> + T &at(const K &key) { + return m_ht.at(key); + } + + /** + * @copydoc at(const K& key) + * + * Use the hash value `precalculated_hash` instead of hashing the key. The + * hash value should be the same as `hash_function()(key)`, otherwise the + * behaviour is undefined. Useful to speed-up the lookup if you already have + * the hash. + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type * = nullptr> + T &at(const K &key, std::size_t precalculated_hash) { + return m_ht.at(key, precalculated_hash); + } + + /** + * @copydoc at(const K& key) + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type * = nullptr> + const T &at(const K &key) const { + return m_ht.at(key); + } + + /** + * @copydoc at(const K& key, std::size_t precalculated_hash) + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type * = nullptr> + const T &at(const K &key, std::size_t precalculated_hash) const { + return m_ht.at(key, precalculated_hash); + } + + T &operator[](const Key &key) { return m_ht[key]; } + T &operator[](Key &&key) { return m_ht[std::move(key)]; } + + size_type count(const Key &key) const { return m_ht.count(key); } + + /** + * Use the hash value `precalculated_hash` instead of hashing the key. The + * hash value should be the same as `hash_function()(key)`, otherwise the + * behaviour is undefined. Useful to speed-up the lookup if you already have + * the hash. + */ + size_type count(const Key &key, std::size_t precalculated_hash) const { + return m_ht.count(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef + * `KeyEqual::is_transparent` exists. If so, `K` must be hashable and + * comparable to `Key`. + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type * = nullptr> + size_type count(const K &key) const { + return m_ht.count(key); + } + + /** + * @copydoc count(const K& key) const + * + * Use the hash value `precalculated_hash` instead of hashing the key. The + * hash value should be the same as `hash_function()(key)`, otherwise the + * behaviour is undefined. 
Useful to speed-up the lookup if you already have + * the hash. + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type * = nullptr> + size_type count(const K &key, std::size_t precalculated_hash) const { + return m_ht.count(key, precalculated_hash); + } + + iterator find(const Key &key) { return m_ht.find(key); } + + /** + * Use the hash value `precalculated_hash` instead of hashing the key. The + * hash value should be the same as `hash_function()(key)`, otherwise the + * behaviour is undefined. Useful to speed-up the lookup if you already have + * the hash. + */ + iterator find(const Key &key, std::size_t precalculated_hash) { + return m_ht.find(key, precalculated_hash); + } + + const_iterator find(const Key &key) const { return m_ht.find(key); } + + /** + * @copydoc find(const Key& key, std::size_t precalculated_hash) + */ + const_iterator find(const Key &key, std::size_t precalculated_hash) const { + return m_ht.find(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef + * `KeyEqual::is_transparent` exists. If so, `K` must be hashable and + * comparable to `Key`. + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type * = nullptr> + iterator find(const K &key) { + return m_ht.find(key); + } + + /** + * @copydoc find(const K& key) + * + * Use the hash value `precalculated_hash` instead of hashing the key. The + * hash value should be the same as `hash_function()(key)`, otherwise the + * behaviour is undefined. Useful to speed-up the lookup if you already have + * the hash. + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type * = nullptr> + iterator find(const K &key, std::size_t precalculated_hash) { + return m_ht.find(key, precalculated_hash); + } + + /** + * @copydoc find(const K& key) + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type * = nullptr> + const_iterator find(const K &key) const { + return m_ht.find(key); + } + + /** + * @copydoc find(const K& key) + * + * Use the hash value `precalculated_hash` instead of hashing the key. The + * hash value should be the same as `hash_function()(key)`, otherwise the + * behaviour is undefined. Useful to speed-up the lookup if you already have + * the hash. + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type * = nullptr> + const_iterator find(const K &key, std::size_t precalculated_hash) const { + return m_ht.find(key, precalculated_hash); + } + + bool contains(const Key &key) const { return m_ht.contains(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup if you already have the hash. + */ + bool contains(const Key &key, std::size_t precalculated_hash) const { + return m_ht.contains(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef + * KeyEqual::is_transparent exists. If so, K must be hashable and comparable + * to Key. + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type * = nullptr> + bool contains(const K &key) const { + return m_ht.contains(key); + } + + /** + * @copydoc contains(const K& key) const + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). 
Useful to speed-up + * the lookup if you already have the hash. + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type * = nullptr> + bool contains(const K &key, std::size_t precalculated_hash) const { + return m_ht.contains(key, precalculated_hash); + } + + std::pair equal_range(const Key &key) { + return m_ht.equal_range(key); + } + + /** + * Use the hash value `precalculated_hash` instead of hashing the key. The + * hash value should be the same as `hash_function()(key)`, otherwise the + * behaviour is undefined. Useful to speed-up the lookup if you already have + * the hash. + */ + std::pair equal_range(const Key &key, + std::size_t precalculated_hash) { + return m_ht.equal_range(key, precalculated_hash); + } + + std::pair equal_range(const Key &key) const { + return m_ht.equal_range(key); + } + + /** + * @copydoc equal_range(const Key& key, std::size_t precalculated_hash) + */ + std::pair equal_range( + const Key &key, std::size_t precalculated_hash) const { + return m_ht.equal_range(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef + * `KeyEqual::is_transparent` exists. If so, `K` must be hashable and + * comparable to `Key`. + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type * = nullptr> + std::pair equal_range(const K &key) { + return m_ht.equal_range(key); + } + + /** + * @copydoc equal_range(const K& key) + * + * Use the hash value `precalculated_hash` instead of hashing the key. The + * hash value should be the same as `hash_function()(key)`, otherwise the + * behaviour is undefined. Useful to speed-up the lookup if you already have + * the hash. + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type * = nullptr> + std::pair equal_range(const K &key, + std::size_t precalculated_hash) { + return m_ht.equal_range(key, precalculated_hash); + } + + /** + * @copydoc equal_range(const K& key) + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type * = nullptr> + std::pair equal_range(const K &key) const { + return m_ht.equal_range(key); + } + + /** + * @copydoc equal_range(const K& key, std::size_t precalculated_hash) + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type * = nullptr> + std::pair equal_range( + const K &key, std::size_t precalculated_hash) const { + return m_ht.equal_range(key, precalculated_hash); + } + + /* + * Bucket interface + */ + size_type bucket_count() const { return m_ht.bucket_count(); } + size_type max_bucket_count() const { return m_ht.max_bucket_count(); } + + /* + * Hash policy + */ + float load_factor() const { return m_ht.load_factor(); } + float max_load_factor() const { return m_ht.max_load_factor(); } + void max_load_factor(float ml) { m_ht.max_load_factor(ml); } + + void rehash(size_type count) { m_ht.rehash(count); } + void reserve(size_type count) { m_ht.reserve(count); } + + /* + * Observers + */ + hasher hash_function() const { return m_ht.hash_function(); } + key_equal key_eq() const { return m_ht.key_eq(); } + + /* + * Other + */ + + /** + * Convert a `const_iterator` to an `iterator`. + */ + iterator mutable_iterator(const_iterator pos) { + return m_ht.mutable_iterator(pos); + } + + /** + * Serialize the map through the `serializer` parameter. 
+ * + * The `serializer` parameter must be a function object that supports the + * following call: + * - `template void operator()(const U& value);` where the types + * `std::uint64_t`, `float` and `std::pair` must be supported for U. + * + * The implementation leaves binary compatibility (endianness, IEEE 754 for + * floats, ...) of the types it serializes in the hands of the `Serializer` + * function object if compatibility is required. + */ + template + void serialize(Serializer &serializer) const { + m_ht.serialize(serializer); + } + + /** + * Deserialize a previously serialized map through the `deserializer` + * parameter. + * + * The `deserializer` parameter must be a function object that supports the + * following calls: + * - `template U operator()();` where the types `std::uint64_t`, + * `float` and `std::pair` must be supported for U. + * + * If the deserialized hash map type is hash compatible with the serialized + * map, the deserialization process can be sped up by setting + * `hash_compatible` to true. To be hash compatible, the Hash, KeyEqual and + * GrowthPolicy must behave the same way than the ones used on the serialized + * map. The `std::size_t` must also be of the same size as the one on the + * platform used to serialize the map. If these criteria are not met, the + * behaviour is undefined with `hash_compatible` sets to true. + * + * The behaviour is undefined if the type `Key` and `T` of the `sparse_map` + * are not the same as the types used during serialization. + * + * The implementation leaves binary compatibility (endianness, IEEE 754 for + * floats, size of int, ...) of the types it deserializes in the hands of the + * `Deserializer` function object if compatibility is required. + */ + template + static sparse_map deserialize(Deserializer &deserializer, + bool hash_compatible = false) { + sparse_map map(0); + map.m_ht.deserialize(deserializer, hash_compatible); + + return map; + } + + friend bool operator==(const sparse_map &lhs, const sparse_map &rhs) { + if (lhs.size() != rhs.size()) { + return false; + } + + for (const auto &element_lhs : lhs) { + const auto it_element_rhs = rhs.find(element_lhs.first); + if (it_element_rhs == rhs.cend() || + element_lhs.second != it_element_rhs->second) { + return false; + } + } + + return true; + } + + friend bool operator!=(const sparse_map &lhs, const sparse_map &rhs) { + return !operator==(lhs, rhs); + } + + friend void swap(sparse_map &lhs, sparse_map &rhs) { lhs.swap(rhs); } + + private: + ht m_ht; +}; + +/** + * Same as `tsl::sparse_map`. 
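+ * (Editor's note, hedged) The alias below is expected to differ only in its
+ * growth policy, using `tsl::sh::prime_growth_policy` so that the bucket
+ * count stays a prime number. Minimal usage sketch:
+ *
+ *     tsl::sparse_pg_map<std::string, int> word_count;
+ *     word_count["apple"] += 1;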
+ */ +template , + class KeyEqual = std::equal_to, + class Allocator = std::allocator>> +using sparse_pg_map = + sparse_map; + +} // end namespace tsl + +#endif diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/tsl/sparse_set.h b/packages/leann-backend-diskann/third_party/DiskANN/include/tsl/sparse_set.h new file mode 100644 index 0000000..3ce6a58 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/tsl/sparse_set.h @@ -0,0 +1,655 @@ +/** + * MIT License + * + * Copyright (c) 2017 Thibaut Goetghebuer-Planchon + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef TSL_SPARSE_SET_H +#define TSL_SPARSE_SET_H + +#include +#include +#include +#include +#include +#include + +#include "sparse_hash.h" + +namespace tsl { + +/** + * Implementation of a sparse hash set using open-addressing with quadratic + * probing. The goal on the hash set is to be the most memory efficient + * possible, even at low load factor, while keeping reasonable performances. + * + * `GrowthPolicy` defines how the set grows and consequently how a hash value is + * mapped to a bucket. By default the set uses + * `tsl::sh::power_of_two_growth_policy`. This policy keeps the number of + * buckets to a power of two and uses a mask to map the hash to a bucket instead + * of the slow modulo. Other growth policies are available and you may define + * your own growth policy, check `tsl::sh::power_of_two_growth_policy` for the + * interface. + * + * `ExceptionSafety` defines the exception guarantee provided by the class. By + * default only the basic exception safety is guaranteed which mean that all + * resources used by the hash set will be freed (no memory leaks) but the hash + * set may end-up in an undefined state if an exception is thrown (undefined + * here means that some elements may be missing). This can ONLY happen on rehash + * (either on insert or if `rehash` is called explicitly) and will occur if the + * Allocator can't allocate memory (`std::bad_alloc`) or if the copy constructor + * (when a nothrow move constructor is not available) throws an exception. This + * can be avoided by calling `reserve` beforehand. This basic guarantee is + * similar to the one of `google::sparse_hash_map` and `spp::sparse_hash_map`. + * It is possible to ask for the strong exception guarantee with + * `tsl::sh::exception_safety::strong`, the drawback is that the set will be + * slower on rehashes and will also need more memory on rehashes. 
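+ *
+ * (Editor's sketch, not part of the original header) Opting into the strong
+ * guarantee only requires overriding the `ExceptionSafety` template
+ * parameter, for example:
+ *
+ *     using safe_set = tsl::sparse_set<int, std::hash<int>, std::equal_to<int>,
+ *                                      std::allocator<int>,
+ *                                      tsl::sh::power_of_two_growth_policy<2>,
+ *                                      tsl::sh::exception_safety::strong>;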
+ * + * `Sparsity` defines how much the hash set will compromise between insertion + * speed and memory usage. A high sparsity means less memory usage but longer + * insertion times, and vice-versa for low sparsity. The default + * `tsl::sh::sparsity::medium` sparsity offers a good compromise. It doesn't + * change the lookup speed. + * + * `Key` must be nothrow move constructible and/or copy constructible. + * + * If the destructor of `Key` throws an exception, the behaviour of the class is + * undefined. + * + * Iterators invalidation: + * - clear, operator=, reserve, rehash: always invalidate the iterators. + * - insert, emplace, emplace_hint: if there is an effective insert, invalidate + * the iterators. + * - erase: always invalidate the iterators. + */ +template , + class KeyEqual = std::equal_to, + class Allocator = std::allocator, + class GrowthPolicy = tsl::sh::power_of_two_growth_policy<2>, + tsl::sh::exception_safety ExceptionSafety = + tsl::sh::exception_safety::basic, + tsl::sh::sparsity Sparsity = tsl::sh::sparsity::medium> +class sparse_set { + private: + template + using has_is_transparent = tsl::detail_sparse_hash::has_is_transparent; + + class KeySelect { + public: + using key_type = Key; + + const key_type &operator()(const Key &key) const noexcept { return key; } + + key_type &operator()(Key &key) noexcept { return key; } + }; + + using ht = + detail_sparse_hash::sparse_hash; + + public: + using key_type = typename ht::key_type; + using value_type = typename ht::value_type; + using size_type = typename ht::size_type; + using difference_type = typename ht::difference_type; + using hasher = typename ht::hasher; + using key_equal = typename ht::key_equal; + using allocator_type = typename ht::allocator_type; + using reference = typename ht::reference; + using const_reference = typename ht::const_reference; + using pointer = typename ht::pointer; + using const_pointer = typename ht::const_pointer; + using iterator = typename ht::iterator; + using const_iterator = typename ht::const_iterator; + + /* + * Constructors + */ + sparse_set() : sparse_set(ht::DEFAULT_INIT_BUCKET_COUNT) {} + + explicit sparse_set(size_type bucket_count, const Hash &hash = Hash(), + const KeyEqual &equal = KeyEqual(), + const Allocator &alloc = Allocator()) + : m_ht(bucket_count, hash, equal, alloc, ht::DEFAULT_MAX_LOAD_FACTOR) {} + + sparse_set(size_type bucket_count, const Allocator &alloc) + : sparse_set(bucket_count, Hash(), KeyEqual(), alloc) {} + + sparse_set(size_type bucket_count, const Hash &hash, const Allocator &alloc) + : sparse_set(bucket_count, hash, KeyEqual(), alloc) {} + + explicit sparse_set(const Allocator &alloc) + : sparse_set(ht::DEFAULT_INIT_BUCKET_COUNT, alloc) {} + + template + sparse_set(InputIt first, InputIt last, + size_type bucket_count = ht::DEFAULT_INIT_BUCKET_COUNT, + const Hash &hash = Hash(), const KeyEqual &equal = KeyEqual(), + const Allocator &alloc = Allocator()) + : sparse_set(bucket_count, hash, equal, alloc) { + insert(first, last); + } + + template + sparse_set(InputIt first, InputIt last, size_type bucket_count, + const Allocator &alloc) + : sparse_set(first, last, bucket_count, Hash(), KeyEqual(), alloc) {} + + template + sparse_set(InputIt first, InputIt last, size_type bucket_count, + const Hash &hash, const Allocator &alloc) + : sparse_set(first, last, bucket_count, hash, KeyEqual(), alloc) {} + + sparse_set(std::initializer_list init, + size_type bucket_count = ht::DEFAULT_INIT_BUCKET_COUNT, + const Hash &hash = Hash(), const KeyEqual &equal = 
KeyEqual(), + const Allocator &alloc = Allocator()) + : sparse_set(init.begin(), init.end(), bucket_count, hash, equal, alloc) { + } + + sparse_set(std::initializer_list init, size_type bucket_count, + const Allocator &alloc) + : sparse_set(init.begin(), init.end(), bucket_count, Hash(), KeyEqual(), + alloc) {} + + sparse_set(std::initializer_list init, size_type bucket_count, + const Hash &hash, const Allocator &alloc) + : sparse_set(init.begin(), init.end(), bucket_count, hash, KeyEqual(), + alloc) {} + + sparse_set &operator=(std::initializer_list ilist) { + m_ht.clear(); + + m_ht.reserve(ilist.size()); + m_ht.insert(ilist.begin(), ilist.end()); + + return *this; + } + + allocator_type get_allocator() const { return m_ht.get_allocator(); } + + /* + * Iterators + */ + iterator begin() noexcept { return m_ht.begin(); } + const_iterator begin() const noexcept { return m_ht.begin(); } + const_iterator cbegin() const noexcept { return m_ht.cbegin(); } + + iterator end() noexcept { return m_ht.end(); } + const_iterator end() const noexcept { return m_ht.end(); } + const_iterator cend() const noexcept { return m_ht.cend(); } + + /* + * Capacity + */ + bool empty() const noexcept { return m_ht.empty(); } + size_type size() const noexcept { return m_ht.size(); } + size_type max_size() const noexcept { return m_ht.max_size(); } + + /* + * Modifiers + */ + void clear() noexcept { m_ht.clear(); } + + std::pair insert(const value_type &value) { + return m_ht.insert(value); + } + + std::pair insert(value_type &&value) { + return m_ht.insert(std::move(value)); + } + + iterator insert(const_iterator hint, const value_type &value) { + return m_ht.insert_hint(hint, value); + } + + iterator insert(const_iterator hint, value_type &&value) { + return m_ht.insert_hint(hint, std::move(value)); + } + + template + void insert(InputIt first, InputIt last) { + m_ht.insert(first, last); + } + + void insert(std::initializer_list ilist) { + m_ht.insert(ilist.begin(), ilist.end()); + } + + /** + * Due to the way elements are stored, emplace will need to move or copy the + * key-value once. The method is equivalent to + * `insert(value_type(std::forward(args)...));`. + * + * Mainly here for compatibility with the `std::unordered_map` interface. + */ + template + std::pair emplace(Args &&...args) { + return m_ht.emplace(std::forward(args)...); + } + + /** + * Due to the way elements are stored, emplace_hint will need to move or copy + * the key-value once. The method is equivalent to `insert(hint, + * value_type(std::forward(args)...));`. + * + * Mainly here for compatibility with the `std::unordered_map` interface. + */ + template + iterator emplace_hint(const_iterator hint, Args &&...args) { + return m_ht.emplace_hint(hint, std::forward(args)...); + } + + iterator erase(iterator pos) { return m_ht.erase(pos); } + iterator erase(const_iterator pos) { return m_ht.erase(pos); } + iterator erase(const_iterator first, const_iterator last) { + return m_ht.erase(first, last); + } + size_type erase(const key_type &key) { return m_ht.erase(key); } + + /** + * Use the hash value `precalculated_hash` instead of hashing the key. The + * hash value should be the same as `hash_function()(key)`, otherwise the + * behaviour is undefined. Useful to speed-up the lookup if you already have + * the hash. 
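+ *
+ * Editor's sketch of the intended pattern (hedged; `set` and `key` are
+ * placeholders):
+ *
+ *     const std::size_t h = set.hash_function()(key);
+ *     if (set.contains(key, h)) {
+ *       set.erase(key, h);
+ *     }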
+ */ + size_type erase(const key_type &key, std::size_t precalculated_hash) { + return m_ht.erase(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef + * `KeyEqual::is_transparent` exists. If so, `K` must be hashable and + * comparable to `Key`. + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type * = nullptr> + size_type erase(const K &key) { + return m_ht.erase(key); + } + + /** + * @copydoc erase(const K& key) + * + * Use the hash value `precalculated_hash` instead of hashing the key. The + * hash value should be the same as `hash_function()(key)`, otherwise the + * behaviour is undefined. Useful to speed-up the lookup if you already have + * the hash. + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type * = nullptr> + size_type erase(const K &key, std::size_t precalculated_hash) { + return m_ht.erase(key, precalculated_hash); + } + + void swap(sparse_set &other) { other.m_ht.swap(m_ht); } + + /* + * Lookup + */ + size_type count(const Key &key) const { return m_ht.count(key); } + + /** + * Use the hash value `precalculated_hash` instead of hashing the key. The + * hash value should be the same as `hash_function()(key)`, otherwise the + * behaviour is undefined. Useful to speed-up the lookup if you already have + * the hash. + */ + size_type count(const Key &key, std::size_t precalculated_hash) const { + return m_ht.count(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef + * `KeyEqual::is_transparent` exists. If so, `K` must be hashable and + * comparable to `Key`. + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type * = nullptr> + size_type count(const K &key) const { + return m_ht.count(key); + } + + /** + * @copydoc count(const K& key) const + * + * Use the hash value `precalculated_hash` instead of hashing the key. The + * hash value should be the same as `hash_function()(key)`, otherwise the + * behaviour is undefined. Useful to speed-up the lookup if you already have + * the hash. + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type * = nullptr> + size_type count(const K &key, std::size_t precalculated_hash) const { + return m_ht.count(key, precalculated_hash); + } + + iterator find(const Key &key) { return m_ht.find(key); } + + /** + * Use the hash value `precalculated_hash` instead of hashing the key. The + * hash value should be the same as `hash_function()(key)`, otherwise the + * behaviour is undefined. Useful to speed-up the lookup if you already have + * the hash. + */ + iterator find(const Key &key, std::size_t precalculated_hash) { + return m_ht.find(key, precalculated_hash); + } + + const_iterator find(const Key &key) const { return m_ht.find(key); } + + /** + * @copydoc find(const Key& key, std::size_t precalculated_hash) + */ + const_iterator find(const Key &key, std::size_t precalculated_hash) const { + return m_ht.find(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef + * `KeyEqual::is_transparent` exists. If so, `K` must be hashable and + * comparable to `Key`. 
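+ *
+ * Heterogeneous lookup sketch (editor's example; the transparent functors
+ * are assumptions, not provided by this header, and `std::string_view`
+ * assumes C++17):
+ *
+ *     struct string_hash {
+ *       std::size_t operator()(std::string_view sv) const {
+ *         return std::hash<std::string_view>{}(sv);
+ *       }
+ *     };
+ *     struct string_equal {
+ *       using is_transparent = void;
+ *       bool operator()(std::string_view a, std::string_view b) const {
+ *         return a == b;
+ *       }
+ *     };
+ *     tsl::sparse_set<std::string, string_hash, string_equal> names;
+ *     auto it = names.find(std::string_view("alice"));  // no temporary std::string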
+ */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type * = nullptr> + iterator find(const K &key) { + return m_ht.find(key); + } + + /** + * @copydoc find(const K& key) + * + * Use the hash value `precalculated_hash` instead of hashing the key. The + * hash value should be the same as `hash_function()(key)`, otherwise the + * behaviour is undefined. Useful to speed-up the lookup if you already have + * the hash. + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type * = nullptr> + iterator find(const K &key, std::size_t precalculated_hash) { + return m_ht.find(key, precalculated_hash); + } + + /** + * @copydoc find(const K& key) + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type * = nullptr> + const_iterator find(const K &key) const { + return m_ht.find(key); + } + + /** + * @copydoc find(const K& key) + * + * Use the hash value `precalculated_hash` instead of hashing the key. The + * hash value should be the same as `hash_function()(key)`, otherwise the + * behaviour is undefined. Useful to speed-up the lookup if you already have + * the hash. + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type * = nullptr> + const_iterator find(const K &key, std::size_t precalculated_hash) const { + return m_ht.find(key, precalculated_hash); + } + + bool contains(const Key &key) const { return m_ht.contains(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup if you already have the hash. + */ + bool contains(const Key &key, std::size_t precalculated_hash) const { + return m_ht.contains(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef + * KeyEqual::is_transparent exists. If so, K must be hashable and comparable + * to Key. + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type * = nullptr> + bool contains(const K &key) const { + return m_ht.contains(key); + } + + /** + * @copydoc contains(const K& key) const + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup if you already have the hash. + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type * = nullptr> + bool contains(const K &key, std::size_t precalculated_hash) const { + return m_ht.contains(key, precalculated_hash); + } + + std::pair equal_range(const Key &key) { + return m_ht.equal_range(key); + } + + /** + * Use the hash value `precalculated_hash` instead of hashing the key. The + * hash value should be the same as `hash_function()(key)`, otherwise the + * behaviour is undefined. Useful to speed-up the lookup if you already have + * the hash. + */ + std::pair equal_range(const Key &key, + std::size_t precalculated_hash) { + return m_ht.equal_range(key, precalculated_hash); + } + + std::pair equal_range(const Key &key) const { + return m_ht.equal_range(key); + } + + /** + * @copydoc equal_range(const Key& key, std::size_t precalculated_hash) + */ + std::pair equal_range( + const Key &key, std::size_t precalculated_hash) const { + return m_ht.equal_range(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef + * `KeyEqual::is_transparent` exists. 
If so, `K` must be hashable and + * comparable to `Key`. + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type * = nullptr> + std::pair equal_range(const K &key) { + return m_ht.equal_range(key); + } + + /** + * @copydoc equal_range(const K& key) + * + * Use the hash value `precalculated_hash` instead of hashing the key. The + * hash value should be the same as `hash_function()(key)`, otherwise the + * behaviour is undefined. Useful to speed-up the lookup if you already have + * the hash. + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type * = nullptr> + std::pair equal_range(const K &key, + std::size_t precalculated_hash) { + return m_ht.equal_range(key, precalculated_hash); + } + + /** + * @copydoc equal_range(const K& key) + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type * = nullptr> + std::pair equal_range(const K &key) const { + return m_ht.equal_range(key); + } + + /** + * @copydoc equal_range(const K& key, std::size_t precalculated_hash) + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type * = nullptr> + std::pair equal_range( + const K &key, std::size_t precalculated_hash) const { + return m_ht.equal_range(key, precalculated_hash); + } + + /* + * Bucket interface + */ + size_type bucket_count() const { return m_ht.bucket_count(); } + size_type max_bucket_count() const { return m_ht.max_bucket_count(); } + + /* + * Hash policy + */ + float load_factor() const { return m_ht.load_factor(); } + float max_load_factor() const { return m_ht.max_load_factor(); } + void max_load_factor(float ml) { m_ht.max_load_factor(ml); } + + void rehash(size_type count) { m_ht.rehash(count); } + void reserve(size_type count) { m_ht.reserve(count); } + + /* + * Observers + */ + hasher hash_function() const { return m_ht.hash_function(); } + key_equal key_eq() const { return m_ht.key_eq(); } + + /* + * Other + */ + + /** + * Convert a `const_iterator` to an `iterator`. + */ + iterator mutable_iterator(const_iterator pos) { + return m_ht.mutable_iterator(pos); + } + + /** + * Serialize the set through the `serializer` parameter. + * + * The `serializer` parameter must be a function object that supports the + * following call: + * - `void operator()(const U& value);` where the types `std::uint64_t`, + * `float` and `Key` must be supported for U. + * + * The implementation leaves binary compatibility (endianness, IEEE 754 for + * floats, ...) of the types it serializes in the hands of the `Serializer` + * function object if compatibility is required. + */ + template + void serialize(Serializer &serializer) const { + m_ht.serialize(serializer); + } + + /** + * Deserialize a previously serialized set through the `deserializer` + * parameter. + * + * The `deserializer` parameter must be a function object that supports the + * following calls: + * - `template U operator()();` where the types `std::uint64_t`, + * `float` and `Key` must be supported for U. + * + * If the deserialized hash set type is hash compatible with the serialized + * set, the deserialization process can be sped up by setting + * `hash_compatible` to true. To be hash compatible, the Hash, KeyEqual and + * GrowthPolicy must behave the same way than the ones used on the serialized + * set. The `std::size_t` must also be of the same size as the one on the + * platform used to serialize the set. If these criteria are not met, the + * behaviour is undefined with `hash_compatible` sets to true. 
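+ *
+ * Round-trip sketch (editor's example; `file_serializer` and
+ * `file_deserializer` are hypothetical function objects meeting the
+ * requirements above, not utilities provided by this header):
+ *
+ *     file_serializer out("set.bin");
+ *     set.serialize(out);
+ *     // ... later, same platform and same Hash/KeyEqual/GrowthPolicy ...
+ *     file_deserializer in("set.bin");
+ *     auto restored =
+ *         tsl::sparse_set<std::uint64_t>::deserialize(in, true); // hash_compatible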
+ * + * The behaviour is undefined if the type `Key` of the `sparse_set` is not the + * same as the type used during serialization. + * + * The implementation leaves binary compatibility (endianness, IEEE 754 for + * floats, size of int, ...) of the types it deserializes in the hands of the + * `Deserializer` function object if compatibility is required. + */ + template + static sparse_set deserialize(Deserializer &deserializer, + bool hash_compatible = false) { + sparse_set set(0); + set.m_ht.deserialize(deserializer, hash_compatible); + + return set; + } + + friend bool operator==(const sparse_set &lhs, const sparse_set &rhs) { + if (lhs.size() != rhs.size()) { + return false; + } + + for (const auto &element_lhs : lhs) { + const auto it_element_rhs = rhs.find(element_lhs); + if (it_element_rhs == rhs.cend()) { + return false; + } + } + + return true; + } + + friend bool operator!=(const sparse_set &lhs, const sparse_set &rhs) { + return !operator==(lhs, rhs); + } + + friend void swap(sparse_set &lhs, sparse_set &rhs) { lhs.swap(rhs); } + + private: + ht m_ht; +}; + +/** + * Same as `tsl::sparse_set`. + */ +template , + class KeyEqual = std::equal_to, + class Allocator = std::allocator> +using sparse_pg_set = + sparse_set; + +} // end namespace tsl + +#endif diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/types.h b/packages/leann-backend-diskann/third_party/DiskANN/include/types.h new file mode 100644 index 0000000..953d59a --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/types.h @@ -0,0 +1,22 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include +#include +#include +#include "any_wrappers.h" + +namespace diskann +{ +typedef uint32_t location_t; + +using DataType = std::any; +using TagType = std::any; +using LabelType = std::any; +using TagVector = AnyWrapper::AnyVector; +using DataVector = AnyWrapper::AnyVector; +using Labelvector = AnyWrapper::AnyVector; +using TagRobinSet = AnyWrapper::AnyRobinSet; +} // namespace diskann diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/utils.h b/packages/leann-backend-diskann/third_party/DiskANN/include/utils.h new file mode 100644 index 0000000..355a613 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/utils.h @@ -0,0 +1,1455 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
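+//
+// Editor's note: a small arithmetic sketch for the alignment helpers defined
+// further down in this header (values checked by hand, shown for reference):
+//
+//     ROUND_UP(1000, 512)     == 1024  // next multiple of 512
+//     DIV_ROUND_UP(1000, 512) == 2     // 512-byte sectors needed for 1000 bytes
+//     ROUND_DOWN(1000, 512)   == 512
+//     IS_4096_ALIGNED(8192)   == true  // 8192 % 4096 == 0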
+ +#pragma once + +#include + +#include "common_includes.h" + +#ifdef __APPLE__ +#include +#else +#include +#endif + +#ifdef _WINDOWS +#include +typedef HANDLE FileHandle; +#else +#include +typedef int FileHandle; +#endif + +#include "distance.h" +#include "logger.h" +#include "cached_io.h" +#include "ann_exception.h" +#include "windows_customizations.h" +#include "tsl/robin_set.h" +#include "types.h" +#include "tag_uint128.h" +#include + +#ifdef EXEC_ENV_OLS +#include "content_buf.h" +#include "memory_mapped_files.h" +#endif + +#ifdef __APPLE__ +#ifdef __arm64__ +#define _MM_HINT_T0 1 +#define _MM_HINT_T1 2 + +static inline __attribute__((always_inline)) void _mm_prefetch(char const *p, int i) +{ + switch (i) + { + case _MM_HINT_T0: + __builtin_prefetch(p, 0, 3); + break; + case _MM_HINT_T1: + __builtin_prefetch(p, 0, 2); + break; + } +} +#endif + +#define LAPACK_COL_MAJOR 1 +#define LAPACK_ROW_MAJOR 0 +#ifdef __APPLE__ +typedef int clp_int; +#else +typedef __CLPK_integer clp_int; +#endif + +inline void _sge_trans(int matrix_layout, clp_int m, clp_int n, const float *in, clp_int ldin, float *out, + clp_int ldout) +{ + clp_int i, j, x, y; + + if (matrix_layout == LAPACK_COL_MAJOR) + { + x = n; + y = m; + } + else + { + x = m; + y = n; + } + for (i = 0; i < MIN(y, ldin); i++) + { + for (j = 0; j < MIN(x, ldout); j++) + { + out[(size_t)i * ldout + j] = in[(size_t)j * ldin + i]; + } + } +} +inline clp_int sgesdd_rm_work(char jobz, clp_int m, clp_int n, float *a, clp_int lda, float *s, float *u, clp_int ldu, + float *vt, clp_int ldvt, float *work, clp_int lwork, clp_int *iwork) +{ + clp_int info = 0; + clp_int nrows_u = ((jobz == 'a') || (jobz == 's') || ((jobz == 'o') && m < n)) ? m : 1; + clp_int ncols_u = ((jobz == 'a') || ((jobz == 'o') && m < n)) ? m : ((jobz == 's') ? MIN(m, n) : 1); + clp_int nrows_vt = ((jobz == 'a') || ((jobz == 'o') && m >= n)) ? n : ((jobz == 's') ? MIN(m, n) : 1); + + clp_int lda_t = MAX(1, m); + clp_int ldu_t = MAX(1, nrows_u); + clp_int ldvt_t = MAX(1, nrows_vt); + float *a_t = NULL; + float *u_t = NULL; + float *vt_t = NULL; + + // check leading dimensions + if (lda < n) + { + info = -6; + return info; + } + if (ldu < ncols_u) + { + info = -9; + return info; + } + if (ldvt < n) + { + info = -11; + return info; + } + + // query for optimal work size if lwork = -1 + if (lwork == -1) + { + sgesdd_(&jobz, &m, &n, a, &lda_t, s, u, &ldu_t, vt, &ldvt_t, work, &lwork, iwork, &info); + return (info < 0) ? 
(info - 1) : info; + } + + // setup temp arrays + a_t = (float *)malloc(sizeof(float) * lda_t * MAX(1, n)); + if (a_t == NULL) + { + info = -1011; + return info; + } + if ((jobz == 'a') || (jobz == 's') || ((jobz == 'o') && (m < n))) + { + u_t = (float *)malloc(sizeof(float) * ldu_t * MAX(1, ncols_u)); + if (u_t == NULL) + { + info = -1011; + free(a_t); + return info; + } + } + if ((jobz == 'a') || (jobz == 's') || ((jobz == 'o') && (m >= n))) + { + vt_t = (float *)malloc(sizeof(float) * ldvt_t * MAX(1, n)); + if (vt_t == NULL) + { + info = -1011; + free(a_t); + if ((jobz == 'a') || (jobz == 's') || ((jobz == 'o') && (m < n))) + { + free(u_t); + } + return info; + } + } + + _sge_trans(LAPACK_ROW_MAJOR, m, n, a, lda, a_t, lda_t); + sgesdd_(&jobz, &m, &n, a_t, &lda_t, s, u_t, &ldu_t, vt_t, &ldvt_t, work, &lwork, iwork, &info); + + if (info < 0) + { + info = info - 1; + } + /* Transpose output matrices */ + _sge_trans(LAPACK_COL_MAJOR, m, n, a_t, lda_t, a, lda); + if ((jobz == 'a') || (jobz == 's') || ((jobz == 'o') && (m < n))) + { + _sge_trans(LAPACK_COL_MAJOR, nrows_u, ncols_u, u_t, ldu_t, u, ldu); + } + if ((jobz == 'a') || (jobz == 's') || ((jobz == 'o') && (m >= n))) + { + _sge_trans(LAPACK_COL_MAJOR, nrows_vt, n, vt_t, ldvt_t, vt, ldvt); + } + /* Release memory and exit */ + if ((jobz == 'a') || (jobz == 's') || ((jobz == 'o') && (m >= n))) + { + free(vt_t); + } + if ((jobz == 'a') || (jobz == 's') || ((jobz == 'o') && (m < n))) + { + free(u_t); + } + free(a_t); + return info; +} + +inline clp_int LAPACKE_sgesdd(int matrix_layout, char jobz, clp_int m, clp_int n, float *a, clp_int lda, float *s, + float *u, clp_int ldu, float *vt, clp_int ldvt) +{ + // internal SGESDD vars + clp_int info = 0; + clp_int lwork = -1; + clp_int *iwork = NULL; + float *work = NULL; + float work_query; + + // allocate space for iwork + iwork = (clp_int *)malloc(sizeof(clp_int) * MAX(1, 8 * MIN(m, n))); + if (iwork == NULL) + throw; + /* Query optimal working array(s) size */ + info = sgesdd_rm_work(jobz, m, n, a, lda, s, u, ldu, vt, ldvt, &work_query, lwork, iwork); + if (info != 0) + { + free(iwork); + info = -1010; + return info; + } + + lwork = (clp_int)work_query; + /* Allocate memory for work arrays */ + work = (float *)malloc(sizeof(float) * lwork); + if (work == NULL) + throw; + + /* Call middle-level interface */ + info = sgesdd_rm_work(jobz, m, n, a, lda, s, u, ldu, vt, ldvt, work, lwork, iwork); + /* Release memory and exit */ + free(work); + free(iwork); + return info; +} +#endif + +// taken from +// https://github.com/Microsoft/BLAS-on-flash/blob/master/include/utils.h +// round up X to the nearest multiple of Y +#define ROUND_UP(X, Y) ((((uint64_t)(X) / (Y)) + ((uint64_t)(X) % (Y) != 0)) * (Y)) + +#define DIV_ROUND_UP(X, Y) (((uint64_t)(X) / (Y)) + ((uint64_t)(X) % (Y) != 0)) + +// round down X to the nearest multiple of Y +#define ROUND_DOWN(X, Y) (((uint64_t)(X) / (Y)) * (Y)) + +// alignment tests +#define IS_ALIGNED(X, Y) ((uint64_t)(X) % (uint64_t)(Y) == 0) +#define IS_512_ALIGNED(X) IS_ALIGNED(X, 512) +#define IS_4096_ALIGNED(X) IS_ALIGNED(X, 4096) +#define METADATA_SIZE \ + 4096 // all metadata of individual sub-component files is written in first + // 4KB for unified files + +#define BUFFER_SIZE_FOR_CACHED_IO (size_t)1024 * (size_t)1048576 + +#define PBSTR "||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||" +#define PBWIDTH 60 + +inline bool file_exists_impl(const std::string &name, bool dirCheck = false) +{ + int val; +#ifndef _WINDOWS + struct stat buffer; + val = 
stat(name.c_str(), &buffer); +#else + // It is the 21st century but Windows API still thinks in 32-bit terms. + // Turns out calling stat() on a file > 4GB results in errno = 132 + // (OVERFLOW). How silly is this!? So calling _stat64() + struct _stat64 buffer; + val = _stat64(name.c_str(), &buffer); +#endif + + if (val != 0) + { + switch (errno) + { + case EINVAL: + diskann::cout << "Invalid argument passed to stat()" << std::endl; + break; + case ENOENT: + // file is not existing, not an issue, so we won't cout anything. + break; + default: + diskann::cout << "Unexpected error in stat():" << errno << std::endl; + break; + } + return false; + } + else + { + // the file entry exists. If reqd, check if this is a directory. + return dirCheck ? buffer.st_mode & S_IFDIR : true; + } +} + +inline bool file_exists(const std::string &name, bool dirCheck = false) +{ +#ifdef EXEC_ENV_OLS + bool exists = file_exists_impl(name, dirCheck); + if (exists) + { + return true; + } + if (!dirCheck) + { + // try with .enc extension + std::string enc_name = name + ENCRYPTED_EXTENSION; + return file_exists_impl(enc_name, dirCheck); + } + else + { + return exists; + } +#else + return file_exists_impl(name, dirCheck); +#endif +} + +inline void open_file_to_write(std::ofstream &writer, const std::string &filename) +{ + writer.exceptions(std::ofstream::failbit | std::ofstream::badbit); + if (!file_exists(filename)) + writer.open(filename, std::ios::binary | std::ios::out); + else + writer.open(filename, std::ios::binary | std::ios::in | std::ios::out); + + if (writer.fail()) + { + char buff[1024]; +#ifdef _WINDOWS + auto ret = std::to_string(strerror_s(buff, 1024, errno)); +#elif __APPLE__ + auto ret = std::to_string(strerror_r(errno, buff, 1024)); +#else + auto ret = std::string(strerror_r(errno, buff, 1024)); +#endif + auto message = std::string("Failed to open file") + filename + " for write because " + buff + ", ret=" + ret; + diskann::cerr << message << std::endl; + throw diskann::ANNException(message, -1); + } +} + +inline size_t get_file_size(const std::string &fname) +{ + std::ifstream reader(fname, std::ios::binary | std::ios::ate); + if (!reader.fail() && reader.is_open()) + { + size_t end_pos = reader.tellg(); + reader.close(); + return end_pos; + } + else + { + diskann::cerr << "Could not open file: " << fname << std::endl; + return 0; + } +} + +inline int delete_file(const std::string &fileName) +{ + if (file_exists(fileName)) + { + auto rc = ::remove(fileName.c_str()); + if (rc != 0) + { + diskann::cerr << "Could not delete file: " << fileName + << " even though it exists. This might indicate a permissions " + "issue. " + "If you see this message, please contact the diskann team." + << std::endl; + } + return rc; + } + else + { + return 0; + } +} + +// generates formatted_label and _labels_map file. 
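+// Editor's sketch of the expected input/output (hedged, inferred from the
+// implementation below): with an input label file containing
+//
+//     red,blue
+//     blue
+//
+// and an empty `unv_label`, outFileName receives "1,2" and "2" (one line per
+// point) and mapFileName receives the tab-separated pairs "red\t1" and
+// "blue\t2". A non-empty universal label is always mapped to 0.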
+inline void convert_labels_string_to_int(const std::string &inFileName, const std::string &outFileName, + const std::string &mapFileName, const std::string &unv_label) +{ + std::unordered_map string_int_map; + std::ofstream label_writer(outFileName); + std::ifstream label_reader(inFileName); + if (unv_label != "") + string_int_map[unv_label] = 0; // if universal label is provided map it to 0 always + std::string line, token; + while (std::getline(label_reader, line)) + { + std::istringstream new_iss(line); + std::vector lbls; + while (getline(new_iss, token, ',')) + { + token.erase(std::remove(token.begin(), token.end(), '\n'), token.end()); + token.erase(std::remove(token.begin(), token.end(), '\r'), token.end()); + if (string_int_map.find(token) == string_int_map.end()) + { + uint32_t nextId = (uint32_t)string_int_map.size() + 1; + string_int_map[token] = nextId; // nextId can never be 0 + } + lbls.push_back(string_int_map[token]); + } + if (lbls.size() <= 0) + { + std::cout << "No label found"; + exit(-1); + } + for (size_t j = 0; j < lbls.size(); j++) + { + if (j != lbls.size() - 1) + label_writer << lbls[j] << ","; + else + label_writer << lbls[j] << std::endl; + } + } + label_writer.close(); + + std::ofstream map_writer(mapFileName); + for (auto mp : string_int_map) + { + map_writer << mp.first << "\t" << mp.second << std::endl; + } + map_writer.close(); +} + +#ifdef EXEC_ENV_OLS +class AlignedFileReader; +#endif + +namespace diskann +{ +static const size_t MAX_SIZE_OF_STREAMBUF = 2LL * 1024 * 1024 * 1024; + +inline void print_error_and_terminate(std::stringstream &error_stream) +{ + diskann::cerr << error_stream.str() << std::endl; + throw diskann::ANNException(error_stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); +} + +inline void report_memory_allocation_failure() +{ + std::stringstream stream; + stream << "Memory Allocation Failed."; + print_error_and_terminate(stream); +} + +inline void report_misalignment_of_requested_size(size_t align) +{ + std::stringstream stream; + stream << "Requested memory size is not a multiple of " << align << ". Can not be allocated."; + print_error_and_terminate(stream); +} + +inline void alloc_aligned(void **ptr, size_t size, size_t align) +{ + *ptr = nullptr; + if (IS_ALIGNED(size, align) == 0) + report_misalignment_of_requested_size(align); +#ifdef _WINDOWS + *ptr = ::_aligned_malloc(size, align); // note the swapped arguments! +#elif __APPLE__ + int err = posix_memalign(ptr, align, size); + if (err) + { + std::cout << err << std::endl; + throw; + } +#else + *ptr = ::aligned_alloc(align, size); +#endif + if (*ptr == nullptr) + report_memory_allocation_failure(); +} + +inline void realloc_aligned(void **ptr, size_t size, size_t align) +{ + if (IS_ALIGNED(size, align) == 0) + report_misalignment_of_requested_size(align); +#ifdef _WINDOWS + *ptr = ::_aligned_realloc(*ptr, size, align); +#else + diskann::cerr << "No aligned realloc on GCC. Must malloc and mem_align, " + "left it out for now." + << std::endl; +#endif + if (*ptr == nullptr) + report_memory_allocation_failure(); +} + +inline void check_stop(std::string arnd) +{ + int brnd; + diskann::cout << arnd << std::endl; + std::cin >> brnd; +} + +inline void aligned_free(void *ptr) +{ + // Gopal. 
Must have a check here if the pointer was actually allocated by + // _alloc_aligned + if (ptr == nullptr) + { + return; + } +#ifndef _WINDOWS + free(ptr); +#else + ::_aligned_free(ptr); +#endif +} + +inline void GenRandom(std::mt19937 &rng, unsigned *addr, unsigned size, unsigned N) +{ + for (unsigned i = 0; i < size; ++i) + { + addr[i] = rng() % (N - size); + } + + std::sort(addr, addr + size); + for (unsigned i = 1; i < size; ++i) + { + if (addr[i] <= addr[i - 1]) + { + addr[i] = addr[i - 1] + 1; + } + } + unsigned off = rng() % N; + for (unsigned i = 0; i < size; ++i) + { + addr[i] = (addr[i] + off) % N; + } +} + +// get_bin_metadata functions START +inline void get_bin_metadata_impl(std::basic_istream &reader, size_t &nrows, size_t &ncols, size_t offset = 0) +{ + int nrows_32, ncols_32; + reader.seekg(offset, reader.beg); + reader.read((char *)&nrows_32, sizeof(int)); + reader.read((char *)&ncols_32, sizeof(int)); + nrows = nrows_32; + ncols = ncols_32; +} + +#ifdef EXEC_ENV_OLS +inline void get_bin_metadata(MemoryMappedFiles &files, const std::string &bin_file, size_t &nrows, size_t &ncols, + size_t offset = 0) +{ + diskann::cout << "Getting metadata for file: " << bin_file << std::endl; + auto fc = files.getContent(bin_file); + // auto cb = ContentBuf((char*) fc._content, fc._size); + // std::basic_istream reader(&cb); + // get_bin_metadata_impl(reader, nrows, ncols, offset); + + int nrows_32, ncols_32; + int32_t *metadata_ptr = (int32_t *)((char *)fc._content + offset); + nrows_32 = *metadata_ptr; + ncols_32 = *(metadata_ptr + 1); + nrows = nrows_32; + ncols = ncols_32; +} +#endif + +inline void get_bin_metadata(const std::string &bin_file, size_t &nrows, size_t &ncols, size_t offset = 0) +{ + std::ifstream reader(bin_file.c_str(), std::ios::binary); + get_bin_metadata_impl(reader, nrows, ncols, offset); +} +// get_bin_metadata functions END + +#ifndef EXEC_ENV_OLS +inline size_t get_graph_num_frozen_points(const std::string &graph_file) +{ + size_t expected_file_size; + uint32_t max_observed_degree, start; + size_t file_frozen_pts; + + std::ifstream in; + in.exceptions(std::ios::badbit | std::ios::failbit); + + in.open(graph_file, std::ios::binary); + in.read((char *)&expected_file_size, sizeof(size_t)); + in.read((char *)&max_observed_degree, sizeof(uint32_t)); + in.read((char *)&start, sizeof(uint32_t)); + in.read((char *)&file_frozen_pts, sizeof(size_t)); + + return file_frozen_pts; +} +#endif + +template inline std::string getValues(T *data, size_t num) +{ + std::stringstream stream; + stream << "["; + for (size_t i = 0; i < num; i++) + { + stream << std::to_string(data[i]) << ","; + } + stream << "]" << std::endl; + + return stream.str(); +} + +// load_bin functions START +template +inline void load_bin_impl(std::basic_istream &reader, T *&data, size_t &npts, size_t &dim, size_t file_offset = 0) +{ + int npts_i32, dim_i32; + + reader.seekg(file_offset, reader.beg); + reader.read((char *)&npts_i32, sizeof(int)); + reader.read((char *)&dim_i32, sizeof(int)); + npts = (unsigned)npts_i32; + dim = (unsigned)dim_i32; + + std::cout << "Metadata: #pts = " << npts << ", #dims = " << dim << "..." << std::endl; + + data = new T[npts * dim]; + reader.read((char *)data, npts * dim * sizeof(T)); +} + +#ifdef EXEC_ENV_OLS +template +inline void load_bin(MemoryMappedFiles &files, const std::string &bin_file, T *&data, size_t &npts, size_t &dim, + size_t offset = 0) +{ + diskann::cout << "Reading bin file " << bin_file.c_str() << " at offset: " << offset << "..." 
<< std::endl; + auto fc = files.getContent(bin_file); + + uint32_t t_npts, t_dim; + uint32_t *contentAsIntPtr = (uint32_t *)((char *)fc._content + offset); + t_npts = *(contentAsIntPtr); + t_dim = *(contentAsIntPtr + 1); + + npts = t_npts; + dim = t_dim; + + data = (T *)((char *)fc._content + offset + 2 * sizeof(uint32_t)); // No need to copy! +} + +DISKANN_DLLEXPORT void get_bin_metadata(AlignedFileReader &reader, size_t &npts, size_t &ndim, size_t offset = 0); +template +DISKANN_DLLEXPORT void load_bin(AlignedFileReader &reader, T *&data, size_t &npts, size_t &ndim, size_t offset = 0); +template +DISKANN_DLLEXPORT void load_bin(AlignedFileReader &reader, std::unique_ptr &data, size_t &npts, size_t &ndim, + size_t offset = 0); + +template +DISKANN_DLLEXPORT void copy_aligned_data_from_file(AlignedFileReader &reader, T *&data, size_t &npts, size_t &dim, + const size_t &rounded_dim, size_t offset = 0); + +// Unlike load_bin, assumes that data is already allocated 'size' entries +template +DISKANN_DLLEXPORT void read_array(AlignedFileReader &reader, T *data, size_t size, size_t offset = 0); + +template DISKANN_DLLEXPORT void read_value(AlignedFileReader &reader, T &value, size_t offset = 0); +#endif + +template +inline void load_bin(const std::string &bin_file, T *&data, size_t &npts, size_t &dim, size_t offset = 0) +{ + diskann::cout << "Reading bin file " << bin_file.c_str() << " ..." << std::endl; + std::ifstream reader; + reader.exceptions(std::ifstream::failbit | std::ifstream::badbit); + + try + { + diskann::cout << "Opening bin file " << bin_file.c_str() << "... " << std::endl; + reader.open(bin_file, std::ios::binary | std::ios::ate); + reader.seekg(0); + load_bin_impl(reader, data, npts, dim, offset); + } + catch (std::system_error &e) + { + throw FileException(bin_file, e, __FUNCSIG__, __FILE__, __LINE__); + } + diskann::cout << "done." << std::endl; +} + +inline void wait_for_keystroke() +{ + int a; + std::cout << "Press any number to continue.." << std::endl; + std::cin >> a; +} +// load_bin functions END + +inline void load_truthset(const std::string &bin_file, uint32_t *&ids, float *&dists, size_t &npts, size_t &dim) +{ + size_t read_blk_size = 64 * 1024 * 1024; + cached_ifstream reader(bin_file, read_blk_size); + diskann::cout << "Reading truthset file " << bin_file.c_str() << " ..." << std::endl; + size_t actual_file_size = reader.get_file_size(); + + int npts_i32, dim_i32; + reader.read((char *)&npts_i32, sizeof(int)); + reader.read((char *)&dim_i32, sizeof(int)); + npts = (unsigned)npts_i32; + dim = (unsigned)dim_i32; + + diskann::cout << "Metadata: #pts = " << npts << ", #dims = " << dim << "... " << std::endl; + + int truthset_type = -1; // 1 means truthset has ids and distances, 2 means + // only ids, -1 is error + size_t expected_file_size_with_dists = 2 * npts * dim * sizeof(uint32_t) + 2 * sizeof(uint32_t); + + if (actual_file_size == expected_file_size_with_dists) + truthset_type = 1; + + size_t expected_file_size_just_ids = npts * dim * sizeof(uint32_t) + 2 * sizeof(uint32_t); + + if (actual_file_size == expected_file_size_just_ids) + truthset_type = 2; + + if (truthset_type == -1) + { + std::stringstream stream; + stream << "Error. File size mismatch. 
File should have bin format, with " + "npts followed by ngt followed by npts*ngt ids and optionally " + "followed by npts*ngt distance values; actual size: " + << actual_file_size << ", expected: " << expected_file_size_with_dists << " or " + << expected_file_size_just_ids; + diskann::cout << stream.str(); + throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); + } + + ids = new uint32_t[npts * dim]; + reader.read((char *)ids, npts * dim * sizeof(uint32_t)); + + if (truthset_type == 1) + { + dists = new float[npts * dim]; + reader.read((char *)dists, npts * dim * sizeof(float)); + } +} + +inline void prune_truthset_for_range(const std::string &bin_file, float range, + std::vector> &groundtruth, size_t &npts) +{ + size_t read_blk_size = 64 * 1024 * 1024; + cached_ifstream reader(bin_file, read_blk_size); + diskann::cout << "Reading truthset file " << bin_file.c_str() << "... " << std::endl; + size_t actual_file_size = reader.get_file_size(); + + int npts_i32, dim_i32; + reader.read((char *)&npts_i32, sizeof(int)); + reader.read((char *)&dim_i32, sizeof(int)); + npts = (unsigned)npts_i32; + uint64_t dim = (unsigned)dim_i32; + uint32_t *ids; + float *dists; + + diskann::cout << "Metadata: #pts = " << npts << ", #dims = " << dim << "... " << std::endl; + + int truthset_type = -1; // 1 means truthset has ids and distances, 2 means + // only ids, -1 is error + size_t expected_file_size_with_dists = 2 * npts * dim * sizeof(uint32_t) + 2 * sizeof(uint32_t); + + if (actual_file_size == expected_file_size_with_dists) + truthset_type = 1; + + if (truthset_type == -1) + { + std::stringstream stream; + stream << "Error. File size mismatch. File should have bin format, with " + "npts followed by ngt followed by npts*ngt ids and optionally " + "followed by npts*ngt distance values; actual size: " + << actual_file_size << ", expected: " << expected_file_size_with_dists; + diskann::cout << stream.str(); + throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); + } + + ids = new uint32_t[npts * dim]; + reader.read((char *)ids, npts * dim * sizeof(uint32_t)); + + if (truthset_type == 1) + { + dists = new float[npts * dim]; + reader.read((char *)dists, npts * dim * sizeof(float)); + } + float min_dist = std::numeric_limits::max(); + float max_dist = 0; + groundtruth.resize(npts); + for (uint32_t i = 0; i < npts; i++) + { + groundtruth[i].clear(); + for (uint32_t j = 0; j < dim; j++) + { + if (dists[i * dim + j] <= range) + { + groundtruth[i].emplace_back(ids[i * dim + j]); + } + min_dist = min_dist > dists[i * dim + j] ? dists[i * dim + j] : min_dist; + max_dist = max_dist < dists[i * dim + j] ? dists[i * dim + j] : max_dist; + } + // std::cout<> &groundtruth, + size_t >_num) +{ + size_t read_blk_size = 64 * 1024 * 1024; + cached_ifstream reader(bin_file, read_blk_size); + diskann::cout << "Reading truthset file " << bin_file.c_str() << "... " << std::flush; + size_t actual_file_size = reader.get_file_size(); + + int nptsuint32_t, totaluint32_t; + reader.read((char *)&nptsuint32_t, sizeof(int)); + reader.read((char *)&totaluint32_t, sizeof(int)); + + gt_num = (uint64_t)nptsuint32_t; + uint64_t total_res = (uint64_t)totaluint32_t; + + diskann::cout << "Metadata: #pts = " << gt_num << ", #total_results = " << total_res << "..." << std::endl; + + size_t expected_file_size = 2 * sizeof(uint32_t) + gt_num * sizeof(uint32_t) + total_res * sizeof(uint32_t); + + if (actual_file_size != expected_file_size) + { + std::stringstream stream; + stream << "Error. 
File size mismatch in range truthset. actual size: " << actual_file_size + << ", expected: " << expected_file_size; + diskann::cout << stream.str(); + throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); + } + groundtruth.clear(); + groundtruth.resize(gt_num); + std::vector gt_count(gt_num); + + reader.read((char *)gt_count.data(), sizeof(uint32_t) * gt_num); + + std::vector gt_stats(gt_count); + std::sort(gt_stats.begin(), gt_stats.end()); + + std::cout << "GT count percentiles:" << std::endl; + for (uint32_t p = 0; p < 100; p += 5) + std::cout << "percentile " << p << ": " << gt_stats[static_cast(std::floor((p / 100.0) * gt_num))] + << std::endl; + std::cout << "percentile 100" + << ": " << gt_stats[gt_num - 1] << std::endl; + + for (uint32_t i = 0; i < gt_num; i++) + { + groundtruth[i].clear(); + groundtruth[i].resize(gt_count[i]); + if (gt_count[i] != 0) + reader.read((char *)groundtruth[i].data(), sizeof(uint32_t) * gt_count[i]); + } +} + +#ifdef EXEC_ENV_OLS +template +inline void load_bin(MemoryMappedFiles &files, const std::string &bin_file, std::unique_ptr &data, size_t &npts, + size_t &dim, size_t offset = 0) +{ + T *ptr; + load_bin(files, bin_file, ptr, npts, dim, offset); + data.reset(ptr); +} +#endif + +inline void copy_file(std::string in_file, std::string out_file) +{ + std::ifstream source(in_file, std::ios::binary); + std::ofstream dest(out_file, std::ios::binary); + + std::istreambuf_iterator begin_source(source); + std::istreambuf_iterator end_source; + std::ostreambuf_iterator begin_dest(dest); + std::copy(begin_source, end_source, begin_dest); + + source.close(); + dest.close(); +} + +DISKANN_DLLEXPORT double calculate_recall(unsigned num_queries, unsigned *gold_std, float *gs_dist, unsigned dim_gs, + unsigned *our_results, unsigned dim_or, unsigned recall_at); + +DISKANN_DLLEXPORT double calculate_recall(unsigned num_queries, unsigned *gold_std, float *gs_dist, unsigned dim_gs, + unsigned *our_results, unsigned dim_or, unsigned recall_at, + const tsl::robin_set &active_tags); + +DISKANN_DLLEXPORT double calculate_range_search_recall(unsigned num_queries, + std::vector> &groundtruth, + std::vector> &our_results); + +template +inline void load_bin(const std::string &bin_file, std::unique_ptr &data, size_t &npts, size_t &dim, + size_t offset = 0) +{ + T *ptr; + load_bin(bin_file, ptr, npts, dim, offset); + data.reset(ptr); +} + +inline void open_file_to_write(std::ofstream &writer, const std::string &filename) +{ + writer.exceptions(std::ofstream::failbit | std::ofstream::badbit); + if (!file_exists(filename)) + writer.open(filename, std::ios::binary | std::ios::out); + else + writer.open(filename, std::ios::binary | std::ios::in | std::ios::out); + + if (writer.fail()) + { + char buff[1024]; +#ifdef _WINDOWS + auto ret = std::to_string(strerror_s(buff, 1024, errno)); +#elif __APPLE__ + auto ret = std::to_string(strerror_r(errno, buff, 1024)); +#else + auto ret = std::string(strerror_r(errno, buff, 1024)); +#endif + + std::string error_message = + std::string("Failed to open file") + filename + " for write because " + buff + ", ret=" + ret; + diskann::cerr << error_message << std::endl; + throw diskann::ANNException(error_message, -1); + } +} + +template +inline size_t save_bin(const std::string &filename, T *data, size_t npts, size_t ndims, size_t offset = 0) +{ + std::ofstream writer; + open_file_to_write(writer, filename); + + diskann::cout << "Writing bin: " << filename.c_str() << std::endl; + writer.seekp(offset, writer.beg); + int 
npts_i32 = (int)npts, ndims_i32 = (int)ndims; + size_t bytes_written = npts * ndims * sizeof(T) + 2 * sizeof(uint32_t); + writer.write((char *)&npts_i32, sizeof(int)); + writer.write((char *)&ndims_i32, sizeof(int)); + diskann::cout << "bin: #pts = " << npts << ", #dims = " << ndims << ", size = " << bytes_written << "B" + << std::endl; + + writer.write((char *)data, npts * ndims * sizeof(T)); + writer.close(); + diskann::cout << "Finished writing bin." << std::endl; + return bytes_written; +} + +inline void print_progress(double percentage) +{ + int val = (int)(percentage * 100); + int lpad = (int)(percentage * PBWIDTH); + int rpad = PBWIDTH - lpad; + printf("\r%3d%% [%.*s%*s]", val, lpad, PBSTR, rpad, ""); + fflush(stdout); +} + +// load_aligned_bin functions START + +template +inline void load_aligned_bin_impl(std::basic_istream &reader, size_t actual_file_size, T *&data, size_t &npts, + size_t &dim, size_t &rounded_dim) +{ + int npts_i32, dim_i32; + reader.read((char *)&npts_i32, sizeof(int)); + reader.read((char *)&dim_i32, sizeof(int)); + npts = (unsigned)npts_i32; + dim = (unsigned)dim_i32; + + size_t expected_actual_file_size = npts * dim * sizeof(T) + 2 * sizeof(uint32_t); + if (actual_file_size != expected_actual_file_size) + { + std::stringstream stream; + stream << "Error. File size mismatch. Actual size is " << actual_file_size << " while expected size is " + << expected_actual_file_size << " npts = " << npts << " dim = " << dim << " size of = " << sizeof(T) + << std::endl; + diskann::cout << stream.str() << std::endl; + throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); + } + rounded_dim = ROUND_UP(dim, 8); + diskann::cout << "Metadata: #pts = " << npts << ", #dims = " << dim << ", aligned_dim = " << rounded_dim << "... " + << std::flush; + size_t allocSize = npts * rounded_dim * sizeof(T); + diskann::cout << "allocating aligned memory of " << allocSize << " bytes... " << std::flush; + alloc_aligned(((void **)&data), allocSize, 8 * sizeof(T)); + diskann::cout << "done. Copying data to mem_aligned buffer..." << std::flush; + + for (size_t i = 0; i < npts; i++) + { + reader.read((char *)(data + i * rounded_dim), dim * sizeof(T)); + memset(data + i * rounded_dim + dim, 0, (rounded_dim - dim) * sizeof(T)); + } + diskann::cout << " done." << std::endl; +} + +#ifdef EXEC_ENV_OLS +template +inline void load_aligned_bin(MemoryMappedFiles &files, const std::string &bin_file, T *&data, size_t &npts, size_t &dim, + size_t &rounded_dim) +{ + try + { + diskann::cout << "Opening bin file " << bin_file << " ..." << std::flush; + FileContent fc = files.getContent(bin_file); + ContentBuf buf((char *)fc._content, fc._size); + std::basic_istream reader(&buf); + + size_t actual_file_size = fc._size; + load_aligned_bin_impl(reader, actual_file_size, data, npts, dim, rounded_dim); + } + catch (std::system_error &e) + { + throw FileException(bin_file, e, __FUNCSIG__, __FILE__, __LINE__); + } +} +#endif + +template +inline void load_aligned_bin(const std::string &bin_file, T *&data, size_t &npts, size_t &dim, size_t &rounded_dim) +{ + std::ifstream reader; + reader.exceptions(std::ifstream::failbit | std::ifstream::badbit); + + try + { + diskann::cout << "Reading (with alignment) bin file " << bin_file << " ..." 
<< std::flush; + reader.open(bin_file, std::ios::binary | std::ios::ate); + + uint64_t fsize = reader.tellg(); + reader.seekg(0); + load_aligned_bin_impl(reader, fsize, data, npts, dim, rounded_dim); + } + catch (std::system_error &e) + { + throw FileException(bin_file, e, __FUNCSIG__, __FILE__, __LINE__); + } +} + +template +void convert_types(const InType *srcmat, OutType *destmat, size_t npts, size_t dim) +{ +#pragma omp parallel for schedule(static, 65536) + for (int64_t i = 0; i < (int64_t)npts; i++) + { + for (uint64_t j = 0; j < dim; j++) + { + destmat[i * dim + j] = (OutType)srcmat[i * dim + j]; + } + } +} + +// this function will take in_file of n*d dimensions and save the output as a +// floating point matrix +// with n*(d+1) dimensions. All vectors are scaled by a large value M so that +// the norms are <=1 and the final coordinate is set so that the resulting +// norm (in d+1 coordinates) is equal to 1 this is a classical transformation +// from MIPS to L2 search from "On Symmetric and Asymmetric LSHs for Inner +// Product Search" by Neyshabur and Srebro + +template float prepare_base_for_inner_products(const std::string in_file, const std::string out_file) +{ + std::cout << "Pre-processing base file by adding extra coordinate" << std::endl; + std::ifstream in_reader(in_file.c_str(), std::ios::binary); + std::ofstream out_writer(out_file.c_str(), std::ios::binary); + uint64_t npts, in_dims, out_dims; + float max_norm = 0; + + uint32_t npts32, dims32; + in_reader.read((char *)&npts32, sizeof(uint32_t)); + in_reader.read((char *)&dims32, sizeof(uint32_t)); + + npts = npts32; + in_dims = dims32; + out_dims = in_dims + 1; + uint32_t outdims32 = (uint32_t)out_dims; + + out_writer.write((char *)&npts32, sizeof(uint32_t)); + out_writer.write((char *)&outdims32, sizeof(uint32_t)); + + size_t BLOCK_SIZE = 100000; + size_t block_size = npts <= BLOCK_SIZE ? npts : BLOCK_SIZE; + std::unique_ptr in_block_data = std::make_unique(block_size * in_dims); + std::unique_ptr out_block_data = std::make_unique(block_size * out_dims); + + std::memset(out_block_data.get(), 0, sizeof(float) * block_size * out_dims); + uint64_t num_blocks = DIV_ROUND_UP(npts, block_size); + + std::vector norms(npts, 0); + + for (uint64_t b = 0; b < num_blocks; b++) + { + uint64_t start_id = b * block_size; + uint64_t end_id = (b + 1) * block_size < npts ? (b + 1) * block_size : npts; + uint64_t block_pts = end_id - start_id; + in_reader.read((char *)in_block_data.get(), block_pts * in_dims * sizeof(T)); + for (uint64_t p = 0; p < block_pts; p++) + { + for (uint64_t j = 0; j < in_dims; j++) + { + norms[start_id + p] += in_block_data[p * in_dims + j] * in_block_data[p * in_dims + j]; + } + max_norm = max_norm > norms[start_id + p] ? max_norm : norms[start_id + p]; + } + } + + max_norm = std::sqrt(max_norm); + + in_reader.seekg(2 * sizeof(uint32_t), std::ios::beg); + for (uint64_t b = 0; b < num_blocks; b++) + { + uint64_t start_id = b * block_size; + uint64_t end_id = (b + 1) * block_size < npts ? (b + 1) * block_size : npts; + uint64_t block_pts = end_id - start_id; + in_reader.read((char *)in_block_data.get(), block_pts * in_dims * sizeof(T)); + for (uint64_t p = 0; p < block_pts; p++) + { + for (uint64_t j = 0; j < in_dims; j++) + { + out_block_data[p * out_dims + j] = in_block_data[p * in_dims + j] / max_norm; + } + float res = 1 - (norms[start_id + p] / (max_norm * max_norm)); + res = res <= 0 ? 
0 : std::sqrt(res); + out_block_data[p * out_dims + out_dims - 1] = res; + } + out_writer.write((char *)out_block_data.get(), block_pts * out_dims * sizeof(float)); + } + out_writer.close(); + return max_norm; +} + +// plain saves data as npts X ndims array into filename +template void save_Tvecs(const char *filename, T *data, size_t npts, size_t ndims) +{ + std::string fname(filename); + + // create cached ofstream with 64MB cache + cached_ofstream writer(fname, 64 * 1048576); + + unsigned dims_u32 = (unsigned)ndims; + + // start writing + for (size_t i = 0; i < npts; i++) + { + // write dims in u32 + writer.write((char *)&dims_u32, sizeof(unsigned)); + + // get cur point in data + T *cur_pt = data + i * ndims; + writer.write((char *)cur_pt, ndims * sizeof(T)); + } +} +template +inline size_t save_data_in_base_dimensions(const std::string &filename, T *data, size_t npts, size_t ndims, + size_t aligned_dim, size_t offset = 0) +{ + std::ofstream writer; //(filename, std::ios::binary | std::ios::out); + open_file_to_write(writer, filename); + int npts_i32 = (int)npts, ndims_i32 = (int)ndims; + size_t bytes_written = 2 * sizeof(uint32_t) + npts * ndims * sizeof(T); + writer.seekp(offset, writer.beg); + writer.write((char *)&npts_i32, sizeof(int)); + writer.write((char *)&ndims_i32, sizeof(int)); + for (size_t i = 0; i < npts; i++) + { + writer.write((char *)(data + i * aligned_dim), ndims * sizeof(T)); + } + writer.close(); + return bytes_written; +} + +template +inline void copy_aligned_data_from_file(const char *bin_file, T *&data, size_t &npts, size_t &dim, + const size_t &rounded_dim, size_t offset = 0) +{ + if (data == nullptr) + { + diskann::cerr << "Memory was not allocated for " << data << " before calling the load function. Exiting..." + << std::endl; + throw diskann::ANNException("Null pointer passed to copy_aligned_data_from_file function", -1, __FUNCSIG__, + __FILE__, __LINE__); + } + std::ifstream reader; + reader.exceptions(std::ios::badbit | std::ios::failbit); + reader.open(bin_file, std::ios::binary); + reader.seekg(offset, reader.beg); + + int npts_i32, dim_i32; + reader.read((char *)&npts_i32, sizeof(int)); + reader.read((char *)&dim_i32, sizeof(int)); + npts = (unsigned)npts_i32; + dim = (unsigned)dim_i32; + + for (size_t i = 0; i < npts; i++) + { + reader.read((char *)(data + i * rounded_dim), dim * sizeof(T)); + memset(data + i * rounded_dim + dim, 0, (rounded_dim - dim) * sizeof(T)); + } +} + +// NOTE :: good efficiency when total_vec_size is integral multiple of 64 +inline void prefetch_vector(const char *vec, size_t vecsize) +{ + size_t max_prefetch_size = (vecsize / 64) * 64; + for (size_t d = 0; d < max_prefetch_size; d += 64) + _mm_prefetch((const char *)vec + d, _MM_HINT_T0); +} + +// NOTE :: good efficiency when total_vec_size is integral multiple of 64 +inline void prefetch_vector_l2(const char *vec, size_t vecsize) +{ + size_t max_prefetch_size = (vecsize / 64) * 64; + for (size_t d = 0; d < max_prefetch_size; d += 64) + _mm_prefetch((const char *)vec + d, _MM_HINT_T1); +} + +// NOTE: Implementation in utils.cpp. 
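+
+// A short sketch of the math behind prepare_base_for_inner_products() above (the
+// MIPS-to-L2 reduction by Neyshabur and Srebro cited in its comment); symbols here
+// are illustrative only:
+//   Let M = max_i ||x_i||. Each base vector x_i (d dims) is stored as
+//       x_i' = [ x_i / M ,  sqrt(1 - ||x_i||^2 / M^2) ]      (d+1 dims, ||x_i'|| = 1)
+//   With queries padded by a trailing zero coordinate as in that reduction, q' = [ q , 0 ]:
+//       ||q' - x_i'||^2 = ||q||^2 + 1 - 2 * <q, x_i> / M
+//   Since ||q||^2 + 1 is constant per query, minimizing L2 distance over the
+//   transformed base is equivalent to maximizing the inner product <q, x_i>.
+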
+void block_convert(std::ofstream &writr, std::ifstream &readr, float *read_buf, uint64_t npts, uint64_t ndims); + +DISKANN_DLLEXPORT void normalize_data_file(const std::string &inFileName, const std::string &outFileName); + +inline std::string get_tag_string(std::uint64_t tag) +{ + return std::to_string(tag); +} + +inline std::string get_tag_string(const tag_uint128 &tag) +{ + std::string str = std::to_string(tag._data2) + "_" + std::to_string(tag._data1); + return str; +} + +}; // namespace diskann + +struct PivotContainer +{ + PivotContainer() = default; + + PivotContainer(size_t pivo_id, float pivo_dist) : piv_id{pivo_id}, piv_dist{pivo_dist} + { + } + + bool operator<(const PivotContainer &p) const + { + return p.piv_dist < piv_dist; + } + + bool operator>(const PivotContainer &p) const + { + return p.piv_dist > piv_dist; + } + + size_t piv_id; + float piv_dist; +}; + +inline bool validate_index_file_size(std::ifstream &in) +{ + if (!in.is_open()) + throw diskann::ANNException("Index file size check called on unopened file stream", -1, __FUNCSIG__, __FILE__, + __LINE__); + in.seekg(0, in.end); + size_t actual_file_size = in.tellg(); + in.seekg(0, in.beg); + size_t expected_file_size; + in.read((char *)&expected_file_size, sizeof(uint64_t)); + in.seekg(0, in.beg); + if (actual_file_size != expected_file_size) + { + diskann::cerr << "Index file size error. Expected size (metadata): " << expected_file_size + << ", actual file size : " << actual_file_size << "." << std::endl; + return false; + } + return true; +} + +template inline float get_norm(T *arr, const size_t dim) +{ + float sum = 0.0f; + for (uint32_t i = 0; i < dim; i++) + { + sum += arr[i] * arr[i]; + } + return sqrt(sum); +} + +// This function is valid only for float data type. +template inline void normalize(T *arr, const size_t dim) +{ + float norm = get_norm(arr, dim); + for (uint32_t i = 0; i < dim; i++) + { + arr[i] = (T)(arr[i] / norm); + } +} + +inline std::vector read_file_to_vector_of_strings(const std::string &filename, bool unique = false) +{ + std::vector result; + std::set elementSet; + if (filename != "") + { + std::ifstream file(filename); + if (file.fail()) + { + throw diskann::ANNException(std::string("Failed to open file ") + filename, -1); + } + std::string line; + while (std::getline(file, line)) + { + if (line.empty()) + { + break; + } + if (line.find(',') != std::string::npos) + { + std::cerr << "Every query must have exactly one filter" << std::endl; + exit(-1); + } + if (!line.empty() && (line.back() == '\r' || line.back() == '\n')) + { + line.erase(line.size() - 1); + } + if (!elementSet.count(line)) + { + result.push_back(line); + } + if (unique) + { + elementSet.insert(line); + } + } + file.close(); + } + else + { + throw diskann::ANNException(std::string("Failed to open file. 
filename can not be blank"), -1); + } + return result; +} + +inline void clean_up_artifacts(tsl::robin_set paths_to_clean, tsl::robin_set path_suffixes) +{ + try + { + for (const auto &path : paths_to_clean) + { + for (const auto &suffix : path_suffixes) + { + std::string curr_path_to_clean(path + "_" + suffix); + if (std::remove(curr_path_to_clean.c_str()) != 0) + diskann::cout << "Warning: Unable to remove file :" << curr_path_to_clean << std::endl; + } + } + diskann::cout << "Cleaned all artifacts" << std::endl; + } + catch (const std::exception &e) + { + diskann::cout << "Warning: Unable to clean all artifacts " << e.what() << std::endl; + } +} + +template inline const char *diskann_type_to_name() = delete; +template <> inline const char *diskann_type_to_name() +{ + return "float"; +} +template <> inline const char *diskann_type_to_name() +{ + return "uint8"; +} +template <> inline const char *diskann_type_to_name() +{ + return "int8"; +} +template <> inline const char *diskann_type_to_name() +{ + return "uint16"; +} +template <> inline const char *diskann_type_to_name() +{ + return "int16"; +} +template <> inline const char *diskann_type_to_name() +{ + return "uint32"; +} +template <> inline const char *diskann_type_to_name() +{ + return "int32"; +} +template <> inline const char *diskann_type_to_name() +{ + return "uint64"; +} +template <> inline const char *diskann_type_to_name() +{ + return "int64"; +} + +#ifdef _WINDOWS +#include +#include + +extern bool AvxSupportedCPU; +extern bool Avx2SupportedCPU; + +inline size_t getMemoryUsage() +{ + PROCESS_MEMORY_COUNTERS_EX pmc; + GetProcessMemoryInfo(GetCurrentProcess(), (PROCESS_MEMORY_COUNTERS *)&pmc, sizeof(pmc)); + return pmc.PrivateUsage; +} + +inline std::string getWindowsErrorMessage(DWORD lastError) +{ + char *errorText; + FormatMessageA( + // use system message tables to retrieve error text + FORMAT_MESSAGE_FROM_SYSTEM + // allocate buffer on local heap for error text + | FORMAT_MESSAGE_ALLOCATE_BUFFER + // Important! will fail otherwise, since we're not + // (and CANNOT) pass insertion parameters + | FORMAT_MESSAGE_IGNORE_INSERTS, + NULL, // unused with FORMAT_MESSAGE_FROM_SYSTEM + lastError, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), + (LPSTR)&errorText, // output + 0, // minimum size for output buffer + NULL); // arguments - see note + + return errorText != nullptr ? 
std::string(errorText) : std::string(); +} + +inline void printProcessMemory(const char *message) +{ + PROCESS_MEMORY_COUNTERS counters; + HANDLE h = GetCurrentProcess(); + GetProcessMemoryInfo(h, &counters, sizeof(counters)); + diskann::cout << message + << " [Peaking Working Set size: " << counters.PeakWorkingSetSize * 1.0 / (1024.0 * 1024 * 1024) + << "GB Working set size: " << counters.WorkingSetSize * 1.0 / (1024.0 * 1024 * 1024) + << "GB Private bytes " << counters.PagefileUsage * 1.0 / (1024 * 1024 * 1024) << "GB]" << std::endl; +} +#else + +// need to check and change this +inline bool avx2Supported() +{ + return true; +} +inline void printProcessMemory(const char *) +{ +} + +inline size_t getMemoryUsage() +{ // for non-windows, we have not implemented this function + return 0; +} + +#endif + +extern bool AvxSupportedCPU; +extern bool Avx2SupportedCPU; diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/windows_aligned_file_reader.h b/packages/leann-backend-diskann/third_party/DiskANN/include/windows_aligned_file_reader.h new file mode 100644 index 0000000..0d9a317 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/windows_aligned_file_reader.h @@ -0,0 +1,57 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once +#ifdef _WINDOWS +#ifndef USE_BING_INFRA +#include +#include +#include +#include + +#include +#include +#include +#include "aligned_file_reader.h" +#include "tsl/robin_map.h" +#include "utils.h" +#include "windows_customizations.h" + +class WindowsAlignedFileReader : public AlignedFileReader +{ + private: +#ifdef UNICODE + std::wstring m_filename; +#else + std::string m_filename; +#endif + + protected: + // virtual IOContext createContext(); + + public: + DISKANN_DLLEXPORT WindowsAlignedFileReader(){}; + DISKANN_DLLEXPORT virtual ~WindowsAlignedFileReader(){}; + + // Open & close ops + // Blocking calls + DISKANN_DLLEXPORT virtual void open(const std::string &fname) override; + DISKANN_DLLEXPORT virtual void close() override; + + DISKANN_DLLEXPORT virtual void register_thread() override; + DISKANN_DLLEXPORT virtual void deregister_thread() override + { + // TODO: Needs implementation. + } + DISKANN_DLLEXPORT virtual void deregister_all_threads() override + { + // TODO: Needs implementation. + } + DISKANN_DLLEXPORT virtual IOContext &get_ctx() override; + + // process batch of aligned requests in parallel + // NOTE :: blocking call for the calling thread, but can thread-safe + DISKANN_DLLEXPORT virtual void read(std::vector &read_reqs, IOContext &ctx, bool async) override; +}; +#endif // USE_BING_INFRA +#endif //_WINDOWS diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/windows_customizations.h b/packages/leann-backend-diskann/third_party/DiskANN/include/windows_customizations.h new file mode 100644 index 0000000..e6c5846 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/windows_customizations.h @@ -0,0 +1,16 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
+ +#pragma once + +#ifdef _WINDOWS + +#ifdef _WINDLL +#define DISKANN_DLLEXPORT __declspec(dllexport) +#else +#define DISKANN_DLLEXPORT __declspec(dllimport) +#endif + +#else +#define DISKANN_DLLEXPORT +#endif diff --git a/packages/leann-backend-diskann/third_party/DiskANN/include/windows_slim_lock.h b/packages/leann-backend-diskann/third_party/DiskANN/include/windows_slim_lock.h new file mode 100644 index 0000000..7fc09b8 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/include/windows_slim_lock.h @@ -0,0 +1,73 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. +#pragma once + +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#endif +#include "Windows.h" + +namespace diskann +{ +// A thin C++ wrapper around Windows exclusive functionality of Windows +// SlimReaderWriterLock. +// +// The SlimReaderWriterLock is simpler/more lightweight than std::mutex +// (8 bytes vs 80 bytes), which is useful in the scenario where DiskANN has +// one lock per vector in the index. It does not support recursive locking and +// requires Windows Vista or later. +// +// Full documentation can be found at. +// https://msdn.microsoft.com/en-us/library/windows/desktop/aa904937(v=vs.85).aspx +class windows_exclusive_slim_lock +{ + public: + windows_exclusive_slim_lock() : _lock(SRWLOCK_INIT) + { + } + + // The lock is non-copyable. This also disables move constructor/operator=. + windows_exclusive_slim_lock(const windows_exclusive_slim_lock &) = delete; + windows_exclusive_slim_lock &operator=(const windows_exclusive_slim_lock &) = delete; + + void lock() + { + return AcquireSRWLockExclusive(&_lock); + } + + bool try_lock() + { + return TryAcquireSRWLockExclusive(&_lock) != FALSE; + } + + void unlock() + { + return ReleaseSRWLockExclusive(&_lock); + } + + private: + SRWLOCK _lock; +}; + +// An exclusive lock over a SlimReaderWriterLock. +class windows_exclusive_slim_lock_guard +{ + public: + windows_exclusive_slim_lock_guard(windows_exclusive_slim_lock &p_lock) : _lock(p_lock) + { + _lock.lock(); + } + + // The lock is non-copyable. This also disables move constructor/operator=. + windows_exclusive_slim_lock_guard(const windows_exclusive_slim_lock_guard &) = delete; + windows_exclusive_slim_lock_guard &operator=(const windows_exclusive_slim_lock_guard &) = delete; + + ~windows_exclusive_slim_lock_guard() + { + _lock.unlock(); + } + + private: + windows_exclusive_slim_lock &_lock; +}; +} // namespace diskann diff --git a/packages/leann-backend-diskann/third_party/DiskANN/pyproject.toml b/packages/leann-backend-diskann/third_party/DiskANN/pyproject.toml new file mode 100644 index 0000000..3871c71 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/pyproject.toml @@ -0,0 +1,58 @@ +[build-system] +requires = [ + "setuptools>=59.6", + "pybind11>=2.10.0", + "cmake>=3.22", + "numpy==1.25", # this is important to keep fixed. 
It also means anyone using something other than 1.25 won't be able to use this library + "wheel", + "ninja" +] +build-backend = "setuptools.build_meta" + +[project] +name = "diskannpy" +version = "0.7.1" + +description = "DiskANN Python extension module" +readme = "python/README.md" +requires-python = ">=3.9" +license = {text = "MIT License"} +dependencies = [ + "numpy==1.25" +] +authors = [ + {name = "Harsha Vardhan Simhadri", email = "harshasi@microsoft.com"}, + {name = "Dax Pryce", email = "daxpryce@microsoft.com"} +] + +[project.optional-dependencies] +dev = ["black", "isort", "mypy"] + +[tool.setuptools] +package-dir = {"" = "python/src"} + +[tool.isort] +profile = "black" +multi_line_output = 3 + +[tool.mypy] +plugins = "numpy.typing.mypy_plugin" + +[tool.cibuildwheel] +manylinux-x86_64-image = "manylinux_2_28" +test-requires = ["scikit-learn~=1.2"] +build-frontend = "build" +skip = ["pp*", "*-win32", "*-manylinux_i686", "*-musllinux*"] +test-command = "python -m unittest discover {project}/python/tests" + +[tool.cibuildwheel.linux] +before-build = [ + "rpm --import https://repo.almalinux.org/almalinux/RPM-GPG-KEY-AlmaLinux", + "dnf makecache --refresh", + "dnf upgrade -y almalinux-release", + "dnf install -y epel-release", + "dnf config-manager -y --add-repo https://yum.repos.intel.com/mkl/setup/intel-mkl.repo", + "rpm --import https://yum.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB", + "dnf makecache --refresh -y", + "dnf install -y wget make cmake gcc-c++ libaio-devel gperftools-libs libunwind-devel clang-tools-extra boost-devel boost-program-options intel-mkl-2020.4-912" +] diff --git a/packages/leann-backend-diskann/third_party/DiskANN/python/CMakeLists.txt b/packages/leann-backend-diskann/third_party/DiskANN/python/CMakeLists.txt new file mode 100644 index 0000000..66a5ba3 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/python/CMakeLists.txt @@ -0,0 +1,82 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT license. 
+ +cmake_minimum_required(VERSION 3.18...3.22) + +set(CMAKE_CXX_STANDARD 17) + +if (PYTHON_EXECUTABLE) + set(Python3_EXECUTABLE ${PYTHON_EXECUTABLE}) +endif() + +find_package(Python3 COMPONENTS Interpreter Development.Module NumPy REQUIRED) + +execute_process(COMMAND ${Python3_EXECUTABLE} -c "import pybind11; print(pybind11.get_cmake_dir())" + OUTPUT_VARIABLE _tmp_dir + OUTPUT_STRIP_TRAILING_WHITESPACE COMMAND_ECHO STDOUT) +list(APPEND CMAKE_PREFIX_PATH "${_tmp_dir}") + +# Now we can find pybind11 +find_package(pybind11 CONFIG REQUIRED) + +execute_process(COMMAND ${Python3_EXECUTABLE} -c "import numpy; print(numpy.get_include())" + OUTPUT_VARIABLE _numpy_include + OUTPUT_STRIP_TRAILING_WHITESPACE COMMAND_ECHO STDOUT) + +# pybind11_add_module(diskannpy MODULE src/diskann_bindings.cpp) +# the following is fairly synonymous with pybind11_add_module, but we need more target_link_libraries +# see https://pybind11.readthedocs.io/en/latest/compiling.html#advanced-interface-library-targets for more details +add_library(_diskannpy MODULE + src/module.cpp + src/builder.cpp + src/dynamic_memory_index.cpp + src/static_memory_index.cpp + src/static_disk_index.cpp +) + +target_include_directories(_diskannpy AFTER PRIVATE include) + +if (MSVC) + target_compile_options(_diskannpy PRIVATE /U_WINDLL) +endif() + +target_link_libraries( + _diskannpy + PRIVATE + pybind11::module + pybind11::lto + pybind11::windows_extras + ${PROJECT_NAME} + ${DISKANN_TOOLS_TCMALLOC_LINK_OPTIONS} + ${DISKANN_ASYNC_LIB} +) + +pybind11_extension(_diskannpy) +if(NOT MSVC AND NOT ${CMAKE_BUILD_TYPE} MATCHES Debug|RelWithDebInfo) + # Strip unnecessary sections of the binary on Linux/macOS + pybind11_strip(_diskannpy) +endif() + +set_target_properties(_diskannpy PROPERTIES CXX_VISIBILITY_PRESET "hidden" + CUDA_VISIBILITY_PRESET "hidden") + +# generally, the VERSION_INFO flag is set by pyproject.toml, by way of setup.py. +# attempts to locate the version within CMake fail because the version has to be available +# to pyproject.toml for the sdist to work after we build it. 
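+# For a manual configure, the same value can also be supplied on the command line
+# (illustrative invocation; the official flow goes through pyproject.toml):
+#   cmake -DVERSION_INFO=0.7.1 <source-dir>
+# Whatever ends up in VERSION_INFO is baked into the module through the
+# target_compile_definitions call below.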
+ +if(NOT VERSION_INFO) + set(VERSION_INFO "0.0.0dev") +endif() +target_compile_definitions(_diskannpy PRIVATE VERSION_INFO="${VERSION_INFO}") + +# Add a post-build command to automatically copy the compiled Python module +if(UPDATE_EDITABLE_INSTALL) +add_custom_command( +TARGET _diskannpy +POST_BUILD +COMMAND ${CMAKE_COMMAND} -E copy + ${CMAKE_CURRENT_BINARY_DIR}/_diskannpy.cpython-*.so + ${CMAKE_SOURCE_DIR}/python/src/ +COMMENT "Copying Python module to python/src directory" +) +endif() \ No newline at end of file diff --git a/packages/leann-backend-diskann/third_party/DiskANN/python/README.md b/packages/leann-backend-diskann/third_party/DiskANN/python/README.md new file mode 100644 index 0000000..a0c9475 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/python/README.md @@ -0,0 +1,55 @@ +# diskannpy + +[![DiskANN Paper](https://img.shields.io/badge/Paper-NeurIPS%3A_DiskANN-blue)](https://papers.nips.cc/paper/9527-rand-nsg-fast-accurate-billion-point-nearest-neighbor-search-on-a-single-node.pdf) +[![DiskANN Paper](https://img.shields.io/badge/Paper-Arxiv%3A_Fresh--DiskANN-blue)](https://arxiv.org/abs/2105.09613) +[![DiskANN Paper](https://img.shields.io/badge/Paper-Filtered--DiskANN-blue)](https://harsha-simhadri.org/pubs/Filtered-DiskANN23.pdf) +[![DiskANN Main](https://github.com/microsoft/DiskANN/actions/workflows/push-test.yml/badge.svg?branch=main)](https://github.com/microsoft/DiskANN/actions/workflows/push-test.yml) +[![PyPI version](https://img.shields.io/pypi/v/diskannpy.svg)](https://pypi.org/project/diskannpy/) +[![Downloads shield](https://pepy.tech/badge/diskannpy)](https://pepy.tech/project/diskannpy) +[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) + +## Installation +Packages published to PyPI will always be built using the latest numpy major.minor release (at this time, 1.25). + +Conda distributions for versions 1.19-1.25 will be completed as a future effort. In the meantime, feel free to +clone this repository and build it yourself. + +## Local Build Instructions +Please see the [Project README](https://github.com/microsoft/DiskANN/blob/main/README.md) for system dependencies and requirements. + +After ensuring you've followed the directions to build the project library and executables, you will be ready to also +build `diskannpy` with these additional instructions. + +### Changing Numpy Version +In the root folder of DiskANN, there is a file `pyproject.toml`. You will need to edit the version of numpy in both the +`[build-system.requires]` section, as well as the `[project.dependencies]` section. The version numbers must match. + +#### Linux +```bash +python3.11 -m venv venv # versions from python3.9 and up should work +source venv/bin/activate +pip install build +python -m build +``` + +#### Windows +```powershell +py -3.11 -m venv venv # versions from python3.9 and up should work +venv\Scripts\Activate.ps1 +pip install build +python -m build +``` + +The built wheel will be placed in the `dist` directory in your DiskANN root. 
Install it using `pip install dist/.whl` + +## Citations +Please cite this software in your work as: +``` +@misc{diskann-github, + author = {Simhadri, Harsha Vardhan and Krishnaswamy, Ravishankar and Srinivasa, Gopal and Subramanya, Suhas Jayaram and Antonijevic, Andrija and Pryce, Dax and Kaczynski, David and Williams, Shane and Gollapudi, Siddarth and Sivashankar, Varun and Karia, Neel and Singh, Aditi and Jaiswal, Shikhar and Mahapatro, Neelam and Adams, Philip and Tower, Bryan and Patel, Yash}}, + title = {{DiskANN: Graph-structured Indices for Scalable, Fast, Fresh and Filtered Approximate Nearest Neighbor Search}}, + url = {https://github.com/Microsoft/DiskANN}, + version = {0.6.1}, + year = {2023} +} +``` diff --git a/packages/leann-backend-diskann/third_party/DiskANN/python/apps/cli/__main__.py b/packages/leann-backend-diskann/third_party/DiskANN/python/apps/cli/__main__.py new file mode 100644 index 0000000..d2c9990 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/python/apps/cli/__main__.py @@ -0,0 +1,152 @@ +import diskannpy as dap +import numpy as np +import numpy.typing as npt + +import fire + +from contextlib import contextmanager +from time import perf_counter + +from typing import Tuple + + +def _basic_setup( + dtype: str, + query_vectors_file: str +) -> Tuple[dap.VectorDType, npt.NDArray[dap.VectorDType]]: + _dtype = dap.valid_dtype(dtype) + vectors_to_query = dap.vectors_from_binary(query_vectors_file, dtype=_dtype) + return _dtype, vectors_to_query + + +def dynamic( + dtype: str, + index_vectors_file: str, + query_vectors_file: str, + build_complexity: int, + graph_degree: int, + K: int, + search_complexity: int, + num_insert_threads: int, + num_search_threads: int, + gt_file: str = "", +): + _dtype, vectors_to_query = _basic_setup(dtype, query_vectors_file) + vectors_to_index = dap.vectors_from_binary(index_vectors_file, dtype=_dtype) + + npts, ndims = vectors_to_index.shape + index = dap.DynamicMemoryIndex( + "l2", _dtype, ndims, npts, build_complexity, graph_degree + ) + + tags = np.arange(1, npts+1, dtype=np.uintc) + timer = Timer() + + with timer.time("batch insert"): + index.batch_insert(vectors_to_index, tags, num_insert_threads) + + delete_tags = np.random.choice( + np.array(range(1, npts + 1, 1), dtype=np.uintc), + size=int(0.5 * npts), + replace=False + ) + with timer.time("mark deletion"): + for tag in delete_tags: + index.mark_deleted(tag) + + with timer.time("consolidation"): + index.consolidate_delete() + + deleted_data = vectors_to_index[delete_tags - 1, :] + + with timer.time("re-insertion"): + index.batch_insert(deleted_data, delete_tags, num_insert_threads) + + with timer.time("batch searched"): + tags, dists = index.batch_search(vectors_to_query, K, search_complexity, num_search_threads) + + # res_ids = tags - 1 + # if gt_file != "": + # recall = utils.calculate_recall_from_gt_file(K, res_ids, gt_file) + # print(f"recall@{K} is {recall}") + +def static( + dtype: str, + index_directory: str, + index_vectors_file: str, + query_vectors_file: str, + build_complexity: int, + graph_degree: int, + K: int, + search_complexity: int, + num_threads: int, + gt_file: str = "", + index_prefix: str = "ann" +): + _dtype, vectors_to_query = _basic_setup(dtype, query_vectors_file) + timer = Timer() + with timer.time("build static index"): + # build index + dap.build_memory_index( + data=index_vectors_file, + metric="l2", + vector_dtype=_dtype, + index_directory=index_directory, + complexity=build_complexity, + graph_degree=graph_degree, + 
num_threads=num_threads, + index_prefix=index_prefix, + alpha=1.2, + use_pq_build=False, + num_pq_bytes=8, + use_opq=False, + ) + + with timer.time("load static index"): + # ready search object + index = dap.StaticMemoryIndex( + metric="l2", + vector_dtype=_dtype, + data_path=index_vectors_file, + index_directory=index_directory, + num_threads=num_threads, # this can be different at search time if you would like + initial_search_complexity=search_complexity, + index_prefix=index_prefix + ) + + ids, dists = index.batch_search(vectors_to_query, K, search_complexity, num_threads) + + # if gt_file != "": + # recall = utils.calculate_recall_from_gt_file(K, ids, gt_file) + # print(f"recall@{K} is {recall}") + +def dynamic_clustered(): + pass + +def generate_clusters(): + pass + + +class Timer: + def __init__(self): + self._start = -1 + + @contextmanager + def time(self, message: str): + start = perf_counter() + if self._start == -1: + self._start = start + yield + now = perf_counter() + print(f"Operation {message} completed in {(now - start):.3f}s, total: {(now - self._start):.3f}s") + + + + +if __name__ == "__main__": + fire.Fire({ + "in-mem-dynamic": dynamic, + "in-mem-static": static, + "in-mem-dynamic-clustered": dynamic_clustered, + "generate-clusters": generate_clusters + }, name="cli") diff --git a/packages/leann-backend-diskann/third_party/DiskANN/python/apps/cluster.py b/packages/leann-backend-diskann/third_party/DiskANN/python/apps/cluster.py new file mode 100644 index 0000000..27a34bb --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/python/apps/cluster.py @@ -0,0 +1,28 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT license. + +import argparse +import utils + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + prog="cluster", description="kmeans cluster points in a file" + ) + + parser.add_argument("-d", "--data_type", required=True) + parser.add_argument("-i", "--indexdata_file", required=True) + parser.add_argument("-k", "--num_clusters", type=int, required=True) + args = parser.parse_args() + + npts, ndims = get_bin_metadata(indexdata_file) + + data = utils.bin_to_numpy(args.data_type, args.indexdata_file) + + offsets, permutation = utils.cluster_and_permute( + args.data_type, npts, ndims, data, args.num_clusters + ) + + permuted_data = data[permutation] + + utils.numpy_to_bin(permuted_data, args.indexdata_file + ".cluster") diff --git a/packages/leann-backend-diskann/third_party/DiskANN/python/apps/in-mem-dynamic.py b/packages/leann-backend-diskann/third_party/DiskANN/python/apps/in-mem-dynamic.py new file mode 100644 index 0000000..f97e131 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/python/apps/in-mem-dynamic.py @@ -0,0 +1,161 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT license. + +import argparse + +import diskannpy +import numpy as np +import utils + +def insert_and_search( + dtype_str, + indexdata_file, + querydata_file, + Lb, + graph_degree, + K, + Ls, + num_insert_threads, + num_search_threads, + gt_file, +) -> dict[str, float]: + """ + + :param dtype_str: + :param indexdata_file: + :param querydata_file: + :param Lb: + :param graph_degree: + :param K: + :param Ls: + :param num_insert_threads: + :param num_search_threads: + :param gt_file: + :return: Dictionary of timings. 
Key is the event and value is the number of seconds the event took + """ + timer_results: dict[str, float] = {} + + method_timer: utils.Timer = utils.Timer() + + npts, ndims = utils.get_bin_metadata(indexdata_file) + + if dtype_str == "float": + dtype = np.float32 + elif dtype_str == "int8": + dtype = np.int8 + elif dtype_str == "uint8": + dtype = np.uint8 + else: + raise ValueError("data_type must be float, int8 or uint8") + + index = diskannpy.DynamicMemoryIndex( + distance_metric="l2", + vector_dtype=dtype, + dimensions=ndims, + max_vectors=npts, + complexity=Lb, + graph_degree=graph_degree + ) + queries = diskannpy.vectors_from_file(querydata_file, dtype) + data = diskannpy.vectors_from_file(indexdata_file, dtype) + + tags = np.zeros(npts, dtype=np.uintc) + timer = utils.Timer() + for i in range(npts): + tags[i] = i + 1 + index.batch_insert(data, tags, num_insert_threads) + compute_seconds = timer.elapsed() + print('batch_insert complete in', compute_seconds, 's') + timer_results["batch_insert_seconds"] = compute_seconds + + delete_tags = np.random.choice( + np.array(range(1, npts + 1, 1), dtype=np.uintc), + size=int(0.5 * npts), + replace=False + ) + + timer.reset() + for tag in delete_tags: + index.mark_deleted(tag) + compute_seconds = timer.elapsed() + timer_results['mark_deletion_seconds'] = compute_seconds + print('mark deletion completed in', compute_seconds, 's') + + timer.reset() + index.consolidate_delete() + compute_seconds = timer.elapsed() + print('consolidation completed in', compute_seconds, 's') + timer_results['consolidation_completed_seconds'] = compute_seconds + + deleted_data = data[delete_tags - 1, :] + + timer.reset() + index.batch_insert(deleted_data, delete_tags, num_insert_threads) + compute_seconds = timer.elapsed() + print('re-insertion completed in', compute_seconds, 's') + timer_results['re-insertion_seconds'] = compute_seconds + + timer.reset() + tags, dists = index.batch_search(queries, K, Ls, num_search_threads) + compute_seconds = timer.elapsed() + print('Batch searched', queries.shape[0], ' queries in ', compute_seconds, 's') + timer_results['batch_searched_seconds'] = compute_seconds + + res_ids = tags - 1 + if gt_file != "": + timer.reset() + recall = utils.calculate_recall_from_gt_file(K, res_ids, gt_file) + print(f"recall@{K} is {recall}") + timer_results['recall_computed_seconds'] = timer.elapsed() + + timer_results['total_time_seconds'] = method_timer.elapsed() + + return timer_results + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + prog="in-mem-dynamic", + description="Inserts points dynamically in a clustered order and search from vectors in a file.", + ) + + parser.add_argument("-d", "--data_type", required=True) + parser.add_argument("-i", "--indexdata_file", required=True) + parser.add_argument("-q", "--querydata_file", required=True) + parser.add_argument("-Lb", "--Lbuild", default=50, type=int) + parser.add_argument("-Ls", "--Lsearch", default=50, type=int) + parser.add_argument("-R", "--graph_degree", default=32, type=int) + parser.add_argument("-TI", "--num_insert_threads", default=8, type=int) + parser.add_argument("-TS", "--num_search_threads", default=8, type=int) + parser.add_argument("-K", default=10, type=int) + parser.add_argument("--gt_file", default="") + parser.add_argument("--json_timings_output", required=False, default=None, help="File to write out timings to as JSON. 
If not specified, timings will not be written out.") + args = parser.parse_args() + + timings = insert_and_search( + args.data_type, + args.indexdata_file, + args.querydata_file, + args.Lbuild, + args.graph_degree, # Build args + args.K, + args.Lsearch, + args.num_insert_threads, + args.num_search_threads, # search args + args.gt_file, + ) + + if args.json_timings_output is not None: + import json + timings['log_file'] = args.json_timings_output + with open(args.json_timings_output, "w") as f: + json.dump(timings, f) + +""" +An ingest optimized example with SIFT1M +source venv/bin/activate +python python/apps/in-mem-dynamic.py -d float \ +-i "$HOME/data/sift/sift_base.fbin" -q "$HOME/data/sift/sift_query.fbin" --gt_file "$HOME/data/sift/gt100_base" \ +-Lb 10 -R 30 -Ls 200 +""" + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/python/apps/in-mem-static.py b/packages/leann-backend-diskann/third_party/DiskANN/python/apps/in-mem-static.py new file mode 100644 index 0000000..9fb9a2c --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/python/apps/in-mem-static.py @@ -0,0 +1,149 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT license. + +import argparse +from xml.dom.pulldom import default_bufsize + +import diskannpy +import numpy as np +import utils + +def build_and_search( + metric, + dtype_str, + index_directory, + indexdata_file, + querydata_file, + Lb, + graph_degree, + K, + Ls, + num_threads, + gt_file, + index_prefix, + search_only +) -> dict[str, float]: + """ + + :param metric: + :param dtype_str: + :param index_directory: + :param indexdata_file: + :param querydata_file: + :param Lb: + :param graph_degree: + :param K: + :param Ls: + :param num_threads: + :param gt_file: + :param index_prefix: + :param search_only: + :return: Dictionary of timings. Key is the event and value is the number of seconds the event took + in wall-clock-time. 
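+
+    Example command line for this module (illustrative paths taken from the SIFT1M
+    examples elsewhere in this directory; the flags map onto the argparse options
+    defined under __main__ below):
+
+        python python/apps/in-mem-static.py -d float -i sift_base.fbin -q sift_query.fbin -G gt100_base -R 32 -Lb 50 -Ls 50 -T 8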
+ """ + timer_results: dict[str, float] = {} + + method_timer: utils.Timer = utils.Timer() + + if dtype_str == "float": + dtype = np.single + elif dtype_str == "int8": + dtype = np.byte + elif dtype_str == "uint8": + dtype = np.ubyte + else: + raise ValueError("data_type must be float, int8 or uint8") + + # build index + if not search_only: + build_index_timer = utils.Timer() + diskannpy.build_memory_index( + data=indexdata_file, + distance_metric=metric, + vector_dtype=dtype, + index_directory=index_directory, + complexity=Lb, + graph_degree=graph_degree, + num_threads=num_threads, + index_prefix=index_prefix, + alpha=1.2, + use_pq_build=False, + num_pq_bytes=8, + use_opq=False, + ) + timer_results["build_index_seconds"] = build_index_timer.elapsed() + + # ready search object + load_index_timer = utils.Timer() + index = diskannpy.StaticMemoryIndex( + distance_metric=metric, + vector_dtype=dtype, + index_directory=index_directory, + num_threads=num_threads, # this can be different at search time if you would like + initial_search_complexity=Ls, + index_prefix=index_prefix + ) + timer_results["load_index_seconds"] = load_index_timer.elapsed() + + queries = utils.bin_to_numpy(dtype, querydata_file) + + query_timer = utils.Timer() + ids, dists = index.batch_search(queries, 10, Ls, num_threads) + query_time = query_timer.elapsed() + qps = round(queries.shape[0]/query_time, 1) + print('Batch searched', queries.shape[0], 'in', query_time, 's @', qps, 'QPS') + timer_results["query_seconds"] = query_time + + if gt_file != "": + recall_timer = utils.Timer() + recall = utils.calculate_recall_from_gt_file(K, ids, gt_file) + print(f"recall@{K} is {recall}") + timer_results["recall_seconds"] = recall_timer.elapsed() + + timer_results['total_time_seconds'] = method_timer.elapsed() + + return timer_results + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + prog="in-mem-static", + description="Static in-memory build and search from vectors in a file", + ) + + parser.add_argument("-m", "--metric", required=False, default="l2") + parser.add_argument("-d", "--data_type", required=True) + parser.add_argument("-id", "--index_directory", required=False, default=".") + parser.add_argument("-i", "--indexdata_file", required=True) + parser.add_argument("-q", "--querydata_file", required=True) + parser.add_argument("-Lb", "--Lbuild", default=50, type=int) + parser.add_argument("-Ls", "--Lsearch", default=50, type=int) + parser.add_argument("-R", "--graph_degree", default=32, type=int) + parser.add_argument("-T", "--num_threads", default=8, type=int) + parser.add_argument("-K", default=10, type=int) + parser.add_argument("-G", "--gt_file", default="") + parser.add_argument("-ip", "--index_prefix", required=False, default="ann") + parser.add_argument("--search_only", required=False, default=False) + parser.add_argument("--json_timings_output", required=False, default=None, help="File to write out timings to as JSON. 
If not specified, timings will not be written out.") + args = parser.parse_args() + + timings: dict[str, float] = build_and_search( + args.metric, + args.data_type, + args.index_directory.strip(), + args.indexdata_file.strip(), + args.querydata_file.strip(), + args.Lbuild, + args.graph_degree, # Build args + args.K, + args.Lsearch, + args.num_threads, # search args + args.gt_file, + args.index_prefix, + args.search_only + ) + + if args.json_timings_output is not None: + import json + timings['log_file'] = args.json_timings_output + with open(args.json_timings_output, "w") as f: + json.dump(timings, f) diff --git a/packages/leann-backend-diskann/third_party/DiskANN/python/apps/insert-in-clustered-order.py b/packages/leann-backend-diskann/third_party/DiskANN/python/apps/insert-in-clustered-order.py new file mode 100644 index 0000000..25cb9d5 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/python/apps/insert-in-clustered-order.py @@ -0,0 +1,103 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT license. + +import argparse + +import diskannpy +import numpy as np +import utils + + +def insert_and_search( + dtype_str, + indexdata_file, + querydata_file, + Lb, + graph_degree, + num_clusters, + num_insert_threads, + K, + Ls, + num_search_threads, + gt_file, +): + npts, ndims = utils.get_bin_metadata(indexdata_file) + + if dtype_str == "float": + dtype = np.float32 + elif dtype_str == "int8": + dtype = np.int8 + elif dtype_str == "uint8": + dtype = np.uint8 + else: + raise ValueError("data_type must be float, int8 or uint8") + + index = diskannpy.DynamicMemoryIndex( + distance_metric="l2", + vector_dtype=dtype, + dimensions=ndims, + max_vectors=npts, + complexity=Lb, + graph_degree=graph_degree + ) + queries = diskannpy.vectors_from_file(querydata_file, dtype) + data = diskannpy.vectors_from_file(indexdata_file, dtype) + + offsets, permutation = utils.cluster_and_permute( + dtype_str, npts, ndims, data, num_clusters + ) + + i = 0 + timer = utils.Timer() + for c in range(num_clusters): + cluster_index_range = range(offsets[c], offsets[c + 1]) + cluster_indices = np.array(permutation[cluster_index_range], dtype=np.uint32) + cluster_data = data[cluster_indices, :] + index.batch_insert(cluster_data, cluster_indices + 1, num_insert_threads) + print('Inserted cluster', c, 'in', timer.elapsed(), 's') + tags, dists = index.batch_search(queries, K, Ls, num_search_threads) + print('Batch searched', queries.shape[0], 'queries in', timer.elapsed(), 's') + res_ids = tags - 1 + + if gt_file != "": + recall = utils.calculate_recall_from_gt_file(K, res_ids, gt_file) + print(f"recall@{K} is {recall}") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + prog="in-mem-dynamic", + description="Inserts points dynamically in a clustered order and search from vectors in a file.", + ) + + parser.add_argument("-d", "--data_type", required=True) + parser.add_argument("-i", "--indexdata_file", required=True) + parser.add_argument("-q", "--querydata_file", required=True) + parser.add_argument("-Lb", "--Lbuild", default=50, type=int) + parser.add_argument("-Ls", "--Lsearch", default=50, type=int) + parser.add_argument("-R", "--graph_degree", default=32, type=int) + parser.add_argument("-TI", "--num_insert_threads", default=8, type=int) + parser.add_argument("-TS", "--num_search_threads", default=8, type=int) + parser.add_argument("-C", "--num_clusters", default=32, type=int) + parser.add_argument("-K", default=10, type=int) + 
parser.add_argument("--gt_file", default="") + args = parser.parse_args() + + insert_and_search( + args.data_type, + args.indexdata_file, + args.querydata_file, + args.Lbuild, + args.graph_degree, # Build args + args.num_clusters, + args.num_insert_threads, + args.K, + args.Lsearch, + args.num_search_threads, # search args + args.gt_file, + ) + +# An ingest optimized example with SIFT1M +# python3 ~/DiskANN/python/apps/insert-in-clustered-order.py -d float \ +# -i sift_base.fbin -q sift_query.fbin --gt_file gt100_base \ +# -Lb 10 -R 30 -Ls 200 -C 32 \ No newline at end of file diff --git a/packages/leann-backend-diskann/third_party/DiskANN/python/apps/utils.py b/packages/leann-backend-diskann/third_party/DiskANN/python/apps/utils.py new file mode 100644 index 0000000..a526984 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/python/apps/utils.py @@ -0,0 +1,120 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT license. + +import numpy as np +from scipy.cluster.vq import vq, kmeans2 +from typing import Tuple +from time import perf_counter + + +def get_bin_metadata(bin_file) -> Tuple[int, int]: + array = np.fromfile(file=bin_file, dtype=np.uint32, count=2) + return array[0], array[1] + + +def bin_to_numpy(dtype, bin_file) -> np.ndarray: + npts, ndims = get_bin_metadata(bin_file) + return np.fromfile(file=bin_file, dtype=dtype, offset=8).reshape(npts, ndims) + + +class Timer: + last = perf_counter() + + def reset(self): + new = perf_counter() + self.last = new + + def elapsed(self, round_digit:int = 3): + new = perf_counter() + elapsed_time = new - self.last + self.last = new + return round(elapsed_time, round_digit) + + +def numpy_to_bin(array, out_file): + shape = np.shape(array) + npts = shape[0].astype(np.uint32) + ndims = shape[1].astype(np.uint32) + f = open(out_file, "wb") + f.write(npts.tobytes()) + f.write(ndims.tobytes()) + f.write(array.tobytes()) + f.close() + + +def read_gt_file(gt_file) -> Tuple[np.ndarray[int], np.ndarray[float]]: + """ + Return ids and distances to queries + """ + nq, K = get_bin_metadata(gt_file) + ids = np.fromfile(file=gt_file, dtype=np.uint32, offset=8, count=nq * K).reshape( + nq, K + ) + dists = np.fromfile( + file=gt_file, dtype=np.float32, offset=8 + nq * K * 4, count=nq * K + ).reshape(nq, K) + return ids, dists + + +def calculate_recall( + result_set_indices: np.ndarray[int], + truth_set_indices: np.ndarray[int], + recall_at: int = 5, +) -> float: + """ + result_set_indices and truth_set_indices correspond by row index. the columns in each row contain the indices of + the nearest neighbors, with result_set_indices being the approximate nearest neighbor results and truth_set_indices + being the brute force nearest neighbor calculation via sklearn's NearestNeighbor class. 
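+
+    A tiny worked example (hypothetical ids, recall@2 over two queries: the first
+    query matches on one id, the second on both, so 3 of the 4 slots are hits):
+
+    >>> approx = np.array([[1, 2], [3, 4]])
+    >>> exact = np.array([[1, 9], [4, 3]])
+    >>> calculate_recall(approx, exact, recall_at=2)
+    0.75
+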
+ :param result_set_indices: + :param truth_set_indices: + :param recall_at: + :return: + """ + found = 0 + for i in range(0, result_set_indices.shape[0]): + result_set_set = set(result_set_indices[i][0:recall_at]) + truth_set_set = set(truth_set_indices[i][0:recall_at]) + found += len(result_set_set.intersection(truth_set_set)) + return found / (result_set_indices.shape[0] * recall_at) + + +def calculate_recall_from_gt_file(K: int, ids: np.ndarray[int], gt_file: str) -> float: + """ + Calculate recall from ids returned from search and those read from file + """ + gt_ids, gt_dists = read_gt_file(gt_file) + return calculate_recall(ids, gt_ids, K) + + +def cluster_and_permute( + dtype_str, npts, ndims, data, num_clusters +) -> Tuple[np.ndarray[int], np.ndarray[int]]: + """ + Cluster the data and return permutation of row indices + that would group indices of the same cluster together + """ + sample_size = min(100000, npts) + sample_indices = np.random.choice(range(npts), size=sample_size, replace=False) + sampled_data = data[sample_indices, :] + centroids, sample_labels = kmeans2(sampled_data, num_clusters, minit="++", iter=10) + labels, dist = vq(data, centroids) + + count = np.zeros(num_clusters) + for i in range(npts): + count[labels[i]] += 1 + print("Cluster counts") + print(count) + + offsets = np.zeros(num_clusters + 1, dtype=int) + for i in range(0, num_clusters, 1): + offsets[i + 1] = offsets[i] + count[i] + + permutation = np.zeros(npts, dtype=int) + counters = np.zeros(num_clusters, dtype=int) + for i in range(npts): + label = labels[i] + row = offsets[label] + counters[label] + counters[label] += 1 + permutation[row] = i + + return offsets, permutation diff --git a/packages/leann-backend-diskann/third_party/DiskANN/python/include/builder.h b/packages/leann-backend-diskann/third_party/DiskANN/python/include/builder.h new file mode 100644 index 0000000..56677ac --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/python/include/builder.h @@ -0,0 +1,27 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include +#include + +#include "common.h" +#include "distance.h" + +namespace diskannpy +{ +template +void build_disk_index(diskann::Metric metric, const std::string &data_file_path, const std::string &index_prefix_path, + uint32_t complexity, uint32_t graph_degree, double final_index_ram_limit, + double indexing_ram_budget, uint32_t num_threads, uint32_t pq_disk_bytes, + const std::string &codebook_prefix); + +template +void build_memory_index(diskann::Metric metric, const std::string &vector_bin_path, + const std::string &index_output_path, uint32_t graph_degree, uint32_t complexity, float alpha, + uint32_t num_threads, bool use_pq_build, size_t num_pq_bytes, bool use_opq, + bool use_tags = false, const std::string &filter_labels_file = "", + const std::string &universal_label = "", uint32_t filter_complexity = 0); + +} // namespace diskannpy diff --git a/packages/leann-backend-diskann/third_party/DiskANN/python/include/common.h b/packages/leann-backend-diskann/third_party/DiskANN/python/include/common.h new file mode 100644 index 0000000..7c63534 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/python/include/common.h @@ -0,0 +1,24 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
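As a concrete reading of `calculate_recall` above: recall@k is, averaged over queries, the fraction of the top-k approximate ids that also appear among the top-k ground-truth ids. A minimal hand-worked sketch (values are illustrative):

import numpy as np

# Two queries, k = 2. Rows correspond per query; columns hold neighbor ids.
approx = np.array([[1, 5], [7, 2]])   # ids returned by the index (illustrative)
truth  = np.array([[1, 9], [2, 7]])   # ground-truth ids (illustrative)

k = 2
found = 0
for i in range(approx.shape[0]):
    found += len(set(approx[i][:k]) & set(truth[i][:k]))
recall_at_k = found / (approx.shape[0] * k)   # (1 + 2) / 4 = 0.75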
+ +#pragma once + +#include +#include + +#include +#include + +namespace py = pybind11; + +namespace diskannpy +{ + +typedef uint32_t filterT; + +typedef uint32_t StaticIdType; +typedef uint32_t DynamicIdType; + +template using NeighborsAndDistances = std::pair, py::array_t>; + +}; // namespace diskannpy diff --git a/packages/leann-backend-diskann/third_party/DiskANN/python/include/dynamic_memory_index.h b/packages/leann-backend-diskann/third_party/DiskANN/python/include/dynamic_memory_index.h new file mode 100644 index 0000000..02d6b8c --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/python/include/dynamic_memory_index.h @@ -0,0 +1,53 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include +#include + +#include +#include + +#include "common.h" +#include "index.h" +#include "parameters.h" + +namespace py = pybind11; + +namespace diskannpy +{ + +template +class DynamicMemoryIndex +{ + public: + DynamicMemoryIndex(diskann::Metric m, size_t dimensions, size_t max_vectors, uint32_t complexity, + uint32_t graph_degree, bool saturate_graph, uint32_t max_occlusion_size, float alpha, + uint32_t num_threads, uint32_t filter_complexity, uint32_t num_frozen_points, + uint32_t initial_search_complexity, uint32_t initial_search_threads, + bool concurrent_consolidation); + + void load(const std::string &index_path); + int insert(const py::array_t &vector, DynamicIdType id); + py::array_t batch_insert(py::array_t &vectors, + py::array_t &ids, int32_t num_inserts, + int num_threads = 0); + int mark_deleted(DynamicIdType id); + void save(const std::string &save_path, bool compact_before_save = false); + NeighborsAndDistances search(py::array_t &query, uint64_t knn, + uint64_t complexity); + NeighborsAndDistances batch_search(py::array_t &queries, + uint64_t num_queries, uint64_t knn, uint64_t complexity, + uint32_t num_threads); + void consolidate_delete(); + size_t num_points(); + + + private: + const uint32_t _initial_search_complexity; + const diskann::IndexWriteParameters _write_parameters; + diskann::Index _index; +}; + +}; // namespace diskannpy \ No newline at end of file diff --git a/packages/leann-backend-diskann/third_party/DiskANN/python/include/static_disk_index.h b/packages/leann-backend-diskann/third_party/DiskANN/python/include/static_disk_index.h new file mode 100644 index 0000000..a3b79c4 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/python/include/static_disk_index.h @@ -0,0 +1,65 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
+ +#pragma once + +#include +#include + +#include +#include + +#ifdef _WINDOWS +#include "windows_aligned_file_reader.h" +#elif __APPLE__ +#include "apple_aligned_file_reader.h" +#else +#include "linux_aligned_file_reader.h" +#endif + +#include "common.h" +#include "pq_flash_index.h" + +namespace py = pybind11; + +namespace diskannpy +{ + +#ifdef _WINDOWS +typedef WindowsAlignedFileReader PlatformSpecificAlignedFileReader; +#elif __APPLE__ +typedef AppleAlignedFileReader PlatformSpecificAlignedFileReader; +#else +typedef LinuxAlignedFileReader PlatformSpecificAlignedFileReader; +#endif + +template class StaticDiskIndex +{ + public: + StaticDiskIndex(diskann::Metric metric, const std::string &index_path_prefix, uint32_t num_threads, + size_t num_nodes_to_cache, uint32_t cache_mechanism, const std::string &pq_prefix, + const std::string &partition_prefix); + + void cache_bfs_levels(size_t num_nodes_to_cache); + + void cache_sample_paths(size_t num_nodes_to_cache, const std::string &warmup_query_file, uint32_t num_threads); + + NeighborsAndDistances search(py::array_t &query, + uint64_t knn, uint64_t complexity, uint64_t beam_width, + bool USE_DEFERRED_FETCH = false, bool skip_search_reorder = false, + bool recompute_beighbor_embeddings = false, bool dedup_node_dis = false, + float prune_ratio = 0, bool batch_recompute = false, + bool global_pruning = false); + + NeighborsAndDistances batch_search( + py::array_t &queries, uint64_t num_queries, uint64_t knn, + uint64_t complexity, uint64_t beam_width, uint32_t num_threads, bool USE_DEFERRED_FETCH = false, + bool skip_search_reorder = false, bool recompute_beighbor_embeddings = false, bool dedup_node_dis = false, + float prune_ratio = 0, bool batch_recompute = false, bool global_pruning = false); + + private: + std::shared_ptr _reader; + std::shared_ptr _graph_reader; + diskann::PQFlashIndex
<DT>
_index; +}; +} // namespace diskannpy diff --git a/packages/leann-backend-diskann/third_party/DiskANN/python/include/static_memory_index.h b/packages/leann-backend-diskann/third_party/DiskANN/python/include/static_memory_index.h new file mode 100644 index 0000000..6ed5a08 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/python/include/static_memory_index.h @@ -0,0 +1,40 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include +#include + +#include +#include + +#include "common.h" +#include "index.h" + +namespace py = pybind11; + +namespace diskannpy +{ + +template class StaticMemoryIndex +{ + public: + StaticMemoryIndex(diskann::Metric m, const std::string &index_prefix, size_t num_points, size_t dimensions, + uint32_t num_threads, uint32_t initial_search_complexity); + + NeighborsAndDistances search(py::array_t &query, + uint64_t knn, uint64_t complexity); + + NeighborsAndDistances search_with_filter( + py::array_t &query, uint64_t knn, uint64_t complexity, + filterT filter); + + NeighborsAndDistances batch_search( + py::array_t &queries, uint64_t num_queries, uint64_t knn, + uint64_t complexity, uint32_t num_threads); + + private: + diskann::Index _index; +}; +} // namespace diskannpy \ No newline at end of file diff --git a/packages/leann-backend-diskann/third_party/DiskANN/python/src/__init__.py b/packages/leann-backend-diskann/third_party/DiskANN/python/src/__init__.py new file mode 100644 index 0000000..c2e1b07 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/python/src/__init__.py @@ -0,0 +1,138 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT license. + +""" +# Documentation Overview +`diskannpy` is mostly structured around 2 distinct processes: [Index Builder Functions](#index-builders) and [Search Classes](#search-classes) + +It also includes a few nascent [utilities](#utilities). + +And lastly, it makes substantial use of type hints, with various shorthand [type aliases](#parameter-and-response-type-aliases) documented. +When reading the `diskannpy` code we refer to the type aliases, though `pdoc` helpfully expands them. + +## Index Builders +- `build_disk_index` - To build an index that cannot fully fit into memory when searching +- `build_memory_index` - To build an index that can fully fit into memory when searching + +## Search Classes +- `StaticMemoryIndex` - for indices that can fully fit in memory and won't be changed during the search operations +- `StaticDiskIndex` - for indices that cannot fully fit in memory, thus relying on disk IO to search, and also won't be changed during search operations +- `DynamicMemoryIndex` - for indices that can fully fit in memory and will be mutated via insert/deletion operations as well as search operations + +## Parameter Defaults +- `diskannpy.defaults` - Default values exported from the C++ extension for Python users + +## Parameter and Response Type Aliases +- `DistanceMetric` - What distance metrics does `diskannpy` support? +- `VectorDType` - What vector datatypes does `diskannpy` support? +- `QueryResponse` - What can I expect as a response to my search? +- `QueryResponseBatch` - What can I expect as a response to my batch search? +- `VectorIdentifier` - What types do `diskannpy` support as vector identifiers? +- `VectorIdentifierBatch` - A batch of identifiers of the exact same type. The type can change, but they must **all** change. 
+- `VectorLike` - How does a vector look to `diskannpy`, to be inserted or searched with. +- `VectorLikeBatch` - A batch of those vectors, to be inserted or searched with. +- `Metadata` - DiskANN vector binary file metadata (num_points, vector_dim) + +## Utilities +- `vectors_to_file` - Turns a 2 dimensional `numpy.typing.NDArray[VectorDType]` with shape `(number_of_points, vector_dim)` into a DiskANN vector bin file. +- `vectors_from_file` - Reads a DiskANN vector bin file representing stored vectors into a numpy ndarray. +- `vectors_metadata_from_file` - Reads metadata stored in a DiskANN vector bin file without reading the entire file +- `tags_to_file` - Turns a 1 dimensional `numpy.typing.NDArray[VectorIdentifier]` into a DiskANN tags bin file. +- `tags_from_file` - Reads a DiskANN tags bin file representing stored tags into a numpy ndarray. +- `valid_dtype` - Checks if a given vector dtype is supported by `diskannpy` +""" + +from typing import Any, Literal, NamedTuple, Type, Union + +import numpy as np +from numpy import typing as npt + +DistanceMetric = Literal["l2", "mips", "cosine"] +""" Type alias for one of {"l2", "mips", "cosine"} """ +VectorDType = Union[Type[np.float32], Type[np.int8], Type[np.uint8]] +""" Type alias for one of {`numpy.float32`, `numpy.int8`, `numpy.uint8`} """ +VectorLike = npt.NDArray[VectorDType] +""" Type alias for something that can be treated as a vector """ +VectorLikeBatch = npt.NDArray[VectorDType] +""" Type alias for a batch of VectorLikes """ +VectorIdentifier = np.uint32 +""" +Type alias for a vector identifier, whether it be an implicit array index identifier from StaticMemoryIndex or +StaticDiskIndex, or an explicit tag identifier from DynamicMemoryIndex +""" +VectorIdentifierBatch = npt.NDArray[np.uint32] +""" Type alias for a batch of VectorIdentifiers """ + + +class QueryResponse(NamedTuple): + """ + Tuple with two values, identifiers and distances. Both are 1d arrays, positionally correspond, and will contain the + nearest neighbors from [0..k_neighbors) + """ + + identifiers: npt.NDArray[VectorIdentifier] + """ A `numpy.typing.NDArray[VectorIdentifier]` array of vector identifiers, 1 dimensional """ + distances: npt.NDArray[np.float32] + """ + A `numpy.typing.NDAarray[numpy.float32]` of distances as calculated by the distance metric function, 1 dimensional + """ + + +class QueryResponseBatch(NamedTuple): + """ + Tuple with two values, identifiers and distances. Both are 2d arrays, with dimensionality determined by the + rows corresponding to the number of queries made, and the columns corresponding to the k neighbors + requested. The two 2d arrays have an implicit, position-based relationship + """ + + identifiers: npt.NDArray[VectorIdentifier] + """ + A `numpy.typing.NDArray[VectorIdentifier]` array of vector identifiers, 2 dimensional. The row corresponds to index + of the query, and the column corresponds to the k neighbors requested + """ + distances: np.ndarray[np.float32] + """ + A `numpy.typing.NDAarray[numpy.float32]` of distances as calculated by the distance metric function, 2 dimensional. + The row corresponds to the index of the query, and the column corresponds to the distance of the query to the + *k-th* neighbor + """ + + +from . 
import defaults +from ._builder import build_disk_index, build_memory_index +from ._common import valid_dtype +from ._dynamic_memory_index import DynamicMemoryIndex +from ._files import ( + Metadata, + tags_from_file, + tags_to_file, + vectors_from_file, + vectors_metadata_from_file, + vectors_to_file, +) +from ._static_disk_index import StaticDiskIndex +from ._static_memory_index import StaticMemoryIndex + +__all__ = [ + "build_disk_index", + "build_memory_index", + "StaticDiskIndex", + "StaticMemoryIndex", + "DynamicMemoryIndex", + "defaults", + "DistanceMetric", + "VectorDType", + "QueryResponse", + "QueryResponseBatch", + "VectorIdentifier", + "VectorIdentifierBatch", + "VectorLike", + "VectorLikeBatch", + "Metadata", + "vectors_metadata_from_file", + "vectors_to_file", + "vectors_from_file", + "tags_to_file", + "tags_from_file", + "valid_dtype", +] diff --git a/packages/leann-backend-diskann/third_party/DiskANN/python/src/_builder.py b/packages/leann-backend-diskann/third_party/DiskANN/python/src/_builder.py new file mode 100644 index 0000000..6567020 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/python/src/_builder.py @@ -0,0 +1,349 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT license. + +import json +import os +import shutil +from pathlib import Path +from typing import Optional, Tuple, Union + +import numpy as np + +from . import DistanceMetric, VectorDType, VectorIdentifierBatch, VectorLikeBatch +from . import _diskannpy as _native_dap +from ._common import ( + _assert, + _assert_is_nonnegative_uint32, + _assert_is_positive_uint32, + _castable_dtype_or_raise, + _valid_metric, + _write_index_metadata, + valid_dtype, +) +from ._diskannpy import defaults +from ._files import tags_to_file, vectors_metadata_from_file, vectors_to_file + + +def _valid_path_and_dtype( + data: Union[str, VectorLikeBatch], + vector_dtype: VectorDType, + index_path: str, + index_prefix: str, +) -> Tuple[str, VectorDType]: + if isinstance(data, str): + vector_bin_path = data + _assert( + Path(data).exists() and Path(data).is_file(), + "if data is of type `str`, it must both exist and be a file", + ) + vector_dtype_actual = valid_dtype(vector_dtype) + else: + vector_bin_path = os.path.join(index_path, f"{index_prefix}_vectors.bin") + # if Path(vector_bin_path).exists(): + # raise ValueError( + # f"The path {vector_bin_path} already exists. Remove it and try again." + # ) + vector_dtype_actual = valid_dtype(data.dtype) + # vectors_to_file(vector_file=vector_bin_path, vectors=data) + + return vector_bin_path, vector_dtype_actual + + +def build_disk_index( + data: Union[str, VectorLikeBatch], + distance_metric: DistanceMetric, + index_directory: str, + complexity: int, + graph_degree: int, + search_memory_maximum: float, + build_memory_maximum: float, + num_threads: int, + pq_disk_bytes: int = defaults.PQ_DISK_BYTES, + vector_dtype: Optional[VectorDType] = None, + index_prefix: str = "ann", + codebook_prefix: str = "", +) -> None: + """ + This function will construct a DiskANN disk index. Disk indices are ideal for very large datasets that + are too large to fit in memory. Memory is still used, but it is primarily used to provide precise disk + locations for fast retrieval of smaller subsets of the index without compromising much on recall. + + If you provide a numpy array, it will save this array to disk in a temp location + in the format DiskANN's PQ Flash Index builder requires. 
This temp folder is deleted upon index creation completion + or error. + + ## Distance Metric and Vector Datatype Restrictions + | Metric \ Datatype | np.float32 | np.uint8 | np.int8 | + |-------------------|------------|----------|---------| + | L2 | ✅ | ✅ | ✅ | + | MIPS | ✅ | ❌ | ❌ | + | Cosine [^bug-in-disk-cosine] | ❌ | ❌ | ❌ | + + [^bug-in-disk-cosine]: For StaticDiskIndex, Cosine distances are not currently supported. + + ### Parameters + - **data**: Either a `str` representing a path to a DiskANN vector bin file, or a numpy.ndarray, + of a supported dtype, in 2 dimensions. Note that `vector_dtype` must be provided if data is a `str` + - **distance_metric**: A `str`, strictly one of {"l2", "mips", "cosine"}. `l2` and `cosine` are supported for all 3 + vector dtypes, but `mips` is only available for single precision floats. + - **index_directory**: The index files will be saved to this **existing** directory path + - **complexity**: The size of the candidate nearest neighbor list to use when building the index. Values between 75 + and 200 are typical. Larger values will take more time to build but result in indices that provide higher recall + for the same search complexity. Use a value that is at least as large as `graph_degree` unless you are prepared + to compromise on quality + - **graph_degree**: The degree of the graph index, typically between 60 and 150. A larger maximum degree will + result in larger indices and longer indexing times, but better search quality. + - **search_memory_maximum**: Build index with the expectation that the search will use at most + `search_memory_maximum`, in gb. + - **build_memory_maximum**: Build index using at most `build_memory_maximum` in gb. Building processes typically + require more memory, while search memory can be reduced. + - **num_threads**: Number of threads to use when creating this index. `0` is used to indicate all available + logical processors should be used. + - **pq_disk_bytes**: Use `0` to store uncompressed data on SSD. This allows the index to asymptote to 100% + recall. If your vectors are too large to store in SSD, this parameter provides the option to compress the + vectors using PQ for storing on SSD. This will trade off recall. You would also want this to be greater + than the number of bytes used for the PQ compressed data stored in-memory. Default is `0`. + - **vector_dtype**: Required if the provided `data` is of type `str`, else we use the `data.dtype` if np array. + - **index_prefix**: The prefix of the index files. Defaults to "ann". 
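A minimal usage sketch of this builder, assuming an existing DiskANN-format vector file and an existing output directory (paths and memory budgets are illustrative):

import numpy as np
import diskannpy

diskannpy.build_disk_index(
    data="/data/base_vectors.fbin",      # existing DiskANN vector bin file (illustrative path)
    vector_dtype=np.float32,             # required because `data` is a path
    distance_metric="l2",
    index_directory="/data/disk_index",  # must already exist
    complexity=128,
    graph_degree=64,
    search_memory_maximum=4.0,           # GB budget at search time
    build_memory_maximum=16.0,           # GB budget while building
    num_threads=0,                       # 0 = all logical processors
    pq_disk_bytes=0,                     # 0 = keep full-precision vectors on SSD
)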
+ """ + + _assert( + (isinstance(data, str) and vector_dtype is not None) + or isinstance(data, np.ndarray), + "vector_dtype is required if data is a str representing a path to the vector bin file", + ) + dap_metric = _valid_metric(distance_metric) + _assert_is_positive_uint32(complexity, "complexity") + _assert_is_positive_uint32(graph_degree, "graph_degree") + _assert(search_memory_maximum > 0, "search_memory_maximum must be larger than 0") + _assert(build_memory_maximum > 0, "build_memory_maximum must be larger than 0") + _assert_is_nonnegative_uint32(num_threads, "num_threads") + _assert_is_nonnegative_uint32(pq_disk_bytes, "pq_disk_bytes") + _assert(index_prefix != "", "index_prefix cannot be an empty string") + + index_path = Path(index_directory) + _assert( + index_path.exists() and index_path.is_dir(), + "index_directory must both exist and be a directory", + ) + + vector_bin_path, vector_dtype_actual = _valid_path_and_dtype( + data, vector_dtype, index_directory, index_prefix + ) + _assert(dap_metric != _native_dap.COSINE, "Cosine is currently not supported in StaticDiskIndex") + if dap_metric == _native_dap.INNER_PRODUCT: + _assert( + vector_dtype_actual == np.float32, + "Integral vector dtypes (np.uint8, np.int8) are not supported with distance metric mips" + ) + + num_points, dimensions = vectors_metadata_from_file(vector_bin_path) + + if vector_dtype_actual == np.uint8: + _builder = _native_dap.build_disk_uint8_index + elif vector_dtype_actual == np.int8: + _builder = _native_dap.build_disk_int8_index + else: + _builder = _native_dap.build_disk_float_index + + index_prefix_path = os.path.join(index_directory, index_prefix) + + _builder( + distance_metric=dap_metric, + data_file_path=vector_bin_path, + index_prefix_path=index_prefix_path, + complexity=complexity, + graph_degree=graph_degree, + final_index_ram_limit=search_memory_maximum, + indexing_ram_budget=build_memory_maximum, + num_threads=num_threads, + pq_disk_bytes=pq_disk_bytes, + codebook_prefix=codebook_prefix, + ) + _write_index_metadata( + index_prefix_path, vector_dtype_actual, dap_metric, num_points, dimensions + ) + + +def build_memory_index( + data: Union[str, VectorLikeBatch], + distance_metric: DistanceMetric, + index_directory: str, + complexity: int, + graph_degree: int, + num_threads: int, + alpha: float = defaults.ALPHA, + use_pq_build: bool = defaults.USE_PQ_BUILD, + num_pq_bytes: int = defaults.NUM_PQ_BYTES, + use_opq: bool = defaults.USE_OPQ, + vector_dtype: Optional[VectorDType] = None, + tags: Union[str, VectorIdentifierBatch] = "", + filter_labels: Optional[list[list[str]]] = None, + universal_label: str = "", + filter_complexity: int = defaults.FILTER_COMPLEXITY, + index_prefix: str = "ann", +) -> None: + """ + This function will construct a DiskANN memory index. Memory indices are ideal for smaller datasets whose + indices can fit into memory. Memory indices are faster than disk indices, but usually cannot scale to massive + sizes in an individual index on an individual machine. + + `diskannpy`'s memory indices take two forms: a `diskannpy.StaticMemoryIndex`, which will not be mutated, only + searched upon, and a `diskannpy.DynamicMemoryIndex`, which can be mutated AND searched upon in the same process. + + ## Important Note: + You **must** determine the type of index you are building for. If you are building for a + `diskannpy.DynamicMemoryIndex`, you **must** supply a valid value for the `tags` parameter. **Do not supply + tags if the index is intended to be `diskannpy.StaticMemoryIndex`**! 
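For concreteness, a short sketch of the tag convention implied by the note above (data and sizes are illustrative). The tags array is supplied only when the index will later be opened as a `diskannpy.DynamicMemoryIndex`:

import numpy as np

vectors = np.random.default_rng(0).random((1_000, 96), dtype=np.float32)  # illustrative data
# DynamicMemoryIndex requires explicit, non-zero uint32 identifiers, one per vector,
# so a simple 1-based range is a valid choice.
tags = np.arange(1, vectors.shape[0] + 1, dtype=np.uint32)
# build_memory_index(data=vectors, ..., tags=tags, ...)   -> later loaded as a DynamicMemoryIndex
# build_memory_index(data=vectors, ...)                   -> no tags; later loaded as a StaticMemoryIndex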
+ + ## Distance Metric and Vector Datatype Restrictions + + | Metric \ Datatype | np.float32 | np.uint8 | np.int8 | + |-------------------|------------|----------|---------| + | L2 | ✅ | ✅ | ✅ | + | MIPS | ✅ | ❌ | ❌ | + | Cosine | ✅ | ✅ | ✅ | + + ### Parameters + + - **data**: Either a `str` representing a path to an existing DiskANN vector bin file, or a numpy.ndarray of a + supported dtype in 2 dimensions. Note that `vector_dtype` must be provided if `data` is a `str`. + - **distance_metric**: A `str`, strictly one of {"l2", "mips", "cosine"}. `l2` and `cosine` are supported for all 3 + vector dtypes, but `mips` is only available for single precision floats. + - **index_directory**: The index files will be saved to this **existing** directory path + - **complexity**: The size of the candidate nearest neighbor list to use when building the index. Values between 75 + and 200 are typical. Larger values will take more time to build but result in indices that provide higher recall + for the same search complexity. Use a value that is at least as large as `graph_degree` unless you are prepared + to compromise on quality + - **graph_degree**: The degree of the graph index, typically between 60 and 150. A larger maximum degree will + result in larger indices and longer indexing times, but better search quality. + - **num_threads**: Number of threads to use when creating this index. `0` is used to indicate all available + logical processors should be used. + - **alpha**: The alpha parameter (>=1) is used to control the nature and number of points that are added to the + graph. A higher alpha value (e.g., 1.4) will result in fewer hops (and IOs) to convergence, but probably more + distance comparisons compared to a lower alpha value. + - **use_pq_build**: Use product quantization during build. Product quantization is a lossy compression technique + that can reduce the size of the index on disk. This will trade off recall. Default is `True`. + - **num_pq_bytes**: The number of bytes used to store the PQ compressed data in memory. This will trade off recall. + Default is `0`. + - **use_opq**: Use optimized product quantization during build. + - **vector_dtype**: Required if the provided `data` is of type `str`, else we use the `data.dtype` if np array. + - **tags**: Tags can be defined either as a path on disk to an existing .tags file, or provided as a np.array of + the same length as the number of vectors. Tags are used to identify vectors in the index via your *own* + numbering conventions, and is absolutely required for loading DynamicMemoryIndex indices `from_file`. + - **filter_labels**: An optional, but exhaustive list of categories for each vector. This is used to filter + search results by category. If provided, this must be a list of lists, where each inner list is a list of + categories for the corresponding vector. For example, if you have 3 vectors, and the first vector belongs to + categories "a" and "b", the second vector belongs to category "b", and the third vector belongs to no categories, + you would provide `filter_labels=[["a", "b"], ["b"], []]`. If you do not want to provide categories for a + particular vector, you can provide an empty list. If you do not want to provide categories for any vectors, + you can provide `None` for this parameter (which is the default) + - **universal_label**: An optional label that indicates that this vector should be included in *every* search + in which it also meets the knn search criteria. 
+ - **filter_complexity**: Complexity to use when using filters. Default is 0. 0 is strictly invalid if you are + using filters. + - **index_prefix**: The prefix of the index files. Defaults to "ann". + """ + _assert( + (isinstance(data, str) and vector_dtype is not None) + or isinstance(data, np.ndarray), + "vector_dtype is required if data is a str representing a path to the vector bin file", + ) + dap_metric = _valid_metric(distance_metric) + _assert_is_positive_uint32(complexity, "complexity") + _assert_is_positive_uint32(graph_degree, "graph_degree") + _assert( + alpha >= 1, + "alpha must be >= 1, and realistically should be kept between [1.0, 2.0)", + ) + _assert_is_nonnegative_uint32(num_threads, "num_threads") + _assert_is_nonnegative_uint32(num_pq_bytes, "num_pq_bytes") + _assert_is_nonnegative_uint32(filter_complexity, "filter_complexity") + _assert(index_prefix != "", "index_prefix cannot be an empty string") + _assert( + filter_labels is None or filter_complexity > 0, + "if filter_labels is provided, filter_complexity must not be 0" + ) + + index_path = Path(index_directory) + _assert( + index_path.exists() and index_path.is_dir(), + "index_directory must both exist and be a directory", + ) + + vector_bin_path, vector_dtype_actual = _valid_path_and_dtype( + data, vector_dtype, index_directory, index_prefix + ) + if dap_metric == _native_dap.INNER_PRODUCT: + _assert( + vector_dtype_actual == np.float32, + "Integral vector dtypes (np.uint8, np.int8) are not supported with distance metric mips" + ) + + num_points, dimensions = vectors_metadata_from_file(vector_bin_path) + if filter_labels is not None: + _assert( + len(filter_labels) == num_points, + "filter_labels must be the same length as the number of points" + ) + + if vector_dtype_actual == np.uint8: + _builder = _native_dap.build_memory_uint8_index + elif vector_dtype_actual == np.int8: + _builder = _native_dap.build_memory_int8_index + else: + _builder = _native_dap.build_memory_float_index + + index_prefix_path = os.path.join(index_directory, index_prefix) + + filter_labels_file = "" + if filter_labels is not None: + label_counts = {} + filter_labels_file = f"{index_prefix_path}_pylabels.txt" + with open(filter_labels_file, "w") as labels_file: + for labels in filter_labels: + for label in labels: + label_counts[label] = 1 if label not in label_counts else label_counts[label] + 1 + if len(labels) == 0: + print("default", file=labels_file) + else: + print(",".join(labels), file=labels_file) + with open(f"{index_prefix_path}_label_metadata.json", "w") as label_metadata_file: + json.dump(label_counts, label_metadata_file, indent=True) + + if isinstance(tags, str) and tags != "": + use_tags = True + shutil.copy(tags, index_prefix_path + ".tags") + elif not isinstance(tags, str): + use_tags = True + tags_as_array = _castable_dtype_or_raise(tags, expected=np.uint32) + _assert(len(tags_as_array.shape) == 1, "Provided tags must be 1 dimensional") + _assert( + tags_as_array.shape[0] == num_points, + "Provided tags must contain an identical population to the number of points, " + f"{tags_as_array.shape[0]=}, {num_points=}", + ) + tags_to_file(index_prefix_path + ".tags", tags_as_array) + else: + use_tags = False + + _builder( + distance_metric=dap_metric, + data_file_path=vector_bin_path, + index_output_path=index_prefix_path, + complexity=complexity, + graph_degree=graph_degree, + alpha=alpha, + num_threads=num_threads, + use_pq_build=use_pq_build, + num_pq_bytes=num_pq_bytes, + use_opq=use_opq, + use_tags=use_tags, + 
filter_labels_file=filter_labels_file, + universal_label=universal_label, + filter_complexity=filter_complexity, + ) + + _write_index_metadata( + index_prefix_path, vector_dtype_actual, dap_metric, num_points, dimensions + ) diff --git a/packages/leann-backend-diskann/third_party/DiskANN/python/src/_builder.pyi b/packages/leann-backend-diskann/third_party/DiskANN/python/src/_builder.pyi new file mode 100644 index 0000000..223e6c9 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/python/src/_builder.pyi @@ -0,0 +1,74 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT license. + +from typing import BinaryIO, Optional, overload + +import numpy as np + +from . import DistanceMetric, VectorDType, VectorIdentifierBatch, VectorLikeBatch + +def numpy_to_diskann_file(vectors: np.ndarray, file_handler: BinaryIO): ... +@overload +def build_disk_index( + data: str, + distance_metric: DistanceMetric, + index_directory: str, + complexity: int, + graph_degree: int, + search_memory_maximum: float, + build_memory_maximum: float, + num_threads: int, + pq_disk_bytes: int, + vector_dtype: VectorDType, + index_prefix: str, +) -> None: ... +@overload +def build_disk_index( + data: VectorLikeBatch, + distance_metric: DistanceMetric, + index_directory: str, + complexity: int, + graph_degree: int, + search_memory_maximum: float, + build_memory_maximum: float, + num_threads: int, + pq_disk_bytes: int, + index_prefix: str, +) -> None: ... +@overload +def build_memory_index( + data: VectorLikeBatch, + distance_metric: DistanceMetric, + index_directory: str, + complexity: int, + graph_degree: int, + alpha: float, + num_threads: int, + use_pq_build: bool, + num_pq_bytes: int, + use_opq: bool, + tags: Union[str, VectorIdentifierBatch], + filter_labels: Optional[list[list[str]]], + universal_label: str, + filter_complexity: int, + index_prefix: str +) -> None: ... +@overload +def build_memory_index( + data: str, + distance_metric: DistanceMetric, + index_directory: str, + complexity: int, + graph_degree: int, + alpha: float, + num_threads: int, + use_pq_build: bool, + num_pq_bytes: int, + use_opq: bool, + vector_dtype: VectorDType, + tags: Union[str, VectorIdentifierBatch], + filter_labels_file: Optional[list[list[str]]], + universal_label: str, + filter_complexity: int, + index_prefix: str +) -> None: ... diff --git a/packages/leann-backend-diskann/third_party/DiskANN/python/src/_common.py b/packages/leann-backend-diskann/third_party/DiskANN/python/src/_common.py new file mode 100644 index 0000000..2b28802 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/python/src/_common.py @@ -0,0 +1,251 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT license. + +import os +import warnings +from enum import Enum +from pathlib import Path +from typing import Literal, NamedTuple, Optional, Tuple, Type, Union + +import numpy as np + +from . import ( + DistanceMetric, + VectorDType, + VectorIdentifierBatch, + VectorLike, + VectorLikeBatch, +) +from . import _diskannpy as _native_dap + +__ALL__ = ["valid_dtype"] + +_VALID_DTYPES = [np.float32, np.int8, np.uint8] + + +def valid_dtype(dtype: Type) -> VectorDType: + """ + Utility method to determine whether the provided dtype is supported by `diskannpy`, and if so, the canonical + dtype we will use internally (e.g. 
np.single -> np.float32) + """ + _assert_dtype(dtype) + if dtype == np.uint8: + return np.uint8 + if dtype == np.int8: + return np.int8 + if dtype == np.float32: + return np.float32 + + +def _assert(statement_eval: bool, message: str): + if not statement_eval: + raise ValueError(message) + + +def _valid_metric(metric: str) -> _native_dap.Metric: + if not isinstance(metric, str): + raise ValueError("distance_metric must be a string") + if metric.lower() == "l2": + return _native_dap.L2 + elif metric.lower() == "mips": + return _native_dap.INNER_PRODUCT + elif metric.lower() == "cosine": + return _native_dap.COSINE + else: + raise ValueError("distance_metric must be one of 'l2', 'mips', or 'cosine'") + + +def _assert_dtype(dtype: Type): + _assert( + any(np.can_cast(dtype, _dtype) for _dtype in _VALID_DTYPES), + f"Vector dtype must be of one of type {{(np.single, np.float32), (np.byte, np.int8), (np.ubyte, np.uint8)}}", + ) + + +def _castable_dtype_or_raise( + data: Union[VectorLike, VectorLikeBatch, VectorIdentifierBatch], expected: np.dtype +) -> np.ndarray: + if isinstance(data, np.ndarray) and np.can_cast(data.dtype, expected): + return data.astype(expected, casting="safe") + else: + raise TypeError( + f"expecting a numpy ndarray of dtype {expected}, not a {type(data)}" + ) + + +def _assert_2d(vectors: np.ndarray, name: str): + _assert(len(vectors.shape) == 2, f"{name} must be 2d numpy array") + + +__MAX_UINT32_VAL = 4_294_967_295 + + +def _assert_is_positive_uint32(test_value: int, parameter: str): + _assert( + test_value is not None and 0 < test_value < __MAX_UINT32_VAL, + f"{parameter} must be a positive integer in the uint32 range", + ) + + +def _assert_is_nonnegative_uint32(test_value: int, parameter: str): + _assert( + test_value is not None and -1 < test_value < __MAX_UINT32_VAL, + f"{parameter} must be a non-negative integer in the uint32 range", + ) + + +def _assert_is_nonnegative_uint64(test_value: int, parameter: str): + _assert( + -1 < test_value, + f"{parameter} must be a non-negative integer in the uint64 range", + ) + + +def _assert_existing_directory(path: str, parameter: str): + _path = Path(path) + _assert( + _path.exists() and _path.is_dir(), f"{parameter} must be an existing directory" + ) + + +def _assert_existing_file(path: str, parameter: str): + _path = Path(path) + _assert(_path.exists() and _path.is_file(), f"{parameter} must be an existing file") + + +class _DataType(Enum): + FLOAT32 = 0 + INT8 = 1 + UINT8 = 2 + + @classmethod + def from_type(cls, vector_dtype: VectorDType) -> "DataType": + if vector_dtype == np.float32: + return cls.FLOAT32 + if vector_dtype == np.int8: + return cls.INT8 + if vector_dtype == np.uint8: + return cls.UINT8 + + def to_type(self) -> VectorDType: + if self is _DataType.FLOAT32: + return np.float32 + if self is _DataType.INT8: + return np.int8 + if self is _DataType.UINT8: + return np.uint8 + + +class _Metric(Enum): + L2 = 0 + MIPS = 1 + COSINE = 2 + + @classmethod + def from_native(cls, metric: _native_dap.Metric) -> "_Metric": + if metric == _native_dap.L2: + return cls.L2 + if metric == _native_dap.INNER_PRODUCT: + return cls.MIPS + if metric == _native_dap.COSINE: + return cls.COSINE + + def to_native(self) -> _native_dap.Metric: + if self is _Metric.L2: + return _native_dap.L2 + if self is _Metric.MIPS: + return _native_dap.INNER_PRODUCT + if self is _Metric.COSINE: + return _native_dap.COSINE + + def to_str(self) -> _native_dap.Metric: + if self is _Metric.L2: + return "l2" + if self is _Metric.MIPS: + return "mips" + if self is 
_Metric.COSINE: + return "cosine" + + +def _build_metadata_path(index_path_and_prefix: str) -> str: + return index_path_and_prefix + "_metadata.bin" + + +def _write_index_metadata( + index_path_and_prefix: str, + dtype: VectorDType, + metric: _native_dap.Metric, + num_points: int, + dimensions: int, +): + np.array( + [ + _DataType.from_type(dtype).value, + _Metric.from_native(metric).value, + num_points, + dimensions, + ], + dtype=np.uint64, + ).tofile(_build_metadata_path(index_path_and_prefix)) + + +def _read_index_metadata( + index_path_and_prefix: str, +) -> Optional[Tuple[VectorDType, str, np.uint64, np.uint64]]: + path = _build_metadata_path(index_path_and_prefix) + if not Path(path).exists(): + return None + else: + metadata = np.fromfile(path, dtype=np.uint64, count=-1) + return ( + _DataType(int(metadata[0])).to_type(), + _Metric(int(metadata[1])).to_str(), + metadata[2], + metadata[3], + ) + + +def _ensure_index_metadata( + index_path_and_prefix: str, + vector_dtype: Optional[VectorDType], + distance_metric: Optional[DistanceMetric], + max_vectors: int, + dimensions: Optional[int], + warn_size_exceeded: bool = False, +) -> Tuple[VectorDType, str, np.uint64, np.uint64]: + possible_metadata = _read_index_metadata(index_path_and_prefix) + if possible_metadata is None: + _assert( + all([vector_dtype, distance_metric, dimensions]), + "distance_metric, vector_dtype, and dimensions must provided if a corresponding metadata file has not " + "been built for this index, such as when an index was built via the CLI tools or prior to the addition " + "of a metadata file", + ) + _assert_dtype(vector_dtype) + _assert_is_positive_uint32(max_vectors, "max_vectors") + _assert_is_positive_uint32(dimensions, "dimensions") + return vector_dtype, distance_metric, max_vectors, dimensions # type: ignore + else: + vector_dtype, distance_metric, num_vectors, dimensions = possible_metadata + if warn_size_exceeded: + if max_vectors is not None and num_vectors > max_vectors: + warnings.warn( + "The number of vectors in the saved index exceeds the max_vectors parameter. " + "max_vectors is being adjusted to accommodate the dataset, but any insertions will fail." + ) + max_vectors = num_vectors + if num_vectors == max_vectors: + warnings.warn( + "The number of vectors in the saved index equals max_vectors parameter. Any insertions will fail." 
+ ) + return possible_metadata + + +def _valid_index_prefix(index_directory: str, index_prefix: str) -> str: + _assert( + index_directory is not None and index_directory != "", + "index_directory cannot be None or empty", + ) + _assert_existing_directory(index_directory, "index_directory") + _assert(index_prefix != "", "index_prefix cannot be an empty string") + return os.path.join(index_directory, index_prefix) diff --git a/packages/leann-backend-diskann/third_party/DiskANN/python/src/_diskannpy.cpython-310-x86_64-linux-gnu.so.bak b/packages/leann-backend-diskann/third_party/DiskANN/python/src/_diskannpy.cpython-310-x86_64-linux-gnu.so.bak new file mode 100755 index 0000000..5741ecd Binary files /dev/null and b/packages/leann-backend-diskann/third_party/DiskANN/python/src/_diskannpy.cpython-310-x86_64-linux-gnu.so.bak differ diff --git a/packages/leann-backend-diskann/third_party/DiskANN/python/src/_dynamic_memory_index.py b/packages/leann-backend-diskann/third_party/DiskANN/python/src/_dynamic_memory_index.py new file mode 100644 index 0000000..cdf6432 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/python/src/_dynamic_memory_index.py @@ -0,0 +1,511 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT license. + +import os +import warnings +from pathlib import Path +from typing import Optional + +import numpy as np + +from . import ( + DistanceMetric, + QueryResponse, + QueryResponseBatch, + VectorDType, + VectorIdentifier, + VectorIdentifierBatch, + VectorLike, + VectorLikeBatch, +) +from . import _diskannpy as _native_dap +from ._common import ( + _assert, + _assert_2d, + _assert_dtype, + _assert_existing_directory, + _assert_is_nonnegative_uint32, + _assert_is_positive_uint32, + _castable_dtype_or_raise, + _ensure_index_metadata, + _valid_index_prefix, + _valid_metric, + _write_index_metadata, +) +from ._diskannpy import defaults + +__ALL__ = ["DynamicMemoryIndex"] + + +class DynamicMemoryIndex: + """ + A DynamicMemoryIndex instance is used to both search and mutate a `diskannpy` memory index. This index is unlike + either `diskannpy.StaticMemoryIndex` or `diskannpy.StaticDiskIndex` in the following ways: + + - It requires an explicit vector identifier for each vector added to it. + - Insert and (lazy) deletion operations are provided for a flexible, living index + + The mutable aspect of this index will absolutely impact search time performance as new vectors are added and + old deleted. `DynamicMemoryIndex.consolidate_deletes()` should be called periodically to restructure the index + to remove deleted vectors and improve per-search performance, at the cost of an expensive index consolidation to + occur. + """ + + @classmethod + def from_file( + cls, + index_directory: str, + max_vectors: int, + complexity: int, + graph_degree: int, + saturate_graph: bool = defaults.SATURATE_GRAPH, + max_occlusion_size: int = defaults.MAX_OCCLUSION_SIZE, + alpha: float = defaults.ALPHA, + num_threads: int = defaults.NUM_THREADS, + filter_complexity: int = defaults.FILTER_COMPLEXITY, + num_frozen_points: int = defaults.NUM_FROZEN_POINTS_DYNAMIC, + initial_search_complexity: int = 0, + search_threads: int = 0, + concurrent_consolidation: bool = True, + index_prefix: str = "ann", + distance_metric: Optional[DistanceMetric] = None, + vector_dtype: Optional[VectorDType] = None, + dimensions: Optional[int] = None, + ) -> "DynamicMemoryIndex": + """ + The `from_file` classmethod is used to load a previously saved index from disk. 
This index *must* have been + created with a valid `tags` file or `tags` np.ndarray of `diskannpy.VectorIdentifier`s. It is *strongly* + recommended that you use the same parameters as the `diskannpy.build_memory_index()` function that created + the index. + + ### Parameters + - **index_directory**: The directory containing the index files. This directory must contain the following + files: + - `{index_prefix}.data` + - `{index_prefix}.tags` + - `{index_prefix}` + + It may also include the following optional files: + - `{index_prefix}_vectors.bin`: Optional. `diskannpy` builder functions may create this file in the + `index_directory` if the index was created from a numpy array + - `{index_prefix}_metadata.bin`: Optional. `diskannpy` builder functions create this file to store metadata + about the index, such as vector dtype, distance metric, number of vectors and vector dimensionality. + If an index is built from the `diskann` cli tools, this file will not exist. + - **max_vectors**: Capacity of the memory index including space for future insertions. + - **complexity**: Complexity (a.k.a `L`) references the size of the list we store candidate approximate + neighbors in. It's used during save (which is an index rebuild), and it's used as an initial search size to + warm up our index and lower the latency for initial real searches. + - **graph_degree**: Graph degree (a.k.a. `R`) is the maximum degree allowed for a node in the index's graph + structure. This degree will be pruned throughout the course of the index build, but it will never grow beyond + this value. Higher R values require longer index build times, but may result in an index showing excellent + recall and latency characteristics. + - **saturate_graph**: If True, the adjacency list of each node will be saturated with neighbors to have exactly + `graph_degree` neighbors. If False, each node will have between 1 and `graph_degree` neighbors. + - **max_occlusion_size**: The maximum number of points that can be considered by occlude_list function. + - **alpha**: The alpha parameter (>=1) is used to control the nature and number of points that are added to the + graph. A higher alpha value (e.g., 1.4) will result in fewer hops (and IOs) to convergence, but probably + more distance comparisons compared to a lower alpha value. + - **num_threads**: Number of threads to use when creating this index. `0` indicates we should use all available + logical processors. + - **filter_complexity**: Complexity to use when using filters. Default is 0. + - **num_frozen_points**: Number of points to freeze. Default is 1. + - **initial_search_complexity**: Should be set to the most common `complexity` expected to be used during the + life of this `diskannpy.DynamicMemoryIndex` object. The working scratch memory allocated is based off of + `initial_search_complexity` * `search_threads`. Note that it may be resized if a `search` or `batch_search` + operation requests a space larger than can be accommodated by these values. + - **search_threads**: Should be set to the most common `num_threads` expected to be used during the + life of this `diskannpy.DynamicMemoryIndex` object. The working scratch memory allocated is based off of + `initial_search_complexity` * `search_threads`. Note that it may be resized if a `batch_search` + operation requests a space larger than can be accommodated by these values. 
+ - **concurrent_consolidation**: This flag dictates whether consolidation can be run alongside inserts and + deletes, or whether the index is locked down to changes while consolidation is ongoing. + - **index_prefix**: The prefix of the index files. Defaults to "ann". + - **distance_metric**: A `str`, strictly one of {"l2", "mips", "cosine"}. `l2` and `cosine` are supported for all 3 + vector dtypes, but `mips` is only available for single precision floats. Default is `None`. **This + value is only used if a `{index_prefix}_metadata.bin` file does not exist.** If it does not exist, + you are required to provide it. + - **vector_dtype**: The vector dtype this index has been built with. **This value is only used if a + `{index_prefix}_metadata.bin` file does not exist.** If it does not exist, you are required to provide it. + - **dimensions**: The vector dimensionality of this index. All new vectors inserted must be the same + dimensionality. **This value is only used if a `{index_prefix}_metadata.bin` file does not exist.** If it + does not exist, you are required to provide it. + + ### Returns + A `diskannpy.DynamicMemoryIndex` object, with the index loaded from disk and ready to use for insertions, + deletions, and searches. + + """ + index_prefix_path = _valid_index_prefix(index_directory, index_prefix) + + # do tags exist? + tags_file = index_prefix_path + ".tags" + _assert( + Path(tags_file).exists(), + f"The file {tags_file} does not exist in {index_directory}", + ) + vector_dtype, dap_metric, num_vectors, dimensions = _ensure_index_metadata( + index_prefix_path, vector_dtype, distance_metric, max_vectors, dimensions, warn_size_exceeded=True + ) + + index = cls( + distance_metric=dap_metric, # type: ignore + vector_dtype=vector_dtype, + dimensions=dimensions, + max_vectors=max_vectors, + complexity=complexity, + graph_degree=graph_degree, + saturate_graph=saturate_graph, + max_occlusion_size=max_occlusion_size, + alpha=alpha, + num_threads=num_threads, + filter_complexity=filter_complexity, + num_frozen_points=num_frozen_points, + initial_search_complexity=initial_search_complexity, + search_threads=search_threads, + concurrent_consolidation=concurrent_consolidation, + ) + index._index.load(index_prefix_path) + index._num_vectors = num_vectors # current number of vectors loaded + return index + + def __init__( + self, + distance_metric: DistanceMetric, + vector_dtype: VectorDType, + dimensions: int, + max_vectors: int, + complexity: int, + graph_degree: int, + saturate_graph: bool = defaults.SATURATE_GRAPH, + max_occlusion_size: int = defaults.MAX_OCCLUSION_SIZE, + alpha: float = defaults.ALPHA, + num_threads: int = defaults.NUM_THREADS, + filter_complexity: int = defaults.FILTER_COMPLEXITY, + num_frozen_points: int = defaults.NUM_FROZEN_POINTS_DYNAMIC, + initial_search_complexity: int = 0, + search_threads: int = 0, + concurrent_consolidation: bool = True, + ): + """ + The `diskannpy.DynamicMemoryIndex` represents our python API into a mutable DiskANN memory index. + + This constructor is used to create a new, empty index. If you wish to load a previously saved index from disk, + please use the `diskannpy.DynamicMemoryIndex.from_file` classmethod instead. + + ### Parameters + - **distance_metric**: A `str`, strictly one of {"l2", "mips", "cosine"}. `l2` and `cosine` are supported for all 3 + vector dtypes, but `mips` is only available for single precision floats. + - **vector_dtype**: One of {`np.float32`, `np.int8`, `np.uint8`}. 
The dtype of the vectors this index will + be storing. + - **dimensions**: The vector dimensionality of this index. All new vectors inserted must be the same + dimensionality. + - **max_vectors**: Capacity of the data store including space for future insertions + - **graph_degree**: Graph degree (a.k.a. `R`) is the maximum degree allowed for a node in the index's graph + structure. This degree will be pruned throughout the course of the index build, but it will never grow beyond + this value. Higher `graph_degree` values require longer index build times, but may result in an index showing + excellent recall and latency characteristics. + - **saturate_graph**: If True, the adjacency list of each node will be saturated with neighbors to have exactly + `graph_degree` neighbors. If False, each node will have between 1 and `graph_degree` neighbors. + - **max_occlusion_size**: The maximum number of points that can be considered by occlude_list function. + - **alpha**: The alpha parameter (>=1) is used to control the nature and number of points that are added to the + graph. A higher alpha value (e.g., 1.4) will result in fewer hops (and IOs) to convergence, but probably + more distance comparisons compared to a lower alpha value. + - **num_threads**: Number of threads to use when creating this index. `0` indicates we should use all available + logical processors. + - **filter_complexity**: Complexity to use when using filters. Default is 0. + - **num_frozen_points**: Number of points to freeze. Default is 1. + - **initial_search_complexity**: Should be set to the most common `complexity` expected to be used during the + life of this `diskannpy.DynamicMemoryIndex` object. The working scratch memory allocated is based off of + `initial_search_complexity` * `search_threads`. Note that it may be resized if a `search` or `batch_search` + operation requests a space larger than can be accommodated by these values. + - **search_threads**: Should be set to the most common `num_threads` expected to be used during the + life of this `diskannpy.DynamicMemoryIndex` object. The working scratch memory allocated is based off of + `initial_search_complexity` * `search_threads`. Note that it may be resized if a `batch_search` + operation requests a space larger than can be accommodated by these values. + - **concurrent_consolidation**: This flag dictates whether consolidation can be run alongside inserts and + deletes, or whether the index is locked down to changes while consolidation is ongoing. 
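A minimal end-to-end sketch of this constructor together with the insert and search calls documented below (dimensions, capacities, and data are illustrative):

import numpy as np
import diskannpy

rng = np.random.default_rng(0)
data = rng.random((5_000, 64), dtype=np.float32)          # illustrative vectors
ids = np.arange(1, data.shape[0] + 1, dtype=np.uint32)    # identifiers must be positive uint32

index = diskannpy.DynamicMemoryIndex(
    distance_metric="l2",
    vector_dtype=np.float32,
    dimensions=64,
    max_vectors=10_000,      # leave headroom for future insertions
    complexity=64,
    graph_degree=32,
)
index.batch_insert(vectors=data, vector_ids=ids, num_threads=0)
neighbors, distances = index.search(query=data[0], k_neighbors=10, complexity=64)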
+ + """ + self._num_vectors = 0 + self._removed_num_vectors = 0 + dap_metric = _valid_metric(distance_metric) + self._dap_metric = dap_metric + _assert_dtype(vector_dtype) + _assert_is_positive_uint32(dimensions, "dimensions") + + self._vector_dtype = vector_dtype + self._dimensions = dimensions + + _assert_is_positive_uint32(max_vectors, "max_vectors") + _assert_is_positive_uint32(complexity, "complexity") + _assert_is_positive_uint32(graph_degree, "graph_degree") + _assert( + alpha >= 1, + "alpha must be >= 1, and realistically should be kept between [1.0, 2.0)", + ) + _assert_is_nonnegative_uint32(max_occlusion_size, "max_occlusion_size") + _assert_is_nonnegative_uint32(num_threads, "num_threads") + _assert_is_nonnegative_uint32(filter_complexity, "filter_complexity") + _assert_is_nonnegative_uint32(num_frozen_points, "num_frozen_points") + _assert_is_nonnegative_uint32( + initial_search_complexity, "initial_search_complexity" + ) + _assert_is_nonnegative_uint32(search_threads, "search_threads") + + self._max_vectors = max_vectors + self._complexity = complexity + self._graph_degree = graph_degree + + if vector_dtype == np.uint8: + _index = _native_dap.DynamicMemoryUInt8Index + elif vector_dtype == np.int8: + _index = _native_dap.DynamicMemoryInt8Index + else: + _index = _native_dap.DynamicMemoryFloatIndex + + self._index = _index( + distance_metric=dap_metric, + dimensions=dimensions, + max_vectors=max_vectors, + complexity=complexity, + graph_degree=graph_degree, + saturate_graph=saturate_graph, + max_occlusion_size=max_occlusion_size, + alpha=alpha, + num_threads=num_threads, + filter_complexity=filter_complexity, + num_frozen_points=num_frozen_points, + initial_search_complexity=initial_search_complexity, + search_threads=search_threads, + concurrent_consolidation=concurrent_consolidation, + ) + self._points_deleted = False + + def search( + self, query: VectorLike, k_neighbors: int, complexity: int + ) -> QueryResponse: + """ + Searches the index by a single query vector. + + ### Parameters + - **query**: 1d numpy array of the same dimensionality and dtype of the index. + - **k_neighbors**: Number of neighbors to be returned. If query vector exists in index, it almost definitely + will be returned as well, so adjust your ``k_neighbors`` as appropriate. Must be > 0. + - **complexity**: Size of distance ordered list of candidate neighbors to use while searching. List size + increases accuracy at the cost of latency. Must be at least k_neighbors in size. + """ + _query = _castable_dtype_or_raise(query, expected=self._vector_dtype) + _assert(len(_query.shape) == 1, "query vector must be 1-d") + _assert( + _query.shape[0] == self._dimensions, + f"query vector must have the same dimensionality as the index; index dimensionality: {self._dimensions}, " + f"query dimensionality: {_query.shape[0]}", + ) + _assert_is_positive_uint32(k_neighbors, "k_neighbors") + _assert_is_nonnegative_uint32(complexity, "complexity") + + if k_neighbors > complexity: + warnings.warn( + f"k_neighbors={k_neighbors} asked for, but list_size={complexity} was smaller. Increasing {complexity} to {k_neighbors}" + ) + complexity = k_neighbors + neighbors, distances = self._index.search(query=_query, knn=k_neighbors, complexity=complexity) + return QueryResponse(identifiers=neighbors, distances=distances) + + def batch_search( + self, + queries: VectorLikeBatch, + k_neighbors: int, + complexity: int, + num_threads: int, + ) -> QueryResponseBatch: + """ + Searches the index by a batch of query vectors. 
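A hedged sketch of a batched call, reusing `index` and `rng` from the constructor sketch above (shapes are illustrative):

queries = rng.random((32, 64), dtype=np.float32)   # 32 query vectors, one per row
ids, dists = index.batch_search(
    queries=queries, k_neighbors=10, complexity=64, num_threads=0
)
# ids and dists are both (32, 10): row i holds the neighbors of query i.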
+ + This search is parallelized and far more efficient than searching for each vector individually. + + ### Parameters + - **queries**: 2d numpy array, with column dimensionality matching the index and row dimensionality being the + number of queries intended to search for in parallel. Dtype must match dtype of the index. + - **k_neighbors**: Number of neighbors to be returned. If query vector exists in index, it almost definitely + will be returned as well, so adjust your ``k_neighbors`` as appropriate. Must be > 0. + - **complexity**: Size of distance ordered list of candidate neighbors to use while searching. List size + increases accuracy at the cost of latency. Must be at least k_neighbors in size. + - **num_threads**: Number of threads to use when searching this index. (>= 0), 0 = num_threads in system + """ + _queries = _castable_dtype_or_raise(queries, expected=self._vector_dtype) + _assert_2d(_queries, "queries") + _assert( + _queries.shape[1] == self._dimensions, + f"query vectors must have the same dimensionality as the index; index dimensionality: {self._dimensions}, " + f"query dimensionality: {_queries.shape[1]}", + ) + + _assert_is_positive_uint32(k_neighbors, "k_neighbors") + _assert_is_positive_uint32(complexity, "complexity") + _assert_is_nonnegative_uint32(num_threads, "num_threads") + + if k_neighbors > complexity: + warnings.warn( + f"k_neighbors={k_neighbors} asked for, but list_size={complexity} was smaller. Increasing {complexity} to {k_neighbors}" + ) + complexity = k_neighbors + + num_queries, dim = queries.shape + neighbors, distances = self._index.batch_search( + queries=_queries, + num_queries=num_queries, + knn=k_neighbors, + complexity=complexity, + num_threads=num_threads, + ) + return QueryResponseBatch(identifiers=neighbors, distances=distances) + + def save(self, save_path: str, index_prefix: str = "ann"): + """ + Saves this index to file. + + ### Parameters + - **save_path**: The path to save these index files to. + - **index_prefix**: The prefix of the index files. Defaults to "ann". + """ + if save_path == "": + raise ValueError("save_path cannot be empty") + if index_prefix == "": + raise ValueError("index_prefix cannot be empty") + + index_prefix = index_prefix.format(complexity=self._complexity, graph_degree=self._graph_degree) + _assert_existing_directory(save_path, "save_path") + save_path = os.path.join(save_path, index_prefix) + if self._points_deleted is True: + warnings.warn( + "DynamicMemoryIndex.save() currently requires DynamicMemoryIndex.consolidate_delete() to be called " + "prior to save when items have been marked for deletion. This is being done automatically now, though" + "it will increase the time it takes to save; on large sets of data it can take a substantial amount of " + "time. In the future, we will implement a faster save with unconsolidated deletes, but for now this is " + "required." + ) + self._index.consolidate_delete() + self._index.save( + save_path=save_path, compact_before_save=True + ) # we do not yet support uncompacted saves + _write_index_metadata( + save_path, + self._vector_dtype, + self._dap_metric, + self._index.num_points(), + self._dimensions, + ) + + def insert(self, vector: VectorLike, vector_id: VectorIdentifier): + """ + Inserts a single vector into the index with the provided vector_id. + + If this insertion will overrun the `max_vectors` count boundaries of this index, `consolidate_delete()` will + be executed automatically. + + ### Parameters + - **vector**: The vector to insert. 
Note that dtype must match. + - **vector_id**: The vector_id to use for this vector. + """ + _vector = _castable_dtype_or_raise(vector, expected=self._vector_dtype) + _assert(len(vector.shape) == 1, "insert vector must be 1-d") + _assert_is_positive_uint32(vector_id, "vector_id") + if self._num_vectors + 1 > self._max_vectors: + if self._removed_num_vectors > 0: + warnings.warn(f"Inserting this vector would overrun the max_vectors={self._max_vectors} specified at index " + f"construction. We are attempting to consolidate_delete() to make space.") + self.consolidate_delete() + else: + raise RuntimeError(f"Inserting this vector would overrun the max_vectors={self._max_vectors} specified " + f"at index construction. Unable to make space by consolidating deletions. The insert" + f"operation has failed.") + status = self._index.insert(_vector, np.uint32(vector_id)) + if status == 0: + self._num_vectors += 1 + else: + raise RuntimeError( + f"Insert was unable to complete successfully; error code returned from diskann C++ lib: {status}" + ) + + + def batch_insert( + self, + vectors: VectorLikeBatch, + vector_ids: VectorIdentifierBatch, + num_threads: int = 0, + ): + """ + Inserts a batch of vectors into the index with the provided vector_ids. + + If this batch insertion will overrun the `max_vectors` count boundaries of this index, `consolidate_delete()` + will be executed automatically. + + ### Parameters + - **vectors**: The 2d numpy array of vectors to insert. + - **vector_ids**: The 1d array of vector ids to use. This array must have the same number of elements as + the vectors array has rows. The dtype of vector_ids must be `np.uint32` + - **num_threads**: Number of threads to use when inserting into this index. (>= 0), 0 = num_threads in system + """ + _query = _castable_dtype_or_raise(vectors, expected=self._vector_dtype) + _assert(len(vectors.shape) == 2, "vectors must be a 2-d array") + _assert( + vectors.shape[0] == vector_ids.shape[0], + "Number of vectors must be equal to number of ids", + ) + _vectors = vectors.astype(dtype=self._vector_dtype, casting="safe", copy=False) + _vector_ids = vector_ids.astype(dtype=np.uint32, casting="safe", copy=False) + + if self._num_vectors + _vector_ids.shape[0] > self._max_vectors: + if self._max_vectors + self._removed_num_vectors >= _vector_ids.shape[0]: + warnings.warn(f"Inserting these vectors, count={_vector_ids.shape[0]} would overrun the " + f"max_vectors={self._max_vectors} specified at index construction. We are attempting to " + f"consolidate_delete() to make space.") + self.consolidate_delete() + else: + raise RuntimeError(f"Inserting these vectors count={_vector_ids.shape[0]} would overrun the " + f"max_vectors={self._max_vectors} specified at index construction. Unable to make " + f"space by consolidating deletions. The batch insert operation has failed.") + + statuses = self._index.batch_insert( + _vectors, _vector_ids, _vector_ids.shape[0], num_threads + ) + successes = [] + failures = [] + for i in range(0, len(statuses)): + if statuses[i] == 0: + successes.append(i) + else: + failures.append(i) + self._num_vectors += len(successes) + if len(failures) == 0: + return + failed_ids = vector_ids[failures] + raise RuntimeError( + f"During batch insert, the following vector_ids were unable to be inserted into the index: {failed_ids}. " + f"{len(successes)} were successfully inserted" + ) + + + def mark_deleted(self, vector_id: VectorIdentifier): + """ + Mark vector for deletion. 
This is a soft delete that won't return the vector id in any results, but does not + remove it from the underlying index files or memory structure. To execute a hard delete, call this method and + then call the much more expensive `consolidate_delete` method on this index. + ### Parameters + - **vector_id**: The vector id to delete. Must be a uint32. + """ + _assert_is_positive_uint32(vector_id, "vector_id") + self._points_deleted = True + self._removed_num_vectors += 1 + # we do not decrement self._num_vectors until consolidate_delete + self._index.mark_deleted(np.uint32(vector_id)) + + def consolidate_delete(self): + """ + This method actually restructures the DiskANN index to remove the items that have been marked for deletion. + """ + self._index.consolidate_delete() + self._points_deleted = False + self._num_vectors -= self._removed_num_vectors + self._removed_num_vectors = 0 diff --git a/packages/leann-backend-diskann/third_party/DiskANN/python/src/_files.py b/packages/leann-backend-diskann/third_party/DiskANN/python/src/_files.py new file mode 100644 index 0000000..7740c34 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/python/src/_files.py @@ -0,0 +1,122 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT license. + +import warnings +from typing import BinaryIO, Literal, NamedTuple + +import numpy as np +import numpy.typing as npt + +from . import VectorDType, VectorIdentifierBatch, VectorLikeBatch +from ._common import _assert, _assert_2d, _assert_dtype, _assert_existing_file + + +class Metadata(NamedTuple): + """DiskANN binary vector files contain a small stanza containing some metadata about them.""" + + num_vectors: int + """ The number of vectors in the file. """ + dimensions: int + """ The dimensionality of the vectors in the file. """ + + +def vectors_metadata_from_file(vector_file: str) -> Metadata: + """ + Read the metadata from a DiskANN binary vector file. + ### Parameters + - **vector_file**: The path to the vector file to read the metadata from. + + ### Returns + `diskannpy.Metadata` + """ + _assert_existing_file(vector_file, "vector_file") + points, dims = np.fromfile(file=vector_file, dtype=np.int32, count=2) + return Metadata(points, dims) + + +def _write_bin(data: np.ndarray, file_handler: BinaryIO): + if len(data.shape) == 1: + _ = file_handler.write(np.array([data.shape[0], 1], dtype=np.int32).tobytes()) + else: + _ = file_handler.write(np.array(data.shape, dtype=np.int32).tobytes()) + _ = file_handler.write(data.tobytes()) + + +def vectors_to_file(vector_file: str, vectors: VectorLikeBatch) -> None: + """ + Utility function that writes a DiskANN binary vector formatted file to the location of your choosing. + + ### Parameters + - **vector_file**: The path to the vector file to write the vectors to. + - **vectors**: A 2d array of dtype `numpy.float32`, `numpy.uint8`, or `numpy.int8` + """ + _assert_dtype(vectors.dtype) + _assert_2d(vectors, "vectors") + with open(vector_file, "wb") as fh: + _write_bin(vectors, fh) + + +def vectors_from_file( + vector_file: str, + dtype: VectorDType, + use_memmap: bool = False, + mode: Literal["r", "r+"] = "r" +) -> npt.NDArray[VectorDType]: + """ + Read vectors from a DiskANN binary vector file. + + ### Parameters + - **vector_file**: The path to the vector file to read the vectors from. + - **dtype**: The data type of the vectors in the file. 
Ensure you match the data types exactly + - **use_memmap**: If True, return a np.memmap, else a standard np.ndarray will be returned + - **mode**: Read-only (r) or read-write (r+) (memmap only). Unlike np.memmap, default is read-only (r) + + ### Returns + `numpy.typing.NDArray[dtype] | numpy.memmap` + """ + assert mode in ["r", "r+"] + points, dims = vectors_metadata_from_file(vector_file) + if not use_memmap: + return np.fromfile(file=vector_file, dtype=dtype, offset=8).reshape(points, dims) + else: + return np.memmap(vector_file, dtype=dtype, mode=mode, offset=8, shape=(points, dims), order='C') + + +def tags_to_file(tags_file: str, tags: VectorIdentifierBatch) -> None: + """ + Write tags to a DiskANN binary tag file. + + ### Parameters + - **tags_file**: The path to the tag file to write the tags to. + - **tags**: A 1d array of dtype `numpy.uint32` containing the tags to write. If you have a 2d array of tags with + one column, you can pass it here and it will be reshaped and copied to a new array. It is more efficient for you + to reshape on your own without copying it first, as it should be a constant time operation vs. linear time + + """ + _assert(np.can_cast(tags.dtype, np.uint32), "valid tags must be uint32") + _assert( + len(tags.shape) == 1 or tags.shape[1] == 1, + "tags must be 1d or 2d with 1 column", + ) + if len(tags.shape) == 2: + warnings.warn( + "Tags in 2d with one column will be reshaped and copied to a new array. " + "It is more efficient for you to reshape without copying first." + ) + tags = tags.reshape(tags.shape[0], copy=True) + with open(tags_file, "wb") as fh: + _write_bin(tags.astype(np.uint32), fh) + + +def tags_from_file(tags_file: str) -> VectorIdentifierBatch: + """ + Read tags from a DiskANN binary tag file and return them as a 1d array of dtype `numpy.uint32`. + + ### Parameters + - **tags_file**: The path to the tag file to read the tags from. + """ + _assert_existing_file(tags_file, "tags_file") + points, dims = vectors_metadata_from_file( + tags_file + ) # tag files contain the same metadata stanza + return np.fromfile(file=tags_file, dtype=np.uint32, offset=8).reshape(points) diff --git a/packages/leann-backend-diskann/third_party/DiskANN/python/src/_static_disk_index.py b/packages/leann-backend-diskann/third_party/DiskANN/python/src/_static_disk_index.py new file mode 100644 index 0000000..47af362 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/python/src/_static_disk_index.py @@ -0,0 +1,244 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT license. + +import os +import warnings +from typing import Optional + +import numpy as np + +from . import ( + DistanceMetric, + QueryResponse, + QueryResponseBatch, + VectorDType, + VectorLike, + VectorLikeBatch, +) +from . import _diskannpy as _native_dap +from ._common import ( + _assert, + _assert_2d, + _assert_is_nonnegative_uint32, + _assert_is_positive_uint32, + _castable_dtype_or_raise, + _ensure_index_metadata, + _valid_index_prefix, + _valid_metric, +) + +__ALL__ = ["StaticDiskIndex"] + + +class StaticDiskIndex: + """ + A StaticDiskIndex is a disk-backed index that is not mutable. 
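The file helpers above all share one on-disk layout: a header of two int32 values (number of vectors, dimensionality) followed by the raw row-major data, which is why reads use offset=8. A small round-trip sketch, assuming these helpers are re-exported at package level as in upstream diskannpy and using an illustrative file name:

import numpy as np
import diskannpy

data = np.random.rand(500, 96).astype(np.float32)
diskannpy.vectors_to_file("vectors.bin", data)               # writes the (500, 96) header, then the floats

meta = diskannpy.vectors_metadata_from_file("vectors.bin")
print(meta.num_vectors, meta.dimensions)                     # 500 96

loaded = diskannpy.vectors_from_file("vectors.bin", dtype=np.float32)
assert loaded.shape == (500, 96)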
+ """ + + def __init__( + self, + index_directory: str, + num_threads: int, + num_nodes_to_cache: int, + cache_mechanism: int = 1, + distance_metric: Optional[DistanceMetric] = None, + vector_dtype: Optional[VectorDType] = None, + dimensions: Optional[int] = None, + index_prefix: str = "ann", + pq_prefix: str = "", + partition_prefix: str = "", + ): + """ + ### Parameters + - **index_directory**: The directory containing the index files. This directory must contain the following + files: + - `{index_prefix}_sample_data.bin` + - `{index_prefix}_mem.index.data` + - `{index_prefix}_pq_compressed.bin` + - `{index_prefix}_pq_pivots.bin` + - `{index_prefix}_sample_ids.bin` + - `{index_prefix}_disk.index` + + It may also include the following optional files: + - `{index_prefix}_vectors.bin`: Optional. `diskannpy` builder functions may create this file in the + `index_directory` if the index was created from a numpy array + - `{index_prefix}_metadata.bin`: Optional. `diskannpy` builder functions create this file to store metadata + about the index, such as vector dtype, distance metric, number of vectors and vector dimensionality. + If an index is built from the `diskann` cli tools, this file will not exist. + - **num_threads**: Number of threads to use when searching this index. (>= 0), 0 = num_threads in system + - **num_nodes_to_cache**: Number of nodes to cache in memory (> -1) + - **cache_mechanism**: 1 -> use the generated sample_data.bin file for + the index to initialize a set of cached nodes, up to `num_nodes_to_cache`, 2 -> ready the cache for up to + `num_nodes_to_cache`, but do not initialize it with any nodes. Any other value disables node caching. + - **distance_metric**: A `str`, strictly one of {"l2", "mips", "cosine"}. `l2` and `cosine` are supported for all 3 + vector dtypes, but `mips` is only available for single precision floats. Default is `None`. **This + value is only used if a `{index_prefix}_metadata.bin` file does not exist.** If it does not exist, + you are required to provide it. + - **vector_dtype**: The vector dtype this index has been built with. **This value is only used if a + `{index_prefix}_metadata.bin` file does not exist.** If it does not exist, you are required to provide it. + - **dimensions**: The vector dimensionality of this index. All new vectors inserted must be the same + dimensionality. **This value is only used if a `{index_prefix}_metadata.bin` file does not exist.** If it + does not exist, you are required to provide it. + - **index_prefix**: The prefix of the index files. Defaults to "ann". 
+ """ + index_prefix_path = _valid_index_prefix(index_directory, index_prefix) + vector_dtype, metric, _, _ = _ensure_index_metadata( + index_prefix_path, + vector_dtype, + distance_metric, + 1, # it doesn't matter because we don't need it in this context anyway + dimensions, + ) + dap_metric = _valid_metric(metric) + + _assert_is_nonnegative_uint32(num_threads, "num_threads") + _assert_is_nonnegative_uint32(num_nodes_to_cache, "num_nodes_to_cache") + + self._vector_dtype = vector_dtype + if vector_dtype == np.uint8: + _index = _native_dap.StaticDiskUInt8Index + elif vector_dtype == np.int8: + _index = _native_dap.StaticDiskInt8Index + else: + _index = _native_dap.StaticDiskFloatIndex + self._index = _index( + distance_metric=dap_metric, + index_path_prefix=index_prefix_path, + num_threads=num_threads, + num_nodes_to_cache=num_nodes_to_cache, + cache_mechanism=cache_mechanism, + pq_prefix=pq_prefix, + partition_prefix=partition_prefix, + ) + print("After index init") + + def search( + self, + query: VectorLike, + k_neighbors: int, + complexity: int, + beam_width: int = 2, + USE_DEFERRED_FETCH: bool = False, + skip_search_reorder: bool = False, + recompute_beighbor_embeddings: bool = False, + dedup_node_dis: bool = False, + prune_ratio: float = 0, + batch_recompute: bool = False, + global_pruning: bool = False, + ) -> QueryResponse: + """ + Searches the index by a single query vector. + + ### Parameters + - **query**: 1d numpy array of the same dimensionality and dtype of the index. + - **k_neighbors**: Number of neighbors to be returned. If query vector exists in index, it almost definitely + will be returned as well, so adjust your ``k_neighbors`` as appropriate. Must be > 0. + - **complexity**: Size of distance ordered list of candidate neighbors to use while searching. List size + increases accuracy at the cost of latency. Must be at least k_neighbors in size. + - **beam_width**: The beamwidth to be used for search. This is the maximum number of IO requests each query + will issue per iteration of search code. Larger beamwidth will result in fewer IO round-trips per query, + but might result in slightly higher total number of IO requests to SSD per query. For the highest query + throughput with a fixed SSD IOps rating, use W=1. For best latency, use W=4,8 or higher complexity search. + Specifying 0 will optimize the beamwidth depending on the number of threads performing search, but will + involve some tuning overhead. + - **skip_search_reorder**: Whether to skip search reorder for diskann search. + - **recompute_beighbor_embeddings**: Whether to recompute the neighbor embeddings. + - **dedup_node_dis**: Whether to dedup node distances. + - **batch_recompute**: Whether to batch recompute. + """ + _query = _castable_dtype_or_raise(query, expected=self._vector_dtype) + _assert(len(_query.shape) == 1, "query vector must be 1-d") + _assert_is_positive_uint32(k_neighbors, "k_neighbors") + _assert_is_positive_uint32(complexity, "complexity") + _assert_is_positive_uint32(beam_width, "beam_width") + + if k_neighbors > complexity: + warnings.warn( + f"{k_neighbors=} asked for, but {complexity=} was smaller. 
Increasing {complexity} to {k_neighbors}" + ) + complexity = k_neighbors + + neighbors, distances = self._index.search( + query=_query, + knn=k_neighbors, + complexity=complexity, + beam_width=beam_width, + USE_DEFERRED_FETCH=USE_DEFERRED_FETCH, + skip_search_reorder=skip_search_reorder, + recompute_beighbor_embeddings=recompute_beighbor_embeddings, + dedup_node_dis=dedup_node_dis, + prune_ratio=prune_ratio, + batch_recompute=batch_recompute, + global_pruning=global_pruning, + ) + return QueryResponse(identifiers=neighbors, distances=distances) + + def batch_search( + self, + queries: VectorLikeBatch, + k_neighbors: int, + complexity: int, + num_threads: int, + beam_width: int = 2, + USE_DEFERRED_FETCH: bool = False, + skip_search_reorder: bool = False, + recompute_beighbor_embeddings: bool = False, + dedup_node_dis: bool = False, + prune_ratio: float = 0, + batch_recompute: bool = False, + global_pruning: bool = False, + ) -> QueryResponseBatch: + """ + Searches the index by a batch of query vectors. + + This search is parallelized and far more efficient than searching for each vector individually. + + ### Parameters + - **queries**: 2d numpy array, with column dimensionality matching the index and row dimensionality being the + number of queries intended to search for in parallel. Dtype must match dtype of the index. + - **k_neighbors**: Number of neighbors to be returned. If query vector exists in index, it almost definitely + will be returned as well, so adjust your ``k_neighbors`` as appropriate. Must be > 0. + - **complexity**: Size of distance ordered list of candidate neighbors to use while searching. List size + increases accuracy at the cost of latency. Must be at least k_neighbors in size. + - **num_threads**: Number of threads to use when searching this index. (>= 0), 0 = num_threads in system + - **beam_width**: The beamwidth to be used for search. This is the maximum number of IO requests each query + will issue per iteration of search code. Larger beamwidth will result in fewer IO round-trips per query, + but might result in slightly higher total number of IO requests to SSD per query. For the highest query + throughput with a fixed SSD IOps rating, use W=1. For best latency, use W=4,8 or higher complexity search. + Specifying 0 will optimize the beamwidth depending on the number of threads performing search, but will + involve some tuning overhead. + - **skip_search_reorder**: Whether to skip search reorder for diskann search. + """ + _queries = _castable_dtype_or_raise(queries, expected=self._vector_dtype) + _assert_2d(_queries, "queries") + _assert_is_positive_uint32(k_neighbors, "k_neighbors") + _assert_is_positive_uint32(complexity, "complexity") + _assert_is_nonnegative_uint32(num_threads, "num_threads") + _assert_is_positive_uint32(beam_width, "beam_width") + + if k_neighbors > complexity: + warnings.warn( + f"{k_neighbors=} asked for, but {complexity=} was smaller. 
Increasing {complexity} to {k_neighbors}" + ) + complexity = k_neighbors + + num_queries, dim = _queries.shape + print( + f"USE_DEFERRED_FETCH={USE_DEFERRED_FETCH} skip_search_reorder={skip_search_reorder} recompute_beighbor_embeddings={recompute_beighbor_embeddings}, dedup_node_dis={dedup_node_dis}" + ) + neighbors, distances = self._index.batch_search( + queries=_queries, + num_queries=num_queries, + knn=k_neighbors, + complexity=complexity, + beam_width=beam_width, + num_threads=num_threads, + USE_DEFERRED_FETCH=USE_DEFERRED_FETCH, + skip_search_reorder=skip_search_reorder, + recompute_beighbor_embeddings=recompute_beighbor_embeddings, + dedup_node_dis=dedup_node_dis, + prune_ratio=prune_ratio, + batch_recompute=batch_recompute, + global_pruning=global_pruning, + ) + return QueryResponseBatch(identifiers=neighbors, distances=distances) diff --git a/packages/leann-backend-diskann/third_party/DiskANN/python/src/_static_memory_index.py b/packages/leann-backend-diskann/third_party/DiskANN/python/src/_static_memory_index.py new file mode 100644 index 0000000..1380360 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/python/src/_static_memory_index.py @@ -0,0 +1,262 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT license. + +import json +import os +import warnings +from typing import Optional + +import numpy as np + +from . import ( + DistanceMetric, + QueryResponse, + QueryResponseBatch, + VectorDType, + VectorLike, + VectorLikeBatch, +) +from . import _diskannpy as _native_dap +from ._common import ( + _assert, + _assert_is_nonnegative_uint32, + _assert_is_positive_uint32, + _castable_dtype_or_raise, + _ensure_index_metadata, + _valid_index_prefix, + _valid_metric, +) + +__ALL__ = ["StaticMemoryIndex"] + + +class StaticMemoryIndex: + """ + A StaticMemoryIndex is an immutable in-memory DiskANN index. + """ + + def __init__( + self, + index_directory: str, + num_threads: int, + initial_search_complexity: int, + index_prefix: str = "ann", + distance_metric: Optional[DistanceMetric] = None, + vector_dtype: Optional[VectorDType] = None, + dimensions: Optional[int] = None, + enable_filters: bool = False, + ): + """ + ### Parameters + - **index_directory**: The directory containing the index files. This directory must contain the following + files: + - `{index_prefix}.data` + - `{index_prefix}` + + + It may also include the following optional files: + - `{index_prefix}_vectors.bin`: Optional. `diskannpy` builder functions may create this file in the + `index_directory` if the index was created from a numpy array + - `{index_prefix}_metadata.bin`: Optional. `diskannpy` builder functions create this file to store metadata + about the index, such as vector dtype, distance metric, number of vectors and vector dimensionality. + If an index is built from the `diskann` cli tools, this file will not exist. + - **num_threads**: Number of threads to use when searching this index. (>= 0), 0 = num_threads in system + - **initial_search_complexity**: Should be set to the most common `complexity` expected to be used during the + life of this `diskannpy.DynamicMemoryIndex` object. The working scratch memory allocated is based off of + `initial_search_complexity` * `search_threads`. Note that it may be resized if a `search` or `batch_search` + operation requests a space larger than can be accommodated by these values. + - **index_prefix**: The prefix of the index files. Defaults to "ann". 
+ - **distance_metric**: A `str`, strictly one of {"l2", "mips", "cosine"}. `l2` and `cosine` are supported for all 3 + vector dtypes, but `mips` is only available for single precision floats. Default is `None`. **This + value is only used if a `{index_prefix}_metadata.bin` file does not exist.** If it does not exist, + you are required to provide it. + - **vector_dtype**: The vector dtype this index has been built with. **This value is only used if a + `{index_prefix}_metadata.bin` file does not exist.** If it does not exist, you are required to provide it. + - **dimensions**: The vector dimensionality of this index. All new vectors inserted must be the same + dimensionality. **This value is only used if a `{index_prefix}_metadata.bin` file does not exist.** If it + does not exist, you are required to provide it. + - **enable_filters**: Indexes built with filters can also be used for filtered search. + """ + index_prefix_path = _valid_index_prefix(index_directory, index_prefix) + self._labels_map = {} + self._labels_metadata = {} + if enable_filters: + try: + with open(f"{index_prefix_path}_labels_map.txt", "r") as labels_map_if: + for line in labels_map_if: + (key, val) = line.split("\t") + self._labels_map[key] = int(val) + with open( + f"{index_prefix_path}_label_metadata.json", "r" + ) as labels_metadata_if: + self._labels_metadata = json.load(labels_metadata_if) + except: # noqa: E722 + # exceptions are basically presumed to be either file not found or file not formatted correctly + raise RuntimeException("Filter labels file was unable to be processed.") + vector_dtype, metric, num_points, dims = _ensure_index_metadata( + index_prefix_path, + vector_dtype, + distance_metric, + 1, # it doesn't matter because we don't need it in this context anyway + dimensions, + ) + dap_metric = _valid_metric(metric) + + _assert_is_nonnegative_uint32(num_threads, "num_threads") + _assert_is_positive_uint32( + initial_search_complexity, "initial_search_complexity" + ) + + self._vector_dtype = vector_dtype + self._dimensions = dims + + if vector_dtype == np.uint8: + _index = _native_dap.StaticMemoryUInt8Index + elif vector_dtype == np.int8: + _index = _native_dap.StaticMemoryInt8Index + else: + _index = _native_dap.StaticMemoryFloatIndex + + self._index = _index( + distance_metric=dap_metric, + num_points=num_points, + dimensions=dims, + index_path=index_prefix_path, + num_threads=num_threads, + initial_search_complexity=initial_search_complexity, + ) + + def search( + self, + query: VectorLike, + k_neighbors: int, + complexity: int, + filter_label: str = "", + USE_DEFERRED_FETCH: bool = False, + skip_search_reorder: bool = False, + recompute_beighbor_embeddings: bool = False, + dedup_node_dis: bool = False, + prune_ratio: float = 0, + batch_recompute: bool = False, + global_pruning: bool = False, + ) -> QueryResponse: + """ + Searches the index by a single query vector. + + ### Parameters + - **query**: 1d numpy array of the same dimensionality and dtype of the index. + - **k_neighbors**: Number of neighbors to be returned. If query vector exists in index, it almost definitely + will be returned as well, so adjust your ``k_neighbors`` as appropriate. Must be > 0. + - **complexity**: Size of distance ordered list of candidate neighbors to use while searching. List size + increases accuracy at the cost of latency. Must be at least k_neighbors in size. 
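As a concrete illustration of the parameters above: a sketch assuming an in-memory index previously built under ./index with filter labels enabled, and that "en" is one of the labels in `{index_prefix}_labels_map.txt`; all values are illustrative:

import numpy as np
import diskannpy

index = diskannpy.StaticMemoryIndex(
    index_directory="./index",
    num_threads=0,
    initial_search_complexity=64,
    enable_filters=True,
)

query = np.random.rand(128).astype(np.float32)
plain = index.search(query, k_neighbors=10, complexity=64)
filtered = index.search(query, k_neighbors=10, complexity=64, filter_label="en")
print(filtered.identifiers, filtered.distances)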
+ """ + if filter_label != "": + if len(self._labels_map) == 0: + raise ValueError( + f"A filter label of {filter_label} was provided, but this class was not initialized with filters " + "enabled, e.g. StaticDiskMemory(..., enable_filters=True)" + ) + if filter_label not in self._labels_map: + raise ValueError( + f"A filter label of {filter_label} was provided, but the external(str)->internal(np.uint32) labels map " + f"does not include that label." + ) + k_neighbors = min(k_neighbors, self._labels_metadata[filter_label]) + _query = _castable_dtype_or_raise(query, expected=self._vector_dtype) + _assert(len(_query.shape) == 1, "query vector must be 1-d") + _assert( + _query.shape[0] == self._dimensions, + f"query vector must have the same dimensionality as the index; index dimensionality: {self._dimensions}, " + f"query dimensionality: {_query.shape[0]}", + ) + _assert_is_positive_uint32(k_neighbors, "k_neighbors") + _assert_is_nonnegative_uint32(complexity, "complexity") + + if k_neighbors > complexity: + warnings.warn( + f"k_neighbors={k_neighbors} asked for, but list_size={complexity} was smaller. Increasing {complexity} to {k_neighbors}" + ) + complexity = k_neighbors + + if filter_label == "": + neighbors, distances = self._index.search( + query=_query, + knn=k_neighbors, + complexity=complexity, + USE_DEFERRED_FETCH=USE_DEFERRED_FETCH, + skip_search_reorder=skip_search_reorder, + recompute_beighbor_embeddings=recompute_beighbor_embeddings, + dedup_node_dis=dedup_node_dis, + prune_ratio=prune_ratio, + batch_recompute=batch_recompute, + global_pruning=global_pruning, + ) + else: + filter = self._labels_map[filter_label] + neighbors, distances = self._index.search_with_filter( + query=query, knn=k_neighbors, complexity=complexity, filter=filter + ) + return QueryResponse(identifiers=neighbors, distances=distances) + + def batch_search( + self, + queries: VectorLikeBatch, + k_neighbors: int, + complexity: int, + num_threads: int, + USE_DEFERRED_FETCH: bool = False, + skip_search_reorder: bool = False, + recompute_beighbor_embeddings: bool = False, + dedup_node_dis: bool = False, + prune_ratio: float = 0, + batch_recompute: bool = False, + global_pruning: bool = False, + ) -> QueryResponseBatch: + """ + Searches the index by a batch of query vectors. + + This search is parallelized and far more efficient than searching for each vector individually. + + ### Parameters + - **queries**: 2d numpy array, with column dimensionality matching the index and row dimensionality being the + number of queries intended to search for in parallel. Dtype must match dtype of the index. + - **k_neighbors**: Number of neighbors to be returned. If query vector exists in index, it almost definitely + will be returned as well, so adjust your ``k_neighbors`` as appropriate. Must be > 0. + - **complexity**: Size of distance ordered list of candidate neighbors to use while searching. List size + increases accuracy at the cost of latency. Must be at least k_neighbors in size. + - **num_threads**: Number of threads to use when searching this index. 
(>= 0), 0 = num_threads in system + """ + + _queries = _castable_dtype_or_raise(queries, expected=self._vector_dtype) + _assert(len(_queries.shape) == 2, "queries must must be 2-d np array") + _assert( + _queries.shape[1] == self._dimensions, + f"query vectors must have the same dimensionality as the index; index dimensionality: {self._dimensions}, " + f"query dimensionality: {_queries.shape[1]}", + ) + _assert_is_positive_uint32(k_neighbors, "k_neighbors") + _assert_is_positive_uint32(complexity, "complexity") + _assert_is_nonnegative_uint32(num_threads, "num_threads") + + if k_neighbors > complexity: + warnings.warn( + f"k_neighbors={k_neighbors} asked for, but list_size={complexity} was smaller. Increasing {complexity} to {k_neighbors}" + ) + complexity = k_neighbors + + num_queries, dim = _queries.shape + neighbors, distances = self._index.batch_search( + queries=_queries, + num_queries=num_queries, + knn=k_neighbors, + complexity=complexity, + num_threads=num_threads, + USE_DEFERRED_FETCH=USE_DEFERRED_FETCH, + skip_search_reorder=skip_search_reorder, + recompute_beighbor_embeddings=recompute_beighbor_embeddings, + dedup_node_dis=dedup_node_dis, + prune_ratio=prune_ratio, + batch_recompute=batch_recompute, + global_pruning=global_pruning, + ) + return QueryResponseBatch(identifiers=neighbors, distances=distances) diff --git a/packages/leann-backend-diskann/third_party/DiskANN/python/src/builder.cpp b/packages/leann-backend-diskann/third_party/DiskANN/python/src/builder.cpp new file mode 100644 index 0000000..2b91eac --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/python/src/builder.cpp @@ -0,0 +1,136 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#include "builder.h" +#include "common.h" +#include "disk_utils.h" +#include "index.h" +#include "parameters.h" + +namespace diskannpy +{ +template +void build_disk_index(const diskann::Metric metric, const std::string &data_file_path, + const std::string &index_prefix_path, const uint32_t complexity, const uint32_t graph_degree, + const double final_index_ram_limit, const double indexing_ram_budget, const uint32_t num_threads, + const uint32_t pq_disk_bytes, const std::string &codebook_prefix) +{ + std::string params = std::to_string(graph_degree) + " " + std::to_string(complexity) + " " + + std::to_string(final_index_ram_limit) + " " + std::to_string(indexing_ram_budget) + " " + + std::to_string(num_threads); + if (pq_disk_bytes > 0) + params = params + " " + std::to_string(pq_disk_bytes); + if (!codebook_prefix.empty()) + params = params + " " + codebook_prefix; + diskann::build_disk_index
(data_file_path.c_str(), index_prefix_path.c_str(), params.c_str(), metric, false, + codebook_prefix); +} + +template void build_disk_index(diskann::Metric, const std::string &, const std::string &, uint32_t, uint32_t, + double, double, uint32_t, uint32_t, const std::string &); + +template void build_disk_index(diskann::Metric, const std::string &, const std::string &, uint32_t, uint32_t, + double, double, uint32_t, uint32_t, const std::string &); +template void build_disk_index(diskann::Metric, const std::string &, const std::string &, uint32_t, uint32_t, + double, double, uint32_t, uint32_t, const std::string &); + +template +std::string prepare_filtered_label_map(diskann::Index &index, const std::string &index_output_path, + const std::string &filter_labels_file, const std::string &universal_label) +{ + std::string labels_file_to_use = index_output_path + "_label_formatted.txt"; + std::string mem_labels_int_map_file = index_output_path + "_labels_map.txt"; + convert_labels_string_to_int(filter_labels_file, labels_file_to_use, mem_labels_int_map_file, universal_label); + if (!universal_label.empty()) + { + uint32_t unv_label_as_num = 0; + index.set_universal_label(unv_label_as_num); + } + return labels_file_to_use; +} + +template std::string prepare_filtered_label_map(diskann::Index &, const std::string &, + const std::string &, const std::string &); + +template std::string prepare_filtered_label_map(diskann::Index &, + const std::string &, const std::string &, const std::string &); + +template std::string prepare_filtered_label_map(diskann::Index &, + const std::string &, const std::string &, const std::string &); + +template +void build_memory_index(const diskann::Metric metric, const std::string &vector_bin_path, + const std::string &index_output_path, const uint32_t graph_degree, const uint32_t complexity, + const float alpha, const uint32_t num_threads, const bool use_pq_build, + const size_t num_pq_bytes, const bool use_opq, const bool use_tags, + const std::string &filter_labels_file, const std::string &universal_label, + const uint32_t filter_complexity) +{ + diskann::IndexWriteParameters index_build_params = diskann::IndexWriteParametersBuilder(complexity, graph_degree) + .with_filter_list_size(filter_complexity) + .with_alpha(alpha) + .with_saturate_graph(false) + .with_num_threads(num_threads) + .build(); + diskann::IndexSearchParams index_search_params = + diskann::IndexSearchParams(index_build_params.search_list_size, num_threads); + size_t data_num, data_dim; + diskann::get_bin_metadata(vector_bin_path, data_num, data_dim); + + diskann::Index index(metric, data_dim, data_num, + std::make_shared(index_build_params), + std::make_shared(index_search_params), 0, + use_tags, use_tags, false, use_pq_build, num_pq_bytes, use_opq); + + if (use_tags) + { + const std::string tags_file = index_output_path + ".tags"; + if (!file_exists(tags_file)) + { + throw std::runtime_error("tags file not found at expected path: " + tags_file); + } + TagT *tags_data; + size_t tag_dims = 1; + diskann::load_bin(tags_file, tags_data, data_num, tag_dims); + std::vector tags(tags_data, tags_data + data_num); + if (filter_labels_file.empty()) + { + index.build(vector_bin_path.c_str(), data_num, tags); + } + else + { + auto labels_file = prepare_filtered_label_map(index, index_output_path, filter_labels_file, + universal_label); + index.build_filtered_index(vector_bin_path.c_str(), labels_file, data_num, tags); + } + } + else + { + if (filter_labels_file.empty()) + { + index.build(vector_bin_path.c_str(), 
data_num); + } + else + { + auto labels_file = prepare_filtered_label_map(index, index_output_path, filter_labels_file, + universal_label); + index.build_filtered_index(vector_bin_path.c_str(), labels_file, data_num); + } + } + + index.save(index_output_path.c_str()); +} + +template void build_memory_index(diskann::Metric, const std::string &, const std::string &, uint32_t, uint32_t, + float, uint32_t, bool, size_t, bool, bool, const std::string &, + const std::string &, uint32_t); + +template void build_memory_index(diskann::Metric, const std::string &, const std::string &, uint32_t, uint32_t, + float, uint32_t, bool, size_t, bool, bool, const std::string &, + const std::string &, uint32_t); + +template void build_memory_index(diskann::Metric, const std::string &, const std::string &, uint32_t, uint32_t, + float, uint32_t, bool, size_t, bool, bool, const std::string &, + const std::string &, uint32_t); + +} // namespace diskannpy diff --git a/packages/leann-backend-diskann/third_party/DiskANN/python/src/defaults.py b/packages/leann-backend-diskann/third_party/DiskANN/python/src/defaults.py new file mode 100644 index 0000000..4e22983 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/python/src/defaults.py @@ -0,0 +1,71 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT license. + +""" +# Parameter Defaults +These parameter defaults are re-exported from the C++ extension module, and used to keep the pythonic wrapper in sync with the C++. +""" +from ._diskannpy import defaults as _defaults + +ALPHA = _defaults.ALPHA +""" +Note that, as ALPHA is a `float32` (single precision float) in C++, when converted into Python it becomes a +`float64` (double precision float). The actual value is 1.2f. The alpha parameter (>=1) is used to control the nature +and number of points that are added to the graph. A higher alpha value (e.g., 1.4) will result in fewer hops (and IOs) +to convergence, but probably more distance comparisons compared to a lower alpha value. +""" +NUM_THREADS = _defaults.NUM_THREADS +""" Number of threads to use. `0` will use all available detected logical processors """ +MAX_OCCLUSION_SIZE = _defaults.MAX_OCCLUSION_SIZE +""" +The maximum number of points that can be occluded by a single point. This is used to prevent a single point from +dominating the graph structure. If a point has more than `max_occlusion_size` neighbors closer to it than the current +point, it will not be added to the graph. This is a tradeoff between index build time and search quality. +""" +FILTER_COMPLEXITY = _defaults.FILTER_COMPLEXITY +""" +Complexity (a.k.a. `L`) references the size of the list we store candidate approximate neighbors in while doing a +filtered search. This value must be larger than `k_neighbors`, and larger values tend toward higher recall in the +resultant ANN search at the cost of more time. +""" +NUM_FROZEN_POINTS_STATIC = _defaults.NUM_FROZEN_POINTS_STATIC +""" Number of points frozen by default in a StaticMemoryIndex """ +NUM_FROZEN_POINTS_DYNAMIC = _defaults.NUM_FROZEN_POINTS_DYNAMIC +""" Number of points frozen by default in a DynamicMemoryIndex """ +SATURATE_GRAPH = _defaults.SATURATE_GRAPH +""" Whether to saturate the graph or not. Default is `True` """ +GRAPH_DEGREE = _defaults.GRAPH_DEGREE +""" +Graph degree (a.k.a. `R`) is the maximum degree allowed for a node in the index's graph structure. This degree will be +pruned throughout the course of the index build, but it will never grow beyond this value. 
Higher R values require +longer index build times, but may result in an index showing excellent recall and latency characteristics. +""" +COMPLEXITY = _defaults.COMPLEXITY +""" +Complexity (a.k.a `L`) references the size of the list we store candidate approximate neighbors in while doing build +or search tasks. It's used during index build as part of the index optimization processes. It's used in index search +classes both to help mitigate poor latencies during cold start, as well as on subsequent queries to conduct the search. +Large values will likely increase latency but also may improve recall, and tuning these values for your particular +index is certainly a reasonable choice. +""" +PQ_DISK_BYTES = _defaults.PQ_DISK_BYTES +""" +Use `0` to store uncompressed data on SSD. This allows the index to asymptote to 100% recall. If your vectors are +too large to store in SSD, this parameter provides the option to compress the vectors using PQ for storing on SSD. +This will trade off recall. You would also want this to be greater than the number of bytes used for the PQ +compressed data stored in-memory. Default is `0`. +""" +USE_PQ_BUILD = _defaults.USE_PQ_BUILD +""" + Whether to use product quantization in the index building process. Product quantization is an approximation +technique that can vastly speed up vector computations and comparisons in a spatial neighborhood, but it is still an +approximation technique. It should be preferred when index creation times take longer than you can afford for your +use case. +""" +NUM_PQ_BYTES = _defaults.NUM_PQ_BYTES +""" +The number of product quantization bytes to use. More bytes requires more resources in both memory and time, but is +like to result in better approximations. +""" +USE_OPQ = _defaults.USE_OPQ +""" Whether to use Optimized Product Quantization or not. """ diff --git a/packages/leann-backend-diskann/third_party/DiskANN/python/src/dynamic_memory_index.cpp b/packages/leann-backend-diskann/third_party/DiskANN/python/src/dynamic_memory_index.cpp new file mode 100644 index 0000000..d05e54d --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/python/src/dynamic_memory_index.cpp @@ -0,0 +1,167 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#include "parameters.h" +#include "dynamic_memory_index.h" + +#include "pybind11/numpy.h" + +namespace diskannpy +{ + +diskann::IndexWriteParameters dynamic_index_write_parameters(const uint32_t complexity, const uint32_t graph_degree, + const bool saturate_graph, + const uint32_t max_occlusion_size, const float alpha, + const uint32_t num_threads, + const uint32_t filter_complexity) +{ + return diskann::IndexWriteParametersBuilder(complexity, graph_degree) + .with_saturate_graph(saturate_graph) + .with_max_occlusion_size(max_occlusion_size) + .with_alpha(alpha) + .with_num_threads(num_threads) + .with_filter_list_size(filter_complexity) + .build(); +} + +template +diskann::Index dynamic_index_builder( + const diskann::Metric m, const diskann::IndexWriteParameters &write_params, const size_t dimensions, + const size_t max_vectors, const uint32_t initial_search_complexity, const uint32_t initial_search_threads, + const bool concurrent_consolidation, const uint32_t num_frozen_points) +{ + const uint32_t _initial_search_threads = initial_search_threads != 0 ? 
initial_search_threads : omp_get_num_procs(); + + auto index_search_params = diskann::IndexSearchParams(initial_search_complexity, _initial_search_threads); + return diskann::Index( + m, dimensions, max_vectors, + std::make_shared(write_params), // index write params + std::make_shared(index_search_params), // index_search_params + num_frozen_points, // frozen_points + true, // dynamic_index + true, // enable_tags + concurrent_consolidation, + false, // pq_dist_build + 0, // num_pq_chunks + false); // use_opq = false +} + +template +DynamicMemoryIndex
::DynamicMemoryIndex(const diskann::Metric m, const size_t dimensions, const size_t max_vectors, + const uint32_t complexity, const uint32_t graph_degree, + const bool saturate_graph, const uint32_t max_occlusion_size, + const float alpha, const uint32_t num_threads, + const uint32_t filter_complexity, const uint32_t num_frozen_points, + const uint32_t initial_search_complexity, + const uint32_t initial_search_threads, const bool concurrent_consolidation) + : _initial_search_complexity(initial_search_complexity != 0 ? initial_search_complexity : complexity), + _write_parameters(dynamic_index_write_parameters(complexity, graph_degree, saturate_graph, max_occlusion_size, + alpha, num_threads, filter_complexity)), + _index(dynamic_index_builder
(m, _write_parameters, dimensions, max_vectors, _initial_search_complexity, + initial_search_threads, concurrent_consolidation, num_frozen_points)) +{ +} + +template void DynamicMemoryIndex
::load(const std::string &index_path) +{ + const std::string tags_file = index_path + ".tags"; + if (!file_exists(tags_file)) + { + throw std::runtime_error("tags file not found at expected path: " + tags_file); + } + _index.load(index_path.c_str(), _write_parameters.num_threads, _initial_search_complexity); +} + +template +int DynamicMemoryIndex
::insert(const py::array_t &vector, + const DynamicIdType id) +{ + return _index.insert_point(vector.data(), id); +} + +template +py::array_t DynamicMemoryIndex
::batch_insert( + py::array_t &vectors, + py::array_t &ids, const int32_t num_inserts, + const int num_threads) +{ + if (num_threads == 0) + omp_set_num_threads(omp_get_num_procs()); + else + omp_set_num_threads(num_threads); + py::array_t insert_retvals(num_inserts); + +#pragma omp parallel for schedule(dynamic, 1) default(none) shared(num_inserts, insert_retvals, vectors, ids) + for (int32_t i = 0; i < num_inserts; i++) + { + insert_retvals.mutable_data()[i] = _index.insert_point(vectors.data(i), *(ids.data(i))); + } + + return insert_retvals; +} + +template int DynamicMemoryIndex
::mark_deleted(const DynamicIdType id) +{ + return this->_index.lazy_delete(id); +} + +template void DynamicMemoryIndex
::save(const std::string &save_path, const bool compact_before_save) +{ + if (save_path.empty()) + { + throw std::runtime_error("A save_path must be provided"); + } + _index.save(save_path.c_str(), compact_before_save); +} + +template +NeighborsAndDistances DynamicMemoryIndex
::search( + py::array_t &query, const uint64_t knn, const uint64_t complexity) +{ + py::array_t ids(knn); + py::array_t dists(knn); + std::vector
empty_vector; + _index.search_with_tags(query.data(), knn, complexity, ids.mutable_data(), dists.mutable_data(), empty_vector); + return std::make_pair(ids, dists); +} + +template +NeighborsAndDistances DynamicMemoryIndex
::batch_search( + py::array_t &queries, const uint64_t num_queries, const uint64_t knn, + const uint64_t complexity, const uint32_t num_threads) +{ + py::array_t ids({num_queries, knn}); + py::array_t dists({num_queries, knn}); + std::vector
empty_vector; + + if (num_threads == 0) + omp_set_num_threads(omp_get_num_procs()); + else + omp_set_num_threads(static_cast(num_threads)); + +#pragma omp parallel for schedule(dynamic, 1) default(none) \ + shared(num_queries, queries, knn, complexity, ids, dists, empty_vector) + for (int64_t i = 0; i < (int64_t)num_queries; i++) + { + _index.search_with_tags(queries.data(i), knn, complexity, ids.mutable_data(i), dists.mutable_data(i), + empty_vector); + } + + return std::make_pair(ids, dists); +} + +template void DynamicMemoryIndex
::consolidate_delete() +{ + _index.consolidate_deletes(_write_parameters); +} + +template size_t DynamicMemoryIndex
::num_points() +{ + return _index.get_num_points(); +} + +template class DynamicMemoryIndex; +template class DynamicMemoryIndex; +template class DynamicMemoryIndex; + +}; // namespace diskannpy diff --git a/packages/leann-backend-diskann/third_party/DiskANN/python/src/module.cpp b/packages/leann-backend-diskann/third_party/DiskANN/python/src/module.cpp new file mode 100644 index 0000000..0f295e1 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/python/src/module.cpp @@ -0,0 +1,142 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#include + +#include +#include + +#include "defaults.h" +#include "distance.h" + +#include "builder.h" +#include "dynamic_memory_index.h" +#include "static_disk_index.h" +#include "static_memory_index.h" + +PYBIND11_MAKE_OPAQUE(std::vector); +PYBIND11_MAKE_OPAQUE(std::vector); +PYBIND11_MAKE_OPAQUE(std::vector); +PYBIND11_MAKE_OPAQUE(std::vector); + +namespace py = pybind11; +using namespace pybind11::literals; + +struct Variant +{ + std::string disk_builder_name; + std::string memory_builder_name; + std::string dynamic_memory_index_name; + std::string static_memory_index_name; + std::string static_disk_index_name; +}; + +const Variant FloatVariant{"build_disk_float_index", "build_memory_float_index", "DynamicMemoryFloatIndex", + "StaticMemoryFloatIndex", "StaticDiskFloatIndex"}; + +const Variant UInt8Variant{"build_disk_uint8_index", "build_memory_uint8_index", "DynamicMemoryUInt8Index", + "StaticMemoryUInt8Index", "StaticDiskUInt8Index"}; + +const Variant Int8Variant{"build_disk_int8_index", "build_memory_int8_index", "DynamicMemoryInt8Index", + "StaticMemoryInt8Index", "StaticDiskInt8Index"}; + +template inline void add_variant(py::module_ &m, const Variant &variant) +{ + m.def(variant.disk_builder_name.c_str(), &diskannpy::build_disk_index, "distance_metric"_a, "data_file_path"_a, + "index_prefix_path"_a, "complexity"_a, "graph_degree"_a, "final_index_ram_limit"_a, "indexing_ram_budget"_a, + "num_threads"_a, "pq_disk_bytes"_a, "codebook_prefix"_a = ""); + + m.def(variant.memory_builder_name.c_str(), &diskannpy::build_memory_index, "distance_metric"_a, + "data_file_path"_a, "index_output_path"_a, "graph_degree"_a, "complexity"_a, "alpha"_a, "num_threads"_a, + "use_pq_build"_a, "num_pq_bytes"_a, "use_opq"_a, "use_tags"_a = false, "filter_labels_file"_a = "", + "universal_label"_a = "", "filter_complexity"_a = 0); + + py::class_>(m, variant.static_memory_index_name.c_str()) + .def(py::init(), + "distance_metric"_a, "index_path"_a, "num_points"_a, "dimensions"_a, "num_threads"_a, + "initial_search_complexity"_a) + .def("search", &diskannpy::StaticMemoryIndex::search, "query"_a, "knn"_a, "complexity"_a) + .def("search_with_filter", &diskannpy::StaticMemoryIndex::search_with_filter, "query"_a, "knn"_a, + "complexity"_a, "filter"_a) + .def("batch_search", &diskannpy::StaticMemoryIndex::batch_search, "queries"_a, "num_queries"_a, "knn"_a, + "complexity"_a, "num_threads"_a); + + py::class_>(m, variant.dynamic_memory_index_name.c_str()) + .def(py::init(), + "distance_metric"_a, "dimensions"_a, "max_vectors"_a, "complexity"_a, "graph_degree"_a, + "saturate_graph"_a = diskann::defaults::SATURATE_GRAPH, + "max_occlusion_size"_a = diskann::defaults::MAX_OCCLUSION_SIZE, "alpha"_a = diskann::defaults::ALPHA, + "num_threads"_a = diskann::defaults::NUM_THREADS, + "filter_complexity"_a = diskann::defaults::FILTER_LIST_SIZE, + "num_frozen_points"_a = diskann::defaults::NUM_FROZEN_POINTS_DYNAMIC, 
"initial_search_complexity"_a = 0, + "search_threads"_a = 0, "concurrent_consolidation"_a = true) + .def("search", &diskannpy::DynamicMemoryIndex::search, "query"_a, "knn"_a, "complexity"_a) + .def("load", &diskannpy::DynamicMemoryIndex::load, "index_path"_a) + .def("batch_search", &diskannpy::DynamicMemoryIndex::batch_search, "queries"_a, "num_queries"_a, "knn"_a, + "complexity"_a, "num_threads"_a) + .def("batch_insert", &diskannpy::DynamicMemoryIndex::batch_insert, "vectors"_a, "ids"_a, "num_inserts"_a, + "num_threads"_a) + .def("save", &diskannpy::DynamicMemoryIndex::save, "save_path"_a = "", "compact_before_save"_a = false) + .def("insert", &diskannpy::DynamicMemoryIndex::insert, "vector"_a, "id"_a) + .def("mark_deleted", &diskannpy::DynamicMemoryIndex::mark_deleted, "id"_a) + .def("consolidate_delete", &diskannpy::DynamicMemoryIndex::consolidate_delete) + .def("num_points", &diskannpy::DynamicMemoryIndex::num_points); + + py::class_>(m, variant.static_disk_index_name.c_str()) + .def(py::init(), + "distance_metric"_a, "index_path_prefix"_a, "num_threads"_a, "num_nodes_to_cache"_a, + "cache_mechanism"_a = 1, "pq_prefix"_a = "", "partition_prefix"_a) + .def("cache_bfs_levels", &diskannpy::StaticDiskIndex::cache_bfs_levels, "num_nodes_to_cache"_a) + .def("search", &diskannpy::StaticDiskIndex::search, "query"_a, "knn"_a, "complexity"_a, "beam_width"_a, + "USE_DEFERRED_FETCH"_a = false, "skip_search_reorder"_a = false, "recompute_beighbor_embeddings"_a = false, + "dedup_node_dis"_a = false, "prune_ratio"_a = 0, "batch_recompute"_a = false, "global_pruning"_a = false) + .def("batch_search", &diskannpy::StaticDiskIndex::batch_search, "queries"_a, "num_queries"_a, "knn"_a, + "complexity"_a, "beam_width"_a, "num_threads"_a, "USE_DEFERRED_FETCH"_a = false, + "skip_search_reorder"_a = false, "recompute_beighbor_embeddings"_a = false, "dedup_node_dis"_a = false, + "prune_ratio"_a = 0, "batch_recompute"_a = false, "global_pruning"_a = false); +} + +PYBIND11_MODULE(_diskannpy, m) +{ + m.doc() = "DiskANN Python Bindings"; +#ifdef VERSION_INFO + m.attr("__version__") = VERSION_INFO; +#else + m.attr("__version__") = "dev"; +#endif + + // let's re-export our defaults + py::module_ default_values = m.def_submodule( + "defaults", + "A collection of the default values used for common diskann operations. `GRAPH_DEGREE` and `COMPLEXITY` are not" + " set as defaults, but some semi-reasonable default values are selected for your convenience. 
We urge you to " + "investigate their meaning and adjust them for your use cases."); + + default_values.attr("ALPHA") = diskann::defaults::ALPHA; + default_values.attr("NUM_THREADS") = diskann::defaults::NUM_THREADS; + default_values.attr("MAX_OCCLUSION_SIZE") = diskann::defaults::MAX_OCCLUSION_SIZE; + default_values.attr("FILTER_COMPLEXITY") = diskann::defaults::FILTER_LIST_SIZE; + default_values.attr("NUM_FROZEN_POINTS_STATIC") = diskann::defaults::NUM_FROZEN_POINTS_STATIC; + default_values.attr("NUM_FROZEN_POINTS_DYNAMIC") = diskann::defaults::NUM_FROZEN_POINTS_DYNAMIC; + default_values.attr("SATURATE_GRAPH") = diskann::defaults::SATURATE_GRAPH; + default_values.attr("GRAPH_DEGREE") = diskann::defaults::MAX_DEGREE; + default_values.attr("COMPLEXITY") = diskann::defaults::BUILD_LIST_SIZE; + default_values.attr("PQ_DISK_BYTES") = (uint32_t)0; + default_values.attr("USE_PQ_BUILD") = false; + default_values.attr("NUM_PQ_BYTES") = (uint32_t)0; + default_values.attr("USE_OPQ") = false; + + add_variant(m, FloatVariant); + add_variant(m, UInt8Variant); + add_variant(m, Int8Variant); + + py::enum_(m, "Metric") + .value("L2", diskann::Metric::L2) + .value("INNER_PRODUCT", diskann::Metric::INNER_PRODUCT) + .value("COSINE", diskann::Metric::COSINE) + .export_values(); +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/python/src/py.typed b/packages/leann-backend-diskann/third_party/DiskANN/python/src/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/packages/leann-backend-diskann/third_party/DiskANN/python/src/static_disk_index.cpp b/packages/leann-backend-diskann/third_party/DiskANN/python/src/static_disk_index.cpp new file mode 100644 index 0000000..47dc09b --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/python/src/static_disk_index.cpp @@ -0,0 +1,123 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#include "static_disk_index.h" + +#include "pybind11/numpy.h" + +namespace diskannpy +{ + +template +StaticDiskIndex
+StaticDiskIndex<DT>::StaticDiskIndex(const diskann::Metric metric, const std::string &index_path_prefix,
+                                     const uint32_t num_threads, const size_t num_nodes_to_cache,
+                                     const uint32_t cache_mechanism, const std::string &pq_prefix,
+                                     const std::string &partition_prefix)
+    : _reader(std::make_shared()),
+      _graph_reader(std::make_shared()), _index(_reader, _graph_reader, metric)
+{
+    std::cout << "Before index load" << std::endl;
+
+    const uint32_t _num_threads = num_threads != 0 ? num_threads : omp_get_num_procs();
+    int load_success =
+        _index.load(_num_threads, index_path_prefix.c_str(), pq_prefix.c_str(), partition_prefix.c_str());
+    if (load_success != 0)
+    {
+        throw std::runtime_error("index load failed, " + index_path_prefix);
+    }
+    if (cache_mechanism == 1)
+    {
+        std::string sample_file = index_path_prefix + std::string("_sample_data.bin");
+        cache_sample_paths(num_nodes_to_cache, sample_file, _num_threads);
+    }
+    else if (cache_mechanism == 2)
+    {
+        cache_bfs_levels(num_nodes_to_cache);
+    }
+    std::cout << "After index load" << std::endl;
+}
+
+template <typename DT> void StaticDiskIndex<DT>::cache_bfs_levels(const size_t num_nodes_to_cache)
+{
+    std::vector<uint32_t> node_list;
+    _index.cache_bfs_levels(num_nodes_to_cache, node_list);
+    _index.load_cache_list(node_list);
+}
+
+template <typename DT>
+void StaticDiskIndex<DT>::cache_sample_paths(const size_t num_nodes_to_cache, const std::string &warmup_query_file,
+                                             const uint32_t num_threads)
+{
+    if (!file_exists(warmup_query_file))
+    {
+        return;
+    }
+
+    std::vector<uint32_t> node_list;
+    _index.generate_cache_list_from_sample_queries(warmup_query_file, 15, 4, num_nodes_to_cache, num_threads,
+                                                   node_list);
+    _index.load_cache_list(node_list);
+}
+
+template <typename DT>
+NeighborsAndDistances<StaticIdType> StaticDiskIndex<DT>::search(
+    py::array_t<DT, py::array::c_style | py::array::forcecast> &query, const uint64_t knn, const uint64_t complexity,
+    const uint64_t beam_width, const bool USE_DEFERRED_FETCH, const bool skip_search_reorder,
+    const bool recompute_beighbor_embeddings, const bool dedup_node_dis, const float prune_ratio,
+    const bool batch_recompute, const bool global_pruning)
+{
+    py::array_t<StaticIdType> ids(knn);
+    py::array_t<float> dists(knn);
+
+    std::vector<uint32_t> u32_ids(knn);
+    std::vector<uint64_t> u64_ids(knn);
+    diskann::QueryStats stats;
+
+    _index.cached_beam_search(query.data(), knn, complexity, u64_ids.data(), dists.mutable_data(), beam_width, false,
+                              &stats, USE_DEFERRED_FETCH, skip_search_reorder, recompute_beighbor_embeddings,
+                              dedup_node_dis, prune_ratio, batch_recompute, global_pruning);
+
+    auto r = ids.mutable_unchecked<1>();
+    for (uint64_t i = 0; i < knn; ++i)
+        r(i) = (unsigned)u64_ids[i];
+
+    return std::make_pair(ids, dists);
+}
+
+template <typename DT>
+NeighborsAndDistances<StaticIdType> StaticDiskIndex<DT>::batch_search(
+    py::array_t<DT, py::array::c_style | py::array::forcecast> &queries, const uint64_t num_queries, const uint64_t knn,
+    const uint64_t complexity, const uint64_t beam_width, const uint32_t num_threads, const bool USE_DEFERRED_FETCH,
+    const bool skip_search_reorder, const bool recompute_beighbor_embeddings, const bool dedup_node_dis,
+    const float prune_ratio, const bool batch_recompute, const bool global_pruning)
+{
+    py::array_t<StaticIdType> ids({num_queries, knn});
+    py::array_t<float> dists({num_queries, knn});
+
+    omp_set_num_threads(num_threads);
+
+    std::vector<uint64_t> u64_ids(knn * num_queries);
+
+#pragma omp parallel for schedule(dynamic, 1) default(none) \
+    shared(num_queries, queries, knn, complexity, u64_ids, dists, beam_width, USE_DEFERRED_FETCH, skip_search_reorder, \
+           recompute_beighbor_embeddings, dedup_node_dis, prune_ratio, batch_recompute, global_pruning)
+    for (int64_t i = 0; i < (int64_t)num_queries; i++)
+    {
+        _index.cached_beam_search(queries.data(i), knn, complexity, u64_ids.data() + i * knn, dists.mutable_data(i),
+                                  beam_width, false, nullptr, USE_DEFERRED_FETCH, skip_search_reorder,
+                                  recompute_beighbor_embeddings, dedup_node_dis, prune_ratio, batch_recompute,
+                                  global_pruning);
+    }
+
+    auto r = ids.mutable_unchecked();
+    for (uint64_t i = 0; i < num_queries; ++i)
+        for (uint64_t j = 0; j < knn; ++j)
+            r(i, j) = (uint32_t)u64_ids[i * knn + j];
+
+    return std::make_pair(ids, dists);
+}
+
+template class StaticDiskIndex<float>;
+template class StaticDiskIndex<uint8_t>;
+template class StaticDiskIndex<int8_t>;
+} // namespace diskannpy
\ No newline at end of file
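For orientation, a minimal Python sketch of how the StaticDiskIndex bindings above might be driven follows. It is illustrative only: the keyword names mirror the `.def()` signatures in module.cpp and static_disk_index.cpp, but the `StaticDiskFloatIndex` class name is assumed from upstream diskannpy naming, and the index path, vector dimension, and parameter values are placeholders rather than values taken from this commit.

# Hypothetical usage sketch; not part of the DiskANN sources in this diff.
import numpy as np

import _diskannpy as dap  # the raw extension module built from module.cpp above

# Assumes a float32 disk index already exists under this (hypothetical) prefix.
index = dap.StaticDiskFloatIndex(   # class name assumed from upstream diskannpy naming
    distance_metric=dap.Metric.L2,
    index_path_prefix="/path/to/index/ann",
    num_threads=0,                  # 0 -> omp_get_num_procs(), per the constructor above
    num_nodes_to_cache=10000,
    cache_mechanism=1,              # 1 = cache along sample-query paths, 2 = cache BFS levels
    pq_prefix="",
    partition_prefix="",
)

query = np.random.rand(128).astype(np.float32)  # dimension is a placeholder
ids, dists = index.search(
    query=query,
    knn=10,
    complexity=100,
    beam_width=4,
    USE_DEFERRED_FETCH=False,
    skip_search_reorder=False,
    recompute_beighbor_embeddings=False,  # spelling follows the binding above
    dedup_node_dis=False,
    prune_ratio=0.0,
    batch_recompute=False,
    global_pruning=False,
)

Both search and batch_search return an (ids, dists) pair of NumPy arrays, assembled from the NeighborsAndDistances pair in the C++ implementation above.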
diff --git a/packages/leann-backend-diskann/third_party/DiskANN/python/src/static_memory_index.cpp b/packages/leann-backend-diskann/third_party/DiskANN/python/src/static_memory_index.cpp
new file mode 100644
index 0000000..d3ac079
--- /dev/null
+++ b/packages/leann-backend-diskann/third_party/DiskANN/python/src/static_memory_index.cpp
@@ -0,0 +1,91 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT license.
+
+#include "static_memory_index.h"
+
+#include "pybind11/numpy.h"
+
+namespace diskannpy
+{
+
+template <typename DT>
+diskann::Index<DT, StaticIdType, filterT> static_index_builder(const diskann::Metric m, const size_t num_points,
+                                                               const size_t dimensions,
+                                                               const uint32_t initial_search_complexity)
+{
+    if (initial_search_complexity == 0)
+    {
+        throw std::runtime_error("initial_search_complexity must be a positive uint32_t");
+    }
+    auto index_search_params = diskann::IndexSearchParams(initial_search_complexity, omp_get_num_procs());
+    return diskann::Index<DT, StaticIdType, filterT>(m, dimensions, num_points,
+                                                     nullptr, // index write params
+                                                     std::make_shared<diskann::IndexSearchParams>(index_search_params), // index search params
+                                                     0,     // num frozen points
+                                                     false, // not a dynamic_index
+                                                     false, // no enable_tags/ids
+                                                     false, // no concurrent_consolidate,
+                                                     false, // pq_dist_build
+                                                     0,     // num_pq_chunks
+                                                     false); // use_opq = false
+}
+
+template <typename DT>
+StaticMemoryIndex<DT>::StaticMemoryIndex(const diskann::Metric m, const std::string &index_prefix,
+                                         const size_t num_points, const size_t dimensions, const uint32_t num_threads,
+                                         const uint32_t initial_search_complexity)
+    : _index(static_index_builder<DT>(m, num_points, dimensions, initial_search_complexity))
+{
+    const uint32_t _num_threads = num_threads != 0 ? num_threads : omp_get_num_procs();
+    _index.load(index_prefix.c_str(), _num_threads, initial_search_complexity);
+}
+
+template <typename DT>
+NeighborsAndDistances<StaticIdType> StaticMemoryIndex<DT>::search(
+    py::array_t<DT, py::array::c_style | py::array::forcecast> &query, const uint64_t knn, const uint64_t complexity)
+{
+    py::array_t<StaticIdType> ids(knn);
+    py::array_t<float> dists(knn);
+    std::vector<DT *> empty_vector;
+    _index.search(query.data(), knn, complexity, ids.mutable_data(), dists.mutable_data());
+    return std::make_pair(ids, dists);
+}
+
+template <typename DT>
+NeighborsAndDistances<StaticIdType> StaticMemoryIndex<DT>::search_with_filter(
+    py::array_t<DT, py::array::c_style | py::array::forcecast> &query, const uint64_t knn, const uint64_t complexity,
+    const filterT filter)
+{
+    py::array_t<StaticIdType> ids(knn);
+    py::array_t<float> dists(knn);
+    std::vector<DT *> empty_vector;
+    _index.search_with_filters(query.data(), filter, knn, complexity, ids.mutable_data(), dists.mutable_data());
+    return std::make_pair(ids, dists);
+}
+
+template <typename DT>
+NeighborsAndDistances<StaticIdType> StaticMemoryIndex<DT>::batch_search(
+    py::array_t<DT, py::array::c_style | py::array::forcecast> &queries, const uint64_t num_queries, const uint64_t knn,
+    const uint64_t complexity, const uint32_t num_threads)
+{
+    const uint32_t _num_threads = num_threads != 0 ? num_threads : omp_get_num_procs();
+    py::array_t<StaticIdType> ids({num_queries, knn});
+    py::array_t<float> dists({num_queries, knn});
+    std::vector<DT *>
empty_vector; + + omp_set_num_threads(static_cast(_num_threads)); + +#pragma omp parallel for schedule(dynamic, 1) default(none) shared(num_queries, queries, knn, complexity, ids, dists) + for (int64_t i = 0; i < (int64_t)num_queries; i++) + { + _index.search(queries.data(i), knn, complexity, ids.mutable_data(i), dists.mutable_data(i)); + } + + return std::make_pair(ids, dists); +} + +template class StaticMemoryIndex; +template class StaticMemoryIndex; +template class StaticMemoryIndex; + +} // namespace diskannpy \ No newline at end of file diff --git a/packages/leann-backend-diskann/third_party/DiskANN/python/tests/fixtures/__init__.py b/packages/leann-backend-diskann/third_party/DiskANN/python/tests/fixtures/__init__.py new file mode 100644 index 0000000..4aeb960 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/python/tests/fixtures/__init__.py @@ -0,0 +1,6 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT license. + +from .build_memory_index import build_random_vectors_and_memory_index +from .create_test_data import random_vectors, vectors_as_temp_file, write_vectors +from .recall import calculate_recall diff --git a/packages/leann-backend-diskann/third_party/DiskANN/python/tests/fixtures/build_memory_index.py b/packages/leann-backend-diskann/third_party/DiskANN/python/tests/fixtures/build_memory_index.py new file mode 100644 index 0000000..3c30bed --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/python/tests/fixtures/build_memory_index.py @@ -0,0 +1,51 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT license. + +import os +from tempfile import mkdtemp + +import diskannpy as dap +import numpy as np + +from .create_test_data import random_vectors + + +def build_random_vectors_and_memory_index( + dtype, metric, with_tags: bool = False, index_prefix: str = "ann", seed: int = 12345 +): + query_vectors: np.ndarray = random_vectors(1000, 10, dtype=dtype, seed=seed) + index_vectors: np.ndarray = random_vectors(10000, 10, dtype=dtype, seed=seed) + ann_dir = mkdtemp() + + if with_tags: + rng = np.random.default_rng(seed) + tags = np.arange(start=1, stop=10001, dtype=np.uint32) + rng.shuffle(tags) + else: + tags = "" + + dap.build_memory_index( + data=index_vectors, + distance_metric=metric, + index_directory=ann_dir, + graph_degree=16, + complexity=32, + alpha=1.2, + num_threads=0, + use_pq_build=False, + num_pq_bytes=8, + use_opq=False, + filter_complexity=32, + tags=tags, + index_prefix=index_prefix, + ) + + return ( + metric, + dtype, + query_vectors, + index_vectors, + ann_dir, + os.path.join(ann_dir, "vectors.bin"), + tags, + ) diff --git a/packages/leann-backend-diskann/third_party/DiskANN/python/tests/fixtures/create_test_data.py b/packages/leann-backend-diskann/third_party/DiskANN/python/tests/fixtures/create_test_data.py new file mode 100644 index 0000000..44e413e --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/python/tests/fixtures/create_test_data.py @@ -0,0 +1,40 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT license. 
+ +from contextlib import contextmanager +from pathlib import Path +from tempfile import NamedTemporaryFile +from typing import BinaryIO + +import numpy as np + + +def random_vectors(rows: int, dimensions: int, dtype, seed: int = 12345) -> np.ndarray: + rng = np.random.default_rng(seed) + if dtype == np.float32: + vectors = rng.random((rows, dimensions), dtype=dtype) + elif dtype == np.uint8: + vectors = rng.integers( + low=0, high=256, size=(rows, dimensions), dtype=dtype + ) # low is inclusive, high is exclusive + elif dtype == np.int8: + vectors = rng.integers( + low=-128, high=128, size=(rows, dimensions), dtype=dtype + ) # low is inclusive, high is exclusive + else: + raise RuntimeError("Only np.float32, np.int8, and np.uint8 are supported") + return vectors + + +def write_vectors(file_handler: BinaryIO, vectors: np.ndarray): + _ = file_handler.write(np.array(vectors.shape, dtype=np.int32).tobytes()) + _ = file_handler.write(vectors.tobytes()) + + +@contextmanager +def vectors_as_temp_file(vectors: np.ndarray) -> str: + temp = NamedTemporaryFile(mode="wb", delete=False) + write_vectors(temp, vectors) + temp.close() + yield temp.name + Path(temp.name).unlink() diff --git a/packages/leann-backend-diskann/third_party/DiskANN/python/tests/fixtures/recall.py b/packages/leann-backend-diskann/third_party/DiskANN/python/tests/fixtures/recall.py new file mode 100644 index 0000000..03f38f3 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/python/tests/fixtures/recall.py @@ -0,0 +1,24 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT license. + +import numpy as np + + +def calculate_recall( + result_set_indices: np.ndarray, truth_set_indices: np.ndarray, recall_at: int = 5 +) -> float: + """ + result_set_indices and truth_set_indices correspond by row index. the columns in each row contain the indices of + the nearest neighbors, with result_set_indices being the approximate nearest neighbor results and truth_set_indices + being the brute force nearest neighbor calculation via sklearn's NearestNeighbor class. + :param result_set_indices: + :param truth_set_indices: + :param recall_at: + :return: + """ + found = 0 + for i in range(0, result_set_indices.shape[0]): + result_set_set = set(result_set_indices[i][0:recall_at]) + truth_set_set = set(truth_set_indices[i][0:recall_at]) + found += len(result_set_set.intersection(truth_set_set)) + return found / (result_set_indices.shape[0] * recall_at) diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/Cargo.lock b/packages/leann-backend-diskann/third_party/DiskANN/rust/Cargo.lock new file mode 100644 index 0000000..3a8a252 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/Cargo.lock @@ -0,0 +1,1820 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
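As a usage note for the Python test fixtures above: the sketch below shows how random_vectors, vectors_as_temp_file, and calculate_recall compose. It assumes the fixtures package is importable under the bare name `fixtures` (an assumption about the test layout); the ground truth is computed by brute force in NumPy, and recall@k is simply the mean overlap between the candidate and true top-k id sets, as the calculate_recall docstring describes.

# Hypothetical example built only from the fixture functions defined above.
import numpy as np

from fixtures import calculate_recall, random_vectors, vectors_as_temp_file

data = random_vectors(rows=1000, dimensions=16, dtype=np.float32, seed=12345)
queries = random_vectors(rows=10, dimensions=16, dtype=np.float32, seed=54321)

# Exact top-5 neighbors per query via brute-force squared L2 distances.
d2 = ((queries[:, None, :] - data[None, :, :]) ** 2).sum(axis=-1)
truth = np.argsort(d2, axis=1)[:, :5]

# Corrupt one neighbor per query; recall@5 then drops to roughly 0.8.
approx = truth.copy()
approx[:, -1] = 0
print(calculate_recall(approx, truth, recall_at=5))

# vectors_as_temp_file writes the int32 (rows, dims) header followed by the raw
# vector bytes via write_vectors, and unlinks the file when the context exits.
with vectors_as_temp_file(data) as path:
    print("vectors written to", path)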
+version = 3 + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "ahash" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", +] + +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + +[[package]] +name = "anstream" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ca84f3628370c59db74ee214b3263d58f9aadd9b4fe7e711fd87dc452b7f163" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is-terminal", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a30da5c5f2d5e72842e00bcb57657162cdabef0931f40e2deb9b4140440cecd" + +[[package]] +name = "anstyle-parse" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "938874ff5980b03a87c5524b3ae5b59cf99b1d6bc836848df7bc5ada9643c333" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ca11d4be1bab0c8bc8734a9aa7bf4ee8316d462a08c6ac5052f888fef5b494b" +dependencies = [ + "windows-sys 0.48.0", +] + +[[package]] +name = "anstyle-wincon" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "180abfa45703aebe0093f79badacc01b8fd4ea2e35118747e5811127f926e188" +dependencies = [ + "anstyle", + "windows-sys 0.48.0", +] + +[[package]] +name = "anyhow" +version = "1.0.71" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c7d0618f0e0b7e8ff11427422b64564d5fb0be1940354bfe2e0529b18a9d9b8" + +[[package]] +name = "approx" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cab112f0a86d568ea0e627cc1d6be74a1e9cd55214684db5561995f6dad897c6" +dependencies = [ + "num-traits", +] + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "base64" +version = "0.21.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "604178f6c5c21f02dc555784810edfb88d34ac2c73b2eae109655649ee73ce3d" + +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + +[[package]] +name = "bit-vec" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07" + +[[package]] +name = "build_and_insert_delete_memory_index" +version = "0.1.0" +dependencies = [ + "diskann", + "logger", + "vector", +] + +[[package]] +name = "build_and_insert_memory_index" +version = "0.1.0" +dependencies = [ + "diskann", + "logger", + "vector", +] + +[[package]] +name = "build_disk_index" +version = "0.1.0" +dependencies = [ + "diskann", + "logger", + "openblas-src", + "vector", +] + +[[package]] +name = "build_memory_index" +version = "0.1.0" +dependencies = [ + "clap", + "diskann", + "logger", + "vector", +] + +[[package]] +name = "bumpalo" +version = "3.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1" + +[[package]] +name = "bytemuck" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17febce684fd15d89027105661fec94afb475cb995fbc59d2865198446ba2eea" + +[[package]] +name = "byteorder" +version = "1.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" + +[[package]] +name = "bytes" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89b2fd2a0dcf38d7971e2194b6b6eebab45ae01067456a7fd93d5547a61b70be" + +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + +[[package]] +name = "cblas" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3de46dff748ed7e891bc46faae117f48d2a7911041c6630aed3c61a3fe12326f" +dependencies = [ + "cblas-sys", + "libc", + "num-complex", +] + +[[package]] +name = "cblas-sys" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6feecd82cce51b0204cf063f0041d69f24ce83f680d87514b004248e7b0fa65" +dependencies = [ + "libc", +] + +[[package]] +name = "cc" +version = "1.0.79" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "ciborium" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "effd91f6c78e5a4ace8a5d3c0b6bfaec9e2baaef55f3efc00e45fb2e477ee926" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdf919175532b369853f5d5e20b26b43112613fd6fe7aee757e35f7a44642656" + +[[package]] +name = "ciborium-ll" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "defaa24ecc093c77630e6c15e17c51f5e187bf35ee514f4e2d67baaa96dae22b" +dependencies = [ + "ciborium-io", + "half 1.8.2", +] + +[[package]] +name = "clap" +version = "4.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9394150f5b4273a1763355bd1c2ec54cc5a2593f790587bcd6b2c947cfa9211" +dependencies = [ + "clap_builder", + "clap_derive", + "once_cell", +] + +[[package]] +name = "clap_builder" +version = "4.3.8" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a78fbdd3cc2914ddf37ba444114bc7765bbdcb55ec9cbe6fa054f0137400717" +dependencies = [ + "anstream", + "anstyle", + "bitflags 1.3.2", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8cd2b2a819ad6eec39e8f1d6b53001af1e5469f8c177579cdaeb313115b825f" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn 2.0.18", +] + +[[package]] +name = "clap_lex" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2da6da31387c7e4ef160ffab6d5e7f00c42626fe39aea70a7b0f1773f7dd6c1b" + +[[package]] +name = "colorchoice" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" + +[[package]] +name = "convert_f32_to_bf16" +version = "0.1.0" +dependencies = [ + "half 2.2.1", +] + +[[package]] +name = "core-foundation" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "194a7a9e6de53fa55116934067c844d9d749312f75c6f6d0980e8c252f8c2146" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" + +[[package]] +name = "crc32fast" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "criterion" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +dependencies = [ + "anes", + "cast", + "ciborium", + "clap", + "criterion-plot", + "is-terminal", + "itertools", + "num-traits", + "once_cell", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +dependencies = [ + "cast", + "itertools", +] + +[[package]] +name = "crossbeam" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2801af0d36612ae591caa9568261fddce32ce6e08a7275ea334a06a4ad021a2c" +dependencies = [ + "cfg-if", + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-epoch", + "crossbeam-queue", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200" +dependencies = [ + "cfg-if", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef" +dependencies = [ + "cfg-if", + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae211234986c545741a7dc064309f67ee1e5ad243d0e48335adc0484d960bcc7" +dependencies = [ + "autocfg", + 
"cfg-if", + "crossbeam-utils", + "memoffset", + "scopeguard", +] + +[[package]] +name = "crossbeam-queue" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1cfb3ea8a53f37c40dea2c7bedcbd88bdfae54f5e2175d6ecaff1c988353add" +dependencies = [ + "cfg-if", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a22b2d63d4d1dc0b7f1b6b2747dd0088008a9be28b6ddf0b1e7d335e3037294" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + +[[package]] +name = "dirs" +version = "3.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30baa043103c9d0c2a57cf537cc2f35623889dc0d405e6c3cccfadbc81c71309" +dependencies = [ + "dirs-sys", +] + +[[package]] +name = "dirs-sys" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b1d1d91c932ef41c0f2663aa8b0ca0342d444d842c06914aa0a7e352d0bada6" +dependencies = [ + "libc", + "redox_users", + "winapi", +] + +[[package]] +name = "diskann" +version = "0.1.0" +dependencies = [ + "approx", + "bincode", + "bit-vec", + "byteorder", + "cblas", + "cc", + "criterion", + "crossbeam", + "half 2.2.1", + "hashbrown 0.13.2", + "logger", + "num-traits", + "once_cell", + "openblas-src", + "platform", + "rand", + "rayon", + "serde", + "thiserror", + "vector", + "winapi", +] + +[[package]] +name = "either" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" + +[[package]] +name = "errno" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4bcfec3a70f97c962c307b2d2c56e358cf1d00b558d74262b5f929ee8cc7e73a" +dependencies = [ + "errno-dragonfly", + "libc", + "windows-sys 0.48.0", +] + +[[package]] +name = "errno-dragonfly" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" +dependencies = [ + "cc", + "libc", +] + +[[package]] +name = "fastrand" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e51093e27b0797c359783294ca4f0a911c270184cb10f85783b118614a1501be" +dependencies = [ + "instant", +] + +[[package]] +name = "filetime" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cbc844cecaee9d4443931972e1289c8ff485cb4cc2767cb03ca139ed6885153" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall 0.2.16", + "windows-sys 0.48.0", +] + +[[package]] +name = "fixedbitset" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" + +[[package]] +name = "flate2" +version = "1.0.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b9429470923de8e8cbd4d2dc513535400b4b3fef0319fb5c4e1f520a7bef743" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "foreign-types" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" +dependencies = [ + "foreign-types-shared", +] + 
+[[package]] +name = "foreign-types-shared" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" + +[[package]] +name = "form_urlencoded" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a62bc1cf6f830c2ec14a513a9fb124d0a213a629668a4186f329db21fe045652" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "getrandom" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "half" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" + +[[package]] +name = "half" +version = "2.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02b4af3693f1b705df946e9fe5631932443781d0aabb423b62fcd4d73f6d2fd0" +dependencies = [ + "crunchy", +] + +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" + +[[package]] +name = "hashbrown" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e" +dependencies = [ + "ahash", +] + +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + +[[package]] +name = "hermit-abi" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee512640fe35acbfb4bb779db6f0d80704c2cacfa2e39b601ef3e3f47d1ae4c7" +dependencies = [ + "libc", +] + +[[package]] +name = "hermit-abi" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286" + +[[package]] +name = "idna" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c" +dependencies = [ + "unicode-bidi", + "unicode-normalization", +] + +[[package]] +name = "indexmap" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +dependencies = [ + "autocfg", + "hashbrown 0.12.3", +] + +[[package]] +name = "instant" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "io-lifetimes" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eae7b9aee968036d54dce06cebaefd919e4472e753296daccd6d344e3e2df0c2" +dependencies = [ + "hermit-abi 0.3.1", + "libc", + "windows-sys 0.48.0", +] + +[[package]] +name = "is-terminal" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adcf93614601c8129ddf72e2d5633df827ba6551541c6d8c59520a371475be1f" +dependencies = [ + "hermit-abi 0.3.1", + "io-lifetimes", + "rustix", + "windows-sys 0.48.0", +] + +[[package]] +name = "itertools" +version 
= "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6" + +[[package]] +name = "js-sys" +version = "0.3.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c5f195fe497f702db0f318b07fdd68edb16955aed830df8363d837542f8f935a" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "libc" +version = "0.2.146" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f92be4933c13fd498862a9e02a3055f8a8d9c039ce33db97306fd5a6caa7f29b" + +[[package]] +name = "linux-raw-sys" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519" + +[[package]] +name = "load_and_insert_memory_index" +version = "0.1.0" +dependencies = [ + "diskann", + "logger", + "vector", +] + +[[package]] +name = "log" +version = "0.4.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b06a4cde4c0f271a446782e3eff8de789548ce57dbc8eca9292c27f4a42004b4" + +[[package]] +name = "logger" +version = "0.1.0" +dependencies = [ + "lazy_static", + "log", + "once_cell", + "prost", + "prost-build", + "prost-types", + "thiserror", + "vcpkg", + "win_etw_macros", + "win_etw_provider", +] + +[[package]] +name = "memoffset" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c" +dependencies = [ + "autocfg", +] + +[[package]] +name = "miniz_oxide" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" +dependencies = [ + "adler", +] + +[[package]] +name = "multimap" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a" + +[[package]] +name = "native-tls" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07226173c32f2926027b63cce4bcd8076c3552846cbe7925f3aaffeac0a3b92e" +dependencies = [ + "lazy_static", + "libc", + "log", + "openssl", + "openssl-probe", + "openssl-sys", + "schannel", + "security-framework", + "security-framework-sys", + "tempfile", +] + +[[package]] +name = "num-complex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02e0d21255c828d6f128a1e41534206671e8c3ea0c62f32291e808dc82cff17d" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" +dependencies = [ + "autocfg", +] + +[[package]] +name = "num_cpus" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fac9e2da13b5eb447a6ce3d392f23a29d8694bff781bf03a16cd9ac8697593b" +dependencies = [ + "hermit-abi 0.2.6", + "libc", +] + 
+[[package]] +name = "once_cell" +version = "1.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" + +[[package]] +name = "oorandom" +version = "11.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" + +[[package]] +name = "openblas-build" +version = "0.10.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eba42c395477605f400a8d79ee0b756cfb82abe3eb5618e35fa70d3a36010a7f" +dependencies = [ + "anyhow", + "flate2", + "native-tls", + "tar", + "thiserror", + "ureq", + "walkdir", +] + +[[package]] +name = "openblas-src" +version = "0.10.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38e5d8af0b707ac2fe1574daa88b4157da73b0de3dc7c39fe3e2c0bb64070501" +dependencies = [ + "dirs", + "openblas-build", + "vcpkg", +] + +[[package]] +name = "openssl" +version = "0.10.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79a4c6c3a2b158f7f8f2a2fc5a969fa3a068df6fc9dbb4a43845436e3af7c800" +dependencies = [ + "bitflags 2.4.1", + "cfg-if", + "foreign-types", + "libc", + "once_cell", + "openssl-macros", + "openssl-sys", +] + +[[package]] +name = "openssl-macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.18", +] + +[[package]] +name = "openssl-probe" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" + +[[package]] +name = "openssl-sys" +version = "0.9.96" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3812c071ba60da8b5677cc12bcb1d42989a65553772897a7e0355545a819838f" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "percent-encoding" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94" + +[[package]] +name = "petgraph" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4dd7d28ee937e54fe3080c91faa1c3a46c06de6252988a7f4592ba2310ef22a4" +dependencies = [ + "fixedbitset", + "indexmap", +] + +[[package]] +name = "pkg-config" +version = "0.3.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964" + +[[package]] +name = "platform" +version = "0.1.0" +dependencies = [ + "log", + "winapi", +] + +[[package]] +name = "plotters" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2c224ba00d7cadd4d5c660deaf2098e5e80e07846537c51f9cfa4be50c1fd45" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e76628b4d3a7581389a35d5b6e2139607ad7c75b17aed325f210aa91f4a9609" + +[[package]] +name = "plotters-svg" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38f6d39893cca0701371e3c27294f09797214b86f1fb951b89ade8ec04e2abab" +dependencies = [ + "plotters-backend", 
+] + +[[package]] +name = "ppv-lite86" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" + +[[package]] +name = "prettyplease" +version = "0.1.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c8646e95016a7a6c4adea95bafa8a16baab64b583356217f2c85db4a39d9a86" +dependencies = [ + "proc-macro2", + "syn 1.0.109", +] + +[[package]] +name = "proc-macro2" +version = "1.0.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dec2b086b7a862cf4de201096214fa870344cf922b2b30c167badb3af3195406" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "prost" +version = "0.11.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b82eaa1d779e9a4bc1c3217db8ffbeabaae1dca241bf70183242128d48681cd" +dependencies = [ + "bytes", + "prost-derive", +] + +[[package]] +name = "prost-build" +version = "0.11.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "119533552c9a7ffacc21e099c24a0ac8bb19c2a2a3f363de84cd9b844feab270" +dependencies = [ + "bytes", + "heck", + "itertools", + "lazy_static", + "log", + "multimap", + "petgraph", + "prettyplease", + "prost", + "prost-types", + "regex", + "syn 1.0.109", + "tempfile", + "which", +] + +[[package]] +name = "prost-derive" +version = "0.11.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5d2d8d10f3c6ded6da8b05b5fb3b8a5082514344d56c9f871412d29b4e075b4" +dependencies = [ + "anyhow", + "itertools", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "prost-types" +version = "0.11.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "213622a1460818959ac1181aaeb2dc9c7f63df720db7d788b3e24eacd1983e13" +dependencies = [ + "prost", +] + +[[package]] +name = "quote" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9ab9c7eadfd8df19006f1cf1a4aed13540ed5cbc047010ece5826e10825488" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "rayon" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d2df5196e37bcc87abebc0053e20787d73847bb33134a69841207dd0a47f03b" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b8f95bd6966f5c87776639160a66bd8ab9895d9d4ab01ddba9fc60661aebe8d" +dependencies = [ + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-utils", + "num_cpus", +] + +[[package]] +name = "redox_syscall" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" +dependencies = [ + "bitflags 1.3.2", +] + +[[package]] +name = "redox_syscall" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" +dependencies = [ + "bitflags 1.3.2", +] + +[[package]] +name = "redox_users" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b" +dependencies = [ + "getrandom", + "redox_syscall 0.2.16", + "thiserror", +] + +[[package]] +name = "regex" +version = "1.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0ab3ca65655bb1e41f2a8c8cd662eb4fb035e67c3f78da1d61dffe89d07300f" +dependencies = [ + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "436b050e76ed2903236f032a59761c1eb99e1b0aead2c257922771dab1fc8c78" + +[[package]] +name = "rustix" +version = "0.37.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4eb579851244c2c03e7c24f501c3432bed80b8f720af1d6e5b0e0f01555a035" +dependencies = [ + "bitflags 1.3.2", + "errno", + "io-lifetimes", + "libc", + "linux-raw-sys", + "windows-sys 0.48.0", +] + +[[package]] +name = "rustls-native-certs" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9aace74cb666635c918e9c12bc0d348266037aa8eb599b5cba565709a8dff00" +dependencies = [ + "openssl-probe", + "rustls-pemfile", + "schannel", + "security-framework", +] + +[[package]] +name = "rustls-pemfile" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d194b56d58803a43635bdc398cd17e383d6f71f9182b9a192c127ca42494a59b" +dependencies = [ + "base64", +] + +[[package]] +name = "ryu" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f91339c0467de62360649f8d3e185ca8de4224ff281f66000de5eb2a77a79041" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "schannel" +version = "0.1.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "713cfb06c7059f3588fb8044c0fad1d09e3c01d225e25b9220dbfdcf16dbb1b3" +dependencies = [ + "windows-sys 0.42.0", +] + +[[package]] +name = "scopeguard" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" + +[[package]] +name = "search_memory_index" +version = "0.1.0" +dependencies = [ + "bytemuck", + "diskann", + "num_cpus", + "rayon", + "vector", +] + +[[package]] +name = "security-framework" +version = "2.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fc758eb7bffce5b308734e9b0c1468893cae9ff70ebf13e7090be8dcbcc83a8" +dependencies = [ + "bitflags 1.3.2", + "core-foundation", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework-sys" +version = "2.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f51d0c0d83bec45f16480d0ce0058397a69e48fcdc52d1dc8855fb68acbd31a7" +dependencies = [ + "core-foundation-sys", + "libc", 
+] + +[[package]] +name = "serde" +version = "1.0.164" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e8c8cf938e98f769bc164923b06dce91cea1751522f46f8466461af04c9027d" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.164" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9735b638ccc51c28bf6914d90a2e9725b377144fc612c49a611fddd1b631d68" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.18", +] + +[[package]] +name = "serde_json" +version = "1.0.97" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bdf3bf93142acad5821c99197022e170842cdbc1c30482b98750c688c640842a" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "sha1_smol" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae1a47186c03a32177042e55dbc5fd5aee900b8e0069a8d70fba96a9375cd012" + +[[package]] +name = "strsim" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32d41677bcbe24c20c52e7c70b0d8db04134c5d1066bf98662e2871ad200ea3e" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tar" +version = "0.4.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b55807c0344e1e6c04d7c965f5289c39a8d94ae23ed5c0b57aabac549f871c6" +dependencies = [ + "filetime", + "libc", + "xattr", +] + +[[package]] +name = "tempfile" +version = "3.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "31c0432476357e58790aaa47a8efb0c5138f137343f3b5f23bd36a27e3b0a6d6" +dependencies = [ + "autocfg", + "cfg-if", + "fastrand", + "redox_syscall 0.3.5", + "rustix", + "windows-sys 0.48.0", +] + +[[package]] +name = "thiserror" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "978c9a314bd8dc99be594bc3c175faaa9794be04a5a5e153caba6915336cebac" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.18", +] + +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + +[[package]] +name = "tinyvec" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + +[[package]] +name = "unicode-bidi" +version = "0.3.13" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" + +[[package]] +name = "unicode-ident" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b15811caf2415fb889178633e7724bad2509101cde276048e013b9def5e51fa0" + +[[package]] +name = "unicode-normalization" +version = "0.1.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921" +dependencies = [ + "tinyvec", +] + +[[package]] +name = "ureq" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b11c96ac7ee530603dcdf68ed1557050f374ce55a5a07193ebf8cbc9f8927e9" +dependencies = [ + "base64", + "flate2", + "log", + "native-tls", + "once_cell", + "rustls-native-certs", + "url", +] + +[[package]] +name = "url" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50bff7831e19200a85b17131d085c25d7811bc4e186efdaf54bbd132994a88cb" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", +] + +[[package]] +name = "utf8parse" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" + +[[package]] +name = "uuid" +version = "1.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa2982af2eec27de306107c027578ff7f423d65f7250e40ce0fea8f45248b81" +dependencies = [ + "sha1_smol", +] + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] +name = "vector" +version = "0.1.0" +dependencies = [ + "approx", + "base64", + "bincode", + "bytemuck", + "cc", + "half 2.2.1", + "rand", + "serde", + "thiserror", +] + +[[package]] +name = "vector_base64" +version = "0.1.0" +dependencies = [ + "base64", + "bincode", + "half 2.2.1", + "serde", +] + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] +name = "w32-error" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa7c61a6bd91e168c12fc170985725340f6b458eb6f971d1cf6c34f74ffafb43" +dependencies = [ + "winapi", +] + +[[package]] +name = "walkdir" +version = "2.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36df944cda56c7d8d8b7496af378e6b16de9284591917d307c9b4d313c44e698" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "wasm-bindgen" +version = "0.2.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + 
"quote", + "syn 2.0.18", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.18", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1" + +[[package]] +name = "web-sys" +version = "0.3.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b85cbef8c220a6abc02aefd892dfc0fc23afb1c6a426316ec33253a3877249b" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "which" +version = "4.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2441c784c52b289a054b7201fc93253e288f094e2f4be9058343127c4226a269" +dependencies = [ + "either", + "libc", + "once_cell", +] + +[[package]] +name = "widestring" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "653f141f39ec16bba3c5abe400a0c60da7468261cc2cbf36805022876bc721a8" + +[[package]] +name = "win_etw_macros" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bc4c591edb4858e3445f7a60c7e0a50915aedadfa044f28f17c98c145ef54d" +dependencies = [ + "proc-macro2", + "quote", + "sha1_smol", + "syn 1.0.109", + "uuid", + "win_etw_metadata", +] + +[[package]] +name = "win_etw_metadata" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e50d0fa665033a19ecefd281b4fb5481eba2972dedbb5ec129c9392a206d652f" +dependencies = [ + "bitflags 1.3.2", +] + +[[package]] +name = "win_etw_provider" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dffcc196e0e180e73a275a91f6914f173227fd627cabac3efdd8d6adec113892" +dependencies = [ + "w32-error", + "widestring", + "win_etw_metadata", + "winapi", + "zerocopy", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +dependencies = [ + "winapi", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-sys" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" +dependencies = [ + "windows_aarch64_gnullvm 0.42.2", + "windows_aarch64_msvc 0.42.2", + "windows_i686_gnu 0.42.2", + "windows_i686_msvc 0.42.2", + "windows_x86_64_gnu 0.42.2", + "windows_x86_64_gnullvm 0.42.2", + "windows_x86_64_msvc 0.42.2", +] + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b1eb6f0cd7c80c79759c929114ef071b87354ce476d9d94271031c0497adfd5" +dependencies = [ + "windows_aarch64_gnullvm 0.48.0", + "windows_aarch64_msvc 0.48.0", + "windows_i686_gnu 0.48.0", + "windows_i686_msvc 0.48.0", + "windows_x86_64_gnu 0.48.0", + "windows_x86_64_gnullvm 0.48.0", + "windows_x86_64_msvc 0.48.0", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3" + +[[package]] +name = "windows_i686_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241" + +[[package]] +name = "windows_i686_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953" + +[[package]] 
+name = "windows_x86_64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" + +[[package]] +name = "xattr" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d1526bbe5aaeb5eb06885f4d987bcdfa5e23187055de9b83fe00156a821fabc" +dependencies = [ + "libc", +] + +[[package]] +name = "zerocopy" +version = "0.6.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "854e949ac82d619ee9a14c66a1b674ac730422372ccb759ce0c39cabcf2bf8e6" +dependencies = [ + "byteorder", + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.6.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "125139de3f6b9d625c39e2efdd73d41bdac468ccd556556440e322be0e1bbd91" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.18", +] diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/Cargo.toml b/packages/leann-backend-diskann/third_party/DiskANN/rust/Cargo.toml new file mode 100644 index 0000000..5236f96 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/Cargo.toml @@ -0,0 +1,23 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT license. + +[workspace] +members = [ + "cmd_drivers/build_memory_index", + "cmd_drivers/build_and_insert_memory_index", + "cmd_drivers/load_and_insert_memory_index", + "cmd_drivers/convert_f32_to_bf16", + "cmd_drivers/search_memory_index", + "cmd_drivers/build_disk_index", + "cmd_drivers/build_and_insert_delete_memory_index", + "vector", + "diskann", + "platform", + "logger", + "vector_base64" +] +resolver = "2" + +[profile.release] +opt-level = 3 +codegen-units=1 diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/cmd_drivers/build_and_insert_delete_memory_index/Cargo.toml b/packages/leann-backend-diskann/third_party/DiskANN/rust/cmd_drivers/build_and_insert_delete_memory_index/Cargo.toml new file mode 100644 index 0000000..42aa185 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/cmd_drivers/build_and_insert_delete_memory_index/Cargo.toml @@ -0,0 +1,14 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT license. +[package] +name = "build_and_insert_delete_memory_index" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +diskann = { path = "../../diskann" } +logger = { path = "../../logger" } +vector = { path = "../../vector" } + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/cmd_drivers/build_and_insert_delete_memory_index/src/main.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/cmd_drivers/build_and_insert_delete_memory_index/src/main.rs new file mode 100644 index 0000000..4593a9e --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/cmd_drivers/build_and_insert_delete_memory_index/src/main.rs @@ -0,0 +1,420 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. 
+ */ +use std::env; + +use diskann::{ + common::{ANNError, ANNResult}, + index::create_inmem_index, + model::{ + configuration::index_write_parameters::IndexWriteParametersBuilder, + vertex::{DIM_104, DIM_128, DIM_256}, + IndexConfiguration, + }, + utils::round_up, + utils::{file_exists, load_ids_to_delete_from_file, load_metadata_from_file, Timer}, +}; + +use vector::{FullPrecisionDistance, Half, Metric}; + +// The main function to build an in-memory index +#[allow(clippy::too_many_arguments)] +fn build_and_insert_delete_in_memory_index( + metric: Metric, + data_path: &str, + delta_path: &str, + r: u32, + l: u32, + alpha: f32, + save_path: &str, + num_threads: u32, + _use_pq_build: bool, + _num_pq_bytes: usize, + use_opq: bool, + delete_path: &str, +) -> ANNResult<()> +where + T: Default + Copy + Sync + Send + Into, + [T; DIM_104]: FullPrecisionDistance, + [T; DIM_128]: FullPrecisionDistance, + [T; DIM_256]: FullPrecisionDistance, +{ + let index_write_parameters = IndexWriteParametersBuilder::new(l, r) + .with_alpha(alpha) + .with_saturate_graph(false) + .with_num_threads(num_threads) + .build(); + + let (data_num, data_dim) = load_metadata_from_file(data_path)?; + + let config = IndexConfiguration::new( + metric, + data_dim, + round_up(data_dim as u64, 8_u64) as usize, + data_num, + false, + 0, + use_opq, + 0, + 2.0f32, + index_write_parameters, + ); + let mut index = create_inmem_index::(config)?; + + let timer = Timer::new(); + + index.build(data_path, data_num)?; + + let diff = timer.elapsed(); + + println!("Initial indexing time: {}", diff.as_secs_f64()); + + let (delta_data_num, _) = load_metadata_from_file(delta_path)?; + + index.insert(delta_path, delta_data_num)?; + + if !delete_path.is_empty() { + if !file_exists(delete_path) { + return Err(ANNError::log_index_error(format!( + "ERROR: Data file for delete {} does not exist.", + delete_path + ))); + } + + let (num_points_to_delete, vertex_ids_to_delete) = + load_ids_to_delete_from_file(delete_path)?; + index.soft_delete(vertex_ids_to_delete, num_points_to_delete)?; + } + + index.save(save_path)?; + + Ok(()) +} + +fn main() -> ANNResult<()> { + let mut data_type = String::new(); + let mut dist_fn = String::new(); + let mut data_path = String::new(); + let mut insert_path = String::new(); + let mut index_path_prefix = String::new(); + let mut delete_path = String::new(); + + let mut num_threads = 0u32; + let mut r = 64u32; + let mut l = 100u32; + + let mut alpha = 1.2f32; + let mut build_pq_bytes = 0u32; + let mut _use_pq_build = false; + let mut use_opq = false; + + let args: Vec = env::args().collect(); + let mut iter = args.iter().skip(1).peekable(); + + while let Some(arg) = iter.next() { + match arg.as_str() { + "--help" | "-h" => { + print_help(); + return Ok(()); + } + "--data_type" => { + data_type = iter + .next() + .ok_or_else(|| { + ANNError::log_index_config_error( + "data_type".to_string(), + "Missing data type".to_string(), + ) + })? + .to_owned(); + } + "--dist_fn" => { + dist_fn = iter + .next() + .ok_or_else(|| { + ANNError::log_index_config_error( + "dist_fn".to_string(), + "Missing distance function".to_string(), + ) + })? + .to_owned(); + } + "--data_path" => { + data_path = iter + .next() + .ok_or_else(|| { + ANNError::log_index_config_error( + "data_path".to_string(), + "Missing data path".to_string(), + ) + })? 
+ .to_owned(); + } + "--insert_path" => { + insert_path = iter + .next() + .ok_or_else(|| { + ANNError::log_index_config_error( + "insert_path".to_string(), + "Missing insert path".to_string(), + ) + })? + .to_owned(); + } + "--index_path_prefix" => { + index_path_prefix = iter + .next() + .ok_or_else(|| { + ANNError::log_index_config_error( + "index_path_prefix".to_string(), + "Missing index path prefix".to_string(), + ) + })? + .to_owned(); + } + "--max_degree" | "-R" => { + r = iter + .next() + .ok_or_else(|| { + ANNError::log_index_config_error( + "max_degree".to_string(), + "Missing max degree".to_string(), + ) + })? + .parse() + .map_err(|err| { + ANNError::log_index_config_error( + "max_degree".to_string(), + format!("ParseIntError: {}", err), + ) + })?; + } + "--Lbuild" | "-L" => { + l = iter + .next() + .ok_or_else(|| { + ANNError::log_index_config_error( + "Lbuild".to_string(), + "Missing build complexity".to_string(), + ) + })? + .parse() + .map_err(|err| { + ANNError::log_index_config_error( + "Lbuild".to_string(), + format!("ParseIntError: {}", err), + ) + })?; + } + "--alpha" => { + alpha = iter + .next() + .ok_or_else(|| { + ANNError::log_index_config_error( + "alpha".to_string(), + "Missing alpha".to_string(), + ) + })? + .parse() + .map_err(|err| { + ANNError::log_index_config_error( + "alpha".to_string(), + format!("ParseFloatError: {}", err), + ) + })?; + } + "--num_threads" | "-T" => { + num_threads = iter + .next() + .ok_or_else(|| { + ANNError::log_index_config_error( + "num_threads".to_string(), + "Missing number of threads".to_string(), + ) + })? + .parse() + .map_err(|err| { + ANNError::log_index_config_error( + "num_threads".to_string(), + format!("ParseIntError: {}", err), + ) + })?; + } + "--build_PQ_bytes" => { + build_pq_bytes = iter + .next() + .ok_or_else(|| { + ANNError::log_index_config_error( + "build_PQ_bytes".to_string(), + "Missing PQ bytes".to_string(), + ) + })? + .parse() + .map_err(|err| { + ANNError::log_index_config_error( + "build_PQ_bytes".to_string(), + format!("ParseIntError: {}", err), + ) + })?; + } + "--use_opq" => { + use_opq = iter + .next() + .ok_or_else(|| { + ANNError::log_index_config_error( + "use_opq".to_string(), + "Missing use_opq flag".to_string(), + ) + })? + .parse() + .map_err(|err| { + ANNError::log_index_config_error( + "use_opq".to_string(), + format!("ParseBoolError: {}", err), + ) + })?; + } + "--delete_path" => { + delete_path = iter + .next() + .ok_or_else(|| { + ANNError::log_index_config_error( + "delete_path".to_string(), + "Missing delete_path".to_string(), + ) + })? 
+ .parse() + .map_err(|err| { + ANNError::log_index_config_error( + "delete_set_path".to_string(), + format!("ParseStringError: {}", err), + ) + })?; + } + _ => { + return Err(ANNError::log_index_config_error( + String::from(""), + format!("Unknown argument: {}", arg), + )); + } + } + } + + if data_type.is_empty() + || dist_fn.is_empty() + || data_path.is_empty() + || index_path_prefix.is_empty() + { + return Err(ANNError::log_index_config_error( + String::from(""), + "Missing required arguments".to_string(), + )); + } + + _use_pq_build = build_pq_bytes > 0; + + let metric = dist_fn + .parse::() + .map_err(|err| ANNError::log_index_config_error("dist_fn".to_string(), err.to_string()))?; + + println!( + "Starting index build with R: {} Lbuild: {} alpha: {} #threads: {}", + r, l, alpha, num_threads + ); + + match data_type.as_str() { + "int8" => { + build_and_insert_delete_in_memory_index::( + metric, + &data_path, + &insert_path, + r, + l, + alpha, + &index_path_prefix, + num_threads, + _use_pq_build, + build_pq_bytes as usize, + use_opq, + &delete_path, + )?; + } + "uint8" => { + build_and_insert_delete_in_memory_index::( + metric, + &data_path, + &insert_path, + r, + l, + alpha, + &index_path_prefix, + num_threads, + _use_pq_build, + build_pq_bytes as usize, + use_opq, + &delete_path, + )?; + } + "float" => { + build_and_insert_delete_in_memory_index::( + metric, + &data_path, + &insert_path, + r, + l, + alpha, + &index_path_prefix, + num_threads, + _use_pq_build, + build_pq_bytes as usize, + use_opq, + &delete_path, + )?; + } + "f16" => { + build_and_insert_delete_in_memory_index::( + metric, + &data_path, + &insert_path, + r, + l, + alpha, + &index_path_prefix, + num_threads, + _use_pq_build, + build_pq_bytes as usize, + use_opq, + &delete_path, + )?; + } + _ => { + println!("Unsupported type. 
Use one of int8, uint8 or float."); + return Err(ANNError::log_index_config_error( + "data_type".to_string(), + "Invalid data type".to_string(), + )); + } + } + + Ok(()) +} + +fn print_help() { + println!("Arguments"); + println!("--help, -h Print information on arguments"); + println!("--data_type data type (required)"); + println!("--dist_fn distance function (required)"); + println!( + "--data_path Input data file in bin format for initial build (required)" + ); + println!("--insert_path Input data file in bin format for insert (required)"); + println!("--index_path_prefix Path prefix for saving index file components (required)"); + println!("--max_degree, -R Maximum graph degree (default: 64)"); + println!("--Lbuild, -L Build complexity, higher value results in better graphs (default: 100)"); + println!("--alpha alpha controls density and diameter of graph, set 1 for sparse graph, 1.2 or 1.4 for denser graphs with lower diameter (default: 1.2)"); + println!("--num_threads, -T Number of threads used for building index (defaults to num of CPU logic cores)"); + println!("--build_PQ_bytes Number of PQ bytes to build the index; 0 for full precision build (default: 0)"); + println!("--use_opq Set true for OPQ compression while using PQ distance comparisons for building the index, and false for PQ compression (default: false)"); +} + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/cmd_drivers/build_and_insert_memory_index/Cargo.toml b/packages/leann-backend-diskann/third_party/DiskANN/rust/cmd_drivers/build_and_insert_memory_index/Cargo.toml new file mode 100644 index 0000000..d9811fc --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/cmd_drivers/build_and_insert_memory_index/Cargo.toml @@ -0,0 +1,14 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT license. +[package] +name = "build_and_insert_memory_index" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +diskann = { path = "../../diskann" } +logger = { path = "../../logger" } +vector = { path = "../../vector" } + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/cmd_drivers/build_and_insert_memory_index/src/main.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/cmd_drivers/build_and_insert_memory_index/src/main.rs new file mode 100644 index 0000000..46e4ba4 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/cmd_drivers/build_and_insert_memory_index/src/main.rs @@ -0,0 +1,382 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. 
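+ *
+ * Illustrative invocation (file names below are placeholders; the flags match
+ * the argument parser in main() in this file):
+ *
+ *   cargo run --release --bin build_and_insert_memory_index -- \
+ *       --data_type float --dist_fn l2 --data_path base.bin \
+ *       --insert_path delta.bin --index_path_prefix idx_ -R 64 -L 100 -T 8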
+ */ +use std::env; + +use diskann::{ + common::{ANNResult, ANNError}, + index::create_inmem_index, + utils::round_up, + model::{ + IndexWriteParametersBuilder, + IndexConfiguration, + vertex::{DIM_128, DIM_256, DIM_104} + }, + utils::{load_metadata_from_file, Timer}, +}; + +use vector::{Metric, FullPrecisionDistance, Half}; + +// The main function to build an in-memory index +#[allow(clippy::too_many_arguments)] +fn build_and_insert_in_memory_index ( + metric: Metric, + data_path: &str, + delta_path: &str, + r: u32, + l: u32, + alpha: f32, + save_path: &str, + num_threads: u32, + _use_pq_build: bool, + _num_pq_bytes: usize, + use_opq: bool +) -> ANNResult<()> +where + T: Default + Copy + Sync + Send + Into, + [T; DIM_104]: FullPrecisionDistance, + [T; DIM_128]: FullPrecisionDistance, + [T; DIM_256]: FullPrecisionDistance +{ + let index_write_parameters = IndexWriteParametersBuilder::new(l, r) + .with_alpha(alpha) + .with_saturate_graph(false) + .with_num_threads(num_threads) + .build(); + + let (data_num, data_dim) = load_metadata_from_file(data_path)?; + + let config = IndexConfiguration::new( + metric, + data_dim, + round_up(data_dim as u64, 8_u64) as usize, + data_num, + false, + 0, + use_opq, + 0, + 2.0f32, + index_write_parameters, + ); + let mut index = create_inmem_index::(config)?; + + let timer = Timer::new(); + + index.build(data_path, data_num)?; + + let diff = timer.elapsed(); + + println!("Initial indexing time: {}", diff.as_secs_f64()); + + let (delta_data_num, _) = load_metadata_from_file(delta_path)?; + + index.insert(delta_path, delta_data_num)?; + + index.save(save_path)?; + + Ok(()) +} + +fn main() -> ANNResult<()> { + let mut data_type = String::new(); + let mut dist_fn = String::new(); + let mut data_path = String::new(); + let mut insert_path = String::new(); + let mut index_path_prefix = String::new(); + + let mut num_threads = 0u32; + let mut r = 64u32; + let mut l = 100u32; + + let mut alpha = 1.2f32; + let mut build_pq_bytes = 0u32; + let mut _use_pq_build = false; + let mut use_opq = false; + + let args: Vec = env::args().collect(); + let mut iter = args.iter().skip(1).peekable(); + + while let Some(arg) = iter.next() { + match arg.as_str() { + "--help" | "-h" => { + print_help(); + return Ok(()); + } + "--data_type" => { + data_type = iter + .next() + .ok_or_else(|| { + ANNError::log_index_config_error( + "data_type".to_string(), + "Missing data type".to_string(), + ) + })? + .to_owned(); + } + "--dist_fn" => { + dist_fn = iter + .next() + .ok_or_else(|| { + ANNError::log_index_config_error( + "dist_fn".to_string(), + "Missing distance function".to_string(), + ) + })? + .to_owned(); + } + "--data_path" => { + data_path = iter + .next() + .ok_or_else(|| { + ANNError::log_index_config_error( + "data_path".to_string(), + "Missing data path".to_string(), + ) + })? + .to_owned(); + } + "--insert_path" => { + insert_path = iter + .next() + .ok_or_else(|| { + ANNError::log_index_config_error( + "insert_path".to_string(), + "Missing insert path".to_string(), + ) + })? + .to_owned(); + } + "--index_path_prefix" => { + index_path_prefix = iter + .next() + .ok_or_else(|| { + ANNError::log_index_config_error( + "index_path_prefix".to_string(), + "Missing index path prefix".to_string(), + ) + })? + .to_owned(); + } + "--max_degree" | "-R" => { + r = iter + .next() + .ok_or_else(|| { + ANNError::log_index_config_error( + "max_degree".to_string(), + "Missing max degree".to_string(), + ) + })? 
+ .parse() + .map_err(|err| { + ANNError::log_index_config_error( + "max_degree".to_string(), + format!("ParseIntError: {}", err), + ) + })?; + } + "--Lbuild" | "-L" => { + l = iter + .next() + .ok_or_else(|| { + ANNError::log_index_config_error( + "Lbuild".to_string(), + "Missing build complexity".to_string(), + ) + })? + .parse() + .map_err(|err| { + ANNError::log_index_config_error( + "Lbuild".to_string(), + format!("ParseIntError: {}", err), + ) + })?; + } + "--alpha" => { + alpha = iter + .next() + .ok_or_else(|| { + ANNError::log_index_config_error( + "alpha".to_string(), + "Missing alpha".to_string(), + ) + })? + .parse() + .map_err(|err| { + ANNError::log_index_config_error( + "alpha".to_string(), + format!("ParseFloatError: {}", err), + ) + })?; + } + "--num_threads" | "-T" => { + num_threads = iter + .next() + .ok_or_else(|| { + ANNError::log_index_config_error( + "num_threads".to_string(), + "Missing number of threads".to_string(), + ) + })? + .parse() + .map_err(|err| { + ANNError::log_index_config_error( + "num_threads".to_string(), + format!("ParseIntError: {}", err), + ) + })?; + } + "--build_PQ_bytes" => { + build_pq_bytes = iter + .next() + .ok_or_else(|| { + ANNError::log_index_config_error( + "build_PQ_bytes".to_string(), + "Missing PQ bytes".to_string(), + ) + })? + .parse() + .map_err(|err| { + ANNError::log_index_config_error( + "build_PQ_bytes".to_string(), + format!("ParseIntError: {}", err), + ) + })?; + } + "--use_opq" => { + use_opq = iter + .next() + .ok_or_else(|| { + ANNError::log_index_config_error( + "use_opq".to_string(), + "Missing use_opq flag".to_string(), + ) + })? + .parse() + .map_err(|err| { + ANNError::log_index_config_error( + "use_opq".to_string(), + format!("ParseBoolError: {}", err), + ) + })?; + } + _ => { + return Err(ANNError::log_index_config_error( + String::from(""), + format!("Unknown argument: {}", arg), + )); + } + } + } + + if data_type.is_empty() + || dist_fn.is_empty() + || data_path.is_empty() + || index_path_prefix.is_empty() + { + return Err(ANNError::log_index_config_error( + String::from(""), + "Missing required arguments".to_string(), + )); + } + + _use_pq_build = build_pq_bytes > 0; + + let metric = dist_fn + .parse::() + .map_err(|err| ANNError::log_index_config_error( + "dist_fn".to_string(), + err.to_string(), + ))?; + + println!( + "Starting index build with R: {} Lbuild: {} alpha: {} #threads: {}", + r, l, alpha, num_threads + ); + + match data_type.as_str() { + "int8" => { + build_and_insert_in_memory_index::( + metric, + &data_path, + &insert_path, + r, + l, + alpha, + &index_path_prefix, + num_threads, + _use_pq_build, + build_pq_bytes as usize, + use_opq, + )?; + } + "uint8" => { + build_and_insert_in_memory_index::( + metric, + &data_path, + &insert_path, + r, + l, + alpha, + &index_path_prefix, + num_threads, + _use_pq_build, + build_pq_bytes as usize, + use_opq, + )?; + } + "float" => { + build_and_insert_in_memory_index::( + metric, + &data_path, + &insert_path, + r, + l, + alpha, + &index_path_prefix, + num_threads, + _use_pq_build, + build_pq_bytes as usize, + use_opq, + )?; + } + "f16" => { + build_and_insert_in_memory_index::( + metric, + &data_path, + &insert_path, + r, + l, + alpha, + &index_path_prefix, + num_threads, + _use_pq_build, + build_pq_bytes as usize, + use_opq, + )?; + } + _ => { + println!("Unsupported type. 
Use one of int8, uint8 or float."); + return Err(ANNError::log_index_config_error("data_type".to_string(), "Invalid data type".to_string())); + } + } + + Ok(()) +} + +fn print_help() { + println!("Arguments"); + println!("--help, -h Print information on arguments"); + println!("--data_type data type (required)"); + println!("--dist_fn distance function (required)"); + println!("--data_path Input data file in bin format for initial build (required)"); + println!("--insert_path Input data file in bin format for insert (required)"); + println!("--index_path_prefix Path prefix for saving index file components (required)"); + println!("--max_degree, -R Maximum graph degree (default: 64)"); + println!("--Lbuild, -L Build complexity, higher value results in better graphs (default: 100)"); + println!("--alpha alpha controls density and diameter of graph, set 1 for sparse graph, 1.2 or 1.4 for denser graphs with lower diameter (default: 1.2)"); + println!("--num_threads, -T Number of threads used for building index (defaults to num of CPU logic cores)"); + println!("--build_PQ_bytes Number of PQ bytes to build the index; 0 for full precision build (default: 0)"); + println!("--use_opq Set true for OPQ compression while using PQ distance comparisons for building the index, and false for PQ compression (default: false)"); +} + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/cmd_drivers/build_disk_index/Cargo.toml b/packages/leann-backend-diskann/third_party/DiskANN/rust/cmd_drivers/build_disk_index/Cargo.toml new file mode 100644 index 0000000..afe5e5b --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/cmd_drivers/build_disk_index/Cargo.toml @@ -0,0 +1,14 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT license. +[package] +name = "build_disk_index" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +diskann = { path = "../../diskann" } +logger = { path = "../../logger" } +vector = { path = "../../vector" } +openblas-src = { version = "0.10.8", features = ["system", "static"] } diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/cmd_drivers/build_disk_index/src/main.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/cmd_drivers/build_disk_index/src/main.rs new file mode 100644 index 0000000..e0b6dbe --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/cmd_drivers/build_disk_index/src/main.rs @@ -0,0 +1,377 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. 
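+ *
+ * Illustrative invocation (placeholder paths; both DRAM budgets are given in
+ * GB, as described in print_help() at the bottom of this file):
+ *
+ *   cargo run --release --bin build_disk_index -- \
+ *       --data_type float --dist_fn l2 --data_path base.bin \
+ *       --index_path_prefix disk_idx_ -R 64 -L 100 -T 16 \
+ *       --search_DRAM_budget 4 --build_DRAM_budget 16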
+ */ +use std::env; + +use diskann::{ + common::{ANNError, ANNResult}, + index::ann_disk_index::create_disk_index, + model::{ + default_param_vals::ALPHA, + vertex::{DIM_104, DIM_128, DIM_256}, + DiskIndexBuildParameters, IndexConfiguration, IndexWriteParametersBuilder, + }, + storage::DiskIndexStorage, + utils::round_up, + utils::{load_metadata_from_file, Timer}, +}; + +use vector::{FullPrecisionDistance, Half, Metric}; + +/// The main function to build a disk index +#[allow(clippy::too_many_arguments)] +fn build_disk_index( + metric: Metric, + data_path: &str, + r: u32, + l: u32, + index_path_prefix: &str, + num_threads: u32, + search_ram_limit_gb: f64, + index_build_ram_limit_gb: f64, + num_pq_chunks: usize, + use_opq: bool, +) -> ANNResult<()> +where + T: Default + Copy + Sync + Send + Into, + [T; DIM_104]: FullPrecisionDistance, + [T; DIM_128]: FullPrecisionDistance, + [T; DIM_256]: FullPrecisionDistance, +{ + let disk_index_build_parameters = + DiskIndexBuildParameters::new(search_ram_limit_gb, index_build_ram_limit_gb)?; + + let index_write_parameters = IndexWriteParametersBuilder::new(l, r) + .with_saturate_graph(true) + .with_num_threads(num_threads) + .build(); + + let (data_num, data_dim) = load_metadata_from_file(data_path)?; + + let config = IndexConfiguration::new( + metric, + data_dim, + round_up(data_dim as u64, 8_u64) as usize, + data_num, + num_pq_chunks > 0, + num_pq_chunks, + use_opq, + 0, + 1f32, + index_write_parameters, + ); + let storage = DiskIndexStorage::new(data_path.to_string(), index_path_prefix.to_string())?; + let mut index = create_disk_index::(Some(disk_index_build_parameters), config, storage)?; + + let timer = Timer::new(); + + index.build("")?; + + let diff = timer.elapsed(); + println!("Indexing time: {}", diff.as_secs_f64()); + + Ok(()) +} + +fn main() -> ANNResult<()> { + let mut data_type = String::new(); + let mut dist_fn = String::new(); + let mut data_path = String::new(); + let mut index_path_prefix = String::new(); + + let mut num_threads = 0u32; + let mut r = 64u32; + let mut l = 100u32; + let mut search_ram_limit_gb = 0f64; + let mut index_build_ram_limit_gb = 0f64; + + let mut build_pq_bytes = 0u32; + let mut use_opq = false; + + let args: Vec = env::args().collect(); + let mut iter = args.iter().skip(1).peekable(); + + while let Some(arg) = iter.next() { + match arg.as_str() { + "--help" | "-h" => { + print_help(); + return Ok(()); + } + "--data_type" => { + data_type = iter + .next() + .ok_or_else(|| { + ANNError::log_index_config_error( + "data_type".to_string(), + "Missing data type".to_string(), + ) + })? + .to_owned(); + } + "--dist_fn" => { + dist_fn = iter + .next() + .ok_or_else(|| { + ANNError::log_index_config_error( + "dist_fn".to_string(), + "Missing distance function".to_string(), + ) + })? + .to_owned(); + } + "--data_path" => { + data_path = iter + .next() + .ok_or_else(|| { + ANNError::log_index_config_error( + "data_path".to_string(), + "Missing data path".to_string(), + ) + })? + .to_owned(); + } + "--index_path_prefix" => { + index_path_prefix = iter + .next() + .ok_or_else(|| { + ANNError::log_index_config_error( + "index_path_prefix".to_string(), + "Missing index path prefix".to_string(), + ) + })? + .to_owned(); + } + "--max_degree" | "-R" => { + r = iter + .next() + .ok_or_else(|| { + ANNError::log_index_config_error( + "max_degree".to_string(), + "Missing max degree".to_string(), + ) + })? 
+ .parse() + .map_err(|err| { + ANNError::log_index_config_error( + "max_degree".to_string(), + format!("ParseIntError: {}", err), + ) + })?; + } + "--Lbuild" | "-L" => { + l = iter + .next() + .ok_or_else(|| { + ANNError::log_index_config_error( + "Lbuild".to_string(), + "Missing build complexity".to_string(), + ) + })? + .parse() + .map_err(|err| { + ANNError::log_index_config_error( + "Lbuild".to_string(), + format!("ParseIntError: {}", err), + ) + })?; + } + "--num_threads" | "-T" => { + num_threads = iter + .next() + .ok_or_else(|| { + ANNError::log_index_config_error( + "num_threads".to_string(), + "Missing number of threads".to_string(), + ) + })? + .parse() + .map_err(|err| { + ANNError::log_index_config_error( + "num_threads".to_string(), + format!("ParseIntError: {}", err), + ) + })?; + } + "--build_PQ_bytes" => { + build_pq_bytes = iter + .next() + .ok_or_else(|| { + ANNError::log_index_config_error( + "build_PQ_bytes".to_string(), + "Missing PQ bytes".to_string(), + ) + })? + .parse() + .map_err(|err| { + ANNError::log_index_config_error( + "build_PQ_bytes".to_string(), + format!("ParseIntError: {}", err), + ) + })?; + } + "--use_opq" => { + use_opq = iter + .next() + .ok_or_else(|| { + ANNError::log_index_config_error( + "use_opq".to_string(), + "Missing use_opq flag".to_string(), + ) + })? + .parse() + .map_err(|err| { + ANNError::log_index_config_error( + "use_opq".to_string(), + format!("ParseBoolError: {}", err), + ) + })?; + } + "--search_DRAM_budget" | "-B" => { + search_ram_limit_gb = iter + .next() + .ok_or_else(|| { + ANNError::log_index_config_error( + "search_DRAM_budget".to_string(), + "Missing search_DRAM_budget flag".to_string(), + ) + })? + .parse() + .map_err(|err| { + ANNError::log_index_config_error( + "search_DRAM_budget".to_string(), + format!("ParseBoolError: {}", err), + ) + })?; + } + "--build_DRAM_budget" | "-M" => { + index_build_ram_limit_gb = iter + .next() + .ok_or_else(|| { + ANNError::log_index_config_error( + "build_DRAM_budget".to_string(), + "Missing build_DRAM_budget flag".to_string(), + ) + })? 
+ .parse() + .map_err(|err| { + ANNError::log_index_config_error( + "build_DRAM_budget".to_string(), + format!("ParseBoolError: {}", err), + ) + })?; + } + _ => { + return Err(ANNError::log_index_config_error( + String::from(""), + format!("Unknown argument: {}", arg), + )); + } + } + } + + if data_type.is_empty() + || dist_fn.is_empty() + || data_path.is_empty() + || index_path_prefix.is_empty() + { + return Err(ANNError::log_index_config_error( + String::from(""), + "Missing required arguments".to_string(), + )); + } + + let metric = dist_fn + .parse::() + .map_err(|err| ANNError::log_index_config_error("dist_fn".to_string(), err.to_string()))?; + + println!( + "Starting index build with R: {} Lbuild: {} alpha: {} #threads: {} search_DRAM_budget: {} build_DRAM_budget: {}", + r, l, ALPHA, num_threads, search_ram_limit_gb, index_build_ram_limit_gb + ); + + let err = match data_type.as_str() { + "int8" => build_disk_index::( + metric, + &data_path, + r, + l, + &index_path_prefix, + num_threads, + search_ram_limit_gb, + index_build_ram_limit_gb, + build_pq_bytes as usize, + use_opq, + ), + "uint8" => build_disk_index::( + metric, + &data_path, + r, + l, + &index_path_prefix, + num_threads, + search_ram_limit_gb, + index_build_ram_limit_gb, + build_pq_bytes as usize, + use_opq, + ), + "float" => build_disk_index::( + metric, + &data_path, + r, + l, + &index_path_prefix, + num_threads, + search_ram_limit_gb, + index_build_ram_limit_gb, + build_pq_bytes as usize, + use_opq, + ), + "f16" => build_disk_index::( + metric, + &data_path, + r, + l, + &index_path_prefix, + num_threads, + search_ram_limit_gb, + index_build_ram_limit_gb, + build_pq_bytes as usize, + use_opq, + ), + _ => { + println!("Unsupported type. Use one of int8, uint8, float or f16."); + return Err(ANNError::log_index_config_error( + "data_type".to_string(), + "Invalid data type".to_string(), + )); + } + }; + + match err { + Ok(_) => { + println!("Index build completed successfully"); + Ok(()) + } + Err(err) => { + eprintln!("Error: {:?}", err); + Err(err) + } + } +} + +fn print_help() { + println!("Arguments"); + println!("--help, -h Print information on arguments"); + println!("--data_type data type (required)"); + println!("--dist_fn distance function (required)"); + println!("--data_path Input data file in bin format (required)"); + println!("--index_path_prefix Path prefix for saving index file components (required)"); + println!("--max_degree, -R Maximum graph degree (default: 64)"); + println!("--Lbuild, -L Build complexity, higher value results in better graphs (default: 100)"); + println!("--search_DRAM_budget Bound on the memory footprint of the index at search time in GB. 
Once built, the index will use up only the specified RAM limit, the rest will reside on disk"); + println!("--build_DRAM_budget Limit on the memory allowed for building the index in GB"); + println!("--num_threads, -T Number of threads used for building index (defaults to num of CPU logic cores)"); + println!("--build_PQ_bytes Number of PQ bytes to build the index; 0 for full precision build (default: 0)"); + println!("--use_opq Set true for OPQ compression while using PQ distance comparisons for building the index, and false for PQ compression (default: false)"); +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/cmd_drivers/build_memory_index/Cargo.toml b/packages/leann-backend-diskann/third_party/DiskANN/rust/cmd_drivers/build_memory_index/Cargo.toml new file mode 100644 index 0000000..eb4708d --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/cmd_drivers/build_memory_index/Cargo.toml @@ -0,0 +1,15 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT license. +[package] +name = "build_memory_index" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +clap = { version = "4.3.8", features = ["derive"] } +diskann = { path = "../../diskann" } +logger = { path = "../../logger" } +vector = { path = "../../vector" } + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/cmd_drivers/build_memory_index/src/args.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/cmd_drivers/build_memory_index/src/args.rs new file mode 100644 index 0000000..ede31f2 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/cmd_drivers/build_memory_index/src/args.rs @@ -0,0 +1,62 @@ +use clap::{Args, Parser}; + +#[derive(Debug, Args)] +enum DataType { + /// Float data type. + Float, + + /// Half data type. + FP16, +} + +#[derive(Debug, Args)] +enum DistanceFunction { + /// Euclidean distance. + L2, + + /// Cosine distance. + Cosine, +} + +#[derive(Debug, Parser)] +struct BuildMemoryIndexArgs { + /// Data type of the vectors. + #[clap(long, default_value = "float")] + pub data_type: DataType, + + /// Distance function to use. + #[clap(long, default_value = "l2")] + pub dist_fn: Metric, + + /// Path to the data file. The file should be in the format specified by the `data_type` argument. + #[clap(long, short, required = true)] + pub data_path: String, + + /// Path to the index file. The index will be saved to this prefixed name. + #[clap(long, short, required = true)] + pub index_path_prefix: String, + + /// Number of max out degree from a vertex. + #[clap(long, default_value = "32")] + pub max_degree: usize, + + /// Number of candidates to consider when building out edges + #[clap(long, short default_value = "50")] + pub l_build: usize, + + /// Alpha to use to build diverse edges + #[clap(long, short default_value = "1.0")] + pub alpha: f32, + + /// Number of threads to use. + #[clap(long, short, default_value = "1")] + pub num_threads: u8, + + /// Number of PQ bytes to use. + #[clap(long, short, default_value = "8")] + pub build_pq_bytes: usize, + + /// Use opq? 
+ #[clap(long, short, default_value = "false")] + pub use_opq: bool, +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/cmd_drivers/build_memory_index/src/main.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/cmd_drivers/build_memory_index/src/main.rs new file mode 100644 index 0000000..cdccc00 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/cmd_drivers/build_memory_index/src/main.rs @@ -0,0 +1,174 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +use clap::{Parser, ValueEnum}; +use std::path::PathBuf; + +use diskann::{ + common::ANNResult, + index::create_inmem_index, + model::{ + vertex::{DIM_104, DIM_128, DIM_256}, + IndexConfiguration, IndexWriteParametersBuilder, + }, + utils::round_up, + utils::{load_metadata_from_file, Timer}, +}; + +use vector::{FullPrecisionDistance, Half, Metric}; + +/// The main function to build an in-memory index +#[allow(clippy::too_many_arguments)] +fn build_in_memory_index( + metric: Metric, + data_path: &str, + r: u32, + l: u32, + alpha: f32, + save_path: &str, + num_threads: u32, + _use_pq_build: bool, + _num_pq_bytes: usize, + use_opq: bool, +) -> ANNResult<()> +where + T: Default + Copy + Sync + Send + Into, + [T; DIM_104]: FullPrecisionDistance, + [T; DIM_128]: FullPrecisionDistance, + [T; DIM_256]: FullPrecisionDistance, +{ + let index_write_parameters = IndexWriteParametersBuilder::new(l, r) + .with_alpha(alpha) + .with_saturate_graph(false) + .with_num_threads(num_threads) + .build(); + + let (data_num, data_dim) = load_metadata_from_file(data_path)?; + + let config = IndexConfiguration::new( + metric, + data_dim, + round_up(data_dim as u64, 8_u64) as usize, + data_num, + false, + 0, + use_opq, + 0, + 1f32, + index_write_parameters, + ); + let mut index = create_inmem_index::(config)?; + + let timer = Timer::new(); + + index.build(data_path, data_num)?; + + let diff = timer.elapsed(); + + println!("Indexing time: {}", diff.as_secs_f64()); + index.save(save_path)?; + + Ok(()) +} + +fn main() -> ANNResult<()> { + let args = BuildMemoryIndexArgs::parse(); + + let _use_pq_build = args.build_pq_bytes > 0; + + println!( + "Starting index build with R: {} Lbuild: {} alpha: {} #threads: {}", + args.max_degree, args.l_build, args.alpha, args.num_threads + ); + + let err = match args.data_type { + DataType::Float => build_in_memory_index::( + args.dist_fn, + &args.data_path.to_string_lossy(), + args.max_degree, + args.l_build, + args.alpha, + &args.index_path_prefix, + args.num_threads, + _use_pq_build, + args.build_pq_bytes, + args.use_opq, + ), + DataType::FP16 => build_in_memory_index::( + args.dist_fn, + &args.data_path.to_string_lossy(), + args.max_degree, + args.l_build, + args.alpha, + &args.index_path_prefix, + args.num_threads, + _use_pq_build, + args.build_pq_bytes, + args.use_opq, + ), + }; + + match err { + Ok(_) => { + println!("Index build completed successfully"); + Ok(()) + } + Err(err) => { + eprintln!("Error: {:?}", err); + Err(err) + } + } +} + +#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, ValueEnum, Debug)] +enum DataType { + /// Float data type. + Float, + + /// Half data type. + FP16, +} + +#[derive(Debug, Parser)] +struct BuildMemoryIndexArgs { + /// data type (required) + #[arg(long = "data_type", default_value = "float")] + pub data_type: DataType, + + /// Distance function to use. + #[arg(long = "dist_fn", default_value = "l2")] + pub dist_fn: Metric, + + /// Path to the data file. 
The file should be in the format specified by the `data_type` argument. + #[arg(long = "data_path", short, required = true)] + pub data_path: PathBuf, + + /// Path to the index file. The index will be saved to this prefixed name. + #[arg(long = "index_path_prefix", short, required = true)] + pub index_path_prefix: String, + + /// Number of max out degree from a vertex. + #[arg(long = "max_degree", short = 'R', default_value = "64")] + pub max_degree: u32, + + /// Number of candidates to consider when building out edges + #[arg(long = "l_build", short = 'L', default_value = "100")] + pub l_build: u32, + + /// alpha controls density and diameter of graph, set 1 for sparse graph, 1.2 or 1.4 for denser graphs with lower diameter + #[arg(long, short, default_value = "1.2")] + pub alpha: f32, + + /// Number of threads to use. + #[arg(long = "num_threads", short = 'T', default_value = "1")] + pub num_threads: u32, + + /// Number of PQ bytes to build the index; 0 for full precision build + #[arg(long = "build_pq_bytes", short, default_value = "0")] + pub build_pq_bytes: usize, + + /// Set true for OPQ compression while using PQ distance comparisons for building the index, and false for PQ compression + #[arg(long = "use_opq", short, default_value = "false")] + pub use_opq: bool, +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/cmd_drivers/convert_f32_to_bf16/Cargo.toml b/packages/leann-backend-diskann/third_party/DiskANN/rust/cmd_drivers/convert_f32_to_bf16/Cargo.toml new file mode 100644 index 0000000..1993aab --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/cmd_drivers/convert_f32_to_bf16/Cargo.toml @@ -0,0 +1,11 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT license. +[package] +name = "convert_f32_to_bf16" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +half = "2.2.1" diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/cmd_drivers/convert_f32_to_bf16/src/main.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/cmd_drivers/convert_f32_to_bf16/src/main.rs new file mode 100644 index 0000000..87b4fba --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/cmd_drivers/convert_f32_to_bf16/src/main.rs @@ -0,0 +1,154 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. 
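+ *
+ * Illustrative invocations (placeholder file names; arguments are positional,
+ * as documented in print_usage() at the bottom of this file):
+ *
+ *   # default: downscale to bf16, save as half type, batch_size 100000
+ *   cargo run --release --bin convert_f32_to_bf16 -- base_f32.bin base_bf16.bin
+ *
+ *   # downscale to f16, upcast back to f32 before saving, batch_size 50000
+ *   cargo run --release --bin convert_f32_to_bf16 -- \
+ *       base_f32.bin base_f16.bin f16 save_as_float 50000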
+ */ +use half::{bf16, f16}; +use std::env; +use std::fs::{File, OpenOptions}; +use std::io::{self, Read, Write, BufReader, BufWriter}; + +enum F16OrBF16 { + F16(f16), + BF16(bf16), +} + +fn main() -> io::Result<()> { + // Retrieve command-line arguments + let args: Vec = env::args().collect(); + + match args.len() { + 3|4|5|6=> {}, + _ => { + print_usage(); + std::process::exit(1); + } + } + + // Retrieve the input and output file paths from the arguments + let input_file_path = &args[1]; + let output_file_path = &args[2]; + let use_f16 = args.len() >= 4 && args[3] == "f16"; + let save_as_float = args.len() >= 5 && args[4] == "save_as_float"; + let batch_size = if args.len() >= 6 { args[5].parse::().unwrap() } else { 100000 }; + println!("use_f16: {}", use_f16); + println!("save_as_float: {}", save_as_float); + println!("batch_size: {}", batch_size); + + // Open the input file for reading + let mut input_file = BufReader::new(File::open(input_file_path)?); + + // Open the output file for writing + let mut output_file = BufWriter::new(OpenOptions::new().write(true).create(true).open(output_file_path)?); + + // Read the first 8 bytes as metadata + let mut metadata = [0; 8]; + input_file.read_exact(&mut metadata)?; + + // Write the metadata to the output file + output_file.write_all(&metadata)?; + + // Extract the number of points and dimension from the metadata + let num_points = i32::from_le_bytes(metadata[..4].try_into().unwrap()); + let dimension = i32::from_le_bytes(metadata[4..].try_into().unwrap()); + let num_batches = num_points / batch_size; + // Calculate the size of one data point in bytes + let data_point_size = (dimension * 4 * batch_size) as usize; + let mut batches_processed = 0; + let numbers_to_print = 2; + let mut numbers_printed = 0; + let mut num_fb16_wins = 0; + let mut num_f16_wins = 0; + let mut bf16_overflow = 0; + let mut f16_overflow = 0; + + // Process each data point + for _ in 0..num_batches { + // Read one data point from the input file + let mut buffer = vec![0; data_point_size]; + match input_file.read_exact(&mut buffer){ + Ok(()) => { + // Convert the float32 data to bf16 + let half_data: Vec = buffer + .chunks_exact(4) + .map(|chunk| { + let value = f32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]); + let converted_bf16 = bf16::from_f32(value); + let converted_f16 = f16::from_f32(value); + let distance_f16 = (converted_f16.to_f32() - value).abs(); + let distance_bf16 = (converted_bf16.to_f32() - value).abs(); + + if distance_f16 < distance_bf16 { + num_f16_wins += 1; + } else { + num_fb16_wins += 1; + } + + if (converted_bf16 == bf16::INFINITY) || (converted_bf16 == bf16::NEG_INFINITY) { + bf16_overflow += 1; + } + + if (converted_f16 == f16::INFINITY) || (converted_f16 == f16::NEG_INFINITY) { + f16_overflow += 1; + } + + if numbers_printed < numbers_to_print { + numbers_printed += 1; + println!("f32 value: {} f16 value: {} | distance {}, bf16 value: {} | distance {},", + value, converted_f16, converted_f16.to_f32() - value, converted_bf16, converted_bf16.to_f32() - value); + } + + if use_f16 { + F16OrBF16::F16(converted_f16) + } else { + F16OrBF16::BF16(converted_bf16) + } + }) + .collect(); + + batches_processed += 1; + + match save_as_float { + true => { + for float_val in half_data { + match float_val { + F16OrBF16::F16(f16_val) => output_file.write_all(&f16_val.to_f32().to_le_bytes())?, + F16OrBF16::BF16(bf16_val) => output_file.write_all(&bf16_val.to_f32().to_le_bytes())?, + } + } + } + false => { + for float_val in half_data { + match 
float_val { + F16OrBF16::F16(f16_val) => output_file.write_all(&f16_val.to_le_bytes())?, + F16OrBF16::BF16(bf16_val) => output_file.write_all(&bf16_val.to_le_bytes())?, + } + } + } + } + + // Print the number of points processed + println!("Processed {} points out of {}", batches_processed * batch_size, num_points); + } + Err(ref e) if e.kind() == io::ErrorKind::UnexpectedEof => { + println!("Conversion completed! {} of times f16 wins | overflow count {}, {} of times bf16 wins | overflow count{}", + num_f16_wins, f16_overflow, num_fb16_wins, bf16_overflow); + break; + } + Err(err) => { + println!("Error: {}", err); + break; + } + }; + } + + Ok(()) +} + +/// Prints the usage information +fn print_usage() { + println!("Usage: program_name input_file output_file [f16] [save_as_float] [batch_size]]"); + println!("specify f16 to downscale to f16. otherwise, downscale to bf16."); + println!("specify save_as_float to downcast to f16 or bf16, and upcast to float before saving the output data. otherwise, the data will be saved as half type."); + println!("specify the batch_size as a int, the default value is 100000."); +} + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/cmd_drivers/load_and_insert_memory_index/Cargo.toml b/packages/leann-backend-diskann/third_party/DiskANN/rust/cmd_drivers/load_and_insert_memory_index/Cargo.toml new file mode 100644 index 0000000..cbb4e1e --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/cmd_drivers/load_and_insert_memory_index/Cargo.toml @@ -0,0 +1,14 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT license. +[package] +name = "load_and_insert_memory_index" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +diskann = { path = "../../diskann" } +logger = { path = "../../logger" } +vector = { path = "../../vector" } + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/cmd_drivers/load_and_insert_memory_index/src/main.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/cmd_drivers/load_and_insert_memory_index/src/main.rs new file mode 100644 index 0000000..4168046 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/cmd_drivers/load_and_insert_memory_index/src/main.rs @@ -0,0 +1,313 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. 
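+ *
+ * Illustrative invocation (placeholder names; note that in this driver
+ * --data_path is passed to index.load(), i.e. it is the prefix of a
+ * previously built in-memory index rather than a raw vector file):
+ *
+ *   cargo run --release --bin load_and_insert_memory_index -- \
+ *       --data_type float --dist_fn l2 --data_path idx_old \
+ *       --insert_path delta.bin --index_path_prefix idx_new -R 64 -L 100 -T 8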
+ */ +use std::env; + +use diskann::{ + common::{ANNResult, ANNError}, + index::create_inmem_index, + utils::round_up, + model::{ + IndexWriteParametersBuilder, + IndexConfiguration, + vertex::{DIM_128, DIM_256, DIM_104} + }, + utils::{Timer, load_metadata_from_file}, +}; + +use vector::{Metric, FullPrecisionDistance, Half}; + +// The main function to build an in-memory index +#[allow(clippy::too_many_arguments)] +fn load_and_insert_in_memory_index ( + metric: Metric, + data_path: &str, + delta_path: &str, + r: u32, + l: u32, + alpha: f32, + save_path: &str, + num_threads: u32, + _use_pq_build: bool, + _num_pq_bytes: usize, + use_opq: bool +) -> ANNResult<()> +where + T: Default + Copy + Sync + Send + Into, + [T; DIM_104]: FullPrecisionDistance, + [T; DIM_128]: FullPrecisionDistance, + [T; DIM_256]: FullPrecisionDistance +{ + let index_write_parameters = IndexWriteParametersBuilder::new(l, r) + .with_alpha(alpha) + .with_saturate_graph(false) + .with_num_threads(num_threads) + .build(); + + let (data_num, data_dim) = load_metadata_from_file(&format!("{}.data", data_path))?; + + let config = IndexConfiguration::new( + metric, + data_dim, + round_up(data_dim as u64, 8_u64) as usize, + data_num, + false, + 0, + use_opq, + 0, + 2.0f32, + index_write_parameters, + ); + let mut index = create_inmem_index::(config)?; + + let timer = Timer::new(); + + index.load(data_path, data_num)?; + + let diff = timer.elapsed(); + + println!("Initial indexing time: {}", diff.as_secs_f64()); + + let (delta_data_num, _) = load_metadata_from_file(delta_path)?; + + index.insert(delta_path, delta_data_num)?; + + index.save(save_path)?; + + Ok(()) +} + +fn main() -> ANNResult<()> { + let mut data_type = String::new(); + let mut dist_fn = String::new(); + let mut data_path = String::new(); + let mut insert_path = String::new(); + let mut index_path_prefix = String::new(); + + let mut num_threads = 0u32; + let mut r = 64u32; + let mut l = 100u32; + + let mut alpha = 1.2f32; + let mut build_pq_bytes = 0u32; + let mut _use_pq_build = false; + let mut use_opq = false; + + let args: Vec = env::args().collect(); + let mut iter = args.iter().skip(1).peekable(); + + while let Some(arg) = iter.next() { + match arg.as_str() { + "--help" | "-h" => { + print_help(); + return Ok(()); + } + "--data_type" => { + data_type = iter.next().ok_or_else(|| ANNError::log_index_config_error( + "data_type".to_string(), + "Missing data type".to_string()) + )? + .to_owned(); + } + "--dist_fn" => { + dist_fn = iter.next().ok_or_else(|| ANNError::log_index_config_error( + "dist_fn".to_string(), + "Missing distance function".to_string()) + )? + .to_owned(); + } + "--data_path" => { + data_path = iter.next().ok_or_else(|| ANNError::log_index_config_error( + "data_path".to_string(), + "Missing data path".to_string()) + )? + .to_owned(); + } + "--insert_path" => { + insert_path = iter.next().ok_or_else(|| ANNError::log_index_config_error( + "insert_path".to_string(), + "Missing insert path".to_string()) + )? + .to_owned(); + } + "--index_path_prefix" => { + index_path_prefix = iter.next().ok_or_else(|| ANNError::log_index_config_error( + "index_path_prefix".to_string(), + "Missing index path prefix".to_string()))? + .to_owned(); + } + "--max_degree" | "-R" => { + r = iter.next().ok_or_else(|| ANNError::log_index_config_error( + "max_degree".to_string(), + "Missing max degree".to_string()))? 
+ .parse() + .map_err(|err| ANNError::log_index_config_error( + "max_degree".to_string(), + format!("ParseIntError: {}", err)) + )?; + } + "--Lbuild" | "-L" => { + l = iter.next().ok_or_else(|| ANNError::log_index_config_error( + "Lbuild".to_string(), + "Missing build complexity".to_string()))? + .parse() + .map_err(|err| ANNError::log_index_config_error( + "Lbuild".to_string(), + format!("ParseIntError: {}", err)) + )?; + } + "--alpha" => { + alpha = iter.next().ok_or_else(|| ANNError::log_index_config_error( + "alpha".to_string(), + "Missing alpha".to_string()))? + .parse() + .map_err(|err| ANNError::log_index_config_error( + "alpha".to_string(), + format!("ParseFloatError: {}", err)) + )?; + } + "--num_threads" | "-T" => { + num_threads = iter.next().ok_or_else(|| ANNError::log_index_config_error( + "num_threads".to_string(), + "Missing number of threads".to_string()))? + .parse() + .map_err(|err| ANNError::log_index_config_error( + "num_threads".to_string(), + format!("ParseIntError: {}", err)) + )?; + } + "--build_PQ_bytes" => { + build_pq_bytes = iter.next().ok_or_else(|| ANNError::log_index_config_error( + "build_PQ_bytes".to_string(), + "Missing PQ bytes".to_string()))? + .parse() + .map_err(|err| ANNError::log_index_config_error( + "build_PQ_bytes".to_string(), + format!("ParseIntError: {}", err)) + )?; + } + "--use_opq" => { + use_opq = iter.next().ok_or_else(|| ANNError::log_index_config_error( + "use_opq".to_string(), + "Missing use_opq flag".to_string()))? + .parse() + .map_err(|err| ANNError::log_index_config_error( + "use_opq".to_string(), + format!("ParseBoolError: {}", err)) + )?; + } + _ => { + return Err(ANNError::log_index_config_error(String::from(""), format!("Unknown argument: {}", arg))); + } + } + } + + if data_type.is_empty() + || dist_fn.is_empty() + || data_path.is_empty() + || index_path_prefix.is_empty() + { + return Err(ANNError::log_index_config_error(String::from(""), "Missing required arguments".to_string())); + } + + _use_pq_build = build_pq_bytes > 0; + + let metric = dist_fn + .parse::() + .map_err(|err| ANNError::log_index_config_error( + "dist_fn".to_string(), + err.to_string(), + ))?; + + println!( + "Starting index build with R: {} Lbuild: {} alpha: {} #threads: {}", + r, l, alpha, num_threads + ); + + match data_type.as_str() { + "int8" => { + load_and_insert_in_memory_index::( + metric, + &data_path, + &insert_path, + r, + l, + alpha, + &index_path_prefix, + num_threads, + _use_pq_build, + build_pq_bytes as usize, + use_opq, + )?; + } + "uint8" => { + load_and_insert_in_memory_index::( + metric, + &data_path, + &insert_path, + r, + l, + alpha, + &index_path_prefix, + num_threads, + _use_pq_build, + build_pq_bytes as usize, + use_opq, + )?; + } + "float" => { + load_and_insert_in_memory_index::( + metric, + &data_path, + &insert_path, + r, + l, + alpha, + &index_path_prefix, + num_threads, + _use_pq_build, + build_pq_bytes as usize, + use_opq, + )?; + } + "f16" => { + load_and_insert_in_memory_index::( + metric, + &data_path, + &insert_path, + r, + l, + alpha, + &index_path_prefix, + num_threads, + _use_pq_build, + build_pq_bytes as usize, + use_opq, + )? + } + _ => { + println!("Unsupported type. 
Use one of int8, uint8 or float."); + return Err(ANNError::log_index_config_error("data_type".to_string(), "Invalid data type".to_string())); + } + } + + Ok(()) +} + +fn print_help() { + println!("Arguments"); + println!("--help, -h Print information on arguments"); + println!("--data_type data type (required)"); + println!("--dist_fn distance function (required)"); + println!("--data_path Input data file in bin format for initial build (required)"); + println!("--insert_path Input data file in bin format for insert (required)"); + println!("--index_path_prefix Path prefix for saving index file components (required)"); + println!("--max_degree, -R Maximum graph degree (default: 64)"); + println!("--Lbuild, -L Build complexity, higher value results in better graphs (default: 100)"); + println!("--alpha alpha controls density and diameter of graph, set 1 for sparse graph, 1.2 or 1.4 for denser graphs with lower diameter (default: 1.2)"); + println!("--num_threads, -T Number of threads used for building index (defaults to num of CPU logic cores)"); + println!("--build_PQ_bytes Number of PQ bytes to build the index; 0 for full precision build (default: 0)"); + println!("--use_opq Set true for OPQ compression while using PQ distance comparisons for building the index, and false for PQ compression (default: false)"); +} + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/cmd_drivers/search_memory_index/Cargo.toml b/packages/leann-backend-diskann/third_party/DiskANN/rust/cmd_drivers/search_memory_index/Cargo.toml new file mode 100644 index 0000000..cba3709 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/cmd_drivers/search_memory_index/Cargo.toml @@ -0,0 +1,16 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT license. +[package] +name = "search_memory_index" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +bytemuck = "1.13.1" +diskann = { path = "../../diskann" } +num_cpus = "1.15.0" +rayon = "1.7.0" +vector = { path = "../../vector" } + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/cmd_drivers/search_memory_index/src/main.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/cmd_drivers/search_memory_index/src/main.rs new file mode 100644 index 0000000..ca4d4cd --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/cmd_drivers/search_memory_index/src/main.rs @@ -0,0 +1,430 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. 
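+ *
+ * Illustrative invocation (placeholder paths; --search_list accepts several
+ * L values, and any value smaller than the -K recall target is skipped by
+ * the search loop below):
+ *
+ *   cargo run --release --bin search_memory_index -- \
+ *       --data_type float --dist_fn l2 --index_path_prefix idx_ \
+ *       --query_file queries.bin --gt_file gt.bin --result_path res \
+ *       -K 10 -L 10 20 50 100 --num_threads 16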
+ */ +mod search_index_utils; +use bytemuck::Pod; +use diskann::{ + common::{ANNError, ANNResult}, + index, + model::{ + configuration::index_write_parameters::{default_param_vals, IndexWriteParametersBuilder}, + vertex::{DIM_104, DIM_128, DIM_256}, + IndexConfiguration, + }, + utils::{load_metadata_from_file, save_bin_u32}, +}; +use std::{env, path::Path, process::exit, time::Instant}; +use vector::{FullPrecisionDistance, Half, Metric}; + +use rayon::prelude::*; + +#[allow(clippy::too_many_arguments)] +fn search_memory_index( + metric: Metric, + index_path: &str, + result_path_prefix: &str, + query_file: &str, + truthset_file: &str, + num_threads: u32, + recall_at: u32, + print_all_recalls: bool, + l_vec: &Vec, + show_qps_per_thread: bool, + fail_if_recall_below: f32, +) -> ANNResult +where + T: Default + Copy + Sized + Pod + Sync + Send + Into, + [T; DIM_104]: FullPrecisionDistance, + [T; DIM_128]: FullPrecisionDistance, + [T; DIM_256]: FullPrecisionDistance, +{ + // Load the query file + let (query, query_num, query_dim, query_aligned_dim) = + search_index_utils::load_aligned_bin::(query_file)?; + let mut gt_dim: usize = 0; + let mut gt_ids: Option> = None; + let mut gt_dists: Option> = None; + + // Check for ground truth + let mut calc_recall_flag = false; + if !truthset_file.is_empty() && Path::new(truthset_file).exists() { + let ret = search_index_utils::load_truthset(truthset_file)?; + gt_ids = Some(ret.0); + gt_dists = ret.1; + let gt_num = ret.2; + gt_dim = ret.3; + + if gt_num != query_num { + println!("Error. Mismatch in number of queries and ground truth data"); + } + + calc_recall_flag = true; + } else { + println!( + "Truthset file {} not found. Not computing recall", + truthset_file + ); + } + + let num_frozen_pts = search_index_utils::get_graph_num_frozen_points(index_path)?; + + // C++ uses the max given L value, so we do the same here. 
Max degree is never specified in C++ so use the rust default + let index_write_params = IndexWriteParametersBuilder::new( + *l_vec.iter().max().unwrap(), + default_param_vals::MAX_DEGREE, + ) + .with_num_threads(num_threads) + .build(); + + let (index_num_points, _) = load_metadata_from_file(&format!("{}.data", index_path))?; + + let index_config = IndexConfiguration::new( + metric, + query_dim, + query_aligned_dim, + index_num_points, + false, + 0, + false, + num_frozen_pts, + 1f32, + index_write_params, + ); + let mut index = index::create_inmem_index::(index_config)?; + + index.load(index_path, index_num_points)?; + + println!("Using {} threads to search", num_threads); + let qps_title = if show_qps_per_thread { + "QPS/thread" + } else { + "QPS" + }; + let mut table_width = 4 + 12 + 18 + 20 + 15; + let mut table_header_str = format!( + "{:>4}{:>12}{:>18}{:>20}{:>15}", + "Ls", qps_title, "Avg dist cmps", "Mean Latency (mus)", "99.9 Latency" + ); + + let first_recall: u32 = if print_all_recalls { 1 } else { recall_at }; + let mut recalls_to_print: usize = 0; + if calc_recall_flag { + for curr_recall in first_recall..=recall_at { + let recall_str = format!("Recall@{}", curr_recall); + table_header_str.push_str(&format!("{:>12}", recall_str)); + recalls_to_print = (recall_at + 1 - first_recall) as usize; + table_width += recalls_to_print * 12; + } + } + + println!("{}", table_header_str); + println!("{}", "=".repeat(table_width)); + + let mut query_result_ids: Vec> = + vec![vec![0; query_num * recall_at as usize]; l_vec.len()]; + let mut latency_stats: Vec = vec![0.0; query_num]; + let mut cmp_stats: Vec = vec![0; query_num]; + let mut best_recall = 0.0; + + std::env::set_var("RAYON_NUM_THREADS", num_threads.to_string()); + + for test_id in 0..l_vec.len() { + let l_value = l_vec[test_id]; + + if l_value < recall_at { + println!( + "Ignoring search with L:{} since it's smaller than K:{}", + l_value, recall_at + ); + continue; + } + + let zipped = cmp_stats + .par_iter_mut() + .zip(latency_stats.par_iter_mut()) + .zip(query_result_ids[test_id].par_chunks_mut(recall_at as usize)) + .zip(query.par_chunks(query_aligned_dim)); + + let start = Instant::now(); + zipped.for_each(|(((cmp, latency), query_result), query_chunk)| { + let query_start = Instant::now(); + *cmp = index + .search(query_chunk, recall_at as usize, l_value, query_result) + .unwrap(); + + let query_end = Instant::now(); + let diff = query_end.duration_since(query_start); + *latency = diff.as_micros() as f32; + }); + let diff = Instant::now().duration_since(start); + + let mut displayed_qps: f32 = query_num as f32 / diff.as_secs_f32(); + if show_qps_per_thread { + displayed_qps /= num_threads as f32; + } + + let mut recalls: Vec = Vec::new(); + if calc_recall_flag { + recalls.reserve(recalls_to_print); + for curr_recall in first_recall..=recall_at { + recalls.push(search_index_utils::calculate_recall( + query_num, + gt_ids.as_ref().unwrap(), + >_dists, + gt_dim, + &query_result_ids[test_id], + recall_at, + curr_recall, + )? 
as f32); + } + } + + latency_stats.sort_by(|a, b| a.partial_cmp(b).unwrap()); + let mean_latency = latency_stats.iter().sum::() / query_num as f32; + let avg_cmps = cmp_stats.iter().sum::() as f32 / query_num as f32; + + let mut stat_str = format!( + "{: >4}{: >12.2}{: >18.2}{: >20.2}{: >15.2}", + l_value, + displayed_qps, + avg_cmps, + mean_latency, + latency_stats[(0.999 * query_num as f32).round() as usize] + ); + + for recall in recalls.iter() { + stat_str.push_str(&format!("{: >12.2}", recall)); + best_recall = f32::max(best_recall, *recall); + } + + println!("{}", stat_str); + } + + println!("Done searching. Now saving results"); + for (test_id, l_value) in l_vec.iter().enumerate() { + if *l_value < recall_at { + println!( + "Ignoring all search with L: {} since it's smaller than K: {}", + l_value, recall_at + ); + } + + let cur_result_path = format!("{}_{}_idx_uint32.bin", result_path_prefix, l_value); + save_bin_u32( + &cur_result_path, + query_result_ids[test_id].as_slice(), + query_num, + recall_at as usize, + 0, + )?; + } + + if best_recall >= fail_if_recall_below { + Ok(0) + } else { + Ok(-1) + } +} + +fn main() -> ANNResult<()> { + let return_val: i32; + { + let mut data_type: String = String::new(); + let mut metric: Option = None; + let mut index_path: String = String::new(); + let mut result_path_prefix: String = String::new(); + let mut query_file: String = String::new(); + let mut truthset_file: String = String::new(); + let mut num_cpus: u32 = num_cpus::get() as u32; + let mut recall_at: Option = None; + let mut print_all_recalls: bool = false; + let mut l_vec: Vec = Vec::new(); + let mut show_qps_per_thread: bool = false; + let mut fail_if_recall_below: f32 = 0.0; + + let args: Vec = env::args().collect(); + let mut iter = args.iter().skip(1).peekable(); + while let Some(arg) = iter.next() { + let ann_error = + || ANNError::log_index_config_error(String::from(arg), format!("Missing {}", arg)); + match arg.as_str() { + "--help" | "-h" => { + print_help(); + return Ok(()); + } + "--data_type" => { + data_type = iter.next().ok_or_else(ann_error)?.to_owned(); + } + "--dist_fn" => { + metric = Some(iter.next().ok_or_else(ann_error)?.parse().map_err(|err| { + ANNError::log_index_config_error( + String::from(arg), + format!("ParseError: {}", err), + ) + })?); + } + "--index_path_prefix" => { + index_path = iter.next().ok_or_else(ann_error)?.to_owned(); + } + "--result_path" => { + result_path_prefix = iter.next().ok_or_else(ann_error)?.to_owned(); + } + "--query_file" => { + query_file = iter.next().ok_or_else(ann_error)?.to_owned(); + } + "--gt_file" => { + truthset_file = iter.next().ok_or_else(ann_error)?.to_owned(); + } + "--recall_at" | "-K" => { + recall_at = + Some(iter.next().ok_or_else(ann_error)?.parse().map_err(|err| { + ANNError::log_index_config_error( + String::from(arg), + format!("ParseError: {}", err), + ) + })?); + } + "--print_all_recalls" => { + print_all_recalls = true; + } + "--search_list" | "-L" => { + while iter.peek().is_some() && !iter.peek().unwrap().starts_with('-') { + l_vec.push(iter.next().ok_or_else(ann_error)?.parse().map_err(|err| { + ANNError::log_index_config_error( + String::from(arg), + format!("ParseError: {}", err), + ) + })?); + } + } + "--num_threads" => { + num_cpus = iter.next().ok_or_else(ann_error)?.parse().map_err(|err| { + ANNError::log_index_config_error( + String::from(arg), + format!("ParseError: {}", err), + ) + })?; + } + "--qps_per_thread" => { + show_qps_per_thread = true; + } + "--fail_if_recall_below" => { + 
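                    // As documented in print_help below: if the best recall measured
                    // across all L values ends up below this threshold, search_memory_index
                    // returns -1 (the Ok(0)/Ok(-1) branch above) and main exits with that code.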
fail_if_recall_below = + iter.next().ok_or_else(ann_error)?.parse().map_err(|err| { + ANNError::log_index_config_error( + String::from(arg), + format!("ParseError: {}", err), + ) + })?; + } + _ => { + return Err(ANNError::log_index_error(format!( + "Unknown argument: {}", + arg + ))); + } + } + } + + if metric.is_none() { + return Err(ANNError::log_index_error(String::from("No metric given!"))); + } else if recall_at.is_none() { + return Err(ANNError::log_index_error(String::from( + "No recall_at given!", + ))); + } + + // Seems like float is the only supported data type for FullPrecisionDistance right now, + // but keep the structure in place here for future data types + match data_type.as_str() { + "float" => { + return_val = search_memory_index::( + metric.unwrap(), + &index_path, + &result_path_prefix, + &query_file, + &truthset_file, + num_cpus, + recall_at.unwrap(), + print_all_recalls, + &l_vec, + show_qps_per_thread, + fail_if_recall_below, + )?; + } + "int8" => { + return_val = search_memory_index::( + metric.unwrap(), + &index_path, + &result_path_prefix, + &query_file, + &truthset_file, + num_cpus, + recall_at.unwrap(), + print_all_recalls, + &l_vec, + show_qps_per_thread, + fail_if_recall_below, + )?; + } + "uint8" => { + return_val = search_memory_index::( + metric.unwrap(), + &index_path, + &result_path_prefix, + &query_file, + &truthset_file, + num_cpus, + recall_at.unwrap(), + print_all_recalls, + &l_vec, + show_qps_per_thread, + fail_if_recall_below, + )?; + } + "f16" => { + return_val = search_memory_index::( + metric.unwrap(), + &index_path, + &result_path_prefix, + &query_file, + &truthset_file, + num_cpus, + recall_at.unwrap(), + print_all_recalls, + &l_vec, + show_qps_per_thread, + fail_if_recall_below, + )?; + } + _ => { + return Err(ANNError::log_index_error(format!( + "Unknown data type: {}!", + data_type + ))); + } + } + } + + // Rust only allows returning values with this method, but this will immediately terminate the program without running destructors on the + // stack. To get around this enclose main function logic in a block so that by the time we return here all destructors have been called. 
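    // Illustrative alternative, shown only as a sketch (the hypothetical run() helper is
    // not part of this driver): returning std::process::ExitCode from main also propagates
    // a status code while letting destructors run before the process terminates:
    //
    //     fn main() -> std::process::ExitCode {
    //         let code = run();                         // locals inside run() are dropped here
    //         std::process::ExitCode::from(code as u8)  // maps the i32 status into an exit code
    //     }
    //
    // The block-plus-exit pattern used here makes the same cleanup ordering explicit instead.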
+ exit(return_val); +} + +fn print_help() { + println!("Arguments"); + println!("--help, -h Print information on arguments"); + println!("--data_type data type (required)"); + println!("--dist_fn distance function (required)"); + println!("--index_path_prefix Path prefix to the index (required)"); + println!("--result_path Path prefix for saving results of the queries (required)"); + println!("--query_file Query file in binary format"); + println!("--gt_file Ground truth file for the queryset"); + println!("--recall_at, -K Number of neighbors to be returned"); + println!("--print_all_recalls Print recalls at all positions, from 1 up to specified recall_at value"); + println!("--search_list List of L values of search"); + println!("----num_threads, -T Number of threads used for building index (defaults to num_cpus::get())"); + println!("--qps_per_thread Print overall QPS divided by the number of threads in the output table"); + println!("--fail_if_recall_below If set to a value >0 and <100%, program returns -1 if best recall found is below this threshold"); +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/cmd_drivers/search_memory_index/src/search_index_utils.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/cmd_drivers/search_memory_index/src/search_index_utils.rs new file mode 100644 index 0000000..c7b04a4 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/cmd_drivers/search_memory_index/src/search_index_utils.rs @@ -0,0 +1,186 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +use bytemuck::{cast_slice, Pod}; +use diskann::{ + common::{ANNError, ANNResult, AlignedBoxWithSlice}, + model::data_store::DatasetDto, + utils::{copy_aligned_data_from_file, is_aligned, round_up}, +}; +use std::collections::HashSet; +use std::fs::File; +use std::io::Read; +use std::mem::size_of; + +pub(crate) fn calculate_recall( + num_queries: usize, + gold_std: &[u32], + gs_dist: &Option>, + dim_gs: usize, + our_results: &[u32], + dim_or: u32, + recall_at: u32, +) -> ANNResult { + let mut total_recall: f64 = 0.0; + let (mut gt, mut res): (HashSet, HashSet) = (HashSet::new(), HashSet::new()); + + for i in 0..num_queries { + gt.clear(); + res.clear(); + + let gt_slice = &gold_std[dim_gs * i..]; + let res_slice = &our_results[dim_or as usize * i..]; + let mut tie_breaker = recall_at as usize; + + if gs_dist.is_some() { + tie_breaker = (recall_at - 1) as usize; + let gt_dist_vec = &gs_dist.as_ref().unwrap()[dim_gs * i..]; + while tie_breaker < dim_gs + && gt_dist_vec[tie_breaker] == gt_dist_vec[(recall_at - 1) as usize] + { + tie_breaker += 1; + } + } + + (0..tie_breaker).for_each(|idx| { + gt.insert(gt_slice[idx]); + }); + + (0..tie_breaker).for_each(|idx| { + res.insert(res_slice[idx]); + }); + + let mut cur_recall: u32 = 0; + for v in gt.iter() { + if res.contains(v) { + cur_recall += 1; + } + } + + total_recall += cur_recall as f64; + } + + Ok(total_recall / num_queries as f64 * (100.0 / recall_at as f64)) +} + +pub(crate) fn get_graph_num_frozen_points(graph_file: &str) -> ANNResult { + let mut file = File::open(graph_file)?; + let mut usize_buffer = [0; size_of::()]; + let mut u32_buffer = [0; size_of::()]; + + file.read_exact(&mut usize_buffer)?; + file.read_exact(&mut u32_buffer)?; + file.read_exact(&mut u32_buffer)?; + file.read_exact(&mut usize_buffer)?; + let file_frozen_pts = usize::from_le_bytes(usize_buffer); + + Ok(file_frozen_pts) +} + +#[inline] +pub(crate) fn load_truthset( + 
bin_file: &str, +) -> ANNResult<(Vec, Option>, usize, usize)> { + let mut file = File::open(bin_file)?; + let actual_file_size = file.metadata()?.len() as usize; + + let mut buffer = [0; size_of::()]; + file.read_exact(&mut buffer)?; + let npts = i32::from_le_bytes(buffer) as usize; + + file.read_exact(&mut buffer)?; + let dim = i32::from_le_bytes(buffer) as usize; + + println!("Metadata: #pts = {npts}, #dims = {dim}... "); + + let expected_file_size_with_dists: usize = + 2 * npts * dim * size_of::() + 2 * size_of::(); + let expected_file_size_just_ids: usize = npts * dim * size_of::() + 2 * size_of::(); + + let truthset_type : i32 = match actual_file_size + { + // This is in the C++ code, but nothing is done in this case. Keeping it here for future reference just in case. + // expected_file_size_just_ids => 2, + x if x == expected_file_size_with_dists => 1, + _ => return Err(ANNError::log_index_error(format!("Error. File size mismatch. File should have bin format, with npts followed by ngt + followed by npts*ngt ids and optionally followed by npts*ngt distance values; actual size: {}, expected: {} or {}", + actual_file_size, + expected_file_size_with_dists, + expected_file_size_just_ids))) + }; + + let mut ids: Vec = vec![0; npts * dim]; + let mut buffer = vec![0; npts * dim * size_of::()]; + file.read_exact(&mut buffer)?; + ids.clone_from_slice(cast_slice::(&buffer)); + + if truthset_type == 1 { + let mut dists: Vec = vec![0.0; npts * dim]; + let mut buffer = vec![0; npts * dim * size_of::()]; + file.read_exact(&mut buffer)?; + dists.clone_from_slice(cast_slice::(&buffer)); + + return Ok((ids, Some(dists), npts, dim)); + } + + Ok((ids, None, npts, dim)) +} + +#[inline] +pub(crate) fn load_aligned_bin( + bin_file: &str, +) -> ANNResult<(AlignedBoxWithSlice, usize, usize, usize)> { + let t_size = size_of::(); + let (npts, dim, file_size): (usize, usize, usize); + { + println!("Reading (with alignment) bin file: {bin_file}"); + let mut file = File::open(bin_file)?; + file_size = file.metadata()?.len() as usize; + + let mut buffer = [0; size_of::()]; + file.read_exact(&mut buffer)?; + npts = i32::from_le_bytes(buffer) as usize; + + file.read_exact(&mut buffer)?; + dim = i32::from_le_bytes(buffer) as usize; + } + + let rounded_dim = round_up(dim, 8); + let expected_actual_file_size = npts * dim * size_of::() + 2 * size_of::(); + + if file_size != expected_actual_file_size { + return Err(ANNError::log_index_error(format!( + "ERROR: File size mismatch. Actual size is {} while expected size is {} + npts = {}, #dims = {}, aligned_dim = {}", + file_size, expected_actual_file_size, npts, dim, rounded_dim + ))); + } + + println!("Metadata: #pts = {npts}, #dims = {dim}, aligned_dim = {rounded_dim}..."); + + let alloc_size = npts * rounded_dim; + let alignment = 8 * t_size; + println!( + "allocating aligned memory of {} bytes... ", + alloc_size * t_size + ); + if !is_aligned(alloc_size * t_size, alignment) { + return Err(ANNError::log_index_error(format!( + "Requested memory size is not a multiple of {}. Can not be allocated.", + alignment + ))); + } + + let mut data = AlignedBoxWithSlice::::new(alloc_size, alignment)?; + let dto = DatasetDto { + data: &mut data, + rounded_dim, + }; + + println!("done. 
Copying data to mem_aligned buffer..."); + + let (_, _) = copy_aligned_data_from_file(bin_file, dto, 0)?; + + Ok((data, npts, dim, rounded_dim)) +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/Cargo.toml b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/Cargo.toml new file mode 100644 index 0000000..a5be547 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/Cargo.toml @@ -0,0 +1,45 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT license. +[package] +name = "diskann" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +bincode = "1.3.3" +bit-vec = "0.6.3" +byteorder = "1.4.3" +cblas = "0.4.0" +crossbeam = "0.8.2" +half = "2.2.1" +hashbrown = "0.13.2" +num-traits = "0.2.15" +once_cell = "1.17.1" +openblas-src = { version = "0.10.8", features = ["system"] } +rand = { version = "0.8.5", features = [ "small_rng" ] } +rayon = "1.7.0" +serde = { version = "1.0.130", features = ["derive"] } +thiserror = "1.0.40" +winapi = { version = "0.3.9", features = ["errhandlingapi", "fileapi", "ioapiset", "handleapi", "winnt", "minwindef", "basetsd", "winerror", "winbase"] } + +logger = { path = "../logger" } +platform = { path = "../platform" } +vector = { path = "../vector" } + +[build-dependencies] +cc = "1.0.79" + +[dev-dependencies] +approx = "0.5.1" +criterion = "0.5.1" + + +[[bench]] +name = "distance_bench" +harness = false + +[[bench]] +name = "neighbor_bench" +harness = false diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/benches/distance_bench.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/benches/distance_bench.rs new file mode 100644 index 0000000..885c95b --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/benches/distance_bench.rs @@ -0,0 +1,47 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. 
+ */ +use criterion::{black_box, criterion_group, criterion_main, Criterion}; + +use rand::{thread_rng, Rng}; +use vector::{FullPrecisionDistance, Metric}; + +// make sure the vector is 256-bit (32 bytes) aligned required by _mm256_load_ps +#[repr(C, align(32))] +struct Vector32ByteAligned { + v: [f32; 256], +} + +fn benchmark_l2_distance_float_rust(c: &mut Criterion) { + let (a, b) = prepare_random_aligned_vectors(); + let mut group = c.benchmark_group("avx-computation"); + group.sample_size(5000); + + group.bench_function("AVX Rust run", |f| { + f.iter(|| { + black_box(<[f32; 256]>::distance_compare( + black_box(&a.v), + black_box(&b.v), + Metric::L2, + )) + }) + }); +} + +// make sure the vector is 256-bit (32 bytes) aligned required by _mm256_load_ps +fn prepare_random_aligned_vectors() -> (Box, Box) { + let a = Box::new(Vector32ByteAligned { + v: [(); 256].map(|_| thread_rng().gen_range(0.0..100.0)), + }); + + let b = Box::new(Vector32ByteAligned { + v: [(); 256].map(|_| thread_rng().gen_range(0.0..100.0)), + }); + + (a, b) +} + +criterion_group!(benches, benchmark_l2_distance_float_rust,); +criterion_main!(benches); + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/benches/kmeans_bench.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/benches/kmeans_bench.rs new file mode 100644 index 0000000..c69c16a --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/benches/kmeans_bench.rs @@ -0,0 +1,70 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +use criterion::{criterion_group, criterion_main, Criterion}; +use diskann::utils::k_means_clustering; +use rand::Rng; + +const NUM_POINTS: usize = 10000; +const DIM: usize = 100; +const NUM_CENTERS: usize = 256; +const MAX_KMEANS_REPS: usize = 12; + +fn benchmark_kmeans_rust(c: &mut Criterion) { + let mut rng = rand::thread_rng(); + let data: Vec = (0..NUM_POINTS * DIM) + .map(|_| rng.gen_range(-1.0..1.0)) + .collect(); + let centers: Vec = vec![0.0; NUM_CENTERS * DIM]; + + let mut group = c.benchmark_group("kmeans-computation"); + group.sample_size(500); + + group.bench_function("K-Means Rust run", |f| { + f.iter(|| { + // let mut centers_copy = centers.clone(); + let data_copy = data.clone(); + let mut centers_copy = centers.clone(); + k_means_clustering( + &data_copy, + NUM_POINTS, + DIM, + &mut centers_copy, + NUM_CENTERS, + MAX_KMEANS_REPS, + ) + }) + }); +} + +fn benchmark_kmeans_c(c: &mut Criterion) { + let mut rng = rand::thread_rng(); + let data: Vec = (0..NUM_POINTS * DIM) + .map(|_| rng.gen_range(-1.0..1.0)) + .collect(); + let centers: Vec = vec![0.0; NUM_CENTERS * DIM]; + + let mut group = c.benchmark_group("kmeans-computation"); + group.sample_size(500); + + group.bench_function("K-Means C++ Run", |f| { + f.iter(|| { + let data_copy = data.clone(); + let mut centers_copy = centers.clone(); + let _ = k_means_clustering( + data_copy.as_slice(), + NUM_POINTS, + DIM, + centers_copy.as_mut_slice(), + NUM_CENTERS, + MAX_KMEANS_REPS, + ); + }) + }); +} + +criterion_group!(benches, benchmark_kmeans_rust, benchmark_kmeans_c); + +criterion_main!(benches); + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/benches/neighbor_bench.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/benches/neighbor_bench.rs new file mode 100644 index 0000000..958acdc --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/benches/neighbor_bench.rs @@ -0,0 
+1,49 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +use std::time::Duration; + +use criterion::{black_box, criterion_group, criterion_main, Criterion}; + +use diskann::model::{Neighbor, NeighborPriorityQueue}; +use rand::distributions::{Distribution, Uniform}; +use rand::rngs::StdRng; +use rand::SeedableRng; + +fn benchmark_priority_queue_insert(c: &mut Criterion) { + let vec = generate_random_floats(); + let mut group = c.benchmark_group("neighborqueue-insert"); + group.measurement_time(Duration::from_secs(3)).sample_size(500); + + let mut queue = NeighborPriorityQueue::with_capacity(64_usize); + group.bench_function("Neighbor Priority Queue Insert", |f| { + f.iter(|| { + queue.clear(); + for n in vec.iter() { + queue.insert(*n); + } + + black_box(&1) + }); + }); +} + +fn generate_random_floats() -> Vec { + let seed: [u8; 32] = [73; 32]; + let mut rng: StdRng = SeedableRng::from_seed(seed); + let range = Uniform::new(0.0, 1.0); + let mut random_floats = Vec::with_capacity(100); + + for i in 0..100 { + let random_float = range.sample(&mut rng) as f32; + let n = Neighbor::new(i, random_float); + random_floats.push(n); + } + + random_floats +} + +criterion_group!(benches, benchmark_priority_queue_insert); +criterion_main!(benches); + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/algorithm/mod.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/algorithm/mod.rs new file mode 100644 index 0000000..87e377c --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/algorithm/mod.rs @@ -0,0 +1,7 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +pub mod search; + +pub mod prune; diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/algorithm/prune/mod.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/algorithm/prune/mod.rs new file mode 100644 index 0000000..4627eeb --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/algorithm/prune/mod.rs @@ -0,0 +1,6 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +#[allow(clippy::module_inception)] +pub mod prune; diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/algorithm/prune/prune.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/algorithm/prune/prune.rs new file mode 100644 index 0000000..40fec4a --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/algorithm/prune/prune.rs @@ -0,0 +1,288 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. 
+ */ +use hashbrown::HashSet; +use vector::{FullPrecisionDistance, Metric}; + +use crate::common::{ANNError, ANNResult}; +use crate::index::InmemIndex; +use crate::model::graph::AdjacencyList; +use crate::model::neighbor::SortedNeighborVector; +use crate::model::scratch::InMemQueryScratch; +use crate::model::Neighbor; + +impl InmemIndex +where + T: Default + Copy + Sync + Send + Into, + [T; N]: FullPrecisionDistance, +{ + /// A method that occludes a list of neighbors based on some criteria + #[allow(clippy::too_many_arguments)] + fn occlude_list( + &self, + location: u32, + pool: &mut SortedNeighborVector, + alpha: f32, + degree: u32, + max_candidate_size: usize, + result: &mut AdjacencyList, + scratch: &mut InMemQueryScratch, + delete_set_ptr: Option<&HashSet>, + ) -> ANNResult<()> { + if pool.is_empty() { + return Ok(()); + } + + if !result.is_empty() { + return Err(ANNError::log_index_error( + "result is not empty.".to_string(), + )); + } + + // Truncate pool at max_candidate_size and initialize scratch spaces + if pool.len() > max_candidate_size { + pool.truncate(max_candidate_size); + } + + let occlude_factor = &mut scratch.occlude_factor; + + // occlude_list can be called with the same scratch more than once by + // search_for_point_and_add_link through inter_insert. + occlude_factor.clear(); + + // Initialize occlude_factor to pool.len() many 0.0 values for correctness + occlude_factor.resize(pool.len(), 0.0); + + let mut cur_alpha = 1.0; + while cur_alpha <= alpha && result.len() < degree as usize { + for (i, neighbor) in pool.iter().enumerate() { + if result.len() >= degree as usize { + break; + } + if occlude_factor[i] > cur_alpha { + continue; + } + // Set the entry to f32::MAX so that is not considered again + occlude_factor[i] = f32::MAX; + + // Add the entry to the result if its not been deleted, and doesn't + // add a self loop + if delete_set_ptr.map_or(true, |delete_set| !delete_set.contains(&neighbor.id)) + && neighbor.id != location + { + result.push(neighbor.id); + } + + // Update occlude factor for points from i+1 to pool.len() + for (j, neighbor2) in pool.iter().enumerate().skip(i + 1) { + if occlude_factor[j] > alpha { + continue; + } + + // todo - self.filtered_index + let djk = self.get_distance(neighbor2.id, neighbor.id)?; + match self.configuration.dist_metric { + Metric::L2 | Metric::Cosine => { + occlude_factor[j] = if djk == 0.0 { + f32::MAX + } else { + occlude_factor[j].max(neighbor2.distance / djk) + }; + } + } + } + } + + cur_alpha *= 1.2; + } + + Ok(()) + } + + /// Prunes the neighbors of a given data point based on some criteria and returns a list of pruned ids. + /// + /// # Arguments + /// + /// * `location` - The id of the data point whose neighbors are to be pruned. + /// * `pool` - A vector of neighbors to be pruned, sorted by distance to the query point. + /// * `pruned_list` - A vector to store the ids of the pruned neighbors. + /// * `scratch` - A mutable reference to a scratch space for in-memory queries. + /// + /// # Panics + /// + /// Panics if `pruned_list` contains more than `range` elements after pruning. 
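    // Worked example of the occlusion rule in occlude_list above (illustrative numbers only):
    // with alpha = 1.2 the outer loop runs its passes at cur_alpha = 1.0 and then 1.2. For a
    // selected neighbor i and a later candidate j with d(query, j) = 10.0, a spacing of
    // d(i, j) = 9.0 gives the ratio 10.0 / 9.0 ~= 1.11 <= 1.2, so j can still be chosen on the
    // final pass; with d(i, j) = 5.0 the ratio is 2.0 > alpha and j stays occluded, which is
    // how the pruning keeps the retained out-neighbors spread apart.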
+ pub fn prune_neighbors( + &self, + location: u32, + pool: &mut Vec, + pruned_list: &mut AdjacencyList, + scratch: &mut InMemQueryScratch, + ) -> ANNResult<()> { + self.robust_prune( + location, + pool, + self.configuration.index_write_parameter.max_degree, + self.configuration.index_write_parameter.max_occlusion_size, + self.configuration.index_write_parameter.alpha, + pruned_list, + scratch, + ) + } + + /// Prunes the neighbors of a given data point based on some criteria and returns a list of pruned ids. + /// + /// # Arguments + /// + /// * `location` - The id of the data point whose neighbors are to be pruned. + /// * `pool` - A vector of neighbors to be pruned, sorted by distance to the query point. + /// * `range` - The maximum number of neighbors to keep after pruning. + /// * `max_candidate_size` - The maximum number of candidates to consider for pruning. + /// * `alpha` - A parameter that controls the occlusion pruning strategy. + /// * `pruned_list` - A vector to store the ids of the pruned neighbors. + /// * `scratch` - A mutable reference to a scratch space for in-memory queries. + /// + /// # Error + /// + /// Return error if `pruned_list` contains more than `range` elements after pruning. + #[allow(clippy::too_many_arguments)] + fn robust_prune( + &self, + location: u32, + pool: &mut Vec, + range: u32, + max_candidate_size: u32, + alpha: f32, + pruned_list: &mut AdjacencyList, + scratch: &mut InMemQueryScratch, + ) -> ANNResult<()> { + if pool.is_empty() { + // if the pool is empty, behave like a noop + pruned_list.clear(); + return Ok(()); + } + + // If using _pq_build, over-write the PQ distances with actual distances + // todo : pq_dist + + // sort the pool based on distance to query and prune it with occlude_list + let mut pool = SortedNeighborVector::new(pool); + pruned_list.clear(); + + self.occlude_list( + location, + &mut pool, + alpha, + range, + max_candidate_size as usize, + pruned_list, + scratch, + Option::None, + )?; + + if pruned_list.len() > range as usize { + return Err(ANNError::log_index_error(format!( + "pruned_list's len {} is over range {}.", + pruned_list.len(), + range + ))); + } + + if self.configuration.index_write_parameter.saturate_graph && alpha > 1.0f32 { + for neighbor in pool.iter() { + if pruned_list.len() >= (range as usize) { + break; + } + if !pruned_list.contains(&neighbor.id) && neighbor.id != location { + pruned_list.push(neighbor.id); + } + } + } + + Ok(()) + } + + /// A method that inserts a point n into the graph of its neighbors and their neighbors, + /// pruning the graph if necessary to keep it within the specified range + /// * `n` - The index of the new point + /// * `pruned_list` is a vector of the neighbors of n that have been pruned by a previous step + /// * `range` is the target number of neighbors for each point + /// * `scratch` is a mutable reference to a scratch space that can be reused for intermediate computations + pub fn inter_insert( + &self, + n: u32, + pruned_list: &Vec, + range: u32, + scratch: &mut InMemQueryScratch, + ) -> ANNResult<()> { + // Borrow the pruned_list as a source pool of neighbors + let src_pool = pruned_list; + + if src_pool.is_empty() { + return Err(ANNError::log_index_error("src_pool is empty.".to_string())); + } + + for &vertex_id in src_pool { + // vertex is the index of a neighbor of n + // Assert that vertex is within the valid range of points + if (vertex_id as usize) + >= self.configuration.max_points + self.configuration.num_frozen_pts + { + return 
Err(ANNError::log_index_error(format!( + "vertex_id {} is out of valid range of points {}", + vertex_id, + self.configuration.max_points + self.configuration.num_frozen_pts, + ))); + } + + let neighbors = self.add_to_neighbors(vertex_id, n, range)?; + + if let Some(copy_of_neighbors) = neighbors { + // Pruning is needed, create a dummy set and a dummy vector to store the unique neighbors of vertex_id + let mut dummy_pool = self.get_unique_neighbors(©_of_neighbors, vertex_id)?; + + // Create a new vector to store the pruned neighbors of vertex_id + let mut new_out_neighbors = + AdjacencyList::for_range(self.configuration.write_range()); + // Prune the neighbors of vertex_id using a helper method + self.prune_neighbors(vertex_id, &mut dummy_pool, &mut new_out_neighbors, scratch)?; + + self.set_neighbors(vertex_id, new_out_neighbors)?; + } + } + + Ok(()) + } + + /// Adds a node to the list of neighbors for the given node. + /// + /// # Arguments + /// + /// * `vertex_id` - The ID of the node to add the neighbor to. + /// * `node_id` - The ID of the node to add. + /// * `range` - The range of the graph. + /// + /// # Return + /// + /// Returns `None` if the node is already in the list of neighbors, or a `Vec` containing the updated list of neighbors if the list of neighbors is full. + fn add_to_neighbors( + &self, + vertex_id: u32, + node_id: u32, + range: u32, + ) -> ANNResult>> { + // vertex contains a vector of the neighbors of vertex_id + let mut vertex_guard = self.final_graph.write_vertex_and_neighbors(vertex_id)?; + + Ok(vertex_guard.add_to_neighbors(node_id, range)) + } + + fn set_neighbors(&self, vertex_id: u32, new_out_neighbors: AdjacencyList) -> ANNResult<()> { + // vertex contains a vector of the neighbors of vertex_id + let mut vertex_guard = self.final_graph.write_vertex_and_neighbors(vertex_id)?; + + vertex_guard.set_neighbors(new_out_neighbors); + Ok(()) + } +} + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/algorithm/search/mod.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/algorithm/search/mod.rs new file mode 100644 index 0000000..9f007ab --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/algorithm/search/mod.rs @@ -0,0 +1,7 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +#[allow(clippy::module_inception)] +pub mod search; + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/algorithm/search/search.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/algorithm/search/search.rs new file mode 100644 index 0000000..ab6d016 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/algorithm/search/search.rs @@ -0,0 +1,359 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +#![warn(missing_debug_implementations, missing_docs)] + +//! 
Search algorithm for index construction and query + +use crate::common::{ANNError, ANNResult}; +use crate::index::InmemIndex; +use crate::model::{scratch::InMemQueryScratch, Neighbor, Vertex}; +use hashbrown::hash_set::Entry::*; +use vector::FullPrecisionDistance; + +impl InmemIndex +where + T: Default + Copy + Sync + Send + Into, + [T; N]: FullPrecisionDistance, +{ + /// Search for query using given L value, for benchmarking purposes + /// # Arguments + /// * `query` - query vertex + /// * `scratch` - in-memory query scratch + /// * `search_list_size` - search list size to use for the benchmark + pub fn search_with_l_override( + &self, + query: &Vertex, + scratch: &mut InMemQueryScratch, + search_list_size: usize, + ) -> ANNResult { + let init_ids = self.get_init_ids()?; + self.init_graph_for_point(query, init_ids, scratch)?; + // Scratch is created using largest L val from search_memory_index, so we artifically make it smaller here + // This allows us to use the same scratch for all L values without having to rebuild the query scratch + scratch.best_candidates.set_capacity(search_list_size); + let (_, cmp) = self.greedy_search(query, scratch)?; + + Ok(cmp) + } + + /// search for point + /// # Arguments + /// * `query` - query vertex + /// * `scratch` - in-memory query scratch + /// TODO: use_filter, filteredLindex + pub fn search_for_point( + &self, + query: &Vertex, + scratch: &mut InMemQueryScratch, + ) -> ANNResult> { + let init_ids = self.get_init_ids()?; + self.init_graph_for_point(query, init_ids, scratch)?; + let (mut visited_nodes, _) = self.greedy_search(query, scratch)?; + + visited_nodes.retain(|&element| element.id != query.vertex_id()); + Ok(visited_nodes) + } + + /// Returns the locations of start point and frozen points suitable for use with iterate_to_fixed_point. 
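    // Concrete illustration, matching the unit tests at the bottom of this file: frozen points
    // occupy slots max_points..max_points + num_frozen_pts. With max_points = 256 and
    // start = 256, no frozen points yields just [256]; with two frozen points the candidate
    // slots are 256 and 257, the slot equal to `start` is skipped, and the result is [256, 257].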
+ fn get_init_ids(&self) -> ANNResult> { + let mut init_ids = Vec::with_capacity(1 + self.configuration.num_frozen_pts); + init_ids.push(self.start); + + for frozen in self.configuration.max_points + ..(self.configuration.max_points + self.configuration.num_frozen_pts) + { + let frozen_u32 = frozen.try_into()?; + if frozen_u32 != self.start { + init_ids.push(frozen_u32); + } + } + + Ok(init_ids) + } + + /// Initialize graph for point + /// # Arguments + /// * `query` - query vertex + /// * `init_ids` - initial nodes from which search starts + /// * `scratch` - in-memory query scratch + /// * `search_list_size_override` - override for search list size in index config + fn init_graph_for_point( + &self, + query: &Vertex, + init_ids: Vec, + scratch: &mut InMemQueryScratch, + ) -> ANNResult<()> { + scratch + .best_candidates + .reserve(self.configuration.index_write_parameter.search_list_size as usize); + scratch.query.memcpy(query.vector())?; + + if !scratch.id_scratch.is_empty() { + return Err(ANNError::log_index_error( + "id_scratch is not empty.".to_string(), + )); + } + + let query_vertex = Vertex::::try_from((&scratch.query[..], query.vertex_id())) + .map_err(|err| { + ANNError::log_index_error(format!( + "TryFromSliceError: failed to get Vertex for query, err={}", + err + )) + })?; + + for id in init_ids { + if (id as usize) >= self.configuration.max_points + self.configuration.num_frozen_pts { + return Err(ANNError::log_index_error(format!( + "vertex_id {} is out of valid range of points {}", + id, + self.configuration.max_points + self.configuration.num_frozen_pts + ))); + } + + if let Vacant(entry) = scratch.node_visited_robinset.entry(id) { + entry.insert(); + + let vertex = self.dataset.get_vertex(id)?; + + let distance = vertex.compare(&query_vertex, self.configuration.dist_metric); + let neighbor = Neighbor::new(id, distance); + scratch.best_candidates.insert(neighbor); + } + } + + Ok(()) + } + + /// GreedySearch against query node + /// Returns visited nodes + /// # Arguments + /// * `query` - query vertex + /// * `scratch` - in-memory query scratch + /// TODO: use_filter, filter_label, search_invocation + fn greedy_search( + &self, + query: &Vertex, + scratch: &mut InMemQueryScratch, + ) -> ANNResult<(Vec, u32)> { + let mut visited_nodes = + Vec::with_capacity((3 * scratch.candidate_size + scratch.max_degree) as usize); + + // TODO: uncomment hops? + // let mut hops: u32 = 0; + let mut cmps: u32 = 0; + + let query_vertex = Vertex::::try_from((&scratch.query[..], query.vertex_id())) + .map_err(|err| { + ANNError::log_index_error(format!( + "TryFromSliceError: failed to get Vertex for query, err={}", + err + )) + })?; + + while scratch.best_candidates.has_notvisited_node() { + let closest_node = scratch.best_candidates.closest_notvisited(); + + // Add node to visited nodes to create pool for prune later + // TODO: search_invocation and use_filter + visited_nodes.push(closest_node); + + // Find which of the nodes in des have not been visited before + scratch.id_scratch.clear(); + + let max_vertex_id = self.configuration.max_points + self.configuration.num_frozen_pts; + + for id in self + .final_graph + .read_vertex_and_neighbors(closest_node.id)? + .get_neighbors() + { + let current_vertex_id = *id; + debug_assert!( + (current_vertex_id as usize) < max_vertex_id, + "current_vertex_id {} is out of valid range of points {}", + current_vertex_id, + max_vertex_id + ); + if current_vertex_id as usize >= max_vertex_id { + continue; + } + + // quickly de-dup. 
Remember, we are in a read lock + // we want to exit out of it quickly + if scratch.node_visited_robinset.insert(current_vertex_id) { + scratch.id_scratch.push(current_vertex_id); + } + } + + let len = scratch.id_scratch.len(); + for (m, &id) in scratch.id_scratch.iter().enumerate() { + if m + 1 < len { + let next_node = unsafe { *scratch.id_scratch.get_unchecked(m + 1) }; + self.dataset.prefetch_vector(next_node); + } + + let vertex = self.dataset.get_vertex(id)?; + let distance = query_vertex.compare(&vertex, self.configuration.dist_metric); + + // Insert pairs into the pool of candidates + scratch.best_candidates.insert(Neighbor::new(id, distance)); + } + + cmps += len as u32; + } + + Ok((visited_nodes, cmps)) + } +} + +#[cfg(test)] +mod search_test { + use vector::Metric; + + use crate::model::configuration::index_write_parameters::IndexWriteParametersBuilder; + use crate::model::graph::AdjacencyList; + use crate::model::IndexConfiguration; + use crate::test_utils::inmem_index_initialization::create_index_with_test_data; + + use super::*; + + #[test] + fn get_init_ids_no_forzen_pts() { + let index_write_parameters = IndexWriteParametersBuilder::new(50, 4) + .with_alpha(1.2) + .build(); + let config = IndexConfiguration::new( + Metric::L2, + 256, + 256, + 256, + false, + 0, + false, + 0, + 1f32, + index_write_parameters, + ); + + let index = InmemIndex::::new(config).unwrap(); + let init_ids = index.get_init_ids().unwrap(); + assert_eq!(init_ids.len(), 1); + assert_eq!(init_ids[0], 256); + } + + #[test] + fn get_init_ids_with_forzen_pts() { + let index_write_parameters = IndexWriteParametersBuilder::new(50, 4) + .with_alpha(1.2) + .build(); + let config = IndexConfiguration::new( + Metric::L2, + 256, + 256, + 256, + false, + 0, + false, + 2, + 1f32, + index_write_parameters, + ); + + let index = InmemIndex::::new(config).unwrap(); + let init_ids = index.get_init_ids().unwrap(); + assert_eq!(init_ids.len(), 2); + assert_eq!(init_ids[0], 256); + assert_eq!(init_ids[1], 257); + } + + #[test] + fn search_for_point_initial_call() { + let index = create_index_with_test_data(); + let query = index.dataset.get_vertex(0).unwrap(); + + let mut scratch = InMemQueryScratch::new( + index.configuration.index_write_parameter.search_list_size, + &index.configuration.index_write_parameter, + false, + ) + .unwrap(); + let visited_nodes = index.search_for_point(&query, &mut scratch).unwrap(); + assert_eq!(visited_nodes.len(), 1); + assert_eq!(scratch.best_candidates.size(), 1); + assert_eq!(scratch.best_candidates[0].id, 72); + assert_eq!(scratch.best_candidates[0].distance, 125678.0_f32); + assert!(scratch.best_candidates[0].visited); + } + + fn set_neighbors(index: &InmemIndex, vertex_id: u32, neighbors: Vec) { + index + .final_graph + .write_vertex_and_neighbors(vertex_id) + .unwrap() + .set_neighbors(AdjacencyList::from(neighbors)); + } + #[test] + fn search_for_point_works_with_edges() { + let index = create_index_with_test_data(); + let query = index.dataset.get_vertex(14).unwrap(); + + set_neighbors(&index, 0, vec![12, 72, 5, 9]); + set_neighbors(&index, 1, vec![2, 12, 10, 4]); + set_neighbors(&index, 2, vec![1, 72, 9]); + set_neighbors(&index, 3, vec![13, 6, 5, 11]); + set_neighbors(&index, 4, vec![1, 3, 7, 9]); + set_neighbors(&index, 5, vec![3, 0, 8, 11, 13]); + set_neighbors(&index, 6, vec![3, 72, 7, 10, 13]); + set_neighbors(&index, 7, vec![72, 4, 6]); + set_neighbors(&index, 8, vec![72, 5, 9, 12]); + set_neighbors(&index, 9, vec![8, 4, 0, 2]); + set_neighbors(&index, 10, vec![72, 1, 9, 6]); + 
set_neighbors(&index, 11, vec![3, 0, 5]); + set_neighbors(&index, 12, vec![1, 0, 8, 9]); + set_neighbors(&index, 13, vec![3, 72, 5, 6]); + set_neighbors(&index, 72, vec![7, 2, 10, 8, 13]); + + let mut scratch = InMemQueryScratch::new( + index.configuration.index_write_parameter.search_list_size, + &index.configuration.index_write_parameter, + false, + ) + .unwrap(); + let visited_nodes = index.search_for_point(&query, &mut scratch).unwrap(); + assert_eq!(visited_nodes.len(), 15); + assert_eq!(scratch.best_candidates.size(), 15); + assert_eq!(scratch.best_candidates[0].id, 2); + assert_eq!(scratch.best_candidates[0].distance, 120899.0_f32); + assert_eq!(scratch.best_candidates[1].id, 8); + assert_eq!(scratch.best_candidates[1].distance, 145538.0_f32); + assert_eq!(scratch.best_candidates[2].id, 72); + assert_eq!(scratch.best_candidates[2].distance, 146046.0_f32); + assert_eq!(scratch.best_candidates[3].id, 4); + assert_eq!(scratch.best_candidates[3].distance, 148462.0_f32); + assert_eq!(scratch.best_candidates[4].id, 7); + assert_eq!(scratch.best_candidates[4].distance, 148912.0_f32); + assert_eq!(scratch.best_candidates[5].id, 10); + assert_eq!(scratch.best_candidates[5].distance, 154570.0_f32); + assert_eq!(scratch.best_candidates[6].id, 1); + assert_eq!(scratch.best_candidates[6].distance, 159448.0_f32); + assert_eq!(scratch.best_candidates[7].id, 12); + assert_eq!(scratch.best_candidates[7].distance, 170698.0_f32); + assert_eq!(scratch.best_candidates[8].id, 9); + assert_eq!(scratch.best_candidates[8].distance, 177205.0_f32); + assert_eq!(scratch.best_candidates[9].id, 0); + assert_eq!(scratch.best_candidates[9].distance, 259996.0_f32); + assert_eq!(scratch.best_candidates[10].id, 6); + assert_eq!(scratch.best_candidates[10].distance, 371819.0_f32); + assert_eq!(scratch.best_candidates[11].id, 5); + assert_eq!(scratch.best_candidates[11].distance, 385240.0_f32); + assert_eq!(scratch.best_candidates[12].id, 3); + assert_eq!(scratch.best_candidates[12].distance, 413899.0_f32); + assert_eq!(scratch.best_candidates[13].id, 13); + assert_eq!(scratch.best_candidates[13].distance, 416386.0_f32); + assert_eq!(scratch.best_candidates[14].id, 11); + assert_eq!(scratch.best_candidates[14].distance, 449266.0_f32); + } +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/common/aligned_allocator.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/common/aligned_allocator.rs new file mode 100644 index 0000000..6164a1f --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/common/aligned_allocator.rs @@ -0,0 +1,281 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +#![warn(missing_debug_implementations, missing_docs)] + +//! Aligned allocator + +use std::alloc::Layout; +use std::ops::{Deref, DerefMut, Range}; +use std::ptr::copy_nonoverlapping; + +use super::{ANNResult, ANNError}; + +#[derive(Debug)] +/// A box that holds a slice but is aligned to the specified layout. +/// +/// This type is useful for working with types that require a certain alignment, +/// such as SIMD vectors or FFI structs. It allocates memory using the global allocator +/// and frees it when dropped. It also implements Deref and DerefMut to allow access +/// to the underlying slice. +pub struct AlignedBoxWithSlice { + /// The layout of the allocated memory. + layout: Layout, + + /// The slice that points to the allocated memory. 
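    // Keeping the Layout next to the boxed slice is what allows the Drop impl further down
    // to free this memory with exactly the size and alignment it was allocated with.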
+ val: Box<[T]>, +} + +impl AlignedBoxWithSlice { + /// Creates a new `AlignedBoxWithSlice` with the given capacity and alignment. + /// The allocated memory are set to 0. + /// + /// # Error + /// + /// Return IndexError if the alignment is not a power of two or if the layout is invalid. + /// + /// This function is unsafe because it allocates uninitialized memory and casts it to + /// a slice of `T`. The caller must ensure that the capacity and alignment are valid + /// for the type `T` and that the memory is initialized before accessing the elements + /// of the slice. + pub fn new(capacity: usize, alignment: usize) -> ANNResult { + let allocsize = capacity.checked_mul(std::mem::size_of::()) + .ok_or_else(|| ANNError::log_index_error("capacity overflow".to_string()))?; + let layout = Layout::from_size_align(allocsize, alignment) + .map_err(ANNError::log_mem_alloc_layout_error)?; + + let val = unsafe { + let mem = std::alloc::alloc_zeroed(layout); + let ptr = mem as *mut T; + let slice = std::slice::from_raw_parts_mut(ptr, capacity); + std::boxed::Box::from_raw(slice) + }; + + Ok(Self { layout, val }) + } + + /// Returns a reference to the slice. + pub fn as_slice(&self) -> &[T] { + &self.val + } + + /// Returns a mutable reference to the slice. + pub fn as_mut_slice(&mut self) -> &mut [T] { + &mut self.val + } + + /// Copies data from the source slice to the destination box. + pub fn memcpy(&mut self, src: &[T]) -> ANNResult<()> { + if src.len() > self.val.len() { + return Err(ANNError::log_index_error(format!("source slice is too large (src:{}, dst:{})", src.len(), self.val.len()))); + } + + // Check that they don't overlap + let src_ptr = src.as_ptr(); + let src_end = unsafe { src_ptr.add(src.len()) }; + let dst_ptr = self.val.as_mut_ptr(); + let dst_end = unsafe { dst_ptr.add(self.val.len()) }; + + if src_ptr < dst_end && src_end > dst_ptr { + return Err(ANNError::log_index_error("Source and destination overlap".to_string())); + } + + unsafe { + copy_nonoverlapping(src.as_ptr(), self.val.as_mut_ptr(), src.len()); + } + + Ok(()) + } + + /// Split the range of memory into nonoverlapping mutable slices. + /// The number of returned slices is (range length / slice_len) and each has a length of slice_len. + pub fn split_into_nonoverlapping_mut_slices(&mut self, range: Range, slice_len: usize) -> ANNResult> { + if range.len() % slice_len != 0 || range.end > self.len() { + return Err(ANNError::log_index_error(format!( + "Cannot split range ({:?}) of AlignedBoxWithSlice (len: {}) into nonoverlapping mutable slices with length {}", + range, + self.len(), + slice_len, + ))); + } + + let mut slices = Vec::with_capacity(range.len() / slice_len); + let mut remaining_slice = &mut self.val[range]; + + while remaining_slice.len() >= slice_len { + let (left, right) = remaining_slice.split_at_mut(slice_len); + slices.push(left); + remaining_slice = right; + } + + Ok(slices) + } +} + + +impl Drop for AlignedBoxWithSlice { + /// Frees the memory allocated for the slice using the global allocator. 
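    // The boxed slice was produced by Box::from_raw over memory obtained from alloc_zeroed
    // with a custom, possibly over-aligned Layout, so letting Box's own destructor run would
    // deallocate with the wrong layout. ManuallyDrop suppresses that destructor and the raw
    // pointer is released with the stored `layout` instead.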
+ fn drop(&mut self) { + let val = std::mem::take(&mut self.val); + let mut val2 = std::mem::ManuallyDrop::new(val); + let ptr = val2.as_mut_ptr(); + + unsafe { + // let nonNull = NonNull::new_unchecked(ptr as *mut u8); + std::alloc::dealloc(ptr as *mut u8, self.layout) + } + } +} + +impl Deref for AlignedBoxWithSlice { + type Target = [T]; + + fn deref(&self) -> &Self::Target { + &self.val + } +} + +impl DerefMut for AlignedBoxWithSlice { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.val + } +} + +#[cfg(test)] +mod tests { + use rand::Rng; + + use crate::utils::is_aligned; + + use super::*; + + #[test] + fn create_alignedvec_works_32() { + (0..100).for_each(|_| { + let size = 1_000_000; + println!("Attempting {}", size); + let data = AlignedBoxWithSlice::::new(size, 32).unwrap(); + assert_eq!(data.len(), size, "Capacity should match"); + + let ptr = data.as_ptr() as usize; + assert_eq!(ptr % 32, 0, "Ptr should be aligned to 32"); + + // assert that the slice is initialized. + (0..size).for_each(|i| { + assert_eq!(data[i], f32::default()); + }); + + drop(data); + }); + } + + #[test] + fn create_alignedvec_works_256() { + let mut rng = rand::thread_rng(); + + (0..100).for_each(|_| { + let n = rng.gen::(); + let size = usize::from(n) + 1; + println!("Attempting {}", size); + let data = AlignedBoxWithSlice::::new(size, 256).unwrap(); + assert_eq!(data.len(), size, "Capacity should match"); + + let ptr = data.as_ptr() as usize; + assert_eq!(ptr % 256, 0, "Ptr should be aligned to 32"); + + // assert that the slice is initialized. + (0..size).for_each(|i| { + assert_eq!(data[i], u8::default()); + }); + + drop(data); + }); + } + + #[test] + fn as_slice_test() { + let size = 1_000_000; + let data = AlignedBoxWithSlice::::new(size, 32).unwrap(); + // assert that the slice is initialized. 
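        // (alloc_zeroed hands back zeroed memory, and an all-zero bit pattern for f32 reads
        // back as 0.0, i.e. f32::default(), which is what the loop below checks.)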
+ (0..size).for_each(|i| { + assert_eq!(data[i], f32::default()); + }); + + let slice = data.as_slice(); + (0..size).for_each(|i| { + assert_eq!(slice[i], f32::default()); + }); + } + + #[test] + fn as_mut_slice_test() { + let size = 1_000_000; + let mut data = AlignedBoxWithSlice::::new(size, 32).unwrap(); + let mut_slice = data.as_mut_slice(); + (0..size).for_each(|i| { + assert_eq!(mut_slice[i], f32::default()); + }); + } + + #[test] + fn memcpy_test() { + let size = 1_000_000; + let mut data = AlignedBoxWithSlice::::new(size, 32).unwrap(); + let mut destination = AlignedBoxWithSlice::::new(size-2, 32).unwrap(); + let mut_destination = destination.as_mut_slice(); + data.memcpy(mut_destination).unwrap(); + (0..size-2).for_each(|i| { + assert_eq!(data[i], mut_destination[i]); + }); + } + + #[test] + #[should_panic(expected = "source slice is too large (src:1000000, dst:999998)")] + fn memcpy_panic_test() { + let size = 1_000_000; + let mut data = AlignedBoxWithSlice::::new(size-2, 32).unwrap(); + let mut destination = AlignedBoxWithSlice::::new(size, 32).unwrap(); + let mut_destination = destination.as_mut_slice(); + data.memcpy(mut_destination).unwrap(); + } + + #[test] + fn is_aligned_test() { + assert!(is_aligned(256,256)); + assert!(!is_aligned(255,256)); + } + + #[test] + fn split_into_nonoverlapping_mut_slices_test() { + let size = 10; + let slice_len = 2; + let mut data = AlignedBoxWithSlice::::new(size, 32).unwrap(); + let slices = data.split_into_nonoverlapping_mut_slices(2..8, slice_len).unwrap(); + assert_eq!(slices.len(), 3); + for (i, slice) in slices.into_iter().enumerate() { + assert_eq!(slice.len(), slice_len); + slice[0] = i as f32 + 1.0; + slice[1] = i as f32 + 1.0; + } + let expected_arr = [0.0f32, 0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 0.0, 0.0]; + assert_eq!(data.as_ref(), &expected_arr); + } + + #[test] + fn split_into_nonoverlapping_mut_slices_error_when_indivisible() { + let size = 10; + let slice_len = 2; + let range = 2..7; + let mut data = AlignedBoxWithSlice::::new(size, 32).unwrap(); + let result = data.split_into_nonoverlapping_mut_slices(range.clone(), slice_len); + let expected_err_str = format!( + "IndexError: Cannot split range ({:?}) of AlignedBoxWithSlice (len: {}) into nonoverlapping mutable slices with length {}", + range, + size, + slice_len, + ); + assert!(result.is_err_and(|e| e.to_string() == expected_err_str)); + } +} + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/common/ann_result.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/common/ann_result.rs new file mode 100644 index 0000000..69fcf03 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/common/ann_result.rs @@ -0,0 +1,179 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. 
+ */ +use std::alloc::LayoutError; +use std::array::TryFromSliceError; +use std::io; +use std::num::TryFromIntError; + +use logger::error_logger::log_error; +use logger::log_error::LogError; + +/// Result +pub type ANNResult = Result; + +/// DiskANN Error +/// ANNError is `Send` (i.e., safe to send across threads) +#[derive(thiserror::Error, Debug)] +pub enum ANNError { + /// Index construction and search error + #[error("IndexError: {err}")] + IndexError { err: String }, + + /// Index configuration error + #[error("IndexConfigError: {parameter} is invalid, err={err}")] + IndexConfigError { parameter: String, err: String }, + + /// Integer conversion error + #[error("TryFromIntError: {err}")] + TryFromIntError { + #[from] + err: TryFromIntError, + }, + + /// IO error + #[error("IOError: {err}")] + IOError { + #[from] + err: io::Error, + }, + + /// Layout error in memory allocation + #[error("MemoryAllocLayoutError: {err}")] + MemoryAllocLayoutError { + #[from] + err: LayoutError, + }, + + /// PoisonError which can be returned whenever a lock is acquired + /// Both Mutexes and RwLocks are poisoned whenever a thread fails while the lock is held + #[error("LockPoisonError: {err}")] + LockPoisonError { err: String }, + + /// DiskIOAlignmentError which can be returned when calling windows API CreateFileA for the disk index file fails. + #[error("DiskIOAlignmentError: {err}")] + DiskIOAlignmentError { err: String }, + + /// Logging error + #[error("LogError: {err}")] + LogError { + #[from] + err: LogError, + }, + + // PQ construction error + // Error happened when we construct PQ pivot or PQ compressed table + #[error("PQError: {err}")] + PQError { err: String }, + + /// Array conversion error + #[error("Error try creating array from slice: {err}")] + TryFromSliceError { + #[from] + err: TryFromSliceError, + }, +} + +impl ANNError { + /// Create, log and return IndexError + #[inline] + pub fn log_index_error(err: String) -> Self { + let ann_err = ANNError::IndexError { err }; + match log_error(ann_err.to_string()) { + Ok(()) => ann_err, + Err(log_err) => ANNError::LogError { err: log_err }, + } + } + + /// Create, log and return IndexConfigError + #[inline] + pub fn log_index_config_error(parameter: String, err: String) -> Self { + let ann_err = ANNError::IndexConfigError { parameter, err }; + match log_error(ann_err.to_string()) { + Ok(()) => ann_err, + Err(log_err) => ANNError::LogError { err: log_err }, + } + } + + /// Create, log and return TryFromIntError + #[inline] + pub fn log_try_from_int_error(err: TryFromIntError) -> Self { + let ann_err = ANNError::TryFromIntError { err }; + match log_error(ann_err.to_string()) { + Ok(()) => ann_err, + Err(log_err) => ANNError::LogError { err: log_err }, + } + } + + /// Create, log and return IOError + #[inline] + pub fn log_io_error(err: io::Error) -> Self { + let ann_err = ANNError::IOError { err }; + match log_error(ann_err.to_string()) { + Ok(()) => ann_err, + Err(log_err) => ANNError::LogError { err: log_err }, + } + } + + /// Create, log and return DiskIOAlignmentError + /// #[inline] + pub fn log_disk_io_request_alignment_error(err: String) -> Self { + let ann_err: ANNError = ANNError::DiskIOAlignmentError { err }; + match log_error(ann_err.to_string()) { + Ok(()) => ann_err, + Err(log_err) => ANNError::LogError { err: log_err }, + } + } + + /// Create, log and return IOError + #[inline] + pub fn log_mem_alloc_layout_error(err: LayoutError) -> Self { + let ann_err = ANNError::MemoryAllocLayoutError { err }; + match 
log_error(ann_err.to_string()) { + Ok(()) => ann_err, + Err(log_err) => ANNError::LogError { err: log_err }, + } + } + + /// Create, log and return LockPoisonError + #[inline] + pub fn log_lock_poison_error(err: String) -> Self { + let ann_err = ANNError::LockPoisonError { err }; + match log_error(ann_err.to_string()) { + Ok(()) => ann_err, + Err(log_err) => ANNError::LogError { err: log_err }, + } + } + + /// Create, log and return PQError + #[inline] + pub fn log_pq_error(err: String) -> Self { + let ann_err = ANNError::PQError { err }; + match log_error(ann_err.to_string()) { + Ok(()) => ann_err, + Err(log_err) => ANNError::LogError { err: log_err }, + } + } + + /// Create, log and return TryFromSliceError + #[inline] + pub fn log_try_from_slice_error(err: TryFromSliceError) -> Self { + let ann_err = ANNError::TryFromSliceError { err }; + match log_error(ann_err.to_string()) { + Ok(()) => ann_err, + Err(log_err) => ANNError::LogError { err: log_err }, + } + } +} + +#[cfg(test)] +mod ann_result_test { + use super::*; + + #[test] + fn ann_err_is_send() { + fn assert_send() {} + assert_send::(); + } +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/common/mod.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/common/mod.rs new file mode 100644 index 0000000..d9da72b --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/common/mod.rs @@ -0,0 +1,9 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +mod aligned_allocator; +pub use aligned_allocator::AlignedBoxWithSlice; + +mod ann_result; +pub use ann_result::*; diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/index/disk_index/ann_disk_index.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/index/disk_index/ann_disk_index.rs new file mode 100644 index 0000000..a6e053e --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/index/disk_index/ann_disk_index.rs @@ -0,0 +1,54 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +#![warn(missing_docs)] + +//! 
ANN disk index abstraction + +use vector::FullPrecisionDistance; + +use crate::model::{IndexConfiguration, DiskIndexBuildParameters}; +use crate::storage::DiskIndexStorage; +use crate::model::vertex::{DIM_128, DIM_256, DIM_104}; + +use crate::common::{ANNResult, ANNError}; + +use super::DiskIndex; + +/// ANN disk index abstraction for custom +pub trait ANNDiskIndex : Sync + Send +where T : Default + Copy + Sync + Send + Into + { + /// Build index + fn build(&mut self, codebook_prefix: &str) -> ANNResult<()>; +} + +/// Create Index based on configuration +pub fn create_disk_index<'a, T>( + disk_build_param: Option, + config: IndexConfiguration, + storage: DiskIndexStorage, +) -> ANNResult + 'a>> +where + T: Default + Copy + Sync + Send + Into + 'a, + [T; DIM_104]: FullPrecisionDistance, + [T; DIM_128]: FullPrecisionDistance, + [T; DIM_256]: FullPrecisionDistance, +{ + match config.aligned_dim { + DIM_104 => { + let index = Box::new(DiskIndex::::new(disk_build_param, config, storage)); + Ok(index as Box>) + }, + DIM_128 => { + let index = Box::new(DiskIndex::::new(disk_build_param, config, storage)); + Ok(index as Box>) + }, + DIM_256 => { + let index = Box::new(DiskIndex::::new(disk_build_param, config, storage)); + Ok(index as Box>) + }, + _ => Err(ANNError::log_index_error(format!("Invalid dimension: {}", config.aligned_dim))), + } +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/index/disk_index/disk_index.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/index/disk_index/disk_index.rs new file mode 100644 index 0000000..16f0d59 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/index/disk_index/disk_index.rs @@ -0,0 +1,161 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. 
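The `log_*` constructors above are how the rest of the crate turns failures into a logged `ANNError` (for example, `create_disk_index` uses `log_index_error` for its unsupported-dimension branch). A minimal caller-side sketch, assuming `ANNResult<T>` aliases `Result<T, ANNError>` and using std's `HashSet` for brevity:

use std::collections::HashSet;
use std::sync::RwLock;

use diskann::common::{ANNError, ANNResult};

// Read a shared set, converting a poisoned lock into a logged ANNError,
// mirroring the pattern used throughout the in-memory index.
fn deleted_count(delete_set: &RwLock<HashSet<u32>>) -> ANNResult<usize> {
    match delete_set.read() {
        Ok(guard) => Ok(guard.len()),
        Err(_) => Err(ANNError::log_lock_poison_error(
            "failed to acquire the lock for delete_set.".to_string(),
        )),
    }
}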
+ */ +use std::mem; + +use logger::logger::indexlog::DiskIndexConstructionCheckpoint; +use vector::FullPrecisionDistance; + +use crate::common::{ANNResult, ANNError}; +use crate::index::{InmemIndex, ANNInmemIndex}; +use crate::instrumentation::DiskIndexBuildLogger; +use crate::model::configuration::DiskIndexBuildParameters; +use crate::model::{IndexConfiguration, MAX_PQ_TRAINING_SET_SIZE, MAX_PQ_CHUNKS, generate_quantized_data, GRAPH_SLACK_FACTOR}; +use crate::storage::DiskIndexStorage; +use crate::utils::set_rayon_num_threads; + +use super::ann_disk_index::ANNDiskIndex; + +pub const OVERHEAD_FACTOR: f64 = 1.1f64; + +pub const MAX_SAMPLE_POINTS_FOR_WARMUP: usize = 100_000; + +pub struct DiskIndex +where + [T; N]: FullPrecisionDistance, +{ + /// Parameters for index construction + /// None for query path + disk_build_param: Option, + + configuration: IndexConfiguration, + + pub storage: DiskIndexStorage, +} + +impl DiskIndex +where + T: Default + Copy + Sync + Send + Into, + [T; N]: FullPrecisionDistance, +{ + pub fn new( + disk_build_param: Option, + configuration: IndexConfiguration, + storage: DiskIndexStorage, + ) -> Self { + Self { + disk_build_param, + configuration, + storage, + } + } + + pub fn disk_build_param(&self) -> &Option { + &self.disk_build_param + } + + pub fn index_configuration(&self) -> &IndexConfiguration { + &self.configuration + } + + fn build_inmem_index(&self, num_points: usize, data_path: &str, inmem_index_path: &str) -> ANNResult<()> { + let estimated_index_ram = self.estimate_ram_usage(num_points); + if estimated_index_ram >= self.fetch_disk_build_param()?.index_build_ram_limit() * 1024_f64 * 1024_f64 * 1024_f64 { + return Err(ANNError::log_index_error(format!( + "Insufficient memory budget for index build, index_build_ram_limit={}GB estimated_index_ram={}GB", + self.fetch_disk_build_param()?.index_build_ram_limit(), + estimated_index_ram / (1024_f64 * 1024_f64 * 1024_f64), + ))); + } + + let mut index = InmemIndex::::new(self.configuration.clone())?; + index.build(data_path, num_points)?; + index.save(inmem_index_path)?; + + Ok(()) + } + + #[inline] + fn estimate_ram_usage(&self, size: usize) -> f64 { + let degree = self.configuration.index_write_parameter.max_degree as usize; + let datasize = mem::size_of::(); + + let dataset_size = (size * N * datasize) as f64; + let graph_size = (size * degree * mem::size_of::()) as f64 * GRAPH_SLACK_FACTOR; + + OVERHEAD_FACTOR * (dataset_size + graph_size) + } + + #[inline] + fn fetch_disk_build_param(&self) -> ANNResult<&DiskIndexBuildParameters> { + self.disk_build_param + .as_ref() + .ok_or_else(|| ANNError::log_index_config_error( + "disk_build_param".to_string(), + "disk_build_param is None".to_string())) + } +} + +impl ANNDiskIndex for DiskIndex +where + T: Default + Copy + Sync + Send + Into, + [T; N]: FullPrecisionDistance, +{ + fn build(&mut self, codebook_prefix: &str) -> ANNResult<()> { + if self.configuration.index_write_parameter.num_threads > 0 { + set_rayon_num_threads(self.configuration.index_write_parameter.num_threads); + } + + println!("Starting index build: R={} L={} Query RAM budget={} Indexing RAM budget={} T={}", + self.configuration.index_write_parameter.max_degree, + self.configuration.index_write_parameter.search_list_size, + self.fetch_disk_build_param()?.search_ram_limit(), + self.fetch_disk_build_param()?.index_build_ram_limit(), + self.configuration.index_write_parameter.num_threads + ); + + let mut logger = DiskIndexBuildLogger::new(DiskIndexConstructionCheckpoint::PqConstruction); + + // PQ 
memory consumption = PQ pivots + PQ compressed table + // PQ pivots: dim * num_centroids * sizeof::() + // PQ compressed table: num_pts * num_pq_chunks * (dim / num_pq_chunks) * sizeof::() + // * Because num_centroids is 256, centroid id can be represented by u8 + let num_points = self.configuration.max_points; + let dim = self.configuration.dim; + let p_val = MAX_PQ_TRAINING_SET_SIZE / (num_points as f64); + let mut num_pq_chunks = ((self.fetch_disk_build_param()?.search_ram_limit() / (num_points as f64)).floor()) as usize; + num_pq_chunks = if num_pq_chunks == 0 { 1 } else { num_pq_chunks }; + num_pq_chunks = if num_pq_chunks > dim { dim } else { num_pq_chunks }; + num_pq_chunks = if num_pq_chunks > MAX_PQ_CHUNKS { MAX_PQ_CHUNKS } else { num_pq_chunks }; + + println!("Compressing {}-dimensional data into {} bytes per vector.", dim, num_pq_chunks); + + // TODO: Decouple PQ from file access + generate_quantized_data::( + p_val, + num_pq_chunks, + codebook_prefix, + self.storage.get_pq_storage(), + )?; + logger.log_checkpoint(DiskIndexConstructionCheckpoint::InmemIndexBuild)?; + + // TODO: Decouple index from file access + let inmem_index_path = self.storage.index_path_prefix().clone() + "_mem.index"; + self.build_inmem_index(num_points, self.storage.dataset_file(), inmem_index_path.as_str())?; + logger.log_checkpoint(DiskIndexConstructionCheckpoint::DiskLayout)?; + + self.storage.create_disk_layout()?; + logger.log_checkpoint(DiskIndexConstructionCheckpoint::None)?; + + let ten_percent_points = ((num_points as f64) * 0.1_f64).ceil(); + let num_sample_points = if ten_percent_points > (MAX_SAMPLE_POINTS_FOR_WARMUP as f64) { MAX_SAMPLE_POINTS_FOR_WARMUP as f64 } else { ten_percent_points }; + let sample_sampling_rate = num_sample_points / (num_points as f64); + self.storage.gen_query_warmup_data(sample_sampling_rate)?; + + self.storage.index_build_cleanup()?; + + Ok(()) + } +} + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/index/disk_index/mod.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/index/disk_index/mod.rs new file mode 100644 index 0000000..4f07bd7 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/index/disk_index/mod.rs @@ -0,0 +1,9 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +#[allow(clippy::module_inception)] +mod disk_index; +pub use disk_index::DiskIndex; + +pub mod ann_disk_index; diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/index/inmem_index/ann_inmem_index.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/index/inmem_index/ann_inmem_index.rs new file mode 100644 index 0000000..dc8dfc8 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/index/inmem_index/ann_inmem_index.rs @@ -0,0 +1,97 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +#![warn(missing_docs)] + +//! 
ANN in-memory index abstraction + +use vector::FullPrecisionDistance; + +use crate::model::{vertex::{DIM_128, DIM_256, DIM_104}, IndexConfiguration}; +use crate::common::{ANNResult, ANNError}; + +use super::InmemIndex; + +/// ANN inmem-index abstraction for custom +pub trait ANNInmemIndex : Sync + Send +where T : Default + Copy + Sync + Send + Into + { + /// Build index + fn build(&mut self, filename: &str, num_points_to_load: usize) -> ANNResult<()>; + + /// Save index + fn save(&mut self, filename: &str) -> ANNResult<()>; + + /// Load index + fn load(&mut self, filename: &str, expected_num_points: usize) -> ANNResult<()>; + + /// insert index + fn insert(&mut self, filename: &str, num_points_to_insert: usize) -> ANNResult<()>; + + /// Search the index for K nearest neighbors of query using given L value, for benchmarking purposes + fn search(&self, query : &[T], k_value : usize, l_value : u32, indices : &mut[u32]) -> ANNResult; + + /// Soft deletes the nodes with the ids in the given array. + fn soft_delete(&mut self, vertex_ids_to_delete: Vec, num_points_to_delete: usize) -> ANNResult<()>; +} + +/// Create Index based on configuration +pub fn create_inmem_index<'a, T>(config: IndexConfiguration) -> ANNResult + 'a>> +where + T: Default + Copy + Sync + Send + Into + 'a, + [T; DIM_104]: FullPrecisionDistance, + [T; DIM_128]: FullPrecisionDistance, + [T; DIM_256]: FullPrecisionDistance, +{ + match config.aligned_dim { + DIM_104 => { + let index = Box::new(InmemIndex::::new(config)?); + Ok(index as Box>) + }, + DIM_128 => { + let index = Box::new(InmemIndex::::new(config)?); + Ok(index as Box>) + }, + DIM_256 => { + let index = Box::new(InmemIndex::::new(config)?); + Ok(index as Box>) + }, + _ => Err(ANNError::log_index_error(format!("Invalid dimension: {}", config.aligned_dim))), + } +} + +#[cfg(test)] +mod dataset_test { + use vector::Metric; + + use crate::model::configuration::index_write_parameters::IndexWriteParametersBuilder; + + use super::*; + + #[test] + #[should_panic(expected = "ERROR: Data file fake_file does not exist.")] + fn create_index_test() { + let index_write_parameters = IndexWriteParametersBuilder::new(50, 4) + .with_alpha(1.2) + .with_saturate_graph(false) + .with_num_threads(1) + .build(); + + let config = IndexConfiguration::new( + Metric::L2, + 128, + 256, + 1_000_000, + false, + 0, + false, + 0, + 1f32, + index_write_parameters, + ); + let mut index = create_inmem_index::(config).unwrap(); + index.build("fake_file", 100).unwrap(); + } +} + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/index/inmem_index/inmem_index.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/index/inmem_index/inmem_index.rs new file mode 100644 index 0000000..871d210 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/index/inmem_index/inmem_index.rs @@ -0,0 +1,1033 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. 
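For reference, the two sizing rules that `DiskIndex::build` and `estimate_ram_usage` apply above reduce to the arithmetic below. This is a standalone sketch: the crate's `GRAPH_SLACK_FACTOR` and `MAX_PQ_CHUNKS` constants are defined in the model module outside this excerpt, so they are passed in as parameters here.

// Estimated RAM for the in-memory build: OVERHEAD_FACTOR (1.1) times the
// full-precision vectors plus the slack-padded u32 adjacency lists.
fn estimated_index_ram_bytes(
    num_points: usize,
    aligned_dim: usize,
    elem_bytes: usize,
    max_degree: usize,
    graph_slack_factor: f64,
) -> f64 {
    let dataset = (num_points * aligned_dim * elem_bytes) as f64;
    let graph = (num_points * max_degree * std::mem::size_of::<u32>()) as f64 * graph_slack_factor;
    1.1 * (dataset + graph)
}

// Bytes of PQ code per vector: the search-time RAM budget spread across all
// points, clamped to at least 1 and at most min(dim, MAX_PQ_CHUNKS).
fn num_pq_chunks(search_ram_limit_bytes: f64, num_points: usize, dim: usize, max_pq_chunks: usize) -> usize {
    let chunks = (search_ram_limit_bytes / num_points as f64).floor() as usize;
    chunks.clamp(1, dim.min(max_pq_chunks))
}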
+ */ +use std::cmp; +use std::sync::RwLock; +use std::time::Duration; + +use hashbrown::hash_set::Entry::*; +use hashbrown::HashSet; +use vector::FullPrecisionDistance; + +use crate::common::{ANNError, ANNResult}; +use crate::index::ANNInmemIndex; +use crate::instrumentation::IndexLogger; +use crate::model::graph::AdjacencyList; +use crate::model::{ + ArcConcurrentBoxedQueue, InMemQueryScratch, InMemoryGraph, IndexConfiguration, InmemDataset, + Neighbor, ScratchStoreManager, Vertex, +}; + +use crate::utils::file_util::{file_exists, load_metadata_from_file}; +use crate::utils::rayon_util::execute_with_rayon; +use crate::utils::{set_rayon_num_threads, Timer}; + +/// In-memory Index +pub struct InmemIndex +where + [T; N]: FullPrecisionDistance, +{ + /// Dataset + pub dataset: InmemDataset, + + /// Graph + pub final_graph: InMemoryGraph, + + /// Index configuration + pub configuration: IndexConfiguration, + + /// Start point of the search. When _num_frozen_pts is greater than zero, + /// this is the location of the first frozen point. Otherwise, this is a + /// location of one of the points in index. + pub start: u32, + + /// Max observed out degree + pub max_observed_degree: u32, + + /// Number of active points i.e. existing in the graph + pub num_active_pts: usize, + + /// query scratch queue. + query_scratch_queue: ArcConcurrentBoxedQueue>, + + pub delete_set: RwLock>, +} + +impl InmemIndex +where + T: Default + Copy + Sync + Send + Into, + [T; N]: FullPrecisionDistance, +{ + /// Create Index obj based on configuration + pub fn new(mut config: IndexConfiguration) -> ANNResult { + // Sanity check. While logically it is correct, max_points = 0 causes + // downstream problems. + if config.max_points == 0 { + config.max_points = 1; + } + + let total_internal_points = config.max_points + config.num_frozen_pts; + + if config.use_pq_dist { + // TODO: pq + todo!("PQ is not supported now"); + } + + let start = config.max_points.try_into()?; + + let query_scratch_queue = ArcConcurrentBoxedQueue::>::new(); + let delete_set = RwLock::new(HashSet::::new()); + + Ok(Self { + dataset: InmemDataset::::new(total_internal_points, config.growth_potential)?, + final_graph: InMemoryGraph::new( + total_internal_points, + config.index_write_parameter.max_degree, + ), + configuration: config, + start, + max_observed_degree: 0, + num_active_pts: 0, + query_scratch_queue, + delete_set, + }) + } + + /// Get distance between two vertices. + pub fn get_distance(&self, id1: u32, id2: u32) -> ANNResult { + self.dataset + .get_distance(id1, id2, self.configuration.dist_metric) + } + + fn build_with_data_populated(&mut self) -> ANNResult<()> { + println!( + "Starting index build with {} points...", + self.num_active_pts + ); + + if self.num_active_pts < 1 { + return Err(ANNError::log_index_error( + "Error: Trying to build an index with 0 points.".to_string(), + )); + } + + if self.query_scratch_queue.size()? == 0 { + self.initialize_query_scratch( + 5 + self.configuration.index_write_parameter.num_threads, + self.configuration.index_write_parameter.search_list_size, + )?; + } + + // TODO: generate_frozen_point() + + self.link()?; + + self.print_stats()?; + + Ok(()) + } + + fn link(&mut self) -> ANNResult<()> { + // visit_order is a vector that is initialized to the entire graph + let mut visit_order = + Vec::with_capacity(self.num_active_pts + self.configuration.num_frozen_pts); + for i in 0..self.num_active_pts { + visit_order.push(i as u32); + } + + // If there are any frozen points, add them all. 
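The loop that follows appends the frozen tail to `visit_order`; condensed into a standalone helper, the traversal order that `link()` hands to Rayon is simply all active slots followed by the frozen slots at the end of the allocation (a sketch of the logic above, not a drop-in replacement):

fn build_visit_order(num_active_pts: usize, max_points: usize, num_frozen_pts: usize) -> Vec<u32> {
    let mut visit_order = Vec::with_capacity(num_active_pts + num_frozen_pts);
    // Active points occupy slots [0, num_active_pts).
    visit_order.extend(0..num_active_pts as u32);
    // Frozen points occupy the tail slots [max_points, max_points + num_frozen_pts).
    visit_order.extend(max_points as u32..(max_points + num_frozen_pts) as u32);
    visit_order
}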
+ for frozen in self.configuration.max_points + ..(self.configuration.max_points + self.configuration.num_frozen_pts) + { + visit_order.push(frozen as u32); + } + + // if there are frozen points, the first such one is set to be the _start + if self.configuration.num_frozen_pts > 0 { + self.start = self.configuration.max_points as u32; + } else { + self.start = self.dataset.calculate_medoid_point_id()?; + } + + let timer = Timer::new(); + + let range = visit_order.len(); + let logger = IndexLogger::new(range); + + execute_with_rayon( + 0..range, + self.configuration.index_write_parameter.num_threads, + |idx| { + self.insert_vertex_id(visit_order[idx])?; + logger.vertex_processed()?; + + Ok(()) + }, + )?; + + self.cleanup_graph(&visit_order)?; + + if self.num_active_pts > 0 { + println!("{}", timer.elapsed_seconds_for_step("Link time: ")); + } + + Ok(()) + } + + fn insert_vertex_id(&self, vertex_id: u32) -> ANNResult<()> { + let mut scratch_manager = + ScratchStoreManager::new(self.query_scratch_queue.clone(), Duration::from_millis(10))?; + let scratch = scratch_manager.scratch_space().ok_or_else(|| { + ANNError::log_index_error( + "ScratchStoreManager doesn't have InMemQueryScratch instance available".to_string(), + ) + })?; + + let new_neighbors = self.search_for_point_and_prune(scratch, vertex_id)?; + self.update_vertex_with_neighbors(vertex_id, new_neighbors)?; + self.update_neighbors_of_vertex(vertex_id, scratch)?; + + Ok(()) + } + + fn update_neighbors_of_vertex( + &self, + vertex_id: u32, + scratch: &mut InMemQueryScratch, + ) -> Result<(), ANNError> { + let vertex = self.final_graph.read_vertex_and_neighbors(vertex_id)?; + assert!(vertex.size() <= self.configuration.index_write_parameter.max_degree as usize); + self.inter_insert( + vertex_id, + vertex.get_neighbors(), + self.configuration.index_write_parameter.max_degree, + scratch, + )?; + Ok(()) + } + + fn update_vertex_with_neighbors( + &self, + vertex_id: u32, + new_neighbors: AdjacencyList, + ) -> Result<(), ANNError> { + let vertex = &mut self.final_graph.write_vertex_and_neighbors(vertex_id)?; + vertex.set_neighbors(new_neighbors); + assert!(vertex.size() <= self.configuration.index_write_parameter.max_degree as usize); + Ok(()) + } + + fn search_for_point_and_prune( + &self, + scratch: &mut InMemQueryScratch, + vertex_id: u32, + ) -> ANNResult { + let mut pruned_list = + AdjacencyList::for_range(self.configuration.index_write_parameter.max_degree as usize); + let vertex = self.dataset.get_vertex(vertex_id)?; + let mut visited_nodes = self.search_for_point(&vertex, scratch)?; + + self.prune_neighbors(vertex_id, &mut visited_nodes, &mut pruned_list, scratch)?; + + if pruned_list.is_empty() { + return Err(ANNError::log_index_error( + "pruned_list is empty.".to_string(), + )); + } + + if self.final_graph.size() + != self.configuration.max_points + self.configuration.num_frozen_pts + { + return Err(ANNError::log_index_error(format!( + "final_graph has {} vertices instead of {}", + self.final_graph.size(), + self.configuration.max_points + self.configuration.num_frozen_pts, + ))); + } + + Ok(pruned_list) + } + + fn search( + &self, + query: &Vertex, + k_value: usize, + l_value: u32, + indices: &mut [u32], + ) -> ANNResult { + if k_value > l_value as usize { + return Err(ANNError::log_index_error(format!( + "Set L: {} to a value of at least K: {}", + l_value, k_value + ))); + } + + let mut scratch_manager = + ScratchStoreManager::new(self.query_scratch_queue.clone(), Duration::from_millis(10))?; + + let scratch = 
scratch_manager.scratch_space().ok_or_else(|| { + ANNError::log_index_error( + "ScratchStoreManager doesn't have InMemQueryScratch instance available".to_string(), + ) + })?; + + if l_value > scratch.candidate_size { + println!("Attempting to expand query scratch_space. Was created with Lsize: {} but search L is: {}", scratch.candidate_size, l_value); + scratch.resize_for_new_candidate_size(l_value); + println!( + "Resize completed. New scratch size is: {}", + scratch.candidate_size + ); + } + + let cmp = self.search_with_l_override(query, scratch, l_value as usize)?; + let mut pos = 0; + + for i in 0..scratch.best_candidates.size() { + if scratch.best_candidates[i].id < self.configuration.max_points as u32 { + // Filter out the deleted points. + if let Ok(delete_set_guard) = self.delete_set.read() { + if !delete_set_guard.contains(&scratch.best_candidates[i].id) { + indices[pos] = scratch.best_candidates[i].id; + pos += 1; + } + } else { + return Err(ANNError::log_lock_poison_error( + "failed to acquire the lock for delete_set.".to_string(), + )); + } + } + + if pos == k_value { + break; + } + } + + if pos < k_value { + eprintln!( + "Found fewer than K elements for query! Found: {} but K: {}", + pos, k_value + ); + } + + Ok(cmp) + } + + fn cleanup_graph(&mut self, visit_order: &Vec) -> ANNResult<()> { + if self.num_active_pts > 0 { + println!("Starting final cleanup.."); + } + + execute_with_rayon( + 0..visit_order.len(), + self.configuration.index_write_parameter.num_threads, + |idx| { + let vertex_id = visit_order[idx]; + let num_nbrs = self.get_neighbor_count(vertex_id)?; + + if num_nbrs <= self.configuration.index_write_parameter.max_degree as usize { + // Neighbor list is already small enough. + return Ok(()); + } + + let mut scratch_manager = ScratchStoreManager::new( + self.query_scratch_queue.clone(), + Duration::from_millis(10), + )?; + let scratch = scratch_manager.scratch_space().ok_or_else(|| { + ANNError::log_index_error( + "ScratchStoreManager doesn't have InMemQueryScratch instance available" + .to_string(), + ) + })?; + + let mut dummy_pool = self.get_neighbors_for_vertex(vertex_id)?; + + let mut new_out_neighbors = AdjacencyList::for_range( + self.configuration.index_write_parameter.max_degree as usize, + ); + self.prune_neighbors(vertex_id, &mut dummy_pool, &mut new_out_neighbors, scratch)?; + + self.final_graph + .write_vertex_and_neighbors(vertex_id)? + .set_neighbors(new_out_neighbors); + + Ok(()) + }, + ) + } + + /// Get the unique neighbors for a vertex. + /// + /// This code feels out of place here. This should have nothing to do with whether this + /// is in memory index? + /// # Errors + /// + /// This function will return an error if we are not able to get the read lock. + fn get_neighbors_for_vertex(&self, vertex_id: u32) -> ANNResult> { + let binding = self.final_graph.read_vertex_and_neighbors(vertex_id)?; + let neighbors = binding.get_neighbors(); + let dummy_pool = self.get_unique_neighbors(neighbors, vertex_id)?; + + Ok(dummy_pool) + } + + /// Returns a vector of unique neighbors for the given vertex, along with their distances. + /// + /// # Arguments + /// + /// * `neighbors` - A vector of neighbor id index for the given vertex. + /// * `vertex_id` - The given vertex id. + /// + /// # Errors + /// + /// Returns an `ANNError` if there is an error retrieving the vertex or one of its neighbors. 
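From the caller's side, the `search` method above enforces that L is at least K, grows the scratch space if L exceeds the size it was created with, and filters soft-deleted ids out of the results. A usage sketch against the `ANNInmemIndex` trait, assuming its elided type parameter is the element type with `T = f32` here and ignoring the returned comparison count:

use diskann::common::ANNResult;
use diskann::index::ANNInmemIndex;

fn top_k(index: &dyn ANNInmemIndex<f32>, query: &[f32], k: usize) -> ANNResult<Vec<u32>> {
    let l = (k as u32).max(100); // search list size L >= K; 100 is the documented default
    let mut ids = vec![0u32; k];
    index.search(query, k, l, &mut ids)?;
    Ok(ids)
}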
+ pub fn get_unique_neighbors( + &self, + neighbors: &Vec, + vertex_id: u32, + ) -> Result, ANNError> { + let vertex = self.dataset.get_vertex(vertex_id)?; + + let len = neighbors.len(); + if len == 0 { + return Ok(Vec::new()); + } + + self.dataset.prefetch_vector(neighbors[0]); + + let mut dummy_visited: HashSet = HashSet::with_capacity(len); + let mut dummy_pool: Vec = Vec::with_capacity(len); + + // let slice = ['w', 'i', 'n', 'd', 'o', 'w', 's']; + // for window in slice.windows(2) { + // &println!{"[{}, {}]", window[0], window[1]}; + // } + // prints: [w, i] -> [i, n] -> [n, d] -> [d, o] -> [o, w] -> [w, s] + for current in neighbors.windows(2) { + // Prefetch the next item. + self.dataset.prefetch_vector(current[1]); + let current = current[0]; + + self.insert_neighbor_if_unique( + &mut dummy_visited, + current, + vertex_id, + &vertex, + &mut dummy_pool, + )?; + } + + // Insert the last neighbor + #[allow(clippy::unwrap_used)] + self.insert_neighbor_if_unique( + &mut dummy_visited, + *neighbors.last().unwrap(), // we know len != 0, so this is safe. + vertex_id, + &vertex, + &mut dummy_pool, + )?; + + Ok(dummy_pool) + } + + fn insert_neighbor_if_unique( + &self, + dummy_visited: &mut HashSet, + current: u32, + vertex_id: u32, + vertex: &Vertex<'_, T, N>, + dummy_pool: &mut Vec, + ) -> Result<(), ANNError> { + if current != vertex_id { + if let Vacant(entry) = dummy_visited.entry(current) { + let cur_nbr_vertex = self.dataset.get_vertex(current)?; + let dist = vertex.compare(&cur_nbr_vertex, self.configuration.dist_metric); + dummy_pool.push(Neighbor::new(current, dist)); + entry.insert(); + } + } + + Ok(()) + } + + /// Get count of neighbors for a given vertex. + /// + /// # Errors + /// + /// This function will return an error if we can't get a lock. + fn get_neighbor_count(&self, vertex_id: u32) -> ANNResult { + let num_nbrs = self + .final_graph + .read_vertex_and_neighbors(vertex_id)? + .size(); + Ok(num_nbrs) + } + + fn soft_delete_vertex(&self, vertex_id_to_delete: u32) -> ANNResult<()> { + if vertex_id_to_delete as usize > self.num_active_pts { + return Err(ANNError::log_index_error(format!( + "vertex_id_to_delete: {} is greater than the number of active points in the graph: {}", + vertex_id_to_delete, self.num_active_pts + ))); + } + + let mut delete_set_guard = match self.delete_set.write() { + Ok(guard) => guard, + Err(_) => { + return Err(ANNError::log_index_error(format!( + "Failed to acquire delete_set lock, cannot delete vertex {}", + vertex_id_to_delete + ))); + } + }; + + delete_set_guard.insert(vertex_id_to_delete); + Ok(()) + } + + fn initialize_query_scratch( + &mut self, + num_threads: u32, + search_candidate_size: u32, + ) -> ANNResult<()> { + self.query_scratch_queue.reserve(num_threads as usize)?; + for _ in 0..num_threads { + let scratch = Box::new(InMemQueryScratch::::new( + search_candidate_size, + &self.configuration.index_write_parameter, + false, + )?); + + self.query_scratch_queue.push(scratch)?; + } + + Ok(()) + } + + fn print_stats(&mut self) -> ANNResult<()> { + let mut max = 0; + let mut min = usize::MAX; + let mut total = 0; + let mut cnt = 0; + + for i in 0..self.num_active_pts { + let vertex_id = i.try_into()?; + let pool_size = self + .final_graph + .read_vertex_and_neighbors(vertex_id)? 
+ .size(); + max = cmp::max(max, pool_size); + min = cmp::min(min, pool_size); + total += pool_size; + if pool_size < 2 { + cnt += 1; + } + } + + println!( + "Index built with degree: max: {} avg: {} min: {} count(deg<2): {}", + max, + (total as f32) / ((self.num_active_pts + self.configuration.num_frozen_pts) as f32), + min, + cnt + ); + + match self.delete_set.read() { + Ok(guard) => { + println!( + "Number of soft deleted vertices {}, soft deleted percentage: {}", + guard.len(), + (guard.len() as f32) + / ((self.num_active_pts + self.configuration.num_frozen_pts) as f32), + ); + } + Err(_) => { + return Err(ANNError::log_lock_poison_error( + "Failed to acquire delete_set lock, cannot get the number of deleted vertices" + .to_string(), + )); + } + }; + + self.max_observed_degree = cmp::max(max as u32, self.max_observed_degree); + + Ok(()) + } +} + +impl ANNInmemIndex for InmemIndex +where + T: Default + Copy + Sync + Send + Into, + [T; N]: FullPrecisionDistance, +{ + fn build(&mut self, filename: &str, num_points_to_load: usize) -> ANNResult<()> { + // TODO: fresh-diskANN + // std::unique_lock ul(_update_lock); + + if !file_exists(filename) { + return Err(ANNError::log_index_error(format!( + "ERROR: Data file {} does not exist.", + filename + ))); + } + + let (file_num_points, file_dim) = load_metadata_from_file(filename)?; + if file_num_points > self.configuration.max_points { + return Err(ANNError::log_index_error(format!( + "ERROR: Driver requests loading {} points and file has {} points, + but index can support only {} points as specified in configuration.", + num_points_to_load, file_num_points, self.configuration.max_points + ))); + } + + if num_points_to_load > file_num_points { + return Err(ANNError::log_index_error(format!( + "ERROR: Driver requests loading {} points and file has only {} points.", + num_points_to_load, file_num_points + ))); + } + + if file_dim != self.configuration.dim { + return Err(ANNError::log_index_error(format!( + "ERROR: Driver requests loading {} dimension, but file has {} dimension.", + self.configuration.dim, file_dim + ))); + } + + if self.configuration.use_pq_dist { + // TODO: PQ + todo!("PQ is not supported now"); + } + + if self.configuration.index_write_parameter.num_threads > 0 { + set_rayon_num_threads(self.configuration.index_write_parameter.num_threads); + } + + self.dataset.build_from_file(filename, num_points_to_load)?; + + println!("Using only first {} from file.", num_points_to_load); + + // TODO: tag_lock + + self.num_active_pts = num_points_to_load; + self.build_with_data_populated()?; + + Ok(()) + } + + fn insert(&mut self, filename: &str, num_points_to_insert: usize) -> ANNResult<()> { + // fresh-diskANN + if !file_exists(filename) { + return Err(ANNError::log_index_error(format!( + "ERROR: Data file {} does not exist.", + filename + ))); + } + + let (file_num_points, file_dim) = load_metadata_from_file(filename)?; + + if num_points_to_insert > file_num_points { + return Err(ANNError::log_index_error(format!( + "ERROR: Driver requests loading {} points and file has only {} points.", + num_points_to_insert, file_num_points + ))); + } + + if file_dim != self.configuration.dim { + return Err(ANNError::log_index_error(format!( + "ERROR: Driver requests loading {} dimension, but file has {} dimension.", + self.configuration.dim, file_dim + ))); + } + + if self.configuration.use_pq_dist { + // TODO: PQ + todo!("PQ is not supported now"); + } + + if self.query_scratch_queue.size()? 
== 0 { + self.initialize_query_scratch( + 5 + self.configuration.index_write_parameter.num_threads, + self.configuration.index_write_parameter.search_list_size, + )?; + } + + if self.configuration.index_write_parameter.num_threads > 0 { + // set the thread count of Rayon, otherwise it will use threads as many as logical cores. + std::env::set_var( + "RAYON_NUM_THREADS", + self.configuration + .index_write_parameter + .num_threads + .to_string(), + ); + } + + self.dataset + .append_from_file(filename, num_points_to_insert)?; + self.final_graph.extend( + num_points_to_insert, + self.configuration.index_write_parameter.max_degree, + ); + + // TODO: this should not consider frozen points + let previous_last_pt = self.num_active_pts; + self.num_active_pts += num_points_to_insert; + self.configuration.max_points += num_points_to_insert; + + println!("Inserting {} vectors from file.", num_points_to_insert); + + // TODO: tag_lock + let logger = IndexLogger::new(num_points_to_insert); + let timer = Timer::new(); + execute_with_rayon( + previous_last_pt..self.num_active_pts, + self.configuration.index_write_parameter.num_threads, + |idx| { + self.insert_vertex_id(idx as u32)?; + logger.vertex_processed()?; + + Ok(()) + }, + )?; + + let mut visit_order = + Vec::with_capacity(self.num_active_pts + self.configuration.num_frozen_pts); + for i in 0..self.num_active_pts { + visit_order.push(i as u32); + } + + self.cleanup_graph(&visit_order)?; + println!("{}", timer.elapsed_seconds_for_step("Insert time: ")); + + self.print_stats()?; + + Ok(()) + } + + fn save(&mut self, filename: &str) -> ANNResult<()> { + let data_file = filename.to_string() + ".data"; + let delete_file = filename.to_string() + ".delete"; + + self.save_graph(filename)?; + self.save_data(data_file.as_str())?; + self.save_delete_list(delete_file.as_str())?; + + Ok(()) + } + + fn load(&mut self, filename: &str, expected_num_points: usize) -> ANNResult<()> { + self.num_active_pts = expected_num_points; + self.dataset + .build_from_file(&format!("{}.data", filename), expected_num_points)?; + + self.load_graph(filename, expected_num_points)?; + self.load_delete_list(&format!("{}.delete", filename))?; + + if self.query_scratch_queue.size()? 
== 0 { + self.initialize_query_scratch( + 5 + self.configuration.index_write_parameter.num_threads, + self.configuration.index_write_parameter.search_list_size, + )?; + } + + Ok(()) + } + + fn search( + &self, + query: &[T], + k_value: usize, + l_value: u32, + indices: &mut [u32], + ) -> ANNResult { + let query_vector = Vertex::new(<&[T; N]>::try_from(query)?, 0); + InmemIndex::search(self, &query_vector, k_value, l_value, indices) + } + + fn soft_delete( + &mut self, + vertex_ids_to_delete: Vec, + num_points_to_delete: usize, + ) -> ANNResult<()> { + println!("Deleting {} vectors from file.", num_points_to_delete); + + let logger = IndexLogger::new(num_points_to_delete); + let timer = Timer::new(); + + execute_with_rayon( + 0..num_points_to_delete, + self.configuration.index_write_parameter.num_threads, + |idx: usize| { + self.soft_delete_vertex(vertex_ids_to_delete[idx])?; + logger.vertex_processed()?; + + Ok(()) + }, + )?; + + println!("{}", timer.elapsed_seconds_for_step("Delete time: ")); + self.print_stats()?; + + Ok(()) + } +} + +#[cfg(test)] +mod index_test { + use vector::Metric; + + use super::*; + use crate::{ + model::{ + configuration::index_write_parameters::IndexWriteParametersBuilder, vertex::DIM_128, + }, + test_utils::get_test_file_path, + utils::file_util::load_ids_to_delete_from_file, + utils::round_up, + }; + + const TEST_DATA_FILE: &str = "tests/data/siftsmall_learn_256pts.fbin"; + const TRUTH_GRAPH: &str = "tests/data/truth_index_siftsmall_learn_256pts_R4_L50_A1.2"; + const TEST_DELETE_FILE: &str = "tests/data/delete_set_50pts.bin"; + const TRUTH_GRAPH_WITH_SATURATED: &str = + "tests/data/disk_index_siftsmall_learn_256pts_R4_L50_A1.2_mem.index"; + const R: u32 = 4; + const L: u32 = 50; + const ALPHA: f32 = 1.2; + + /// Build the index with TEST_DATA_FILE and compare the index graph with truth graph TRUTH_GRAPH + /// Change above constants if you want to test with different dataset + macro_rules! 
index_end_to_end_test_singlethread { + ($saturate_graph:expr, $truth_graph:expr) => {{ + let (data_num, dim) = + load_metadata_from_file(get_test_file_path(TEST_DATA_FILE).as_str()).unwrap(); + + let index_write_parameters = IndexWriteParametersBuilder::new(L, R) + .with_alpha(ALPHA) + .with_num_threads(1) + .with_saturate_graph($saturate_graph) + .build(); + let config = IndexConfiguration::new( + Metric::L2, + dim, + round_up(dim as u64, 16_u64) as usize, + data_num, + false, + 0, + false, + 0, + 1.0f32, + index_write_parameters, + ); + let mut index: InmemIndex = InmemIndex::new(config.clone()).unwrap(); + + index + .build(get_test_file_path(TEST_DATA_FILE).as_str(), data_num) + .unwrap(); + + let mut truth_index: InmemIndex = InmemIndex::new(config).unwrap(); + truth_index + .load_graph(get_test_file_path($truth_graph).as_str(), data_num) + .unwrap(); + + compare_graphs(&index, &truth_index); + }}; + } + + #[test] + fn index_end_to_end_test_singlethread() { + index_end_to_end_test_singlethread!(false, TRUTH_GRAPH); + } + + #[test] + fn index_end_to_end_test_singlethread_with_saturate_graph() { + index_end_to_end_test_singlethread!(true, TRUTH_GRAPH_WITH_SATURATED); + } + + #[test] + fn index_end_to_end_test_multithread() { + let (data_num, dim) = + load_metadata_from_file(get_test_file_path(TEST_DATA_FILE).as_str()).unwrap(); + + let index_write_parameters = IndexWriteParametersBuilder::new(L, R) + .with_alpha(ALPHA) + .with_num_threads(8) + .build(); + let config = IndexConfiguration::new( + Metric::L2, + dim, + round_up(dim as u64, 16_u64) as usize, + data_num, + false, + 0, + false, + 0, + 1f32, + index_write_parameters, + ); + let mut index: InmemIndex = InmemIndex::new(config).unwrap(); + + index + .build(get_test_file_path(TEST_DATA_FILE).as_str(), data_num) + .unwrap(); + + for i in 0..index.final_graph.size() { + assert_ne!( + index + .final_graph + .read_vertex_and_neighbors(i as u32) + .unwrap() + .size(), + 0 + ); + } + } + + const TEST_DATA_FILE_2: &str = "tests/data/siftsmall_learn_256pts_2.fbin"; + const INSERT_TRUTH_GRAPH: &str = + "tests/data/truth_index_siftsmall_learn_256pts_1+2_R4_L50_A1.2"; + const INSERT_TRUTH_GRAPH_WITH_SATURATED: &str = + "tests/data/truth_index_siftsmall_learn_256pts_1+2_saturated_R4_L50_A1.2"; + + /// Build the index with TEST_DATA_FILE, insert TEST_DATA_FILE_2 and compare the index graph with truth graph TRUTH_GRAPH + /// Change above constants if you want to test with different dataset + macro_rules! 
index_insert_end_to_end_test_singlethread { + ($saturate_graph:expr, $truth_graph:expr) => {{ + let (data_num, dim) = + load_metadata_from_file(get_test_file_path(TEST_DATA_FILE).as_str()).unwrap(); + + let index_write_parameters = IndexWriteParametersBuilder::new(L, R) + .with_alpha(ALPHA) + .with_num_threads(1) + .with_saturate_graph($saturate_graph) + .build(); + let config = IndexConfiguration::new( + Metric::L2, + dim, + round_up(dim as u64, 16_u64) as usize, + data_num, + false, + 0, + false, + 0, + 2.0f32, + index_write_parameters, + ); + let mut index: InmemIndex = InmemIndex::new(config.clone()).unwrap(); + + index + .build(get_test_file_path(TEST_DATA_FILE).as_str(), data_num) + .unwrap(); + index + .insert(get_test_file_path(TEST_DATA_FILE_2).as_str(), data_num) + .unwrap(); + + let config2 = IndexConfiguration::new( + Metric::L2, + dim, + round_up(dim as u64, 16_u64) as usize, + data_num * 2, + false, + 0, + false, + 0, + 1.0f32, + index_write_parameters, + ); + let mut truth_index: InmemIndex = InmemIndex::new(config2).unwrap(); + truth_index + .load_graph(get_test_file_path($truth_graph).as_str(), data_num) + .unwrap(); + + compare_graphs(&index, &truth_index); + }}; + } + + /// Build the index with TEST_DATA_FILE, and delete the vertices with id defined in TEST_DELETE_SET + macro_rules! index_delete_end_to_end_test_singlethread { + () => {{ + let (data_num, dim) = + load_metadata_from_file(get_test_file_path(TEST_DATA_FILE).as_str()).unwrap(); + + let index_write_parameters = IndexWriteParametersBuilder::new(L, R) + .with_alpha(ALPHA) + .with_num_threads(1) + .build(); + let config = IndexConfiguration::new( + Metric::L2, + dim, + round_up(dim as u64, 16_u64) as usize, + data_num, + false, + 0, + false, + 0, + 2.0f32, + index_write_parameters, + ); + let mut index: InmemIndex = InmemIndex::new(config.clone()).unwrap(); + + index + .build(get_test_file_path(TEST_DATA_FILE).as_str(), data_num) + .unwrap(); + + let (num_points_to_delete, vertex_ids_to_delete) = + load_ids_to_delete_from_file(TEST_DELETE_FILE).unwrap(); + index + .soft_delete(vertex_ids_to_delete, num_points_to_delete) + .unwrap(); + assert!(index.delete_set.read().unwrap().len() == num_points_to_delete); + }}; + } + + #[test] + fn index_insert_end_to_end_test_singlethread() { + index_insert_end_to_end_test_singlethread!(false, INSERT_TRUTH_GRAPH); + } + + #[test] + fn index_delete_end_to_end_test_singlethread() { + index_delete_end_to_end_test_singlethread!(); + } + + #[test] + fn index_insert_end_to_end_test_saturated_singlethread() { + index_insert_end_to_end_test_singlethread!(true, INSERT_TRUTH_GRAPH_WITH_SATURATED); + } + + fn compare_graphs(index: &InmemIndex, truth_index: &InmemIndex) { + assert_eq!(index.start, truth_index.start); + assert_eq!(index.max_observed_degree, truth_index.max_observed_degree); + assert_eq!(index.final_graph.size(), truth_index.final_graph.size()); + + for i in 0..index.final_graph.size() { + assert_eq!( + index + .final_graph + .read_vertex_and_neighbors(i as u32) + .unwrap() + .size(), + truth_index + .final_graph + .read_vertex_and_neighbors(i as u32) + .unwrap() + .size() + ); + assert_eq!( + index + .final_graph + .read_vertex_and_neighbors(i as u32) + .unwrap() + .get_neighbors(), + truth_index + .final_graph + .read_vertex_and_neighbors(i as u32) + .unwrap() + .get_neighbors() + ); + } + } +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/index/inmem_index/inmem_index_storage.rs 
b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/index/inmem_index/inmem_index_storage.rs new file mode 100644 index 0000000..fa14d70 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/index/inmem_index/inmem_index_storage.rs @@ -0,0 +1,304 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +use std::fs::File; +use std::io::{BufReader, BufWriter, Seek, SeekFrom, Write}; +use std::path::Path; + +use byteorder::{LittleEndian, ReadBytesExt}; +use vector::FullPrecisionDistance; + +use crate::common::{ANNError, ANNResult}; +use crate::model::graph::AdjacencyList; +use crate::model::InMemoryGraph; +use crate::utils::{file_exists, save_data_in_base_dimensions}; + +use super::InmemIndex; + +impl InmemIndex +where + T: Default + Copy + Sync + Send + Into, + [T; N]: FullPrecisionDistance, +{ + pub fn load_graph(&mut self, filename: &str, expected_num_points: usize) -> ANNResult { + // let file_offset = 0; // will need this for single file format support + + let mut in_file = BufReader::new(File::open(Path::new(filename))?); + // in_file.seek(SeekFrom::Start(file_offset as u64))?; + + let expected_file_size: usize = in_file.read_u64::()? as usize; + self.max_observed_degree = in_file.read_u32::()?; + self.start = in_file.read_u32::()?; + let file_frozen_pts: usize = in_file.read_u64::()? as usize; + + let vamana_metadata_size = 24; + + println!("From graph header, expected_file_size: {}, max_observed_degree: {}, start: {}, file_frozen_pts: {}", + expected_file_size, self.max_observed_degree, self.start, file_frozen_pts); + + if file_frozen_pts != self.configuration.num_frozen_pts { + if file_frozen_pts == 1 { + return Err(ANNError::log_index_config_error( + "num_frozen_pts".to_string(), + "ERROR: When loading index, detected dynamic index, but constructor asks for static index. Exitting.".to_string()) + ); + } else { + return Err(ANNError::log_index_config_error( + "num_frozen_pts".to_string(), + "ERROR: When loading index, detected static index, but constructor asks for dynamic index. Exitting.".to_string()) + ); + } + } + + println!("Loading vamana graph {}...", filename); + + let expected_max_points = expected_num_points - file_frozen_pts; + + // If user provides more points than max_points + // resize the _final_graph to the larger size. 
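`load_graph` above starts by reading a fixed 24-byte header (the `vamana_metadata_size`) before the adjacency records. Pulled out as a standalone sketch, using the same `byteorder` crate as the file above:

use std::io::Read;
use byteorder::{LittleEndian, ReadBytesExt};

struct VamanaHeader {
    file_size: u64,   // expected size of the whole index file in bytes
    max_degree: u32,  // max observed out-degree
    start: u32,       // entry point: the medoid, or the first frozen point
    frozen_pts: u64,  // number of frozen points stored in the file
}

fn read_vamana_header<R: Read>(r: &mut R) -> std::io::Result<VamanaHeader> {
    Ok(VamanaHeader {
        file_size: r.read_u64::<LittleEndian>()?,
        max_degree: r.read_u32::<LittleEndian>()?,
        start: r.read_u32::<LittleEndian>()?,
        frozen_pts: r.read_u64::<LittleEndian>()?,
    })
}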
+ if self.configuration.max_points < expected_max_points { + println!("Number of points in data: {} is greater than max_points: {} Setting max points to: {}", expected_max_points, self.configuration.max_points, expected_max_points); + + self.configuration.max_points = expected_max_points; + self.final_graph = InMemoryGraph::new( + self.configuration.max_points + self.configuration.num_frozen_pts, + self.configuration.index_write_parameter.max_degree, + ); + } + + let mut bytes_read = vamana_metadata_size; + let mut num_edges = 0; + let mut nodes_read = 0; + let mut max_observed_degree = 0; + + while bytes_read != expected_file_size { + let num_nbrs = in_file.read_u32::()?; + max_observed_degree = if num_nbrs > max_observed_degree { + num_nbrs + } else { + max_observed_degree + }; + + if num_nbrs == 0 { + return Err(ANNError::log_index_error(format!( + "ERROR: Point found with no out-neighbors, point# {}", + nodes_read + ))); + } + + num_edges += num_nbrs; + nodes_read += 1; + let mut tmp: Vec = Vec::with_capacity(num_nbrs as usize); + for _ in 0..num_nbrs { + tmp.push(in_file.read_u32::()?); + } + + self.final_graph + .write_vertex_and_neighbors(nodes_read - 1)? + .set_neighbors(AdjacencyList::from(tmp)); + bytes_read += 4 * (num_nbrs as usize + 1); + } + + println!( + "Done. Index has {} nodes and {} out-edges, _start is set to {}", + nodes_read, num_edges, self.start + ); + + self.max_observed_degree = max_observed_degree; + Ok(nodes_read as usize) + } + + /// Save the graph index on a file as an adjacency list. + /// For each point, first store the number of neighbors, + /// and then the neighbor list (each as 4 byte u32) + pub fn save_graph(&mut self, graph_file: &str) -> ANNResult { + let file: File = File::create(graph_file)?; + let mut out = BufWriter::new(file); + + let file_offset: u64 = 0; + out.seek(SeekFrom::Start(file_offset))?; + let mut index_size: u64 = 24; + let mut max_degree: u32 = 0; + out.write_all(&index_size.to_le_bytes())?; + out.write_all(&self.max_observed_degree.to_le_bytes())?; + out.write_all(&self.start.to_le_bytes())?; + out.write_all(&(self.configuration.num_frozen_pts as u64).to_le_bytes())?; + + // At this point, either nd == max_points or any frozen points have + // been temporarily moved to nd, so nd + num_frozen_points is the valid + // location limit + for i in 0..self.num_active_pts + self.configuration.num_frozen_pts { + let idx = i as u32; + let gk: u32 = self.final_graph.read_vertex_and_neighbors(idx)?.size() as u32; + out.write_all(&gk.to_le_bytes())?; + for neighbor in self + .final_graph + .read_vertex_and_neighbors(idx)? + .get_neighbors() + .iter() + { + out.write_all(&neighbor.to_le_bytes())?; + } + max_degree = + if self.final_graph.read_vertex_and_neighbors(idx)?.size() as u32 > max_degree { + self.final_graph.read_vertex_and_neighbors(idx)?.size() as u32 + } else { + max_degree + }; + index_size += (std::mem::size_of::() * (gk as usize + 1)) as u64; + } + out.seek(SeekFrom::Start(file_offset))?; + out.write_all(&index_size.to_le_bytes())?; + out.write_all(&max_degree.to_le_bytes())?; + out.flush()?; + Ok(index_size) + } + + /// Save the data on a file. + pub fn save_data(&mut self, data_file: &str) -> ANNResult { + // Note: at this point, either _nd == _max_points or any frozen points have + // been temporarily moved to _nd, so _nd + _num_frozen_points is the valid + // location limit. 
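`save_graph` above writes one record per node, a u32 out-degree followed by that many u32 neighbor ids, then seeks back to patch the header with the final `index_size` and `max_degree`. The resulting file size is therefore easy to predict; a small sketch of the same accounting:

// One adjacency record: a u32 count plus `num_neighbors` u32 ids.
fn adjacency_record_bytes(num_neighbors: usize) -> u64 {
    (std::mem::size_of::<u32>() * (num_neighbors + 1)) as u64
}

// Total graph file size: the 24-byte header plus one record per node.
fn graph_file_bytes(out_degrees: &[usize]) -> u64 {
    24 + out_degrees.iter().map(|&d| adjacency_record_bytes(d)).sum::<u64>()
}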
+ Ok(save_data_in_base_dimensions( + data_file, + &mut self.dataset.data, + self.num_active_pts + self.configuration.num_frozen_pts, + self.configuration.dim, + self.configuration.aligned_dim, + 0, + )?) + } + + /// Save the delete list to a file only if the delete list length is not zero. + pub fn save_delete_list(&mut self, delete_list_file: &str) -> ANNResult { + let mut delete_file_size = 0; + if let Ok(delete_set) = self.delete_set.read() { + let delete_set_len = delete_set.len() as u32; + + if delete_set_len != 0 { + let file: File = File::create(delete_list_file)?; + let mut writer = BufWriter::new(file); + + // Write the length of the set. + writer.write_all(&delete_set_len.to_le_bytes())?; + delete_file_size += std::mem::size_of::(); + + // Write the elements of the set. + for &item in delete_set.iter() { + writer.write_all(&item.to_be_bytes())?; + delete_file_size += std::mem::size_of::(); + } + + writer.flush()?; + } + } else { + return Err(ANNError::log_lock_poison_error( + "Poisoned lock on delete set. Can't save deleted list.".to_string(), + )); + } + + Ok(delete_file_size) + } + + // load the deleted list from the delete file if it exists. + pub fn load_delete_list(&mut self, delete_list_file: &str) -> ANNResult { + let mut len = 0; + + if file_exists(delete_list_file) { + let file = File::open(delete_list_file)?; + let mut reader = BufReader::new(file); + + len = reader.read_u32::()? as usize; + + if let Ok(mut delete_set) = self.delete_set.write() { + for _ in 0..len { + let item = reader.read_u32::()?; + delete_set.insert(item); + } + } else { + return Err(ANNError::log_lock_poison_error( + "Poisoned lock on delete set. Can't load deleted list.".to_string(), + )); + } + } + + Ok(len) + } +} + +#[cfg(test)] +mod index_test { + use std::fs; + + use vector::Metric; + + use super::*; + use crate::{ + index::ANNInmemIndex, + model::{ + configuration::index_write_parameters::IndexWriteParametersBuilder, vertex::DIM_128, + IndexConfiguration, + }, + utils::{load_metadata_from_file, round_up}, + }; + + const TEST_DATA_FILE: &str = "tests/data/siftsmall_learn_256pts.fbin"; + const R: u32 = 4; + const L: u32 = 50; + const ALPHA: f32 = 1.2; + + #[cfg_attr(not(coverage), test)] + fn save_graph_test() { + let parameters = IndexWriteParametersBuilder::new(50, 4) + .with_alpha(1.2) + .build(); + let config = + IndexConfiguration::new(Metric::L2, 10, 16, 16, false, 0, false, 8, 1f32, parameters); + let mut index = InmemIndex::::new(config).unwrap(); + let final_graph = InMemoryGraph::new(10, 3); + let num_active_pts = 2_usize; + index.final_graph = final_graph; + index.num_active_pts = num_active_pts; + let graph_file = "test_save_graph_data.bin"; + let result = index.save_graph(graph_file); + assert!(result.is_ok()); + + fs::remove_file(graph_file).expect("Failed to delete file"); + } + + #[test] + fn save_data_test() { + let (data_num, dim) = load_metadata_from_file(TEST_DATA_FILE).unwrap(); + + let index_write_parameters = IndexWriteParametersBuilder::new(L, R) + .with_alpha(ALPHA) + .build(); + let config = IndexConfiguration::new( + Metric::L2, + dim, + round_up(dim as u64, 16_u64) as usize, + data_num, + false, + 0, + false, + 0, + 1f32, + index_write_parameters, + ); + let mut index: InmemIndex = InmemIndex::new(config).unwrap(); + + index.build(TEST_DATA_FILE, data_num).unwrap(); + + let data_file = "test.data"; + let result = index.save_data(data_file); + assert_eq!( + result.unwrap(), + 2 * std::mem::size_of::() + + (index.num_active_pts + index.configuration.num_frozen_pts) + 
* index.configuration.dim + * (std::mem::size_of::()) + ); + fs::remove_file(data_file).expect("Failed to delete file"); + } +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/index/inmem_index/mod.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/index/inmem_index/mod.rs new file mode 100644 index 0000000..f2a091a --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/index/inmem_index/mod.rs @@ -0,0 +1,12 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +#[allow(clippy::module_inception)] +mod inmem_index; +pub use inmem_index::InmemIndex; + +mod inmem_index_storage; + +pub mod ann_inmem_index; + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/index/mod.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/index/mod.rs new file mode 100644 index 0000000..18c3bd5 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/index/mod.rs @@ -0,0 +1,11 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +mod inmem_index; +pub use inmem_index::ann_inmem_index::*; +pub use inmem_index::InmemIndex; + +mod disk_index; +pub use disk_index::*; + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/instrumentation/disk_index_build_logger.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/instrumentation/disk_index_build_logger.rs new file mode 100644 index 0000000..d349353 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/instrumentation/disk_index_build_logger.rs @@ -0,0 +1,57 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. 
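The delete list persisted above is a u32 count followed by the u32 ids. Note that `save_delete_list` writes each id with `to_be_bytes` while `load_delete_list` reads them as little-endian; whether that mismatch is intentional is not clear from this excerpt, so the round-trip sketch below simply keeps both sides little-endian:

use std::io::{Read, Write};
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};

fn write_delete_list<W: Write>(w: &mut W, ids: &[u32]) -> std::io::Result<()> {
    w.write_u32::<LittleEndian>(ids.len() as u32)?; // element count
    for &id in ids {
        w.write_u32::<LittleEndian>(id)?;
    }
    Ok(())
}

fn read_delete_list<R: Read>(r: &mut R) -> std::io::Result<Vec<u32>> {
    let len = r.read_u32::<LittleEndian>()?;
    (0..len).map(|_| r.read_u32::<LittleEndian>()).collect()
}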
+ */ +use logger::logger::indexlog::DiskIndexConstructionCheckpoint; +use logger::logger::indexlog::DiskIndexConstructionLog; +use logger::logger::indexlog::Log; +use logger::logger::indexlog::LogLevel; +use logger::message_handler::send_log; + +use crate::{utils::Timer, common::ANNResult}; + +pub struct DiskIndexBuildLogger { + timer: Timer, + checkpoint: DiskIndexConstructionCheckpoint, +} + +impl DiskIndexBuildLogger { + pub fn new(checkpoint: DiskIndexConstructionCheckpoint) -> Self { + Self { + timer: Timer::new(), + checkpoint, + } + } + + pub fn log_checkpoint(&mut self, next_checkpoint: DiskIndexConstructionCheckpoint) -> ANNResult<()> { + if self.checkpoint == DiskIndexConstructionCheckpoint::None { + return Ok(()); + } + + let mut log = Log::default(); + let disk_index_construction_log = DiskIndexConstructionLog { + checkpoint: self.checkpoint as i32, + time_spent_in_seconds: self.timer.elapsed().as_secs_f32(), + g_cycles_spent: self.timer.elapsed_gcycles(), + log_level: LogLevel::Info as i32, + }; + log.disk_index_construction_log = Some(disk_index_construction_log); + + send_log(log)?; + self.checkpoint = next_checkpoint; + self.timer.reset(); + Ok(()) + } +} + +#[cfg(test)] +mod dataset_test { + use super::*; + + #[test] + fn test_log() { + let mut logger = DiskIndexBuildLogger::new(DiskIndexConstructionCheckpoint::PqConstruction); + logger.log_checkpoint(DiskIndexConstructionCheckpoint::InmemIndexBuild).unwrap();logger.log_checkpoint(logger::logger::indexlog::DiskIndexConstructionCheckpoint::DiskLayout).unwrap(); + } +} + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/instrumentation/index_logger.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/instrumentation/index_logger.rs new file mode 100644 index 0000000..dfc81ad --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/instrumentation/index_logger.rs @@ -0,0 +1,47 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. 
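`DiskIndexBuildLogger` above emits one `DiskIndexConstructionLog` per completed phase and resets its timer each time. A sketch of how `DiskIndex::build` earlier in this diff threads the logger through its checkpoints:

use diskann::common::ANNResult;
use diskann::instrumentation::DiskIndexBuildLogger;
use logger::logger::indexlog::DiskIndexConstructionCheckpoint as Checkpoint;

fn log_build_phases() -> ANNResult<()> {
    let mut logger = DiskIndexBuildLogger::new(Checkpoint::PqConstruction);
    // ... PQ pivots and compressed table ...
    logger.log_checkpoint(Checkpoint::InmemIndexBuild)?;
    // ... in-memory Vamana build ...
    logger.log_checkpoint(Checkpoint::DiskLayout)?;
    // ... disk layout, warmup data, cleanup ...
    logger.log_checkpoint(Checkpoint::None)?;
    Ok(())
}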
+ */ +use std::sync::atomic::{AtomicUsize, Ordering}; + +use logger::logger::indexlog::IndexConstructionLog; +use logger::logger::indexlog::Log; +use logger::logger::indexlog::LogLevel; +use logger::message_handler::send_log; + +use crate::common::ANNResult; +use crate::utils::Timer; + +pub struct IndexLogger { + items_processed: AtomicUsize, + timer: Timer, + range: usize, +} + +impl IndexLogger { + pub fn new(range: usize) -> Self { + Self { + items_processed: AtomicUsize::new(0), + timer: Timer::new(), + range, + } + } + + pub fn vertex_processed(&self) -> ANNResult<()> { + let count = self.items_processed.fetch_add(1, Ordering::Relaxed); + if count % 100_000 == 0 { + let mut log = Log::default(); + let index_construction_log = IndexConstructionLog { + percentage_complete: (100_f32 * count as f32) / (self.range as f32), + time_spent_in_seconds: self.timer.elapsed().as_secs_f32(), + g_cycles_spent: self.timer.elapsed_gcycles(), + log_level: LogLevel::Info as i32, + }; + log.index_construction_log = Some(index_construction_log); + + send_log(log)?; + } + + Ok(()) + } +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/instrumentation/mod.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/instrumentation/mod.rs new file mode 100644 index 0000000..234e53c --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/instrumentation/mod.rs @@ -0,0 +1,9 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +mod index_logger; +pub use index_logger::IndexLogger; + +mod disk_index_build_logger; +pub use disk_index_build_logger::DiskIndexBuildLogger; diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/lib.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/lib.rs new file mode 100644 index 0000000..1f89e33 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/lib.rs @@ -0,0 +1,26 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +#![cfg_attr( + not(test), + warn(clippy::panic, clippy::unwrap_used, clippy::expect_used) +)] +#![cfg_attr(test, allow(clippy::unused_io_amount))] + +pub mod utils; + +pub mod algorithm; + +pub mod model; + +pub mod common; + +pub mod index; + +pub mod storage; + +pub mod instrumentation; + +#[cfg(test)] +pub mod test_utils; diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/configuration/disk_index_build_parameter.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/configuration/disk_index_build_parameter.rs new file mode 100644 index 0000000..539192a --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/configuration/disk_index_build_parameter.rs @@ -0,0 +1,85 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +#![warn(missing_debug_implementations, missing_docs)] + +//! Parameters for disk index construction. + +use crate::common::{ANNResult, ANNError}; + +/// Cached nodes size in GB +const SPACE_FOR_CACHED_NODES_IN_GB: f64 = 0.25; + +/// Threshold for caching in GB +const THRESHOLD_FOR_CACHING_IN_GB: f64 = 1.0; + +/// Parameters specific for disk index construction. +#[derive(Clone, Copy, PartialEq, Debug)] +pub struct DiskIndexBuildParameters { + /// Bound on the memory footprint of the index at search time in bytes. 
+ /// Once built, the index will use up only the specified RAM limit, the rest will reside on disk. + /// This will dictate how aggressively we compress the data vectors to store in memory. + /// Larger will yield better performance at search time. + search_ram_limit: f64, + + /// Limit on the memory allowed for building the index in bytes. + index_build_ram_limit: f64, +} + +impl DiskIndexBuildParameters { + /// Create DiskIndexBuildParameters instance + pub fn new(search_ram_limit_gb: f64, index_build_ram_limit_gb: f64) -> ANNResult { + let param = Self { + search_ram_limit: Self::get_memory_budget(search_ram_limit_gb), + index_build_ram_limit: index_build_ram_limit_gb * 1024_f64 * 1024_f64 * 1024_f64, + }; + + if param.search_ram_limit <= 0f64 { + return Err(ANNError::log_index_config_error("search_ram_limit".to_string(), "RAM budget should be > 0".to_string())) + } + + if param.index_build_ram_limit <= 0f64 { + return Err(ANNError::log_index_config_error("index_build_ram_limit".to_string(), "RAM budget should be > 0".to_string())) + } + + Ok(param) + } + + /// Get search_ram_limit + pub fn search_ram_limit(&self) -> f64 { + self.search_ram_limit + } + + /// Get index_build_ram_limit + pub fn index_build_ram_limit(&self) -> f64 { + self.index_build_ram_limit + } + + fn get_memory_budget(mut index_ram_limit_gb: f64) -> f64 { + if index_ram_limit_gb - SPACE_FOR_CACHED_NODES_IN_GB > THRESHOLD_FOR_CACHING_IN_GB { + // slack for space used by cached nodes + index_ram_limit_gb -= SPACE_FOR_CACHED_NODES_IN_GB; + } + + index_ram_limit_gb * 1024_f64 * 1024_f64 * 1024_f64 + } +} + +#[cfg(test)] +mod dataset_test { + use super::*; + + #[test] + fn sufficient_ram_for_caching() { + let param = DiskIndexBuildParameters::new(1.26_f64, 1.0_f64).unwrap(); + assert_eq!(param.search_ram_limit, 1.01_f64 * 1024_f64 * 1024_f64 * 1024_f64); + } + + #[test] + fn insufficient_ram_for_caching() { + let param = DiskIndexBuildParameters::new(0.03_f64, 1.0_f64).unwrap(); + assert_eq!(param.search_ram_limit, 0.03_f64 * 1024_f64 * 1024_f64 * 1024_f64); + } +} + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/configuration/index_configuration.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/configuration/index_configuration.rs new file mode 100644 index 0000000..3e8c472 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/configuration/index_configuration.rs @@ -0,0 +1,92 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +#![warn(missing_debug_implementations, missing_docs)] + +//! Index configuration. + +use vector::Metric; + +use super::index_write_parameters::IndexWriteParameters; + +/// The index configuration +#[derive(Debug, Clone)] +pub struct IndexConfiguration { + /// Index write parameter + pub index_write_parameter: IndexWriteParameters, + + /// Distance metric + pub dist_metric: Metric, + + /// Dimension of the raw data + pub dim: usize, + + /// Aligned dimension - round up dim to the nearest multiple of 8 + pub aligned_dim: usize, + + /// Total number of points in given data set + pub max_points: usize, + + /// Number of points which are used as initial candidates when iterating to + /// closest point(s). These are not visible externally and won't be returned + /// by search. DiskANN forces at least 1 frozen point for dynamic index. + /// The frozen points have consecutive locations. 
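// Worked example (illustrative, not part of the diff): with a 1.26 GB search
// budget, 1.26 - 0.25 = 1.01 GB exceeds the 1.0 GB caching threshold, so the
// 0.25 GB cached-node slack is deducted and roughly 1.01 GB (in bytes) is stored;
// a 0.03 GB budget falls below the threshold and is kept as-is. The helper name
// budget_example is hypothetical.
fn budget_example() -> ANNResult<()> {
    const GB: f64 = 1024.0 * 1024.0 * 1024.0;
    let params = DiskIndexBuildParameters::new(1.26, 1.0)?;
    assert!((params.search_ram_limit() - 1.01 * GB).abs() < 1e-3 * GB);
    Ok(())
}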
+ pub num_frozen_pts: usize, + + /// Calculate distance by PQ or not + pub use_pq_dist: bool, + + /// Number of PQ chunks + pub num_pq_chunks: usize, + + /// Use optimized product quantization + /// Currently not supported + pub use_opq: bool, + + /// potential for growth. 1.2 means the index can grow by up to 20%. + pub growth_potential: f32, + + // TODO: below settings are not supported in current iteration + // pub concurrent_consolidate: bool, + // pub has_built: bool, + // pub save_as_one_file: bool, + // pub dynamic_index: bool, + // pub enable_tags: bool, + // pub normalize_vecs: bool, +} + +impl IndexConfiguration { + /// Create IndexConfiguration instance + #[allow(clippy::too_many_arguments)] + pub fn new( + dist_metric: Metric, + dim: usize, + aligned_dim: usize, + max_points: usize, + use_pq_dist: bool, + num_pq_chunks: usize, + use_opq: bool, + num_frozen_pts: usize, + growth_potential: f32, + index_write_parameter: IndexWriteParameters + ) -> Self { + Self { + index_write_parameter, + dist_metric, + dim, + aligned_dim, + max_points, + num_frozen_pts, + use_pq_dist, + num_pq_chunks, + use_opq, + growth_potential, + } + } + + /// Get the size of adjacency list that we build out. + pub fn write_range(&self) -> usize { + self.index_write_parameter.max_degree as usize + } +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/configuration/index_write_parameters.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/configuration/index_write_parameters.rs new file mode 100644 index 0000000..cb71f42 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/configuration/index_write_parameters.rs @@ -0,0 +1,245 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +#![warn(missing_debug_implementations, missing_docs)] + +//! Index write parameters. + +/// Default parameter values. +pub mod default_param_vals { + /// Default value of alpha. + pub const ALPHA: f32 = 1.2; + + /// Default value of number of threads. + pub const NUM_THREADS: u32 = 0; + + /// Default value of number of rounds. + pub const NUM_ROUNDS: u32 = 2; + + /// Default value of max occlusion size. + pub const MAX_OCCLUSION_SIZE: u32 = 750; + + /// Default value of filter list size. + pub const FILTER_LIST_SIZE: u32 = 0; + + /// Default value of number of frozen points. + pub const NUM_FROZEN_POINTS: u32 = 0; + + /// Default value of max degree. + pub const MAX_DEGREE: u32 = 64; + + /// Default value of build list size. + pub const BUILD_LIST_SIZE: u32 = 100; + + /// Default value of saturate graph. + pub const SATURATE_GRAPH: bool = false; + + /// Default value of search list size. + pub const SEARCH_LIST_SIZE: u32 = 100; +} + +/// Index write parameters. +#[derive(Clone, Copy, PartialEq, Debug)] +pub struct IndexWriteParameters { + /// Search list size - L. + pub search_list_size: u32, + + /// Max degree - R. + pub max_degree: u32, + + /// Saturate graph. + pub saturate_graph: bool, + + /// Max occlusion size - C. + pub max_occlusion_size: u32, + + /// Alpha. + pub alpha: f32, + + /// Number of rounds. + pub num_rounds: u32, + + /// Number of threads. + pub num_threads: u32, + + /// Number of frozen points. 
+ pub num_frozen_points: u32, +} + +impl Default for IndexWriteParameters { + /// Create IndexWriteParameters with default values + fn default() -> Self { + Self { + search_list_size: default_param_vals::SEARCH_LIST_SIZE, + max_degree: default_param_vals::MAX_DEGREE, + saturate_graph: default_param_vals::SATURATE_GRAPH, + max_occlusion_size: default_param_vals::MAX_OCCLUSION_SIZE, + alpha: default_param_vals::ALPHA, + num_rounds: default_param_vals::NUM_ROUNDS, + num_threads: default_param_vals::NUM_THREADS, + num_frozen_points: default_param_vals::NUM_FROZEN_POINTS + } + } +} + +/// The builder for IndexWriteParameters. +#[derive(Debug)] +pub struct IndexWriteParametersBuilder { + search_list_size: u32, + max_degree: u32, + max_occlusion_size: Option, + saturate_graph: Option, + alpha: Option, + num_rounds: Option, + num_threads: Option, + // filter_list_size: Option, + num_frozen_points: Option, +} + +impl IndexWriteParametersBuilder { + /// Initialize IndexWriteParametersBuilder + pub fn new(search_list_size: u32, max_degree: u32) -> Self { + Self { + search_list_size, + max_degree, + max_occlusion_size: None, + saturate_graph: None, + alpha: None, + num_rounds: None, + num_threads: None, + // filter_list_size: None, + num_frozen_points: None, + } + } + + /// Set max occlusion size. + pub fn with_max_occlusion_size(mut self, max_occlusion_size: u32) -> Self { + self.max_occlusion_size = Some(max_occlusion_size); + self + } + + /// Set saturate graph. + pub fn with_saturate_graph(mut self, saturate_graph: bool) -> Self { + self.saturate_graph = Some(saturate_graph); + self + } + + /// Set alpha. + pub fn with_alpha(mut self, alpha: f32) -> Self { + self.alpha = Some(alpha); + self + } + + /// Set number of rounds. + pub fn with_num_rounds(mut self, num_rounds: u32) -> Self { + self.num_rounds = Some(num_rounds); + self + } + + /// Set number of threads. + pub fn with_num_threads(mut self, num_threads: u32) -> Self { + self.num_threads = Some(num_threads); + self + } + + /* + pub fn with_filter_list_size(mut self, filter_list_size: u32) -> Self { + self.filter_list_size = Some(filter_list_size); + self + } + */ + + /// Set number of frozen points. + pub fn with_num_frozen_points(mut self, num_frozen_points: u32) -> Self { + self.num_frozen_points = Some(num_frozen_points); + self + } + + /// Build IndexWriteParameters from IndexWriteParametersBuilder. + pub fn build(self) -> IndexWriteParameters { + IndexWriteParameters { + search_list_size: self.search_list_size, + max_degree: self.max_degree, + saturate_graph: self.saturate_graph.unwrap_or(default_param_vals::SATURATE_GRAPH), + max_occlusion_size: self.max_occlusion_size.unwrap_or(default_param_vals::MAX_OCCLUSION_SIZE), + alpha: self.alpha.unwrap_or(default_param_vals::ALPHA), + num_rounds: self.num_rounds.unwrap_or(default_param_vals::NUM_ROUNDS), + num_threads: self.num_threads.unwrap_or(default_param_vals::NUM_THREADS), + // filter_list_size: self.filter_list_size.unwrap_or(default_param_vals::FILTER_LIST_SIZE), + num_frozen_points: self.num_frozen_points.unwrap_or(default_param_vals::NUM_FROZEN_POINTS), + } + } +} + +/// Construct IndexWriteParametersBuilder from IndexWriteParameters. 
+impl From for IndexWriteParametersBuilder { + fn from(param: IndexWriteParameters) -> Self { + Self { + search_list_size: param.search_list_size, + max_degree: param.max_degree, + max_occlusion_size: Some(param.max_occlusion_size), + saturate_graph: Some(param.saturate_graph), + alpha: Some(param.alpha), + num_rounds: Some(param.num_rounds), + num_threads: Some(param.num_threads), + // filter_list_size: Some(param.filter_list_size), + num_frozen_points: Some(param.num_frozen_points), + } + } +} + +#[cfg(test)] +mod parameters_test { + use crate::model::configuration::index_write_parameters::*; + + #[test] + fn test_default_index_params() { + let wp1 = IndexWriteParameters::default(); + assert_eq!(wp1.search_list_size, default_param_vals::SEARCH_LIST_SIZE); + assert_eq!(wp1.max_degree, default_param_vals::MAX_DEGREE); + assert_eq!(wp1.saturate_graph, default_param_vals::SATURATE_GRAPH); + assert_eq!(wp1.max_occlusion_size, default_param_vals::MAX_OCCLUSION_SIZE); + assert_eq!(wp1.alpha, default_param_vals::ALPHA); + assert_eq!(wp1.num_rounds, default_param_vals::NUM_ROUNDS); + assert_eq!(wp1.num_threads, default_param_vals::NUM_THREADS); + assert_eq!(wp1.num_frozen_points, default_param_vals::NUM_FROZEN_POINTS); + } + + #[test] + fn test_index_write_parameters_builder() { + // default value + let wp1 = IndexWriteParametersBuilder::new(10, 20).build(); + assert_eq!(wp1.search_list_size, 10); + assert_eq!(wp1.max_degree, 20); + assert_eq!(wp1.saturate_graph, default_param_vals::SATURATE_GRAPH); + assert_eq!(wp1.max_occlusion_size, default_param_vals::MAX_OCCLUSION_SIZE); + assert_eq!(wp1.alpha, default_param_vals::ALPHA); + assert_eq!(wp1.num_rounds, default_param_vals::NUM_ROUNDS); + assert_eq!(wp1.num_threads, default_param_vals::NUM_THREADS); + assert_eq!(wp1.num_frozen_points, default_param_vals::NUM_FROZEN_POINTS); + + // build with custom values + let wp2 = IndexWriteParametersBuilder::new(10, 20) + .with_max_occlusion_size(30) + .with_saturate_graph(true) + .with_alpha(0.5) + .with_num_rounds(40) + .with_num_threads(50) + .with_num_frozen_points(60) + .build(); + assert_eq!(wp2.search_list_size, 10); + assert_eq!(wp2.max_degree, 20); + assert!(wp2.saturate_graph); + assert_eq!(wp2.max_occlusion_size, 30); + assert_eq!(wp2.alpha, 0.5); + assert_eq!(wp2.num_rounds, 40); + assert_eq!(wp2.num_threads, 50); + assert_eq!(wp2.num_frozen_points, 60); + + // test from + let wp3 = IndexWriteParametersBuilder::from(wp2).build(); + assert_eq!(wp3, wp2); + } +} + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/configuration/mod.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/configuration/mod.rs new file mode 100644 index 0000000..201f97e --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/configuration/mod.rs @@ -0,0 +1,12 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. 
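// Illustrative sketch (not part of the diff): typical builder usage. L (search
// list size) and R (max degree) are required up front; anything not overridden
// falls back to default_param_vals when build() is called. The helper name
// graph_build_params is hypothetical.
fn graph_build_params() -> IndexWriteParameters {
    IndexWriteParametersBuilder::new(100, 64) // L = 100, R = 64
        .with_alpha(1.2)
        .with_num_threads(8)
        .build()
}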
+ */ +pub mod index_configuration; +pub use index_configuration::IndexConfiguration; + +pub mod index_write_parameters; +pub use index_write_parameters::*; + +pub mod disk_index_build_parameter; +pub use disk_index_build_parameter::DiskIndexBuildParameters; diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/data_store/disk_scratch_dataset.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/data_store/disk_scratch_dataset.rs new file mode 100644 index 0000000..0d9a007 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/data_store/disk_scratch_dataset.rs @@ -0,0 +1,76 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +#![warn(missing_debug_implementations, missing_docs)] + +//! Disk scratch dataset + +use std::mem::{size_of, size_of_val}; +use std::ptr; + +use crate::common::{AlignedBoxWithSlice, ANNResult}; +use crate::model::MAX_N_CMPS; +use crate::utils::round_up; + +/// DiskScratchDataset alignment +pub const DISK_SCRATCH_DATASET_ALIGN: usize = 256; + +/// Disk scratch dataset storing fp vectors with aligned dim +#[derive(Debug)] +pub struct DiskScratchDataset +{ + /// fp vectors with aligned dim + pub data: AlignedBoxWithSlice, + + /// current index to store the next fp vector + pub cur_index: usize, +} + +impl DiskScratchDataset +{ + /// Create DiskScratchDataset instance + pub fn new() -> ANNResult { + Ok(Self { + // C++ code allocates round_up(MAX_N_CMPS * N, 256) bytes, shouldn't it be round_up(MAX_N_CMPS * N, 256) * size_of:: bytes? + data: AlignedBoxWithSlice::new( + round_up(MAX_N_CMPS * N, DISK_SCRATCH_DATASET_ALIGN), + DISK_SCRATCH_DATASET_ALIGN)?, + cur_index: 0, + }) + } + + /// memcpy from fp vector bytes (its len should be `dim * size_of::()`) to self.data + /// The dest slice is a fp vector with aligned dim + /// * fp_vector_buf's dim might not be aligned dim (N) + /// # Safety + /// Behavior is undefined if any of the following conditions are violated: + /// + /// * `fp_vector_buf`'s len must be `dim * size_of::()` bytes + /// + /// * `fp_vector_buf` must be smaller than or equal to `N * size_of::()` bytes. + /// + /// * `fp_vector_buf` and `self.data` must be nonoverlapping. + pub unsafe fn memcpy_from_fp_vector_buf(&mut self, fp_vector_buf: &[u8]) -> &[T] { + if self.cur_index == MAX_N_CMPS { + self.cur_index = 0; + } + + let aligned_dim_vector = &mut self.data[self.cur_index * N..(self.cur_index + 1) * N]; + + assert!(fp_vector_buf.len() % size_of::() == 0); + assert!(fp_vector_buf.len() <= size_of_val(aligned_dim_vector)); + + // memcpy from fp_vector_buf to aligned_dim_vector + unsafe { + ptr::copy_nonoverlapping( + fp_vector_buf.as_ptr(), + aligned_dim_vector.as_mut_ptr() as *mut u8, + fp_vector_buf.len(), + ); + } + + self.cur_index += 1; + aligned_dim_vector + } +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/data_store/inmem_dataset.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/data_store/inmem_dataset.rs new file mode 100644 index 0000000..6d8b649 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/data_store/inmem_dataset.rs @@ -0,0 +1,285 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +#![warn(missing_debug_implementations, missing_docs)] + +//! 
In-memory Dataset + +use rayon::prelude::*; +use std::mem; +use vector::{FullPrecisionDistance, Metric}; + +use crate::common::{ANNError, ANNResult, AlignedBoxWithSlice}; +use crate::model::Vertex; +use crate::utils::copy_aligned_data_from_file; + +/// Dataset of all in-memory FP points +#[derive(Debug)] +pub struct InmemDataset +where + [T; N]: FullPrecisionDistance, +{ + /// All in-memory points + pub data: AlignedBoxWithSlice, + + /// Number of points we anticipate to have + pub num_points: usize, + + /// Number of active points i.e. existing in the graph + pub num_active_pts: usize, + + /// Capacity of the dataset + pub capacity: usize, +} + +impl<'a, T, const N: usize> InmemDataset +where + T: Default + Copy + Sync + Send + Into, + [T; N]: FullPrecisionDistance, +{ + /// Create the dataset with size num_points and growth factor. + /// growth factor=1 means no growth (provision 100% space of num_points) + /// growth factor=1.2 means provision 120% space of num_points (20% extra space) + pub fn new(num_points: usize, index_growth_factor: f32) -> ANNResult { + let capacity = (((num_points * N) as f32) * index_growth_factor) as usize; + + Ok(Self { + data: AlignedBoxWithSlice::new(capacity, mem::size_of::() * 16)?, + num_points, + num_active_pts: num_points, + capacity, + }) + } + + /// get immutable data slice + pub fn get_data(&self) -> &[T] { + &self.data + } + + /// Build the dataset from file + pub fn build_from_file(&mut self, filename: &str, num_points_to_load: usize) -> ANNResult<()> { + println!( + "Loading {} vectors from file {} into dataset...", + num_points_to_load, filename + ); + self.num_active_pts = num_points_to_load; + + copy_aligned_data_from_file(filename, self.into_dto(), 0)?; + + println!("Dataset loaded."); + Ok(()) + } + + /// Append the dataset from file + pub fn append_from_file( + &mut self, + filename: &str, + num_points_to_append: usize, + ) -> ANNResult<()> { + println!( + "Appending {} vectors from file {} into dataset...", + num_points_to_append, filename + ); + if self.num_points + num_points_to_append > self.capacity { + return Err(ANNError::log_index_error(format!( + "Cannot append {} points to dataset of capacity {}", + num_points_to_append, self.capacity + ))); + } + + let pts_offset = self.num_active_pts; + copy_aligned_data_from_file(filename, self.into_dto(), pts_offset)?; + + self.num_active_pts += num_points_to_append; + self.num_points += num_points_to_append; + + println!("Dataset appended."); + Ok(()) + } + + /// Get vertex by id + pub fn get_vertex(&'a self, id: u32) -> ANNResult> { + let start = id as usize * N; + let end = start + N; + + if end <= self.data.len() { + let val = <&[T; N]>::try_from(&self.data[start..end]).map_err(|err| { + ANNError::log_index_error(format!("Failed to get vertex {}, err={}", id, err)) + })?; + Ok(Vertex::new(val, id)) + } else { + Err(ANNError::log_index_error(format!( + "Invalid vertex id {}.", + id + ))) + } + } + + /// Get full precision distance between two nodes + pub fn get_distance(&self, id1: u32, id2: u32, metric: Metric) -> ANNResult { + let vertex1 = self.get_vertex(id1)?; + let vertex2 = self.get_vertex(id2)?; + + Ok(vertex1.compare(&vertex2, metric)) + } + + /// find out the medoid, the vertex in the dataset that is closest to the centroid + pub fn calculate_medoid_point_id(&self) -> ANNResult { + Ok(self.find_nearest_point_id(self.calculate_centroid_point()?)) + } + + /// calculate centroid, average of all vertices in the dataset + fn calculate_centroid_point(&self) -> ANNResult<[f32; N]> { + // 
Allocate and initialize the centroid vector + let mut center: [f32; N] = [0.0; N]; + + // Sum the data points' components + for i in 0..self.num_active_pts { + let vertex = self.get_vertex(i as u32)?; + let vertex_slice = vertex.vector(); + for j in 0..N { + center[j] += vertex_slice[j].into(); + } + } + + // Divide by the number of points to calculate the centroid + let capacity = self.num_active_pts as f32; + for item in center.iter_mut().take(N) { + *item /= capacity; + } + + Ok(center) + } + + /// find out the vertex closest to the given point + fn find_nearest_point_id(&self, point: [f32; N]) -> u32 { + // compute all to one distance + let mut distances = vec![0f32; self.num_active_pts]; + let slice = &self.data[..]; + distances.par_iter_mut().enumerate().for_each(|(i, dist)| { + let start = i * N; + for j in 0..N { + let diff: f32 = (point.as_slice()[j] - slice[start + j].into()) + * (point.as_slice()[j] - slice[start + j].into()); + *dist += diff; + } + }); + + let mut min_idx = 0; + let mut min_dist = f32::MAX; + for (i, distance) in distances.iter().enumerate().take(self.num_active_pts) { + if *distance < min_dist { + min_idx = i; + min_dist = *distance; + } + } + min_idx as u32 + } + + /// Prefetch vertex data in the memory hierarchy + /// NOTE: good efficiency when total_vec_size is integral multiple of 64 + #[inline] + pub fn prefetch_vector(&self, id: u32) { + let start = id as usize * N; + let end = start + N; + + if end <= self.data.len() { + let vec = &self.data[start..end]; + vector::prefetch_vector(vec); + } + } + + /// Convert into dto object + pub fn into_dto(&mut self) -> DatasetDto { + DatasetDto { + data: &mut self.data, + rounded_dim: N, + } + } +} + +/// Dataset dto used for other layer, such as storage +/// N is the aligned dimension +#[derive(Debug)] +pub struct DatasetDto<'a, T> { + /// data slice borrow from dataset + pub data: &'a mut [T], + + /// rounded dimension + pub rounded_dim: usize, +} + +#[cfg(test)] +mod dataset_test { + use std::fs; + + use super::*; + use crate::model::vertex::DIM_128; + + #[test] + fn get_vertex_within_range() { + let num_points = 1_000_000; + let id = 999_999; + let dataset = InmemDataset::::new(num_points, 1f32).unwrap(); + + let vertex = dataset.get_vertex(999_999).unwrap(); + + assert_eq!(vertex.vertex_id(), id); + assert_eq!(vertex.vector().len(), DIM_128); + assert_eq!(vertex.vector().as_ptr(), unsafe { + dataset.data.as_ptr().add((id as usize) * DIM_128) + }); + } + + #[test] + fn get_vertex_out_of_range() { + let num_points = 1_000_000; + let invalid_id = 1_000_000; + let dataset = InmemDataset::::new(num_points, 1f32).unwrap(); + + if dataset.get_vertex(invalid_id).is_ok() { + panic!("id ({}) should be out of range", invalid_id) + }; + } + + #[test] + fn load_data_test() { + let file_name = "dataset_test_load_data_test.bin"; + //npoints=2, dim=8, 2 vectors [1.0;8] [2.0;8] + let data: [u8; 72] = [ + 2, 0, 0, 0, 8, 0, 0, 0, 0x00, 0x00, 0x80, 0x3f, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, + 0x40, 0x40, 0x00, 0x00, 0x80, 0x40, 0x00, 0x00, 0xa0, 0x40, 0x00, 0x00, 0xc0, 0x40, + 0x00, 0x00, 0xe0, 0x40, 0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x10, 0x41, 0x00, 0x00, + 0x20, 0x41, 0x00, 0x00, 0x30, 0x41, 0x00, 0x00, 0x40, 0x41, 0x00, 0x00, 0x50, 0x41, + 0x00, 0x00, 0x60, 0x41, 0x00, 0x00, 0x70, 0x41, 0x00, 0x00, 0x80, 0x41, + ]; + std::fs::write(file_name, data).expect("Failed to write sample file"); + + let mut dataset = InmemDataset::::new(2, 1f32).unwrap(); + + match copy_aligned_data_from_file( + file_name, + dataset.into_dto(), + 0, + 
) { + Ok((npts, dim)) => { + fs::remove_file(file_name).expect("Failed to delete file"); + assert!(npts == 2); + assert!(dim == 8); + assert!(dataset.data.len() == 16); + + let first_vertex = dataset.get_vertex(0).unwrap(); + let second_vertex = dataset.get_vertex(1).unwrap(); + + assert!(*first_vertex.vector() == [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]); + assert!(*second_vertex.vector() == [9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0]); + } + Err(e) => { + fs::remove_file(file_name).expect("Failed to delete file"); + panic!("{}", e) + } + } + } +} + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/data_store/mod.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/data_store/mod.rs new file mode 100644 index 0000000..4e7e683 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/data_store/mod.rs @@ -0,0 +1,11 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +#[allow(clippy::module_inception)] +mod inmem_dataset; +pub use inmem_dataset::InmemDataset; +pub use inmem_dataset::DatasetDto; + +mod disk_scratch_dataset; +pub use disk_scratch_dataset::*; diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/graph/adjacency_list.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/graph/adjacency_list.rs new file mode 100644 index 0000000..7ad2d7d --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/graph/adjacency_list.rs @@ -0,0 +1,64 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +#![warn(missing_debug_implementations, missing_docs)] + +//! Adjacency List + +use std::ops::{Deref, DerefMut}; + +#[derive(Debug, Eq, PartialEq)] +/// Represents the out neighbors of a vertex +pub struct AdjacencyList { + edges: Vec, +} + +/// In-mem index related limits +const GRAPH_SLACK_FACTOR: f32 = 1.3_f32; + +impl AdjacencyList { + /// Create AdjacencyList with capacity slack for a range. + pub fn for_range(range: usize) -> Self { + let capacity = (range as f32 * GRAPH_SLACK_FACTOR).ceil() as usize; + Self { + edges: Vec::with_capacity(capacity), + } + } + + /// Push a node to the list of neighbors for the given node. + pub fn push(&mut self, node_id: u32) { + debug_assert!(self.edges.len() < self.edges.capacity()); + self.edges.push(node_id); + } +} + +impl From> for AdjacencyList { + fn from(edges: Vec) -> Self { + Self { edges } + } +} + +impl Deref for AdjacencyList { + type Target = Vec; + + fn deref(&self) -> &Self::Target { + &self.edges + } +} + +impl DerefMut for AdjacencyList { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.edges + } +} + +impl<'a> IntoIterator for &'a AdjacencyList { + type Item = &'a u32; + type IntoIter = std::slice::Iter<'a, u32>; + + fn into_iter(self) -> Self::IntoIter { + self.edges.iter() + } +} + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/graph/disk_graph.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/graph/disk_graph.rs new file mode 100644 index 0000000..49190b1 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/graph/disk_graph.rs @@ -0,0 +1,179 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +#![warn(missing_docs)] + +//! 
Disk graph + +use byteorder::{LittleEndian, ByteOrder}; +use vector::FullPrecisionDistance; + +use crate::common::{ANNResult, ANNError}; +use crate::model::data_store::DiskScratchDataset; +use crate::model::Vertex; +use crate::storage::DiskGraphStorage; + +use super::{VertexAndNeighbors, SectorGraph, AdjacencyList}; + +/// Disk graph +pub struct DiskGraph { + /// dim of fp vector in disk sector + dim: usize, + + /// number of nodes per sector + num_nodes_per_sector: u64, + + /// max node length in bytes + max_node_len: u64, + + /// the len of fp vector + fp_vector_len: u64, + + /// list of nodes (vertex_id) to fetch from disk + nodes_to_fetch: Vec, + + /// Sector graph + sector_graph: SectorGraph, +} + +impl<'a> DiskGraph { + /// Create DiskGraph instance + pub fn new( + dim: usize, + num_nodes_per_sector: u64, + max_node_len: u64, + fp_vector_len: u64, + beam_width: usize, + graph_storage: DiskGraphStorage, + ) -> ANNResult { + let graph = Self { + dim, + num_nodes_per_sector, + max_node_len, + fp_vector_len, + nodes_to_fetch: Vec::with_capacity(2 * beam_width), + sector_graph: SectorGraph::new(graph_storage)?, + }; + + Ok(graph) + } + + /// Add vertex_id into the list to fetch from disk + pub fn add_vertex(&mut self, id: u32) { + self.nodes_to_fetch.push(id); + } + + /// Fetch nodes from disk index + pub fn fetch_nodes(&mut self) -> ANNResult<()> { + let sectors_to_fetch: Vec = self.nodes_to_fetch.iter().map(|&id| self.node_sector_index(id)).collect(); + self.sector_graph.read_graph(§ors_to_fetch)?; + + Ok(()) + } + + /// Copy disk fp vector to DiskScratchDataset + /// Return the fp vector with aligned dim from DiskScratchDataset + pub fn copy_fp_vector_to_disk_scratch_dataset( + &self, + node_index: usize, + disk_scratch_dataset: &'a mut DiskScratchDataset + ) -> ANNResult> + where + [T; N]: FullPrecisionDistance, + { + if self.dim > N { + return Err(ANNError::log_index_error(format!( + "copy_sector_fp_to_aligned_dataset: dim {} is greater than aligned dim {}", + self.dim, N))); + } + + let fp_vector_buf = self.node_fp_vector_buf(node_index); + + // Safety condition is met here + let aligned_dim_vector = unsafe { disk_scratch_dataset.memcpy_from_fp_vector_buf(fp_vector_buf) }; + + Vertex::<'a, T, N>::try_from((aligned_dim_vector, self.nodes_to_fetch[node_index])) + .map_err(|err| ANNError::log_index_error(format!("TryFromSliceError: failed to get Vertex for disk index node, err={}", err))) + } + + /// Reset graph + pub fn reset(&mut self) { + self.nodes_to_fetch.clear(); + self.sector_graph.reset(); + } + + fn get_vertex_and_neighbors(&self, node_index: usize) -> VertexAndNeighbors { + let node_disk_buf = self.node_disk_buf(node_index); + let buf = &node_disk_buf[self.fp_vector_len as usize..]; + let num_neighbors = LittleEndian::read_u32(&buf[0..4]) as usize; + let neighbors_buf = &buf[4..4 + num_neighbors * 4]; + + let mut adjacency_list = AdjacencyList::for_range(num_neighbors); + for chunk in neighbors_buf.chunks(4) { + let neighbor_id = LittleEndian::read_u32(chunk); + adjacency_list.push(neighbor_id); + } + + VertexAndNeighbors::new(self.nodes_to_fetch[node_index], adjacency_list) + } + + #[inline] + fn node_sector_index(&self, vertex_id: u32) -> u64 { + vertex_id as u64 / self.num_nodes_per_sector + 1 + } + + #[inline] + fn node_disk_buf(&self, node_index: usize) -> &[u8] { + let vertex_id = self.nodes_to_fetch[node_index]; + + // get sector_buf where this node is located + let sector_buf = self.sector_graph.get_sector_buf(node_index); + let node_offset = (vertex_id as u64 % 
self.num_nodes_per_sector * self.max_node_len) as usize; + §or_buf[node_offset..node_offset + self.max_node_len as usize] + } + + #[inline] + fn node_fp_vector_buf(&self, node_index: usize) -> &[u8] { + let node_disk_buf = self.node_disk_buf(node_index); + &node_disk_buf[..self.fp_vector_len as usize] + } +} + +/// Iterator for DiskGraph +pub struct DiskGraphIntoIterator<'a> { + graph: &'a DiskGraph, + index: usize, +} + +impl<'a> IntoIterator for &'a DiskGraph +{ + type IntoIter = DiskGraphIntoIterator<'a>; + type Item = ANNResult<(usize, VertexAndNeighbors)>; + + #[inline] + fn into_iter(self) -> Self::IntoIter { + DiskGraphIntoIterator { + graph: self, + index: 0, + } + } +} + +impl<'a> Iterator for DiskGraphIntoIterator<'a> +{ + type Item = ANNResult<(usize, VertexAndNeighbors)>; + + fn next(&mut self) -> Option { + if self.index >= self.graph.nodes_to_fetch.len() { + return None; + } + + let node_index = self.index; + let vertex_and_neighbors = self.graph.get_vertex_and_neighbors(self.index); + + self.index += 1; + Some(Ok((node_index, vertex_and_neighbors))) + } +} + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/graph/inmem_graph.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/graph/inmem_graph.rs new file mode 100644 index 0000000..3d08db8 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/graph/inmem_graph.rs @@ -0,0 +1,141 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +#![warn(missing_debug_implementations, missing_docs)] + +//! In-memory graph + +use std::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard}; + +use crate::common::ANNError; + +use super::VertexAndNeighbors; + +/// The entire graph of in-memory index +#[derive(Debug)] +pub struct InMemoryGraph { + /// The entire graph + pub final_graph: Vec>, +} + +impl InMemoryGraph { + /// Create InMemoryGraph instance + pub fn new(size: usize, max_degree: u32) -> Self { + let mut graph = Vec::with_capacity(size); + for id in 0..size { + graph.push(RwLock::new(VertexAndNeighbors::for_range( + id as u32, + max_degree as usize, + ))); + } + Self { final_graph: graph } + } + + /// Size of graph + pub fn size(&self) -> usize { + self.final_graph.len() + } + + /// Extend the graph by size vectors + pub fn extend(&mut self, size: usize, max_degree: u32) { + for id in 0..size { + self.final_graph + .push(RwLock::new(VertexAndNeighbors::for_range( + id as u32, + max_degree as usize, + ))); + } + } + + /// Get read guard of vertex_id + pub fn read_vertex_and_neighbors( + &self, + vertex_id: u32, + ) -> Result, ANNError> { + self.final_graph[vertex_id as usize].read().map_err(|err| { + ANNError::log_lock_poison_error(format!( + "PoisonError: Lock poisoned when reading final_graph for vertex_id {}, err={}", + vertex_id, err + )) + }) + } + + /// Get write guard of vertex_id + pub fn write_vertex_and_neighbors( + &self, + vertex_id: u32, + ) -> Result, ANNError> { + self.final_graph[vertex_id as usize].write().map_err(|err| { + ANNError::log_lock_poison_error(format!( + "PoisonError: Lock poisoned when writing final_graph for vertex_id {}, err={}", + vertex_id, err + )) + }) + } +} + +#[cfg(test)] +mod graph_tests { + use crate::model::{graph::AdjacencyList, GRAPH_SLACK_FACTOR}; + + use super::*; + + #[test] + fn test_new() { + let graph = InMemoryGraph::new(10, 10); + let capacity = (GRAPH_SLACK_FACTOR * 10_f64).ceil() as usize; + + assert_eq!(graph.final_graph.len(), 
10); + for i in 0..10 { + let neighbor = graph.final_graph[i].read().unwrap(); + assert_eq!(neighbor.vertex_id, i as u32); + assert_eq!(neighbor.get_neighbors().capacity(), capacity); + } + } + + #[test] + fn test_size() { + let graph = InMemoryGraph::new(10, 10); + assert_eq!(graph.size(), 10); + } + + #[test] + fn test_extend() { + let mut graph = InMemoryGraph::new(10, 10); + graph.extend(10, 10); + + assert_eq!(graph.size(), 20); + + let capacity = (GRAPH_SLACK_FACTOR * 10_f64).ceil() as usize; + let mut id: u32 = 0; + + for i in 10..20 { + let neighbor = graph.final_graph[i].read().unwrap(); + assert_eq!(neighbor.vertex_id, id); + assert_eq!(neighbor.get_neighbors().capacity(), capacity); + id += 1; + } + } + + #[test] + fn test_read_vertex_and_neighbors() { + let graph = InMemoryGraph::new(10, 10); + let neighbor = graph.read_vertex_and_neighbors(0); + assert!(neighbor.is_ok()); + assert_eq!(neighbor.unwrap().vertex_id, 0); + } + + #[test] + fn test_write_vertex_and_neighbors() { + let graph = InMemoryGraph::new(10, 10); + { + let neighbor = graph.write_vertex_and_neighbors(0); + assert!(neighbor.is_ok()); + neighbor.unwrap().add_to_neighbors(10, 10); + } + + let neighbor = graph.read_vertex_and_neighbors(0).unwrap(); + assert_eq!(neighbor.get_neighbors(), &AdjacencyList::from(vec![10_u32])); + } +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/graph/mod.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/graph/mod.rs new file mode 100644 index 0000000..d1457f1 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/graph/mod.rs @@ -0,0 +1,20 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +#[allow(clippy::module_inception)] +mod inmem_graph; +pub use inmem_graph::InMemoryGraph; + +pub mod vertex_and_neighbors; +pub use vertex_and_neighbors::VertexAndNeighbors; + +mod adjacency_list; +pub use adjacency_list::AdjacencyList; + +mod sector_graph; +pub use sector_graph::*; + +mod disk_graph; +pub use disk_graph::*; + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/graph/sector_graph.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/graph/sector_graph.rs new file mode 100644 index 0000000..e51e0bf --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/graph/sector_graph.rs @@ -0,0 +1,87 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +#![warn(missing_docs)] + +//! 
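// Illustrative sketch (not part of the diff): each vertex's adjacency list sits
// behind its own RwLock, so a pruning pass takes a write guard only for the
// vertex being rewritten. The helper name replace_neighbors is hypothetical;
// set_neighbors is defined on VertexAndNeighbors later in this diff.
fn replace_neighbors(
    graph: &InMemoryGraph,
    vertex_id: u32,
    pruned: AdjacencyList,
) -> Result<(), ANNError> {
    let mut guard = graph.write_vertex_and_neighbors(vertex_id)?;
    guard.set_neighbors(pruned);
    Ok(())
}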
Sector graph + +use std::ops::Deref; + +use crate::common::{AlignedBoxWithSlice, ANNResult, ANNError}; +use crate::model::{MAX_N_SECTOR_READS, SECTOR_LEN, AlignedRead}; +use crate::storage::DiskGraphStorage; + +/// Sector graph read from disk index +pub struct SectorGraph { + /// Sector bytes from disk + /// One sector has num_nodes_per_sector nodes + /// Each node's layout: {full precision vector:[T; DIM]}{num_nbrs: u32}{neighbors: [u32; num_nbrs]} + /// The fp vector is not aligned + sectors_data: AlignedBoxWithSlice, + + /// Graph storage to read sectors + graph_storage: DiskGraphStorage, + + /// Current sector index into which the next read reads data + cur_sector_idx: u64, +} + +impl SectorGraph { + /// Create SectorGraph instance + pub fn new(graph_storage: DiskGraphStorage) -> ANNResult { + Ok(Self { + sectors_data: AlignedBoxWithSlice::new(MAX_N_SECTOR_READS * SECTOR_LEN, SECTOR_LEN)?, + graph_storage, + cur_sector_idx: 0, + }) + } + + /// Reset SectorGraph + pub fn reset(&mut self) { + self.cur_sector_idx = 0; + } + + /// Read sectors into sectors_data + /// They are in the same order as sectors_to_fetch + pub fn read_graph(&mut self, sectors_to_fetch: &[u64]) -> ANNResult<()> { + let cur_sector_idx_usize: usize = self.cur_sector_idx.try_into()?; + if sectors_to_fetch.len() > MAX_N_SECTOR_READS - cur_sector_idx_usize { + return Err(ANNError::log_index_error(format!( + "Trying to read too many sectors. number of sectors to read: {}, max number of sectors can read: {}", + sectors_to_fetch.len(), + MAX_N_SECTOR_READS - cur_sector_idx_usize, + ))); + } + + let mut sector_slices = self.sectors_data.split_into_nonoverlapping_mut_slices( + cur_sector_idx_usize * SECTOR_LEN..(cur_sector_idx_usize + sectors_to_fetch.len()) * SECTOR_LEN, + SECTOR_LEN)?; + + let mut read_requests = Vec::with_capacity(sector_slices.len()); + for (local_sector_idx, slice) in sector_slices.iter_mut().enumerate() { + let sector_id = sectors_to_fetch[local_sector_idx]; + read_requests.push(AlignedRead::new(sector_id * SECTOR_LEN as u64, slice)?); + } + + self.graph_storage.read(&mut read_requests)?; + self.cur_sector_idx += sectors_to_fetch.len() as u64; + + Ok(()) + } + + /// Get sector data by local index + #[inline] + pub fn get_sector_buf(&self, local_sector_idx: usize) -> &[u8] { + &self.sectors_data[local_sector_idx * SECTOR_LEN..(local_sector_idx + 1) * SECTOR_LEN] + } +} + +impl Deref for SectorGraph { + type Target = [u8]; + + fn deref(&self) -> &Self::Target { + &self.sectors_data + } +} + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/graph/vertex_and_neighbors.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/graph/vertex_and_neighbors.rs new file mode 100644 index 0000000..a9fa389 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/graph/vertex_and_neighbors.rs @@ -0,0 +1,159 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +#![warn(missing_debug_implementations, missing_docs)] + +//! 
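// Illustrative sketch (not part of the diff): how a vertex id maps onto the
// on-disk layout that SectorGraph reads and DiskGraph (above) parses. The "+ 1"
// assumes the first sector is reserved, mirroring node_sector_index in DiskGraph;
// locate_node is a hypothetical helper.
fn locate_node(vertex_id: u64, num_nodes_per_sector: u64, max_node_len: u64) -> (u64, u64) {
    let sector_index = vertex_id / num_nodes_per_sector + 1; // which SECTOR_LEN block to read
    let offset_in_sector = (vertex_id % num_nodes_per_sector) * max_node_len; // byte offset of the node within that sector
    (sector_index, offset_in_sector)
}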
Vertex and its Adjacency List + +use crate::model::GRAPH_SLACK_FACTOR; + +use super::AdjacencyList; + +/// The out neighbors of vertex_id +#[derive(Debug)] +pub struct VertexAndNeighbors { + /// The id of the vertex + pub vertex_id: u32, + + /// All out neighbors (id) of vertex_id + neighbors: AdjacencyList, +} + +impl VertexAndNeighbors { + /// Create VertexAndNeighbors with id and capacity + pub fn for_range(id: u32, range: usize) -> Self { + Self { + vertex_id: id, + neighbors: AdjacencyList::for_range(range), + } + } + + /// Create VertexAndNeighbors with id and neighbors + pub fn new(vertex_id: u32, neighbors: AdjacencyList) -> Self { + Self { + vertex_id, + neighbors, + } + } + + /// Get size of neighbors + #[inline(always)] + pub fn size(&self) -> usize { + self.neighbors.len() + } + + /// Update the neighbors vector (post a pruning exercise) + #[inline(always)] + pub fn set_neighbors(&mut self, new_neighbors: AdjacencyList) { + // Replace the graph entry with the pruned neighbors + self.neighbors = new_neighbors; + } + + /// Get the neighbors + #[inline(always)] + pub fn get_neighbors(&self) -> &AdjacencyList { + &self.neighbors + } + + /// Adds a node to the list of neighbors for the given node. + /// + /// # Arguments + /// + /// * `node_id` - The ID of the node to add. + /// * `range` - The range of the graph. + /// + /// # Return + /// + /// Returns `None` if the node is already in the list of neighbors, or a `Vec` containing the updated list of neighbors if the list of neighbors is full. + pub fn add_to_neighbors(&mut self, node_id: u32, range: u32) -> Option> { + // Check if n is already in the graph entry + if self.neighbors.contains(&node_id) { + return None; + } + + let neighbor_len = self.neighbors.len(); + + // If not, check if the graph entry has enough space + if neighbor_len < (GRAPH_SLACK_FACTOR * range as f64) as usize { + // If yes, add n to the graph entry + self.neighbors.push(node_id); + return None; + } + + let mut copy_of_neighbors = Vec::with_capacity(neighbor_len + 1); + unsafe { + let dst = copy_of_neighbors.as_mut_ptr(); + std::ptr::copy_nonoverlapping(self.neighbors.as_ptr(), dst, neighbor_len); + dst.add(neighbor_len).write(node_id); + copy_of_neighbors.set_len(neighbor_len + 1); + } + + Some(copy_of_neighbors) + } +} + +#[cfg(test)] +mod vertex_and_neighbors_tests { + use crate::model::GRAPH_SLACK_FACTOR; + + use super::*; + + #[test] + fn test_set_with_capacity() { + let neighbors = VertexAndNeighbors::for_range(20, 10); + assert_eq!(neighbors.vertex_id, 20); + assert_eq!( + neighbors.neighbors.capacity(), + (10_f32 * GRAPH_SLACK_FACTOR as f32).ceil() as usize + ); + } + + #[test] + fn test_size() { + let mut neighbors = VertexAndNeighbors::for_range(20, 10); + + for i in 0..5 { + neighbors.neighbors.push(i); + } + + assert_eq!(neighbors.size(), 5); + } + + #[test] + fn test_set_neighbors() { + let mut neighbors = VertexAndNeighbors::for_range(20, 10); + let new_vec = AdjacencyList::from(vec![1, 2, 3, 4, 5]); + neighbors.set_neighbors(AdjacencyList::from(new_vec.clone())); + + assert_eq!(neighbors.neighbors, new_vec); + } + + #[test] + fn test_get_neighbors() { + let mut neighbors = VertexAndNeighbors::for_range(20, 10); + neighbors.set_neighbors(AdjacencyList::from(vec![1, 2, 3, 4, 5])); + let neighbor_ref = neighbors.get_neighbors(); + + assert!(std::ptr::eq(&neighbors.neighbors, neighbor_ref)) + } + + #[test] + fn test_add_to_neighbors() { + let mut neighbors = VertexAndNeighbors::for_range(20, 10); + + assert_eq!(neighbors.add_to_neighbors(1, 1), 
None); + assert_eq!(neighbors.neighbors, AdjacencyList::from(vec![1])); + + assert_eq!(neighbors.add_to_neighbors(1, 1), None); + assert_eq!(neighbors.neighbors, AdjacencyList::from(vec![1])); + + let ret = neighbors.add_to_neighbors(2, 1); + assert!(ret.is_some()); + assert_eq!(ret.unwrap(), vec![1, 2]); + assert_eq!(neighbors.neighbors, AdjacencyList::from(vec![1])); + + assert_eq!(neighbors.add_to_neighbors(2, 2), None); + assert_eq!(neighbors.neighbors, AdjacencyList::from(vec![1, 2])); + } +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/mod.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/mod.rs new file mode 100644 index 0000000..a4f15ee --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/mod.rs @@ -0,0 +1,29 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +pub mod neighbor; +pub use neighbor::Neighbor; +pub use neighbor::NeighborPriorityQueue; + +pub mod data_store; +pub use data_store::InmemDataset; + +pub mod graph; +pub use graph::InMemoryGraph; +pub use graph::VertexAndNeighbors; + +pub mod configuration; +pub use configuration::*; + +pub mod scratch; +pub use scratch::*; + +pub mod vertex; +pub use vertex::Vertex; + +pub mod pq; +pub use pq::*; + +pub mod windows_aligned_file_reader; +pub use windows_aligned_file_reader::*; diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/neighbor/mod.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/neighbor/mod.rs new file mode 100644 index 0000000..cd0dbad --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/neighbor/mod.rs @@ -0,0 +1,13 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +#[allow(clippy::module_inception)] +mod neighbor; +pub use neighbor::*; + +mod neighbor_priority_queue; +pub use neighbor_priority_queue::*; + +mod sorted_neighbor_vector; +pub use sorted_neighbor_vector::SortedNeighborVector; diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/neighbor/neighbor.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/neighbor/neighbor.rs new file mode 100644 index 0000000..8c712bc --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/neighbor/neighbor.rs @@ -0,0 +1,104 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. 
+ */ +use std::cmp::Ordering; + +/// Neighbor node +#[derive(Debug, Clone, Copy)] +pub struct Neighbor { + /// The id of the node + pub id: u32, + + /// The distance from the query node to current node + pub distance: f32, + + /// Whether the current is visited or not + pub visited: bool, +} + +impl Neighbor { + /// Create the neighbor node and it has not been visited + pub fn new (id: u32, distance: f32) -> Self { + Self { + id, + distance, + visited: false + } + } +} + +impl Default for Neighbor { + fn default() -> Self { + Self { id: 0, distance: 0.0_f32, visited: false } + } +} + +impl PartialEq for Neighbor { + #[inline] + fn eq(&self, other: &Self) -> bool { + self.id == other.id + } +} + +impl Eq for Neighbor {} + +impl Ord for Neighbor { + fn cmp(&self, other: &Self) -> Ordering { + let ord = self.distance.partial_cmp(&other.distance).unwrap_or(std::cmp::Ordering::Equal); + + if ord == Ordering::Equal { + return self.id.cmp(&other.id); + } + + ord + } +} + +impl PartialOrd for Neighbor { + #[inline] + fn lt(&self, other: &Self) -> bool { + self.distance < other.distance || (self.distance == other.distance && self.id < other.id) + } + + // Reason for allowing panic = "Does not support comparing Neighbor with partial_cmp" + #[allow(clippy::panic)] + fn partial_cmp(&self, _: &Self) -> Option { + panic!("Neighbor only allows eq and lt") + } +} + +#[cfg(test)] +mod neighbor_test { + use super::*; + + #[test] + fn eq_lt_works() { + let n1 = Neighbor::new(1, 1.1); + let n2 = Neighbor::new(2, 2.0); + let n3 = Neighbor::new(1, 1.1); + + assert!(n1 != n2); + assert!(n1 < n2); + assert!(n1 == n3); + } + + #[test] + #[should_panic] + fn gt_should_panic() { + let n1 = Neighbor::new(1, 1.1); + let n2 = Neighbor::new(2, 2.0); + + assert!(n2 > n1); + } + + #[test] + #[should_panic] + fn le_should_panic() { + let n1 = Neighbor::new(1, 1.1); + let n2 = Neighbor::new(2, 2.0); + + assert!(n1 <= n2); + } +} + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/neighbor/neighbor_priority_queue.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/neighbor/neighbor_priority_queue.rs new file mode 100644 index 0000000..81b1610 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/neighbor/neighbor_priority_queue.rs @@ -0,0 +1,241 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +use crate::model::Neighbor; + +/// Neighbor priority Queue based on the distance to the query node +#[derive(Debug)] +pub struct NeighborPriorityQueue { + /// The size of the priority queue + size: usize, + + /// The capacity of the priority queue + capacity: usize, + + /// The current notvisited neighbor whose distance is smallest among all notvisited neighbor + cur: usize, + + /// The neighbor collection + data: Vec, +} + +impl Default for NeighborPriorityQueue { + fn default() -> Self { + Self::new() + } +} + +impl NeighborPriorityQueue { + /// Create NeighborPriorityQueue without capacity + pub fn new() -> Self { + Self { + size: 0, + capacity: 0, + cur: 0, + data: Vec::new(), + } + } + + /// Create NeighborPriorityQueue with capacity + pub fn with_capacity(capacity: usize) -> Self { + Self { + size: 0, + capacity, + cur: 0, + data: vec![Neighbor::default(); capacity + 1], + } + } + + /// Inserts item with order. + /// The item will be dropped if queue is full / already exist in queue / it has a greater distance than the last item. 
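// Illustrative note (not part of the diff): Neighbor intentionally supports only
// == and < . Sorting goes through Ord::cmp (distance first, ties broken by id),
// while the partial_cmp override panics so accidental >, >=, <= comparisons fail
// loudly, as the gt/le tests above exercise. The helper name nearest is hypothetical.
fn nearest(candidates: &mut Vec<Neighbor>) -> Option<Neighbor> {
    candidates.sort_unstable(); // same path SortedNeighborVector takes below
    candidates.first().copied()
}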
+ /// The set cursor that is used to pop() the next item will be set to the lowest index of an uncheck item. + pub fn insert(&mut self, nbr: Neighbor) { + if self.size == self.capacity && self.get_at(self.size - 1) < &nbr { + return; + } + + let mut lo = 0; + let mut hi = self.size; + while lo < hi { + let mid = (lo + hi) >> 1; + if &nbr < self.get_at(mid) { + hi = mid; + } else if self.get_at(mid).id == nbr.id { + // Make sure the same neighbor isn't inserted into the set + return; + } else { + lo = mid + 1; + } + } + + if lo < self.capacity { + self.data.copy_within(lo..self.size, lo + 1); + } + self.data[lo] = Neighbor::new(nbr.id, nbr.distance); + if self.size < self.capacity { + self.size += 1; + } + if lo < self.cur { + self.cur = lo; + } + } + + /// Get the neighbor at index - SAFETY: index must be less than size + fn get_at(&self, index: usize) -> &Neighbor { + unsafe { self.data.get_unchecked(index) } + } + + /// Get the closest and notvisited neighbor + pub fn closest_notvisited(&mut self) -> Neighbor { + self.data[self.cur].visited = true; + let pre = self.cur; + while self.cur < self.size && self.get_at(self.cur).visited { + self.cur += 1; + } + self.data[pre] + } + + /// Whether there is notvisited node or not + pub fn has_notvisited_node(&self) -> bool { + self.cur < self.size + } + + /// Get the size of the NeighborPriorityQueue + pub fn size(&self) -> usize { + self.size + } + + /// Get the capacity of the NeighborPriorityQueue + pub fn capacity(&self) -> usize { + self.capacity + } + + /// Sets an artificial capacity of the NeighborPriorityQueue. For benchmarking purposes only. + pub fn set_capacity(&mut self, capacity: usize) { + if capacity < self.data.len() { + self.capacity = capacity; + } + } + + /// Reserve capacity + pub fn reserve(&mut self, capacity: usize) { + if capacity > self.capacity { + self.data.resize(capacity + 1, Neighbor::default()); + self.capacity = capacity; + } + } + + /// Set size and cur to 0 + pub fn clear(&mut self) { + self.size = 0; + self.cur = 0; + } +} + +impl std::ops::Index for NeighborPriorityQueue { + type Output = Neighbor; + + fn index(&self, i: usize) -> &Self::Output { + &self.data[i] + } +} + +#[cfg(test)] +mod neighbor_priority_queue_test { + use super::*; + + #[test] + fn test_reserve_capacity() { + let mut queue = NeighborPriorityQueue::with_capacity(10); + assert_eq!(queue.capacity(), 10); + queue.reserve(20); + assert_eq!(queue.capacity(), 20); + } + + #[test] + fn test_insert() { + let mut queue = NeighborPriorityQueue::with_capacity(3); + assert_eq!(queue.size(), 0); + queue.insert(Neighbor::new(1, 1.0)); + queue.insert(Neighbor::new(2, 0.5)); + assert_eq!(queue.size(), 2); + queue.insert(Neighbor::new(2, 0.5)); // should be ignored as the same neighbor + assert_eq!(queue.size(), 2); + queue.insert(Neighbor::new(3, 0.9)); + assert_eq!(queue.size(), 3); + assert_eq!(queue[2].id, 1); + queue.insert(Neighbor::new(4, 2.0)); // should be dropped as queue is full and distance is greater than last item + assert_eq!(queue.size(), 3); + assert_eq!(queue[0].id, 2); // node id in queue should be [2,3,1] + assert_eq!(queue[1].id, 3); + assert_eq!(queue[2].id, 1); + println!("{:?}", queue); + } + + #[test] + fn test_index() { + let mut queue = NeighborPriorityQueue::with_capacity(3); + queue.insert(Neighbor::new(1, 1.0)); + queue.insert(Neighbor::new(2, 0.5)); + queue.insert(Neighbor::new(3, 1.5)); + assert_eq!(queue[0].id, 2); + assert_eq!(queue[0].distance, 0.5); + } + + #[test] + fn test_visit() { + let mut queue = 
NeighborPriorityQueue::with_capacity(3); + queue.insert(Neighbor::new(1, 1.0)); + queue.insert(Neighbor::new(2, 0.5)); + queue.insert(Neighbor::new(3, 1.5)); // node id in queue should be [2,1,3] + assert!(queue.has_notvisited_node()); + let nbr = queue.closest_notvisited(); + assert_eq!(nbr.id, 2); + assert_eq!(nbr.distance, 0.5); + assert!(nbr.visited); + assert!(queue.has_notvisited_node()); + let nbr = queue.closest_notvisited(); + assert_eq!(nbr.id, 1); + assert_eq!(nbr.distance, 1.0); + assert!(nbr.visited); + assert!(queue.has_notvisited_node()); + let nbr = queue.closest_notvisited(); + assert_eq!(nbr.id, 3); + assert_eq!(nbr.distance, 1.5); + assert!(nbr.visited); + assert!(!queue.has_notvisited_node()); + } + + #[test] + fn test_clear_queue() { + let mut queue = NeighborPriorityQueue::with_capacity(3); + queue.insert(Neighbor::new(1, 1.0)); + queue.insert(Neighbor::new(2, 0.5)); + assert_eq!(queue.size(), 2); + assert!(queue.has_notvisited_node()); + queue.clear(); + assert_eq!(queue.size(), 0); + assert!(!queue.has_notvisited_node()); + } + + #[test] + fn test_reserve() { + let mut queue = NeighborPriorityQueue::new(); + queue.reserve(10); + assert_eq!(queue.data.len(), 11); + assert_eq!(queue.capacity, 10); + } + + #[test] + fn test_set_capacity() { + let mut queue = NeighborPriorityQueue::with_capacity(10); + queue.set_capacity(5); + assert_eq!(queue.capacity, 5); + assert_eq!(queue.data.len(), 11); + + queue.set_capacity(11); + assert_eq!(queue.capacity, 5); + } +} + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/neighbor/sorted_neighbor_vector.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/neighbor/sorted_neighbor_vector.rs new file mode 100644 index 0000000..4c3eff0 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/neighbor/sorted_neighbor_vector.rs @@ -0,0 +1,37 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +#![warn(missing_debug_implementations, missing_docs)] + +//! Sorted Neighbor Vector + +use std::ops::{Deref, DerefMut}; + +use super::Neighbor; + +/// A newtype on top of vector of neighbors, is sorted by distance +#[derive(Debug)] +pub struct SortedNeighborVector<'a>(&'a mut Vec); + +impl<'a> SortedNeighborVector<'a> { + /// Create a new SortedNeighborVector + pub fn new(vec: &'a mut Vec) -> Self { + vec.sort_unstable(); + Self(vec) + } +} + +impl<'a> Deref for SortedNeighborVector<'a> { + type Target = Vec; + + fn deref(&self) -> &Self::Target { + self.0 + } +} + +impl<'a> DerefMut for SortedNeighborVector<'a> { + fn deref_mut(&mut self) -> &mut Self::Target { + self.0 + } +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/pq/fixed_chunk_pq_table.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/pq/fixed_chunk_pq_table.rs new file mode 100644 index 0000000..bfedcae --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/pq/fixed_chunk_pq_table.rs @@ -0,0 +1,483 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. 
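// Illustrative sketch (not part of the diff): how a greedy search loop drives the
// queue, repeatedly taking the closest unvisited candidate until none remain. A
// real search would expand each popped node and insert its neighbors back into
// the queue. The helper name drain_in_distance_order is hypothetical.
fn drain_in_distance_order(mut queue: NeighborPriorityQueue) -> Vec<u32> {
    let mut order = Vec::new();
    while queue.has_notvisited_node() {
        let nbr = queue.closest_notvisited(); // marks the slot visited and advances the cursor
        order.push(nbr.id);
    }
    order
}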
+ */ +#![warn(missing_debug_implementations)] + +use hashbrown::HashMap; +use rayon::prelude::{ + IndexedParallelIterator, IntoParallelRefMutIterator, ParallelIterator, ParallelSliceMut, +}; +use std::arch::x86_64::{_mm_prefetch, _MM_HINT_T0}; + +use crate::{ + common::{ANNError, ANNResult}, + model::NUM_PQ_CENTROIDS, +}; + +/// PQ Pivot table loading and calculate distance +#[derive(Debug)] +pub struct FixedChunkPQTable { + /// pq_tables = float array of size [256 * ndims] + pq_table: Vec, + + /// ndims = true dimension of vectors + dim: usize, + + /// num_pq_chunks = the pq chunk number + num_pq_chunks: usize, + + /// chunk_offsets = the offset of each chunk, start from 0 + chunk_offsets: Vec, + + /// centroid of each dimension + centroids: Vec, + + /// Becasue we're using L2 distance, this is no needed now. + /// Transport of pq_table. transport_pq_table = float array of size [ndims * 256]. + /// e.g. if pa_table is 2 centroids * 3 dims + /// [ 1, 2, 3, + /// 4, 5, 6] + /// then transport_pq_table would be 3 dims * 2 centroids + /// [ 1, 4, + /// 2, 5, + /// 3, 6] + /// transport_pq_table: Vec, + + /// Map dim offset to chunk index e.g., 8 dims in to 2 chunks + /// then would be [(0,0), (1,0), (2,0), (3,0), (4,1), (5,1), (6,1), (7,1)] + dimoffset_chunk_mapping: HashMap, +} + +impl FixedChunkPQTable { + /// Create the FixedChunkPQTable with dim and chunk numbers and pivot file data (pivot table + cenroids + chunk offsets) + pub fn new( + dim: usize, + num_pq_chunks: usize, + pq_table: Vec, + centroids: Vec, + chunk_offsets: Vec, + ) -> Self { + let mut dimoffset_chunk_mapping = HashMap::new(); + for chunk_index in 0..num_pq_chunks { + for dim_offset in chunk_offsets[chunk_index]..chunk_offsets[chunk_index + 1] { + dimoffset_chunk_mapping.insert(dim_offset, chunk_index); + } + } + + Self { + pq_table, + dim, + num_pq_chunks, + chunk_offsets, + centroids, + dimoffset_chunk_mapping, + } + } + + /// Get chunk number + pub fn get_num_chunks(&self) -> usize { + self.num_pq_chunks + } + + /// Shifting the query according to mean or the whole corpus + pub fn preprocess_query(&self, query_vec: &mut [f32]) { + for (query, ¢roid) in query_vec.iter_mut().zip(self.centroids.iter()) { + *query -= centroid; + } + } + + /// Pre-calculated the distance between query and each centroid by l2 distance + /// * `query_vec` - query vector: 1 * dim + /// * `dist_vec` - pre-calculated the distance between query and each centroid: chunk_size * num_centroids + #[allow(clippy::needless_range_loop)] + pub fn populate_chunk_distances(&self, query_vec: &[f32]) -> Vec { + let mut dist_vec = vec![0.0; self.num_pq_chunks * NUM_PQ_CENTROIDS]; + for centroid_index in 0..NUM_PQ_CENTROIDS { + for chunk_index in 0..self.num_pq_chunks { + for dim_offset in + self.chunk_offsets[chunk_index]..self.chunk_offsets[chunk_index + 1] + { + let diff: f32 = self.pq_table[self.dim * centroid_index + dim_offset] + - query_vec[dim_offset]; + dist_vec[chunk_index * NUM_PQ_CENTROIDS + centroid_index] += diff * diff; + } + } + } + dist_vec + } + + /// Pre-calculated the distance between query and each centroid by inner product + /// * `query_vec` - query vector: 1 * dim + /// * `dist_vec` - pre-calculated the distance between query and each centroid: chunk_size * num_centroids + /// + /// Reason to allow clippy::needless_range_loop: + /// The inner loop is operating over a range that is different for each iteration of the outer loop. 
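// Illustrative sketch (not part of the diff): once populate_chunk_distances has
// built the per-chunk lookup table laid out as [chunk * NUM_PQ_CENTROIDS + code],
// the approximate distance of a PQ-encoded point is one table read per chunk,
// summed. NUM_PQ_CENTROIDS = 256 is assumed here from the "256 * ndims" comment
// above; approx_distance is a hypothetical helper.
fn approx_distance(pq_codes: &[u8], chunk_dists: &[f32], num_pq_chunks: usize) -> f32 {
    const NUM_PQ_CENTROIDS: usize = 256;
    (0..num_pq_chunks)
        .map(|chunk| chunk_dists[chunk * NUM_PQ_CENTROIDS + pq_codes[chunk] as usize])
        .sum()
}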
+ /// This isn't a scenario where using iter().enumerate() would be easily applicable, + /// because the inner loop isn't iterating directly over the contents of a slice or array. + /// Thus, using indexing might be the most straightforward way to express this logic. + #[allow(clippy::needless_range_loop)] + pub fn populate_chunk_inner_products(&self, query_vec: &[f32]) -> Vec { + let mut dist_vec = vec![0.0; self.num_pq_chunks * NUM_PQ_CENTROIDS]; + for centroid_index in 0..NUM_PQ_CENTROIDS { + for chunk_index in 0..self.num_pq_chunks { + for dim_offset in + self.chunk_offsets[chunk_index]..self.chunk_offsets[chunk_index + 1] + { + // assumes that we are not shifting the vectors to mean zero, i.e., centroid + // array should be all zeros returning negative to keep the search code + // clean (max inner product vs min distance) + let diff: f32 = self.pq_table[self.dim * centroid_index + dim_offset] + * query_vec[dim_offset]; + dist_vec[chunk_index * NUM_PQ_CENTROIDS + centroid_index] -= diff; + } + } + } + dist_vec + } + + /// Calculate the distance between query and given centroid by l2 distance + /// * `query_vec` - query vector: 1 * dim + /// * `base_vec` - given centroid array: 1 * num_pq_chunks + #[allow(clippy::needless_range_loop)] + pub fn l2_distance(&self, query_vec: &[f32], base_vec: &[u8]) -> f32 { + let mut res_vec: Vec = vec![0.0; self.num_pq_chunks]; + res_vec + .par_iter_mut() + .enumerate() + .for_each(|(chunk_index, chunk_diff)| { + for dim_offset in + self.chunk_offsets[chunk_index]..self.chunk_offsets[chunk_index + 1] + { + let diff = self.pq_table + [self.dim * base_vec[chunk_index] as usize + dim_offset] + - query_vec[dim_offset]; + *chunk_diff += diff * diff; + } + }); + + let res: f32 = res_vec.iter().sum::(); + + res + } + + /// Calculate the distance between query and given centroid by inner product + /// * `query_vec` - query vector: 1 * dim + /// * `base_vec` - given centroid array: 1 * num_pq_chunks + #[allow(clippy::needless_range_loop)] + pub fn inner_product(&self, query_vec: &[f32], base_vec: &[u8]) -> f32 { + let mut res_vec: Vec = vec![0.0; self.num_pq_chunks]; + res_vec + .par_iter_mut() + .enumerate() + .for_each(|(chunk_index, chunk_diff)| { + for dim_offset in + self.chunk_offsets[chunk_index]..self.chunk_offsets[chunk_index + 1] + { + *chunk_diff += self.pq_table + [self.dim * base_vec[chunk_index] as usize + dim_offset] + * query_vec[dim_offset]; + } + }); + + let res: f32 = res_vec.iter().sum::(); + + // returns negative value to simulate distances (max -> min conversion) + -res + } + + /// Revert vector by adding centroid + /// * `base_vec` - given centroid array: 1 * num_pq_chunks + /// * `out_vec` - reverted vector + pub fn inflate_vector(&self, base_vec: &[u8]) -> ANNResult> { + let mut out_vec: Vec = vec![0.0; self.dim]; + for (dim_offset, value) in out_vec.iter_mut().enumerate() { + let chunk_index = + self.dimoffset_chunk_mapping + .get(&dim_offset) + .ok_or(ANNError::log_pq_error( + "ERROR: dim_offset not found in dimoffset_chunk_mapping".to_string(), + ))?; + *value = self.pq_table[self.dim * base_vec[*chunk_index] as usize + dim_offset] + + self.centroids[dim_offset]; + } + + Ok(out_vec) + } +} + +/// Given a batch input nodes, return a batch of PQ distance +/// * `pq_ids` - batch nodes: n_pts * pq_nchunks +/// * `n_pts` - batch number +/// * `pq_nchunks` - pq chunk number number +/// * `pq_dists` - pre-calculated the distance between query and each centroid: chunk_size * num_centroids +/// * `dists_out` - n_pts * 1 +pub fn pq_dist_lookup( 
+ pq_ids: &[u8], + n_pts: usize, + pq_nchunks: usize, + pq_dists: &[f32], +) -> Vec { + let mut dists_out: Vec = vec![0.0; n_pts]; + unsafe { + _mm_prefetch(dists_out.as_ptr() as *const i8, _MM_HINT_T0); + _mm_prefetch(pq_ids.as_ptr() as *const i8, _MM_HINT_T0); + _mm_prefetch(pq_ids.as_ptr().add(64) as *const i8, _MM_HINT_T0); + _mm_prefetch(pq_ids.as_ptr().add(128) as *const i8, _MM_HINT_T0); + } + for chunk in 0..pq_nchunks { + let chunk_dists = &pq_dists[256 * chunk..]; + if chunk < pq_nchunks - 1 { + unsafe { + _mm_prefetch( + chunk_dists.as_ptr().offset(256 * chunk as isize).add(256) as *const i8, + _MM_HINT_T0, + ); + } + } + dists_out + .par_iter_mut() + .enumerate() + .for_each(|(n_iter, dist)| { + let pq_centerid = pq_ids[pq_nchunks * n_iter + chunk]; + *dist += chunk_dists[pq_centerid as usize]; + }); + } + dists_out +} + +pub fn aggregate_coords(ids: &[u32], all_coords: &[u8], ndims: usize) -> Vec { + let mut out: Vec = vec![0u8; ids.len() * ndims]; + let ndim_u32 = ndims as u32; + out.par_chunks_mut(ndims) + .enumerate() + .for_each(|(index, chunk)| { + let id_compressed_pivot = &all_coords + [(ids[index] * ndim_u32) as usize..(ids[index] * ndim_u32 + ndim_u32) as usize]; + let temp_slice = + unsafe { std::slice::from_raw_parts(id_compressed_pivot.as_ptr(), ndims) }; + chunk.copy_from_slice(temp_slice); + }); + + out +} + +#[cfg(test)] +mod fixed_chunk_pq_table_test { + + use super::*; + use crate::common::{ANNError, ANNResult}; + use crate::utils::{convert_types_u32_usize, convert_types_u64_usize, file_exists, load_bin}; + + const DIM: usize = 128; + + #[test] + fn load_pivot_test() { + let pq_pivots_path: &str = "tests/data/siftsmall_learn.bin_pq_pivots.bin"; + let (dim, pq_table, centroids, chunk_offsets) = + load_pq_pivots_bin(pq_pivots_path, &1).unwrap(); + let fixed_chunk_pq_table = + FixedChunkPQTable::new(dim, 1, pq_table, centroids, chunk_offsets); + + assert_eq!(dim, DIM); + assert_eq!(fixed_chunk_pq_table.pq_table.len(), DIM * NUM_PQ_CENTROIDS); + assert_eq!(fixed_chunk_pq_table.centroids.len(), DIM); + + assert_eq!(fixed_chunk_pq_table.chunk_offsets[0], 0); + assert_eq!(fixed_chunk_pq_table.chunk_offsets[1], DIM); + assert_eq!(fixed_chunk_pq_table.chunk_offsets.len(), 2); + } + + #[test] + fn get_num_chunks_test() { + let num_chunks = 7; + let pa_table = vec![0.0; DIM * NUM_PQ_CENTROIDS]; + let centroids = vec![0.0; DIM]; + let chunk_offsets = vec![0, 7, 9, 11, 22, 34, 78, 127]; + let fixed_chunk_pq_table = + FixedChunkPQTable::new(DIM, num_chunks, pa_table, centroids, chunk_offsets); + let chunk: usize = fixed_chunk_pq_table.get_num_chunks(); + assert_eq!(chunk, num_chunks); + } + + #[test] + fn preprocess_query_test() { + let pq_pivots_path: &str = "tests/data/siftsmall_learn.bin_pq_pivots.bin"; + let (dim, pq_table, centroids, chunk_offsets) = + load_pq_pivots_bin(pq_pivots_path, &1).unwrap(); + let fixed_chunk_pq_table = + FixedChunkPQTable::new(dim, 1, pq_table, centroids, chunk_offsets); + + let mut query_vec: Vec = vec![ + 32.39f32, 78.57f32, 50.32f32, 80.46f32, 6.47f32, 69.76f32, 94.2f32, 83.36f32, 5.8f32, + 68.78f32, 42.32f32, 61.77f32, 90.26f32, 60.41f32, 3.86f32, 61.21f32, 16.6f32, 54.46f32, + 7.29f32, 54.24f32, 92.49f32, 30.18f32, 65.36f32, 99.09f32, 3.8f32, 36.4f32, 86.72f32, + 65.18f32, 29.87f32, 62.21f32, 58.32f32, 43.23f32, 94.3f32, 79.61f32, 39.67f32, + 11.18f32, 48.88f32, 38.19f32, 93.95f32, 10.46f32, 36.7f32, 14.75f32, 81.64f32, + 59.18f32, 99.03f32, 74.23f32, 1.26f32, 82.69f32, 35.7f32, 38.39f32, 46.17f32, 64.75f32, + 7.15f32, 36.55f32, 
77.32f32, 18.65f32, 32.8f32, 74.84f32, 18.12f32, 20.19f32, 70.06f32, + 48.37f32, 40.18f32, 45.69f32, 88.3f32, 39.15f32, 60.97f32, 71.29f32, 61.79f32, + 47.23f32, 94.71f32, 58.04f32, 52.4f32, 34.66f32, 59.1f32, 47.11f32, 30.2f32, 58.72f32, + 74.35f32, 83.68f32, 66.8f32, 28.57f32, 29.45f32, 52.02f32, 91.95f32, 92.44f32, + 65.25f32, 38.3f32, 35.6f32, 41.67f32, 91.33f32, 76.81f32, 74.88f32, 33.17f32, 48.36f32, + 41.42f32, 23f32, 8.31f32, 81.69f32, 80.08f32, 50.55f32, 54.46f32, 23.79f32, 43.46f32, + 84.5f32, 10.42f32, 29.51f32, 19.73f32, 46.48f32, 35.01f32, 52.3f32, 66.97f32, 4.8f32, + 74.81f32, 2.82f32, 61.82f32, 25.06f32, 17.3f32, 17.29f32, 63.2f32, 64.1f32, 61.68f32, + 37.42f32, 3.39f32, 97.45f32, 5.32f32, 59.02f32, 35.6f32, + ]; + fixed_chunk_pq_table.preprocess_query(&mut query_vec); + assert_eq!(query_vec[0], 32.39f32 - fixed_chunk_pq_table.centroids[0]); + assert_eq!( + query_vec[127], + 35.6f32 - fixed_chunk_pq_table.centroids[127] + ); + } + + #[test] + fn calculate_distances_tests() { + let pq_pivots_path: &str = "tests/data/siftsmall_learn.bin_pq_pivots.bin"; + + let (dim, pq_table, centroids, chunk_offsets) = + load_pq_pivots_bin(pq_pivots_path, &1).unwrap(); + let fixed_chunk_pq_table = + FixedChunkPQTable::new(dim, 1, pq_table, centroids, chunk_offsets); + + let query_vec: Vec = vec![ + 32.39f32, 78.57f32, 50.32f32, 80.46f32, 6.47f32, 69.76f32, 94.2f32, 83.36f32, 5.8f32, + 68.78f32, 42.32f32, 61.77f32, 90.26f32, 60.41f32, 3.86f32, 61.21f32, 16.6f32, 54.46f32, + 7.29f32, 54.24f32, 92.49f32, 30.18f32, 65.36f32, 99.09f32, 3.8f32, 36.4f32, 86.72f32, + 65.18f32, 29.87f32, 62.21f32, 58.32f32, 43.23f32, 94.3f32, 79.61f32, 39.67f32, + 11.18f32, 48.88f32, 38.19f32, 93.95f32, 10.46f32, 36.7f32, 14.75f32, 81.64f32, + 59.18f32, 99.03f32, 74.23f32, 1.26f32, 82.69f32, 35.7f32, 38.39f32, 46.17f32, 64.75f32, + 7.15f32, 36.55f32, 77.32f32, 18.65f32, 32.8f32, 74.84f32, 18.12f32, 20.19f32, 70.06f32, + 48.37f32, 40.18f32, 45.69f32, 88.3f32, 39.15f32, 60.97f32, 71.29f32, 61.79f32, + 47.23f32, 94.71f32, 58.04f32, 52.4f32, 34.66f32, 59.1f32, 47.11f32, 30.2f32, 58.72f32, + 74.35f32, 83.68f32, 66.8f32, 28.57f32, 29.45f32, 52.02f32, 91.95f32, 92.44f32, + 65.25f32, 38.3f32, 35.6f32, 41.67f32, 91.33f32, 76.81f32, 74.88f32, 33.17f32, 48.36f32, + 41.42f32, 23f32, 8.31f32, 81.69f32, 80.08f32, 50.55f32, 54.46f32, 23.79f32, 43.46f32, + 84.5f32, 10.42f32, 29.51f32, 19.73f32, 46.48f32, 35.01f32, 52.3f32, 66.97f32, 4.8f32, + 74.81f32, 2.82f32, 61.82f32, 25.06f32, 17.3f32, 17.29f32, 63.2f32, 64.1f32, 61.68f32, + 37.42f32, 3.39f32, 97.45f32, 5.32f32, 59.02f32, 35.6f32, + ]; + + let dist_vec = fixed_chunk_pq_table.populate_chunk_distances(&query_vec); + assert_eq!(dist_vec.len(), 256); + + // populate_chunk_distances_test + let mut sampled_output = 0.0; + (0..DIM).for_each(|dim_offset| { + let diff = fixed_chunk_pq_table.pq_table[dim_offset] - query_vec[dim_offset]; + sampled_output += diff * diff; + }); + assert_eq!(sampled_output, dist_vec[0]); + + // populate_chunk_inner_products_test + let dist_vec = fixed_chunk_pq_table.populate_chunk_inner_products(&query_vec); + assert_eq!(dist_vec.len(), 256); + + let mut sampled_output = 0.0; + (0..DIM).for_each(|dim_offset| { + sampled_output -= fixed_chunk_pq_table.pq_table[dim_offset] * query_vec[dim_offset]; + }); + assert_eq!(sampled_output, dist_vec[0]); + + // l2_distance_test + let base_vec: Vec = vec![3u8]; + let dist = fixed_chunk_pq_table.l2_distance(&query_vec, &base_vec); + let mut l2_output = 0.0; + (0..DIM).for_each(|dim_offset| { + let diff = 
fixed_chunk_pq_table.pq_table[3 * DIM + dim_offset] - query_vec[dim_offset]; + l2_output += diff * diff; + }); + assert_eq!(l2_output, dist); + + // inner_product_test + let dist = fixed_chunk_pq_table.inner_product(&query_vec, &base_vec); + let mut l2_output = 0.0; + (0..DIM).for_each(|dim_offset| { + l2_output -= + fixed_chunk_pq_table.pq_table[3 * DIM + dim_offset] * query_vec[dim_offset]; + }); + assert_eq!(l2_output, dist); + + // inflate_vector_test + let inflate_vector = fixed_chunk_pq_table.inflate_vector(&base_vec).unwrap(); + assert_eq!(inflate_vector.len(), DIM); + assert_eq!( + inflate_vector[0], + fixed_chunk_pq_table.pq_table[3 * DIM] + fixed_chunk_pq_table.centroids[0] + ); + assert_eq!( + inflate_vector[1], + fixed_chunk_pq_table.pq_table[3 * DIM + 1] + fixed_chunk_pq_table.centroids[1] + ); + assert_eq!( + inflate_vector[127], + fixed_chunk_pq_table.pq_table[3 * DIM + 127] + fixed_chunk_pq_table.centroids[127] + ); + } + + fn load_pq_pivots_bin( + pq_pivots_path: &str, + num_pq_chunks: &usize, + ) -> ANNResult<(usize, Vec, Vec, Vec)> { + if !file_exists(pq_pivots_path) { + return Err(ANNError::log_pq_error( + "ERROR: PQ k-means pivot file not found.".to_string(), + )); + } + + let (data, offset_num, offset_dim) = load_bin::(pq_pivots_path, 0)?; + let file_offset_data = convert_types_u64_usize(&data, offset_num, offset_dim); + if offset_num != 4 { + let error_message = format!("Error reading pq_pivots file {}. Offsets don't contain correct metadata, # offsets = {}, but expecting 4.", pq_pivots_path, offset_num); + return Err(ANNError::log_pq_error(error_message)); + } + + let (data, pq_center_num, dim) = load_bin::(pq_pivots_path, file_offset_data[0])?; + let pq_table = data.to_vec(); + if pq_center_num != NUM_PQ_CENTROIDS { + let error_message = format!( + "Error reading pq_pivots file {}. file_num_centers = {}, but expecting {} centers.", + pq_pivots_path, pq_center_num, NUM_PQ_CENTROIDS + ); + return Err(ANNError::log_pq_error(error_message)); + } + + let (data, centroid_dim, nc) = load_bin::(pq_pivots_path, file_offset_data[1])?; + let centroids = data.to_vec(); + if centroid_dim != dim || nc != 1 { + let error_message = format!("Error reading pq_pivots file {}. 
file_dim = {}, file_cols = {} but expecting {} entries in 1 dimension.", pq_pivots_path, centroid_dim, nc, dim); + return Err(ANNError::log_pq_error(error_message)); + } + + let (data, chunk_offset_num, nc) = load_bin::(pq_pivots_path, file_offset_data[2])?; + let chunk_offsets = convert_types_u32_usize(&data, chunk_offset_num, nc); + if chunk_offset_num != num_pq_chunks + 1 || nc != 1 { + let error_message = format!("Error reading pq_pivots file at chunk offsets; file has nr={}, nc={} but expecting nr={} and nc=1.", chunk_offset_num, nc, num_pq_chunks + 1); + return Err(ANNError::log_pq_error(error_message)); + } + + Ok((dim, pq_table, centroids, chunk_offsets)) + } +} + +#[cfg(test)] +mod pq_index_prune_query_test { + + use super::*; + + #[test] + fn pq_dist_lookup_test() { + let pq_ids: Vec = vec![1u8, 3u8, 2u8, 2u8]; + let mut pq_dists: Vec = Vec::with_capacity(256 * 2); + for _ in 0..pq_dists.capacity() { + pq_dists.push(rand::random()); + } + + let dists_out = pq_dist_lookup(&pq_ids, 2, 2, &pq_dists); + assert_eq!(dists_out.len(), 2); + assert_eq!(dists_out[0], pq_dists[0 + 1] + pq_dists[256 + 3]); + assert_eq!(dists_out[1], pq_dists[0 + 2] + pq_dists[256 + 2]); + } +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/pq/mod.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/pq/mod.rs new file mode 100644 index 0000000..85daaa7 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/pq/mod.rs @@ -0,0 +1,9 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +mod fixed_chunk_pq_table; +pub use fixed_chunk_pq_table::*; + +mod pq_construction; +pub use pq_construction::*; diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/pq/pq_construction.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/pq/pq_construction.rs new file mode 100644 index 0000000..0a7b078 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/pq/pq_construction.rs @@ -0,0 +1,398 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. 
+ */ +#![warn(missing_debug_implementations)] + +use rayon::prelude::{IndexedParallelIterator, ParallelIterator}; +use rayon::slice::ParallelSliceMut; + +use crate::common::{ANNError, ANNResult}; +use crate::storage::PQStorage; +use crate::utils::{compute_closest_centers, file_exists, k_means_clustering}; + +/// Max size of PQ training set +pub const MAX_PQ_TRAINING_SET_SIZE: f64 = 256_000f64; + +/// Max number of PQ chunks +pub const MAX_PQ_CHUNKS: usize = 512; + +pub const NUM_PQ_CENTROIDS: usize = 256; +/// block size for reading/processing large files and matrices in blocks +const BLOCK_SIZE: usize = 5000000; +const NUM_KMEANS_REPS_PQ: usize = 12; + +/// given training data in train_data of dimensions num_train * dim, generate +/// PQ pivots using k-means algorithm to partition the co-ordinates into +/// num_pq_chunks (if it divides dimension, else rounded) chunks, and runs +/// k-means in each chunk to compute the PQ pivots and stores in bin format in +/// file pq_pivots_path as a s num_centers*dim floating point binary file +/// PQ pivot table layout: {pivot offsets data: METADATA_SIZE}{pivot vector:[dim; num_centroid]}{centroid vector:[dim; 1]}{chunk offsets:[chunk_num+1; 1]} +fn generate_pq_pivots( + train_data: &mut [f32], + num_train: usize, + dim: usize, + num_centers: usize, + num_pq_chunks: usize, + max_k_means_reps: usize, + pq_storage: &mut PQStorage, +) -> ANNResult<()> { + if num_pq_chunks > dim { + return Err(ANNError::log_pq_error( + "Error: number of chunks more than dimension.".to_string(), + )); + } + + if pq_storage.pivot_data_exist() { + let (file_num_centers, file_dim) = pq_storage.read_pivot_metadata()?; + if file_dim == dim && file_num_centers == num_centers { + // PQ pivot file exists. Not generating again. + return Ok(()); + } + } + + // Calculate centroid and center the training data + // If we use L2 distance, there is an option to + // translate all vectors to make them centered and + // then compute PQ. This needs to be set to false + // when using PQ for MIPS as such translations dont + // preserve inner products. + // Now, we're using L2 as default. + let mut centroid: Vec = vec![0.0; dim]; + for dim_index in 0..dim { + for train_data_index in 0..num_train { + centroid[dim_index] += train_data[train_data_index * dim + dim_index]; + } + centroid[dim_index] /= num_train as f32; + } + for dim_index in 0..dim { + for train_data_index in 0..num_train { + train_data[train_data_index * dim + dim_index] -= centroid[dim_index]; + } + } + + // Calculate each chunk's offset + // If we have 8 dimension and 3 chunk then offsets would be [0,3,6,8] + let mut chunk_offsets: Vec = vec![0; num_pq_chunks + 1]; + let mut chunk_offset: usize = 0; + for chunk_index in 0..num_pq_chunks { + chunk_offset += dim / num_pq_chunks; + if chunk_index < (dim % num_pq_chunks) { + chunk_offset += 1; + } + chunk_offsets[chunk_index + 1] = chunk_offset; + } + + let mut full_pivot_data: Vec = vec![0.0; num_centers * dim]; + for chunk_index in 0..num_pq_chunks { + let chunk_size = chunk_offsets[chunk_index + 1] - chunk_offsets[chunk_index]; + + let mut cur_train_data: Vec = vec![0.0; num_train * chunk_size]; + let mut cur_pivot_data: Vec = vec![0.0; num_centers * chunk_size]; + + cur_train_data + .par_chunks_mut(chunk_size) + .enumerate() + .for_each(|(train_data_index, chunk)| { + for (dim_offset, item) in chunk.iter_mut().enumerate() { + *item = train_data + [train_data_index * dim + chunk_offsets[chunk_index] + dim_offset]; + } + }); + + // Run kmeans to get the centroids of this chunk. 
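+        // Example: with dim = 8 and num_pq_chunks = 2 the offsets computed above are
+        // [0, 4, 8], so chunk 0 clusters columns 0..4 of every training vector and chunk 1
+        // clusters columns 4..8, each yielding num_centers centroids of length chunk_size.
+        // The copy loop below writes those centroids back into full_pivot_data so that each
+        // row keeps the full `dim` layout, with this chunk's columns filled in.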
+ let (_closest_docs, _closest_center, _residual) = k_means_clustering( + &cur_train_data, + num_train, + chunk_size, + &mut cur_pivot_data, + num_centers, + max_k_means_reps, + )?; + + // Copy centroids from this chunk table to full table + for center_index in 0..num_centers { + full_pivot_data[center_index * dim + chunk_offsets[chunk_index] + ..center_index * dim + chunk_offsets[chunk_index + 1]] + .copy_from_slice( + &cur_pivot_data[center_index * chunk_size..(center_index + 1) * chunk_size], + ); + } + } + + pq_storage.write_pivot_data( + &full_pivot_data, + ¢roid, + &chunk_offsets, + num_centers, + dim, + )?; + + Ok(()) +} + +/// streams the base file (data_file), and computes the closest centers in each +/// chunk to generate the compressed data_file and stores it in +/// pq_compressed_vectors_path. +/// If the numbber of centers is < 256, it stores as byte vector, else as +/// 4-byte vector in binary format. +/// Compressed PQ table layout: {num_points: usize}{num_chunks: usize}{compressed pq table: [num_points; num_chunks]} +fn generate_pq_data_from_pivots>( + num_centers: usize, + num_pq_chunks: usize, + pq_storage: &mut PQStorage, +) -> ANNResult<()> { + let (num_points, dim) = pq_storage.read_pq_data_metadata()?; + + let full_pivot_data: Vec; + let centroid: Vec; + let chunk_offsets: Vec; + + if !pq_storage.pivot_data_exist() { + return Err(ANNError::log_pq_error( + "ERROR: PQ k-means pivot file not found.".to_string(), + )); + } else { + (full_pivot_data, centroid, chunk_offsets) = + pq_storage.load_pivot_data(&num_pq_chunks, &num_centers, &dim)?; + } + + pq_storage.write_compressed_pivot_metadata(num_points as i32, num_pq_chunks as i32)?; + + let block_size = if num_points <= BLOCK_SIZE { + num_points + } else { + BLOCK_SIZE + }; + let num_blocks = (num_points / block_size) + (num_points % block_size != 0) as usize; + + for block_index in 0..num_blocks { + let start_index: usize = block_index * block_size; + let end_index: usize = std::cmp::min((block_index + 1) * block_size, num_points); + let cur_block_size: usize = end_index - start_index; + + let mut block_compressed_base: Vec = vec![0; cur_block_size * num_pq_chunks]; + + let block_data: Vec = pq_storage.read_pq_block_data(cur_block_size, dim)?; + + let mut adjusted_block_data: Vec = vec![0.0; cur_block_size * dim]; + + for block_data_index in 0..cur_block_size { + for dim_index in 0..dim { + adjusted_block_data[block_data_index * dim + dim_index] = + block_data[block_data_index * dim + dim_index].into() - centroid[dim_index]; + } + } + + for chunk_index in 0..num_pq_chunks { + let cur_chunk_size = chunk_offsets[chunk_index + 1] - chunk_offsets[chunk_index]; + if cur_chunk_size == 0 { + continue; + } + + let mut cur_pivot_data: Vec = vec![0.0; num_centers * cur_chunk_size]; + let mut cur_data: Vec = vec![0.0; cur_block_size * cur_chunk_size]; + let mut closest_center: Vec = vec![0; cur_block_size]; + + // Divide the data into chunks and process each chunk in parallel. 
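+                // cur_data ends up as cur_block_size rows of cur_chunk_size columns, holding
+                // only this chunk's coordinates for every point in the block; cur_pivot_data is
+                // the matching num_centers x cur_chunk_size slice of the pivot table. Each row
+                // is then assigned the index of its closest pivot, which becomes the point's
+                // code for this chunk in block_compressed_base.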
+ cur_data + .par_chunks_mut(cur_chunk_size) + .enumerate() + .for_each(|(block_data_index, chunk)| { + for (dim_offset, item) in chunk.iter_mut().enumerate() { + *item = adjusted_block_data + [block_data_index * dim + chunk_offsets[chunk_index] + dim_offset]; + } + }); + + cur_pivot_data + .par_chunks_mut(cur_chunk_size) + .enumerate() + .for_each(|(center_index, chunk)| { + for (din_offset, item) in chunk.iter_mut().enumerate() { + *item = full_pivot_data + [center_index * dim + chunk_offsets[chunk_index] + din_offset]; + } + }); + + // Compute the closet centers + compute_closest_centers( + &cur_data, + cur_block_size, + cur_chunk_size, + &cur_pivot_data, + num_centers, + 1, + &mut closest_center, + None, + None, + )?; + + block_compressed_base + .par_chunks_mut(num_pq_chunks) + .enumerate() + .for_each(|(block_data_index, slice)| { + slice[chunk_index] = closest_center[block_data_index] as usize; + }); + } + + _ = pq_storage.write_compressed_pivot_data( + &block_compressed_base, + num_centers, + cur_block_size, + num_pq_chunks, + ); + } + Ok(()) +} + +/// Save the data on a file. +/// # Arguments +/// * `p_val` - choose how many ratio sample data as trained data to get pivot +/// * `num_pq_chunks` - pq chunk number +/// * `codebook_prefix` - predefined pivots file named +/// * `pq_storage` - pq file access +pub fn generate_quantized_data>( + p_val: f64, + num_pq_chunks: usize, + codebook_prefix: &str, + pq_storage: &mut PQStorage, +) -> ANNResult<()> { + // If predefined pivots already exists, skip training. + if !file_exists(codebook_prefix) { + // Instantiates train data with random sample updates train_data_vector + // Training data with train_size samples loaded. + // Each sampled file has train_dim. + let (mut train_data_vector, train_size, train_dim) = + pq_storage.gen_random_slice::(p_val)?; + + generate_pq_pivots( + &mut train_data_vector, + train_size, + train_dim, + NUM_PQ_CENTROIDS, + num_pq_chunks, + NUM_KMEANS_REPS_PQ, + pq_storage, + )?; + } + generate_pq_data_from_pivots::(NUM_PQ_CENTROIDS, num_pq_chunks, pq_storage)?; + Ok(()) +} + +#[cfg(test)] +mod pq_test { + + use std::fs::File; + use std::io::Write; + + use super::*; + use crate::utils::{convert_types_u32_usize, convert_types_u64_usize, load_bin, METADATA_SIZE}; + + #[test] + fn generate_pq_pivots_test() { + let pivot_file_name = "generate_pq_pivots_test.bin"; + let compressed_file_name = "compressed.bin"; + let pq_training_file_name = "tests/data/siftsmall_learn.bin"; + let mut pq_storage = + PQStorage::new(pivot_file_name, compressed_file_name, pq_training_file_name).unwrap(); + let mut train_data: Vec = vec![ + 1.0f32, 1.0f32, 1.0f32, 1.0f32, 1.0f32, 1.0f32, 1.0f32, 1.0f32, 2.0f32, 2.0f32, 2.0f32, + 2.0f32, 2.0f32, 2.0f32, 2.0f32, 2.0f32, 2.1f32, 2.1f32, 2.1f32, 2.1f32, 2.1f32, 2.1f32, + 2.1f32, 2.1f32, 2.2f32, 2.2f32, 2.2f32, 2.2f32, 2.2f32, 2.2f32, 2.2f32, 2.2f32, + 100.0f32, 100.0f32, 100.0f32, 100.0f32, 100.0f32, 100.0f32, 100.0f32, 100.0f32, + ]; + generate_pq_pivots(&mut train_data, 5, 8, 2, 2, 5, &mut pq_storage).unwrap(); + + let (data, nr, nc) = load_bin::(pivot_file_name, 0).unwrap(); + let file_offset_data = convert_types_u64_usize(&data, nr, nc); + assert_eq!(file_offset_data[0], METADATA_SIZE); + assert_eq!(nr, 4); + assert_eq!(nc, 1); + + let (data, nr, nc) = load_bin::(pivot_file_name, file_offset_data[0]).unwrap(); + let full_pivot_data = data.to_vec(); + assert_eq!(full_pivot_data.len(), 16); + assert_eq!(nr, 2); + assert_eq!(nc, 8); + + let (data, nr, nc) = load_bin::(pivot_file_name, 
file_offset_data[1]).unwrap(); + let centroid = data.to_vec(); + assert_eq!( + centroid[0], + (1.0f32 + 2.0f32 + 2.1f32 + 2.2f32 + 100.0f32) / 5.0f32 + ); + assert_eq!(nr, 8); + assert_eq!(nc, 1); + + let (data, nr, nc) = load_bin::(pivot_file_name, file_offset_data[2]).unwrap(); + let chunk_offsets = convert_types_u32_usize(&data, nr, nc); + assert_eq!(chunk_offsets[0], 0); + assert_eq!(chunk_offsets[1], 4); + assert_eq!(chunk_offsets[2], 8); + assert_eq!(nr, 3); + assert_eq!(nc, 1); + std::fs::remove_file(pivot_file_name).unwrap(); + } + + #[test] + fn generate_pq_data_from_pivots_test() { + let data_file = "generate_pq_data_from_pivots_test_data.bin"; + //npoints=5, dim=8, 5 vectors [1.0;8] [2.0;8] [2.1;8] [2.2;8] [100.0;8] + let mut train_data: Vec = vec![ + 1.0f32, 1.0f32, 1.0f32, 1.0f32, 1.0f32, 1.0f32, 1.0f32, 1.0f32, 2.0f32, 2.0f32, 2.0f32, + 2.0f32, 2.0f32, 2.0f32, 2.0f32, 2.0f32, 2.1f32, 2.1f32, 2.1f32, 2.1f32, 2.1f32, 2.1f32, + 2.1f32, 2.1f32, 2.2f32, 2.2f32, 2.2f32, 2.2f32, 2.2f32, 2.2f32, 2.2f32, 2.2f32, + 100.0f32, 100.0f32, 100.0f32, 100.0f32, 100.0f32, 100.0f32, 100.0f32, 100.0f32, + ]; + let my_nums_unstructured: &[u8] = unsafe { + std::slice::from_raw_parts(train_data.as_ptr() as *const u8, train_data.len() * 4) + }; + let meta: Vec = vec![5, 8]; + let meta_unstructured: &[u8] = + unsafe { std::slice::from_raw_parts(meta.as_ptr() as *const u8, meta.len() * 4) }; + let mut data_file_writer = File::create(data_file).unwrap(); + data_file_writer + .write_all(meta_unstructured) + .expect("Failed to write sample file"); + data_file_writer + .write_all(my_nums_unstructured) + .expect("Failed to write sample file"); + + let pq_pivots_path = "generate_pq_data_from_pivots_test_pivot.bin"; + let pq_compressed_vectors_path = "generate_pq_data_from_pivots_test.bin"; + let mut pq_storage = + PQStorage::new(pq_pivots_path, pq_compressed_vectors_path, data_file).unwrap(); + generate_pq_pivots(&mut train_data, 5, 8, 2, 2, 5, &mut pq_storage).unwrap(); + generate_pq_data_from_pivots::(2, 2, &mut pq_storage).unwrap(); + let (data, nr, nc) = load_bin::(pq_compressed_vectors_path, 0).unwrap(); + assert_eq!(nr, 5); + assert_eq!(nc, 2); + assert_eq!(data[0], data[2]); + assert_ne!(data[0], data[8]); + + std::fs::remove_file(data_file).unwrap(); + std::fs::remove_file(pq_pivots_path).unwrap(); + std::fs::remove_file(pq_compressed_vectors_path).unwrap(); + } + + #[test] + fn pq_end_to_end_validation_with_codebook_test() { + let data_file = "tests/data/siftsmall_learn.bin"; + let pq_pivots_path = "tests/data/siftsmall_learn.bin_pq_pivots.bin"; + let gound_truth_path = "tests/data/siftsmall_learn.bin_pq_compressed.bin"; + let pq_compressed_vectors_path = "validation.bin"; + let mut pq_storage = + PQStorage::new(pq_pivots_path, pq_compressed_vectors_path, data_file).unwrap(); + generate_quantized_data::(0.5, 1, pq_pivots_path, &mut pq_storage).unwrap(); + + let (data, nr, nc) = load_bin::(pq_compressed_vectors_path, 0).unwrap(); + let (gt_data, gt_nr, gt_nc) = load_bin::(gound_truth_path, 0).unwrap(); + assert_eq!(nr, gt_nr); + assert_eq!(nc, gt_nc); + for i in 0..data.len() { + assert_eq!(data[i], gt_data[i]); + } + std::fs::remove_file(pq_compressed_vectors_path).unwrap(); + } +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/scratch/concurrent_queue.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/scratch/concurrent_queue.rs new file mode 100644 index 0000000..8c72bab --- /dev/null +++ 
b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/scratch/concurrent_queue.rs @@ -0,0 +1,312 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +#![warn(missing_debug_implementations, missing_docs)] + +//! Aligned allocator + +use std::collections::VecDeque; +use std::ops::Deref; +use std::sync::{Arc, Condvar, Mutex, MutexGuard}; +use std::time::Duration; + +use crate::common::{ANNError, ANNResult}; + +#[derive(Debug)] +/// Query scratch data structures +pub struct ConcurrentQueue { + q: Mutex>, + c: Mutex, + push_cv: Condvar, +} + +impl Default for ConcurrentQueue { + fn default() -> Self { + Self::new() + } +} + +impl ConcurrentQueue { + /// Create a concurrent queue + pub fn new() -> Self { + Self { + q: Mutex::new(VecDeque::new()), + c: Mutex::new(false), + push_cv: Condvar::new(), + } + } + + /// Block the current thread until it is able to acquire the mutex + pub fn reserve(&self, size: usize) -> ANNResult<()> { + let mut guard = lock(&self.q)?; + guard.reserve(size); + Ok(()) + } + + /// queue stats + pub fn size(&self) -> ANNResult { + let guard = lock(&self.q)?; + + Ok(guard.len()) + } + + /// empty the queue + pub fn is_empty(&self) -> ANNResult { + Ok(self.size()? == 0) + } + + /// push back + pub fn push(&self, new_val: T) -> ANNResult<()> { + let mut guard = lock(&self.q)?; + self.push_internal(&mut guard, new_val); + self.push_cv.notify_all(); + Ok(()) + } + + /// push back + fn push_internal(&self, guard: &mut MutexGuard>, new_val: T) { + guard.push_back(new_val); + } + + /// insert into queue + pub fn insert(&self, iter: I) -> ANNResult<()> + where + I: IntoIterator, + { + let mut guard = lock(&self.q)?; + for item in iter { + self.push_internal(&mut guard, item); + } + + self.push_cv.notify_all(); + Ok(()) + } + + /// pop front + pub fn pop(&self) -> ANNResult> { + let mut guard = lock(&self.q)?; + Ok(guard.pop_front()) + } + + /// Empty - is this necessary? + pub fn empty_queue(&self) -> ANNResult<()> { + let mut guard = lock(&self.q)?; + while !guard.is_empty() { + let _ = guard.pop_front(); + } + Ok(()) + } + + /// register for push notifications + pub fn wait_for_push_notify(&self, wait_time: Duration) -> ANNResult<()> { + let guard_lock = lock(&self.c)?; + let _ = self + .push_cv + .wait_timeout(guard_lock, wait_time) + .map_err(|err| { + ANNError::log_lock_poison_error(format!( + "ConcurrentQueue Lock is poisoned, err={}", + err + )) + })?; + Ok(()) + } +} + +fn lock(mutex: &Mutex) -> ANNResult> { + let guard = mutex.lock().map_err(|err| { + ANNError::log_lock_poison_error(format!("ConcurrentQueue lock is poisoned, err={}", err)) + })?; + Ok(guard) +} + +/// A thread-safe queue that holds instances of `T`. +/// Each instance is stored in a `Box` to keep the size of the queue node constant. +#[derive(Debug)] +pub struct ArcConcurrentBoxedQueue { + internal_queue: Arc>>, +} + +impl ArcConcurrentBoxedQueue { + /// Create a new `ArcConcurrentBoxedQueue`. + pub fn new() -> Self { + Self { + internal_queue: Arc::new(ConcurrentQueue::new()), + } + } +} + +impl Default for ArcConcurrentBoxedQueue { + fn default() -> Self { + Self::new() + } +} + +impl Clone for ArcConcurrentBoxedQueue { + /// Create a new `ArcConcurrentBoxedQueue` that shares the same internal queue + /// with the existing one. This allows multiple `ArcConcurrentBoxedQueue` to + /// operate on the same underlying queue. 
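+    ///
+    /// A minimal usage sketch (illustrative only, not part of the crate's tests): clone the
+    /// handle once per worker so producers and consumers share one underlying
+    /// `ConcurrentQueue`; the clone is a cheap `Arc::clone`, i.e. a pointer copy plus a
+    /// reference-count bump.
+    ///
+    /// ```ignore
+    /// let pool: ArcConcurrentBoxedQueue<i32> = ArcConcurrentBoxedQueue::new();
+    /// let producer = pool.clone();           // same queue, new handle
+    /// producer.push(Box::new(42)).unwrap();  // Deref gives ConcurrentQueue<Box<i32>>
+    /// assert_eq!(*pool.pop().unwrap().unwrap(), 42);
+    /// ```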
+ fn clone(&self) -> Self { + Self { + internal_queue: Arc::clone(&self.internal_queue), + } + } +} + +/// Deref to the ConcurrentQueue. +impl Deref for ArcConcurrentBoxedQueue { + type Target = ConcurrentQueue>; + + fn deref(&self) -> &Self::Target { + &self.internal_queue + } +} + +#[cfg(test)] +mod tests { + use crate::model::ConcurrentQueue; + use std::sync::Arc; + use std::thread; + use std::time::Duration; + + #[test] + fn test_push_pop() { + let queue = ConcurrentQueue::::new(); + + queue.push(1).unwrap(); + queue.push(2).unwrap(); + queue.push(3).unwrap(); + + assert_eq!(queue.pop().unwrap(), Some(1)); + assert_eq!(queue.pop().unwrap(), Some(2)); + assert_eq!(queue.pop().unwrap(), Some(3)); + assert_eq!(queue.pop().unwrap(), None); + } + + #[test] + fn test_size_empty() { + let queue = ConcurrentQueue::new(); + + assert_eq!(queue.size().unwrap(), 0); + assert!(queue.is_empty().unwrap()); + + queue.push(1).unwrap(); + queue.push(2).unwrap(); + + assert_eq!(queue.size().unwrap(), 2); + assert!(!queue.is_empty().unwrap()); + + queue.pop().unwrap(); + queue.pop().unwrap(); + + assert_eq!(queue.size().unwrap(), 0); + assert!(queue.is_empty().unwrap()); + } + + #[test] + fn test_insert() { + let queue = ConcurrentQueue::new(); + + let data = vec![1, 2, 3]; + queue.insert(data.into_iter()).unwrap(); + + assert_eq!(queue.pop().unwrap(), Some(1)); + assert_eq!(queue.pop().unwrap(), Some(2)); + assert_eq!(queue.pop().unwrap(), Some(3)); + assert_eq!(queue.pop().unwrap(), None); + } + + #[test] + fn test_notifications() { + let queue = Arc::new(ConcurrentQueue::new()); + let queue_clone = Arc::clone(&queue); + + let producer = thread::spawn(move || { + for i in 0..3 { + thread::sleep(Duration::from_millis(50)); + queue_clone.push(i).unwrap(); + } + }); + + let consumer = thread::spawn(move || { + let mut values = vec![]; + + for _ in 0..3 { + let mut val = -1; + while val == -1 { + queue + .wait_for_push_notify(Duration::from_millis(10)) + .unwrap(); + val = queue.pop().unwrap().unwrap_or(-1); + } + + values.push(val); + } + + values + }); + + producer.join().unwrap(); + let consumer_results = consumer.join().unwrap(); + + assert_eq!(consumer_results, vec![0, 1, 2]); + } + + #[test] + fn test_multithreaded_push_pop() { + let queue = Arc::new(ConcurrentQueue::new()); + let queue_clone = Arc::clone(&queue); + + let producer = thread::spawn(move || { + for i in 0..10 { + queue_clone.push(i).unwrap(); + thread::sleep(Duration::from_millis(50)); + } + }); + + let consumer = thread::spawn(move || { + let mut values = vec![]; + + for _ in 0..10 { + let mut val = -1; + while val == -1 { + val = queue.pop().unwrap().unwrap_or(-1); + thread::sleep(Duration::from_millis(10)); + } + + values.push(val); + } + + values + }); + + producer.join().unwrap(); + let consumer_results = consumer.join().unwrap(); + + assert_eq!(consumer_results, (0..10).collect::>()); + } + + /// This is a single value test. It avoids the unlimited wait until the collectin got empty on the previous test. + /// It will make sure the signal mutex is matching the waiting mutex. 
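+    /// `wait_for_push_notify` returns `Ok(())` both when a push was signalled and when the
+    /// timeout elapsed, so callers still re-check the queue with `pop` after waking, as
+    /// `ScratchStoreManager::new` does.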
+ #[test] + fn test_wait_for_push_notify() { + let queue = Arc::new(ConcurrentQueue::::new()); + let queue_clone = Arc::clone(&queue); + + let producer = thread::spawn(move || { + thread::sleep(Duration::from_millis(100)); + queue_clone.push(1).unwrap(); + }); + + let consumer = thread::spawn(move || { + queue + .wait_for_push_notify(Duration::from_millis(200)) + .unwrap(); + assert_eq!(queue.pop().unwrap(), Some(1)); + }); + + producer.join().unwrap(); + consumer.join().unwrap(); + } +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/scratch/inmem_query_scratch.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/scratch/inmem_query_scratch.rs new file mode 100644 index 0000000..f0fa432 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/scratch/inmem_query_scratch.rs @@ -0,0 +1,186 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +#![warn(missing_debug_implementations, missing_docs)] + +//! Scratch space for in-memory index based search + +use std::cmp::max; +use std::mem; + +use hashbrown::HashSet; + +use crate::common::{ANNError, ANNResult, AlignedBoxWithSlice}; +use crate::model::configuration::index_write_parameters::IndexWriteParameters; +use crate::model::{Neighbor, NeighborPriorityQueue, PQScratch}; + +use super::Scratch; + +/// In-mem index related limits +pub const GRAPH_SLACK_FACTOR: f64 = 1.3_f64; + +/// Max number of points for using bitset +pub const MAX_POINTS_FOR_USING_BITSET: usize = 100000; + +/// TODO: SSD Index related limits +pub const MAX_GRAPH_DEGREE: usize = 512; + +/// TODO: SSD Index related limits +pub const MAX_N_CMPS: usize = 16384; + +/// TODO: SSD Index related limits +pub const SECTOR_LEN: usize = 4096; + +/// TODO: SSD Index related limits +pub const MAX_N_SECTOR_READS: usize = 128; + +/// The alignment required for memory access. This will be multiplied with size of T to get the actual alignment +pub const QUERY_ALIGNMENT_OF_T_SIZE: usize = 16; + +/// Scratch space for in-memory index based search +#[derive(Debug)] +pub struct InMemQueryScratch { + /// Size of the candidate queue + pub candidate_size: u32, + + /// Max degree for each vertex + pub max_degree: u32, + + /// Max occlusion size + pub max_occlusion_size: u32, + + /// Query node + pub query: AlignedBoxWithSlice, + + /// Best candidates, whose size is candidate_queue_size + pub best_candidates: NeighborPriorityQueue, + + /// Occlude factor + pub occlude_factor: Vec, + + /// Visited neighbor id + pub id_scratch: Vec, + + /// The distance between visited neighbor and query node + pub dist_scratch: Vec, + + /// The PQ Scratch, keey it private since this class use the Box to own the memory. 
Use the function pq_scratch to get its reference + pub pq_scratch: Option>, + + /// Buffers used in process delete, capacity increases as needed + pub expanded_nodes_set: HashSet, + + /// Expanded neighbors + pub expanded_neighbors_vector: Vec, + + /// Occlude list + pub occlude_list_output: Vec, + + /// RobinSet for larger dataset + pub node_visited_robinset: HashSet, +} + +impl InMemQueryScratch { + /// Create InMemQueryScratch instance + pub fn new( + search_candidate_size: u32, + index_write_parameter: &IndexWriteParameters, + init_pq_scratch: bool, + ) -> ANNResult { + let indexing_candidate_size = index_write_parameter.search_list_size; + let max_degree = index_write_parameter.max_degree; + let max_occlusion_size = index_write_parameter.max_occlusion_size; + + if search_candidate_size == 0 || indexing_candidate_size == 0 || max_degree == 0 || N == 0 { + return Err(ANNError::log_index_error(format!( + "In InMemQueryScratch, one of search_candidate_size = {}, indexing_candidate_size = {}, dim = {} or max_degree = {} is zero.", + search_candidate_size, indexing_candidate_size, N, max_degree))); + } + + let query = AlignedBoxWithSlice::new(N, mem::size_of::() * QUERY_ALIGNMENT_OF_T_SIZE)?; + let pq_scratch = if init_pq_scratch { + Some(Box::new(PQScratch::new(MAX_GRAPH_DEGREE, N)?)) + } else { + None + }; + + let occlude_factor = Vec::with_capacity(max_occlusion_size as usize); + + let capacity = (1.5 * GRAPH_SLACK_FACTOR * (max_degree as f64)).ceil() as usize; + let id_scratch = Vec::with_capacity(capacity); + let dist_scratch = Vec::with_capacity(capacity); + + let expanded_nodes_set = HashSet::::new(); + let expanded_neighbors_vector = Vec::::new(); + let occlude_list_output = Vec::::new(); + + let candidate_size = max(search_candidate_size, indexing_candidate_size); + let node_visited_robinset = HashSet::::with_capacity(20 * candidate_size as usize); + let scratch = Self { + candidate_size, + max_degree, + max_occlusion_size, + query, + best_candidates: NeighborPriorityQueue::with_capacity(candidate_size as usize), + occlude_factor, + id_scratch, + dist_scratch, + pq_scratch, + expanded_nodes_set, + expanded_neighbors_vector, + occlude_list_output, + node_visited_robinset, + }; + + Ok(scratch) + } + + /// Resize the scratch with new candidate size + pub fn resize_for_new_candidate_size(&mut self, new_candidate_size: u32) { + if new_candidate_size > self.candidate_size { + let delta = new_candidate_size - self.candidate_size; + self.candidate_size = new_candidate_size; + self.best_candidates.reserve(delta as usize); + self.node_visited_robinset.reserve((20 * delta) as usize); + } + } +} + +impl Scratch for InMemQueryScratch { + fn clear(&mut self) { + self.best_candidates.clear(); + self.occlude_factor.clear(); + + self.node_visited_robinset.clear(); + + self.id_scratch.clear(); + self.dist_scratch.clear(); + + self.expanded_nodes_set.clear(); + self.expanded_neighbors_vector.clear(); + self.occlude_list_output.clear(); + } +} + +#[cfg(test)] +mod inmemory_query_scratch_test { + use crate::model::configuration::index_write_parameters::IndexWriteParametersBuilder; + + use super::*; + + #[test] + fn node_visited_robinset_test() { + let index_write_parameter = IndexWriteParametersBuilder::new(10, 10) + .with_max_occlusion_size(5) + .build(); + + let mut scratch = + InMemQueryScratch::::new(100, &index_write_parameter, false).unwrap(); + + assert_eq!(scratch.node_visited_robinset.len(), 0); + + scratch.clear(); + assert_eq!(scratch.node_visited_robinset.len(), 0); + } +} diff --git 
a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/scratch/mod.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/scratch/mod.rs new file mode 100644 index 0000000..cf9ee29 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/scratch/mod.rs @@ -0,0 +1,28 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +pub mod scratch_traits; +pub use scratch_traits::*; + +pub mod concurrent_queue; +pub use concurrent_queue::*; + +pub mod pq_scratch; +pub use pq_scratch::*; + + +pub mod inmem_query_scratch; +pub use inmem_query_scratch::*; + +pub mod scratch_store_manager; +pub use scratch_store_manager::*; + +pub mod ssd_query_scratch; +pub use ssd_query_scratch::*; + +pub mod ssd_thread_data; +pub use ssd_thread_data::*; + +pub mod ssd_io_context; +pub use ssd_io_context::*; diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/scratch/pq_scratch.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/scratch/pq_scratch.rs new file mode 100644 index 0000000..bf9d6c5 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/scratch/pq_scratch.rs @@ -0,0 +1,105 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +#![warn(missing_debug_implementations, missing_docs)] + +//! Aligned allocator + +use std::mem::size_of; + +use crate::common::{ANNResult, AlignedBoxWithSlice}; + +const MAX_PQ_CHUNKS: usize = 512; + +#[derive(Debug)] +/// PQ scratch +pub struct PQScratch { + /// Aligned pq table dist scratch, must be at least [256 * NCHUNKS] + pub aligned_pqtable_dist_scratch: AlignedBoxWithSlice, + /// Aligned dist scratch, must be at least diskann MAX_DEGREE + pub aligned_dist_scratch: AlignedBoxWithSlice, + /// Aligned pq coord scratch, must be at least [N_CHUNKS * MAX_DEGREE] + pub aligned_pq_coord_scratch: AlignedBoxWithSlice, + /// Rotated query + pub rotated_query: AlignedBoxWithSlice, + /// Aligned query float + pub aligned_query_float: AlignedBoxWithSlice, +} + +impl PQScratch { + const ALIGNED_ALLOC_256: usize = 256; + + /// Create a new pq scratch + pub fn new(graph_degree: usize, aligned_dim: usize) -> ANNResult { + let aligned_pq_coord_scratch = + AlignedBoxWithSlice::new(graph_degree * MAX_PQ_CHUNKS, PQScratch::ALIGNED_ALLOC_256)?; + let aligned_pqtable_dist_scratch = + AlignedBoxWithSlice::new(256 * MAX_PQ_CHUNKS, PQScratch::ALIGNED_ALLOC_256)?; + let aligned_dist_scratch = + AlignedBoxWithSlice::new(graph_degree, PQScratch::ALIGNED_ALLOC_256)?; + let aligned_query_float = AlignedBoxWithSlice::new(aligned_dim, 8 * size_of::())?; + let rotated_query = AlignedBoxWithSlice::new(aligned_dim, 8 * size_of::())?; + + Ok(Self { + aligned_pqtable_dist_scratch, + aligned_dist_scratch, + aligned_pq_coord_scratch, + rotated_query, + aligned_query_float, + }) + } + + /// Set rotated_query and aligned_query_float values + pub fn set(&mut self, dim: usize, query: &[T], norm: f32) + where + T: Into + Copy, + { + for (d, item) in query.iter().enumerate().take(dim) { + let query_val: f32 = (*item).into(); + if (norm - 1.0).abs() > f32::EPSILON { + self.rotated_query[d] = query_val / norm; + self.aligned_query_float[d] = query_val / norm; + } else { + self.rotated_query[d] = query_val; + self.aligned_query_float[d] = query_val; + } + } + } +} + +#[cfg(test)] +mod tests { + use crate::model::PQScratch; + + #[test] + 
fn test_pq_scratch() { + let graph_degree = 512; + let aligned_dim = 8; + + let mut pq_scratch: PQScratch = PQScratch::new(graph_degree, aligned_dim).unwrap(); + + // Check alignment + assert_eq!( + (pq_scratch.aligned_pqtable_dist_scratch.as_ptr() as usize) % 256, + 0 + ); + assert_eq!((pq_scratch.aligned_dist_scratch.as_ptr() as usize) % 256, 0); + assert_eq!( + (pq_scratch.aligned_pq_coord_scratch.as_ptr() as usize) % 256, + 0 + ); + assert_eq!((pq_scratch.rotated_query.as_ptr() as usize) % 32, 0); + assert_eq!((pq_scratch.aligned_query_float.as_ptr() as usize) % 32, 0); + + // Test set() method + let query = vec![1u8, 2, 3, 4, 5, 6, 7, 8]; + let norm = 2.0f32; + pq_scratch.set::(query.len(), &query, norm); + + (0..query.len()).for_each(|i| { + assert_eq!(pq_scratch.rotated_query[i], query[i] as f32 / norm); + assert_eq!(pq_scratch.aligned_query_float[i], query[i] as f32 / norm); + }); + } +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/scratch/scratch_store_manager.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/scratch/scratch_store_manager.rs new file mode 100644 index 0000000..4e2397f --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/scratch/scratch_store_manager.rs @@ -0,0 +1,84 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +use crate::common::ANNResult; + +use super::ArcConcurrentBoxedQueue; +use super::{scratch_traits::Scratch}; +use std::time::Duration; + +pub struct ScratchStoreManager { + scratch: Option>, + scratch_pool: ArcConcurrentBoxedQueue, +} + +impl ScratchStoreManager { + pub fn new(scratch_pool: ArcConcurrentBoxedQueue, wait_time: Duration) -> ANNResult { + let mut scratch = scratch_pool.pop()?; + while scratch.is_none() { + scratch_pool.wait_for_push_notify(wait_time)?; + scratch = scratch_pool.pop()?; + } + + Ok(ScratchStoreManager { + scratch, + scratch_pool, + }) + } + + pub fn scratch_space(&mut self) -> Option<&mut T> { + self.scratch.as_deref_mut() + } +} + +impl Drop for ScratchStoreManager { + fn drop(&mut self) { + if let Some(mut scratch) = self.scratch.take() { + scratch.clear(); + let _ = self.scratch_pool.push(scratch); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[derive(Debug)] + struct MyScratch { + data: Vec, + } + + impl Scratch for MyScratch { + fn clear(&mut self) { + self.data.clear(); + } + } + + #[test] + fn test_scratch_store_manager() { + let wait_time = Duration::from_millis(100); + + let scratch_pool = ArcConcurrentBoxedQueue::new(); + for i in 1..3 { + scratch_pool.push(Box::new(MyScratch { + data: vec![i, 2 * i, 3 * i], + })).unwrap(); + } + + let mut manager = ScratchStoreManager::new(scratch_pool.clone(), wait_time).unwrap(); + let scratch_space = manager.scratch_space().unwrap(); + + assert_eq!(scratch_space.data, vec![1, 2, 3]); + + // At this point, the ScratchStoreManager will go out of scope, + // causing the Drop implementation to be called, which should + // call the clear method on MyScratch. 
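+        // Dropping the manager clears the borrowed scratch (emptying its data) and pushes it
+        // to the back of the pool, so the pop below returns the entry that was pushed second,
+        // [2, 4, 6], rather than the cleared one.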
+ drop(manager); + + let current_scratch = scratch_pool.pop().unwrap().unwrap(); + assert_eq!(current_scratch.data, vec![2, 4, 6]); + } +} + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/scratch/scratch_traits.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/scratch/scratch_traits.rs new file mode 100644 index 0000000..71e4b93 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/scratch/scratch_traits.rs @@ -0,0 +1,8 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +pub trait Scratch { + fn clear(&mut self); +} + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/scratch/ssd_io_context.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/scratch/ssd_io_context.rs new file mode 100644 index 0000000..d4dff0c --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/scratch/ssd_io_context.rs @@ -0,0 +1,38 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +#![allow(dead_code)] // Todo: Remove this when the disk index query code is complete. +use crate::common::ANNError; + +use platform::{FileHandle, IOCompletionPort}; + +// The IOContext struct for disk I/O. One for each thread. +pub struct IOContext { + pub status: Status, + pub file_handle: FileHandle, + pub io_completion_port: IOCompletionPort, +} + +impl Default for IOContext { + fn default() -> Self { + IOContext { + status: Status::ReadWait, + file_handle: FileHandle::default(), + io_completion_port: IOCompletionPort::default(), + } + } +} + +impl IOContext { + pub fn new() -> Self { + Self::default() + } +} + +pub enum Status { + ReadWait, + ReadSuccess, + ReadFailed(ANNError), + ProcessComplete, +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/scratch/ssd_query_scratch.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/scratch/ssd_query_scratch.rs new file mode 100644 index 0000000..b366693 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/scratch/ssd_query_scratch.rs @@ -0,0 +1,132 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +#![allow(dead_code)] // Todo: Remove this when the disk index query code is complete. +use std::mem; +use std::vec::Vec; + +use hashbrown::HashSet; + +use crate::{ + common::{ANNResult, AlignedBoxWithSlice}, + model::{Neighbor, NeighborPriorityQueue}, + model::data_store::DiskScratchDataset, +}; + +use super::{PQScratch, Scratch, MAX_GRAPH_DEGREE, QUERY_ALIGNMENT_OF_T_SIZE}; + +// Scratch space for disk index based search. +pub struct SSDQueryScratch +{ + // Disk scratch dataset storing fp vectors with aligned dim (N) + pub scratch_dataset: DiskScratchDataset, + + // The query scratch. + pub query: AlignedBoxWithSlice, + + /// The PQ Scratch. + pub pq_scratch: Option>, + + // The visited set. + pub id_scratch: HashSet, + + /// Best candidates, whose size is candidate_queue_size + pub best_candidates: NeighborPriorityQueue, + + // Full return set. 
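+    // Like id_scratch above, this vector is pre-sized with visited_reserve in new(), since
+    // both grow with the number of nodes touched during a single disk search.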
+ pub full_return_set: Vec, +} + +// +impl SSDQueryScratch +{ + pub fn new( + visited_reserve: usize, + candidate_queue_size: usize, + init_pq_scratch: bool, + ) -> ANNResult { + let scratch_dataset = DiskScratchDataset::::new()?; + + let query = AlignedBoxWithSlice::::new(N, mem::size_of::() * QUERY_ALIGNMENT_OF_T_SIZE)?; + + let id_scratch = HashSet::::with_capacity(visited_reserve); + let full_return_set = Vec::::with_capacity(visited_reserve); + let best_candidates = NeighborPriorityQueue::with_capacity(candidate_queue_size); + + let pq_scratch = if init_pq_scratch { + Some(Box::new(PQScratch::new(MAX_GRAPH_DEGREE, N)?)) + } else { + None + }; + + Ok(Self { + scratch_dataset, + query, + pq_scratch, + id_scratch, + best_candidates, + full_return_set, + }) + } + + pub fn pq_scratch(&mut self) -> &Option> { + &self.pq_scratch + } +} + +impl Scratch for SSDQueryScratch +{ + fn clear(&mut self) { + self.id_scratch.clear(); + self.best_candidates.clear(); + self.full_return_set.clear(); + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_new() { + // Arrange + let visited_reserve = 100; + let candidate_queue_size = 10; + let init_pq_scratch = true; + + // Act + let result = + SSDQueryScratch::::new(visited_reserve, candidate_queue_size, init_pq_scratch); + + // Assert + assert!(result.is_ok()); + + let scratch = result.unwrap(); + + // Assert the properties of the scratch instance + assert!(scratch.pq_scratch.is_some()); + assert!(scratch.id_scratch.is_empty()); + assert!(scratch.best_candidates.size() == 0); + assert!(scratch.full_return_set.is_empty()); + } + + #[test] + fn test_clear() { + // Arrange + let mut scratch = SSDQueryScratch::::new(100, 10, true).unwrap(); + + // Add some data to scratch fields + scratch.id_scratch.insert(1); + scratch.best_candidates.insert(Neighbor::new(2, 0.5)); + scratch.full_return_set.push(Neighbor::new(3, 0.8)); + + // Act + scratch.clear(); + + // Assert + assert!(scratch.id_scratch.is_empty()); + assert!(scratch.best_candidates.size() == 0); + assert!(scratch.full_return_set.is_empty()); + } +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/scratch/ssd_thread_data.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/scratch/ssd_thread_data.rs new file mode 100644 index 0000000..e374959 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/scratch/ssd_thread_data.rs @@ -0,0 +1,92 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +#![allow(dead_code)] // Todo: Remove this when the disk index query code is complete. +use std::sync::Arc; + +use super::{scratch_traits::Scratch, IOContext, SSDQueryScratch}; +use crate::common::ANNResult; + +// The thread data struct for SSD I/O. One for each thread, contains the ScratchSpace and the IOContext. 
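+// io_context starts out as None and is filled in later by the disk-index code (not shown in
+// this file); clear() only resets the scratch, so any per-thread I/O state held in the
+// IOContext is left untouched.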
+pub struct SSDThreadData { + pub scratch: SSDQueryScratch, + pub io_context: Option>, +} + +impl SSDThreadData { + pub fn new( + aligned_dim: usize, + visited_reserve: usize, + init_pq_scratch: bool, + ) -> ANNResult { + let scratch = SSDQueryScratch::new(aligned_dim, visited_reserve, init_pq_scratch)?; + Ok(SSDThreadData { + scratch, + io_context: None, + }) + } + + pub fn clear(&mut self) { + self.scratch.clear(); + } +} + +#[cfg(test)] +mod tests { + use crate::model::Neighbor; + + use super::*; + + #[test] + fn test_new() { + // Arrange + let aligned_dim = 10; + let visited_reserve = 100; + let init_pq_scratch = true; + + // Act + let result = SSDThreadData::::new(aligned_dim, visited_reserve, init_pq_scratch); + + // Assert + assert!(result.is_ok()); + + let thread_data = result.unwrap(); + + // Assert the properties of the thread data instance + assert!(thread_data.io_context.is_none()); + + let scratch = &thread_data.scratch; + // Assert the properties of the scratch instance + assert!(scratch.pq_scratch.is_some()); + assert!(scratch.id_scratch.is_empty()); + assert!(scratch.best_candidates.size() == 0); + assert!(scratch.full_return_set.is_empty()); + } + + #[test] + fn test_clear() { + // Arrange + let mut thread_data = SSDThreadData::::new(10, 100, true).unwrap(); + + // Add some data to scratch fields + thread_data.scratch.id_scratch.insert(1); + thread_data + .scratch + .best_candidates + .insert(Neighbor::new(2, 0.5)); + thread_data + .scratch + .full_return_set + .push(Neighbor::new(3, 0.8)); + + // Act + thread_data.clear(); + + // Assert + assert!(thread_data.scratch.id_scratch.is_empty()); + assert!(thread_data.scratch.best_candidates.size() == 0); + assert!(thread_data.scratch.full_return_set.is_empty()); + } +} + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/vertex/dimension.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/vertex/dimension.rs new file mode 100644 index 0000000..32670a8 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/vertex/dimension.rs @@ -0,0 +1,22 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +#![warn(missing_debug_implementations, missing_docs)] + +//! Vertex dimension + +/// 32 vertex dimension +pub const DIM_32: usize = 32; + +/// 64 vertex dimension +pub const DIM_64: usize = 64; + +/// 104 vertex dimension +pub const DIM_104: usize = 104; + +/// 128 vertex dimension +pub const DIM_128: usize = 128; + +/// 256 vertex dimension +pub const DIM_256: usize = 256; diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/vertex/mod.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/vertex/mod.rs new file mode 100644 index 0000000..224d476 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/vertex/mod.rs @@ -0,0 +1,10 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. 
+ */ +#[allow(clippy::module_inception)] +mod vertex; +pub use vertex::Vertex; + +mod dimension; +pub use dimension::*; diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/vertex/vertex.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/vertex/vertex.rs new file mode 100644 index 0000000..5536974 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/vertex/vertex.rs @@ -0,0 +1,68 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +#![warn(missing_debug_implementations, missing_docs)] + +//! Vertex + +use std::array::TryFromSliceError; + +use vector::{FullPrecisionDistance, Metric}; + +/// Vertex with data type T and dimension N +#[derive(Debug)] +pub struct Vertex<'a, T, const N: usize> +where + [T; N]: FullPrecisionDistance, +{ + /// Vertex value + val: &'a [T; N], + + /// Vertex Id + id: u32, +} + +impl<'a, T, const N: usize> Vertex<'a, T, N> +where + [T; N]: FullPrecisionDistance, +{ + /// Create the vertex with data + pub fn new(val: &'a [T; N], id: u32) -> Self { + Self { + val, + id, + } + } + + /// Compare the vertex with another. + #[inline(always)] + pub fn compare(&self, other: &Vertex<'a, T, N>, metric: Metric) -> f32 { + <[T; N]>::distance_compare(self.val, other.val, metric) + } + + /// Get the vector associated with the vertex. + #[inline] + pub fn vector(&self) -> &[T; N] { + self.val + } + + /// Get the vertex id. + #[inline] + pub fn vertex_id(&self) -> u32 { + self.id + } +} + +impl<'a, T, const N: usize> TryFrom<(&'a [T], u32)> for Vertex<'a, T, N> +where + [T; N]: FullPrecisionDistance, +{ + type Error = TryFromSliceError; + + fn try_from((mem_slice, id): (&'a [T], u32)) -> Result { + let array: &[T; N] = mem_slice.try_into()?; + Ok(Vertex::new(array, id)) + } +} + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/windows_aligned_file_reader/mod.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/windows_aligned_file_reader/mod.rs new file mode 100644 index 0000000..0e63df0 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/windows_aligned_file_reader/mod.rs @@ -0,0 +1,7 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +#[allow(clippy::module_inception)] +mod windows_aligned_file_reader; +pub use windows_aligned_file_reader::*; diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/windows_aligned_file_reader/windows_aligned_file_reader.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/windows_aligned_file_reader/windows_aligned_file_reader.rs new file mode 100644 index 0000000..1cc3dc0 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/model/windows_aligned_file_reader/windows_aligned_file_reader.rs @@ -0,0 +1,414 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. 
+ */ +use std::sync::Arc; +use std::time::Duration; +use std::{ptr, thread}; + +use crossbeam::sync::ShardedLock; +use hashbrown::HashMap; +use once_cell::sync::Lazy; + +use platform::file_handle::{AccessMode, ShareMode}; +use platform::{ + file_handle::FileHandle, + file_io::{get_queued_completion_status, read_file_to_slice}, + io_completion_port::IOCompletionPort, +}; + +use winapi::{ + shared::{basetsd::ULONG_PTR, minwindef::DWORD}, + um::minwinbase::OVERLAPPED, +}; + +use crate::common::{ANNError, ANNResult}; +use crate::model::IOContext; + +pub const MAX_IO_CONCURRENCY: usize = 128; // To do: explore the optimal value for this. The current value is taken from C++ code. +pub const FILE_ATTRIBUTE_READONLY: DWORD = 0x00000001; +pub const IO_COMPLETION_TIMEOUT: DWORD = u32::MAX; // Infinite timeout. +pub const DISK_IO_ALIGNMENT: usize = 512; +pub const ASYNC_IO_COMPLETION_CHECK_INTERVAL: Duration = Duration::from_micros(5); + +/// Aligned read struct for disk IO, it takes the ownership of the AlignedBoxedSlice and returns the AlignedBoxWithSlice data immutably. +pub struct AlignedRead<'a, T> { + /// where to read from + /// offset needs to be aligned with DISK_IO_ALIGNMENT + offset: u64, + + /// where to read into + /// aligned_buf and its len need to be aligned with DISK_IO_ALIGNMENT + aligned_buf: &'a mut [T], +} + +impl<'a, T> AlignedRead<'a, T> { + pub fn new(offset: u64, aligned_buf: &'a mut [T]) -> ANNResult { + Self::assert_is_aligned(offset as usize)?; + Self::assert_is_aligned(std::mem::size_of_val(aligned_buf))?; + + Ok(Self { + offset, + aligned_buf, + }) + } + + fn assert_is_aligned(val: usize) -> ANNResult<()> { + match val % DISK_IO_ALIGNMENT { + 0 => Ok(()), + _ => Err(ANNError::log_disk_io_request_alignment_error(format!( + "The offset or length of AlignedRead request is not {} bytes aligned", + DISK_IO_ALIGNMENT + ))), + } + } + + pub fn aligned_buf(&self) -> &[T] { + self.aligned_buf + } +} + +pub struct WindowsAlignedFileReader { + file_name: String, + + // ctx_map is the mapping from thread id to io context. It is hashmap behind a sharded lock to allow concurrent access from multiple threads. + // ShardedLock: shardedlock provides an implementation of a reader-writer lock that offers concurrent read access to the shared data while allowing exclusive write access. + // It achieves better scalability by dividing the shared data into multiple shards, and each with its own internal lock. + // Multiple threads can read from different shards simultaneously, reducing contention. + // https://docs.rs/crossbeam/0.8.2/crossbeam/sync/struct.ShardedLock.html + // Comparing to RwLock, ShardedLock provides higher concurrency for read operations and is suitable for read heavy workloads. + // The value of the hashmap is an Arc to allow immutable access to IOContext with automatic reference counting. + ctx_map: Lazy>>>, +} + +impl WindowsAlignedFileReader { + pub fn new(fname: &str) -> ANNResult { + let reader: WindowsAlignedFileReader = WindowsAlignedFileReader { + file_name: fname.to_string(), + ctx_map: Lazy::new(|| ShardedLock::new(HashMap::new())), + }; + + reader.register_thread()?; + Ok(reader) + } + + // Register the io context for a thread if it hasn't been registered. 
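+    // Each calling thread gets its own ctx_map entry, keyed by its ThreadId: a fresh
+    // read-only FileHandle on the same file plus an IOCompletionPort bound to that
+    // handle. Registering the same thread twice is a warning-level no-op. The intended
+    // calling pattern (illustrative only) is:
+    //     let reader = WindowsAlignedFileReader::new(path)?; // registers the creating thread
+    //     // on every additional worker thread:
+    //     reader.register_thread()?;
+    //     let ctx = reader.get_ctx()?;                       // Arc<IOContext> for this thread
+    //     reader.read(&mut aligned_reads, &ctx)?;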
+ pub fn register_thread(&self) -> ANNResult<()> { + let mut ctx_map = self.ctx_map.write().map_err(|_| { + ANNError::log_lock_poison_error("unable to acquire read lock on ctx_map".to_string()) + })?; + + let id = thread::current().id(); + if ctx_map.contains_key(&id) { + println!( + "Warning:: Duplicate registration for thread_id : {:?}. Directly call get_ctx to get the thread context data.", + id); + + return Ok(()); + } + + let mut ctx = IOContext::new(); + + match unsafe { FileHandle::new(&self.file_name, AccessMode::Read, ShareMode::Read) } { + Ok(file_handle) => ctx.file_handle = file_handle, + Err(err) => { + return Err(ANNError::log_io_error(err)); + } + } + + // Create a io completion port for the file handle, later it will be used to get the completion status. + match IOCompletionPort::new(&ctx.file_handle, None, 0, 0) { + Ok(io_completion_port) => ctx.io_completion_port = io_completion_port, + Err(err) => { + return Err(ANNError::log_io_error(err)); + } + } + + ctx_map.insert(id, Arc::new(ctx)); + + Ok(()) + } + + // Get the reference counted io context for the current thread. + pub fn get_ctx(&self) -> ANNResult> { + let ctx_map = self.ctx_map.read().map_err(|_| { + ANNError::log_lock_poison_error("unable to acquire read lock on ctx_map".to_string()) + })?; + + let id = thread::current().id(); + match ctx_map.get(&id) { + Some(ctx) => Ok(Arc::clone(ctx)), + None => Err(ANNError::log_index_error(format!( + "unable to find IOContext for thread_id {:?}", + id + ))), + } + } + + // Read the data from the file by sending concurrent io requests in batches. + pub fn read(&self, read_requests: &mut [AlignedRead], ctx: &IOContext) -> ANNResult<()> { + let n_requests = read_requests.len(); + let n_batches = (n_requests + MAX_IO_CONCURRENCY - 1) / MAX_IO_CONCURRENCY; + + let mut overlapped_in_out = + vec![unsafe { std::mem::zeroed::() }; MAX_IO_CONCURRENCY]; + + for batch_idx in 0..n_batches { + let batch_start = MAX_IO_CONCURRENCY * batch_idx; + let batch_size = std::cmp::min(n_requests - batch_start, MAX_IO_CONCURRENCY); + + for j in 0..batch_size { + let req = &mut read_requests[batch_start + j]; + let os = &mut overlapped_in_out[j]; + + match unsafe { + read_file_to_slice(&ctx.file_handle, req.aligned_buf, os, req.offset) + } { + Ok(_) => {} + Err(error) => { + return Err(ANNError::IOError { err: (error) }); + } + } + } + + let mut n_read: DWORD = 0; + let mut n_complete: u64 = 0; + let mut completion_key: ULONG_PTR = 0; + let mut lp_os: *mut OVERLAPPED = ptr::null_mut(); + while n_complete < batch_size as u64 { + match unsafe { + get_queued_completion_status( + &ctx.io_completion_port, + &mut n_read, + &mut completion_key, + &mut lp_os, + IO_COMPLETION_TIMEOUT, + ) + } { + // An IO request completed. + Ok(true) => n_complete += 1, + // No IO request completed, continue to wait. + Ok(false) => { + thread::sleep(ASYNC_IO_COMPLETION_CHECK_INTERVAL); + } + // An error ocurred. 
+ Err(error) => return Err(ANNError::IOError { err: (error) }), + } + } + } + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use std::{fs::File, io::BufReader}; + + use bincode::deserialize_from; + use serde::{Deserialize, Serialize}; + + use crate::{common::AlignedBoxWithSlice, model::SECTOR_LEN}; + + use super::*; + pub const TEST_INDEX_PATH: &str = + "./tests/data/disk_index_siftsmall_learn_256pts_R4_L50_A1.2_alligned_reader_test.index"; + pub const TRUTH_NODE_DATA_PATH: &str = + "./tests/data/disk_index_node_data_aligned_reader_truth.bin"; + + #[derive(Debug, Serialize, Deserialize)] + struct NodeData { + num_neighbors: u32, + coordinates: Vec, + neighbors: Vec, + } + + impl PartialEq for NodeData { + fn eq(&self, other: &Self) -> bool { + self.num_neighbors == other.num_neighbors + && self.coordinates == other.coordinates + && self.neighbors == other.neighbors + } + } + + #[test] + fn test_new_aligned_file_reader() { + // Replace "test_file_path" with actual file path + let result = WindowsAlignedFileReader::new(TEST_INDEX_PATH); + assert!(result.is_ok()); + + let reader = result.unwrap(); + assert_eq!(reader.file_name, TEST_INDEX_PATH); + } + + #[test] + fn test_read() { + let reader = WindowsAlignedFileReader::new(TEST_INDEX_PATH).unwrap(); + let ctx = reader.get_ctx().unwrap(); + + let read_length = 512; // adjust according to your logic + let num_read = 10; + let mut aligned_mem = AlignedBoxWithSlice::::new(read_length * num_read, 512).unwrap(); + + // create and add AlignedReads to the vector + let mut mem_slices = aligned_mem + .split_into_nonoverlapping_mut_slices(0..aligned_mem.len(), read_length) + .unwrap(); + + let mut aligned_reads: Vec> = mem_slices + .iter_mut() + .enumerate() + .map(|(i, slice)| { + let offset = (i * read_length) as u64; + AlignedRead::new(offset, slice).unwrap() + }) + .collect(); + + let result = reader.read(&mut aligned_reads, &ctx); + assert!(result.is_ok()); + } + + #[test] + fn test_read_disk_index_by_sector() { + let reader = WindowsAlignedFileReader::new(TEST_INDEX_PATH).unwrap(); + let ctx = reader.get_ctx().unwrap(); + + let read_length = SECTOR_LEN; // adjust according to your logic + let num_sector = 10; + let mut aligned_mem = + AlignedBoxWithSlice::::new(read_length * num_sector, 512).unwrap(); + + // Each slice will be used as the buffer for a read request of a sector. + let mut mem_slices = aligned_mem + .split_into_nonoverlapping_mut_slices(0..aligned_mem.len(), read_length) + .unwrap(); + + let mut aligned_reads: Vec> = mem_slices + .iter_mut() + .enumerate() + .map(|(sector_id, slice)| { + let offset = (sector_id * read_length) as u64; + AlignedRead::new(offset, slice).unwrap() + }) + .collect(); + + let result = reader.read(&mut aligned_reads, &ctx); + assert!(result.is_ok()); + + aligned_reads.iter().for_each(|read| { + assert_eq!(read.aligned_buf.len(), SECTOR_LEN); + }); + + let disk_layout_meta = reconstruct_disk_meta(aligned_reads[0].aligned_buf); + assert!(disk_layout_meta.len() > 9); + + let dims = disk_layout_meta[1]; + let num_pts = disk_layout_meta[0]; + let max_node_len = disk_layout_meta[3]; + let max_num_nodes_per_sector = disk_layout_meta[4]; + + assert!(max_node_len * max_num_nodes_per_sector < SECTOR_LEN as u64); + + let num_nbrs_start = (dims as usize) * std::mem::size_of::(); + let nbrs_buf_start = num_nbrs_start + std::mem::size_of::(); + + let mut node_data_array = Vec::with_capacity(max_num_nodes_per_sector as usize * 9); + + // Only validate the first 9 sectors with graph nodes. 
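+        // Sector 0 holds the u64 disk_layout_meta decoded above; every later sector packs
+        // up to max_num_nodes_per_sector fixed-size node records laid out as
+        //   [f32; dims] coordinates | u32 num_neighbors | [u32; num_neighbors] ids | zero padding
+        // which is exactly what the chunks_exact(max_node_len) loop below decodes.
+        // For this 128-dim f32 test index built with R = 4 that works out to
+        // 128*4 + 4 + 4*4 = 532 bytes per node, i.e. 4096 / 532 = 7 nodes per sector.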
+ (1..9).for_each(|sector_id| { + let sector_data = &mem_slices[sector_id]; + for node_data in sector_data.chunks_exact(max_node_len as usize) { + // Extract coordinates data from the start of the node_data + let coordinates_end = (dims as usize) * std::mem::size_of::(); + let coordinates = node_data[0..coordinates_end] + .chunks_exact(std::mem::size_of::()) + .map(|chunk| f32::from_le_bytes(chunk.try_into().unwrap())) + .collect(); + + // Extract number of neighbors from the node_data + let neighbors_num = u32::from_le_bytes( + node_data[num_nbrs_start..nbrs_buf_start] + .try_into() + .unwrap(), + ); + + let nbors_buf_end = + nbrs_buf_start + (neighbors_num as usize) * std::mem::size_of::(); + + // Extract neighbors from the node data. + let mut neighbors = Vec::new(); + for nbors_data in node_data[nbrs_buf_start..nbors_buf_end] + .chunks_exact(std::mem::size_of::()) + { + let nbors_id = u32::from_le_bytes(nbors_data.try_into().unwrap()); + assert!(nbors_id < num_pts as u32); + neighbors.push(nbors_id); + } + + // Create NodeData struct and push it to the node_data_array + node_data_array.push(NodeData { + num_neighbors: neighbors_num, + coordinates, + neighbors, + }); + } + }); + + // Compare that each node read from the disk index are expected. + let node_data_truth_file = File::open(TRUTH_NODE_DATA_PATH).unwrap(); + let reader = BufReader::new(node_data_truth_file); + + let node_data_vec: Vec = deserialize_from(reader).unwrap(); + for (node_from_node_data_file, node_from_disk_index) in + node_data_vec.iter().zip(node_data_array.iter()) + { + // Verify that the NodeData from the file is equal to the NodeData in node_data_array + assert_eq!(node_from_node_data_file, node_from_disk_index); + } + } + + #[test] + fn test_read_fail_invalid_file() { + let reader = WindowsAlignedFileReader::new("/invalid_path"); + assert!(reader.is_err()); + } + + #[test] + fn test_read_no_requests() { + let reader = WindowsAlignedFileReader::new(TEST_INDEX_PATH).unwrap(); + let ctx = reader.get_ctx().unwrap(); + + let mut read_requests = Vec::>::new(); + let result = reader.read(&mut read_requests, &ctx); + assert!(result.is_ok()); + } + + #[test] + fn test_get_ctx() { + let reader = WindowsAlignedFileReader::new(TEST_INDEX_PATH).unwrap(); + let result = reader.get_ctx(); + assert!(result.is_ok()); + } + + #[test] + fn test_register_thread() { + let reader = WindowsAlignedFileReader::new(TEST_INDEX_PATH).unwrap(); + let result = reader.register_thread(); + assert!(result.is_ok()); + } + + fn reconstruct_disk_meta(buffer: &[u8]) -> Vec { + let size_of_u64 = std::mem::size_of::(); + + let num_values = buffer.len() / size_of_u64; + let mut disk_layout_meta = Vec::with_capacity(num_values); + let meta_data = &buffer[8..]; + + for chunk in meta_data.chunks_exact(size_of_u64) { + let value = u64::from_le_bytes(chunk.try_into().unwrap()); + disk_layout_meta.push(value); + } + + disk_layout_meta + } +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/storage/disk_graph_storage.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/storage/disk_graph_storage.rs new file mode 100644 index 0000000..4481752 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/storage/disk_graph_storage.rs @@ -0,0 +1,37 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +#![warn(missing_docs)] + +//! 
Disk graph storage + +use std::sync::Arc; + +use crate::{model::{WindowsAlignedFileReader, IOContext, AlignedRead}, common::ANNResult}; + +/// Graph storage for disk index +/// One thread has one storage instance +pub struct DiskGraphStorage { + /// Disk graph reader + disk_graph_reader: Arc, + + /// IOContext of current thread + ctx: Arc, +} + +impl DiskGraphStorage { + /// Create a new DiskGraphStorage instance + pub fn new(disk_graph_reader: Arc) -> ANNResult { + let ctx = disk_graph_reader.get_ctx()?; + Ok(Self { + disk_graph_reader, + ctx, + }) + } + + /// Read disk graph data + pub fn read(&self, read_requests: &mut [AlignedRead]) -> ANNResult<()> { + self.disk_graph_reader.read(read_requests, &self.ctx) + } +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/storage/disk_index_storage.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/storage/disk_index_storage.rs new file mode 100644 index 0000000..0c55808 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/storage/disk_index_storage.rs @@ -0,0 +1,363 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +use byteorder::{ByteOrder, LittleEndian, ReadBytesExt}; +use std::fs::File; +use std::io::Read; +use std::marker::PhantomData; +use std::{fs, mem}; + +use crate::common::{ANNError, ANNResult}; +use crate::model::NUM_PQ_CENTROIDS; +use crate::storage::PQStorage; +use crate::utils::{convert_types_u32_usize, convert_types_u64_usize, load_bin, save_bin_u64}; +use crate::utils::{ + file_exists, gen_sample_data, get_file_size, round_up, CachedReader, CachedWriter, +}; + +const SECTOR_LEN: usize = 4096; + +/// Todo: Remove the allow(dead_code) when the disk search code is complete +#[allow(dead_code)] +pub struct PQPivotData { + dim: usize, + pq_table: Vec, + centroids: Vec, + chunk_offsets: Vec, +} + +pub struct DiskIndexStorage { + /// Dataset file + dataset_file: String, + + /// Index file path prefix + index_path_prefix: String, + + // TODO: Only a placeholder for T, will be removed later + _marker: PhantomData, + + pq_storage: PQStorage, +} + +impl DiskIndexStorage { + /// Create DiskIndexStorage instance + pub fn new(dataset_file: String, index_path_prefix: String) -> ANNResult { + let pq_storage: PQStorage = PQStorage::new( + &(index_path_prefix.clone() + ".bin_pq_pivots.bin"), + &(index_path_prefix.clone() + ".bin_pq_compressed.bin"), + &dataset_file, + )?; + + Ok(DiskIndexStorage { + dataset_file, + index_path_prefix, + _marker: PhantomData, + pq_storage, + }) + } + + pub fn get_pq_storage(&mut self) -> &mut PQStorage { + &mut self.pq_storage + } + + pub fn dataset_file(&self) -> &String { + &self.dataset_file + } + + pub fn index_path_prefix(&self) -> &String { + &self.index_path_prefix + } + + /// Create disk layout + /// Sector #1: disk_layout_meta + /// Sector #n: num_nodes_per_sector nodes + /// Each node's layout: {full precision vector:[T; DIM]}{num_nbrs: u32}{neighbors: [u32; num_nbrs]} + /// # Arguments + /// * `dataset_file` - dataset file containing full precision vectors + /// * `mem_index_file` - in-memory index graph file + /// * `disk_layout_file` - output disk layout file + pub fn create_disk_layout(&self) -> ANNResult<()> { + let mem_index_file = self.mem_index_file(); + let disk_layout_file = self.disk_index_file(); + + // amount to read or write in one shot + let read_blk_size = 64 * 1024 * 1024; + let write_blk_size = read_blk_size; + let mut dataset_reader = 
CachedReader::new(self.dataset_file.as_str(), read_blk_size)?; + + let num_pts = dataset_reader.read_u32()? as u64; + let dims = dataset_reader.read_u32()? as u64; + + // Create cached reader + writer + let actual_file_size = get_file_size(mem_index_file.as_str())?; + println!("Vamana index file size={}", actual_file_size); + + let mut vamana_reader = File::open(mem_index_file)?; + let mut diskann_writer = CachedWriter::new(disk_layout_file.as_str(), write_blk_size)?; + + let index_file_size = vamana_reader.read_u64::()?; + if index_file_size != actual_file_size { + println!( + "Vamana Index file size does not match expected size per meta-data. file size from file: {}, actual file size: {}", + index_file_size, actual_file_size + ); + } + + let max_degree = vamana_reader.read_u32::()?; + let medoid = vamana_reader.read_u32::()?; + let vamana_frozen_num = vamana_reader.read_u64::()?; + + let mut vamana_frozen_loc = 0; + if vamana_frozen_num == 1 { + vamana_frozen_loc = medoid; + } + + let max_node_len = ((max_degree as u64 + 1) * (mem::size_of::() as u64)) + + (dims * (mem::size_of::() as u64)); + let num_nodes_per_sector = (SECTOR_LEN as u64) / max_node_len; + + println!("medoid: {}B", medoid); + println!("max_node_len: {}B", max_node_len); + println!("num_nodes_per_sector: {}B", num_nodes_per_sector); + + // SECTOR_LEN buffer for each sector + let mut sector_buf = vec![0u8; SECTOR_LEN]; + let mut node_buf = vec![0u8; max_node_len as usize]; + + let num_nbrs_start = (dims as usize) * mem::size_of::(); + let nbrs_buf_start = num_nbrs_start + mem::size_of::(); + + // number of sectors (1 for meta data) + let num_sectors = round_up(num_pts, num_nodes_per_sector) / num_nodes_per_sector; + let disk_index_file_size = (num_sectors + 1) * (SECTOR_LEN as u64); + + let disk_layout_meta = vec![ + num_pts, + dims, + medoid as u64, + max_node_len, + num_nodes_per_sector, + vamana_frozen_num, + vamana_frozen_loc as u64, + // append_reorder_data + // We are not supporting this. 
Temporarily write it into the layout so that + // we can leverage C++ query driver to test the disk index + false as u64, + disk_index_file_size, + ]; + + diskann_writer.write(§or_buf)?; + + let mut cur_node_coords = vec![0u8; (dims as usize) * mem::size_of::()]; + let mut cur_node_id = 0u64; + + for sector in 0..num_sectors { + if sector % 100_000 == 0 { + println!("Sector #{} written", sector); + } + sector_buf.fill(0); + + for sector_node_id in 0..num_nodes_per_sector { + if cur_node_id >= num_pts { + break; + } + + node_buf.fill(0); + + // read cur node's num_nbrs + let num_nbrs = vamana_reader.read_u32::()?; + + // sanity checks on num_nbrs + debug_assert!(num_nbrs > 0); + debug_assert!(num_nbrs <= max_degree); + + // write coords of node first + dataset_reader.read(&mut cur_node_coords)?; + node_buf[..cur_node_coords.len()].copy_from_slice(&cur_node_coords); + + // write num_nbrs + LittleEndian::write_u32( + &mut node_buf[num_nbrs_start..(num_nbrs_start + mem::size_of::())], + num_nbrs, + ); + + // write neighbors + let nbrs_buf = &mut node_buf[nbrs_buf_start + ..(nbrs_buf_start + (num_nbrs as usize) * mem::size_of::())]; + vamana_reader.read_exact(nbrs_buf)?; + + // get offset into sector_buf + let sector_node_buf_start = (sector_node_id * max_node_len) as usize; + let sector_node_buf = &mut sector_buf + [sector_node_buf_start..(sector_node_buf_start + max_node_len as usize)]; + sector_node_buf.copy_from_slice(&node_buf[..(max_node_len as usize)]); + + cur_node_id += 1; + } + + // flush sector to disk + diskann_writer.write(§or_buf)?; + } + + diskann_writer.flush()?; + save_bin_u64( + disk_layout_file.as_str(), + &disk_layout_meta, + disk_layout_meta.len(), + 1, + 0, + )?; + + Ok(()) + } + + pub fn index_build_cleanup(&self) -> ANNResult<()> { + fs::remove_file(self.mem_index_file())?; + Ok(()) + } + + pub fn gen_query_warmup_data(&self, sampling_rate: f64) -> ANNResult<()> { + gen_sample_data::( + &self.dataset_file, + &self.warmup_query_prefix(), + sampling_rate, + )?; + Ok(()) + } + + /// Load pre-trained pivot table + pub fn load_pq_pivots_bin( + &self, + num_pq_chunks: &usize, + ) -> ANNResult { + let pq_pivots_path = &self.pq_pivot_file(); + if !file_exists(pq_pivots_path) { + return Err(ANNError::log_pq_error( + "ERROR: PQ k-means pivot file not found.".to_string(), + )); + } + + let (data, offset_num, offset_dim) = load_bin::(pq_pivots_path, 0)?; + let file_offset_data = convert_types_u64_usize(&data, offset_num, offset_dim); + if offset_num != 4 { + let error_message = format!("Error reading pq_pivots file {}. Offsets don't contain correct metadata, # offsets = {}, but expecting 4.", pq_pivots_path, offset_num); + return Err(ANNError::log_pq_error(error_message)); + } + + let (data, pivot_num, dim) = load_bin::(pq_pivots_path, file_offset_data[0])?; + let pq_table = data.to_vec(); + if pivot_num != NUM_PQ_CENTROIDS { + let error_message = format!( + "Error reading pq_pivots file {}. file_num_centers = {}, but expecting {} centers.", + pq_pivots_path, pivot_num, NUM_PQ_CENTROIDS + ); + return Err(ANNError::log_pq_error(error_message)); + } + + let (data, centroid_dim, nc) = load_bin::(pq_pivots_path, file_offset_data[1])?; + let centroids = data.to_vec(); + if centroid_dim != dim || nc != 1 { + let error_message = format!("Error reading pq_pivots file {}. 
file_dim = {}, file_cols = {} but expecting {} entries in 1 dimension.", pq_pivots_path, centroid_dim, nc, dim); + return Err(ANNError::log_pq_error(error_message)); + } + + let (data, chunk_offset_num, nc) = load_bin::(pq_pivots_path, file_offset_data[2])?; + let chunk_offsets = convert_types_u32_usize(&data, chunk_offset_num, nc); + if chunk_offset_num != num_pq_chunks + 1 || nc != 1 { + let error_message = format!("Error reading pq_pivots file at chunk offsets; file has nr={}, nc={} but expecting nr={} and nc=1.", chunk_offset_num, nc, num_pq_chunks + 1); + return Err(ANNError::log_pq_error(error_message)); + } + + Ok(PQPivotData { + dim, + pq_table, + centroids, + chunk_offsets + }) + } + + fn mem_index_file(&self) -> String { + self.index_path_prefix.clone() + "_mem.index" + } + + fn disk_index_file(&self) -> String { + self.index_path_prefix.clone() + "_disk.index" + } + + fn warmup_query_prefix(&self) -> String { + self.index_path_prefix.clone() + "_sample" + } + + pub fn pq_pivot_file(&self) -> String { + self.index_path_prefix.clone() + ".bin_pq_pivots.bin" + } + + pub fn compressed_pq_pivot_file(&self) -> String { + self.index_path_prefix.clone() + ".bin_pq_compressed.bin" + } +} + +#[cfg(test)] +mod disk_index_storage_test { + use std::fs; + + use crate::test_utils::get_test_file_path; + + use super::*; + + const TEST_DATA_FILE: &str = "tests/data/siftsmall_learn_256pts.fbin"; + const DISK_INDEX_PATH_PREFIX: &str = "tests/data/disk_index_siftsmall_learn_256pts_R4_L50_A1.2"; + const TRUTH_DISK_LAYOUT: &str = + "tests/data/truth_disk_index_siftsmall_learn_256pts_R4_L50_A1.2_disk.index"; + + #[test] + fn create_disk_layout_test() { + let storage = DiskIndexStorage::::new( + get_test_file_path(TEST_DATA_FILE), + get_test_file_path(DISK_INDEX_PATH_PREFIX), + ).unwrap(); + storage.create_disk_layout().unwrap(); + + let disk_layout_file = storage.disk_index_file(); + let rust_disk_layout = fs::read(disk_layout_file.as_str()).unwrap(); + let truth_disk_layout = fs::read(get_test_file_path(TRUTH_DISK_LAYOUT).as_str()).unwrap(); + + assert!(rust_disk_layout == truth_disk_layout); + + fs::remove_file(disk_layout_file.as_str()).expect("Failed to delete file"); + } + + #[test] + fn load_pivot_test() { + let dim: usize = 128; + let num_pq_chunk: usize = 1; + let pivot_file_prefix: &str = "tests/data/siftsmall_learn"; + let storage = DiskIndexStorage::::new( + get_test_file_path(TEST_DATA_FILE), + pivot_file_prefix.to_string(), + ).unwrap(); + + let pq_pivot_data = + storage.load_pq_pivots_bin(&num_pq_chunk).unwrap(); + + assert_eq!(pq_pivot_data.pq_table.len(), NUM_PQ_CENTROIDS * dim); + assert_eq!(pq_pivot_data.centroids.len(), dim); + + assert_eq!(pq_pivot_data.chunk_offsets[0], 0); + assert_eq!(pq_pivot_data.chunk_offsets[1], dim); + assert_eq!(pq_pivot_data.chunk_offsets.len(), num_pq_chunk + 1); + } + + #[test] + #[should_panic(expected = "ERROR: PQ k-means pivot file not found.")] + fn load_pivot_file_not_exist_test() { + let num_pq_chunk: usize = 1; + let pivot_file_prefix: &str = "tests/data/siftsmall_learn_file_not_exist"; + let storage = DiskIndexStorage::::new( + get_test_file_path(TEST_DATA_FILE), + pivot_file_prefix.to_string(), + ).unwrap(); + let _ = storage.load_pq_pivots_bin(&num_pq_chunk).unwrap(); + } +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/storage/mod.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/storage/mod.rs new file mode 100644 index 0000000..03c5b8e --- /dev/null +++ 
b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/storage/mod.rs @@ -0,0 +1,12 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +mod disk_index_storage; +pub use disk_index_storage::*; + +mod disk_graph_storage; +pub use disk_graph_storage::*; + +mod pq_storage; +pub use pq_storage::*; diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/storage/pq_storage.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/storage/pq_storage.rs new file mode 100644 index 0000000..b1d3fa0 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/storage/pq_storage.rs @@ -0,0 +1,367 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +use byteorder::{LittleEndian, ReadBytesExt}; +use rand::distributions::{Distribution, Uniform}; +use std::fs::File; +use std::io::{Read, Seek, SeekFrom, Write}; +use std::mem; + +use crate::common::{ANNError, ANNResult}; +use crate::utils::CachedReader; +use crate::utils::{ + convert_types_u32_usize, convert_types_u64_usize, convert_types_usize_u32, + convert_types_usize_u64, convert_types_usize_u8, save_bin_f32, save_bin_u32, save_bin_u64, +}; +use crate::utils::{file_exists, load_bin, open_file_to_write, METADATA_SIZE}; + +#[derive(Debug)] +pub struct PQStorage { + /// Pivot table path + pivot_file: String, + + /// Compressed pivot path + compressed_pivot_file: String, + + /// Data used to construct PQ table and PQ compressed table + pq_data_file: String, + + /// PQ data reader + pq_data_file_reader: File, +} + +impl PQStorage { + pub fn new( + pivot_file: &str, + compressed_pivot_file: &str, + pq_data_file: &str, + ) -> std::io::Result { + let pq_data_file_reader = File::open(pq_data_file)?; + Ok(Self { + pivot_file: pivot_file.to_string(), + compressed_pivot_file: compressed_pivot_file.to_string(), + pq_data_file: pq_data_file.to_string(), + pq_data_file_reader, + }) + } + + pub fn write_compressed_pivot_metadata(&self, npts: i32, pq_chunk: i32) -> std::io::Result<()> { + let mut writer = open_file_to_write(&self.compressed_pivot_file)?; + writer.write_all(&npts.to_le_bytes())?; + writer.write_all(&pq_chunk.to_le_bytes())?; + Ok(()) + } + + pub fn write_compressed_pivot_data( + &self, + compressed_base: &[usize], + num_centers: usize, + block_size: usize, + num_pq_chunks: usize, + ) -> std::io::Result<()> { + let mut writer = open_file_to_write(&self.compressed_pivot_file)?; + writer.seek(SeekFrom::Start((std::mem::size_of::() * 2) as u64))?; + if num_centers > 256 { + writer.write_all(unsafe { + std::slice::from_raw_parts( + compressed_base.as_ptr() as *const u8, + block_size * num_pq_chunks * std::mem::size_of::(), + ) + })?; + } else { + let compressed_base_u8 = + convert_types_usize_u8(compressed_base, block_size, num_pq_chunks); + writer.write_all(&compressed_base_u8)?; + } + Ok(()) + } + + pub fn write_pivot_data( + &self, + full_pivot_data: &[f32], + centroid: &[f32], + chunk_offsets: &[usize], + num_centers: usize, + dim: usize, + ) -> std::io::Result<()> { + let mut cumul_bytes: Vec = vec![0; 4]; + cumul_bytes[0] = METADATA_SIZE; + cumul_bytes[1] = cumul_bytes[0] + + save_bin_f32( + &self.pivot_file, + full_pivot_data, + num_centers, + dim, + cumul_bytes[0], + )?; + cumul_bytes[2] = + cumul_bytes[1] + save_bin_f32(&self.pivot_file, centroid, dim, 1, cumul_bytes[1])?; + + // Because the writer only can write u32, u64 but not usize, so we need to 
convert the type first. + let chunk_offsets_u64 = convert_types_usize_u32(chunk_offsets, chunk_offsets.len(), 1); + cumul_bytes[3] = cumul_bytes[2] + + save_bin_u32( + &self.pivot_file, + &chunk_offsets_u64, + chunk_offsets.len(), + 1, + cumul_bytes[2], + )?; + + let cumul_bytes_u64 = convert_types_usize_u64(&cumul_bytes, 4, 1); + save_bin_u64(&self.pivot_file, &cumul_bytes_u64, cumul_bytes.len(), 1, 0)?; + + Ok(()) + } + + pub fn pivot_data_exist(&self) -> bool { + file_exists(&self.pivot_file) + } + + pub fn read_pivot_metadata(&self) -> std::io::Result<(usize, usize)> { + let (_, file_num_centers, file_dim) = load_bin::(&self.pivot_file, METADATA_SIZE)?; + Ok((file_num_centers, file_dim)) + } + + pub fn load_pivot_data( + &self, + num_pq_chunks: &usize, + num_centers: &usize, + dim: &usize, + ) -> ANNResult<(Vec, Vec, Vec)> { + // Load file offset data. File saved as offset data(4*1) -> pivot data(centroid num*dim) -> centroid of dim data(dim*1) -> chunk offset data(chunksize+1*1) + // Because we only can write u64 rather than usize, so the file stored as u64 type. Need to convert to usize when use. + let (data, offset_num, nc) = load_bin::(&self.pivot_file, 0)?; + let file_offset_data = convert_types_u64_usize(&data, offset_num, nc); + if offset_num != 4 { + let error_message = format!("Error reading pq_pivots file {}. Offsets don't contain correct metadata, # offsets = {}, but expecting 4.", &self.pivot_file, offset_num); + return Err(ANNError::log_pq_error(error_message)); + } + + let (data, pivot_num, pivot_dim) = load_bin::(&self.pivot_file, file_offset_data[0])?; + let full_pivot_data = data; + if pivot_num != *num_centers || pivot_dim != *dim { + let error_message = format!("Error reading pq_pivots file {}. file_num_centers = {}, file_dim = {} but expecting {} centers in {} dimensions.", &self.pivot_file, pivot_num, pivot_dim, num_centers, dim); + return Err(ANNError::log_pq_error(error_message)); + } + + let (data, centroid_dim, nc) = load_bin::(&self.pivot_file, file_offset_data[1])?; + let centroid = data; + if centroid_dim != *dim || nc != 1 { + let error_message = format!("Error reading pq_pivots file {}. 
file_dim = {}, file_cols = {} but expecting {} entries in 1 dimension.", &self.pivot_file, centroid_dim, nc, dim); + return Err(ANNError::log_pq_error(error_message)); + } + + let (data, chunk_offset_number, nc) = + load_bin::(&self.pivot_file, file_offset_data[2])?; + let chunk_offsets = convert_types_u32_usize(&data, chunk_offset_number, nc); + if chunk_offset_number != *num_pq_chunks + 1 || nc != 1 { + let error_message = format!("Error reading pq_pivots file at chunk offsets; file has nr={}, nc={} but expecting nr={} and nc=1.", chunk_offset_number, nc, num_pq_chunks + 1); + return Err(ANNError::log_pq_error(error_message)); + } + Ok((full_pivot_data, centroid, chunk_offsets)) + } + + pub fn read_pq_data_metadata(&mut self) -> std::io::Result<(usize, usize)> { + let npts_i32 = self.pq_data_file_reader.read_i32::()?; + let dim_i32 = self.pq_data_file_reader.read_i32::()?; + let num_points = npts_i32 as usize; + let dim = dim_i32 as usize; + Ok((num_points, dim)) + } + + pub fn read_pq_block_data( + &mut self, + cur_block_size: usize, + dim: usize, + ) -> std::io::Result> { + let mut buf = vec![0u8; cur_block_size * dim * std::mem::size_of::()]; + self.pq_data_file_reader.read_exact(&mut buf)?; + + let ptr = buf.as_ptr() as *const T; + let block_data = unsafe { std::slice::from_raw_parts(ptr, cur_block_size * dim) }; + Ok(block_data.to_vec()) + } + + /// streams data from the file, and samples each vector with probability p_val + /// and returns a matrix of size slice_size* ndims as floating point type. + /// the slice_size and ndims are set inside the function. + /// # Arguments + /// * `file_name` - filename where the data is + /// * `p_val` - possibility to sample data + /// * `sampled_vectors` - sampled vector chose by p_val possibility + /// * `slice_size` - how many sampled data return + /// * `dim` - each sample data dimension + pub fn gen_random_slice>( + &self, + mut p_val: f64, + ) -> ANNResult<(Vec, usize, usize)> { + let read_blk_size = 64 * 1024 * 1024; + let mut reader = CachedReader::new(&self.pq_data_file, read_blk_size)?; + + let npts = reader.read_u32()? as usize; + let dim = reader.read_u32()? 
as usize; + let mut sampled_vectors: Vec = Vec::new(); + let mut slice_size = 0; + p_val = if p_val < 1f64 { p_val } else { 1f64 }; + + let mut generator = rand::thread_rng(); + let distribution = Uniform::from(0.0..1.0); + + for _ in 0..npts { + let mut cur_vector_bytes = vec![0u8; dim * mem::size_of::()]; + reader.read(&mut cur_vector_bytes)?; + let random_value = distribution.sample(&mut generator); + if random_value < p_val { + let ptr = cur_vector_bytes.as_ptr() as *const T; + let cur_vector_t = unsafe { std::slice::from_raw_parts(ptr, dim) }; + sampled_vectors.extend(cur_vector_t.iter().map(|&t| t.into())); + slice_size += 1; + } + } + + Ok((sampled_vectors, slice_size, dim)) + } +} + +#[cfg(test)] +mod pq_storage_tests { + use rand::Rng; + + use super::*; + use crate::utils::gen_random_slice; + + const DATA_FILE: &str = "tests/data/siftsmall_learn.bin"; + const PQ_PIVOT_PATH: &str = "tests/data/siftsmall_learn.bin_pq_pivots.bin"; + const PQ_COMPRESSED_PATH: &str = "tests/data/empty_pq_compressed.bin"; + + #[test] + fn new_test() { + let result = PQStorage::new(PQ_PIVOT_PATH, PQ_COMPRESSED_PATH, DATA_FILE); + assert!(result.is_ok()); + } + + #[test] + fn write_compressed_pivot_metadata_test() { + let compress_pivot_path = "write_compressed_pivot_metadata_test.bin"; + let result = PQStorage::new(PQ_PIVOT_PATH, compress_pivot_path, DATA_FILE).unwrap(); + + _ = result.write_compressed_pivot_metadata(100, 20); + let mut result_reader = File::open(compress_pivot_path).unwrap(); + let npts_i32 = result_reader.read_i32::().unwrap(); + let dim_i32 = result_reader.read_i32::().unwrap(); + + assert_eq!(npts_i32, 100); + assert_eq!(dim_i32, 20); + + std::fs::remove_file(compress_pivot_path).unwrap(); + } + + #[test] + fn write_compressed_pivot_data_test() { + let compress_pivot_path = "write_compressed_pivot_data_test.bin"; + let result = PQStorage::new(PQ_PIVOT_PATH, compress_pivot_path, DATA_FILE).unwrap(); + + let mut rng = rand::thread_rng(); + + let num_centers = 256; + let block_size = 4; + let num_pq_chunks = 2; + let compressed_base: Vec = (0..block_size * num_pq_chunks) + .map(|_| rng.gen_range(0..num_centers)) + .collect(); + _ = result.write_compressed_pivot_data( + &compressed_base, + num_centers, + block_size, + num_pq_chunks, + ); + + let mut result_reader = File::open(compress_pivot_path).unwrap(); + _ = result_reader.read_i32::().unwrap(); + _ = result_reader.read_i32::().unwrap(); + let mut buf = vec![0u8; block_size * num_pq_chunks * std::mem::size_of::()]; + result_reader.read_exact(&mut buf).unwrap(); + + let ptr = buf.as_ptr() as *const u8; + let block_data = unsafe { std::slice::from_raw_parts(ptr, block_size * num_pq_chunks) }; + + for index in 0..block_data.len() { + assert_eq!(compressed_base[index], block_data[index] as usize); + } + std::fs::remove_file(compress_pivot_path).unwrap(); + } + + #[test] + fn pivot_data_exist_test() { + let result = PQStorage::new(PQ_PIVOT_PATH, PQ_COMPRESSED_PATH, DATA_FILE).unwrap(); + assert!(result.pivot_data_exist()); + + let pivot_path = "not_exist_pivot_path.bin"; + let result = PQStorage::new(pivot_path, PQ_COMPRESSED_PATH, DATA_FILE).unwrap(); + assert!(!result.pivot_data_exist()); + } + + #[test] + fn read_pivot_metadata_test() { + let result = PQStorage::new(PQ_PIVOT_PATH, PQ_COMPRESSED_PATH, DATA_FILE).unwrap(); + let (npt, dim) = result.read_pivot_metadata().unwrap(); + + assert_eq!(npt, 256); + assert_eq!(dim, 128); + } + + #[test] + fn load_pivot_data_test() { + let result = PQStorage::new(PQ_PIVOT_PATH, PQ_COMPRESSED_PATH, 
DATA_FILE).unwrap(); + let (pq_pivot_data, centroids, chunk_offsets) = + result.load_pivot_data(&1, &256, &128).unwrap(); + + assert_eq!(pq_pivot_data.len(), 256 * 128); + assert_eq!(centroids.len(), 128); + assert_eq!(chunk_offsets.len(), 2); + } + + #[test] + fn read_pq_data_metadata_test() { + let mut result = PQStorage::new(PQ_PIVOT_PATH, PQ_COMPRESSED_PATH, DATA_FILE).unwrap(); + let (npt, dim) = result.read_pq_data_metadata().unwrap(); + + assert_eq!(npt, 25000); + assert_eq!(dim, 128); + } + + #[test] + fn gen_random_slice_test() { + let file_name = "gen_random_slice_test.bin"; + //npoints=2, dim=8 + let data: [u8; 72] = [ + 2, 0, 0, 0, 8, 0, 0, 0, 0x00, 0x00, 0x80, 0x3f, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, + 0x40, 0x40, 0x00, 0x00, 0x80, 0x40, 0x00, 0x00, 0xa0, 0x40, 0x00, 0x00, 0xc0, 0x40, + 0x00, 0x00, 0xe0, 0x40, 0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x10, 0x41, 0x00, 0x00, + 0x20, 0x41, 0x00, 0x00, 0x30, 0x41, 0x00, 0x00, 0x40, 0x41, 0x00, 0x00, 0x50, 0x41, + 0x00, 0x00, 0x60, 0x41, 0x00, 0x00, 0x70, 0x41, 0x00, 0x00, 0x80, 0x41, + ]; + std::fs::write(file_name, data).expect("Failed to write sample file"); + + let (sampled_vectors, slice_size, ndims) = + gen_random_slice::(file_name, 1f64).unwrap(); + let mut start = 8; + (0..sampled_vectors.len()).for_each(|i| { + assert_eq!(sampled_vectors[i].to_le_bytes(), data[start..start + 4]); + start += 4; + }); + assert_eq!(sampled_vectors.len(), 16); + assert_eq!(slice_size, 2); + assert_eq!(ndims, 8); + + let (sampled_vectors, slice_size, ndims) = + gen_random_slice::(file_name, 0f64).unwrap(); + assert_eq!(sampled_vectors.len(), 0); + assert_eq!(slice_size, 0); + assert_eq!(ndims, 8); + + std::fs::remove_file(file_name).expect("Failed to delete file"); + } +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/test_utils/inmem_index_initialization.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/test_utils/inmem_index_initialization.rs new file mode 100644 index 0000000..db3b581 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/test_utils/inmem_index_initialization.rs @@ -0,0 +1,74 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. 
+ */ +use vector::Metric; + +use crate::index::InmemIndex; +use crate::model::configuration::index_write_parameters::IndexWriteParametersBuilder; +use crate::model::{IndexConfiguration}; +use crate::model::vertex::DIM_128; +use crate::utils::{file_exists, load_metadata_from_file}; + +use super::get_test_file_path; + +// f32, 128 DIM and 256 points source data +const TEST_DATA_FILE: &str = "tests/data/siftsmall_learn_256pts.fbin"; +const NUM_POINTS_TO_LOAD: usize = 256; + +pub fn create_index_with_test_data() -> InmemIndex { + let index_write_parameters = IndexWriteParametersBuilder::new(50, 4).with_alpha(1.2).build(); + let config = IndexConfiguration::new( + Metric::L2, + 128, + 128, + 256, + false, + 0, + false, + 0, + 1.0f32, + index_write_parameters); + let mut index: InmemIndex = InmemIndex::new(config).unwrap(); + + build_test_index(&mut index, get_test_file_path(TEST_DATA_FILE).as_str(), NUM_POINTS_TO_LOAD); + + index.start = index.dataset.calculate_medoid_point_id().unwrap(); + + index +} + +fn build_test_index(index: &mut InmemIndex, filename: &str, num_points_to_load: usize) { + if !file_exists(filename) { + panic!("ERROR: Data file {} does not exist.", filename); + } + + let (file_num_points, file_dim) = load_metadata_from_file(filename).unwrap(); + if file_num_points > index.configuration.max_points { + panic!( + "ERROR: Driver requests loading {} points and file has {} points, + but index can support only {} points as specified in configuration.", + num_points_to_load, file_num_points, index.configuration.max_points + ); + } + + if num_points_to_load > file_num_points { + panic!( + "ERROR: Driver requests loading {} points and file has only {} points.", + num_points_to_load, file_num_points + ); + } + + if file_dim != index.configuration.dim { + panic!( + "ERROR: Driver requests loading {} dimension, but file has {} dimension.", + index.configuration.dim, file_dim + ); + } + + index.dataset.build_from_file(filename, num_points_to_load).unwrap(); + + println!("Using only first {} from file.", num_points_to_load); + + index.num_active_pts = num_points_to_load; +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/test_utils/mod.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/test_utils/mod.rs new file mode 100644 index 0000000..fc8de5f --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/test_utils/mod.rs @@ -0,0 +1,11 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +pub mod inmem_index_initialization; + +/// test files should be placed under tests folder +pub fn get_test_file_path(relative_path: &str) -> String { + format!("{}/{}", env!("CARGO_MANIFEST_DIR"), relative_path) +} + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/utils/bit_vec_extension.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/utils/bit_vec_extension.rs new file mode 100644 index 0000000..9571a72 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/utils/bit_vec_extension.rs @@ -0,0 +1,45 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. 
+ */ +use std::cmp::Ordering; + +use bit_vec::BitVec; + +pub trait BitVecExtension { + fn resize(&mut self, new_len: usize, value: bool); +} + +impl BitVecExtension for BitVec { + fn resize(&mut self, new_len: usize, value: bool) { + let old_len = self.len(); + match new_len.cmp(&old_len) { + Ordering::Less => self.truncate(new_len), + Ordering::Greater => self.grow(new_len - old_len, value), + Ordering::Equal => {} + } + } +} + +#[cfg(test)] +mod bit_vec_extension_test { + use super::*; + + #[test] + fn resize_test() { + let mut bitset = BitVec::new(); + + bitset.resize(10, false); + assert_eq!(bitset.len(), 10); + assert!(bitset.none()); + + bitset.resize(11, true); + assert_eq!(bitset.len(), 11); + assert!(bitset[10]); + + bitset.resize(5, false); + assert_eq!(bitset.len(), 5); + assert!(bitset.none()); + } +} + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/utils/cached_reader.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/utils/cached_reader.rs new file mode 100644 index 0000000..1a21f1a --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/utils/cached_reader.rs @@ -0,0 +1,160 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +use std::fs::File; +use std::io::{Seek, Read}; + +use crate::common::{ANNResult, ANNError}; + +/// Sequential cached reads +pub struct CachedReader { + /// File reader + reader: File, + + /// # bytes to cache in one shot read + cache_size: u64, + + /// Underlying buf for cache + cache_buf: Vec, + + /// Offset into cache_buf for cur_pos + cur_off: u64, + + /// File size + fsize: u64, +} + +impl CachedReader { + pub fn new(filename: &str, cache_size: u64) -> std::io::Result { + let mut reader = File::open(filename)?; + let metadata = reader.metadata()?; + let fsize = metadata.len(); + + let cache_size = cache_size.min(fsize); + let mut cache_buf = vec![0; cache_size as usize]; + reader.read_exact(&mut cache_buf)?; + println!("Opened: {}, size: {}, cache_size: {}", filename, fsize, cache_size); + + Ok(Self { + reader, + cache_size, + cache_buf, + cur_off: 0, + fsize, + }) + } + + pub fn get_file_size(&self) -> u64 { + self.fsize + } + + pub fn read(&mut self, read_buf: &mut [u8]) -> ANNResult<()> { + let n_bytes = read_buf.len() as u64; + if n_bytes <= (self.cache_size - self.cur_off) { + // case 1: cache contains all data + read_buf.copy_from_slice(&self.cache_buf[(self.cur_off as usize)..(self.cur_off as usize + n_bytes as usize)]); + self.cur_off += n_bytes; + } else { + // case 2: cache contains some data + let cached_bytes = self.cache_size - self.cur_off; + if n_bytes - cached_bytes > self.fsize - self.reader.stream_position()? 
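+            // i.e. the bytes still needed beyond the cached tail exceed what is left in the file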
{ + return Err(ANNError::log_index_error(format!( + "Reading beyond end of file, n_bytes: {} cached_bytes: {} fsize: {} current pos: {}", + n_bytes, cached_bytes, self.fsize, self.reader.stream_position()?)) + ); + } + + read_buf[..cached_bytes as usize].copy_from_slice(&self.cache_buf[self.cur_off as usize..]); + // go to disk and fetch more data + self.reader.read_exact(&mut read_buf[cached_bytes as usize..])?; + // reset cur off + self.cur_off = self.cache_size; + + let size_left = self.fsize - self.reader.stream_position()?; + if size_left >= self.cache_size { + self.reader.read_exact(&mut self.cache_buf)?; + self.cur_off = 0; + } + // note that if size_left < cache_size, then cur_off = cache_size, + // so subsequent reads will all be directly from file + } + Ok(()) + } + + pub fn read_u32(&mut self) -> ANNResult { + let mut bytes = [0u8; 4]; + self.read(&mut bytes)?; + Ok(u32::from_le_bytes(bytes)) + } +} + +#[cfg(test)] +mod cached_reader_test { + use std::fs; + + use super::*; + + #[test] + fn cached_reader_works() { + let file_name = "cached_reader_works_test.bin"; + //npoints=2, dim=8, 2 vectors [1.0;8] [2.0;8] + let data: [u8; 72] = [2, 0, 1, 2, 8, 0, 1, 3, + 0x00, 0x01, 0x80, 0x3f, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x40, 0x40, 0x00, 0x00, 0x80, 0x40, + 0x00, 0x00, 0xa0, 0x40, 0x00, 0x00, 0xc0, 0x40, 0x00, 0x00, 0xe0, 0x40, 0x00, 0x00, 0x00, 0x41, + 0x00, 0x00, 0x10, 0x41, 0x00, 0x00, 0x20, 0x41, 0x00, 0x00, 0x30, 0x41, 0x00, 0x00, 0x40, 0x41, + 0x00, 0x00, 0x50, 0x41, 0x00, 0x00, 0x60, 0x41, 0x00, 0x00, 0x70, 0x41, 0x00, 0x11, 0x80, 0x41]; + std::fs::write(file_name, data).expect("Failed to write sample file"); + + let mut reader = CachedReader::new(file_name, 8).unwrap(); + assert_eq!(reader.get_file_size(), 72); + assert_eq!(reader.cache_size, 8); + + let mut all_from_cache_buf = vec![0; 4]; + reader.read(all_from_cache_buf.as_mut_slice()).unwrap(); + assert_eq!(all_from_cache_buf, [2, 0, 1, 2]); + assert_eq!(reader.cur_off, 4); + + let mut partial_from_cache_buf = vec![0; 6]; + reader.read(partial_from_cache_buf.as_mut_slice()).unwrap(); + assert_eq!(partial_from_cache_buf, [8, 0, 1, 3, 0x00, 0x01]); + assert_eq!(reader.cur_off, 0); + + let mut over_cache_size_buf = vec![0; 60]; + reader.read(over_cache_size_buf.as_mut_slice()).unwrap(); + assert_eq!( + over_cache_size_buf, + [0x80, 0x3f, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x40, 0x40, 0x00, 0x00, 0x80, 0x40, + 0x00, 0x00, 0xa0, 0x40, 0x00, 0x00, 0xc0, 0x40, 0x00, 0x00, 0xe0, 0x40, 0x00, 0x00, 0x00, 0x41, + 0x00, 0x00, 0x10, 0x41, 0x00, 0x00, 0x20, 0x41, 0x00, 0x00, 0x30, 0x41, 0x00, 0x00, 0x40, 0x41, + 0x00, 0x00, 0x50, 0x41, 0x00, 0x00, 0x60, 0x41, 0x00, 0x00, 0x70, 0x41, 0x00, 0x11] + ); + + let mut remaining_less_than_cache_size_buf = vec![0; 2]; + reader.read(remaining_less_than_cache_size_buf.as_mut_slice()).unwrap(); + assert_eq!(remaining_less_than_cache_size_buf, [0x80, 0x41]); + assert_eq!(reader.cur_off, reader.cache_size); + + fs::remove_file(file_name).expect("Failed to delete file"); + } + + #[test] + #[should_panic(expected = "n_bytes: 73 cached_bytes: 8 fsize: 72 current pos: 8")] + fn failed_for_reading_beyond_end_of_file() { + let file_name = "failed_for_reading_beyond_end_of_file_test.bin"; + //npoints=2, dim=8, 2 vectors [1.0;8] [2.0;8] + let data: [u8; 72] = [2, 0, 1, 2, 8, 0, 1, 3, + 0x00, 0x01, 0x80, 0x3f, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x40, 0x40, 0x00, 0x00, 0x80, 0x40, + 0x00, 0x00, 0xa0, 0x40, 0x00, 0x00, 0xc0, 0x40, 0x00, 0x00, 0xe0, 0x40, 0x00, 0x00, 0x00, 0x41, + 0x00, 0x00, 0x10, 0x41, 
0x00, 0x00, 0x20, 0x41, 0x00, 0x00, 0x30, 0x41, 0x00, 0x00, 0x40, 0x41, + 0x00, 0x00, 0x50, 0x41, 0x00, 0x00, 0x60, 0x41, 0x00, 0x00, 0x70, 0x41, 0x00, 0x11, 0x80, 0x41]; + std::fs::write(file_name, data).expect("Failed to write sample file"); + + let mut reader = CachedReader::new(file_name, 8).unwrap(); + fs::remove_file(file_name).expect("Failed to delete file"); + + let mut over_size_buf = vec![0; 73]; + reader.read(over_size_buf.as_mut_slice()).unwrap(); + } +} + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/utils/cached_writer.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/utils/cached_writer.rs new file mode 100644 index 0000000..d3929be --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/utils/cached_writer.rs @@ -0,0 +1,142 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +use std::io::{Write, Seek, SeekFrom}; +use std::fs::{OpenOptions, File}; +use std::path::Path; + +pub struct CachedWriter { + /// File writer + writer: File, + + /// # bytes to cache for one shot write + cache_size: u64, + + /// Underlying buf for cache + cache_buf: Vec, + + /// Offset into cache_buf for cur_pos + cur_off: u64, + + /// File size + fsize: u64, +} + +impl CachedWriter { + pub fn new(filename: &str, cache_size: u64) -> std::io::Result { + let writer = OpenOptions::new() + .write(true) + .create(true) + .open(Path::new(filename))?; + + if cache_size == 0 { + return Err(std::io::Error::new(std::io::ErrorKind::Other, "Cache size must be greater than 0")); + } + + println!("Opened: {}, cache_size: {}", filename, cache_size); + Ok(Self { + writer, + cache_size, + cache_buf: vec![0; cache_size as usize], + cur_off: 0, + fsize: 0, + }) + } + + pub fn flush(&mut self) -> std::io::Result<()> { + // dump any remaining data in memory + if self.cur_off > 0 { + self.flush_cache()?; + } + + self.writer.flush()?; + println!("Finished writing {}B", self.fsize); + Ok(()) + } + + pub fn get_file_size(&self) -> u64 { + self.fsize + } + + /// Writes n_bytes from write_buf to the underlying cache + pub fn write(&mut self, write_buf: &[u8]) -> std::io::Result<()> { + let n_bytes = write_buf.len() as u64; + if n_bytes <= (self.cache_size - self.cur_off) { + // case 1: cache can take all data + self.cache_buf[(self.cur_off as usize)..((self.cur_off + n_bytes) as usize)].copy_from_slice(&write_buf[..n_bytes as usize]); + self.cur_off += n_bytes; + } else { + // case 2: cache cant take all data + // go to disk and write existing cache data + self.writer.write_all(&self.cache_buf[..self.cur_off as usize])?; + self.fsize += self.cur_off; + // write the new data to disk + self.writer.write_all(write_buf)?; + self.fsize += n_bytes; + // clear cache data and reset cur_off + self.cache_buf.fill(0); + self.cur_off = 0; + } + Ok(()) + } + + pub fn reset(&mut self) -> std::io::Result<()> { + self.flush_cache()?; + self.writer.seek(SeekFrom::Start(0))?; + Ok(()) + } + + fn flush_cache(&mut self) -> std::io::Result<()> { + self.writer.write_all(&self.cache_buf[..self.cur_off as usize])?; + self.fsize += self.cur_off; + self.cache_buf.fill(0); + self.cur_off = 0; + Ok(()) + } +} + +impl Drop for CachedWriter { + fn drop(&mut self) { + let _ = self.flush(); + } +} + +#[cfg(test)] +mod cached_writer_test { + use std::fs; + + use super::*; + + #[test] + fn cached_writer_works() { + let file_name = "cached_writer_works_test.bin"; + //npoints=2, dim=8, 2 vectors [1.0;8] [2.0;8] + let 
data: [u8; 72] = [2, 0, 1, 2, 8, 0, 1, 3, + 0x00, 0x01, 0x80, 0x3f, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x40, 0x40, 0x00, 0x00, 0x80, 0x40, + 0x00, 0x00, 0xa0, 0x40, 0x00, 0x00, 0xc0, 0x40, 0x00, 0x00, 0xe0, 0x40, 0x00, 0x00, 0x00, 0x41, + 0x00, 0x00, 0x10, 0x41, 0x00, 0x00, 0x20, 0x41, 0x00, 0x00, 0x30, 0x41, 0x00, 0x00, 0x40, 0x41, + 0x00, 0x00, 0x50, 0x41, 0x00, 0x00, 0x60, 0x41, 0x00, 0x00, 0x70, 0x41, 0x00, 0x11, 0x80, 0x41]; + + let mut writer = CachedWriter::new(file_name, 8).unwrap(); + assert_eq!(writer.get_file_size(), 0); + assert_eq!(writer.cache_size, 8); + assert_eq!(writer.get_file_size(), 0); + + let cache_all_buf = &data[0..4]; + writer.write(cache_all_buf).unwrap(); + assert_eq!(&writer.cache_buf[..4], cache_all_buf); + assert_eq!(&writer.cache_buf[4..], vec![0; 4]); + assert_eq!(writer.cur_off, 4); + assert_eq!(writer.get_file_size(), 0); + + let write_all_buf = &data[4..10]; + writer.write(write_all_buf).unwrap(); + assert_eq!(writer.cache_buf, vec![0; 8]); + assert_eq!(writer.cur_off, 0); + assert_eq!(writer.get_file_size(), 10); + + fs::remove_file(file_name).expect("Failed to delete file"); + } +} + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/utils/file_util.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/utils/file_util.rs new file mode 100644 index 0000000..f187d01 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/utils/file_util.rs @@ -0,0 +1,377 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +#![warn(missing_debug_implementations, missing_docs)] + +//! File operations + +use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; +use std::{mem, io}; +use std::fs::{self, File, OpenOptions}; +use std::io::{Read, BufReader, Write, Seek, SeekFrom}; +use std::path::Path; + +use crate::model::data_store::DatasetDto; + +/// Read metadata of data file. +pub fn load_metadata_from_file(file_name: &str) -> std::io::Result<(usize, usize)> { + let file = File::open(file_name)?; + let mut reader = BufReader::new(file); + + let npoints = reader.read_i32::()? as usize; + let ndims = reader.read_i32::()? as usize; + + Ok((npoints, ndims)) +} + +/// Read the deleted vertex ids from file. +pub fn load_ids_to_delete_from_file(file_name: &str) -> std::io::Result<(usize, Vec)> { + // The first 4 bytes are the number of vector ids. + // The rest of the file are the vector ids in the format of usize. + // The vector ids are sorted in ascending order. + let mut file = File::open(file_name)?; + let num_ids = file.read_u32::()? as usize; + + let mut ids = Vec::with_capacity(num_ids); + for _ in 0..num_ids { + let id = file.read_u32::()?; + ids.push(id); + } + + Ok((num_ids, ids)) +} + +/// Copy data from file +/// # Arguments +/// * `bin_file` - filename where the data is +/// * `data` - destination dataset dto to which the data is copied +/// * `pts_offset` - offset of points. data will be loaded after this point in dataset +/// * `npts` - number of points read from bin_file +/// * `dim` - point dimension read from bin_file +/// * `rounded_dim` - rounded dimension (padding zero if it's > dim) +/// # Return +/// * `npts` - number of points read from bin_file +/// * `dim` - point dimension read from bin_file +pub fn copy_aligned_data_from_file( + bin_file: &str, + dataset_dto: DatasetDto, + pts_offset: usize, +) -> std::io::Result<(usize, usize)> { + let mut reader = File::open(bin_file)?; + + let npts = reader.read_i32::()? 
as usize; + let dim = reader.read_i32::()? as usize; + let rounded_dim = dataset_dto.rounded_dim; + let offset = pts_offset * rounded_dim; + + for i in 0..npts { + let data_slice = &mut dataset_dto.data[offset + i * rounded_dim..offset + i * rounded_dim + dim]; + let mut buf = vec![0u8; dim * mem::size_of::()]; + reader.read_exact(&mut buf)?; + + let ptr = buf.as_ptr() as *const T; + let temp_slice = unsafe { std::slice::from_raw_parts(ptr, dim) }; + data_slice.copy_from_slice(temp_slice); + + (i * rounded_dim + dim..i * rounded_dim + rounded_dim).for_each(|j| { + dataset_dto.data[j] = T::default(); + }); + } + + Ok((npts, dim)) +} + +/// Open a file to write +/// # Arguments +/// * `writer` - mutable File reference +/// * `file_name` - file name +#[inline] +pub fn open_file_to_write(file_name: &str) -> std::io::Result { + OpenOptions::new() + .write(true) + .create(true) + .open(Path::new(file_name)) +} + +/// Delete a file +/// # Arguments +/// * `file_name` - file name +pub fn delete_file(file_name: &str) -> std::io::Result<()> { + if file_exists(file_name) { + fs::remove_file(file_name)?; + } + + Ok(()) +} + +/// Check whether file exists or not +pub fn file_exists(filename: &str) -> bool { + std::path::Path::new(filename).exists() +} + +/// Save data to file +/// # Arguments +/// * `filename` - filename where the data is +/// * `data` - information data +/// * `npts` - number of points +/// * `ndims` - point dimension +/// * `aligned_dim` - aligned dimension +/// * `offset` - data offset in file +pub fn save_data_in_base_dimensions( + filename: &str, + data: &mut [T], + npts: usize, + ndims: usize, + aligned_dim: usize, + offset: usize, +) -> std::io::Result { + let mut writer = open_file_to_write(filename)?; + let npts_i32 = npts as i32; + let ndims_i32 = ndims as i32; + let bytes_written = 2 * std::mem::size_of::() + npts * ndims * (std::mem::size_of::()); + + writer.seek(std::io::SeekFrom::Start(offset as u64))?; + writer.write_all(&npts_i32.to_le_bytes())?; + writer.write_all(&ndims_i32.to_le_bytes())?; + let data_ptr = data.as_ptr() as *const u8; + for i in 0..npts { + let middle_offset = i * aligned_dim * std::mem::size_of::(); + let middle_slice = unsafe { std::slice::from_raw_parts(data_ptr.add(middle_offset), ndims * std::mem::size_of::()) }; + writer.write_all(middle_slice)?; + } + writer.flush()?; + Ok(bytes_written) +} + +/// Read data file +/// # Arguments +/// * `bin_file` - filename where the data is +/// * `file_offset` - data offset in file +/// * `data` - information data +/// * `npts` - number of points +/// * `ndims` - point dimension +pub fn load_bin( + bin_file: &str, + file_offset: usize) -> std::io::Result<(Vec, usize, usize)> +{ + let mut reader = File::open(bin_file)?; + reader.seek(std::io::SeekFrom::Start(file_offset as u64))?; + let npts = reader.read_i32::()? as usize; + let dim = reader.read_i32::()? as usize; + + let size = npts * dim * std::mem::size_of::(); + let mut buf = vec![0u8; size]; + reader.read_exact(&mut buf)?; + + let ptr = buf.as_ptr() as *const T; + let data = unsafe { std::slice::from_raw_parts(ptr, npts * dim)}; + + Ok((data.to_vec(), npts, dim)) +} + +/// Get file size +pub fn get_file_size(filename: &str) -> io::Result { + let reader = File::open(filename)?; + let metadata = reader.metadata()?; + Ok(metadata.len()) +} + +macro_rules! 
save_bin { + ($name:ident, $t:ty, $write_func:ident) => { + /// Write data into file + pub fn $name(filename: &str, data: &[$t], num_pts: usize, dims: usize, offset: usize) -> std::io::Result<usize> { + let mut writer = open_file_to_write(filename)?; + + println!("Writing bin: {}", filename); + writer.seek(SeekFrom::Start(offset as u64))?; + let num_pts_i32 = num_pts as i32; + let dims_i32 = dims as i32; + let bytes_written = num_pts * dims * mem::size_of::<$t>() + 2 * mem::size_of::<i32>(); + + writer.write_i32::<LittleEndian>(num_pts_i32)?; + writer.write_i32::<LittleEndian>(dims_i32)?; + println!("bin: #pts = {}, #dims = {}, size = {}B", num_pts, dims, bytes_written); + + for item in data.iter() { + writer.$write_func::<LittleEndian>(*item)?; + } + + writer.flush()?; + + println!("Finished writing bin."); + Ok(bytes_written) + } + }; +} + +save_bin!(save_bin_f32, f32, write_f32); +save_bin!(save_bin_u64, u64, write_u64); +save_bin!(save_bin_u32, u32, write_u32); + +#[cfg(test)] +mod file_util_test { + use crate::model::data_store::InmemDataset; + use std::fs; + use super::*; + + pub const DIM_8: usize = 8; + + #[test] + fn load_metadata_test() { + let file_name = "test_load_metadata_test.bin"; + let data = [200, 0, 0, 0, 128, 0, 0, 0]; // 200 and 128 in little endian bytes + std::fs::write(file_name, data).expect("Failed to write sample file"); + match load_metadata_from_file(file_name) { + Ok((npoints, ndims)) => { + assert!(npoints == 200); + assert!(ndims == 128); + }, + Err(_e) => {}, + } + fs::remove_file(file_name).expect("Failed to delete file"); + } + + #[test] + fn load_data_test() { + let file_name = "test_load_data_test.bin"; + //npoints=2, dim=8, 2 vectors [1.0;8] [2.0;8] + let data: [u8; 72] = [2, 0, 0, 0, 8, 0, 0, 0, + 0x00, 0x00, 0x80, 0x3f, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x40, 0x40, 0x00, 0x00, 0x80, 0x40, + 0x00, 0x00, 0xa0, 0x40, 0x00, 0x00, 0xc0, 0x40, 0x00, 0x00, 0xe0, 0x40, 0x00, 0x00, 0x00, 0x41, + 0x00, 0x00, 0x10, 0x41, 0x00, 0x00, 0x20, 0x41, 0x00, 0x00, 0x30, 0x41, 0x00, 0x00, 0x40, 0x41, + 0x00, 0x00, 0x50, 0x41, 0x00, 0x00, 0x60, 0x41, 0x00, 0x00, 0x70, 0x41, 0x00, 0x00, 0x80, 0x41]; + std::fs::write(file_name, data).expect("Failed to write sample file"); + + let mut dataset = InmemDataset::<f32>::new(2, 1f32).unwrap(); + + match copy_aligned_data_from_file(file_name, dataset.into_dto(), 0) { + Ok((num_points, dim)) => { + fs::remove_file(file_name).expect("Failed to delete file"); + assert!(num_points == 2); + assert!(dim == 8); + assert!(dataset.data.len() == 16); + + let first_vertex = dataset.get_vertex(0).unwrap(); + let second_vertex = dataset.get_vertex(1).unwrap(); + + assert!(*first_vertex.vector() == [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]); + assert!(*second_vertex.vector() == [9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0]); + }, + Err(e) => { + fs::remove_file(file_name).expect("Failed to delete file"); + panic!("{}", e) + }, + } + } + + #[test] + fn open_file_to_write_test() { + let file_name = "test_open_file_to_write_test.bin"; + let mut writer = File::create(file_name).unwrap(); + let data = [200, 0, 0, 0, 128, 0, 0, 0]; + writer.write(&data).expect("Failed to write sample file"); + + let _ = open_file_to_write(file_name); + + fs::remove_file(file_name).expect("Failed to delete file"); + } + + #[test] + fn delete_file_test() { + let file_name = "test_delete_file_test.bin"; + let mut file = File::create(file_name).unwrap(); + writeln!(file, "test delete file").unwrap(); + + let result = delete_file(file_name); + + assert!(result.is_ok()); + assert!(fs::metadata(file_name).is_err()); + } + 
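    // Editor's note: illustrative sketch, not part of the upstream DiskANN sources.
    // It demonstrates the bin-file convention used throughout this module -- an 8-byte
    // header (npts and ndims, each a little-endian 32-bit integer) followed by the
    // row-major payload -- by round-tripping save_bin_u32 and load_metadata_from_file.
    // The test name is hypothetical.
    #[test]
    fn bin_header_roundtrip_sketch() {
        let file_name = "bin_header_roundtrip_sketch.bin";
        let data = vec![7u32, 8u32, 9u32];

        // 3 points of dimension 1 => 8 header bytes + 3 * 4 payload bytes.
        let bytes_written = save_bin_u32(file_name, &data, 3, 1, 0).unwrap();
        assert_eq!(bytes_written, 2 * std::mem::size_of::<i32>() + 3 * std::mem::size_of::<u32>());

        // The header alone is enough to recover the matrix shape.
        let (npoints, ndims) = load_metadata_from_file(file_name).unwrap();
        assert_eq!((npoints, ndims), (3, 1));

        fs::remove_file(file_name).expect("Failed to delete file");
    }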
#[test] + fn save_data_in_base_dimensions_test() { + //npoints=2, dim=8 + let mut data: [u8; 72] = [2, 0, 0, 0, 8, 0, 0, 0, + 0x00, 0x00, 0x80, 0x3f, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x40, 0x40, 0x00, 0x00, 0x80, 0x40, + 0x00, 0x00, 0xa0, 0x40, 0x00, 0x00, 0xc0, 0x40, 0x00, 0x00, 0xe0, 0x40, 0x00, 0x00, 0x00, 0x41, + 0x00, 0x00, 0x10, 0x41, 0x00, 0x00, 0x20, 0x41, 0x00, 0x00, 0x30, 0x41, 0x00, 0x00, 0x40, 0x41, + 0x00, 0x00, 0x50, 0x41, 0x00, 0x00, 0x60, 0x41, 0x00, 0x00, 0x70, 0x41, 0x00, 0x00, 0x80, 0x41]; + let num_points = 2; + let dim = DIM_8; + let data_file = "save_data_in_base_dimensions_test.data"; + match save_data_in_base_dimensions(data_file, &mut data, num_points, dim, DIM_8, 0) { + Ok(num) => { + assert!(file_exists(data_file)); + assert_eq!(num, 2 * std::mem::size_of::() + num_points * dim * std::mem::size_of::()); + fs::remove_file(data_file).expect("Failed to delete file"); + }, + Err(e) => { + fs::remove_file(data_file).expect("Failed to delete file"); + panic!("{}", e) + } + } + } + + #[test] + fn save_bin_test() { + let filename = "save_bin_test"; + let data = vec![0u64, 1u64, 2u64]; + let num_pts = data.len(); + let dims = 1; + let bytes_written = save_bin_u64(filename, &data, num_pts, dims, 0).unwrap(); + assert_eq!(bytes_written, 32); + + let mut file = File::open(filename).unwrap(); + let mut buffer = vec![]; + + let npts_read = file.read_i32::().unwrap() as usize; + let dims_read = file.read_i32::().unwrap() as usize; + + file.read_to_end(&mut buffer).unwrap(); + let data_read: Vec = buffer + .chunks_exact(8) + .map(|b| u64::from_le_bytes([b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7]])) + .collect(); + + std::fs::remove_file(filename).unwrap(); + + assert_eq!(num_pts, npts_read); + assert_eq!(dims, dims_read); + assert_eq!(data, data_read); + } + + #[test] + fn load_bin_test() { + let file_name = "load_bin_test"; + let data = vec![0u64, 1u64, 2u64]; + let num_pts = data.len(); + let dims = 1; + let bytes_written = save_bin_u64(file_name, &data, num_pts, dims, 0).unwrap(); + assert_eq!(bytes_written, 32); + + let (load_data, load_num_pts, load_dims) = load_bin::(file_name, 0).unwrap(); + assert_eq!(load_num_pts, num_pts); + assert_eq!(load_dims, dims); + assert_eq!(load_data, data); + std::fs::remove_file(file_name).unwrap(); + } + + #[test] + fn load_bin_offset_test() { + let offset:usize = 32; + let file_name = "load_bin_offset_test"; + let data = vec![0u64, 1u64, 2u64]; + let num_pts = data.len(); + let dims = 1; + let bytes_written = save_bin_u64(file_name, &data, num_pts, dims, offset).unwrap(); + assert_eq!(bytes_written, 32); + + let (load_data, load_num_pts, load_dims) = load_bin::(file_name, offset).unwrap(); + assert_eq!(load_num_pts, num_pts); + assert_eq!(load_dims, dims); + assert_eq!(load_data, data); + std::fs::remove_file(file_name).unwrap(); + } +} + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/utils/hashset_u32.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/utils/hashset_u32.rs new file mode 100644 index 0000000..15db687 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/utils/hashset_u32.rs @@ -0,0 +1,46 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. 
+ */ +use hashbrown::HashSet; +use std::{hash::BuildHasherDefault, ops::{Deref, DerefMut}}; +use fxhash::FxHasher; + +lazy_static::lazy_static! { + /// Singleton hasher. + static ref HASHER: BuildHasherDefault<FxHasher> = { + BuildHasherDefault::<FxHasher>::default() + }; +} + +pub struct HashSetForU32 { + hashset: HashSet<u32, BuildHasherDefault<FxHasher>>, +} + +impl HashSetForU32 { + pub fn with_capacity(capacity: usize) -> HashSetForU32 { + let hashset = HashSet::<u32, BuildHasherDefault<FxHasher>>::with_capacity_and_hasher(capacity, HASHER.clone()); + HashSetForU32 { + hashset + } + } +} + +impl Deref for HashSetForU32 { + type Target = HashSet<u32, BuildHasherDefault<FxHasher>>; + + fn deref(&self) -> &Self::Target { + &self.hashset + } +} + +impl DerefMut for HashSetForU32 { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.hashset + } +} + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/utils/kmeans.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/utils/kmeans.rs new file mode 100644 index 0000000..d1edffa --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/utils/kmeans.rs @@ -0,0 +1,430 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +#![warn(missing_debug_implementations, missing_docs)] + +//! K-means clustering + +use rand::{distributions::Uniform, prelude::Distribution, thread_rng}; +use rayon::prelude::*; +use std::cmp::min; + +use crate::common::ANNResult; +use crate::utils::math_util::{calc_distance, compute_closest_centers, compute_vecs_l2sq}; + +/// Run Lloyds one iteration +/// Given data in row-major num_points * dim, and centers in row-major +/// num_centers * dim and squared lengths of data points, output the closest +/// center to each data point, update centers, and also return inverted index. +/// If closest_centers == NULL, will allocate memory and return. +/// Similarly, if closest_docs == NULL, will allocate memory and return. 
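// Editor's note (added commentary, not in the upstream DiskANN sources): one call to
// lloyds_iter performs a single Lloyd's update. It (1) assigns every point to its nearest
// center via compute_closest_centers, (2) recomputes each center as the mean of the points
// assigned to it, and (3) optionally returns the residual
//     residual = sum_i || x_i - c_{assign(i)} ||^2
// (the k-means objective), which run_lloyds below uses as its convergence test.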
+#[allow(clippy::too_many_arguments)] +fn lloyds_iter( + data: &[f32], + num_points: usize, + dim: usize, + centers: &mut [f32], + num_centers: usize, + docs_l2sq: &[f32], + mut closest_docs: &mut Vec>, + closest_center: &mut [u32], +) -> ANNResult { + let compute_residual = true; + + closest_docs.iter_mut().for_each(|doc| doc.clear()); + + compute_closest_centers( + data, + num_points, + dim, + centers, + num_centers, + 1, + closest_center, + Some(&mut closest_docs), + Some(docs_l2sq), + )?; + + centers.fill(0.0); + + centers + .par_chunks_mut(dim) + .enumerate() + .for_each(|(c, center)| { + let mut cluster_sum = vec![0.0; dim]; + for &doc_index in &closest_docs[c] { + let current = &data[doc_index * dim..(doc_index + 1) * dim]; + for (j, current_val) in current.iter().enumerate() { + cluster_sum[j] += *current_val as f64; + } + } + if !closest_docs[c].is_empty() { + for (i, sum_val) in cluster_sum.iter().enumerate() { + center[i] = (*sum_val / closest_docs[c].len() as f64) as f32; + } + } + }); + + let mut residual = 0.0; + if compute_residual { + let buf_pad: usize = 32; + let chunk_size: usize = 2 * 8192; + let nchunks = + num_points / chunk_size + (if num_points % chunk_size == 0 { 0 } else { 1 } as usize); + + let mut residuals: Vec = vec![0.0; nchunks * buf_pad]; + + residuals + .par_iter_mut() + .enumerate() + .for_each(|(chunk, res)| { + for d in (chunk * chunk_size)..min(num_points, (chunk + 1) * chunk_size) { + *res += calc_distance( + &data[d * dim..(d + 1) * dim], + ¢ers[closest_center[d] as usize * dim..], + dim, + ); + } + }); + + for chunk in 0..nchunks { + residual += residuals[chunk * buf_pad]; + } + } + + Ok(residual) +} + +/// Run Lloyds until max_reps or stopping criterion +/// If you pass NULL for closest_docs and closest_center, it will NOT return +/// the results, else it will assume appropriate allocation as closest_docs = +/// new vec [num_centers], and closest_center = new size_t[num_points] +/// Final centers are output in centers as row-major num_centers * dim. +fn run_lloyds( + data: &[f32], + num_points: usize, + dim: usize, + centers: &mut [f32], + num_centers: usize, + max_reps: usize, +) -> ANNResult<(Vec>, Vec, f32)> { + let mut residual = f32::MAX; + + let mut closest_docs = vec![Vec::new(); num_centers]; + let mut closest_center = vec![0; num_points]; + + let mut docs_l2sq = vec![0.0; num_points]; + compute_vecs_l2sq(&mut docs_l2sq, data, num_points, dim); + + let mut old_residual; + + for i in 0..max_reps { + old_residual = residual; + + residual = lloyds_iter( + data, + num_points, + dim, + centers, + num_centers, + &docs_l2sq, + &mut closest_docs, + &mut closest_center, + )?; + + if (i != 0 && (old_residual - residual) / residual < 0.00001) || (residual < f32::EPSILON) { + println!( + "Residuals unchanged: {} becomes {}. 
Early termination.", + old_residual, residual + ); + break; + } + } + + Ok((closest_docs, closest_center, residual)) +} + +/// Assume memory allocated for pivot_data as new float[num_centers * dim] +/// and select randomly num_centers points as pivots +fn selecting_pivots( + data: &[f32], + num_points: usize, + dim: usize, + pivot_data: &mut [f32], + num_centers: usize, +) { + let mut picked = Vec::new(); + let mut rng = thread_rng(); + let distribution = Uniform::from(0..num_points); + + for j in 0..num_centers { + let mut tmp_pivot = distribution.sample(&mut rng); + while picked.contains(&tmp_pivot) { + tmp_pivot = distribution.sample(&mut rng); + } + picked.push(tmp_pivot); + let data_offset = tmp_pivot * dim; + let pivot_offset = j * dim; + pivot_data[pivot_offset..pivot_offset + dim] + .copy_from_slice(&data[data_offset..data_offset + dim]); + } +} + +/// Select pivots in k-means++ algorithm +/// Points that are farther away from the already chosen centroids +/// have a higher probability of being selected as the next centroid. +/// The k-means++ algorithm helps avoid poor initial centroid +/// placement that can result in suboptimal clustering. +fn k_meanspp_selecting_pivots( + data: &[f32], + num_points: usize, + dim: usize, + pivot_data: &mut [f32], + num_centers: usize, +) { + if num_points > (1 << 23) { + println!("ERROR: n_pts {} currently not supported for k-means++, maximum is 8388608. Falling back to random pivot selection.", num_points); + selecting_pivots(data, num_points, dim, pivot_data, num_centers); + return; + } + + let mut picked: Vec = Vec::new(); + let mut rng = thread_rng(); + let real_distribution = Uniform::from(0.0..1.0); + let int_distribution = Uniform::from(0..num_points); + + let init_id = int_distribution.sample(&mut rng); + let mut num_picked = 1; + + picked.push(init_id); + let init_data_offset = init_id * dim; + pivot_data[0..dim].copy_from_slice(&data[init_data_offset..init_data_offset + dim]); + + let mut dist = vec![0.0; num_points]; + + dist.par_iter_mut().enumerate().for_each(|(i, dist_i)| { + *dist_i = calc_distance( + &data[i * dim..(i + 1) * dim], + &data[init_id * dim..(init_id + 1) * dim], + dim, + ); + }); + + let mut dart_val: f64; + let mut tmp_pivot = 0; + let mut sum_flag = false; + + while num_picked < num_centers { + dart_val = real_distribution.sample(&mut rng); + + let mut sum: f64 = 0.0; + for item in dist.iter().take(num_points) { + sum += *item as f64; + } + if sum == 0.0 { + sum_flag = true; + } + + dart_val *= sum; + + let mut prefix_sum: f64 = 0.0; + for (i, pivot) in dist.iter().enumerate().take(num_points) { + tmp_pivot = i; + if dart_val >= prefix_sum && dart_val < (prefix_sum + *pivot as f64) { + break; + } + + prefix_sum += *pivot as f64; + } + + if picked.contains(&tmp_pivot) && !sum_flag { + continue; + } + + picked.push(tmp_pivot); + let pivot_offset = num_picked * dim; + let data_offset = tmp_pivot * dim; + pivot_data[pivot_offset..pivot_offset + dim] + .copy_from_slice(&data[data_offset..data_offset + dim]); + + dist.par_iter_mut().enumerate().for_each(|(i, dist_i)| { + *dist_i = (*dist_i).min(calc_distance( + &data[i * dim..(i + 1) * dim], + &data[tmp_pivot * dim..(tmp_pivot + 1) * dim], + dim, + )); + }); + + num_picked += 1; + } +} + +/// k-means algorithm interface +pub fn k_means_clustering( + data: &[f32], + num_points: usize, + dim: usize, + centers: &mut [f32], + num_centers: usize, + max_reps: usize, +) -> ANNResult<(Vec>, Vec, f32)> { + k_meanspp_selecting_pivots(data, num_points, dim, centers, num_centers); + 
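    // Editor's note (added commentary, not in the upstream DiskANN sources): clustering is
    // two-phase. k_meanspp_selecting_pivots above seeds `centers` with k-means++ (each new
    // center is sampled with probability proportional to its squared distance from the
    // centers picked so far), and run_lloyds below refines those seeds with at most
    // max_reps Lloyd's iterations, stopping early once the relative drop in residual
    // falls below 0.00001.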
let (closest_docs, closest_center, residual) = + run_lloyds(data, num_points, dim, centers, num_centers, max_reps)?; + Ok((closest_docs, closest_center, residual)) +} + +#[cfg(test)] +mod kmeans_test { + use super::*; + use approx::assert_relative_eq; + use rand::Rng; + + #[test] + fn lloyds_iter_test() { + let dim = 2; + let num_points = 10; + let num_centers = 3; + + let data: Vec = (1..=num_points * dim).map(|x| x as f32).collect(); + let mut centers = [1.0, 2.0, 7.0, 8.0, 19.0, 20.0]; + + let mut closest_docs: Vec> = vec![vec![]; num_centers]; + let mut closest_center: Vec = vec![0; num_points]; + let docs_l2sq: Vec = data + .chunks(dim) + .map(|chunk| chunk.iter().map(|val| val.powi(2)).sum()) + .collect(); + + let residual = lloyds_iter( + &data, + num_points, + dim, + &mut centers, + num_centers, + &docs_l2sq, + &mut closest_docs, + &mut closest_center, + ) + .unwrap(); + + let expected_centers: [f32; 6] = [2.0, 3.0, 9.0, 10.0, 17.0, 18.0]; + let expected_closest_docs: Vec> = + vec![vec![0, 1], vec![2, 3, 4, 5, 6], vec![7, 8, 9]]; + let expected_closest_center: [u32; 10] = [0, 0, 1, 1, 1, 1, 1, 2, 2, 2]; + let expected_residual: f32 = 100.0; + + // sort data for assert + centers.sort_by(|a, b| a.partial_cmp(b).unwrap()); + for inner_vec in &mut closest_docs { + inner_vec.sort(); + } + closest_center.sort_by(|a, b| a.partial_cmp(b).unwrap()); + + assert_eq!(centers, expected_centers); + assert_eq!(closest_docs, expected_closest_docs); + assert_eq!(closest_center, expected_closest_center); + assert_relative_eq!(residual, expected_residual, epsilon = 1.0e-6_f32); + } + + #[test] + fn run_lloyds_test() { + let dim = 2; + let num_points = 10; + let num_centers = 3; + let max_reps = 5; + + let data: Vec = (1..=num_points * dim).map(|x| x as f32).collect(); + let mut centers = [1.0, 2.0, 7.0, 8.0, 19.0, 20.0]; + + let (mut closest_docs, mut closest_center, residual) = + run_lloyds(&data, num_points, dim, &mut centers, num_centers, max_reps).unwrap(); + + let expected_centers: [f32; 6] = [3.0, 4.0, 10.0, 11.0, 17.0, 18.0]; + let expected_closest_docs: Vec> = + vec![vec![0, 1, 2], vec![3, 4, 5, 6], vec![7, 8, 9]]; + let expected_closest_center: [u32; 10] = [0, 0, 0, 1, 1, 1, 1, 2, 2, 2]; + let expected_residual: f32 = 72.0; + + // sort data for assert + centers.sort_by(|a, b| a.partial_cmp(b).unwrap()); + for inner_vec in &mut closest_docs { + inner_vec.sort(); + } + closest_center.sort_by(|a, b| a.partial_cmp(b).unwrap()); + + assert_eq!(centers, expected_centers); + assert_eq!(closest_docs, expected_closest_docs); + assert_eq!(closest_center, expected_closest_center); + assert_relative_eq!(residual, expected_residual, epsilon = 1.0e-6_f32); + } + + #[test] + fn selecting_pivots_test() { + let dim = 2; + let num_points = 10; + let num_centers = 3; + + // Generate some random data points + let mut rng = rand::thread_rng(); + let data: Vec = (0..num_points * dim).map(|_| rng.gen()).collect(); + + let mut pivot_data = vec![0.0; num_centers * dim]; + + selecting_pivots(&data, num_points, dim, &mut pivot_data, num_centers); + + // Verify that each pivot point corresponds to a point in the data + for i in 0..num_centers { + let pivot_offset = i * dim; + let pivot = &pivot_data[pivot_offset..(pivot_offset + dim)]; + + // Make sure the pivot is found in the data + let mut found = false; + for j in 0..num_points { + let data_offset = j * dim; + let point = &data[data_offset..(data_offset + dim)]; + + if pivot == point { + found = true; + break; + } + } + assert!(found, "Pivot not found in data"); + 
} + } + + #[test] + fn k_meanspp_selecting_pivots_test() { + let dim = 2; + let num_points = 10; + let num_centers = 3; + + // Generate some random data points + let mut rng = rand::thread_rng(); + let data: Vec = (0..num_points * dim).map(|_| rng.gen()).collect(); + + let mut pivot_data = vec![0.0; num_centers * dim]; + + k_meanspp_selecting_pivots(&data, num_points, dim, &mut pivot_data, num_centers); + + // Verify that each pivot point corresponds to a point in the data + for i in 0..num_centers { + let pivot_offset = i * dim; + let pivot = &pivot_data[pivot_offset..pivot_offset + dim]; + + // Make sure the pivot is found in the data + let mut found = false; + for j in 0..num_points { + let data_offset = j * dim; + let point = &data[data_offset..data_offset + dim]; + + if pivot == point { + found = true; + break; + } + } + assert!(found, "Pivot not found in data"); + } + } +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/utils/math_util.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/utils/math_util.rs new file mode 100644 index 0000000..ef30c76 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/utils/math_util.rs @@ -0,0 +1,481 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +#![warn(missing_debug_implementations, missing_docs)] + +//! Aligned allocator + +extern crate cblas; +extern crate openblas_src; + +use cblas::{sgemm, snrm2, Layout, Transpose}; +use rayon::prelude::*; +use std::{ + cmp::{min, Ordering}, + collections::BinaryHeap, + sync::{Arc, Mutex}, +}; + +use crate::common::{ANNError, ANNResult}; + +struct PivotContainer { + piv_id: usize, + piv_dist: f32, +} + +impl PartialOrd for PivotContainer { + fn partial_cmp(&self, other: &Self) -> Option { + other.piv_dist.partial_cmp(&self.piv_dist) + } +} + +impl Ord for PivotContainer { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + // Treat NaN as less than all other values. + // piv_dist should never be NaN. 
+ self.partial_cmp(other).unwrap_or(Ordering::Less) + } +} + +impl PartialEq for PivotContainer { + fn eq(&self, other: &Self) -> bool { + self.piv_dist == other.piv_dist + } +} + +impl Eq for PivotContainer {} + +/// Calculate the Euclidean distance between two vectors +pub fn calc_distance(vec_1: &[f32], vec_2: &[f32], dim: usize) -> f32 { + let mut dist = 0.0; + for j in 0..dim { + let diff = vec_1[j] - vec_2[j]; + dist += diff * diff; + } + dist +} + +/// Compute L2-squared norms of data stored in row-major num_points * dim, +/// need to be pre-allocated +pub fn compute_vecs_l2sq(vecs_l2sq: &mut [f32], data: &[f32], num_points: usize, dim: usize) { + assert_eq!(vecs_l2sq.len(), num_points); + + vecs_l2sq + .par_iter_mut() + .enumerate() + .for_each(|(n_iter, vec_l2sq)| { + let slice = &data[n_iter * dim..(n_iter + 1) * dim]; + let norm = unsafe { snrm2(dim as i32, slice, 1) }; + *vec_l2sq = norm * norm; + }); +} + +/// Calculate k closest centers to data of num_points * dim (row-major) +/// Centers is num_centers * dim (row-major) +/// data_l2sq has pre-computed squared norms of data +/// centers_l2sq has pre-computed squared norms of centers +/// Pre-allocated center_index will contain id of nearest center +/// Pre-allocated dist_matrix should be num_points * num_centers and contain squared distances +/// Default value of k is 1 +/// Ideally used only by compute_closest_centers +#[allow(clippy::too_many_arguments)] +pub fn compute_closest_centers_in_block( + data: &[f32], + num_points: usize, + dim: usize, + centers: &[f32], + num_centers: usize, + docs_l2sq: &[f32], + centers_l2sq: &[f32], + center_index: &mut [u32], + dist_matrix: &mut [f32], + k: usize, +) -> ANNResult<()> { + if k > num_centers { + return Err(ANNError::log_index_error(format!( + "ERROR: k ({}) > num_centers({})", + k, num_centers + ))); + } + + let ones_a: Vec = vec![1.0; num_centers]; + let ones_b: Vec = vec![1.0; num_points]; + + unsafe { + sgemm( + Layout::RowMajor, + Transpose::None, + Transpose::Ordinary, + num_points as i32, + num_centers as i32, + 1, + 1.0, + docs_l2sq, + 1, + &ones_a, + 1, + 0.0, + dist_matrix, + num_centers as i32, + ); + } + + unsafe { + sgemm( + Layout::RowMajor, + Transpose::None, + Transpose::Ordinary, + num_points as i32, + num_centers as i32, + 1, + 1.0, + &ones_b, + 1, + centers_l2sq, + 1, + 1.0, + dist_matrix, + num_centers as i32, + ); + } + + unsafe { + sgemm( + Layout::RowMajor, + Transpose::None, + Transpose::Ordinary, + num_points as i32, + num_centers as i32, + dim as i32, + -2.0, + data, + dim as i32, + centers, + dim as i32, + 1.0, + dist_matrix, + num_centers as i32, + ); + } + + if k == 1 { + center_index + .par_iter_mut() + .enumerate() + .for_each(|(i, center_idx)| { + let mut min = f32::MAX; + let current = &dist_matrix[i * num_centers..(i + 1) * num_centers]; + let mut min_idx = 0; + for (j, &distance) in current.iter().enumerate() { + if distance < min { + min = distance; + min_idx = j; + } + } + *center_idx = min_idx as u32; + }); + } else { + center_index + .par_chunks_mut(k) + .enumerate() + .for_each(|(i, center_chunk)| { + let current = &dist_matrix[i * num_centers..(i + 1) * num_centers]; + let mut top_k_queue = BinaryHeap::new(); + for (j, &distance) in current.iter().enumerate() { + let this_piv = PivotContainer { + piv_id: j, + piv_dist: distance, + }; + if top_k_queue.len() < k { + top_k_queue.push(this_piv); + } else { + // Safe unwrap, top_k_queue is not empty + #[allow(clippy::unwrap_used)] + let mut top = top_k_queue.peek_mut().unwrap(); + if 
this_piv.piv_dist < top.piv_dist { + *top = this_piv; + } + } + } + for (_j, center_idx) in center_chunk.iter_mut().enumerate() { + if let Some(this_piv) = top_k_queue.pop() { + *center_idx = this_piv.piv_id as u32; + } else { + break; + } + } + }); + } + + Ok(()) +} + +/// Given data in num_points * new_dim row major +/// Pivots stored in full_pivot_data as num_centers * new_dim row major +/// Calculate the k closest pivot for each point and store it in vector +/// closest_centers_ivf (row major, num_points*k) (which needs to be allocated +/// outside) Additionally, if inverted index is not null (and pre-allocated), +/// it will return inverted index for each center, assuming each of the inverted +/// indices is an empty vector. Additionally, if pts_norms_squared is not null, +/// then it will assume that point norms are pre-computed and use those values +#[allow(clippy::too_many_arguments)] +pub fn compute_closest_centers( + data: &[f32], + num_points: usize, + dim: usize, + pivot_data: &[f32], + num_centers: usize, + k: usize, + closest_centers_ivf: &mut [u32], + mut inverted_index: Option<&mut Vec>>, + pts_norms_squared: Option<&[f32]>, +) -> ANNResult<()> { + if k > num_centers { + return Err(ANNError::log_index_error(format!( + "ERROR: k ({}) > num_centers({})", + k, num_centers + ))); + } + + let _is_norm_given_for_pts = pts_norms_squared.is_some(); + + let mut pivs_norms_squared = vec![0.0; num_centers]; + + let mut pts_norms_squared = if let Some(pts_norms) = pts_norms_squared { + pts_norms.to_vec() + } else { + let mut norms_squared = vec![0.0; num_points]; + compute_vecs_l2sq(&mut norms_squared, data, num_points, dim); + norms_squared + }; + + compute_vecs_l2sq(&mut pivs_norms_squared, pivot_data, num_centers, dim); + + let par_block_size = num_points; + let n_blocks = if num_points % par_block_size == 0 { + num_points / par_block_size + } else { + num_points / par_block_size + 1 + }; + + let mut closest_centers = vec![0u32; par_block_size * k]; + let mut distance_matrix = vec![0.0; num_centers * par_block_size]; + + for cur_blk in 0..n_blocks { + let data_cur_blk = &data[cur_blk * par_block_size * dim..]; + let num_pts_blk = min(par_block_size, num_points - cur_blk * par_block_size); + let pts_norms_blk = &mut pts_norms_squared[cur_blk * par_block_size..]; + + compute_closest_centers_in_block( + data_cur_blk, + num_pts_blk, + dim, + pivot_data, + num_centers, + pts_norms_blk, + &pivs_norms_squared, + &mut closest_centers, + &mut distance_matrix, + k, + )?; + + closest_centers_ivf.clone_from_slice(&closest_centers); + + if let Some(inverted_index_inner) = inverted_index.as_mut() { + let inverted_index_arc = Arc::new(Mutex::new(inverted_index_inner)); + + (0..num_points) + .into_par_iter() + .try_for_each(|j| -> ANNResult<()> { + let this_center_id = closest_centers[j] as usize; + let mut guard = inverted_index_arc.lock().map_err(|err| { + ANNError::log_index_error(format!( + "PoisonError: Lock poisoned when acquiring inverted_index_arc, err={}", + err + )) + })?; + guard[this_center_id].push(j); + + Ok(()) + })?; + } + } + + Ok(()) +} + +/// If to_subtract is true, will subtract nearest center from each row. +/// Else will add. +/// Output will be in data_load itself. +/// Nearest centers need to be provided in closest_centers. 
+pub fn process_residuals( + data_load: &mut [f32], + num_points: usize, + dim: usize, + cur_pivot_data: &[f32], + num_centers: usize, + closest_centers: &[u32], + to_subtract: bool, +) { + println!( + "Processing residuals of {} points in {} dimensions using {} centers", + num_points, dim, num_centers + ); + + data_load + .par_chunks_mut(dim) + .enumerate() + .for_each(|(n_iter, chunk)| { + let cur_pivot_index = closest_centers[n_iter] as usize * dim; + for d_iter in 0..dim { + if to_subtract { + chunk[d_iter] -= cur_pivot_data[cur_pivot_index + d_iter]; + } else { + chunk[d_iter] += cur_pivot_data[cur_pivot_index + d_iter]; + } + } + }); +} + +#[cfg(test)] +mod math_util_test { + use super::*; + use approx::assert_abs_diff_eq; + + #[test] + fn calc_distance_test() { + let vec1 = vec![1.0, 2.0, 3.0]; + let vec2 = vec![4.0, 5.0, 6.0]; + let dim = vec1.len(); + + let dist = calc_distance(&vec1, &vec2, dim); + + let expected = 27.0; + + assert_eq!(dist, expected); + } + + #[test] + fn compute_vecs_l2sq_test() { + let data = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]; + let num_points = 2; + let dim = 3; + let mut vecs_l2sq = vec![0.0; num_points]; + + compute_vecs_l2sq(&mut vecs_l2sq, &data, num_points, dim); + + let expected = vec![14.0, 77.0]; + + assert_eq!(vecs_l2sq.len(), num_points); + assert_abs_diff_eq!(vecs_l2sq[0], expected[0], epsilon = 1e-6); + assert_abs_diff_eq!(vecs_l2sq[1], expected[1], epsilon = 1e-6); + } + + #[test] + fn compute_closest_centers_in_block_test() { + let num_points = 10; + let dim = 5; + let num_centers = 3; + let data = vec![ + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, + 45.0, 46.0, 47.0, 48.0, 49.0, 50.0, + ]; + let centers = vec![ + 1.0, 2.0, 3.0, 4.0, 5.0, 21.0, 22.0, 23.0, 24.0, 25.0, 31.0, 32.0, 33.0, 34.0, 35.0, + ]; + let mut docs_l2sq = vec![0.0; num_points]; + compute_vecs_l2sq(&mut docs_l2sq, &data, num_points, dim); + let mut centers_l2sq = vec![0.0; num_centers]; + compute_vecs_l2sq(&mut centers_l2sq, ¢ers, num_centers, dim); + let mut center_index = vec![0; num_points]; + let mut dist_matrix = vec![0.0; num_points * num_centers]; + let k = 1; + + compute_closest_centers_in_block( + &data, + num_points, + dim, + ¢ers, + num_centers, + &docs_l2sq, + ¢ers_l2sq, + &mut center_index, + &mut dist_matrix, + k, + ) + .unwrap(); + + assert_eq!(center_index.len(), num_points); + let expected_center_index = vec![0, 0, 0, 1, 1, 1, 2, 2, 2, 2]; + assert_abs_diff_eq!(*center_index, expected_center_index); + + assert_eq!(dist_matrix.len(), num_points * num_centers); + let expected_dist_matrix = vec![ + 0.0, 2000.0, 4500.0, 125.0, 1125.0, 3125.0, 500.0, 500.0, 2000.0, 1125.0, 125.0, + 1125.0, 2000.0, 0.0, 500.0, 3125.0, 125.0, 125.0, 4500.0, 500.0, 0.0, 6125.0, 1125.0, + 125.0, 8000.0, 2000.0, 500.0, 10125.0, 3125.0, 1125.0, + ]; + assert_abs_diff_eq!(*dist_matrix, expected_dist_matrix, epsilon = 1e-2); + } + + #[test] + fn test_compute_closest_centers() { + let num_points = 4; + let dim = 3; + let num_centers = 2; + let mut data = vec![ + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, + ]; + let pivot_data = vec![1.0, 2.0, 3.0, 10.0, 11.0, 12.0]; + let k = 1; + + let mut closest_centers_ivf = vec![0u32; num_points * k]; + let mut inverted_index: Vec> = vec![vec![], vec![]]; + + compute_closest_centers( + &data, + num_points, + dim, + 
&pivot_data, + num_centers, + k, + &mut closest_centers_ivf, + Some(&mut inverted_index), + None, + ) + .unwrap(); + + assert_eq!(closest_centers_ivf, vec![0, 0, 1, 1]); + + for vec in inverted_index.iter_mut() { + vec.sort_unstable(); + } + assert_eq!(inverted_index, vec![vec![0, 1], vec![2, 3]]); + } + + #[test] + fn process_residuals_test() { + let mut data_load = vec![1.0, 2.0, 3.0, 4.0]; + let num_points = 2; + let dim = 2; + let cur_pivot_data = vec![0.5, 1.5, 2.5, 3.5]; + let num_centers = 2; + let closest_centers = vec![0, 1]; + let to_subtract = true; + + process_residuals( + &mut data_load, + num_points, + dim, + &cur_pivot_data, + num_centers, + &closest_centers, + to_subtract, + ); + + assert_eq!(data_load, vec![0.5, 0.5, 0.5, 0.5]); + } +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/utils/mod.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/utils/mod.rs new file mode 100644 index 0000000..df174f8 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/utils/mod.rs @@ -0,0 +1,34 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +pub mod file_util; +pub use file_util::*; + +#[allow(clippy::module_inception)] +pub mod utils; +pub use utils::*; + +pub mod bit_vec_extension; +pub use bit_vec_extension::*; + +pub mod rayon_util; +pub use rayon_util::*; + +pub mod timer; +pub use timer::*; + +pub mod cached_reader; +pub use cached_reader::*; + +pub mod cached_writer; +pub use cached_writer::*; + +pub mod partition; +pub use partition::*; + +pub mod math_util; +pub use math_util::*; + +pub mod kmeans; +pub use kmeans::*; diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/utils/partition.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/utils/partition.rs new file mode 100644 index 0000000..dbe6862 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/utils/partition.rs @@ -0,0 +1,151 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +use std::mem; +use std::{fs::File, path::Path}; +use std::io::{Write, Seek, SeekFrom}; +use rand::distributions::{Distribution, Uniform}; + +use crate::common::ANNResult; + +use super::CachedReader; + +/// streams data from the file, and samples each vector with probability p_val +/// and returns a matrix of size slice_size* ndims as floating point type. +/// the slice_size and ndims are set inside the function. +/// # Arguments +/// * `file_name` - filename where the data is +/// * `p_val` - possibility to sample data +/// * `sampled_vectors` - sampled vector chose by p_val possibility +/// * `slice_size` - how many sampled data return +/// * `dim` - each sample data dimension +pub fn gen_random_slice>(data_file: &str, mut p_val: f64) -> ANNResult<(Vec, usize, usize)> { + let read_blk_size = 64 * 1024 * 1024; + let mut reader = CachedReader::new(data_file, read_blk_size)?; + + let npts = reader.read_u32()? as usize; + let dim = reader.read_u32()? 
as usize; + let mut sampled_vectors: Vec = Vec::new(); + let mut slice_size = 0; + p_val = if p_val < 1f64 { p_val } else { 1f64 }; + + let mut generator = rand::thread_rng(); + let distribution = Uniform::from(0.0..1.0); + + for _ in 0..npts { + let mut cur_vector_bytes = vec![0u8; dim * mem::size_of::()]; + reader.read(&mut cur_vector_bytes)?; + let random_value = distribution.sample(&mut generator); + if random_value < p_val { + let ptr = cur_vector_bytes.as_ptr() as *const T; + let cur_vector_t = unsafe { std::slice::from_raw_parts(ptr, dim) }; + sampled_vectors.extend(cur_vector_t.iter().map(|&t| t.into())); + slice_size += 1; + } + } + + Ok((sampled_vectors, slice_size, dim)) +} + +/// Generate random sample data and write into output_file +pub fn gen_sample_data(data_file: &str, output_file: &str, sampling_rate: f64) -> ANNResult<()> { + let read_blk_size = 64 * 1024 * 1024; + let mut reader = CachedReader::new(data_file, read_blk_size)?; + + let sample_data_path = format!("{}_data.bin", output_file); + let sample_ids_path = format!("{}_ids.bin", output_file); + let mut sample_data_writer = File::create(Path::new(&sample_data_path))?; + let mut sample_id_writer = File::create(Path::new(&sample_ids_path))?; + + let mut num_sampled_pts = 0u32; + let one_const = 1u32; + let mut generator = rand::thread_rng(); + let distribution = Uniform::from(0.0..1.0); + + let npts_u32 = reader.read_u32()?; + let dim_u32 = reader.read_u32()?; + let dim = dim_u32 as usize; + sample_data_writer.write_all(&num_sampled_pts.to_le_bytes())?; + sample_data_writer.write_all(&dim_u32.to_le_bytes())?; + sample_id_writer.write_all(&num_sampled_pts.to_le_bytes())?; + sample_id_writer.write_all(&one_const.to_le_bytes())?; + + for id in 0..npts_u32 { + let mut cur_row_bytes = vec![0u8; dim * mem::size_of::()]; + reader.read(&mut cur_row_bytes)?; + let random_value = distribution.sample(&mut generator); + if random_value < sampling_rate { + sample_data_writer.write_all(&cur_row_bytes)?; + sample_id_writer.write_all(&id.to_le_bytes())?; + num_sampled_pts += 1; + } + } + + sample_data_writer.seek(SeekFrom::Start(0))?; + sample_data_writer.write_all(&num_sampled_pts.to_le_bytes())?; + sample_id_writer.seek(SeekFrom::Start(0))?; + sample_id_writer.write_all(&num_sampled_pts.to_le_bytes())?; + println!("Wrote {} points to sample file: {}", num_sampled_pts, sample_data_path); + + Ok(()) +} + +#[cfg(test)] +mod partition_test { + use std::{fs, io::Read}; + use byteorder::{ReadBytesExt, LittleEndian}; + + use crate::utils::file_exists; + + use super::*; + + #[test] + fn gen_sample_data_test() { + let file_name = "gen_sample_data_test.bin"; + //npoints=2, dim=8 + let data: [u8; 72] = [2, 0, 0, 0, 8, 0, 0, 0, + 0x00, 0x00, 0x80, 0x3f, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x40, 0x40, 0x00, 0x00, 0x80, 0x40, + 0x00, 0x00, 0xa0, 0x40, 0x00, 0x00, 0xc0, 0x40, 0x00, 0x00, 0xe0, 0x40, 0x00, 0x00, 0x00, 0x41, + 0x00, 0x00, 0x10, 0x41, 0x00, 0x00, 0x20, 0x41, 0x00, 0x00, 0x30, 0x41, 0x00, 0x00, 0x40, 0x41, + 0x00, 0x00, 0x50, 0x41, 0x00, 0x00, 0x60, 0x41, 0x00, 0x00, 0x70, 0x41, 0x00, 0x00, 0x80, 0x41]; + std::fs::write(file_name, data).expect("Failed to write sample file"); + + let sample_file_prefix = file_name.to_string() + "_sample"; + gen_sample_data::(file_name, sample_file_prefix.as_str(), 1f64).unwrap(); + + let sample_data_path = format!("{}_data.bin", sample_file_prefix); + let sample_ids_path = format!("{}_ids.bin", sample_file_prefix); + assert!(file_exists(sample_data_path.as_str())); + 
assert!(file_exists(sample_ids_path.as_str())); + + let mut data_file_reader = File::open(sample_data_path.as_str()).unwrap(); + let mut ids_file_reader = File::open(sample_ids_path.as_str()).unwrap(); + + let mut num_sampled_pts = data_file_reader.read_u32::().unwrap(); + assert_eq!(num_sampled_pts, 2); + num_sampled_pts = ids_file_reader.read_u32::().unwrap(); + assert_eq!(num_sampled_pts, 2); + + let dim = data_file_reader.read_u32::().unwrap() as usize; + assert_eq!(dim, 8); + assert_eq!(ids_file_reader.read_u32::().unwrap(), 1); + + let mut start = 8; + for i in 0..num_sampled_pts { + let mut data_bytes = vec![0u8; dim * 4]; + data_file_reader.read_exact(&mut data_bytes).unwrap(); + assert_eq!(data_bytes, data[start..start + dim * 4]); + + let id = ids_file_reader.read_u32::().unwrap(); + assert_eq!(id, i); + + start += dim * 4; + } + + fs::remove_file(file_name).expect("Failed to delete file"); + fs::remove_file(sample_data_path.as_str()).expect("Failed to delete file"); + fs::remove_file(sample_ids_path.as_str()).expect("Failed to delete file"); + } +} + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/utils/rayon_util.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/utils/rayon_util.rs new file mode 100644 index 0000000..f8174ee --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/utils/rayon_util.rs @@ -0,0 +1,33 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +use std::ops::Range; +use rayon::prelude::{IntoParallelIterator, ParallelIterator}; + +use crate::common::ANNResult; + +/// based on thread_num, execute the task in parallel using Rayon or serial +#[inline] +pub fn execute_with_rayon(range: Range, num_threads: u32, f: F) -> ANNResult<()> +where F: Fn(usize) -> ANNResult<()> + Sync + Send + Copy +{ + if num_threads == 1 { + for i in range { + f(i)?; + } + Ok(()) + } else { + range.into_par_iter().try_for_each(f) + } +} + +/// set the thread count of Rayon, otherwise it will use threads as many as logical cores. +#[inline] +pub fn set_rayon_num_threads(num_threads: u32) { + std::env::set_var( + "RAYON_NUM_THREADS", + num_threads.to_string(), + ); +} + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/utils/timer.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/utils/timer.rs new file mode 100644 index 0000000..2f4b38b --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/utils/timer.rs @@ -0,0 +1,101 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. 
+ */ +use platform::*; +use std::time::{Duration, Instant}; + +#[derive(Clone)] +pub struct Timer { + check_point: Instant, + pid: Option, + cycles: Option, +} + +impl Default for Timer { + fn default() -> Self { + Self::new() + } +} + +impl Timer { + pub fn new() -> Timer { + let pid = get_process_handle(); + let cycles = get_process_cycle_time(pid); + Timer { + check_point: Instant::now(), + pid, + cycles, + } + } + + pub fn reset(&mut self) { + self.check_point = Instant::now(); + self.cycles = get_process_cycle_time(self.pid); + } + + pub fn elapsed(&self) -> Duration { + Instant::now().duration_since(self.check_point) + } + + pub fn elapsed_seconds(&self) -> f64 { + self.elapsed().as_secs_f64() + } + + pub fn elapsed_gcycles(&self) -> f32 { + let cur_cycles = get_process_cycle_time(self.pid); + if let (Some(cur_cycles), Some(cycles)) = (cur_cycles, self.cycles) { + let spent_cycles = + ((cur_cycles - cycles) as f64 * 1.0f64) / (1024 * 1024 * 1024) as f64; + return spent_cycles as f32; + } + + 0.0 + } + + pub fn elapsed_seconds_for_step(&self, step: &str) -> String { + format!( + "Time for {}: {:.3} seconds, {:.3}B cycles", + step, + self.elapsed_seconds(), + self.elapsed_gcycles() + ) + } +} + +#[cfg(test)] +mod timer_tests { + use super::*; + use std::{thread, time}; + + #[test] + fn test_new() { + let timer = Timer::new(); + assert!(timer.check_point.elapsed().as_secs() < 1); + if cfg!(windows) { + assert!(timer.pid.is_some()); + assert!(timer.cycles.is_some()); + } + else { + assert!(timer.pid.is_none()); + assert!(timer.cycles.is_none()); + } + } + + #[test] + fn test_reset() { + let mut timer = Timer::new(); + thread::sleep(time::Duration::from_millis(100)); + timer.reset(); + assert!(timer.check_point.elapsed().as_millis() < 10); + } + + #[test] + fn test_elapsed() { + let timer = Timer::new(); + thread::sleep(time::Duration::from_millis(100)); + assert!(timer.elapsed().as_millis() > 100); + assert!(timer.elapsed_seconds() > 0.1); + } +} + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/utils/utils.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/utils/utils.rs new file mode 100644 index 0000000..2e80676 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/src/utils/utils.rs @@ -0,0 +1,154 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. 
+ */ +use std::sync::Mutex; +use num_traits::Num; + +/// Non recursive mutex +pub type NonRecursiveMutex = Mutex<()>; + +/// Round up X to the nearest multiple of Y +#[inline] +pub fn round_up<T>(x: T, y: T) -> T +where T : Num + Copy +{ + div_round_up(x, y) * y +} + +/// Rounded-up division +#[inline] +pub fn div_round_up<T>(x: T, y: T) -> T +where T : Num + Copy +{ + (x / y) + if x % y != T::zero() {T::one()} else {T::zero()} +} + +/// Round down X to the nearest multiple of Y +#[inline] +pub fn round_down<T>(x: T, y: T) -> T +where T : Num + Copy +{ + (x / y) * y +} + +/// Is aligned +#[inline] +pub fn is_aligned<T>(x: T, y: T) -> bool +where T : Num + Copy +{ + x % y == T::zero() +} + +#[inline] +pub fn is_512_aligned(x: u64) -> bool { + is_aligned(x, 512) +} + +#[inline] +pub fn is_4096_aligned(x: u64) -> bool { + is_aligned(x, 4096) +} + +/// all metadata of individual sub-component files is written in first 4KB for unified files +pub const METADATA_SIZE: usize = 4096; + +pub const BUFFER_SIZE_FOR_CACHED_IO: usize = 1024 * 1048576; + +pub const PBSTR: &str = "||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||"; + +pub const PBWIDTH: usize = 60; + +macro_rules! convert_types { + ($name:ident, $input_type:ty, $output_type:ty) => { + /// Convert data from one element type to another + pub fn $name(srcmat: &[$input_type], npts: usize, dim: usize) -> Vec<$output_type> { + let mut destmat: Vec<$output_type> = Vec::new(); + for i in 0..npts { + for j in 0..dim { + destmat.push(srcmat[i * dim + j] as $output_type); + } + } + destmat + } + }; +} +convert_types!(convert_types_usize_u8, usize, u8); +convert_types!(convert_types_usize_u32, usize, u32); +convert_types!(convert_types_usize_u64, usize, u64); +convert_types!(convert_types_u64_usize, u64, usize); +convert_types!(convert_types_u32_usize, u32, usize); + +#[cfg(test)] +mod file_util_test { + use super::*; + use std::any::type_name; + + #[test] + fn round_up_test() { + assert_eq!(round_up(252, 8), 256); + assert_eq!(round_up(256, 8), 256); + } + + #[test] + fn div_round_up_test() { + assert_eq!(div_round_up(252, 8), 32); + assert_eq!(div_round_up(256, 8), 32); + } + + #[test] + fn round_down_test() { + assert_eq!(round_down(252, 8), 248); + assert_eq!(round_down(256, 8), 256); + } + + #[test] + fn is_aligned_test() { + assert!(!is_aligned(252, 8)); + assert!(is_aligned(256, 8)); + } + + #[test] + fn is_512_aligned_test() { + assert!(!is_512_aligned(520)); + assert!(is_512_aligned(512)); + } + + #[test] + fn is_4096_aligned_test() { + assert!(!is_4096_aligned(4090)); + assert!(is_4096_aligned(4096)); + } + + #[test] + fn convert_types_test() { + let data = vec![0u64, 1u64, 2u64]; + let output = convert_types_u64_usize(&data, 3, 1); + assert_eq!(output.len(), 3); + assert_eq!(type_of(output[0]), "usize"); + assert_eq!(output[0], 0usize); + + let data = vec![0usize, 1usize, 2usize]; + let output = convert_types_usize_u8(&data, 3, 1); + assert_eq!(output.len(), 3); + assert_eq!(type_of(output[0]), "u8"); + assert_eq!(output[0], 0u8); + + let data = vec![0usize, 1usize, 2usize]; + let output = convert_types_usize_u64(&data, 3, 1); + assert_eq!(output.len(), 3); + assert_eq!(type_of(output[0]), "u64"); + assert_eq!(output[0], 0u64); + + let data = vec![0u32, 1u32, 2u32]; + let output = convert_types_u32_usize(&data, 3, 1); + assert_eq!(output.len(), 3); + assert_eq!(type_of(output[0]), "usize"); + assert_eq!(output[0], 0usize); + } + + fn type_of<T>(_: T) -> &'static str { + type_name::<T>() + } +} + diff --git 
a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/tests/data/siftsmall_learn_256pts.fbin b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/tests/data/siftsmall_learn_256pts.fbin new file mode 100644 index 0000000..357a9db Binary files /dev/null and b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/tests/data/siftsmall_learn_256pts.fbin differ diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/tests/data/siftsmall_learn_256pts_2.fbin b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/tests/data/siftsmall_learn_256pts_2.fbin new file mode 100644 index 0000000..9528e4b Binary files /dev/null and b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/tests/data/siftsmall_learn_256pts_2.fbin differ diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/tests/data/truth_index_siftsmall_learn_256pts_1+2_R4_L50_A1.2 b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/tests/data/truth_index_siftsmall_learn_256pts_1+2_R4_L50_A1.2 new file mode 100644 index 0000000..9c803c3 Binary files /dev/null and b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/tests/data/truth_index_siftsmall_learn_256pts_1+2_R4_L50_A1.2 differ diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/tests/data/truth_index_siftsmall_learn_256pts_1+2_saturated_R4_L50_A1.2 b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/tests/data/truth_index_siftsmall_learn_256pts_1+2_saturated_R4_L50_A1.2 new file mode 100644 index 0000000..a9dac10 Binary files /dev/null and b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/tests/data/truth_index_siftsmall_learn_256pts_1+2_saturated_R4_L50_A1.2 differ diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/tests/data/truth_index_siftsmall_learn_256pts_R4_L50_A1.2 b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/tests/data/truth_index_siftsmall_learn_256pts_R4_L50_A1.2 new file mode 100644 index 0000000..8170090 Binary files /dev/null and b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/tests/data/truth_index_siftsmall_learn_256pts_R4_L50_A1.2 differ diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/tests/data/truth_index_siftsmall_learn_256pts_R4_L50_A1.2.data b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/tests/data/truth_index_siftsmall_learn_256pts_R4_L50_A1.2.data new file mode 100644 index 0000000..357a9db Binary files /dev/null and b/packages/leann-backend-diskann/third_party/DiskANN/rust/diskann/tests/data/truth_index_siftsmall_learn_256pts_R4_L50_A1.2.data differ diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/logger/Cargo.toml b/packages/leann-backend-diskann/third_party/DiskANN/rust/logger/Cargo.toml new file mode 100644 index 0000000..e750d95 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/logger/Cargo.toml @@ -0,0 +1,29 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT license. 
+[package] +name = "logger" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +lazy_static = "1.4.0" +log="0.4.17" +once_cell = "1.17.1" +prost = "0.11.9" +prost-types = "0.11.9" +thiserror = "1.0.40" +win_etw_macros="0.1.8" +win_etw_provider="0.1.8" + +[build-dependencies] +prost-build = "0.11.9" + +[[example]] +name="trace_example" +path= "src/examples/trace_example.rs" + +[target."cfg(target_os=\"windows\")".build-dependencies.vcpkg] +version = "0.2" + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/logger/build.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/logger/build.rs new file mode 100644 index 0000000..76058f7 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/logger/build.rs @@ -0,0 +1,33 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +use std::env; + +extern crate prost_build; + +fn main() { + let protopkg = vcpkg::find_package("protobuf").unwrap(); + let protobuf_path = protopkg.link_paths[0].parent().unwrap(); + + let protobuf_bin_path = protobuf_path + .join("tools") + .join("protobuf") + .join("protoc.exe") + .to_str() + .unwrap() + .to_string(); + env::set_var("PROTOC", protobuf_bin_path); + + let protobuf_inc_path = protobuf_path + .join("include") + .join("google") + .join("protobuf") + .to_str() + .unwrap() + .to_string(); + env::set_var("PROTOC_INCLUDE", protobuf_inc_path); + + prost_build::compile_protos(&["src/indexlog.proto"], &["src/"]).unwrap(); +} + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/logger/src/error_logger.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/logger/src/error_logger.rs new file mode 100644 index 0000000..50069b4 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/logger/src/error_logger.rs @@ -0,0 +1,29 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +use crate::log_error::LogError; +use crate::logger::indexlog::{ErrorLog, Log, LogLevel}; +use crate::message_handler::send_log; + +pub fn log_error(error_message: String) -> Result<(), LogError> { + let mut log = Log::default(); + let error_log = ErrorLog { + log_level: LogLevel::Error as i32, + error_message, + }; + log.error_log = Some(error_log); + + send_log(log) +} + +#[cfg(test)] +mod error_logger_test { + use super::*; + + #[test] + fn log_error_works() { + log_error(String::from("Error")).unwrap(); + } +} + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/logger/src/examples/trace_example.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/logger/src/examples/trace_example.rs new file mode 100644 index 0000000..7933a56 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/logger/src/examples/trace_example.rs @@ -0,0 +1,30 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. 
+ */ +use log::{debug, info, log_enabled, warn, Level}; +use logger::trace_logger::TraceLogger; + +// cargo run --example trace_example + +fn main() { + static LOGGER: TraceLogger = TraceLogger {}; + log::set_logger(&LOGGER) + .map(|()| log::set_max_level(log::LevelFilter::Trace)) + .unwrap(); + + info!("Rust logging n = {}", 42); + warn!("This is too much fun!"); + debug!("Maybe we can make this code work"); + + let error_is_enabled = log_enabled!(Level::Error); + let warn_is_enabled = log_enabled!(Level::Warn); + let info_is_enabled = log_enabled!(Level::Info); + let debug_is_enabled = log_enabled!(Level::Debug); + let trace_is_enabled = log_enabled!(Level::Trace); + println!( + "is_enabled? error: {:5?}, warn: {:5?}, info: {:5?}, debug: {:5?}, trace: {:5?}", + error_is_enabled, warn_is_enabled, info_is_enabled, debug_is_enabled, trace_is_enabled, + ); +} + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/logger/src/indexlog.proto b/packages/leann-backend-diskann/third_party/DiskANN/rust/logger/src/indexlog.proto new file mode 100644 index 0000000..68310ae --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/logger/src/indexlog.proto @@ -0,0 +1,50 @@ +syntax = "proto3"; + +package diskann_logger; + +message Log { + IndexConstructionLog IndexConstructionLog = 1; + DiskIndexConstructionLog DiskIndexConstructionLog = 2; + ErrorLog ErrorLog = 3; + TraceLog TraceLog = 100; +} + +enum LogLevel { + UNSPECIFIED = 0; + Error = 1; + Warn = 2; + Info = 3; + Debug = 4; + Trace = 5; +} + +message IndexConstructionLog { + float PercentageComplete = 1; + float TimeSpentInSeconds = 2; + float GCyclesSpent = 3; + LogLevel LogLevel = 4; +} + +message DiskIndexConstructionLog { + DiskIndexConstructionCheckpoint checkpoint = 1; + float TimeSpentInSeconds = 2; + float GCyclesSpent = 3; + LogLevel LogLevel = 4; +} + +enum DiskIndexConstructionCheckpoint { + None = 0; + PqConstruction = 1; + InmemIndexBuild = 2; + DiskLayout = 3; +} + +message TraceLog { + string LogLine = 1; + LogLevel LogLevel = 2; +} + +message ErrorLog { + string ErrorMessage = 1; + LogLevel LogLevel = 2; +} \ No newline at end of file diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/logger/src/lib.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/logger/src/lib.rs new file mode 100644 index 0000000..6cfe2d5 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/logger/src/lib.rs @@ -0,0 +1,19 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +#![cfg_attr( + not(test), + warn(clippy::panic, clippy::unwrap_used, clippy::expect_used) +)] + +pub mod logger { + pub mod indexlog { + include!(concat!(env!("OUT_DIR"), "/diskann_logger.rs")); + } +} + +pub mod error_logger; +pub mod log_error; +pub mod message_handler; +pub mod trace_logger; diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/logger/src/log_error.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/logger/src/log_error.rs new file mode 100644 index 0000000..149d094 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/logger/src/log_error.rs @@ -0,0 +1,27 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. 
+ */ +use std::sync::mpsc::SendError; + +use crate::logger::indexlog::Log; + +#[derive(thiserror::Error, Debug, Clone)] +pub enum LogError { + /// Sender failed to send message to the channel + #[error("IOError: {err}")] + SendError { + #[from] + err: SendError, + }, + + /// PoisonError which can be returned whenever a lock is acquired + /// Both Mutexes and RwLocks are poisoned whenever a thread fails while the lock is held + #[error("LockPoisonError: {err}")] + LockPoisonError { err: String }, + + /// Failed to create EtwPublisher + #[error("EtwProviderError: {err:?}")] + ETWProviderError { err: win_etw_provider::Error }, +} + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/logger/src/message_handler.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/logger/src/message_handler.rs new file mode 100644 index 0000000..37f352a --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/logger/src/message_handler.rs @@ -0,0 +1,167 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +use crate::log_error::LogError; +use crate::logger::indexlog::DiskIndexConstructionCheckpoint; +use crate::logger::indexlog::Log; +use crate::logger::indexlog::LogLevel; + +use std::sync::mpsc::{self, Sender}; +use std::sync::Mutex; +use std::thread; + +use win_etw_macros::trace_logging_provider; + +trait MessagePublisher { + fn publish(&self, log_level: LogLevel, message: &str); +} + +// ETW provider - the GUID specified here is that of the default provider for Geneva Metric Extensions +// We are just using it as a placeholder until we have a version of OpenTelemetry exporter for Rust +#[trace_logging_provider(guid = "edc24920-e004-40f6-a8e1-0e6e48f39d84")] +trait EtwTraceProvider { + fn write(msg: &str); +} + +struct EtwPublisher { + provider: EtwTraceProvider, + publish_to_stdout: bool, +} + +impl EtwPublisher { + pub fn new() -> Result { + let provider = EtwTraceProvider::new(); + Ok(EtwPublisher { + provider, + publish_to_stdout: true, + }) + } +} + +fn log_level_to_etw(level: LogLevel) -> win_etw_provider::Level { + match level { + LogLevel::Error => win_etw_provider::Level::ERROR, + LogLevel::Warn => win_etw_provider::Level::WARN, + LogLevel::Info => win_etw_provider::Level::INFO, + LogLevel::Debug => win_etw_provider::Level::VERBOSE, + LogLevel::Trace => win_etw_provider::Level(6), + LogLevel::Unspecified => win_etw_provider::Level(6), + } +} + +fn i32_to_log_level(value: i32) -> LogLevel { + match value { + 0 => LogLevel::Unspecified, + 1 => LogLevel::Error, + 2 => LogLevel::Warn, + 3 => LogLevel::Info, + 4 => LogLevel::Debug, + 5 => LogLevel::Trace, + _ => LogLevel::Unspecified, + } +} + +impl MessagePublisher for EtwPublisher { + fn publish(&self, log_level: LogLevel, message: &str) { + let options = win_etw_provider::EventOptions { + level: Some(log_level_to_etw(log_level)), + ..Default::default() + }; + self.provider.write(Some(&options), message); + + if self.publish_to_stdout { + println!("{}", message); + } + } +} + +struct MessageProcessor { + sender: Mutex>, +} + +impl MessageProcessor { + pub fn start_processing() -> Self { + let (sender, receiver) = mpsc::channel::(); + thread::spawn(move || -> Result<(), LogError> { + for message in receiver { + // Process the received message + if let Some(indexlog) = message.index_construction_log { + let str = format!( + "Time for {}% of index build completed: {:.3} seconds, {:.3}B cycles", + indexlog.percentage_complete, + 
indexlog.time_spent_in_seconds, + indexlog.g_cycles_spent + ); + publish(i32_to_log_level(indexlog.log_level), &str)?; + } + + if let Some(disk_index_log) = message.disk_index_construction_log { + let str = format!( + "Time for disk index build [Checkpoint: {:?}] completed: {:.3} seconds, {:.3}B cycles", + DiskIndexConstructionCheckpoint::from_i32(disk_index_log.checkpoint).unwrap_or(DiskIndexConstructionCheckpoint::None), + disk_index_log.time_spent_in_seconds, + disk_index_log.g_cycles_spent + ); + publish(i32_to_log_level(disk_index_log.log_level), &str)?; + } + + if let Some(tracelog) = message.trace_log { + let str = format!("{}:{}", tracelog.log_level, tracelog.log_line); + publish(i32_to_log_level(tracelog.log_level), &str)?; + } + + if let Some(err) = message.error_log { + publish(i32_to_log_level(err.log_level), &err.error_message)?; + } + } + + Ok(()) + }); + + let sender = Mutex::new(sender); + MessageProcessor { sender } + } + + /// Log the message. + fn log(&self, message: Log) -> Result<(), LogError> { + Ok(self + .sender + .lock() + .map_err(|err| LogError::LockPoisonError { + err: err.to_string(), + })? + .send(message)?) + } +} + +lazy_static::lazy_static! { + /// Singleton logger. + static ref PROCESSOR: MessageProcessor = { + + MessageProcessor::start_processing() + }; +} + +lazy_static::lazy_static! { + /// Singleton publisher. + static ref PUBLISHER: Result = { + EtwPublisher::new() + }; +} + +/// Send a message to the logging system. +pub fn send_log(message: Log) -> Result<(), LogError> { + PROCESSOR.log(message) +} + +fn publish(log_level: LogLevel, message: &str) -> Result<(), LogError> { + match *PUBLISHER { + Ok(ref etw_publisher) => { + etw_publisher.publish(log_level, message); + Ok(()) + } + Err(ref err) => Err(LogError::ETWProviderError { err: err.clone() }), + } +} + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/logger/src/trace_logger.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/logger/src/trace_logger.rs new file mode 100644 index 0000000..96ef386 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/logger/src/trace_logger.rs @@ -0,0 +1,41 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +use crate::logger::indexlog::{Log, TraceLog}; +use crate::message_handler::send_log; + +use log; + +pub struct TraceLogger {} + +fn level_to_i32(value: log::Level) -> i32 { + match value { + log::Level::Error => 1, + log::Level::Warn => 2, + log::Level::Info => 3, + log::Level::Debug => 4, + log::Level::Trace => 5, + } +} + +impl log::Log for TraceLogger { + fn enabled(&self, metadata: &log::Metadata) -> bool { + metadata.level() <= log::max_level() + } + + fn log(&self, record: &log::Record) { + let message = record.args().to_string(); + let metadata = record.metadata(); + let mut log = Log::default(); + let trace_log = TraceLog { + log_line: message, + log_level: level_to_i32(metadata.level()), + }; + log.trace_log = Some(trace_log); + let _ = send_log(log); + } + + fn flush(&self) {} +} + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/platform/Cargo.toml b/packages/leann-backend-diskann/third_party/DiskANN/rust/platform/Cargo.toml new file mode 100644 index 0000000..057f9e8 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/platform/Cargo.toml @@ -0,0 +1,13 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT license. 
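
> Editor's note: message_handler.rs above combines three pieces — an `mpsc` channel whose `Sender` sits behind a `Mutex` inside a `lazy_static` singleton, a background thread that drains the `Receiver` and formats each message, and a publisher that writes to ETW (and optionally stdout). A stripped-down sketch of that pattern with plain `String` messages follows; the names are hypothetical, not the crate's API.

```rust
use std::sync::mpsc::{self, Sender};
use std::sync::Mutex;
use std::thread;

struct Processor {
    sender: Mutex<Sender<String>>,
}

impl Processor {
    fn start() -> Self {
        let (sender, receiver) = mpsc::channel::<String>();
        // The consumer thread lives for the rest of the process, like MessageProcessor above.
        thread::spawn(move || {
            for message in receiver {
                println!("{message}"); // stand-in for EtwPublisher::publish
            }
        });
        Processor { sender: Mutex::new(sender) }
    }

    fn log(&self, message: String) -> Result<(), String> {
        self.sender
            .lock()
            .map_err(|e| e.to_string())?  // poisoned lock, cf. LogError::LockPoisonError
            .send(message)
            .map_err(|e| e.to_string())   // closed channel, cf. LogError::SendError
    }
}

lazy_static::lazy_static! {
    static ref PROCESSOR: Processor = Processor::start();
}

fn send(message: String) -> Result<(), String> {
    PROCESSOR.log(message)
}
```
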
+[package] +name = "platform" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +log="0.4.18" +winapi = { version = "0.3.9", features = ["errhandlingapi", "fileapi", "ioapiset", "handleapi", "winnt", "minwindef", "basetsd", "winerror", "winbase"] } + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/platform/src/file_handle.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/platform/src/file_handle.rs new file mode 100644 index 0000000..23da879 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/platform/src/file_handle.rs @@ -0,0 +1,212 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +use std::ffi::CString; +use std::{io, ptr}; + +use winapi::um::fileapi::OPEN_EXISTING; +use winapi::um::winbase::{FILE_FLAG_NO_BUFFERING, FILE_FLAG_OVERLAPPED, FILE_FLAG_RANDOM_ACCESS}; +use winapi::um::winnt::{FILE_SHARE_DELETE, FILE_SHARE_READ, FILE_SHARE_WRITE, GENERIC_READ, GENERIC_WRITE}; + +use winapi::{ + shared::minwindef::DWORD, + um::{ + errhandlingapi::GetLastError, + fileapi::CreateFileA, + handleapi::{CloseHandle, INVALID_HANDLE_VALUE}, + winnt::HANDLE, + }, +}; + +pub const FILE_ATTRIBUTE_READONLY: DWORD = 0x00000001; + +/// `AccessMode` determines how a file can be accessed. +/// These modes are used when creating or opening a file to decide what operations are allowed +/// to be performed on the file. +/// +/// # Variants +/// +/// - `Read`: The file is opened in read-only mode. +/// +/// - `Write`: The file is opened in write-only mode. +/// +/// - `ReadWrite`: The file is opened for both reading and writing. +pub enum AccessMode { + Read, + Write, + ReadWrite, +} + +/// `ShareMode` determines how a file can be shared. +/// +/// These modes are used when creating or opening a file to decide what operations other +/// opening instances of the file can perform on it. +/// # Variants +/// - `None`: Prevents other processes from opening a file if they request delete, +/// read, or write access. +/// +/// - `Read`: Allows subsequent open operations on the same file to request read access. +/// +/// - `Write`: Allows subsequent open operations on the same file file to request write access. +/// +/// - `Delete`: Allows subsequent open operations on the same file file to request delete access. +pub enum ShareMode { + None, + Read, + Write, + Delete, +} + +/// # Windows File Handle Wrapper +/// +/// Introduces a Rust-friendly wrapper around the native Windows `HANDLE` object, `FileHandle`. +/// `FileHandle` provides safe creation and automatic cleanup of Windows file handles, leveraging Rust's ownership model. + +/// `FileHandle` struct that wraps a native Windows `HANDLE` object +#[cfg(target_os = "windows")] +pub struct FileHandle { + handle: HANDLE, +} + +impl FileHandle { + /// Creates a new `FileHandle` by opening an existing file with the given access and shared mode. + /// + /// This function is marked unsafe because it creates a raw pointer to the filename and try to create + /// a Windows `HANDLE` object without checking if you have sufficient permissions. + /// + /// # Safety + /// + /// Ensure that the file specified by `file_name` is valid and the calling process has + /// sufficient permissions to perform the specified `access_mode` and `share_mode` operations. + /// + /// # Parameters + /// + /// - `file_name`: The name of the file. 
+ /// - `access_mode`: The access mode to be used for the file. + /// - `share_mode`: The share mode to be used for the file + /// + /// # Errors + /// This function will return an error if the `file_name` is invalid or if the file cannot + /// be opened with the specified `access_mode` and `share_mode`. + pub unsafe fn new( + file_name: &str, + access_mode: AccessMode, + share_mode: ShareMode, + ) -> io::Result { + let file_name_c = CString::new(file_name).map_err(|_| { + io::Error::new( + io::ErrorKind::InvalidData, + format!("Invalid file name. {}", file_name), + ) + })?; + + let dw_desired_access = match access_mode { + AccessMode::Read => GENERIC_READ, + AccessMode::Write => GENERIC_WRITE, + AccessMode::ReadWrite => GENERIC_READ | GENERIC_WRITE, + }; + + let dw_share_mode = match share_mode { + ShareMode::None => 0, + ShareMode::Read => FILE_SHARE_READ, + ShareMode::Write => FILE_SHARE_WRITE, + ShareMode::Delete => FILE_SHARE_DELETE, + }; + + let dw_flags_and_attributes = FILE_ATTRIBUTE_READONLY + | FILE_FLAG_NO_BUFFERING + | FILE_FLAG_OVERLAPPED + | FILE_FLAG_RANDOM_ACCESS; + + let handle = unsafe { + CreateFileA( + file_name_c.as_ptr(), + dw_desired_access, + dw_share_mode, + ptr::null_mut(), + OPEN_EXISTING, + dw_flags_and_attributes, + ptr::null_mut(), + ) + }; + + if handle == INVALID_HANDLE_VALUE { + let error_code = unsafe { GetLastError() }; + Err(io::Error::from_raw_os_error(error_code as i32)) + } else { + Ok(Self { handle }) + } + } + + pub fn raw_handle(&self) -> HANDLE { + self.handle + } +} + +impl Drop for FileHandle { + /// Automatically closes the `FileHandle` when it goes out of scope. + /// Any errors in closing the handle are logged, as `Drop` does not support returning `Result`. + fn drop(&mut self) { + let result = unsafe { CloseHandle(self.handle) }; + if result == 0 { + let error_code = unsafe { GetLastError() }; + let error = io::Error::from_raw_os_error(error_code as i32); + + // Only log the error if dropping the handle fails, since Rust's Drop trait does not support returning Result types from the drop method, + // and panicking in the drop method is considered bad practice + log::warn!("Error when dropping IOCompletionPort: {:?}", error); + } + } +} + +/// Returns a `FileHandle` with an `INVALID_HANDLE_VALUE`. +impl Default for FileHandle { + fn default() -> Self { + Self { + handle: INVALID_HANDLE_VALUE, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::fs::File; + use std::path::Path; + + #[test] + fn test_create_file() { + // Create a dummy file + let dummy_file_path = "dummy_file.txt"; + { + let _file = File::create(dummy_file_path).expect("Failed to create dummy file."); + } + + let path = Path::new(dummy_file_path); + { + let file_handle = unsafe { + FileHandle::new(path.to_str().unwrap(), AccessMode::Read, ShareMode::Read) + }; + + // Check that the file handle is valid + assert!(file_handle.is_ok()); + } + + // Try to delete the file. If the handle was correctly dropped, this should succeed. + match std::fs::remove_file(dummy_file_path) { + Ok(()) => (), // File was deleted successfully, which means the handle was closed. + Err(e) => panic!("Failed to delete file: {}", e), // Failed to delete the file, likely because the handle is still open. 
+ } + } + + #[test] + fn test_file_not_found() { + let path = Path::new("non_existent_file.txt"); + let file_handle = + unsafe { FileHandle::new(path.to_str().unwrap(), AccessMode::Read, ShareMode::Read) }; + + // Check that opening a non-existent file returns an error + assert!(file_handle.is_err()); + } +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/platform/src/file_io.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/platform/src/file_io.rs new file mode 100644 index 0000000..e5de247 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/platform/src/file_io.rs @@ -0,0 +1,154 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +/// The module provides unsafe wrappers around two Windows API functions: `ReadFile` and `GetQueuedCompletionStatus`. +/// +/// These wrappers aim to simplify and abstract the use of these functions, providing easier error handling and a safer interface. +/// They return standard Rust `io::Result` types for convenience and consistency with the rest of the Rust standard library. +use std::io; +use std::ptr; + +use winapi::{ + ctypes::c_void, + shared::{ + basetsd::ULONG_PTR, + minwindef::{DWORD, FALSE}, + winerror::{ERROR_IO_PENDING, WAIT_TIMEOUT}, + }, + um::{ + errhandlingapi::GetLastError, fileapi::ReadFile, ioapiset::GetQueuedCompletionStatus, + minwinbase::OVERLAPPED, + }, +}; + +use crate::FileHandle; +use crate::IOCompletionPort; + +/// Asynchronously queue a read request from a file into a buffer slice. +/// +/// Wraps the unsafe Windows API function `ReadFile`, making it safe to call only when the overlapped buffer +/// remains valid and unchanged anywhere else during the entire async operation. +/// +/// Returns a boolean indicating whether the read operation completed synchronously or is pending. +/// +/// # Safety +/// +/// This function is marked as `unsafe` because it uses raw pointers and requires the caller to ensure +/// that the buffer slice and the overlapped buffer stay valid during the whole async operation. +pub unsafe fn read_file_to_slice( + file_handle: &FileHandle, + buffer_slice: &mut [T], + overlapped: *mut OVERLAPPED, + offset: u64, +) -> io::Result { + let num_bytes = std::mem::size_of_val(buffer_slice); + unsafe { + ptr::write(overlapped, std::mem::zeroed()); + (*overlapped).u.s_mut().Offset = offset as u32; + (*overlapped).u.s_mut().OffsetHigh = (offset >> 32) as u32; + } + + let result = unsafe { + ReadFile( + file_handle.raw_handle(), + buffer_slice.as_mut_ptr() as *mut c_void, + num_bytes as DWORD, + ptr::null_mut(), + overlapped, + ) + }; + + match result { + FALSE => { + let error = unsafe { GetLastError() }; + if error != ERROR_IO_PENDING { + Err(io::Error::from_raw_os_error(error as i32)) + } else { + Ok(false) + } + } + _ => Ok(true), + } +} + +/// Retrieves the results of an asynchronous I/O operation on an I/O completion port. +/// +/// Wraps the unsafe Windows API function `GetQueuedCompletionStatus`, making it safe to call only when the overlapped buffer +/// remains valid and unchanged anywhere else during the entire async operation. +/// +/// Returns a boolean indicating whether an I/O operation completed synchronously or is still pending. +/// +/// # Safety +/// +/// This function is marked as `unsafe` because it uses raw pointers and requires the caller to ensure +/// that the overlapped buffer stays valid during the whole async operation. 
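
> Editor's note: one detail of `read_file_to_slice` above worth isolating — the 64-bit read offset is split across the two 32-bit `OVERLAPPED` fields. The same split in a standalone sketch:

```rust
// How read_file_to_slice packs a 64-bit offset into OVERLAPPED:
let offset: u64 = 0x0000_0001_2345_6789;
let low = offset as u32;           // goes into OVERLAPPED.u.s_mut().Offset
let high = (offset >> 32) as u32;  // goes into OVERLAPPED.u.s_mut().OffsetHigh
assert_eq!(((high as u64) << 32) | low as u64, offset);
```
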
+pub unsafe fn get_queued_completion_status( + completion_port: &IOCompletionPort, + lp_number_of_bytes: &mut DWORD, + lp_completion_key: &mut ULONG_PTR, + lp_overlapped: *mut *mut OVERLAPPED, + dw_milliseconds: DWORD, +) -> io::Result { + let result = unsafe { + GetQueuedCompletionStatus( + completion_port.raw_handle(), + lp_number_of_bytes, + lp_completion_key, + lp_overlapped, + dw_milliseconds, + ) + }; + + match result { + 0 => { + let error = unsafe { GetLastError() }; + if error == WAIT_TIMEOUT { + Ok(false) + } else { + Err(io::Error::from_raw_os_error(error as i32)) + } + } + _ => Ok(true), + } +} + +#[cfg(test)] +mod tests { + use crate::file_handle::{AccessMode, ShareMode}; + + use super::*; + use std::fs::File; + use std::io::Write; + use std::path::Path; + + #[test] + fn test_read_file_to_slice() { + // Create a temporary file and write some data into it + let path = Path::new("temp.txt"); + { + let mut file = File::create(path).unwrap(); + file.write_all(b"Hello, world!").unwrap(); + } + + let mut buffer: [u8; 512] = [0; 512]; + let mut overlapped = unsafe { std::mem::zeroed::() }; + { + let file_handle = unsafe { + FileHandle::new(path.to_str().unwrap(), AccessMode::Read, ShareMode::Read) + } + .unwrap(); + + // Call the function under test + let result = + unsafe { read_file_to_slice(&file_handle, &mut buffer, &mut overlapped, 0) }; + + assert!(result.is_ok()); + let result_str = std::str::from_utf8(&buffer[.."Hello, world!".len()]).unwrap(); + assert_eq!(result_str, "Hello, world!"); + } + + // Clean up + std::fs::remove_file("temp.txt").unwrap(); + } +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/platform/src/io_completion_port.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/platform/src/io_completion_port.rs new file mode 100644 index 0000000..5bb3322 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/platform/src/io_completion_port.rs @@ -0,0 +1,142 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +use std::io; + +use winapi::{ + ctypes::c_void, + shared::{basetsd::ULONG_PTR, minwindef::DWORD}, + um::{ + errhandlingapi::GetLastError, + handleapi::{CloseHandle, INVALID_HANDLE_VALUE}, + ioapiset::CreateIoCompletionPort, + winnt::HANDLE, + }, +}; + +use crate::FileHandle; + +/// This module provides a safe and idiomatic Rust interface over the IOCompletionPort handle and associated Windows API functions. +/// This struct represents an I/O completion port, which is an object used in asynchronous I/O operations on Windows. +pub struct IOCompletionPort { + io_completion_port: HANDLE, +} + +impl IOCompletionPort { + /// Create a new IOCompletionPort. + /// This function wraps the Windows CreateIoCompletionPort function, providing error handling and automatic resource management. + /// + /// # Arguments + /// + /// * `file_handle` - A reference to a FileHandle to associate with the IOCompletionPort. + /// * `existing_completion_port` - An optional reference to an existing IOCompletionPort. If provided, the new IOCompletionPort will be associated with it. + /// * `completion_key` - The completion key associated with the file handle. + /// * `number_of_concurrent_threads` - The maximum number of threads that the operating system can allow to concurrently process I/O completion packets for the I/O completion port. + /// + /// # Return + /// + /// Returns a Result with the new IOCompletionPort if successful, or an io::Error if the function fails. 
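
> Editor's note: a sketch of how these wrappers compose into one overlapped read (crate name `platform` per its Cargo.toml; `IOCompletionPort` is defined just below; the consumer is assumed to depend on `winapi` itself for the `OVERLAPPED` type). `FileHandle::new` opens with `FILE_FLAG_NO_BUFFERING`, so in practice the buffer and offset would also need sector alignment, which this sketch glosses over.

```rust
use platform::file_handle::{AccessMode, ShareMode};
use platform::{get_queued_completion_status, read_file_to_slice, FileHandle, IOCompletionPort};
use winapi::shared::{basetsd::ULONG_PTR, minwindef::DWORD};
use winapi::um::minwinbase::OVERLAPPED;
use winapi::um::winbase::INFINITE;

fn read_block(path: &str, offset: u64) -> std::io::Result<Vec<u8>> {
    let handle = unsafe { FileHandle::new(path, AccessMode::Read, ShareMode::Read) }?;
    // Associate the handle with a fresh completion port (completion key 0, default concurrency).
    let port = IOCompletionPort::new(&handle, None, 0, 0)?;

    let mut buffer = vec![0u8; 4096];
    let mut overlapped = unsafe { std::mem::zeroed::<OVERLAPPED>() };

    // Ok(false) means the read was queued and will complete on the port.
    let finished = unsafe { read_file_to_slice(&handle, &mut buffer, &mut overlapped, offset) }?;
    if !finished {
        let mut bytes: DWORD = 0;
        let mut key: ULONG_PTR = 0;
        let mut done: *mut OVERLAPPED = std::ptr::null_mut();
        // Blocks until the queued read completes; Ok(false) here would indicate a timeout.
        unsafe { get_queued_completion_status(&port, &mut bytes, &mut key, &mut done, INFINITE) }?;
    }
    Ok(buffer)
}
```
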
+ pub fn new( + file_handle: &FileHandle, + existing_completion_port: Option<&IOCompletionPort>, + completion_key: ULONG_PTR, + number_of_concurrent_threads: DWORD, + ) -> io::Result { + let io_completion_port = unsafe { + CreateIoCompletionPort( + file_handle.raw_handle(), + existing_completion_port + .map_or(std::ptr::null_mut::(), |io_completion_port| { + io_completion_port.raw_handle() + }), + completion_key, + number_of_concurrent_threads, + ) + }; + + if io_completion_port == INVALID_HANDLE_VALUE { + let error_code = unsafe { GetLastError() }; + return Err(io::Error::from_raw_os_error(error_code as i32)); + } + + Ok(IOCompletionPort { io_completion_port }) + } + + pub fn raw_handle(&self) -> HANDLE { + self.io_completion_port + } +} + +impl Drop for IOCompletionPort { + /// Drop method for IOCompletionPort. + /// This wraps the Windows CloseHandle function, providing automatic resource cleanup when the IOCompletionPort is dropped. + /// If an error occurs while dropping, it is logged and the drop continues. This is because panicking in Drop can cause unwinding issues. + fn drop(&mut self) { + let result = unsafe { CloseHandle(self.io_completion_port) }; + if result == 0 { + let error_code = unsafe { GetLastError() }; + let error = io::Error::from_raw_os_error(error_code as i32); + + // Only log the error if dropping the handle fails, since Rust's Drop trait does not support returning Result types from the drop method, + // and panicking in the drop method is considered bad practice + log::warn!("Error when dropping IOCompletionPort: {:?}", error); + } + } +} + +impl Default for IOCompletionPort { + /// Create a default IOCompletionPort, whose handle is set to INVALID_HANDLE_VALUE. + /// Returns a new IOCompletionPort with handle set to INVALID_HANDLE_VALUE. + fn default() -> Self { + Self { + io_completion_port: INVALID_HANDLE_VALUE, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::file_handle::{AccessMode, ShareMode}; + + #[test] + fn create_io_completion_port() { + let file_name = "../diskann/tests/data/delete_set_50pts.bin"; + let file_handle = unsafe { FileHandle::new(file_name, AccessMode::Read, ShareMode::Read) } + .expect("Failed to create file handle."); + + let io_completion_port = IOCompletionPort::new(&file_handle, None, 0, 0); + + assert!( + io_completion_port.is_ok(), + "Failed to create IOCompletionPort." + ); + } + + #[test] + fn drop_io_completion_port() { + let file_name = "../diskann/tests/data/delete_set_50pts.bin"; + let file_handle = unsafe { FileHandle::new(file_name, AccessMode::Read, ShareMode::Read) } + .expect("Failed to create file handle."); + + let io_completion_port = IOCompletionPort::new(&file_handle, None, 0, 0) + .expect("Failed to create IOCompletionPort."); + + // After this line, io_completion_port goes out of scope and its Drop trait will be called. + let _ = io_completion_port; + // We have no easy way to test that the Drop trait works correctly, but if it doesn't, + // a resource leak or other problem may become apparent in later tests or in real use of the code. + } + + #[test] + fn default_io_completion_port() { + let io_completion_port = IOCompletionPort::default(); + assert_eq!( + io_completion_port.raw_handle(), + INVALID_HANDLE_VALUE, + "Default IOCompletionPort did not have INVALID_HANDLE_VALUE." 
+ ); + } +} + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/platform/src/lib.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/platform/src/lib.rs new file mode 100644 index 0000000..e282570 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/platform/src/lib.rs @@ -0,0 +1,20 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +#![cfg_attr( + not(test), + warn(clippy::panic, clippy::unwrap_used, clippy::expect_used) +)] + +pub mod perf; +pub use perf::{get_process_cycle_time, get_process_handle}; + +pub mod file_io; +pub use file_io::{get_queued_completion_status, read_file_to_slice}; + +pub mod file_handle; +pub use file_handle::FileHandle; + +pub mod io_completion_port; +pub use io_completion_port::IOCompletionPort; diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/platform/src/perf.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/platform/src/perf.rs new file mode 100644 index 0000000..1ea146f --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/platform/src/perf.rs @@ -0,0 +1,50 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +#[cfg(target_os = "windows")] +#[link(name = "kernel32")] +extern "system" { + fn OpenProcess(dwDesiredAccess: u32, bInheritHandle: bool, dwProcessId: u32) -> usize; + fn QueryProcessCycleTime(hProcess: usize, lpCycleTime: *mut u64) -> bool; + fn GetCurrentProcessId() -> u32; +} + +/// Get current process handle. +pub fn get_process_handle() -> Option { + if cfg!(windows) { + const PROCESS_QUERY_INFORMATION: u32 = 0x0400; + const PROCESS_VM_READ: u32 = 0x0010; + + unsafe { + let current_process_id = GetCurrentProcessId(); + let handle = OpenProcess( + PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, + false, + current_process_id, + ); + if handle == 0 { + None + } else { + Some(handle) + } + } + } else { + None + } +} + +pub fn get_process_cycle_time(process_handle: Option) -> Option { + let mut cycle_time: u64 = 0; + if cfg!(windows) { + if let Some(handle) = process_handle { + let result = unsafe { QueryProcessCycleTime(handle, &mut cycle_time as *mut u64) }; + if result { + return Some(cycle_time); + } + } + } + + None +} + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/project.code-workspace b/packages/leann-backend-diskann/third_party/DiskANN/rust/project.code-workspace new file mode 100644 index 0000000..29bed00 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/project.code-workspace @@ -0,0 +1,58 @@ +{ + "folders": [ + { + "path": "." 
+ } + ], + "settings": { + "search.exclude": { + "target": true, + }, + "files.exclude": { + "target": true, + }, + "rust-analyzer.linkedProjects": [ + ".\\vector\\Cargo.toml", + ".\\vector\\Cargo.toml", + ".\\vector\\Cargo.toml", + ".\\diskann\\Cargo.toml" + ], + "[rust]": { + "editor.defaultFormatter": "rust-lang.rust-analyzer", + "editor.formatOnSave": true, + } + }, + "launch": { + "version": "0.2.0", + "configurations": [ + { + "name": "Build memory index", + "type": "cppvsdbg", + "request": "launch", + "program": "${workspaceRoot}\\target\\debug\\build_memory_index.exe", + "args": [ + "--data_type", + "float", + "--dist_fn", + "l2", + "--data_path", + ".\\base1m.fbin", + "--index_path_prefix", + ".\\rust_index_sift_base_R32_L50_A1.2_T1", + "-R", + "64", + "-L", + "100", + "--alpha", + "1.2", + "-T", + "1" + ], + "stopAtEntry": false, + "cwd": "c:\\data", + "environment": [], + "externalConsole": true + }, + ] + } +} \ No newline at end of file diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/readme.md b/packages/leann-backend-diskann/third_party/DiskANN/rust/readme.md new file mode 100644 index 0000000..a6c5a1b --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/readme.md @@ -0,0 +1,25 @@ + +# readme + +run commands under disnann_rust directory. + +build: +``` +cargo build // Debug + +cargo build -r // Release +``` + + +run: +``` +cargo run // Debug + +cargo run -r // Release +``` + + +test: +``` +cargo test +``` diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/rust-toolchain.toml b/packages/leann-backend-diskann/third_party/DiskANN/rust/rust-toolchain.toml new file mode 100644 index 0000000..183a72c --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/rust-toolchain.toml @@ -0,0 +1,4 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT license. +[toolchain] +channel = "stable" diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/vector/Cargo.toml b/packages/leann-backend-diskann/third_party/DiskANN/rust/vector/Cargo.toml new file mode 100644 index 0000000..709a290 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/vector/Cargo.toml @@ -0,0 +1,24 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT license. +[package] +name = "vector" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +half = "2.2.1" +thiserror = "1.0.40" +bytemuck = "1.7.0" + +[build-dependencies] +cc = "1.0.79" + +[dev-dependencies] +base64 = "0.21.2" +bincode = "1.3.3" +serde = "1.0.163" +approx = "0.5.1" +rand = "0.8.5" + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/vector/build.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/vector/build.rs new file mode 100644 index 0000000..2d36c21 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/vector/build.rs @@ -0,0 +1,29 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. 
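
> Editor's note: stepping back to platform/src/perf.rs a few files above — its two helpers are the kind of measurement that the `GCyclesSpent` fields in the logger's proto are meant to carry (the call site is not shown in this diff, so that link is an assumption). A usage sketch; Windows-only by construction, since both helpers return `None` elsewhere.

```rust
use platform::{get_process_cycle_time, get_process_handle};

fn timed_work() {
    let handle = get_process_handle();           // Option<usize>, None off Windows
    let before = get_process_cycle_time(handle);
    // ... build an index, run queries, etc. ...
    let after = get_process_cycle_time(handle);
    if let (Some(b), Some(a)) = (before, after) {
        println!("cycles spent: {}", a - b);
    }
}
```
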
+ */ +fn main() { + println!("cargo:rerun-if-changed=distance.c"); + if cfg!(target_os = "macos") { + std::env::set_var("CFLAGS", "-mavx2 -mfma -Wno-error -MP -O2 -D NDEBUG -D MKL_ILP64 -D USE_AVX2 -D USE_ACCELERATED_PQ -D NOMINMAX -D _TARGET_ARM_APPLE_DARWIN"); + + cc::Build::new() + .file("distance.c") + .warnings_into_errors(true) + .debug(false) + .target("x86_64-apple-darwin") + .compile("nativefunctions.lib"); + } else { + std::env::set_var("CFLAGS", "/permissive- /MP /ifcOutput /GS- /W3 /Gy /Zi /Gm- /O2 /Ob2 /Zc:inline /fp:fast /D NDEBUG /D MKL_ILP64 /D USE_AVX2 /D USE_ACCELERATED_PQ /D NOMINMAX /fp:except- /errorReport:prompt /WX /openmp:experimental /Zc:forScope /GR /arch:AVX2 /Gd /Oy /Oi /MD /std:c++14 /FC /EHsc /nologo /Ot"); + // std::env::set_var("CFLAGS", "/permissive- /MP /ifcOutput /GS- /W3 /Gy /Zi /Gm- /Obd /Zc:inline /fp:fast /D DEBUG /D MKL_ILP64 /D USE_AVX2 /D USE_ACCELERATED_PQ /D NOMINMAX /fp:except- /errorReport:prompt /WX /openmp:experimental /Zc:forScope /GR /arch:AVX512 /Gd /Oy /Oi /MD /std:c++14 /FC /EHsc /nologo /Ot"); + + cc::Build::new() + .file("distance.c") + .warnings_into_errors(true) + .debug(false) + .compile("nativefunctions"); + + println!("cargo:rustc-link-arg=nativefunctions.lib"); + } +} + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/vector/distance.c b/packages/leann-backend-diskann/third_party/DiskANN/rust/vector/distance.c new file mode 100644 index 0000000..ee5333a --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/vector/distance.c @@ -0,0 +1,35 @@ +#include +#include + +inline __m256i load_128bit_to_256bit(const __m128i *ptr) +{ + __m128i value128 = _mm_loadu_si128(ptr); + __m256i value256 = _mm256_castsi128_si256(value128); + return _mm256_inserti128_si256(value256, _mm_setzero_si128(), 1); +} + +float distance_compare_avx512f_f16(const unsigned char *vec1, const unsigned char *vec2, size_t size) +{ + __m512 sum_squared_diff = _mm512_setzero_ps(); + + for (int i = 0; i < size / 16; i += 1) + { + __m512 v1 = _mm512_cvtph_ps(_mm256_loadu_si256((const __m256i *)(vec1 + i * 2 * 16))); + __m512 v2 = _mm512_cvtph_ps(_mm256_loadu_si256((const __m256i *)(vec2 + i * 2 * 16))); + + __m512 diff = _mm512_sub_ps(v1, v2); + sum_squared_diff = _mm512_fmadd_ps(diff, diff, sum_squared_diff); + } + + size_t i = (size / 16) * 16; + + if (i != size) + { + __m512 va = _mm512_cvtph_ps(load_128bit_to_256bit((const __m128i *)(vec1 + i * 2))); + __m512 vb = _mm512_cvtph_ps(load_128bit_to_256bit((const __m128i *)(vec2 + i * 2))); + __m512 diff512 = _mm512_sub_ps(va, vb); + sum_squared_diff = _mm512_fmadd_ps(diff512, diff512, sum_squared_diff); + } + + return _mm512_reduce_add_ps(sum_squared_diff); +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/vector/src/distance.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/vector/src/distance.rs new file mode 100644 index 0000000..8ca6cb2 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/vector/src/distance.rs @@ -0,0 +1,442 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. 
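
> Editor's note: distance.c above is the hand-written AVX-512 kernel for half-precision vectors. For readability, here is the same quantity — squared L2 with each f16 lane widened to f32, no square root — in plain Rust over the `half` crate's `f16`; a reference sketch, not part of the crate (its own scalar reference lives in test_util.rs later in this diff).

```rust
use half::f16;

// Scalar equivalent of distance_compare_avx512f_f16: widen each lane to f32,
// accumulate squared differences, return the sum.
fn l2_squared_f16(a: &[f16], b: &[f16]) -> f32 {
    debug_assert_eq!(a.len(), b.len());
    a.iter()
        .zip(b)
        .map(|(x, y)| {
            let d = x.to_f32() - y.to_f32();
            d * d
        })
        .sum()
}
```
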
+ */ +use crate::l2_float_distance::{distance_l2_vector_f16, distance_l2_vector_f32}; +use crate::{Half, Metric}; + +/// Distance contract for full-precision vertex +pub trait FullPrecisionDistance { + /// Get the distance between vertex a and vertex b + fn distance_compare(a: &[T; N], b: &[T; N], vec_type: Metric) -> f32; +} + +// reason = "Not supported Metric type Metric::Cosine" +#[allow(clippy::panic)] +impl FullPrecisionDistance for [f32; N] { + /// Calculate distance between two f32 Vertex + #[inline(always)] + fn distance_compare(a: &[f32; N], b: &[f32; N], metric: Metric) -> f32 { + match metric { + Metric::L2 => distance_l2_vector_f32::(a, b), + _ => panic!("Not supported Metric type {:?}", metric), + } + } +} + +// reason = "Not supported Metric type Metric::Cosine" +#[allow(clippy::panic)] +impl FullPrecisionDistance for [Half; N] { + fn distance_compare(a: &[Half; N], b: &[Half; N], metric: Metric) -> f32 { + match metric { + Metric::L2 => distance_l2_vector_f16::(a, b), + _ => panic!("Not supported Metric type {:?}", metric), + } + } +} + +// reason = "Not yet supported Vector i8" +#[allow(clippy::panic)] +impl FullPrecisionDistance for [i8; N] { + fn distance_compare(_a: &[i8; N], _b: &[i8; N], _metric: Metric) -> f32 { + panic!("Not supported VectorType i8") + } +} + +// reason = "Not yet supported Vector u8" +#[allow(clippy::panic)] +impl FullPrecisionDistance for [u8; N] { + fn distance_compare(_a: &[u8; N], _b: &[u8; N], _metric: Metric) -> f32 { + panic!("Not supported VectorType u8") + } +} + +#[cfg(test)] +mod distance_test { + use super::*; + + #[repr(C, align(32))] + pub struct F32Slice112([f32; 112]); + + #[repr(C, align(32))] + pub struct F16Slice112([Half; 112]); + + fn get_turing_test_data() -> (F32Slice112, F32Slice112) { + let a_slice: [f32; 112] = [ + 0.13961786, + -0.031577103, + -0.09567415, + 0.06695563, + -0.1588727, + 0.089852564, + -0.019837005, + 0.07497972, + 0.010418192, + -0.054594643, + 0.08613386, + -0.05103466, + 0.16568437, + -0.02703799, + 0.00728657, + -0.15313251, + 0.16462992, + -0.030570814, + 0.11635703, + 0.23938893, + 0.018022912, + -0.12646551, + 0.018048918, + -0.035986554, + 0.031986624, + -0.015286017, + 0.010117953, + -0.032691937, + 0.12163067, + -0.04746277, + 0.010213069, + -0.043672588, + -0.099362016, + 0.06599016, + -0.19397286, + -0.13285528, + -0.22040887, + 0.017690737, + -0.104262285, + -0.0044555613, + -0.07383778, + -0.108652934, + 0.13399786, + 0.054912474, + 0.20181285, + 0.1795591, + -0.05425621, + -0.10765217, + 0.1405377, + -0.14101997, + -0.12017701, + 0.011565498, + 0.06952187, + 0.060136646, + 0.0023214167, + 0.04204699, + 0.048470616, + 0.17398086, + 0.024218207, + -0.15626553, + -0.11291045, + -0.09688122, + 0.14393932, + -0.14713104, + -0.108876854, + 0.035279203, + -0.05440188, + 0.017205412, + 0.011413814, + 0.04009471, + 0.11070237, + -0.058998976, + 0.07260045, + -0.057893746, + -0.0036240944, + -0.0064988653, + -0.13842176, + -0.023219328, + 0.0035885905, + -0.0719257, + -0.21335067, + 0.11415403, + -0.0059823603, + 0.12091869, + 0.08136634, + -0.10769281, + 0.024518685, + 0.0009200326, + -0.11628049, + 0.07448965, + 0.13736208, + -0.04144517, + -0.16426727, + -0.06380103, + -0.21386267, + 0.022373492, + -0.05874115, + 0.017314062, + -0.040344074, + 0.01059176, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + ]; + let b_slice: [f32; 112] = [ + -0.07209058, + -0.17755842, + -0.030627966, + 0.163028, + -0.2233766, + 0.057412963, + 0.0076995124, + -0.017121306, + 
-0.015759075, + -0.026947778, + -0.010282468, + -0.23968373, + -0.021486737, + -0.09903155, + 0.09361805, + 0.0042711576, + -0.08695552, + -0.042165346, + 0.064218745, + -0.06707651, + 0.07846054, + 0.12235762, + -0.060716823, + 0.18496591, + -0.13023394, + 0.022469055, + 0.056764495, + 0.07168404, + -0.08856144, + -0.15343173, + 0.099879816, + -0.033529017, + 0.0795304, + -0.009242254, + -0.10254546, + 0.13086525, + -0.101518914, + -0.1031299, + -0.056826904, + 0.033196196, + 0.044143833, + -0.049787212, + -0.018148342, + -0.11172959, + -0.06776237, + -0.09185828, + -0.24171598, + 0.05080982, + -0.0727684, + 0.045031235, + -0.11363879, + -0.063389264, + 0.105850354, + -0.19847773, + 0.08828623, + -0.087071925, + 0.033512704, + 0.16118294, + 0.14111553, + 0.020884402, + -0.088860825, + 0.018745849, + 0.047522716, + -0.03665169, + 0.15726231, + -0.09930561, + 0.057844743, + -0.10532736, + -0.091297254, + 0.067029804, + 0.04153976, + 0.06393326, + 0.054578528, + 0.0038539872, + 0.1023088, + -0.10653885, + -0.108500294, + -0.046606563, + 0.020439683, + -0.120957725, + -0.13334097, + -0.13425854, + -0.20481694, + 0.07009538, + 0.08660361, + -0.0096641015, + 0.095316306, + -0.002898167, + -0.19680002, + 0.08466311, + 0.04812689, + -0.028978813, + 0.04780206, + -0.2001506, + -0.036866356, + -0.023720587, + 0.10731964, + 0.05517358, + -0.09580819, + 0.14595725, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + ]; + + (F32Slice112(a_slice), F32Slice112(b_slice)) + } + + fn get_turing_test_data_f16() -> (F16Slice112, F16Slice112) { + let (a_slice, b_slice) = get_turing_test_data(); + let a_data = a_slice.0.iter().map(|x| Half::from_f32(*x)); + let b_data = b_slice.0.iter().map(|x| Half::from_f32(*x)); + + ( + F16Slice112(a_data.collect::>().try_into().unwrap()), + F16Slice112(b_data.collect::>().try_into().unwrap()), + ) + } + + use crate::test_util::*; + use approx::assert_abs_diff_eq; + + #[test] + fn test_dist_l2_float_turing() { + // two vectors are allocated in the contiguous heap memory + let (a_slice, b_slice) = get_turing_test_data(); + let distance = <[f32; 112] as FullPrecisionDistance>::distance_compare( + &a_slice.0, + &b_slice.0, + Metric::L2, + ); + + assert_abs_diff_eq!( + distance, + no_vector_compare_f32(&a_slice.0, &b_slice.0), + epsilon = 1e-6 + ); + } + + #[test] + fn test_dist_l2_f16_turing() { + // two vectors are allocated in the contiguous heap memory + let (a_slice, b_slice) = get_turing_test_data_f16(); + let distance = <[Half; 112] as FullPrecisionDistance>::distance_compare( + &a_slice.0, + &b_slice.0, + Metric::L2, + ); + + // Note the variance between the full 32 bit precision and the 16 bit precision + assert_eq!(distance, no_vector_compare_f16(&a_slice.0, &b_slice.0)); + } + + #[test] + fn distance_test() { + #[repr(C, align(32))] + struct Vector32ByteAligned { + v: [f32; 512], + } + + // two vectors are allocated in the contiguous heap memory + let two_vec = Box::new(Vector32ByteAligned { + v: [ + 69.02492, 78.84786, 63.125072, 90.90581, 79.2592, 70.81731, 3.0829668, 33.33287, + 20.777142, 30.147898, 23.681915, 42.553043, 12.602162, 7.3808074, 19.157589, + 65.6791, 76.44677, 76.89124, 86.40756, 84.70118, 87.86142, 16.126896, 5.1277637, + 95.11038, 83.946945, 22.735607, 11.548555, 59.51482, 24.84603, 15.573776, 78.27185, + 71.13179, 38.574017, 80.0228, 13.175261, 62.887978, 15.205181, 18.89392, 96.13162, + 87.55455, 34.179806, 62.920044, 4.9305916, 54.349373, 21.731495, 14.982187, + 40.262867, 20.15214, 36.61963, 72.450806, 55.565, 
95.5375, 93.73356, 95.36308, + 66.30762, 58.0397, 18.951357, 67.11702, 43.043316, 30.65622, 99.85361, 2.5889993, + 27.844774, 39.72441, 46.463238, 71.303764, 90.45308, 36.390602, 63.344395, + 26.427078, 35.99528, 82.35505, 32.529175, 23.165905, 74.73179, 9.856939, 59.38126, + 35.714924, 79.81213, 46.704124, 24.47884, 36.01743, 0.46678782, 29.528152, + 1.8980742, 24.68853, 75.58984, 98.72279, 68.62601, 11.890173, 49.49361, 55.45572, + 72.71067, 34.107483, 51.357758, 76.400635, 81.32725, 66.45081, 17.848074, + 62.398876, 94.20444, 2.10886, 17.416393, 64.88253, 29.000723, 62.434315, 53.907238, + 70.51412, 78.70744, 55.181683, 64.45116, 23.419212, 53.68544, 43.506958, 46.89598, + 35.905994, 64.51397, 91.95555, 20.322979, 74.80128, 97.548744, 58.312725, 78.81985, + 31.911612, 14.445949, 49.85094, 70.87396, 40.06766, 7.129991, 78.48008, 75.21636, + 93.623604, 95.95479, 29.571129, 22.721554, 26.73875, 52.075504, 56.783104, + 94.65493, 61.778534, 85.72401, 85.369514, 29.922367, 41.410553, 94.12884, + 80.276855, 55.604828, 54.70947, 74.07216, 44.61955, 31.38113, 68.48596, 34.56782, + 14.424729, 48.204506, 9.675444, 32.01946, 92.32695, 36.292683, 78.31955, 98.05327, + 14.343918, 46.017002, 95.90888, 82.63626, 16.873539, 3.698051, 7.8042626, + 64.194405, 96.71023, 67.93692, 21.618402, 51.92182, 22.834194, 61.56986, 19.749891, + 55.31206, 38.29552, 67.57593, 67.145836, 38.92673, 94.95708, 72.38746, 90.70901, + 69.43995, 9.394085, 31.646872, 88.20112, 9.134722, 99.98214, 5.423498, 41.51995, + 76.94409, 77.373276, 3.2966614, 9.611201, 57.231106, 30.747868, 76.10228, 91.98308, + 70.893585, 0.9067178, 43.96515, 16.321218, 27.734184, 83.271835, 88.23312, + 87.16445, 5.556643, 15.627432, 58.547127, 93.6459, 40.539192, 49.124157, 91.13276, + 57.485855, 8.827019, 4.9690843, 46.511234, 53.91469, 97.71925, 20.135271, + 23.353004, 70.92099, 93.38748, 87.520134, 51.684677, 29.89813, 9.110392, 65.809204, + 34.16554, 93.398605, 84.58669, 96.409645, 9.876037, 94.767784, 99.21523, 1.9330144, + 94.92429, 75.12728, 17.218828, 97.89164, 35.476578, 77.629456, 69.573746, + 40.200542, 42.117836, 5.861628, 75.45282, 82.73633, 0.98086596, 77.24894, + 11.248695, 61.070026, 52.692616, 80.5449, 80.76036, 29.270136, 67.60252, 48.782394, + 95.18851, 83.47162, 52.068756, 46.66002, 90.12216, 15.515327, 33.694042, 96.963036, + 73.49627, 62.805485, 44.715607, 59.98627, 3.8921833, 37.565327, 29.69184, + 39.429665, 83.46899, 44.286453, 21.54851, 56.096413, 18.169249, 5.214751, + 14.691341, 99.779335, 26.32643, 67.69903, 36.41243, 67.27333, 12.157213, 96.18984, + 2.438283, 78.14289, 0.14715195, 98.769, 53.649532, 21.615898, 39.657497, 95.45616, + 18.578386, 71.47976, 22.348118, 17.85519, 6.3717127, 62.176777, 22.033644, + 23.178005, 79.44858, 89.70233, 37.21273, 71.86182, 21.284317, 52.908623, 30.095518, + 63.64478, 77.55823, 80.04871, 15.133011, 30.439043, 70.16561, 4.4014096, 89.28944, + 26.29093, 46.827854, 11.764729, 61.887516, 47.774887, 57.19503, 59.444664, + 28.592825, 98.70386, 1.2497544, 82.28431, 46.76423, 83.746124, 53.032673, 86.53457, + 99.42168, 90.184, 92.27852, 9.059965, 71.75723, 70.45299, 10.924053, 68.329704, + 77.27232, 6.677854, 75.63629, 57.370533, 17.09031, 10.554659, 99.56178, 37.53221, + 72.311104, 75.7565, 65.2042, 36.096478, 64.69502, 38.88497, 64.33723, 84.87812, + 66.84958, 8.508932, 79.134, 83.431015, 66.72124, 61.801838, 64.30524, 37.194263, + 77.94725, 89.705185, 23.643505, 19.505919, 48.40264, 43.01083, 21.171177, + 18.717121, 10.805857, 69.66983, 77.85261, 57.323063, 3.28964, 38.758026, 5.349946, + 
7.46572, 57.485138, 30.822384, 33.9411, 95.53746, 65.57723, 42.1077, 28.591347, + 11.917269, 5.031073, 31.835615, 19.34116, 85.71027, 87.4516, 1.3798475, 70.70583, + 51.988052, 45.217144, 14.308596, 54.557167, 86.18323, 79.13666, 76.866745, + 46.010685, 79.739235, 44.667603, 39.36416, 72.605896, 73.83187, 13.137412, + 6.7911267, 63.952374, 10.082436, 86.00318, 99.760376, 92.84948, 63.786434, + 3.4429908, 18.244314, 75.65299, 14.964747, 70.126366, 80.89449, 91.266655, + 96.58798, 46.439327, 38.253975, 87.31036, 21.093178, 37.19671, 58.28973, 9.75231, + 12.350321, 25.75115, 87.65073, 53.610504, 36.850048, 18.66356, 94.48941, 83.71898, + 44.49315, 44.186737, 19.360733, 84.365974, 46.76272, 44.924366, 50.279808, + 54.868866, 91.33004, 18.683397, 75.13282, 15.070831, 47.04839, 53.780903, + 26.911152, 74.65651, 57.659935, 25.604189, 37.235474, 65.39667, 53.952206, + 40.37131, 59.173275, 96.00756, 54.591274, 10.787476, 69.51549, 31.970142, + 25.408005, 55.972492, 85.01888, 97.48981, 91.006134, 28.98619, 97.151276, + 34.388496, 47.498177, 11.985874, 64.73775, 33.877014, 13.370312, 34.79146, + 86.19321, 15.019405, 94.07832, 93.50433, 60.168625, 50.95409, 38.27827, 47.458614, + 32.83715, 69.54998, 69.0361, 84.1418, 34.270298, 74.23852, 70.707466, 78.59845, + 9.651399, 24.186779, 58.255756, 53.72362, 92.46477, 97.75528, 20.257462, 30.122698, + 50.41517, 28.156603, 42.644154, + ], + }); + + let distance = compare::(256, Metric::L2, &two_vec.v); + + assert_eq!(distance, 429141.2); + } + + fn compare(dim: usize, metric: Metric, v: &[f32]) -> f32 + where + for<'a> [T; N]: FullPrecisionDistance, + { + let a_ptr = v.as_ptr(); + let b_ptr = unsafe { a_ptr.add(dim) }; + + let a_ref = + <&[f32; N]>::try_from(unsafe { std::slice::from_raw_parts(a_ptr, dim) }).unwrap(); + let b_ref = + <&[f32; N]>::try_from(unsafe { std::slice::from_raw_parts(b_ptr, dim) }).unwrap(); + + <[f32; N]>::distance_compare(a_ref, b_ref, metric) + } +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/vector/src/distance_test.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/vector/src/distance_test.rs new file mode 100644 index 0000000..0def026 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/vector/src/distance_test.rs @@ -0,0 +1,152 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. 
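
> Editor's note: a usage sketch of the `FullPrecisionDistance` trait above, assuming it is generic over element type and dimension (`FullPrecisionDistance<T, N>`), as the array impls and the Turing tests indicate. The AVX2 kernels debug-assert 32-byte alignment and the crate must be built with AVX2 enabled, so the arrays are wrapped the same way the tests wrap theirs.

```rust
use vector::{FullPrecisionDistance, Metric};

#[repr(C, align(32))]
struct Aligned8([f32; 8]);

fn main() {
    let a = Aligned8([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]);
    let b = Aligned8([8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0]);
    // Squared L2: (-7)^2 + (-5)^2 + ... + 7^2 = 168
    let d = <[f32; 8] as FullPrecisionDistance<f32, 8>>::distance_compare(&a.0, &b.0, Metric::L2);
    assert_eq!(d, 168.0);
}
```
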
+ */ +#[cfg(test)] +mod e2e_test { + + #[repr(C, align(32))] + pub struct F32Slice104([f32; 104]); + + #[repr(C, align(32))] + pub struct F16Slice104([Half; 104]); + + use approx::assert_abs_diff_eq; + + use crate::half::Half; + use crate::l2_float_distance::{distance_l2_vector_f16, distance_l2_vector_f32}; + + fn no_vector_compare_f32(a: &[f32], b: &[f32]) -> f32 { + let mut sum = 0.0; + for i in 0..a.len() { + let a_f32 = a[i]; + let b_f32 = b[i]; + let diff = a_f32 - b_f32; + sum += diff * diff; + } + sum + } + + fn no_vector_compare(a: &[Half], b: &[Half]) -> f32 { + let mut sum = 0.0; + for i in 0..a.len() { + let a_f32 = a[i].to_f32(); + let b_f32 = b[i].to_f32(); + let diff = a_f32 - b_f32; + sum += diff * diff; + } + sum + } + + #[test] + fn avx2_matches_novector() { + for i in 1..3 { + let (f1, f2) = get_test_data(0, i); + + let distance_f32x8 = distance_l2_vector_f32::<104>(&f1.0, &f2.0); + let distance = no_vector_compare_f32(&f1.0, &f2.0); + + assert_abs_diff_eq!(distance, distance_f32x8, epsilon = 1e-6); + } + } + + #[test] + fn avx2_matches_novector_random() { + let (f1, f2) = get_test_data_random(); + + let distance_f32x8 = distance_l2_vector_f32::<104>(&f1.0, &f2.0); + let distance = no_vector_compare_f32(&f1.0, &f2.0); + + assert_abs_diff_eq!(distance, distance_f32x8, epsilon = 1e-4); + } + + #[test] + fn avx_f16_matches_novector() { + for i in 1..3 { + let (f1, f2) = get_test_data_f16(0, i); + let _a_slice = f1.0.map(|x| x.to_f32().to_string()).join(", "); + let _b_slice = f2.0.map(|x| x.to_f32().to_string()).join(", "); + + let expected = no_vector_compare(f1.0[0..].as_ref(), f2.0[0..].as_ref()); + let distance_f16x8 = distance_l2_vector_f16::<104>(&f1.0, &f2.0); + + assert_abs_diff_eq!(distance_f16x8, expected, epsilon = 1e-4); + } + } + + #[test] + fn avx_f16_matches_novector_random() { + let (f1, f2) = get_test_data_f16_random(); + + let expected = no_vector_compare(f1.0[0..].as_ref(), f2.0[0..].as_ref()); + let distance_f16x8 = distance_l2_vector_f16::<104>(&f1.0, &f2.0); + + assert_abs_diff_eq!(distance_f16x8, expected, epsilon = 1e-4); + } + + fn get_test_data_f16(i1: usize, i2: usize) -> (F16Slice104, F16Slice104) { + let (a_slice, b_slice) = get_test_data(i1, i2); + let a_data = a_slice.0.iter().map(|x| Half::from_f32(*x)); + let b_data = b_slice.0.iter().map(|x| Half::from_f32(*x)); + + ( + F16Slice104(a_data.collect::>().try_into().unwrap()), + F16Slice104(b_data.collect::>().try_into().unwrap()), + ) + } + + fn get_test_data(i1: usize, i2: usize) -> (F32Slice104, F32Slice104) { + use base64::{engine::general_purpose, Engine as _}; + + let b64 = general_purpose::STANDARD.decode(TEST_DATA).unwrap(); + + let decoded: Vec> = bincode::deserialize(&b64).unwrap(); + debug_assert!(decoded.len() > i1); + debug_assert!(decoded.len() > i2); + + let mut f1 = F32Slice104([0.0; 104]); + let v1 = &decoded[i1]; + debug_assert!(v1.len() == 104); + f1.0.copy_from_slice(v1); + + let mut f2 = F32Slice104([0.0; 104]); + let v2 = &decoded[i2]; + debug_assert!(v2.len() == 104); + f2.0.copy_from_slice(v2); + + (f1, f2) + } + + fn get_test_data_f16_random() -> (F16Slice104, F16Slice104) { + let (a_slice, b_slice) = get_test_data_random(); + let a_data = a_slice.0.iter().map(|x| Half::from_f32(*x)); + let b_data = b_slice.0.iter().map(|x| Half::from_f32(*x)); + + ( + F16Slice104(a_data.collect::>().try_into().unwrap()), + F16Slice104(b_data.collect::>().try_into().unwrap()), + ) + } + + fn get_test_data_random() -> (F32Slice104, F32Slice104) { + use rand::Rng; + + let mut rng = 
rand::thread_rng(); + let mut f1 = F32Slice104([0.0; 104]); + + for i in 0..104 { + f1.0[i] = rng.gen_range(-1.0..1.0); + } + + let mut f2 = F32Slice104([0.0; 104]); + + for i in 0..104 { + f2.0[i] = rng.gen_range(-1.0..1.0); + } + + (f1, f2) + } + + const TEST_DATA: &str = "BQAAAAAAAABoAAAAAAAAAPz3Dj7+VgG9z/DDvQkgiT2GryK+nwS4PTeBorz4jpk9ELEqPKKeX73zZrA9uAlRvSqpKT7Gft28LsTuO8XOHL6/lCg+pW/6vJhM7j1fInU+yaSTPC2AAb5T25M8o2YTvWgEAz00cnq8xcUlPPvnBb2AGfk9UmhCvbdUJzwH4jK9UH7Lvdklhz3SoEa+NwsIvt2yYb4q7JA8d4fVvfX/kbtDOJe9boXevbw2CT7n62A9B6hOPlfeNz7CO169vnjcvR3pDz6KZxC+XR/2vTd9PTx7YY492FF2PekiGDt3OSw9IIlGPQooMj5DZcY8EgQgvpg9572paca91GQTPoWpFr7U+t697YAQPYHUXr1d8ow8AQE7PFo6JD3tt+I96ahxvYuvlD3+IW29N4Jtu2/01Ltvvg2+dja+vI8uazvITZO9mXhavpfJ6T2tB8S7OKT3PWWjpj0Mjty9advIPFgucTp3JO69CI6YPaWoDD5pwim9rjUovh2qgr3R/lq+nUi3PI+acL041o081D8lvRCJLTwAAAAAAAAAAAAAAAAAAAAAaAAAAAAAAAA6pJO94NE1voDn+rzQ8CY+1rxkvtspaz0xTPw7+0GMvC0ZgbyWwdy8zHcovKdvdb70BLC8DtHKvdK6vz0R9Ys7vBWyvZK1LL0ehYM9aV+JveuvoD2ilvo9NLJ4vbRnPT4MXAW+BhG4POOBaD0Vz5I9s1+1vTUdHb7Kjcw9uVUJvdbgoj3TbBe8WwPSvYoBBj4m6c+9xTXTvVTDaL28+Ac9KtA0Pa3tS73Vq5S8fNLkvf/Gir0yILy9ZYR3vvUdUD2ZB5W9rHI4PXS76L070oG9EsjYPb89S75pz7Q9xFKyvZ5ECT0kDSU+l4AQPsQVqzyq/LW95ZCZPC6nQj0VIBa9XwkhPr1gy72c7mw937XXvQ76ur3sRok9mCUqPXHvgj28jV89LZN8O0eH0T0KMdq9ZzXevYbmPr0fcac8r7j3vYmKCL4Sewm+iLtRviuOjz08XbE9LlYevDI1wz0s7z278oVJvtpjrT20IEU9+mTtvBjMQz1H9Ey+LQEXva1Rwrxmyts9sf1hPRY3xL3RdRU+AAAAAAAAAAAAAAAAAAAAAGgAAAAAAAAARqSTvbYJpLx1x869cW67PeeJhb7/cBu9m0eFPQO3oL0I+L49YQDavTYSez3SmTg96hBGPuh4oL2x2ow6WdCUO6XUSz4xcU88GReAvVfekj0Ph3Y9z43hvBzT5z1I2my9UVy3vAj8jL08Gtm9CfJcPRihTr1+8Yu9TiP+PNrJa77Dfa09IhpEPesJNr0XzFU8yye3PZKFyz3uzJ09FLRUvYq3l73X4X07DDUzvq9VXjwWtg8+JrzYPcFCkr0jDCg9T9zlvZbZjz4Y8pM89xo8PgAcfbvYSnY8XoFKvO05/L36yzE8J+5yPqfe5r2AZFq8ULRDvnkTgrw+S7q9qGYLvQDZYL1T8d09bFikvZw3+jsYLdO8H3GVveHBYT4gnsE8ZBIJPpzOEj7OSDC+ZYu+vFc1Erzko4M9GqLtPBHH5TwpeRs+miC4PBHH5Tw9Z9k9VUsUPjnppj0oC5C9mcqDvY7y1rxdvZU8PdFAPov9lz0bOmq94kdyPBBokTxtOj89fu4avSsazj1P7iE+x8YkPAAAAAAAAAAAAAAAAAAAAABoAAAAAAAAAHEruT3mgKM8JnEvvAsfHL63906+ifhgvldl1r14OeO9waUyuw3yUzx+PDW9UbDhPQP4Lb4KRRk+Oky2vaLfaT30mrA9YMeZPfzPMz4h42M+XfCHva4AGr6MOSM+iBOzvdsaE7xFxgI+gJGXvVMzE75kHY+8oAWNvVqNK7yOx589fU3lvVVPg730Cwk+DKkEPWYtxjqQ2MK9H0T+vTnGQj2yq5w8L49BvrEJrzyB4Yo9AXV7PYGCLr3MxsG9oWM7PTyu8TzEOhW+dyWrvUTxHD2nL+c9+VKFPcthhLsc0PM8FdyPPeLj/z1WAHS8ZvW2PGg4Cb5u3IU9g4CovSHW+L2CWoG++nZnPAi2ST3HmUC9P5rJuxQbU765lwU+7FLBPUPTfL0uGgk+yKy2PYwXaT1I4I+9AU6VPQ5QaDx9mdE8Qg8zPfGCUjzD/io9rr+BvTNDqT0MFNi9mHatvS1iJD0nVrK78WmIPE0QsL3PAQq9cMRgPWXmmr3yTcw9UcXrPccwa76+cBq+5iVOvUg9c70AAAAAAAAAAAAAAAAAAAAAaAAAAAAAAAB/K7k9hCsnPUJXJr2Wg4a9MEtXve33Sj0VJZ89pciEvWLqwLzUgyu8ADTGPAVenL2UZ/c96YtMved+Wr3LUro9H8a7vGTSA77C5n69Lf3pPQj4KD5cFKq9fZ0uvvYQCT7b23G9XGMCPrGuy736Z9A9kZzFPSuCSD7/9/07Y4/6POxLir3/JBS9qFKMvkSzjryPgVY+ugq8PC9yhbsXaiq+O6WfPcvFK7vZXAy+goAQvXpHHj5jwPI87eokvrySET5QoOm8h8ixOhXzKb5s8+A9sjcJPjiLAz598yQ9yCYSPq6eGz4rvjE82lvGvWuIOLx23zK9hHg8vTWOv70/Tse81fA6Pr2wNz34Eza+2Uj3PZ3trr0aXAI9PCkKPiybe721P9U9QkNLO927jT3LpRA+mpJUvUeU6rwC/Qa+lr4Cvgrpnj1pQ/i9TxhSvJqYr72RS6y8aQLTPQzPiz3vSRY94NfrPJl6LL2adjO8iYfPuhRzZz2f7R8+iVskPcUeXr12ZiI+nd3xvIYv8bwqYlg+AAAAAAAAAAAAAAAAAAAAAA=="; +} + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/vector/src/half.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/vector/src/half.rs new file mode 100644 index 0000000..87d7df6 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/vector/src/half.rs @@ -0,0 +1,82 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. 
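
> Editor's note: the `TEST_DATA` fixture above is a `Vec<Vec<f32>>` serialized with bincode and then base64-encoded; `get_test_data` is the decode path, and the `vector_base64` helper at the end of this diff produces the same encoding from `.fbin` files. A round-trip sketch using the same crates as the dev-dependencies:

```rust
use base64::{engine::general_purpose, Engine as _};

fn encode(vectors: &Vec<Vec<f32>>) -> String {
    let bytes = bincode::serialize(vectors).expect("serialize");
    general_purpose::STANDARD.encode(bytes)
}

fn decode(b64: &str) -> Vec<Vec<f32>> {
    let bytes = general_purpose::STANDARD.decode(b64).expect("valid base64");
    bincode::deserialize(&bytes).expect("deserialize")
}

fn main() {
    let original = vec![vec![0.25f32; 104], vec![0.5f32; 104]];
    let restored = decode(&encode(&original));
    assert_eq!(original, restored);
}
```
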
+ */ +use bytemuck::{Pod, Zeroable}; +use half::f16; +use std::convert::AsRef; +use std::fmt; + +// Define the Half type as a new type over f16. +// the memory layout of the Half struct will be the same as the memory layout of the f16 type itself. +// The Half struct serves as a simple wrapper around the f16 type and does not introduce any additional memory overhead. +// Test function: +// use half::f16; +// pub struct Half(f16); +// fn main() { +// let size_of_half = std::mem::size_of::(); +// let alignment_of_half = std::mem::align_of::(); +// println!("Size of Half: {} bytes", size_of_half); +// println!("Alignment of Half: {} bytes", alignment_of_half); +// } +// Output: +// Size of Half: 2 bytes +// Alignment of Half: 2 bytes +pub struct Half(f16); + +unsafe impl Pod for Half {} +unsafe impl Zeroable for Half {} + +// Implement From for Half +impl From for f32 { + fn from(val: Half) -> Self { + val.0.to_f32() + } +} + +// Implement AsRef for Half so that it can be used in distance_compare. +impl AsRef for Half { + fn as_ref(&self) -> &f16 { + &self.0 + } +} + +// Implement From for Half. +impl Half { + pub fn from_f32(value: f32) -> Self { + Self(f16::from_f32(value)) + } +} + +// Implement Default for Half. +impl Default for Half { + fn default() -> Self { + Self(f16::from_f32(Default::default())) + } +} + +// Implement Clone for Half. +impl Clone for Half { + fn clone(&self) -> Self { + Half(self.0) + } +} + +// Implement PartialEq for Half. +impl fmt::Debug for Half { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "Half({:?})", self.0) + } +} + +impl Copy for Half {} + +impl Half { + pub fn to_f32(&self) -> f32 { + self.0.to_f32() + } +} + +unsafe impl Send for Half {} +unsafe impl Sync for Half {} + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/vector/src/l2_float_distance.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/vector/src/l2_float_distance.rs new file mode 100644 index 0000000..b818899 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/vector/src/l2_float_distance.rs @@ -0,0 +1,78 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +#![warn(missing_debug_implementations, missing_docs)] + +//! 
Distance calculation for L2 Metric + +#[cfg(not(target_feature = "avx2"))] +compile_error!("Library must be compiled with -C target-feature=+avx2"); + +use std::arch::x86_64::*; + +use crate::Half; + +/// Calculate the distance by vector arithmetic +#[inline(never)] +pub fn distance_l2_vector_f16(a: &[Half; N], b: &[Half; N]) -> f32 { + debug_assert_eq!(N % 8, 0); + + // make sure the addresses are bytes aligned + debug_assert_eq!(a.as_ptr().align_offset(32), 0); + debug_assert_eq!(b.as_ptr().align_offset(32), 0); + + unsafe { + let mut sum = _mm256_setzero_ps(); + let a_ptr = a.as_ptr() as *const __m128i; + let b_ptr = b.as_ptr() as *const __m128i; + + // Iterate over the elements in steps of 8 + for i in (0..N).step_by(8) { + let a_vec = _mm256_cvtph_ps(_mm_load_si128(a_ptr.add(i / 8))); + let b_vec = _mm256_cvtph_ps(_mm_load_si128(b_ptr.add(i / 8))); + + let diff = _mm256_sub_ps(a_vec, b_vec); + sum = _mm256_fmadd_ps(diff, diff, sum); + } + + let x128: __m128 = _mm_add_ps(_mm256_extractf128_ps(sum, 1), _mm256_castps256_ps128(sum)); + /* ( -, -, x1+x3+x5+x7, x0+x2+x4+x6 ) */ + let x64: __m128 = _mm_add_ps(x128, _mm_movehl_ps(x128, x128)); + /* ( -, -, -, x0+x1+x2+x3+x4+x5+x6+x7 ) */ + let x32: __m128 = _mm_add_ss(x64, _mm_shuffle_ps(x64, x64, 0x55)); + /* Conversion to float is a no-op on x86-64 */ + _mm_cvtss_f32(x32) + } +} + +/// Calculate the distance by vector arithmetic +#[inline(never)] +pub fn distance_l2_vector_f32(a: &[f32; N], b: &[f32; N]) -> f32 { + debug_assert_eq!(N % 8, 0); + + // make sure the addresses are bytes aligned + debug_assert_eq!(a.as_ptr().align_offset(32), 0); + debug_assert_eq!(b.as_ptr().align_offset(32), 0); + + unsafe { + let mut sum = _mm256_setzero_ps(); + + // Iterate over the elements in steps of 8 + for i in (0..N).step_by(8) { + let a_vec = _mm256_load_ps(&a[i]); + let b_vec = _mm256_load_ps(&b[i]); + let diff = _mm256_sub_ps(a_vec, b_vec); + sum = _mm256_fmadd_ps(diff, diff, sum); + } + + let x128: __m128 = _mm_add_ps(_mm256_extractf128_ps(sum, 1), _mm256_castps256_ps128(sum)); + /* ( -, -, x1+x3+x5+x7, x0+x2+x4+x6 ) */ + let x64: __m128 = _mm_add_ps(x128, _mm_movehl_ps(x128, x128)); + /* ( -, -, -, x0+x1+x2+x3+x4+x5+x6+x7 ) */ + let x32: __m128 = _mm_add_ss(x64, _mm_shuffle_ps(x64, x64, 0x55)); + /* Conversion to float is a no-op on x86-64 */ + _mm_cvtss_f32(x32) + } +} + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/vector/src/lib.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/vector/src/lib.rs new file mode 100644 index 0000000..d221070 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/vector/src/lib.rs @@ -0,0 +1,26 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. 
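
> Editor's note: the `Half` newtype from half.rs is what the f16 kernel above consumes; it only exposes conversions through `f32`. A small sketch (0.5 is exactly representable in f16, so the assertions hold):

```rust
use vector::Half;

fn main() {
    // Half wraps half::f16; arithmetic happens after widening to f32,
    // exactly as the distance kernels do lane by lane.
    let h = Half::from_f32(0.5);
    assert_eq!(h.to_f32(), 0.5);
    let widened: f32 = h.into(); // From<Half> for f32
    assert_eq!(widened, 0.5);
}
```
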
+ */ +#![cfg_attr( + not(test), + warn(clippy::panic, clippy::unwrap_used, clippy::expect_used) +)] + +// #![feature(stdsimd)] +// mod f32x16; +// Uncomment above 2 to experiment with f32x16 +mod distance; +mod half; +mod l2_float_distance; +mod metric; +mod utils; + +pub use crate::half::Half; +pub use distance::FullPrecisionDistance; +pub use metric::Metric; +pub use utils::prefetch_vector; + +#[cfg(test)] +mod distance_test; +mod test_util; diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/vector/src/metric.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/vector/src/metric.rs new file mode 100644 index 0000000..c60ef29 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/vector/src/metric.rs @@ -0,0 +1,36 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +#![warn(missing_debug_implementations, missing_docs)] +use std::str::FromStr; + +/// Distance metric +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +pub enum Metric { + /// Squared Euclidean (L2-Squared) + L2, + + /// Cosine similarity + /// TODO: T should be float for Cosine distance + Cosine, +} + +#[derive(thiserror::Error, Debug)] +pub enum ParseMetricError { + #[error("Invalid format for Metric: {0}")] + InvalidFormat(String), +} + +impl FromStr for Metric { + type Err = ParseMetricError; + + fn from_str(s: &str) -> Result { + match s.to_lowercase().as_str() { + "l2" => Ok(Metric::L2), + "cosine" => Ok(Metric::Cosine), + _ => Err(ParseMetricError::InvalidFormat(String::from(s))), + } + } +} + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/vector/src/test_util.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/vector/src/test_util.rs new file mode 100644 index 0000000..7cfc929 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/vector/src/test_util.rs @@ -0,0 +1,29 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +#[cfg(test)] +use crate::Half; + +#[cfg(test)] +pub fn no_vector_compare_f16(a: &[Half], b: &[Half]) -> f32 { + let mut sum = 0.0; + debug_assert_eq!(a.len(), b.len()); + + for i in 0..a.len() { + sum += (a[i].to_f32() - b[i].to_f32()).powi(2); + } + sum +} + +#[cfg(test)] +pub fn no_vector_compare_f32(a: &[f32], b: &[f32]) -> f32 { + let mut sum = 0.0; + debug_assert_eq!(a.len(), b.len()); + + for i in 0..a.len() { + sum += (a[i] - b[i]).powi(2); + } + sum +} + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/vector/src/utils.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/vector/src/utils.rs new file mode 100644 index 0000000..a61c99a --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/vector/src/utils.rs @@ -0,0 +1,21 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. 
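// A small usage sketch for the `FromStr` implementation of `Metric` shown above
// (not part of the upstream file): parsing is case-insensitive, and any string
// other than "l2"/"cosine" yields `ParseMetricError::InvalidFormat`.
#[cfg(test)]
mod metric_parse_sketch {
    use crate::Metric;
    use std::str::FromStr;

    #[test]
    fn parses_known_metrics() {
        assert_eq!(Metric::from_str("L2").unwrap(), Metric::L2);
        assert_eq!("cosine".parse::<Metric>().unwrap(), Metric::Cosine);
        assert!(Metric::from_str("dot").is_err());
    }
}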
+ */ +use std::arch::x86_64::{_mm_prefetch, _MM_HINT_T0}; + +/// Prefetch the given vector in chunks of 64 bytes, which is a cache line size +/// NOTE: good efficiency when total_vec_size is integral multiple of 64 +#[inline] +pub fn prefetch_vector(vec: &[T]) { + let vec_ptr = vec.as_ptr() as *const i8; + let vecsize = std::mem::size_of_val(vec); + let max_prefetch_size = (vecsize / 64) * 64; + + for d in (0..max_prefetch_size).step_by(64) { + unsafe { + _mm_prefetch(vec_ptr.add(d), _MM_HINT_T0); + } + } +} + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/vector_base64/Cargo.toml b/packages/leann-backend-diskann/third_party/DiskANN/rust/vector_base64/Cargo.toml new file mode 100644 index 0000000..6f50ad9 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/vector_base64/Cargo.toml @@ -0,0 +1,15 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT license. +[package] +name = "vector_base64" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +base64 = "0.21.2" +bincode = "1.3.3" +half = "2.2.1" +serde = "1.0.163" + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/rust/vector_base64/src/main.rs b/packages/leann-backend-diskann/third_party/DiskANN/rust/vector_base64/src/main.rs new file mode 100644 index 0000000..2867436 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/rust/vector_base64/src/main.rs @@ -0,0 +1,82 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT license. + */ +use std::fs::File; +use std::io::{self, BufReader, Read}; +use std::{env, vec}; + +fn main() -> io::Result<()> { + // Retrieve command-line arguments + let args: Vec = env::args().collect(); + + // Check if the correct number of arguments is provided + if args.len() != 4 { + print_usage(); + return Ok(()); + } + + // Retrieve the input and output file paths from the arguments + let input_file_path = &args[1]; + let item_count: usize = args[2].parse::().unwrap(); + let return_dimension: usize = args[3].parse::().unwrap(); + + // Open the input file for reading + let mut input_file = BufReader::new(File::open(input_file_path)?); + + // Read the first 8 bytes as metadata + let mut metadata = [0; 8]; + input_file.read_exact(&mut metadata)?; + + // Extract the number of points and dimension from the metadata + let _ = i32::from_le_bytes(metadata[..4].try_into().unwrap()); + let mut dimension: usize = (i32::from_le_bytes(metadata[4..].try_into().unwrap())) as usize; + if return_dimension < dimension { + dimension = return_dimension; + } + + let mut float_array = Vec::>::with_capacity(item_count); + + // Process each data point + for _ in 0..item_count { + // Read one data point from the input file + let mut buffer = vec![0; dimension * std::mem::size_of::()]; + match input_file.read_exact(&mut buffer) { + Ok(()) => { + let mut float_data = buffer + .chunks_exact(4) + .map(|chunk| f32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]])) + .collect::>(); + + let mut i = return_dimension; + while i > dimension { + float_data.push(0.0); + i -= 1; + } + + float_array.push(float_data); + } + Err(err) => { + println!("Error: {}", err); + break; + } + } + } + + use base64::{engine::general_purpose, Engine as _}; + + let encoded: Vec = bincode::serialize(&float_array).unwrap(); + let b64 = general_purpose::STANDARD.encode(encoded); + println!("Float {}", b64); + + 
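    // At this point every requested item has been read (or reading stopped early on
    // error), zero-padded up to `return_dimension`, bincode-serialized, and printed
    // above as a single base64 string.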
Ok(()) +} + +/// Prints the usage information +fn print_usage() { + println!("Usage: program_name input_file "); + println!( + "Itemcount is the number of items to convert. Expand to dimension if provided is smaller" + ); +} + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/setup.py b/packages/leann-backend-diskann/third_party/DiskANN/setup.py new file mode 100644 index 0000000..01184f8 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/setup.py @@ -0,0 +1,176 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT license. + +import os +import re +import shutil +import subprocess +import sys +from pathlib import Path + +from setuptools import Extension, setup +from setuptools.command.build_ext import build_ext +from setuptools.command.install_lib import install_lib + +# Convert distutils Windows platform specifiers to CMake -A arguments +PLAT_TO_CMAKE = { + "win-amd64": "x64" +} + + +class CMakeExtension(Extension): + def __init__(self, name: str, sourcedir: str = "") -> None: + super().__init__(name, sources=[]) + self.sourcedir = os.fspath(Path(sourcedir).resolve()) + + +class CMakeBuild(build_ext): + def build_extension(self, ext: CMakeExtension) -> None: + # Must be in this form due to bug in .resolve() only fixed in Python 3.10+ + ext_fullpath = Path.cwd() / self.get_ext_fullpath(ext.name) # type: ignore[no-untyped-call] + extdir = ext_fullpath.parent.resolve() + # Using this requires trailing slash for auto-detection & inclusion of + # auxiliary "native" libs + + debug = int(os.environ.get("DEBUG", 0)) if self.debug is None else self.debug + cfg = "Debug" if debug else "Release" + + # CMake lets you override the generator - we need to check this. + # Can be set with Conda-Build, for example. + cmake_generator = os.environ.get("CMAKE_GENERATOR", "") + + # Set Python_EXECUTABLE instead if you use PYBIND11_FINDPYTHON + # EXAMPLE_VERSION_INFO shows you how to pass a value into the C++ code + # from Python. + cmake_args = [ + f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={extdir}{os.sep}", + f"-DPYTHON_EXECUTABLE={sys.executable}", + f"-DCMAKE_BUILD_TYPE={cfg}", # not used on MSVC, but no harm + f"-DVERSION_INFO={self.distribution.get_version()}" # commented out, we want this set in the CMake file + ] + build_args = [] + # Adding CMake arguments set as environment variable + # (needed e.g. to build for ARM OSx on conda-forge) + if "CMAKE_ARGS" in os.environ: + cmake_args += [item for item in os.environ["CMAKE_ARGS"].split(" ") if item] + + # In this example, we pass in the version to C++. You might not need to. + # cmake_args += [f"-DVERSION_INFO={self.distribution.get_version()}"] # type: ignore[attr-defined] + + if self.compiler.compiler_type != "msvc": + # Using Ninja-build since it a) is available as a wheel and b) + # multithreads automatically. MSVC would require all variables be + # exported for Ninja to pick it up, which is a little tricky to do. + # Users can override the generator with CMAKE_GENERATOR in CMake + # 3.15+. 
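            # A hypothetical driver snippet (not part of this setup.py) showing how the
            # environment variables this step consults -- DEBUG, CMAKE_GENERATOR, CMAKE_ARGS
            # and CMAKE_BUILD_PARALLEL_LEVEL -- might be set when building the extension:
            #
            #   import os, subprocess, sys
            #   env = dict(os.environ,
            #              CMAKE_GENERATOR="Ninja",
            #              CMAKE_ARGS="-DCMAKE_CXX_FLAGS=-march=native",
            #              CMAKE_BUILD_PARALLEL_LEVEL="8")
            #   subprocess.run([sys.executable, "-m", "pip", "install", ".", "--no-build-isolation"],
            #                  check=True, env=env)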
+ if not cmake_generator or cmake_generator == "Ninja": + try: + import ninja # noqa: F401 + + ninja_executable_path = Path(ninja.BIN_DIR) / "ninja" + cmake_args += [ + "-GNinja", + f"-DCMAKE_MAKE_PROGRAM:FILEPATH={ninja_executable_path}", + ] + except ImportError: + pass + + else: + + # Single config generators are handled "normally" + single_config = any(x in cmake_generator for x in {"NMake", "Ninja"}) + + # CMake allows an arch-in-generator style for backward compatibility + contains_arch = any(x in cmake_generator for x in {"ARM", "Win64"}) + + # Specify the arch if using MSVC generator, but only if it doesn't + # contain a backward-compatibility arch spec already in the + # generator name. + if not single_config and not contains_arch: + cmake_args += ["-A", PLAT_TO_CMAKE[self.plat_name]] + + # Multi-config generators have a different way to specify configs + if not single_config: + cmake_args += [ + f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{cfg.upper()}={extdir}" + ] + build_args += ["--config", cfg] + + if sys.platform.startswith("darwin"): + # Cross-compile support for macOS - respect ARCHFLAGS if set + archs = re.findall(r"-arch (\S+)", os.environ.get("ARCHFLAGS", "")) + if archs: + cmake_args += ["-DCMAKE_OSX_ARCHITECTURES={}".format(";".join(archs))] + + # Set CMAKE_BUILD_PARALLEL_LEVEL to control the parallel build level + # across all generators. + if "CMAKE_BUILD_PARALLEL_LEVEL" not in os.environ: + # self.parallel is a Python 3 only way to set parallel jobs by hand + # using -j in the build_ext call, not supported by pip or PyPA-build. + if hasattr(self, "parallel") and self.parallel: + # CMake 3.12+ only. + build_args += [f"-j{self.parallel}"] + + build_temp = Path(self.build_temp) / ext.name + if not build_temp.exists(): + build_temp.mkdir(parents=True) + + # this next line is problematic. 
we tell it to use the ext.sourcedir but, when + # using `python -m build`, we actually have a copy of everything made and pushed + # into a venv isolation area + if os.environ.get("USE_CONDA", "") == '1' and os.environ.get("CONDA_PREFIX", "") != "": + subprocess.run( + ["cmake", "-DPYBIND=True", "-DCMAKE_PREFIX_PATH=" + os.environ.get("CONDA_PREFIX", ""), + "-DProtobuf_DIR=" + os.path.join(os.environ.get("CONDA_PREFIX", ""), "lib/cmake/protobuf"), + ext.sourcedir] + cmake_args, cwd=build_temp, check=True + ) + else: + subprocess.run( + ["cmake", "-DPYBIND=True", ext.sourcedir] + cmake_args, cwd=build_temp, check=True + ) + + subprocess.run( + ["cmake", "--build", "."] + build_args, cwd=build_temp, check=True + ) + + +class InstallCMakeLibs(install_lib): + def run(self): + """ + Windows only copy from the x64/Release directory and place them in the package + """ + + self.announce("Moving library files", level=3) + + self.skip_build = True + + # we only need to move the windows build output + windows_build_output_dir = Path('.') / 'x64' / 'Release' + + if windows_build_output_dir.exists(): + libs = [ + os.path.join(windows_build_output_dir, _lib) for _lib in + os.listdir(windows_build_output_dir) if + os.path.isfile(os.path.join(windows_build_output_dir, _lib)) and + os.path.splitext(_lib)[1] in [".dll", '.lib', '.pyd', '.exp'] + ] + + for lib in libs: + shutil.move( + lib, + os.path.join(self.build_dir, 'diskannpy', os.path.basename(lib)) + ) + + super().run() + + +setup( + ext_modules=[CMakeExtension("diskannpy._diskannpy", ".")], + cmdclass={ + "build_ext": CMakeBuild, + 'install_lib': InstallCMakeLibs + }, + zip_safe=False, + package_dir={"diskannpy": "python/src"}, + exclude_package_data={"diskannpy": ["diskann_bindings.cpp"]} +) diff --git a/packages/leann-backend-diskann/third_party/DiskANN/src/CMakeLists.txt b/packages/leann-backend-diskann/third_party/DiskANN/src/CMakeLists.txt new file mode 100644 index 0000000..97b00c7 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/src/CMakeLists.txt @@ -0,0 +1,26 @@ +#Copyright(c) Microsoft Corporation.All rights reserved. +#Licensed under the MIT license. + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_COMPILE_WARNING_AS_ERROR ON) + +if(MSVC) + add_subdirectory(dll) +else() + #file(GLOB CPP_SOURCES *.cpp) + set(CPP_SOURCES abstract_data_store.cpp ann_exception.cpp apple_aligned_file_reader.cpp disk_utils.cpp + distance.cpp index.cpp in_mem_graph_store.cpp in_mem_data_store.cpp + linux_aligned_file_reader.cpp math_utils.cpp natural_number_map.cpp + in_mem_data_store.cpp in_mem_graph_store.cpp + natural_number_set.cpp memory_mapper.cpp partition.cpp pq.cpp + pq_flash_index.cpp scratch.cpp logger.cpp utils.cpp filter_utils.cpp index_factory.cpp abstract_index.cpp pq_l2_distance.cpp pq_data_store.cpp) + if (RESTAPI) + list(APPEND CPP_SOURCES restapi/search_wrapper.cpp restapi/server.cpp) + endif() + add_library(${PROJECT_NAME} ${CPP_SOURCES}) + add_library(${PROJECT_NAME}_s STATIC ${CPP_SOURCES}) +endif() + +if (NOT MSVC) + install(TARGETS ${PROJECT_NAME} LIBRARY) +endif() diff --git a/packages/leann-backend-diskann/third_party/DiskANN/src/abstract_data_store.cpp b/packages/leann-backend-diskann/third_party/DiskANN/src/abstract_data_store.cpp new file mode 100644 index 0000000..0cff015 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/src/abstract_data_store.cpp @@ -0,0 +1,45 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
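// The AbstractIndex implementation later in this diff erases its data and tag
// types by boxing raw pointers/values in std::any and letting the concrete
// index std::any_cast them back. A self-contained illustration of that pattern
// follows, using hypothetical names rather than DiskANN's actual classes:

#include <any>
#include <cstddef>
#include <iostream>

struct AbstractStoreSketch
{
    // Type-erased frontend: callers pass a typed pointer, the base class boxes it.
    template <typename T> void build(const T *data, std::size_t n)
    {
        _build(std::any(data), n);
    }
    virtual ~AbstractStoreSketch() = default;

  protected:
    virtual void _build(std::any data, std::size_t n) = 0;
};

template <typename T> struct StoreSketch : AbstractStoreSketch
{
  protected:
    void _build(std::any data, std::size_t n) override
    {
        // Recover the concrete pointer type that the frontend boxed.
        const T *typed = std::any_cast<const T *>(data);
        std::cout << "building from " << n << " points at " << static_cast<const void *>(typed) << "\n";
    }
};
// e.g. StoreSketch<float> s; float pts[8] = {}; s.build(pts, 8);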
+ +#include +#include "abstract_data_store.h" + +namespace diskann +{ + +template +AbstractDataStore::AbstractDataStore(const location_t capacity, const size_t dim) + : _capacity(capacity), _dim(dim) +{ +} + +template location_t AbstractDataStore::capacity() const +{ + return _capacity; +} + +template size_t AbstractDataStore::get_dims() const +{ + return _dim; +} + +template location_t AbstractDataStore::resize(const location_t new_num_points) +{ + if (new_num_points > _capacity) + { + return expand(new_num_points); + } + else if (new_num_points < _capacity) + { + return shrink(new_num_points); + } + else + { + return _capacity; + } +} + +template DISKANN_DLLEXPORT class AbstractDataStore; +template DISKANN_DLLEXPORT class AbstractDataStore; +template DISKANN_DLLEXPORT class AbstractDataStore; +} // namespace diskann diff --git a/packages/leann-backend-diskann/third_party/DiskANN/src/abstract_index.cpp b/packages/leann-backend-diskann/third_party/DiskANN/src/abstract_index.cpp new file mode 100644 index 0000000..9266582 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/src/abstract_index.cpp @@ -0,0 +1,334 @@ +#include "common_includes.h" +#include "windows_customizations.h" +#include "abstract_index.h" + +namespace diskann +{ + +template +void AbstractIndex::build(const data_type *data, const size_t num_points_to_load, const std::vector &tags) +{ + auto any_data = std::any(data); + auto any_tags_vec = TagVector(tags); + this->_build(any_data, num_points_to_load, any_tags_vec); +} + +template +std::pair AbstractIndex::search(const data_type *query, const size_t K, const uint32_t L, + IDType *indices, float *distances) +{ + auto any_indices = std::any(indices); + auto any_query = std::any(query); + return _search(any_query, K, L, any_indices, distances); +} + +template +size_t AbstractIndex::search_with_tags(const data_type *query, const uint64_t K, const uint32_t L, tag_type *tags, + float *distances, std::vector &res_vectors, bool use_filters, + const std::string filter_label) +{ + auto any_query = std::any(query); + auto any_tags = std::any(tags); + auto any_res_vectors = DataVector(res_vectors); + return this->_search_with_tags(any_query, K, L, any_tags, distances, any_res_vectors, use_filters, filter_label); +} + +template +std::pair AbstractIndex::search_with_filters(const DataType &query, const std::string &raw_label, + const size_t K, const uint32_t L, IndexType *indices, + float *distances) +{ + auto any_indices = std::any(indices); + return _search_with_filters(query, raw_label, K, L, any_indices, distances); +} + +template +void AbstractIndex::search_with_optimized_layout(const data_type *query, size_t K, size_t L, uint32_t *indices) +{ + auto any_query = std::any(query); + this->_search_with_optimized_layout(any_query, K, L, indices); +} + +template +int AbstractIndex::insert_point(const data_type *point, const tag_type tag) +{ + auto any_point = std::any(point); + auto any_tag = std::any(tag); + return this->_insert_point(any_point, any_tag); +} + +template +int AbstractIndex::insert_point(const data_type *point, const tag_type tag, const std::vector &labels) +{ + auto any_point = std::any(point); + auto any_tag = std::any(tag); + auto any_labels = Labelvector(labels); + return this->_insert_point(any_point, any_tag, any_labels); +} + +template int AbstractIndex::lazy_delete(const tag_type &tag) +{ + auto any_tag = std::any(tag); + return this->_lazy_delete(any_tag); +} + +template +void AbstractIndex::lazy_delete(const std::vector &tags, std::vector 
&failed_tags) +{ + auto any_tags = TagVector(tags); + auto any_failed_tags = TagVector(failed_tags); + this->_lazy_delete(any_tags, any_failed_tags); +} + +template void AbstractIndex::get_active_tags(tsl::robin_set &active_tags) +{ + auto any_active_tags = TagRobinSet(active_tags); + this->_get_active_tags(any_active_tags); +} + +template void AbstractIndex::set_start_points_at_random(data_type radius, uint32_t random_seed) +{ + auto any_radius = std::any(radius); + this->_set_start_points_at_random(any_radius, random_seed); +} + +template int AbstractIndex::get_vector_by_tag(tag_type &tag, data_type *vec) +{ + auto any_tag = std::any(tag); + auto any_data_ptr = std::any(vec); + return this->_get_vector_by_tag(any_tag, any_data_ptr); +} + +template void AbstractIndex::set_universal_label(const label_type universal_label) +{ + auto any_label = std::any(universal_label); + this->_set_universal_label(any_label); +} + +// exports +template DISKANN_DLLEXPORT void AbstractIndex::build(const float *data, const size_t num_points_to_load, + const std::vector &tags); +template DISKANN_DLLEXPORT void AbstractIndex::build(const int8_t *data, + const size_t num_points_to_load, + const std::vector &tags); +template DISKANN_DLLEXPORT void AbstractIndex::build(const uint8_t *data, + const size_t num_points_to_load, + const std::vector &tags); +template DISKANN_DLLEXPORT void AbstractIndex::build(const float *data, + const size_t num_points_to_load, + const std::vector &tags); +template DISKANN_DLLEXPORT void AbstractIndex::build(const int8_t *data, + const size_t num_points_to_load, + const std::vector &tags); +template DISKANN_DLLEXPORT void AbstractIndex::build(const uint8_t *data, + const size_t num_points_to_load, + const std::vector &tags); +template DISKANN_DLLEXPORT void AbstractIndex::build(const float *data, const size_t num_points_to_load, + const std::vector &tags); +template DISKANN_DLLEXPORT void AbstractIndex::build(const int8_t *data, + const size_t num_points_to_load, + const std::vector &tags); +template DISKANN_DLLEXPORT void AbstractIndex::build(const uint8_t *data, + const size_t num_points_to_load, + const std::vector &tags); +template DISKANN_DLLEXPORT void AbstractIndex::build(const float *data, + const size_t num_points_to_load, + const std::vector &tags); +template DISKANN_DLLEXPORT void AbstractIndex::build(const int8_t *data, + const size_t num_points_to_load, + const std::vector &tags); +template DISKANN_DLLEXPORT void AbstractIndex::build(const uint8_t *data, + const size_t num_points_to_load, + const std::vector &tags); + +template DISKANN_DLLEXPORT std::pair AbstractIndex::search( + const float *query, const size_t K, const uint32_t L, uint32_t *indices, float *distances); +template DISKANN_DLLEXPORT std::pair AbstractIndex::search( + const uint8_t *query, const size_t K, const uint32_t L, uint32_t *indices, float *distances); +template DISKANN_DLLEXPORT std::pair AbstractIndex::search( + const int8_t *query, const size_t K, const uint32_t L, uint32_t *indices, float *distances); + +template DISKANN_DLLEXPORT std::pair AbstractIndex::search( + const float *query, const size_t K, const uint32_t L, uint64_t *indices, float *distances); +template DISKANN_DLLEXPORT std::pair AbstractIndex::search( + const uint8_t *query, const size_t K, const uint32_t L, uint64_t *indices, float *distances); +template DISKANN_DLLEXPORT std::pair AbstractIndex::search( + const int8_t *query, const size_t K, const uint32_t L, uint64_t *indices, float *distances); + +template DISKANN_DLLEXPORT 
std::pair AbstractIndex::search_with_filters( + const DataType &query, const std::string &raw_label, const size_t K, const uint32_t L, uint32_t *indices, + float *distances); + +template DISKANN_DLLEXPORT std::pair AbstractIndex::search_with_filters( + const DataType &query, const std::string &raw_label, const size_t K, const uint32_t L, uint64_t *indices, + float *distances); + +template DISKANN_DLLEXPORT size_t AbstractIndex::search_with_tags( + const float *query, const uint64_t K, const uint32_t L, int32_t *tags, float *distances, + std::vector &res_vectors, bool use_filters, const std::string filter_label); + +template DISKANN_DLLEXPORT size_t AbstractIndex::search_with_tags( + const uint8_t *query, const uint64_t K, const uint32_t L, int32_t *tags, float *distances, + std::vector &res_vectors, bool use_filters, const std::string filter_label); + +template DISKANN_DLLEXPORT size_t AbstractIndex::search_with_tags( + const int8_t *query, const uint64_t K, const uint32_t L, int32_t *tags, float *distances, + std::vector &res_vectors, bool use_filters, const std::string filter_label); + +template DISKANN_DLLEXPORT size_t AbstractIndex::search_with_tags( + const float *query, const uint64_t K, const uint32_t L, uint32_t *tags, float *distances, + std::vector &res_vectors, bool use_filters, const std::string filter_label); + +template DISKANN_DLLEXPORT size_t AbstractIndex::search_with_tags( + const uint8_t *query, const uint64_t K, const uint32_t L, uint32_t *tags, float *distances, + std::vector &res_vectors, bool use_filters, const std::string filter_label); + +template DISKANN_DLLEXPORT size_t AbstractIndex::search_with_tags( + const int8_t *query, const uint64_t K, const uint32_t L, uint32_t *tags, float *distances, + std::vector &res_vectors, bool use_filters, const std::string filter_label); + +template DISKANN_DLLEXPORT size_t AbstractIndex::search_with_tags( + const float *query, const uint64_t K, const uint32_t L, int64_t *tags, float *distances, + std::vector &res_vectors, bool use_filters, const std::string filter_label); + +template DISKANN_DLLEXPORT size_t AbstractIndex::search_with_tags( + const uint8_t *query, const uint64_t K, const uint32_t L, int64_t *tags, float *distances, + std::vector &res_vectors, bool use_filters, const std::string filter_label); + +template DISKANN_DLLEXPORT size_t AbstractIndex::search_with_tags( + const int8_t *query, const uint64_t K, const uint32_t L, int64_t *tags, float *distances, + std::vector &res_vectors, bool use_filters, const std::string filter_label); + +template DISKANN_DLLEXPORT size_t AbstractIndex::search_with_tags( + const float *query, const uint64_t K, const uint32_t L, uint64_t *tags, float *distances, + std::vector &res_vectors, bool use_filters, const std::string filter_label); + +template DISKANN_DLLEXPORT size_t AbstractIndex::search_with_tags( + const uint8_t *query, const uint64_t K, const uint32_t L, uint64_t *tags, float *distances, + std::vector &res_vectors, bool use_filters, const std::string filter_label); + +template DISKANN_DLLEXPORT size_t AbstractIndex::search_with_tags( + const int8_t *query, const uint64_t K, const uint32_t L, uint64_t *tags, float *distances, + std::vector &res_vectors, bool use_filters, const std::string filter_label); + +template DISKANN_DLLEXPORT void AbstractIndex::search_with_optimized_layout(const float *query, size_t K, + size_t L, uint32_t *indices); +template DISKANN_DLLEXPORT void AbstractIndex::search_with_optimized_layout(const uint8_t *query, size_t K, + size_t L, uint32_t 
*indices); +template DISKANN_DLLEXPORT void AbstractIndex::search_with_optimized_layout(const int8_t *query, size_t K, + size_t L, uint32_t *indices); + +template DISKANN_DLLEXPORT int AbstractIndex::insert_point(const float *point, const int32_t tag); +template DISKANN_DLLEXPORT int AbstractIndex::insert_point(const uint8_t *point, const int32_t tag); +template DISKANN_DLLEXPORT int AbstractIndex::insert_point(const int8_t *point, const int32_t tag); + +template DISKANN_DLLEXPORT int AbstractIndex::insert_point(const float *point, const uint32_t tag); +template DISKANN_DLLEXPORT int AbstractIndex::insert_point(const uint8_t *point, const uint32_t tag); +template DISKANN_DLLEXPORT int AbstractIndex::insert_point(const int8_t *point, const uint32_t tag); + +template DISKANN_DLLEXPORT int AbstractIndex::insert_point(const float *point, const int64_t tag); +template DISKANN_DLLEXPORT int AbstractIndex::insert_point(const uint8_t *point, const int64_t tag); +template DISKANN_DLLEXPORT int AbstractIndex::insert_point(const int8_t *point, const int64_t tag); + +template DISKANN_DLLEXPORT int AbstractIndex::insert_point(const float *point, const uint64_t tag); +template DISKANN_DLLEXPORT int AbstractIndex::insert_point(const uint8_t *point, const uint64_t tag); +template DISKANN_DLLEXPORT int AbstractIndex::insert_point(const int8_t *point, const uint64_t tag); + +template DISKANN_DLLEXPORT int AbstractIndex::insert_point( + const float *point, const int32_t tag, const std::vector &labels); +template DISKANN_DLLEXPORT int AbstractIndex::insert_point( + const uint8_t *point, const int32_t tag, const std::vector &labels); +template DISKANN_DLLEXPORT int AbstractIndex::insert_point( + const int8_t *point, const int32_t tag, const std::vector &labels); + +template DISKANN_DLLEXPORT int AbstractIndex::insert_point( + const float *point, const uint32_t tag, const std::vector &labels); +template DISKANN_DLLEXPORT int AbstractIndex::insert_point( + const uint8_t *point, const uint32_t tag, const std::vector &labels); +template DISKANN_DLLEXPORT int AbstractIndex::insert_point( + const int8_t *point, const uint32_t tag, const std::vector &labels); + +template DISKANN_DLLEXPORT int AbstractIndex::insert_point( + const float *point, const int64_t tag, const std::vector &labels); +template DISKANN_DLLEXPORT int AbstractIndex::insert_point( + const uint8_t *point, const int64_t tag, const std::vector &labels); +template DISKANN_DLLEXPORT int AbstractIndex::insert_point( + const int8_t *point, const int64_t tag, const std::vector &labels); + +template DISKANN_DLLEXPORT int AbstractIndex::insert_point( + const float *point, const uint64_t tag, const std::vector &labels); +template DISKANN_DLLEXPORT int AbstractIndex::insert_point( + const uint8_t *point, const uint64_t tag, const std::vector &labels); +template DISKANN_DLLEXPORT int AbstractIndex::insert_point( + const int8_t *point, const uint64_t tag, const std::vector &labels); + +template DISKANN_DLLEXPORT int AbstractIndex::insert_point( + const float *point, const int32_t tag, const std::vector &labels); +template DISKANN_DLLEXPORT int AbstractIndex::insert_point( + const uint8_t *point, const int32_t tag, const std::vector &labels); +template DISKANN_DLLEXPORT int AbstractIndex::insert_point( + const int8_t *point, const int32_t tag, const std::vector &labels); + +template DISKANN_DLLEXPORT int AbstractIndex::insert_point( + const float *point, const uint32_t tag, const std::vector &labels); +template DISKANN_DLLEXPORT int AbstractIndex::insert_point( + 
const uint8_t *point, const uint32_t tag, const std::vector &labels); +template DISKANN_DLLEXPORT int AbstractIndex::insert_point( + const int8_t *point, const uint32_t tag, const std::vector &labels); + +template DISKANN_DLLEXPORT int AbstractIndex::insert_point( + const float *point, const int64_t tag, const std::vector &labels); +template DISKANN_DLLEXPORT int AbstractIndex::insert_point( + const uint8_t *point, const int64_t tag, const std::vector &labels); +template DISKANN_DLLEXPORT int AbstractIndex::insert_point( + const int8_t *point, const int64_t tag, const std::vector &labels); + +template DISKANN_DLLEXPORT int AbstractIndex::insert_point( + const float *point, const uint64_t tag, const std::vector &labels); +template DISKANN_DLLEXPORT int AbstractIndex::insert_point( + const uint8_t *point, const uint64_t tag, const std::vector &labels); +template DISKANN_DLLEXPORT int AbstractIndex::insert_point( + const int8_t *point, const uint64_t tag, const std::vector &labels); + +template DISKANN_DLLEXPORT int AbstractIndex::lazy_delete(const int32_t &tag); +template DISKANN_DLLEXPORT int AbstractIndex::lazy_delete(const uint32_t &tag); +template DISKANN_DLLEXPORT int AbstractIndex::lazy_delete(const int64_t &tag); +template DISKANN_DLLEXPORT int AbstractIndex::lazy_delete(const uint64_t &tag); + +template DISKANN_DLLEXPORT void AbstractIndex::lazy_delete(const std::vector &tags, + std::vector &failed_tags); +template DISKANN_DLLEXPORT void AbstractIndex::lazy_delete(const std::vector &tags, + std::vector &failed_tags); +template DISKANN_DLLEXPORT void AbstractIndex::lazy_delete(const std::vector &tags, + std::vector &failed_tags); +template DISKANN_DLLEXPORT void AbstractIndex::lazy_delete(const std::vector &tags, + std::vector &failed_tags); + +template DISKANN_DLLEXPORT void AbstractIndex::get_active_tags(tsl::robin_set &active_tags); +template DISKANN_DLLEXPORT void AbstractIndex::get_active_tags(tsl::robin_set &active_tags); +template DISKANN_DLLEXPORT void AbstractIndex::get_active_tags(tsl::robin_set &active_tags); +template DISKANN_DLLEXPORT void AbstractIndex::get_active_tags(tsl::robin_set &active_tags); + +template DISKANN_DLLEXPORT void AbstractIndex::set_start_points_at_random(float radius, uint32_t random_seed); +template DISKANN_DLLEXPORT void AbstractIndex::set_start_points_at_random(uint8_t radius, + uint32_t random_seed); +template DISKANN_DLLEXPORT void AbstractIndex::set_start_points_at_random(int8_t radius, uint32_t random_seed); + +template DISKANN_DLLEXPORT int AbstractIndex::get_vector_by_tag(int32_t &tag, float *vec); +template DISKANN_DLLEXPORT int AbstractIndex::get_vector_by_tag(int32_t &tag, uint8_t *vec); +template DISKANN_DLLEXPORT int AbstractIndex::get_vector_by_tag(int32_t &tag, int8_t *vec); +template DISKANN_DLLEXPORT int AbstractIndex::get_vector_by_tag(uint32_t &tag, float *vec); +template DISKANN_DLLEXPORT int AbstractIndex::get_vector_by_tag(uint32_t &tag, uint8_t *vec); +template DISKANN_DLLEXPORT int AbstractIndex::get_vector_by_tag(uint32_t &tag, int8_t *vec); + +template DISKANN_DLLEXPORT int AbstractIndex::get_vector_by_tag(int64_t &tag, float *vec); +template DISKANN_DLLEXPORT int AbstractIndex::get_vector_by_tag(int64_t &tag, uint8_t *vec); +template DISKANN_DLLEXPORT int AbstractIndex::get_vector_by_tag(int64_t &tag, int8_t *vec); +template DISKANN_DLLEXPORT int AbstractIndex::get_vector_by_tag(uint64_t &tag, float *vec); +template DISKANN_DLLEXPORT int AbstractIndex::get_vector_by_tag(uint64_t &tag, uint8_t *vec); +template 
DISKANN_DLLEXPORT int AbstractIndex::get_vector_by_tag(uint64_t &tag, int8_t *vec); + +template DISKANN_DLLEXPORT void AbstractIndex::set_universal_label(const uint16_t label); +template DISKANN_DLLEXPORT void AbstractIndex::set_universal_label(const uint32_t label); + +} // namespace diskann diff --git a/packages/leann-backend-diskann/third_party/DiskANN/src/ann_exception.cpp b/packages/leann-backend-diskann/third_party/DiskANN/src/ann_exception.cpp new file mode 100644 index 0000000..ba55e36 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/src/ann_exception.cpp @@ -0,0 +1,36 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#include "ann_exception.h" +#include +#include + +namespace diskann +{ +ANNException::ANNException(const std::string &message, int errorCode) + : std::runtime_error(message), _errorCode(errorCode) +{ +} + +std::string package_string(const std::string &item_name, const std::string &item_val) +{ + return std::string("[") + item_name + ": " + std::string(item_val) + std::string("]"); +} + +ANNException::ANNException(const std::string &message, int errorCode, const std::string &funcSig, + const std::string &fileName, uint32_t lineNum) + : ANNException(package_string(std::string("FUNC"), funcSig) + package_string(std::string("FILE"), fileName) + + package_string(std::string("LINE"), std::to_string(lineNum)) + " " + message, + errorCode) +{ +} + +FileException::FileException(const std::string &filename, std::system_error &e, const std::string &funcSig, + const std::string &fileName, uint32_t lineNum) + : ANNException(std::string(" While opening file \'") + filename + std::string("\', error code: ") + + std::to_string(e.code().value()) + " " + e.code().message(), + e.code().value(), funcSig, fileName, lineNum) +{ +} + +} // namespace diskann \ No newline at end of file diff --git a/packages/leann-backend-diskann/third_party/DiskANN/src/apple_aligned_file_reader.cpp b/packages/leann-backend-diskann/third_party/DiskANN/src/apple_aligned_file_reader.cpp new file mode 100644 index 0000000..4ef1c22 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/src/apple_aligned_file_reader.cpp @@ -0,0 +1,383 @@ +#include "aligned_file_reader.h" +#ifdef __APPLE__ + +#include "apple_aligned_file_reader.h" +#include "utils.h" + +#define SECTOR_LEN 4096 + +AppleAlignedFileReader::AppleAlignedFileReader() +{ + this->file_desc = -1; + diskann::cout << "AppleAlignedFileReader created, this=" << this << std::endl; +} + +AppleAlignedFileReader::~AppleAlignedFileReader() +{ + diskann::cout << "AppleAlignedFileReader destructor called, this=" << this << std::endl; + + // 先解注册所有线程 + deregister_all_threads(); + + // 关闭文件描述符 + if (this->file_desc >= 0) + { + diskann::cout << "Closing file in destructor, fd=" << this->file_desc << std::endl; + ::close(this->file_desc); + this->file_desc = -1; + } +} + +IOContext &AppleAlignedFileReader::get_ctx() +{ + auto thread_id = std::this_thread::get_id(); + + // 创建一个静态空上下文用于错误情况 + static IOContext empty_ctx; + static bool initialized = false; + + if (!initialized) + { + empty_ctx.queue = nullptr; + empty_ctx.grp = nullptr; + empty_ctx.channel = nullptr; + initialized = true; + } + + std::unique_lock lk(this->ctx_mut); + + // 如果线程未注册,自动注册它 + if (ctx_map.find(thread_id) == ctx_map.end()) + { + lk.unlock(); + diskann::cerr << "Thread " << thread_id << " not registered, auto-registering" << std::endl; + + // 自动注册线程 + if (this->file_desc >= 0) + { + this->register_thread(); + 
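            // If the auto-registration above succeeded, this thread now has an entry in
            // ctx_map: re-acquire the lock and return that context; otherwise fall back
            // to the shared empty context.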
+ // 再次检查是否注册成功 + lk.lock(); + if (ctx_map.find(thread_id) != ctx_map.end()) + { + return ctx_map[thread_id]; + } + lk.unlock(); + } + + return empty_ctx; + } + + // 如果已注册,直接返回上下文 + IOContext &ctx = ctx_map[thread_id]; + lk.unlock(); + return ctx; +} + +void AppleAlignedFileReader::register_thread() +{ + auto current_id = std::this_thread::get_id(); + diskann::cout << "register_thread called from thread " << current_id << " on instance " << this << std::endl; + + // 检查文件描述符是否有效 + if (this->file_desc < 0) + { + diskann::cerr << "Thread " << current_id << " - register_thread called with invalid file descriptor" + << std::endl; + return; + } + + // 检查线程是否已注册 + { + std::lock_guard ctx_lock(this->ctx_mut); + if (ctx_map.find(current_id) != ctx_map.end()) + { + diskann::cout << "Thread " << current_id << " already registered" << std::endl; + return; + } + } + + // 创建线程上下文 + IOContext ctx; + ctx.queue = nullptr; + ctx.grp = nullptr; + ctx.channel = nullptr; + + std::string queue_name = + "diskann_io_" + std::to_string(*static_cast(static_cast(¤t_id))); + ctx.queue = dispatch_queue_create(queue_name.c_str(), DISPATCH_QUEUE_SERIAL); + if (!ctx.queue) + { + diskann::cerr << "Failed to create queue for thread " << current_id << std::endl; + return; + } + + ctx.grp = dispatch_group_create(); + if (!ctx.grp) + { + diskann::cerr << "Failed to create group for thread " << current_id << std::endl; + dispatch_release(ctx.queue); + return; + } + + // 复制文件描述符 + int dup_fd = ::dup(this->file_desc); + if (dup_fd == -1) + { + diskann::cerr << "Failed to duplicate file descriptor: " << this->file_desc << ", errno=" << errno << std::endl; + dispatch_release(ctx.grp); + dispatch_release(ctx.queue); + return; + } + + // 创建IO通道 + ctx.channel = dispatch_io_create(DISPATCH_IO_RANDOM, dup_fd, ctx.queue, ^(int error) { + ::close(dup_fd); + diskann::cout << "IO channel cleanup called, closed fd=" << dup_fd << std::endl; + }); + + if (!ctx.channel) + { + diskann::cerr << "Failed to create IO channel for thread " << current_id << ", fd=" << dup_fd + << ", errno=" << errno << std::endl; + ::close(dup_fd); + dispatch_release(ctx.grp); + dispatch_release(ctx.queue); + return; + } + + // 设置IO通道参数 + dispatch_io_set_low_water(ctx.channel, SECTOR_LEN); + dispatch_io_set_high_water(ctx.channel, SECTOR_LEN * 16); + + // 添加到线程映射 + { + std::lock_guard ctx_lock(this->ctx_mut); + ctx_map[current_id] = ctx; + } + + diskann::cout << "Thread " << current_id << " successfully registered with fd=" << dup_fd << std::endl; +} + +void AppleAlignedFileReader::deregister_thread() +{ + auto my_id = std::this_thread::get_id(); + diskann::cout << "deregister_thread called from thread " << my_id << " on instance " << this << std::endl; + + IOContext ctx; + bool found = false; + + { + std::lock_guard ctx_lock(this->ctx_mut); + if (ctx_map.find(my_id) != ctx_map.end()) + { + ctx = ctx_map[my_id]; + ctx_map.erase(my_id); + found = true; + } + } + + if (!found) + { + diskann::cerr << "Thread " << my_id << " not registered, cannot deregister" << std::endl; + return; + } + + if (ctx.channel) + { + dispatch_io_close(ctx.channel, DISPATCH_IO_STOP); + dispatch_release(ctx.channel); + } + + if (ctx.grp) + { + dispatch_release(ctx.grp); + } + + if (ctx.queue) + { + dispatch_release(ctx.queue); + } + + diskann::cout << "Thread " << my_id << " deregistered" << std::endl; +} + +void AppleAlignedFileReader::deregister_all_threads() +{ + diskann::cout << "deregister_all_threads called on instance " << this << std::endl; + + std::vector contexts; + + { + 
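        // Snapshot every registered IOContext while holding the lock and clear the map;
        // the dispatch channel/group/queue objects are then released outside the
        // critical section in the loop below.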
std::lock_guard ctx_lock(this->ctx_mut); + diskann::cout << "Deregistering " << ctx_map.size() << " threads" << std::endl; + for (auto &pair : ctx_map) + { + contexts.push_back(pair.second); + } + ctx_map.clear(); + } + + for (auto &ctx : contexts) + { + if (ctx.channel) + { + dispatch_io_close(ctx.channel, DISPATCH_IO_STOP); + dispatch_release(ctx.channel); + } + + if (ctx.grp) + { + dispatch_release(ctx.grp); + } + + if (ctx.queue) + { + dispatch_release(ctx.queue); + } + } + + diskann::cout << "All threads deregistered" << std::endl; +} + +void AppleAlignedFileReader::open(const std::string &fname) +{ + diskann::cout << "open called for file: " << fname << " on instance " << this << std::endl; + + // 关闭已存在的文件 + if (this->file_desc >= 0) + { + diskann::cout << "Closing existing file descriptor: " << this->file_desc << std::endl; + ::close(this->file_desc); + this->file_desc = -1; + } + + // 清空所有线程上下文 + deregister_all_threads(); + + // 打开新文件 + this->file_desc = ::open(fname.c_str(), O_RDONLY); + if (this->file_desc == -1) + { + diskann::cerr << "Failed to open file: " << fname << ", errno=" << errno << std::endl; + throw std::runtime_error("Failed to open file"); // 文件打开失败是致命错误 + } + + // 获取文件信息 + struct stat file_info; + if (::fstat(this->file_desc, &file_info) == 0) + { + diskann::cout << "File opened successfully: " << fname << ", size: " << file_info.st_size + << " bytes, fd=" << this->file_desc << std::endl; + } + else + { + diskann::cout << "File opened but couldn't get file info, fd=" << this->file_desc << std::endl; + } +} + +void AppleAlignedFileReader::close() +{ + diskann::cout << "close called on instance " << this << std::endl; + + // 先清理线程上下文 + deregister_all_threads(); + + // 关闭文件描述符 + if (this->file_desc >= 0) + { + diskann::cout << "Closing file descriptor: " << this->file_desc << std::endl; + ::close(this->file_desc); + this->file_desc = -1; + } +} + +void AppleAlignedFileReader::read(std::vector &read_reqs, IOContext &ctx, bool async) +{ + auto thread_id = std::this_thread::get_id(); + + // 如果通道无效,自动尝试注册线程 + if (!ctx.channel && this->file_desc >= 0) + { + diskann::cout << "Auto-registering thread " << thread_id << " during read" << std::endl; + this->register_thread(); + // 获取新的上下文 + ctx = this->get_ctx(); + } + + // 安全检查 + if (!ctx.channel || !ctx.queue || !ctx.grp) + { + diskann::cerr << "Invalid IO context in thread " << thread_id << std::endl; + return; + } + + dispatch_io_t channel = ctx.channel; + dispatch_queue_t q = ctx.queue; + dispatch_group_t group = ctx.grp; + + // 处理所有读取请求 + uint64_t n_reqs = read_reqs.size(); + for (uint64_t i = 0; i < n_reqs; i++) + { + AlignedRead &req = read_reqs[i]; + + // 检查对齐 + if (!IS_ALIGNED(req.buf, SECTOR_LEN) || !IS_ALIGNED(req.offset, SECTOR_LEN) || !IS_ALIGNED(req.len, SECTOR_LEN)) + { + diskann::cerr << "Thread " << thread_id << " - alignment error for request " << i << std::endl; + continue; + } + + dispatch_group_enter(group); + + dispatch_io_read(channel, req.offset, req.len, q, ^(bool done, dispatch_data_t data, int error) { + if (error) + { + diskann::cerr << "Thread " << thread_id << " read error: " << error << " when reading at offset " + << req.offset << std::endl; + if (done) + dispatch_group_leave(group); + return; + } + + if (data) + { + size_t actual_size = dispatch_data_get_size(data); + if (actual_size > 0) + { + __block size_t total_copied = 0; + dispatch_data_apply(data, + ^(dispatch_data_t region, size_t region_offset, const void *buffer, size_t size) { + if (region_offset + size <= req.len) + { + 
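                        // Copy this contiguous region of the dispatch_data object into the
                        // caller's aligned buffer at the matching offset; returning true tells
                        // dispatch_data_apply to continue with the remaining regions.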
memcpy((char *)req.buf + region_offset, buffer, size); + total_copied += size; + return (bool)true; + } + diskann::cerr << "Buffer overflow: region_offset=" << region_offset + << ", size=" << size << ", req.len=" << req.len << std::endl; + return (bool)false; + }); + + if (total_copied != req.len && done) + { + diskann::cerr << "Warning: Only copied " << total_copied << " of " << req.len + << " requested bytes" << std::endl; + } + } + } + + // 仅在完成时离开组 + if (done) + { + dispatch_group_leave(group); + } + }); + } + + dispatch_group_wait(group, DISPATCH_TIME_FOREVER); +} + +#endif \ No newline at end of file diff --git a/packages/leann-backend-diskann/third_party/DiskANN/src/disk_utils.cpp b/packages/leann-backend-diskann/third_party/DiskANN/src/disk_utils.cpp new file mode 100644 index 0000000..a17d126 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/src/disk_utils.cpp @@ -0,0 +1,1544 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#include "common_includes.h" + +#if defined(DISKANN_RELEASE_UNUSED_TCMALLOC_MEMORY_AT_CHECKPOINTS) && defined(DISKANN_BUILD) +#include "gperftools/malloc_extension.h" +#endif + +#ifdef __APPLE__ +#include +#else +#include "mkl.h" +#endif + +#include "logger.h" +#include "disk_utils.h" +#include "cached_io.h" +#include "index.h" +#include "omp.h" +#include "percentile_stats.h" +#include "partition.h" +#include "pq_flash_index.h" +#include "timer.h" +#include "tsl/robin_set.h" + +namespace diskann +{ + +void add_new_file_to_single_index(std::string index_file, std::string new_file) +{ + std::unique_ptr metadata; + size_t nr, nc; + diskann::load_bin(index_file, metadata, nr, nc); + if (nc != 1) + { + std::stringstream stream; + stream << "Error, index file specified does not have correct metadata. " << std::endl; + throw diskann::ANNException(stream.str(), -1); + } + size_t index_ending_offset = metadata[nr - 1]; + size_t read_blk_size = 64 * 1024 * 1024; + cached_ofstream writer(index_file, read_blk_size); + size_t check_file_size = get_file_size(index_file); + if (check_file_size != index_ending_offset) + { + std::stringstream stream; + stream << "Error, index file specified does not have correct metadata " + "(last entry must match the filesize). " + << std::endl; + throw diskann::ANNException(stream.str(), -1); + } + + cached_ifstream reader(new_file, read_blk_size); + size_t fsize = reader.get_file_size(); + if (fsize == 0) + { + std::stringstream stream; + stream << "Error, new file specified is empty. Not appending. " << std::endl; + throw diskann::ANNException(stream.str(), -1); + } + + size_t num_blocks = DIV_ROUND_UP(fsize, read_blk_size); + char *dump = new char[read_blk_size]; + for (uint64_t i = 0; i < num_blocks; i++) + { + size_t cur_block_size = + read_blk_size > fsize - (i * read_blk_size) ? 
fsize - (i * read_blk_size) : read_blk_size; + reader.read(dump, cur_block_size); + writer.write(dump, cur_block_size); + } + // reader.close(); + // writer.close(); + + delete[] dump; + std::vector new_meta; + for (uint64_t i = 0; i < nr; i++) + new_meta.push_back(metadata[i]); + new_meta.push_back(metadata[nr - 1] + fsize); + + diskann::save_bin(index_file, new_meta.data(), new_meta.size(), 1); +} + +double get_memory_budget(double search_ram_budget) +{ + double final_index_ram_limit = search_ram_budget; + if (search_ram_budget - SPACE_FOR_CACHED_NODES_IN_GB > THRESHOLD_FOR_CACHING_IN_GB) + { // slack for space used by cached + // nodes + final_index_ram_limit = search_ram_budget - SPACE_FOR_CACHED_NODES_IN_GB; + } + return final_index_ram_limit * 1024 * 1024 * 1024; +} + +double get_memory_budget(const std::string &mem_budget_str) +{ + double search_ram_budget = atof(mem_budget_str.c_str()); + return get_memory_budget(search_ram_budget); +} + +size_t calculate_num_pq_chunks(double final_index_ram_limit, size_t points_num, uint32_t dim, + const std::vector ¶m_list) +{ + size_t num_pq_chunks = (size_t)(std::floor)(uint64_t(final_index_ram_limit / (double)points_num)); + diskann::cout << "Calculated num_pq_chunks :" << num_pq_chunks << std::endl; + if (param_list.size() >= 6) + { + float compress_ratio = (float)atof(param_list[5].c_str()); + if (compress_ratio > 0 && compress_ratio <= 1) + { + size_t chunks_by_cr = (size_t)(std::floor)(compress_ratio * dim); + + if (chunks_by_cr > 0 && chunks_by_cr < num_pq_chunks) + { + diskann::cout << "Compress ratio:" << compress_ratio << " new #pq_chunks:" << chunks_by_cr << std::endl; + num_pq_chunks = chunks_by_cr; + } + else + { + diskann::cout << "Compress ratio: " << compress_ratio << " #new pq_chunks: " << chunks_by_cr + << " is either zero or greater than num_pq_chunks: " << num_pq_chunks + << ". num_pq_chunks is unchanged. " << std::endl; + } + } + else + { + diskann::cerr << "Compression ratio: " << compress_ratio << " should be in (0,1]" << std::endl; + } + } + + num_pq_chunks = num_pq_chunks <= 0 ? 1 : num_pq_chunks; + num_pq_chunks = num_pq_chunks > dim ? dim : num_pq_chunks; + num_pq_chunks = num_pq_chunks > MAX_PQ_CHUNKS ? MAX_PQ_CHUNKS : num_pq_chunks; + + diskann::cout << "Compressing " << dim << "-dimensional data into " << num_pq_chunks << " bytes per vector." 
+ << std::endl; + return num_pq_chunks; +} + +template T *generateRandomWarmup(uint64_t warmup_num, uint64_t warmup_dim, uint64_t warmup_aligned_dim) +{ + T *warmup = nullptr; + warmup_num = 100000; + diskann::cout << "Generating random warmup file with dim " << warmup_dim << " and aligned dim " + << warmup_aligned_dim << std::flush; + diskann::alloc_aligned(((void **)&warmup), warmup_num * warmup_aligned_dim * sizeof(T), 8 * sizeof(T)); + std::memset(warmup, 0, warmup_num * warmup_aligned_dim * sizeof(T)); + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<> dis(-128, 127); + for (uint32_t i = 0; i < warmup_num; i++) + { + for (uint32_t d = 0; d < warmup_dim; d++) + { + warmup[i * warmup_aligned_dim + d] = (T)dis(gen); + } + } + diskann::cout << "..done" << std::endl; + return warmup; +} + +#ifdef EXEC_ENV_OLS +template +T *load_warmup(MemoryMappedFiles &files, const std::string &cache_warmup_file, uint64_t &warmup_num, + uint64_t warmup_dim, uint64_t warmup_aligned_dim) +{ + T *warmup = nullptr; + uint64_t file_dim, file_aligned_dim; + + if (files.fileExists(cache_warmup_file)) + { + diskann::load_aligned_bin(files, cache_warmup_file, warmup, warmup_num, file_dim, file_aligned_dim); + diskann::cout << "In the warmup file: " << cache_warmup_file << " File dim: " << file_dim + << " File aligned dim: " << file_aligned_dim << " Expected dim: " << warmup_dim + << " Expected aligned dim: " << warmup_aligned_dim << std::endl; + + if (file_dim != warmup_dim || file_aligned_dim != warmup_aligned_dim) + { + std::stringstream stream; + stream << "Mismatched dimensions in sample file. file_dim = " << file_dim + << " file_aligned_dim: " << file_aligned_dim << " index_dim: " << warmup_dim + << " index_aligned_dim: " << warmup_aligned_dim << std::endl; + diskann::cerr << stream.str(); + throw diskann::ANNException(stream.str(), -1); + } + } + else + { + warmup = generateRandomWarmup(warmup_num, warmup_dim, warmup_aligned_dim); + } + return warmup; +} +#endif + +template +T *load_warmup(const std::string &cache_warmup_file, uint64_t &warmup_num, uint64_t warmup_dim, + uint64_t warmup_aligned_dim) +{ + T *warmup = nullptr; + size_t file_dim, file_aligned_dim; + + if (file_exists(cache_warmup_file)) + { + diskann::load_aligned_bin(cache_warmup_file, warmup, (size_t &)warmup_num, file_dim, file_aligned_dim); + if (file_dim != warmup_dim || file_aligned_dim != warmup_aligned_dim) + { + std::stringstream stream; + stream << "Mismatched dimensions in sample file. file_dim = " << file_dim + << " file_aligned_dim: " << file_aligned_dim << " index_dim: " << warmup_dim + << " index_aligned_dim: " << warmup_aligned_dim << std::endl; + throw diskann::ANNException(stream.str(), -1); + } + } + else + { + warmup = generateRandomWarmup(warmup_num, warmup_dim, warmup_aligned_dim); + } + return warmup; +} + +/*************************************************** + Support for Merging Many Vamana Indices + ***************************************************/ + +void read_idmap(const std::string &fname, std::vector &ivecs) +{ + uint32_t npts32, dim; + size_t actual_file_size = get_file_size(fname); + std::ifstream reader(fname.c_str(), std::ios::binary); + reader.read((char *)&npts32, sizeof(uint32_t)); + reader.read((char *)&dim, sizeof(uint32_t)); + if (dim != 1 || actual_file_size != ((size_t)npts32) * sizeof(uint32_t) + 2 * sizeof(uint32_t)) + { + std::stringstream stream; + stream << "Error reading idmap file. Check if the file is bin file with " + "1 dimensional data. 
Actual: " + << actual_file_size << ", expected: " << (size_t)npts32 + 2 * sizeof(uint32_t) << std::endl; + + throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); + } + ivecs.resize(npts32); + reader.read((char *)ivecs.data(), ((size_t)npts32) * sizeof(uint32_t)); + reader.close(); +} + +int merge_shards(const std::string &vamana_prefix, const std::string &vamana_suffix, const std::string &idmaps_prefix, + const std::string &idmaps_suffix, const uint64_t nshards, uint32_t max_degree, + const std::string &output_vamana, const std::string &medoids_file, bool use_filters, + const std::string &labels_to_medoids_file) +{ + // Read ID maps + std::vector vamana_names(nshards); + std::vector> idmaps(nshards); + for (uint64_t shard = 0; shard < nshards; shard++) + { + vamana_names[shard] = vamana_prefix + std::to_string(shard) + vamana_suffix; + read_idmap(idmaps_prefix + std::to_string(shard) + idmaps_suffix, idmaps[shard]); + } + + // find max node id + size_t nnodes = 0; + size_t nelems = 0; + for (auto &idmap : idmaps) + { + for (auto &id : idmap) + { + nnodes = std::max(nnodes, (size_t)id); + } + nelems += idmap.size(); + } + nnodes++; + diskann::cout << "# nodes: " << nnodes << ", max. degree: " << max_degree << std::endl; + + // compute inverse map: node -> shards + std::vector> node_shard; + node_shard.reserve(nelems); + for (size_t shard = 0; shard < nshards; shard++) + { + diskann::cout << "Creating inverse map -- shard #" << shard << std::endl; + for (size_t idx = 0; idx < idmaps[shard].size(); idx++) + { + size_t node_id = idmaps[shard][idx]; + node_shard.push_back(std::make_pair((uint32_t)node_id, (uint32_t)shard)); + } + } + std::sort(node_shard.begin(), node_shard.end(), [](const auto &left, const auto &right) { + return left.first < right.first || (left.first == right.first && left.second < right.second); + }); + diskann::cout << "Finished computing node -> shards map" << std::endl; + + // will merge all the labels to medoids files of each shard into one + // combined file + if (use_filters) + { + std::unordered_map> global_label_to_medoids; + + for (size_t i = 0; i < nshards; i++) + { + std::ifstream mapping_reader; + std::string map_file = vamana_names[i] + "_labels_to_medoids.txt"; + mapping_reader.open(map_file); + + std::string line, token; + uint32_t line_cnt = 0; + + while (std::getline(mapping_reader, line)) + { + std::istringstream iss(line); + uint32_t cnt = 0; + uint32_t medoid = 0; + uint32_t label = 0; + while (std::getline(iss, token, ',')) + { + token.erase(std::remove(token.begin(), token.end(), '\n'), token.end()); + token.erase(std::remove(token.begin(), token.end(), '\r'), token.end()); + + uint32_t token_as_num = std::stoul(token); + + if (cnt == 0) + label = token_as_num; + else + medoid = token_as_num; + cnt++; + } + global_label_to_medoids[label].push_back(idmaps[i][medoid]); + line_cnt++; + } + mapping_reader.close(); + } + + std::ofstream mapping_writer(labels_to_medoids_file); + assert(mapping_writer.is_open()); + for (auto iter : global_label_to_medoids) + { + mapping_writer << iter.first << ", "; + auto &vec = iter.second; + for (uint32_t idx = 0; idx < vec.size() - 1; idx++) + { + mapping_writer << vec[idx] << ", "; + } + mapping_writer << vec[vec.size() - 1] << std::endl; + } + mapping_writer.close(); + } + + // create cached vamana readers + std::vector vamana_readers(nshards); + for (size_t i = 0; i < nshards; i++) + { + vamana_readers[i].open(vamana_names[i], BUFFER_SIZE_FOR_CACHED_IO); + size_t expected_file_size; + 
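        // The first 8 bytes of each shard's Vamana index hold its expected file size;
        // read them here only to advance the cached reader past that header field.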
vamana_readers[i].read((char *)&expected_file_size, sizeof(uint64_t)); + } + + size_t vamana_metadata_size = + sizeof(uint64_t) + sizeof(uint32_t) + sizeof(uint32_t) + sizeof(uint64_t); // expected file size + max degree + + // medoid_id + frozen_point info + + // create cached vamana writers + cached_ofstream merged_vamana_writer(output_vamana, BUFFER_SIZE_FOR_CACHED_IO); + + size_t merged_index_size = vamana_metadata_size; // we initialize the size of the merged index to + // the metadata size + size_t merged_index_frozen = 0; + merged_vamana_writer.write((char *)&merged_index_size, + sizeof(uint64_t)); // we will overwrite the index size at the end + + uint32_t output_width = max_degree; + uint32_t max_input_width = 0; + // read width from each vamana to advance buffer by sizeof(uint32_t) bytes + for (auto &reader : vamana_readers) + { + uint32_t input_width; + reader.read((char *)&input_width, sizeof(uint32_t)); + max_input_width = input_width > max_input_width ? input_width : max_input_width; + } + + diskann::cout << "Max input width: " << max_input_width << ", output width: " << output_width << std::endl; + + merged_vamana_writer.write((char *)&output_width, sizeof(uint32_t)); + std::ofstream medoid_writer(medoids_file.c_str(), std::ios::binary); + uint32_t nshards_u32 = (uint32_t)nshards; + uint32_t one_val = 1; + medoid_writer.write((char *)&nshards_u32, sizeof(uint32_t)); + medoid_writer.write((char *)&one_val, sizeof(uint32_t)); + + uint64_t vamana_index_frozen = 0; // as of now the functionality to merge many overlapping vamana + // indices is supported only for bulk indices without frozen point. + // Hence the final index will also not have any frozen points. + for (uint64_t shard = 0; shard < nshards; shard++) + { + uint32_t medoid; + // read medoid + vamana_readers[shard].read((char *)&medoid, sizeof(uint32_t)); + vamana_readers[shard].read((char *)&vamana_index_frozen, sizeof(uint64_t)); + assert(vamana_index_frozen == false); + // rename medoid + medoid = idmaps[shard][medoid]; + + medoid_writer.write((char *)&medoid, sizeof(uint32_t)); + // write renamed medoid + if (shard == (nshards - 1)) //--> uncomment if running hierarchical + merged_vamana_writer.write((char *)&medoid, sizeof(uint32_t)); + } + merged_vamana_writer.write((char *)&merged_index_frozen, sizeof(uint64_t)); + medoid_writer.close(); + + diskann::cout << "Starting merge" << std::endl; + + // Gopal. random_shuffle() is deprecated. + std::random_device rng; + std::mt19937 urng(rng()); + + std::vector nhood_set(nnodes, 0); + std::vector final_nhood; + + uint32_t nnbrs = 0, shard_nnbrs = 0; + uint32_t cur_id = 0; + for (const auto &id_shard : node_shard) + { + uint32_t node_id = id_shard.first; + uint32_t shard_id = id_shard.second; + if (cur_id < node_id) + { + // Gopal. random_shuffle() is deprecated. + std::shuffle(final_nhood.begin(), final_nhood.end(), urng); + nnbrs = (uint32_t)(std::min)(final_nhood.size(), (size_t)max_degree); + // write into merged ofstream + merged_vamana_writer.write((char *)&nnbrs, sizeof(uint32_t)); + merged_vamana_writer.write((char *)final_nhood.data(), nnbrs * sizeof(uint32_t)); + merged_index_size += (sizeof(uint32_t) + nnbrs * sizeof(uint32_t)); + if (cur_id % 499999 == 1) + { + diskann::cout << "." 
<< std::flush; + } + cur_id = node_id; + nnbrs = 0; + for (auto &p : final_nhood) + nhood_set[p] = 0; + final_nhood.clear(); + } + // read from shard_id ifstream + vamana_readers[shard_id].read((char *)&shard_nnbrs, sizeof(uint32_t)); + + if (shard_nnbrs == 0) + { + diskann::cout << "WARNING: shard #" << shard_id << ", node_id " << node_id << " has 0 nbrs" << std::endl; + } + + std::vector shard_nhood(shard_nnbrs); + if (shard_nnbrs > 0) + vamana_readers[shard_id].read((char *)shard_nhood.data(), shard_nnbrs * sizeof(uint32_t)); + // rename nodes + for (uint64_t j = 0; j < shard_nnbrs; j++) + { + if (nhood_set[idmaps[shard_id][shard_nhood[j]]] == 0) + { + nhood_set[idmaps[shard_id][shard_nhood[j]]] = 1; + final_nhood.emplace_back(idmaps[shard_id][shard_nhood[j]]); + } + } + } + + // Gopal. random_shuffle() is deprecated. + std::shuffle(final_nhood.begin(), final_nhood.end(), urng); + nnbrs = (uint32_t)(std::min)(final_nhood.size(), (size_t)max_degree); + // write into merged ofstream + merged_vamana_writer.write((char *)&nnbrs, sizeof(uint32_t)); + if (nnbrs > 0) + { + merged_vamana_writer.write((char *)final_nhood.data(), nnbrs * sizeof(uint32_t)); + } + merged_index_size += (sizeof(uint32_t) + nnbrs * sizeof(uint32_t)); + for (auto &p : final_nhood) + nhood_set[p] = 0; + final_nhood.clear(); + + diskann::cout << "Expected size: " << merged_index_size << std::endl; + + merged_vamana_writer.reset(); + merged_vamana_writer.write((char *)&merged_index_size, sizeof(uint64_t)); + + diskann::cout << "Finished merge" << std::endl; + return 0; +} + +// TODO: Make this a streaming implementation to avoid exceeding the memory +// budget +/* If the number of filters per point N exceeds the graph degree R, + then it is difficult to have edges to all labels from this point. + This function break up such dense points to have only a threshold of maximum + T labels per point It divides one graph nodes to multiple nodes and append + the new nodes at the end. 
The dummy map contains the real graph id of the + new nodes added to the graph */ +template +void breakup_dense_points(const std::string data_file, const std::string labels_file, uint32_t density, + const std::string out_data_file, const std::string out_labels_file, + const std::string out_metadata_file) +{ + std::string token, line; + std::ifstream labels_stream(labels_file); + T *data; + size_t npts, ndims; + diskann::load_bin(data_file, data, npts, ndims); + + std::unordered_map dummy_pt_ids; + uint32_t next_dummy_id = (uint32_t)npts; + + uint32_t point_cnt = 0; + + std::vector> labels_per_point; + labels_per_point.resize(npts); + + uint32_t dense_pts = 0; + if (labels_stream.is_open()) + { + while (getline(labels_stream, line)) + { + std::stringstream iss(line); + uint32_t lbl_cnt = 0; + uint32_t label_host = point_cnt; + while (getline(iss, token, ',')) + { + if (lbl_cnt == density) + { + if (label_host == point_cnt) + dense_pts++; + label_host = next_dummy_id; + labels_per_point.resize(next_dummy_id + 1); + dummy_pt_ids[next_dummy_id] = (uint32_t)point_cnt; + next_dummy_id++; + lbl_cnt = 0; + } + token.erase(std::remove(token.begin(), token.end(), '\n'), token.end()); + token.erase(std::remove(token.begin(), token.end(), '\r'), token.end()); + uint32_t token_as_num = std::stoul(token); + labels_per_point[label_host].push_back(token_as_num); + lbl_cnt++; + } + point_cnt++; + } + } + diskann::cout << "fraction of dense points with >= " << density << " labels = " << (float)dense_pts / (float)npts + << std::endl; + + if (labels_per_point.size() != 0) + { + diskann::cout << labels_per_point.size() << " is the new number of points" << std::endl; + std::ofstream label_writer(out_labels_file); + assert(label_writer.is_open()); + for (uint32_t i = 0; i < labels_per_point.size(); i++) + { + for (uint32_t j = 0; j < (labels_per_point[i].size() - 1); j++) + { + label_writer << labels_per_point[i][j] << ","; + } + if (labels_per_point[i].size() != 0) + label_writer << labels_per_point[i][labels_per_point[i].size() - 1]; + label_writer << std::endl; + } + label_writer.close(); + } + + if (dummy_pt_ids.size() != 0) + { + diskann::cout << dummy_pt_ids.size() << " is the number of dummy points created" << std::endl; + + T *ptr = (T *)std::realloc((void *)data, labels_per_point.size() * ndims * sizeof(T)); + if (ptr == nullptr) + { + diskann::cerr << "Realloc failed while creating dummy points" << std::endl; + free(data); + data = nullptr; + throw new diskann::ANNException("Realloc failed while expanding data.", -1, __FUNCTION__, __FILE__, + __LINE__); + } + else + { + data = ptr; + } + + std::ofstream dummy_writer(out_metadata_file); + assert(dummy_writer.is_open()); + for (auto i = dummy_pt_ids.begin(); i != dummy_pt_ids.end(); i++) + { + dummy_writer << i->first << "," << i->second << std::endl; + std::memcpy(data + i->first * ndims, data + i->second * ndims, ndims * sizeof(T)); + } + dummy_writer.close(); + } + + diskann::save_bin(out_data_file, data, labels_per_point.size(), ndims); +} + +void extract_shard_labels(const std::string &in_label_file, const std::string &shard_ids_bin, + const std::string &shard_label_file) +{ // assumes ith row is for ith + // point in labels file + diskann::cout << "Extracting labels for shard" << std::endl; + + uint32_t *ids = nullptr; + size_t num_ids, tmp_dim; + diskann::load_bin(shard_ids_bin, ids, num_ids, tmp_dim); + + uint32_t counter = 0, shard_counter = 0; + std::string cur_line; + + std::ifstream label_reader(in_label_file); + std::ofstream 
label_writer(shard_label_file); + assert(label_reader.is_open()); + assert(label_reader.is_open()); + if (label_reader && label_writer) + { + while (std::getline(label_reader, cur_line)) + { + if (shard_counter >= num_ids) + { + break; + } + if (counter == ids[shard_counter]) + { + label_writer << cur_line << "\n"; + shard_counter++; + } + counter++; + } + } + if (ids != nullptr) + delete[] ids; +} + +template +int build_merged_vamana_index(std::string base_file, diskann::Metric compareMetric, uint32_t L, uint32_t R, + double sampling_rate, double ram_budget, std::string mem_index_path, + std::string medoids_file, std::string centroids_file, size_t build_pq_bytes, bool use_opq, + uint32_t num_threads, bool use_filters, const std::string &label_file, + const std::string &labels_to_medoids_file, const std::string &universal_label, + const uint32_t Lf) +{ + size_t base_num, base_dim; + diskann::get_bin_metadata(base_file, base_num, base_dim); + + double full_index_ram = estimate_ram_usage(base_num, (uint32_t)base_dim, sizeof(T), R); + + // TODO: Make this honest when there is filter support + if (full_index_ram < ram_budget * 1024 * 1024 * 1024) + { + diskann::cout << "Full index fits in RAM budget, should consume at most " + << full_index_ram / (1024 * 1024 * 1024) << "GiBs, so building in one shot" << std::endl; + + diskann::IndexWriteParameters paras = diskann::IndexWriteParametersBuilder(L, R) + .with_filter_list_size(Lf) + .with_saturate_graph(!use_filters) + .with_num_threads(num_threads) + .build(); + using TagT = uint32_t; + diskann::Index _index(compareMetric, base_dim, base_num, + std::make_shared(paras), nullptr, + defaults::NUM_FROZEN_POINTS_STATIC, false, false, false, + build_pq_bytes > 0, build_pq_bytes, use_opq, use_filters); + if (!use_filters) + _index.build(base_file.c_str(), base_num); + else + { + if (universal_label != "") + { // indicates no universal label + LabelT unv_label_as_num = 0; + _index.set_universal_label(unv_label_as_num); + } + _index.build_filtered_index(base_file.c_str(), label_file, base_num); + } + _index.save(mem_index_path.c_str()); + + if (use_filters) + { + // need to copy the labels_to_medoids file to the specified input + // file + std::remove(labels_to_medoids_file.c_str()); + std::string mem_labels_to_medoid_file = mem_index_path + "_labels_to_medoids.txt"; + copy_file(mem_labels_to_medoid_file, labels_to_medoids_file); + std::remove(mem_labels_to_medoid_file.c_str()); + } + + std::remove(medoids_file.c_str()); + std::remove(centroids_file.c_str()); + return 0; + } + + diskann::cout << "Full index does not fit in RAM budget, building in multiple shots" << std::endl; + + // where the universal label is to be saved in the final graph + std::string final_index_universal_label_file = mem_index_path + "_universal_label.txt"; + + std::string merged_index_prefix = mem_index_path + "_tempFiles"; + + Timer timer; + int num_parts = + partition_with_ram_budget(base_file, sampling_rate, ram_budget, 2 * R / 3, merged_index_prefix, 2); + diskann::cout << timer.elapsed_seconds_for_step("partitioning data ") << std::endl; + + std::string cur_centroid_filepath = merged_index_prefix + "_centroids.bin"; + std::rename(cur_centroid_filepath.c_str(), centroids_file.c_str()); + + timer.reset(); + for (int p = 0; p < num_parts; p++) + { +#if defined(DISKANN_RELEASE_UNUSED_TCMALLOC_MEMORY_AT_CHECKPOINTS) && defined(DISKANN_BUILD) + MallocExtension::instance()->ReleaseFreeMemory(); +#endif + + std::string shard_base_file = merged_index_prefix + "_subshard-" + 
std::to_string(p) + ".bin"; + + std::string shard_ids_file = merged_index_prefix + "_subshard-" + std::to_string(p) + "_ids_uint32.bin"; + + std::string shard_labels_file = merged_index_prefix + "_subshard-" + std::to_string(p) + "_labels.txt"; + + retrieve_shard_data_from_ids(base_file, shard_ids_file, shard_base_file); + + std::string shard_index_file = merged_index_prefix + "_subshard-" + std::to_string(p) + "_mem.index"; + + diskann::IndexWriteParameters low_degree_params = diskann::IndexWriteParametersBuilder(L, 2 * R / 3) + .with_filter_list_size(Lf) + .with_saturate_graph(false) + .with_num_threads(num_threads) + .build(); + + size_t shard_base_dim, shard_base_pts; + get_bin_metadata(shard_base_file, shard_base_pts, shard_base_dim); + + diskann::Index _index(compareMetric, shard_base_dim, shard_base_pts, + std::make_shared(low_degree_params), nullptr, + defaults::NUM_FROZEN_POINTS_STATIC, false, false, false, build_pq_bytes > 0, + build_pq_bytes, use_opq); + if (!use_filters) + { + _index.build(shard_base_file.c_str(), shard_base_pts); + } + else + { + diskann::extract_shard_labels(label_file, shard_ids_file, shard_labels_file); + if (universal_label != "") + { // indicates no universal label + LabelT unv_label_as_num = 0; + _index.set_universal_label(unv_label_as_num); + } + _index.build_filtered_index(shard_base_file.c_str(), shard_labels_file, shard_base_pts); + } + + // cal deg stats + size_t max_deg = 0, min_deg = SIZE_MAX, avg_deg = 0, cnt_deg = 0; + _index.get_degree_stats(max_deg, min_deg, avg_deg, cnt_deg); + std::cout << "! For shard " << p << " Degree stats: " << max_deg << ", " << min_deg << ", " << avg_deg << ", " + << cnt_deg << std::endl; + std::string shard_degree_stats_file = shard_index_file + "_degree_stats.txt"; + _index.dump_degree_stats(shard_degree_stats_file); + + _index.save(shard_index_file.c_str()); + // copy universal label file from first shard to the final destination + // index, since all shards anyway share the universal label + if (p == 0) + { + std::string shard_universal_label_file = shard_index_file + "_universal_label.txt"; + if (universal_label != "") + { + copy_file(shard_universal_label_file, final_index_universal_label_file); + } + } + + std::remove(shard_base_file.c_str()); + } + diskann::cout << timer.elapsed_seconds_for_step("building indices on shards") << std::endl; + + timer.reset(); + diskann::merge_shards(merged_index_prefix + "_subshard-", "_mem.index", merged_index_prefix + "_subshard-", + "_ids_uint32.bin", num_parts, R, mem_index_path, medoids_file, use_filters, + labels_to_medoids_file); + diskann::cout << timer.elapsed_seconds_for_step("merging indices") << std::endl; + + // delete tempFiles + for (int p = 0; p < num_parts; p++) + { + std::string shard_base_file = merged_index_prefix + "_subshard-" + std::to_string(p) + ".bin"; + std::string shard_id_file = merged_index_prefix + "_subshard-" + std::to_string(p) + "_ids_uint32.bin"; + std::string shard_labels_file = merged_index_prefix + "_subshard-" + std::to_string(p) + "_labels.txt"; + std::string shard_index_file = merged_index_prefix + "_subshard-" + std::to_string(p) + "_mem.index"; + std::string shard_index_file_data = shard_index_file + ".data"; + + // std::remove(shard_base_file.c_str()); + // std::remove(shard_id_file.c_str()); + // std::remove(shard_index_file.c_str()); + // std::remove(shard_index_file_data.c_str()); + if (use_filters) + { + std::string shard_index_label_file = shard_index_file + "_labels.txt"; + std::string shard_index_univ_label_file = 
shard_index_file + "_universal_label.txt"; + std::string shard_index_label_map_file = shard_index_file + "_labels_to_medoids.txt"; + std::remove(shard_labels_file.c_str()); + std::remove(shard_index_label_file.c_str()); + std::remove(shard_index_label_map_file.c_str()); + std::remove(shard_index_univ_label_file.c_str()); + } + } + return 0; +} + +// General purpose support for DiskANN interface + +// optimizes the beamwidth to maximize QPS for a given L_search subject to +// 99.9 latency not blowing up +template +uint32_t optimize_beamwidth(std::unique_ptr> &pFlashIndex, T *tuning_sample, + uint64_t tuning_sample_num, uint64_t tuning_sample_aligned_dim, uint32_t L, + uint32_t nthreads, uint32_t start_bw) +{ + uint32_t cur_bw = start_bw; + double max_qps = 0; + uint32_t best_bw = start_bw; + bool stop_flag = false; + + while (!stop_flag) + { + std::vector tuning_sample_result_ids_64(tuning_sample_num, 0); + std::vector tuning_sample_result_dists(tuning_sample_num, 0); + diskann::QueryStats *stats = new diskann::QueryStats[tuning_sample_num]; + + auto s = std::chrono::high_resolution_clock::now(); +#pragma omp parallel for schedule(dynamic, 1) num_threads(nthreads) + for (int64_t i = 0; i < (int64_t)tuning_sample_num; i++) + { + pFlashIndex->cached_beam_search(tuning_sample + (i * tuning_sample_aligned_dim), 1, L, + tuning_sample_result_ids_64.data() + (i * 1), + tuning_sample_result_dists.data() + (i * 1), cur_bw, false, stats + i); + } + auto e = std::chrono::high_resolution_clock::now(); + std::chrono::duration diff = e - s; + double qps = (1.0f * (float)tuning_sample_num) / (1.0f * (float)diff.count()); + + double lat_999 = diskann::get_percentile_stats( + stats, tuning_sample_num, 0.999f, [](const diskann::QueryStats &stats) { return stats.total_us; }); + + double mean_latency = diskann::get_mean_stats( + stats, tuning_sample_num, [](const diskann::QueryStats &stats) { return stats.total_us; }); + + if (qps > max_qps && lat_999 < (15000) + mean_latency * 2) + { + max_qps = qps; + best_bw = cur_bw; + cur_bw = (uint32_t)(std::ceil)((float)cur_bw * 1.1f); + } + else + { + stop_flag = true; + } + if (cur_bw > 64) + stop_flag = true; + + delete[] stats; + } + return best_bw; +} + +template +void create_disk_layout(const std::string base_file, const std::string mem_index_file, const std::string output_file, + const std::string reorder_data_file) +{ + uint32_t npts, ndims; + + // amount to read or write in one shot + size_t read_blk_size = 64 * 1024 * 1024; + size_t write_blk_size = read_blk_size; + cached_ifstream base_reader(base_file, read_blk_size); + base_reader.read((char *)&npts, sizeof(uint32_t)); + base_reader.read((char *)&ndims, sizeof(uint32_t)); + + size_t npts_64, ndims_64; + npts_64 = npts; + ndims_64 = ndims; + + // Check if we need to append data for re-ordering + bool append_reorder_data = false; + std::ifstream reorder_data_reader; + + uint32_t npts_reorder_file = 0, ndims_reorder_file = 0; + if (reorder_data_file != std::string("")) + { + append_reorder_data = true; + size_t reorder_data_file_size = get_file_size(reorder_data_file); + reorder_data_reader.exceptions(std::ofstream::failbit | std::ofstream::badbit); + + try + { + reorder_data_reader.open(reorder_data_file, std::ios::binary); + reorder_data_reader.read((char *)&npts_reorder_file, sizeof(uint32_t)); + reorder_data_reader.read((char *)&ndims_reorder_file, sizeof(uint32_t)); + if (npts_reorder_file != npts) + throw ANNException("Mismatch in num_points between reorder " + "data file and base file", + -1, 
__FUNCSIG__, __FILE__, __LINE__); + if (reorder_data_file_size != 8 + sizeof(float) * (size_t)npts_reorder_file * (size_t)ndims_reorder_file) + throw ANNException("Discrepancy in reorder data file size ", -1, __FUNCSIG__, __FILE__, __LINE__); + } + catch (std::system_error &e) + { + throw FileException(reorder_data_file, e, __FUNCSIG__, __FILE__, __LINE__); + } + } + + // create cached reader + writer + size_t actual_file_size = get_file_size(mem_index_file); + diskann::cout << "Vamana index file size=" << actual_file_size << std::endl; + std::ifstream vamana_reader(mem_index_file, std::ios::binary); + cached_ofstream diskann_writer(output_file, write_blk_size); + + // metadata: width, medoid + uint32_t width_u32, medoid_u32; + size_t index_file_size; + + vamana_reader.read((char *)&index_file_size, sizeof(uint64_t)); + if (index_file_size != actual_file_size) + { + std::stringstream stream; + stream << "Vamana Index file size does not match expected size per " + "meta-data." + << " file size from file: " << index_file_size << " actual file size: " << actual_file_size << std::endl; + + throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); + } + uint64_t vamana_frozen_num = false, vamana_frozen_loc = 0; + + vamana_reader.read((char *)&width_u32, sizeof(uint32_t)); + vamana_reader.read((char *)&medoid_u32, sizeof(uint32_t)); + vamana_reader.read((char *)&vamana_frozen_num, sizeof(uint64_t)); + // compute + uint64_t medoid, max_node_len, nnodes_per_sector; + npts_64 = (uint64_t)npts; + medoid = (uint64_t)medoid_u32; + if (vamana_frozen_num == 1) + vamana_frozen_loc = medoid; + max_node_len = (((uint64_t)width_u32 + 1) * sizeof(uint32_t)) + (ndims_64 * sizeof(T)); + nnodes_per_sector = defaults::SECTOR_LEN / max_node_len; // 0 if max_node_len > SECTOR_LEN + + diskann::cout << "medoid: " << medoid << "B" << std::endl; + diskann::cout << "max_node_len: " << max_node_len << "B" << std::endl; + diskann::cout << "nnodes_per_sector: " << nnodes_per_sector << "B" << std::endl; + + // defaults::SECTOR_LEN buffer for each sector + std::unique_ptr sector_buf = std::make_unique(defaults::SECTOR_LEN); + std::unique_ptr multisector_buf = std::make_unique(ROUND_UP(max_node_len, defaults::SECTOR_LEN)); + std::unique_ptr node_buf = std::make_unique(max_node_len); + uint32_t &nnbrs = *(uint32_t *)(node_buf.get() + ndims_64 * sizeof(T)); + uint32_t *nhood_buf = (uint32_t *)(node_buf.get() + (ndims_64 * sizeof(T)) + sizeof(uint32_t)); + + // number of sectors (1 for meta data) + uint64_t n_sectors = nnodes_per_sector > 0 ? 
ROUND_UP(npts_64, nnodes_per_sector) / nnodes_per_sector + : npts_64 * DIV_ROUND_UP(max_node_len, defaults::SECTOR_LEN); + uint64_t n_reorder_sectors = 0; + uint64_t n_data_nodes_per_sector = 0; + + if (append_reorder_data) + { + n_data_nodes_per_sector = defaults::SECTOR_LEN / (ndims_reorder_file * sizeof(float)); + n_reorder_sectors = ROUND_UP(npts_64, n_data_nodes_per_sector) / n_data_nodes_per_sector; + } + uint64_t disk_index_file_size = (n_sectors + n_reorder_sectors + 1) * defaults::SECTOR_LEN; + + std::vector output_file_meta; + output_file_meta.push_back(npts_64); + output_file_meta.push_back(ndims_64); + output_file_meta.push_back(medoid); + output_file_meta.push_back(max_node_len); + output_file_meta.push_back(nnodes_per_sector); + output_file_meta.push_back(vamana_frozen_num); + output_file_meta.push_back(vamana_frozen_loc); + output_file_meta.push_back((uint64_t)append_reorder_data); + if (append_reorder_data) + { + output_file_meta.push_back(n_sectors + 1); + output_file_meta.push_back(ndims_reorder_file); + output_file_meta.push_back(n_data_nodes_per_sector); + } + output_file_meta.push_back(disk_index_file_size); + + diskann_writer.write(sector_buf.get(), defaults::SECTOR_LEN); + + std::unique_ptr cur_node_coords = std::make_unique(ndims_64); + diskann::cout << "# sectors: " << n_sectors << std::endl; + uint64_t cur_node_id = 0; + + if (nnodes_per_sector > 0) + { // Write multiple nodes per sector + for (uint64_t sector = 0; sector < n_sectors; sector++) + { + if (sector % 100000 == 0) + { + diskann::cout << "Sector #" << sector << "written" << std::endl; + } + memset(sector_buf.get(), 0, defaults::SECTOR_LEN); + for (uint64_t sector_node_id = 0; sector_node_id < nnodes_per_sector && cur_node_id < npts_64; + sector_node_id++) + { + memset(node_buf.get(), 0, max_node_len); + // read cur node's nnbrs + vamana_reader.read((char *)&nnbrs, sizeof(uint32_t)); + + // sanity checks on nnbrs + assert(nnbrs > 0); + assert(nnbrs <= width_u32); + + // read node's nhood + vamana_reader.read((char *)nhood_buf, (std::min)(nnbrs, width_u32) * sizeof(uint32_t)); + if (nnbrs > width_u32) + { + vamana_reader.seekg((nnbrs - width_u32) * sizeof(uint32_t), vamana_reader.cur); + } + + // write coords of node first + // T *node_coords = data + ((uint64_t) ndims_64 * cur_node_id); + base_reader.read((char *)cur_node_coords.get(), sizeof(T) * ndims_64); + memcpy(node_buf.get(), cur_node_coords.get(), ndims_64 * sizeof(T)); + + // write nnbrs + *(uint32_t *)(node_buf.get() + ndims_64 * sizeof(T)) = (std::min)(nnbrs, width_u32); + + // write nhood next + memcpy(node_buf.get() + ndims_64 * sizeof(T) + sizeof(uint32_t), nhood_buf, + (std::min)(nnbrs, width_u32) * sizeof(uint32_t)); + + // get offset into sector_buf + char *sector_node_buf = sector_buf.get() + (sector_node_id * max_node_len); + + // copy node buf into sector_node_buf + memcpy(sector_node_buf, node_buf.get(), max_node_len); + cur_node_id++; + } + // flush sector to disk + diskann_writer.write(sector_buf.get(), defaults::SECTOR_LEN); + } + } + else + { // Write multi-sector nodes + uint64_t nsectors_per_node = DIV_ROUND_UP(max_node_len, defaults::SECTOR_LEN); + for (uint64_t i = 0; i < npts_64; i++) + { + if ((i * nsectors_per_node) % 100000 == 0) + { + diskann::cout << "Sector #" << i * nsectors_per_node << "written" << std::endl; + } + memset(multisector_buf.get(), 0, nsectors_per_node * defaults::SECTOR_LEN); + + memset(node_buf.get(), 0, max_node_len); + // read cur node's nnbrs + vamana_reader.read((char *)&nnbrs, sizeof(uint32_t)); + 
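To make the layout arithmetic above concrete, here is a small standalone sketch with illustrative numbers only (128-dimensional float vectors, graph width 64, one million points, and a 4 KiB sector, assumed here rather than read from defaults::SECTOR_LEN):

    #include <cstdint>
    #include <iostream>

    int main()
    {
        // Illustrative parameters; 4096 is an assumed 4 KiB sector size.
        const uint64_t SECTOR_LEN = 4096, ndims = 128, width = 64, npts = 1000000;

        // Per-node record: full-precision coords, then nnbrs, then up to `width` neighbour ids.
        const uint64_t max_node_len = ndims * sizeof(float) + (width + 1) * sizeof(uint32_t); // 772 B

        // 0 means a node no longer fits in one sector and spills across several sectors.
        const uint64_t nnodes_per_sector = SECTOR_LEN / max_node_len; // 5

        const uint64_t n_sectors = nnodes_per_sector > 0
                                       ? (npts + nnodes_per_sector - 1) / nnodes_per_sector
                                       : npts * ((max_node_len + SECTOR_LEN - 1) / SECTOR_LEN);

        std::cout << max_node_len << " B/node, " << nnodes_per_sector << " nodes/sector, "
                  << n_sectors << " data sectors (+1 metadata sector)\n"; // 200000 data sectors
        return 0;
    }
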
+ // sanity checks on nnbrs + assert(nnbrs > 0); + assert(nnbrs <= width_u32); + + // read node's nhood + vamana_reader.read((char *)nhood_buf, (std::min)(nnbrs, width_u32) * sizeof(uint32_t)); + if (nnbrs > width_u32) + { + vamana_reader.seekg((nnbrs - width_u32) * sizeof(uint32_t), vamana_reader.cur); + } + + // write coords of node first + // T *node_coords = data + ((uint64_t) ndims_64 * cur_node_id); + base_reader.read((char *)cur_node_coords.get(), sizeof(T) * ndims_64); + memcpy(multisector_buf.get(), cur_node_coords.get(), ndims_64 * sizeof(T)); + + // write nnbrs + *(uint32_t *)(multisector_buf.get() + ndims_64 * sizeof(T)) = (std::min)(nnbrs, width_u32); + + // write nhood next + memcpy(multisector_buf.get() + ndims_64 * sizeof(T) + sizeof(uint32_t), nhood_buf, + (std::min)(nnbrs, width_u32) * sizeof(uint32_t)); + + // flush sector to disk + diskann_writer.write(multisector_buf.get(), nsectors_per_node * defaults::SECTOR_LEN); + } + } + + if (append_reorder_data) + { + diskann::cout << "Index written. Appending reorder data..." << std::endl; + + auto vec_len = ndims_reorder_file * sizeof(float); + std::unique_ptr vec_buf = std::make_unique(vec_len); + + for (uint64_t sector = 0; sector < n_reorder_sectors; sector++) + { + if (sector % 100000 == 0) + { + diskann::cout << "Reorder data Sector #" << sector << "written" << std::endl; + } + + memset(sector_buf.get(), 0, defaults::SECTOR_LEN); + + for (uint64_t sector_node_id = 0; sector_node_id < n_data_nodes_per_sector && sector_node_id < npts_64; + sector_node_id++) + { + memset(vec_buf.get(), 0, vec_len); + reorder_data_reader.read(vec_buf.get(), vec_len); + + // copy node buf into sector_node_buf + memcpy(sector_buf.get() + (sector_node_id * vec_len), vec_buf.get(), vec_len); + } + // flush sector to disk + diskann_writer.write(sector_buf.get(), defaults::SECTOR_LEN); + } + } + diskann_writer.close(); + diskann::save_bin(output_file, output_file_meta.data(), output_file_meta.size(), 1, 0); + diskann::cout << "Output disk index file written to " << output_file << std::endl; +} + +template +int build_disk_index(const char *dataFilePath, const char *indexFilePath, const char *indexBuildParameters, + diskann::Metric compareMetric, bool use_opq, const std::string &codebook_prefix, bool use_filters, + const std::string &label_file, const std::string &universal_label, const uint32_t filter_threshold, + const uint32_t Lf) +{ + std::stringstream parser; + parser << std::string(indexBuildParameters); + std::string cur_param; + std::vector param_list; + while (parser >> cur_param) + { + param_list.push_back(cur_param); + } + if (param_list.size() < 5 || param_list.size() > 9) + { + diskann::cout << "Correct usage of parameters is R (max degree)\n" + "L (indexing list size, better if >= R)\n" + "B (RAM limit of final index in GB)\n" // search + "M (memory limit while indexing)\n" // build + "T (number of threads for indexing)\n" + "B' (PQ bytes for disk index: optional parameter for " + "very large dimensional data)\n" + "reorder (set true to include full precision in data file" + ": optional paramter, use only when using disk PQ\n" + "build_PQ_byte (number of PQ bytes for inde build; set 0 to use " + "full precision vectors)\n" + "QD Quantized Dimension to overwrite the derived dim from B " + << std::endl; + return -1; + } + + if (!std::is_same::value && + (compareMetric == diskann::Metric::INNER_PRODUCT || compareMetric == diskann::Metric::COSINE)) + { + std::stringstream stream; + stream << "Disk-index build currently only supports 
floating point data for Max " + "Inner Product Search/ cosine similarity. " + << std::endl; + throw diskann::ANNException(stream.str(), -1); + } + + size_t disk_pq_dims = 0; + bool use_disk_pq = false; + size_t build_pq_bytes = 0; + + // if there is a 6th parameter, it means we compress the disk index + // vectors also using PQ data (for very large dimensionality data). If the + // provided parameter is 0, it means we store full vectors. + if (param_list.size() > 5) + { + disk_pq_dims = atoi(param_list[5].c_str()); + use_disk_pq = true; + if (disk_pq_dims == 0) + use_disk_pq = false; + } + + bool reorder_data = false; + if (param_list.size() >= 7) + { + if (1 == atoi(param_list[6].c_str())) + { + reorder_data = true; + } + } + + if (param_list.size() >= 8) + { + build_pq_bytes = atoi(param_list[7].c_str()); + } + + std::string base_file(dataFilePath); + std::string data_file_to_use = base_file; + std::string labels_file_original = label_file; + std::string index_prefix_path(indexFilePath); + std::string labels_file_to_use = index_prefix_path + "_label_formatted.txt"; + std::string pq_pivots_path_base = codebook_prefix; + std::string pq_pivots_path = file_exists(pq_pivots_path_base) ? pq_pivots_path_base + "_pq_pivots.bin" + : index_prefix_path + "_pq_pivots.bin"; + std::string pq_compressed_vectors_path = index_prefix_path + "_pq_compressed.bin"; + std::string mem_index_path = index_prefix_path + "_mem.index"; + std::string disk_index_path = index_prefix_path + "_disk.index"; + std::string medoids_path = disk_index_path + "_medoids.bin"; + std::string centroids_path = disk_index_path + "_centroids.bin"; + + std::string labels_to_medoids_path = disk_index_path + "_labels_to_medoids.txt"; + std::string mem_labels_file = mem_index_path + "_labels.txt"; + std::string disk_labels_file = disk_index_path + "_labels.txt"; + std::string mem_univ_label_file = mem_index_path + "_universal_label.txt"; + std::string disk_univ_label_file = disk_index_path + "_universal_label.txt"; + std::string disk_labels_int_map_file = disk_index_path + "_labels_map.txt"; + std::string dummy_remap_file = disk_index_path + "_dummy_map.txt"; // remap will be used if we break-up points of + // high label-density to create copies + + std::string sample_base_prefix = index_prefix_path + "_sample"; + // optional, used if disk index file must store pq data + std::string disk_pq_pivots_path = index_prefix_path + "_disk.index_pq_pivots.bin"; + // optional, used if disk index must store pq data + std::string disk_pq_compressed_vectors_path = index_prefix_path + "_disk.index_pq_compressed.bin"; + std::string prepped_base = + index_prefix_path + + "_prepped_base.bin"; // temp file for storing pre-processed base file for cosine/ mips metrics + bool created_temp_file_for_processed_data = false; + + // output a new base file which contains extra dimension with sqrt(1 - + // ||x||^2/M^2) for every x, M is max norm of all points. Extra space on + // disk needed! + if (compareMetric == diskann::Metric::INNER_PRODUCT) + { + Timer timer; + std::cout << "Using Inner Product search, so need to pre-process base " + "data into temp file. Please ensure there is additional " + "(n*(d+1)*4) bytes for storing pre-processed base vectors, " + "apart from the interim indices created by DiskANN and the final index." 
+ << std::endl; + data_file_to_use = prepped_base; + float max_norm_of_base = diskann::prepare_base_for_inner_products(base_file, prepped_base); + std::string norm_file = disk_index_path + "_max_base_norm.bin"; + + diskann::save_bin(norm_file, &max_norm_of_base, 1, 1); + diskann::cout << timer.elapsed_seconds_for_step("preprocessing data for inner product") << std::endl; + created_temp_file_for_processed_data = true; + + diskann::cout << "Reading max_norm_of_base from " << norm_file << std::endl; + float *max_norm_of_base_ptr; + size_t npts, ndims; + diskann::load_bin(norm_file, max_norm_of_base_ptr, npts, ndims); + if (max_norm_of_base != *max_norm_of_base_ptr) + { + diskann::cout << "max_norm_of_base mismatch: " << max_norm_of_base << " != " << *max_norm_of_base_ptr + << std::endl; + assert(false); + } + diskann::cout << "max_norm_of_base: " << max_norm_of_base << std::endl; + diskann::cout << "! Using prepped_base file at " << prepped_base << std::endl; + if (!file_exists(prepped_base)) + { + diskann::cout << "! prepped_base file does not exist, please check the file path" << std::endl; + assert(false); + } + } + else if (compareMetric == diskann::Metric::COSINE) + { + Timer timer; + std::cout << "Normalizing data for cosine to temporary file, please ensure there is additional " + "(n*d*4) bytes for storing normalized base vectors, " + "apart from the interim indices created by DiskANN and the final index." + << std::endl; + data_file_to_use = prepped_base; + diskann::normalize_data_file(base_file, prepped_base); + diskann::cout << timer.elapsed_seconds_for_step("preprocessing data for cosine") << std::endl; + created_temp_file_for_processed_data = true; + } + + uint32_t R = (uint32_t)atoi(param_list[0].c_str()); + uint32_t L = (uint32_t)atoi(param_list[1].c_str()); + + double final_index_ram_limit = get_memory_budget(param_list[2]); + if (final_index_ram_limit <= 0) + { + std::cerr << "Insufficient memory budget (or string was not in right " + "format). Should be > 0." + << std::endl; + return -1; + } + double indexing_ram_budget = (float)atof(param_list[3].c_str()); + if (indexing_ram_budget <= 0) + { + std::cerr << "Not building index. Please provide more RAM budget" << std::endl; + return -1; + } + uint32_t num_threads = (uint32_t)atoi(param_list[4].c_str()); + + if (num_threads != 0) + { + omp_set_num_threads(num_threads); +#ifdef __x86_64__ + mkl_set_num_threads(num_threads); +#endif + } + + diskann::cout << "Starting index build: R=" << R << " L=" << L << " Query RAM budget: " << final_index_ram_limit + << " Indexing ram budget: " << indexing_ram_budget << " T: " << num_threads << std::endl; + + auto s = std::chrono::high_resolution_clock::now(); + + // If there is filter support, we break-up points which have too many labels + // into replica dummy points which evenly distribute the filters. 
The rest + // of index build happens on the augmented base and labels + std::string augmented_data_file, augmented_labels_file; + if (use_filters) + { + convert_labels_string_to_int(labels_file_original, labels_file_to_use, disk_labels_int_map_file, + universal_label); + augmented_data_file = index_prefix_path + "_augmented_data.bin"; + augmented_labels_file = index_prefix_path + "_augmented_labels.txt"; + if (filter_threshold != 0) + { + breakup_dense_points(data_file_to_use, labels_file_to_use, filter_threshold, augmented_data_file, + augmented_labels_file, + dummy_remap_file); // RKNOTE: This has large memory footprint, + // need to make this streaming + data_file_to_use = augmented_data_file; + labels_file_to_use = augmented_labels_file; + } + } + + size_t points_num, dim; + + diskann::cout << "getting bin metadata" << std::endl; + Timer timer; + diskann::get_bin_metadata(data_file_to_use.c_str(), points_num, dim); + diskann::cout << timer.elapsed_seconds_for_step("getting bin metadata") << std::endl; + const double p_val = ((double)MAX_PQ_TRAINING_SET_SIZE / (double)points_num); + + if (use_disk_pq) + { + generate_disk_quantized_data(data_file_to_use, disk_pq_pivots_path, disk_pq_compressed_vectors_path, + compareMetric, p_val, disk_pq_dims); + } + size_t num_pq_chunks = (size_t)(std::floor)(uint64_t(final_index_ram_limit / points_num)); + + num_pq_chunks = num_pq_chunks <= 0 ? 1 : num_pq_chunks; + num_pq_chunks = num_pq_chunks > dim ? dim : num_pq_chunks; + num_pq_chunks = num_pq_chunks > MAX_PQ_CHUNKS ? MAX_PQ_CHUNKS : num_pq_chunks; + + if (param_list.size() >= 9 && atoi(param_list[8].c_str()) <= MAX_PQ_CHUNKS && atoi(param_list[8].c_str()) > 0) + { + std::cout << "Use quantized dimension (QD) to overwrite derived quantized " + "dimension from search_DRAM_budget (B)" + << std::endl; + num_pq_chunks = atoi(param_list[8].c_str()); + } + + diskann::cout << "Compressing " << dim << "-dimensional data into " << num_pq_chunks << " bytes per vector." + << std::endl; + + generate_quantized_data(data_file_to_use, pq_pivots_path, pq_compressed_vectors_path, compareMetric, p_val, + num_pq_chunks, use_opq, codebook_prefix); + diskann::cout << timer.elapsed_seconds_for_step("generating quantized data") << std::endl; + +// Gopal. Splitting diskann_dll into separate DLLs for search and build. +// This code should only be available in the "build" DLL. +#if defined(DISKANN_RELEASE_UNUSED_TCMALLOC_MEMORY_AT_CHECKPOINTS) && defined(DISKANN_BUILD) + MallocExtension::instance()->ReleaseFreeMemory(); +#endif + // Whether it is cosine or inner product, we still L2 metric due to the pre-processing. 
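The comment above refers to the usual MIPS-to-L2 reduction: assuming base vectors are also divided by the max norm M, appending sqrt(1 - ||x/M||^2) to each base vector and 0 to each query makes L2 nearest neighbours coincide with largest inner products, since ||q' - x'||^2 = ||q||^2 + 1 - 2<q, x>/M. A minimal sketch of that augmentation (hypothetical helper name; the actual preprocessing is the file-based prepare_base_for_inner_products used earlier):

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <vector>

    // Scale x by the dataset's max norm M and append sqrt(1 - ||x/M||^2).
    // A query would instead get a 0 appended in its extra coordinate.
    std::vector<float> augment_base_vector(const std::vector<float> &x, float max_norm)
    {
        std::vector<float> out(x.size() + 1);
        float norm_sq = 0.0f;
        for (size_t i = 0; i < x.size(); i++)
        {
            out[i] = x[i] / max_norm;
            norm_sq += out[i] * out[i];
        }
        // Clamp guards against tiny negative values from rounding when ||x|| == M.
        out.back() = std::sqrt(std::max(0.0f, 1.0f - norm_sq));
        return out;
    }
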
+ timer.reset(); + diskann::build_merged_vamana_index(data_file_to_use.c_str(), diskann::Metric::L2, L, R, p_val, + indexing_ram_budget, mem_index_path, medoids_path, centroids_path, + build_pq_bytes, use_opq, num_threads, use_filters, labels_file_to_use, + labels_to_medoids_path, universal_label, Lf); + diskann::cout << timer.elapsed_seconds_for_step("building merged vamana index") << std::endl; + + timer.reset(); + if (!use_disk_pq) + { + diskann::create_disk_layout(data_file_to_use.c_str(), mem_index_path, disk_index_path); + } + else + { + if (!reorder_data) + diskann::create_disk_layout(disk_pq_compressed_vectors_path, mem_index_path, disk_index_path); + else + diskann::create_disk_layout(disk_pq_compressed_vectors_path, mem_index_path, disk_index_path, + data_file_to_use.c_str()); + } + diskann::cout << timer.elapsed_seconds_for_step("generating disk layout") << std::endl; + + double ten_percent_points = std::ceil(points_num * 0.1); + double num_sample_points = + ten_percent_points > MAX_SAMPLE_POINTS_FOR_WARMUP ? MAX_SAMPLE_POINTS_FOR_WARMUP : ten_percent_points; + double sample_sampling_rate = num_sample_points / points_num; + gen_random_slice(data_file_to_use.c_str(), sample_base_prefix, sample_sampling_rate); + if (use_filters) + { + copy_file(labels_file_to_use, disk_labels_file); + std::remove(mem_labels_file.c_str()); + if (universal_label != "") + { + copy_file(mem_univ_label_file, disk_univ_label_file); + std::remove(mem_univ_label_file.c_str()); + } + std::remove(augmented_data_file.c_str()); + std::remove(augmented_labels_file.c_str()); + std::remove(labels_file_to_use.c_str()); + } + if (created_temp_file_for_processed_data) + std::remove(prepped_base.c_str()); + std::remove(mem_index_path.c_str()); + std::remove((mem_index_path + ".data").c_str()); + std::remove((mem_index_path + ".tags").c_str()); + if (use_disk_pq) + std::remove(disk_pq_compressed_vectors_path.c_str()); + + auto e = std::chrono::high_resolution_clock::now(); + std::chrono::duration diff = e - s; + diskann::cout << "Indexing time: " << diff.count() << std::endl; + + return 0; +} + +template DISKANN_DLLEXPORT void create_disk_layout(const std::string base_file, + const std::string mem_index_file, + const std::string output_file, + const std::string reorder_data_file); +template DISKANN_DLLEXPORT void create_disk_layout(const std::string base_file, + const std::string mem_index_file, + const std::string output_file, + const std::string reorder_data_file); +template DISKANN_DLLEXPORT void create_disk_layout(const std::string base_file, const std::string mem_index_file, + const std::string output_file, + const std::string reorder_data_file); + +template DISKANN_DLLEXPORT int8_t *load_warmup(const std::string &cache_warmup_file, uint64_t &warmup_num, + uint64_t warmup_dim, uint64_t warmup_aligned_dim); +template DISKANN_DLLEXPORT uint8_t *load_warmup(const std::string &cache_warmup_file, uint64_t &warmup_num, + uint64_t warmup_dim, uint64_t warmup_aligned_dim); +template DISKANN_DLLEXPORT float *load_warmup(const std::string &cache_warmup_file, uint64_t &warmup_num, + uint64_t warmup_dim, uint64_t warmup_aligned_dim); + +#ifdef EXEC_ENV_OLS +template DISKANN_DLLEXPORT int8_t *load_warmup(MemoryMappedFiles &files, const std::string &cache_warmup_file, + uint64_t &warmup_num, uint64_t warmup_dim, + uint64_t warmup_aligned_dim); +template DISKANN_DLLEXPORT uint8_t *load_warmup(MemoryMappedFiles &files, const std::string &cache_warmup_file, + uint64_t &warmup_num, uint64_t warmup_dim, + uint64_t 
warmup_aligned_dim); +template DISKANN_DLLEXPORT float *load_warmup(MemoryMappedFiles &files, const std::string &cache_warmup_file, + uint64_t &warmup_num, uint64_t warmup_dim, + uint64_t warmup_aligned_dim); +#endif + +template DISKANN_DLLEXPORT uint32_t optimize_beamwidth( + std::unique_ptr> &pFlashIndex, int8_t *tuning_sample, + uint64_t tuning_sample_num, uint64_t tuning_sample_aligned_dim, uint32_t L, uint32_t nthreads, uint32_t start_bw); +template DISKANN_DLLEXPORT uint32_t optimize_beamwidth( + std::unique_ptr> &pFlashIndex, uint8_t *tuning_sample, + uint64_t tuning_sample_num, uint64_t tuning_sample_aligned_dim, uint32_t L, uint32_t nthreads, uint32_t start_bw); +template DISKANN_DLLEXPORT uint32_t optimize_beamwidth( + std::unique_ptr> &pFlashIndex, float *tuning_sample, + uint64_t tuning_sample_num, uint64_t tuning_sample_aligned_dim, uint32_t L, uint32_t nthreads, uint32_t start_bw); + +template DISKANN_DLLEXPORT uint32_t optimize_beamwidth( + std::unique_ptr> &pFlashIndex, int8_t *tuning_sample, + uint64_t tuning_sample_num, uint64_t tuning_sample_aligned_dim, uint32_t L, uint32_t nthreads, uint32_t start_bw); +template DISKANN_DLLEXPORT uint32_t optimize_beamwidth( + std::unique_ptr> &pFlashIndex, uint8_t *tuning_sample, + uint64_t tuning_sample_num, uint64_t tuning_sample_aligned_dim, uint32_t L, uint32_t nthreads, uint32_t start_bw); +template DISKANN_DLLEXPORT uint32_t optimize_beamwidth( + std::unique_ptr> &pFlashIndex, float *tuning_sample, + uint64_t tuning_sample_num, uint64_t tuning_sample_aligned_dim, uint32_t L, uint32_t nthreads, uint32_t start_bw); + +template DISKANN_DLLEXPORT int build_disk_index(const char *dataFilePath, const char *indexFilePath, + const char *indexBuildParameters, + diskann::Metric compareMetric, bool use_opq, + const std::string &codebook_prefix, bool use_filters, + const std::string &label_file, + const std::string &universal_label, + const uint32_t filter_threshold, const uint32_t Lf); +template DISKANN_DLLEXPORT int build_disk_index(const char *dataFilePath, const char *indexFilePath, + const char *indexBuildParameters, + diskann::Metric compareMetric, bool use_opq, + const std::string &codebook_prefix, bool use_filters, + const std::string &label_file, + const std::string &universal_label, + const uint32_t filter_threshold, const uint32_t Lf); +template DISKANN_DLLEXPORT int build_disk_index(const char *dataFilePath, const char *indexFilePath, + const char *indexBuildParameters, + diskann::Metric compareMetric, bool use_opq, + const std::string &codebook_prefix, bool use_filters, + const std::string &label_file, + const std::string &universal_label, + const uint32_t filter_threshold, const uint32_t Lf); +// LabelT = uint16 +template DISKANN_DLLEXPORT int build_disk_index(const char *dataFilePath, const char *indexFilePath, + const char *indexBuildParameters, + diskann::Metric compareMetric, bool use_opq, + const std::string &codebook_prefix, bool use_filters, + const std::string &label_file, + const std::string &universal_label, + const uint32_t filter_threshold, const uint32_t Lf); +template DISKANN_DLLEXPORT int build_disk_index(const char *dataFilePath, const char *indexFilePath, + const char *indexBuildParameters, + diskann::Metric compareMetric, bool use_opq, + const std::string &codebook_prefix, bool use_filters, + const std::string &label_file, + const std::string &universal_label, + const uint32_t filter_threshold, const uint32_t Lf); +template DISKANN_DLLEXPORT int build_disk_index(const char *dataFilePath, const char 
*indexFilePath, + const char *indexBuildParameters, + diskann::Metric compareMetric, bool use_opq, + const std::string &codebook_prefix, bool use_filters, + const std::string &label_file, + const std::string &universal_label, + const uint32_t filter_threshold, const uint32_t Lf); + +template DISKANN_DLLEXPORT int build_merged_vamana_index( + std::string base_file, diskann::Metric compareMetric, uint32_t L, uint32_t R, double sampling_rate, + double ram_budget, std::string mem_index_path, std::string medoids_path, std::string centroids_file, + size_t build_pq_bytes, bool use_opq, uint32_t num_threads, bool use_filters, const std::string &label_file, + const std::string &labels_to_medoids_file, const std::string &universal_label, const uint32_t Lf); +template DISKANN_DLLEXPORT int build_merged_vamana_index( + std::string base_file, diskann::Metric compareMetric, uint32_t L, uint32_t R, double sampling_rate, + double ram_budget, std::string mem_index_path, std::string medoids_path, std::string centroids_file, + size_t build_pq_bytes, bool use_opq, uint32_t num_threads, bool use_filters, const std::string &label_file, + const std::string &labels_to_medoids_file, const std::string &universal_label, const uint32_t Lf); +template DISKANN_DLLEXPORT int build_merged_vamana_index( + std::string base_file, diskann::Metric compareMetric, uint32_t L, uint32_t R, double sampling_rate, + double ram_budget, std::string mem_index_path, std::string medoids_path, std::string centroids_file, + size_t build_pq_bytes, bool use_opq, uint32_t num_threads, bool use_filters, const std::string &label_file, + const std::string &labels_to_medoids_file, const std::string &universal_label, const uint32_t Lf); +// Label=16_t +template DISKANN_DLLEXPORT int build_merged_vamana_index( + std::string base_file, diskann::Metric compareMetric, uint32_t L, uint32_t R, double sampling_rate, + double ram_budget, std::string mem_index_path, std::string medoids_path, std::string centroids_file, + size_t build_pq_bytes, bool use_opq, uint32_t num_threads, bool use_filters, const std::string &label_file, + const std::string &labels_to_medoids_file, const std::string &universal_label, const uint32_t Lf); +template DISKANN_DLLEXPORT int build_merged_vamana_index( + std::string base_file, diskann::Metric compareMetric, uint32_t L, uint32_t R, double sampling_rate, + double ram_budget, std::string mem_index_path, std::string medoids_path, std::string centroids_file, + size_t build_pq_bytes, bool use_opq, uint32_t num_threads, bool use_filters, const std::string &label_file, + const std::string &labels_to_medoids_file, const std::string &universal_label, const uint32_t Lf); +template DISKANN_DLLEXPORT int build_merged_vamana_index( + std::string base_file, diskann::Metric compareMetric, uint32_t L, uint32_t R, double sampling_rate, + double ram_budget, std::string mem_index_path, std::string medoids_path, std::string centroids_file, + size_t build_pq_bytes, bool use_opq, uint32_t num_threads, bool use_filters, const std::string &label_file, + const std::string &labels_to_medoids_file, const std::string &universal_label, const uint32_t Lf); +}; // namespace diskann diff --git a/packages/leann-backend-diskann/third_party/DiskANN/src/distance.cpp b/packages/leann-backend-diskann/third_party/DiskANN/src/distance.cpp new file mode 100644 index 0000000..2fa4c7a --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/src/distance.cpp @@ -0,0 +1,743 @@ +// TODO +// CHECK COSINE ON LINUX + +#ifdef _WINDOWS +#include +#include 
+#include +#include +#include +#include "simd_utils.h" +#elif __APPLE__ +#include +#else +#include +#include "simd_utils.h" +#endif + +#include + +#include "distance.h" +#include "utils.h" +#include "logger.h" +#include "ann_exception.h" + +namespace diskann +{ + +// +// Base Class Implementatons +// +template +float Distance::compare(const T *a, const T *b, const float normA, const float normB, uint32_t length) const +{ + throw std::logic_error("This function is not implemented."); +} + +template uint32_t Distance::post_normalization_dimension(uint32_t orig_dimension) const +{ + return orig_dimension; +} + +template diskann::Metric Distance::get_metric() const +{ + return _distance_metric; +} + +template bool Distance::preprocessing_required() const +{ + return false; +} + +template +void Distance::preprocess_base_points(T *original_data, const size_t orig_dim, const size_t num_points) +{ +} + +template void Distance::preprocess_query(const T *query_vec, const size_t query_dim, T *scratch_query) +{ + std::memcpy(scratch_query, query_vec, query_dim * sizeof(T)); +} + +template size_t Distance::get_required_alignment() const +{ + return _alignment_factor; +} + +// +// Cosine distance functions. +// + +float DistanceCosineInt8::compare(const int8_t *a, const int8_t *b, uint32_t length) const +{ +#ifdef _WINDOWS + return diskann::CosineSimilarity2(a, b, length); +#else + int magA = 0, magB = 0, scalarProduct = 0; + for (uint32_t i = 0; i < length; i++) + { + magA += ((int32_t)a[i]) * ((int32_t)a[i]); + magB += ((int32_t)b[i]) * ((int32_t)b[i]); + scalarProduct += ((int32_t)a[i]) * ((int32_t)b[i]); + } + // similarity == 1-cosine distance + return 1.0f - (float)(scalarProduct / (sqrt(magA) * sqrt(magB))); +#endif +} + +float DistanceCosineFloat::compare(const float *a, const float *b, uint32_t length) const +{ +#ifdef _WINDOWS + return diskann::CosineSimilarity2(a, b, length); +#else + float magA = 0, magB = 0, scalarProduct = 0; + for (uint32_t i = 0; i < length; i++) + { + magA += (a[i]) * (a[i]); + magB += (b[i]) * (b[i]); + scalarProduct += (a[i]) * (b[i]); + } + // similarity == 1-cosine distance + return 1.0f - (scalarProduct / (sqrt(magA) * sqrt(magB))); +#endif +} + +float SlowDistanceCosineUInt8::compare(const uint8_t *a, const uint8_t *b, uint32_t length) const +{ + int magA = 0, magB = 0, scalarProduct = 0; + for (uint32_t i = 0; i < length; i++) + { + magA += ((uint32_t)a[i]) * ((uint32_t)a[i]); + magB += ((uint32_t)b[i]) * ((uint32_t)b[i]); + scalarProduct += ((uint32_t)a[i]) * ((uint32_t)b[i]); + } + // similarity == 1-cosine distance + return 1.0f - (float)(scalarProduct / (sqrt(magA) * sqrt(magB))); +} + +// +// L2 distance functions. 
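For reference, the scalar cosine fallbacks above all reduce to distance = 1 - dot(a, b) / (|a| * |b|); a self-contained version with a tiny worked example, independent of the DiskANN distance class hierarchy, looks like this:

    #include <cmath>
    #include <cstdint>
    #include <iostream>

    // Cosine distance = 1 - (a.b) / (|a| |b|); identical directions give 0,
    // orthogonal vectors give 1.
    float cosine_distance(const float *a, const float *b, uint32_t length)
    {
        float dot = 0, mag_a = 0, mag_b = 0;
        for (uint32_t i = 0; i < length; i++)
        {
            dot += a[i] * b[i];
            mag_a += a[i] * a[i];
            mag_b += b[i] * b[i];
        }
        return 1.0f - dot / (std::sqrt(mag_a) * std::sqrt(mag_b));
    }

    int main()
    {
        float x[2] = {1.0f, 0.0f}, y[2] = {0.0f, 1.0f};
        std::cout << cosine_distance(x, x, 2) << " " << cosine_distance(x, y, 2) << "\n"; // 0 1
        return 0;
    }
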
+// + +float DistanceL2Int8::compare(const int8_t *a, const int8_t *b, uint32_t size) const +{ +#ifdef _WINDOWS +#ifdef USE_AVX2 + __m256 r = _mm256_setzero_ps(); + char *pX = (char *)a, *pY = (char *)b; + while (size >= 32) + { + __m256i r1 = _mm256_subs_epi8(_mm256_loadu_si256((__m256i *)pX), _mm256_loadu_si256((__m256i *)pY)); + r = _mm256_add_ps(r, _mm256_mul_epi8(r1, r1)); + pX += 32; + pY += 32; + size -= 32; + } + while (size > 0) + { + __m128i r2 = _mm_subs_epi8(_mm_loadu_si128((__m128i *)pX), _mm_loadu_si128((__m128i *)pY)); + r = _mm256_add_ps(r, _mm256_mul32_pi8(r2, r2)); + pX += 4; + pY += 4; + size -= 4; + } + r = _mm256_hadd_ps(_mm256_hadd_ps(r, r), r); + return r.m256_f32[0] + r.m256_f32[4]; +#else + int32_t result = 0; +#pragma omp simd reduction(+ : result) aligned(a, b : 8) + for (int32_t i = 0; i < (int32_t)size; i++) + { + result += ((int32_t)((int16_t)a[i] - (int16_t)b[i])) * ((int32_t)((int16_t)a[i] - (int16_t)b[i])); + } + return (float)result; +#endif +#else + int32_t result = 0; +#pragma omp simd reduction(+ : result) aligned(a, b : 8) + for (int32_t i = 0; i < (int32_t)size; i++) + { + result += ((int32_t)((int16_t)a[i] - (int16_t)b[i])) * ((int32_t)((int16_t)a[i] - (int16_t)b[i])); + } + return (float)result; +#endif +} + +float DistanceL2UInt8::compare(const uint8_t *a, const uint8_t *b, uint32_t size) const +{ + uint32_t result = 0; +#ifndef _WINDOWS +#pragma omp simd reduction(+ : result) aligned(a, b : 8) +#endif + for (int32_t i = 0; i < (int32_t)size; i++) + { + result += ((int32_t)((int16_t)a[i] - (int16_t)b[i])) * ((int32_t)((int16_t)a[i] - (int16_t)b[i])); + } + return (float)result; +} + +#ifndef _WINDOWS +float DistanceL2Float::compare(const float *a, const float *b, uint32_t size) const +{ + a = (const float *)__builtin_assume_aligned(a, 32); + b = (const float *)__builtin_assume_aligned(b, 32); +#else +float DistanceL2Float::compare(const float *a, const float *b, uint32_t size) const +{ +#endif + + float result = 0; +#ifdef USE_AVX2 + // assume size is divisible by 8 + uint16_t niters = (uint16_t)(size / 8); + __m256 sum = _mm256_setzero_ps(); + for (uint16_t j = 0; j < niters; j++) + { + // scope is a[8j:8j+7], b[8j:8j+7] + // load a_vec + if (j < (niters - 1)) + { + _mm_prefetch((char *)(a + 8 * (j + 1)), _MM_HINT_T0); + _mm_prefetch((char *)(b + 8 * (j + 1)), _MM_HINT_T0); + } + __m256 a_vec = _mm256_load_ps(a + 8 * j); + // load b_vec + __m256 b_vec = _mm256_load_ps(b + 8 * j); + // a_vec - b_vec + __m256 tmp_vec = _mm256_sub_ps(a_vec, b_vec); + + sum = _mm256_fmadd_ps(tmp_vec, tmp_vec, sum); + } + + // horizontal add sum + result = _mm256_reduce_add_ps(sum); +#else +#ifndef _WINDOWS +#pragma omp simd reduction(+ : result) aligned(a, b : 32) +#endif + for (int32_t i = 0; i < (int32_t)size; i++) + { + result += (a[i] - b[i]) * (a[i] - b[i]); + } +#endif + return result; +} + +template float SlowDistanceL2::compare(const T *a, const T *b, uint32_t length) const +{ + float result = 0.0f; + for (uint32_t i = 0; i < length; i++) + { + result += ((float)(a[i] - b[i])) * (a[i] - b[i]); + } + return result; +} + +#ifdef _WINDOWS +float AVXDistanceL2Int8::compare(const int8_t *a, const int8_t *b, uint32_t length) const +{ + __m128 r = _mm_setzero_ps(); + __m128i r1; + while (length >= 16) + { + r1 = _mm_subs_epi8(_mm_load_si128((__m128i *)a), _mm_load_si128((__m128i *)b)); + r = _mm_add_ps(r, _mm_mul_epi8(r1)); + a += 16; + b += 16; + length -= 16; + } + r = _mm_hadd_ps(_mm_hadd_ps(r, r), r); + float res = r.m128_f32[0]; + + if (length >= 8) + { + __m128 
r2 = _mm_setzero_ps(); + __m128i r3 = _mm_subs_epi8(_mm_load_si128((__m128i *)(a - 8)), _mm_load_si128((__m128i *)(b - 8))); + r2 = _mm_add_ps(r2, _mm_mulhi_epi8(r3)); + a += 8; + b += 8; + length -= 8; + r2 = _mm_hadd_ps(_mm_hadd_ps(r2, r2), r2); + res += r2.m128_f32[0]; + } + + if (length >= 4) + { + __m128 r2 = _mm_setzero_ps(); + __m128i r3 = _mm_subs_epi8(_mm_load_si128((__m128i *)(a - 12)), _mm_load_si128((__m128i *)(b - 12))); + r2 = _mm_add_ps(r2, _mm_mulhi_epi8_shift32(r3)); + res += r2.m128_f32[0] + r2.m128_f32[1]; + } + + return res; +} + +float AVXDistanceL2Float::compare(const float *a, const float *b, uint32_t length) const +{ + __m128 diff, v1, v2; + __m128 sum = _mm_set1_ps(0); + + while (length >= 4) + { + v1 = _mm_loadu_ps(a); + a += 4; + v2 = _mm_loadu_ps(b); + b += 4; + diff = _mm_sub_ps(v1, v2); + sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff)); + length -= 4; + } + + return sum.m128_f32[0] + sum.m128_f32[1] + sum.m128_f32[2] + sum.m128_f32[3]; +} +#else +float AVXDistanceL2Int8::compare(const int8_t *, const int8_t *, uint32_t) const +{ + return 0; +} +float AVXDistanceL2Float::compare(const float *, const float *, uint32_t) const +{ + return 0; +} +#endif + +template float DistanceInnerProduct::inner_product(const T *a, const T *b, uint32_t size) const +{ + if (!std::is_floating_point::value) + { + diskann::cerr << "ERROR: Inner Product only defined for float currently." << std::endl; + throw diskann::ANNException("ERROR: Inner Product only defined for float currently.", -1, __FUNCSIG__, __FILE__, + __LINE__); + } + + float result = 0; + +#ifdef __GNUC__ +#ifdef USE_AVX2 +#define AVX_DOT(addr1, addr2, dest, tmp1, tmp2) \ + tmp1 = _mm256_loadu_ps(addr1); \ + tmp2 = _mm256_loadu_ps(addr2); \ + tmp1 = _mm256_mul_ps(tmp1, tmp2); \ + dest = _mm256_add_ps(dest, tmp1); + + __m256 sum; + __m256 l0, l1; + __m256 r0, r1; + uint32_t D = (size + 7) & ~7U; + uint32_t DR = D % 16; + uint32_t DD = D - DR; + const float *l = (float *)a; + const float *r = (float *)b; + const float *e_l = l + DD; + const float *e_r = r + DD; + float unpack[8] __attribute__((aligned(32))) = {0, 0, 0, 0, 0, 0, 0, 0}; + + sum = _mm256_loadu_ps(unpack); + if (DR) + { + AVX_DOT(e_l, e_r, sum, l0, r0); + } + + for (uint32_t i = 0; i < DD; i += 16, l += 16, r += 16) + { + AVX_DOT(l, r, sum, l0, r0); + AVX_DOT(l + 8, r + 8, sum, l1, r1); + } + _mm256_storeu_ps(unpack, sum); + result = unpack[0] + unpack[1] + unpack[2] + unpack[3] + unpack[4] + unpack[5] + unpack[6] + unpack[7]; + +#else +#ifdef __SSE2__ +#define SSE_DOT(addr1, addr2, dest, tmp1, tmp2) \ + tmp1 = _mm128_loadu_ps(addr1); \ + tmp2 = _mm128_loadu_ps(addr2); \ + tmp1 = _mm128_mul_ps(tmp1, tmp2); \ + dest = _mm128_add_ps(dest, tmp1); + __m128 sum; + __m128 l0, l1, l2, l3; + __m128 r0, r1, r2, r3; + uint32_t D = (size + 3) & ~3U; + uint32_t DR = D % 16; + uint32_t DD = D - DR; + const float *l = a; + const float *r = b; + const float *e_l = l + DD; + const float *e_r = r + DD; + float unpack[4] __attribute__((aligned(16))) = {0, 0, 0, 0}; + + sum = _mm_load_ps(unpack); + switch (DR) + { + case 12: + SSE_DOT(e_l + 8, e_r + 8, sum, l2, r2); + case 8: + SSE_DOT(e_l + 4, e_r + 4, sum, l1, r1); + case 4: + SSE_DOT(e_l, e_r, sum, l0, r0); + default: + break; + } + for (uint32_t i = 0; i < DD; i += 16, l += 16, r += 16) + { + SSE_DOT(l, r, sum, l0, r0); + SSE_DOT(l + 4, r + 4, sum, l1, r1); + SSE_DOT(l + 8, r + 8, sum, l2, r2); + SSE_DOT(l + 12, r + 12, sum, l3, r3); + } + _mm_storeu_ps(unpack, sum); + result += unpack[0] + unpack[1] + unpack[2] + 
unpack[3]; +#elif __APPLE__ + vDSP_dotpr((float *)a, (vDSP_Stride)1, (float *)b, (vDSP_Stride)1, &result, size); +#else + + float dot0, dot1, dot2, dot3; + const float *last = a + size; + const float *unroll_group = last - 3; + + /* Process 4 items with each loop for efficiency. */ + while (a < unroll_group) + { + dot0 = a[0] * b[0]; + dot1 = a[1] * b[1]; + dot2 = a[2] * b[2]; + dot3 = a[3] * b[3]; + result += dot0 + dot1 + dot2 + dot3; + a += 4; + b += 4; + } + /* Process last 0-3 pixels. Not needed for standard vector lengths. */ + while (a < last) + { + result += *a++ * *b++; + } +#endif +#endif +#endif + return result; +} + +template float DistanceFastL2::compare(const T *a, const T *b, float norm, uint32_t size) const +{ + float result = -2 * DistanceInnerProduct::inner_product(a, b, size); + result += norm; + return result; +} + +template float DistanceFastL2::norm(const T *a, uint32_t size) const +{ + if (!std::is_floating_point::value) + { + diskann::cerr << "ERROR: FastL2 only defined for float currently." << std::endl; + throw diskann::ANNException("ERROR: FastL2 only defined for float currently.", -1, __FUNCSIG__, __FILE__, + __LINE__); + } + float result = 0; +#ifdef __GNUC__ +#ifdef __AVX__ +#define AVX_L2NORM(addr, dest, tmp) \ + tmp = _mm256_loadu_ps(addr); \ + tmp = _mm256_mul_ps(tmp, tmp); \ + dest = _mm256_add_ps(dest, tmp); + + __m256 sum; + __m256 l0, l1; + uint32_t D = (size + 7) & ~7U; + uint32_t DR = D % 16; + uint32_t DD = D - DR; + const float *l = (float *)a; + const float *e_l = l + DD; + float unpack[8] __attribute__((aligned(32))) = {0, 0, 0, 0, 0, 0, 0, 0}; + + sum = _mm256_loadu_ps(unpack); + if (DR) + { + AVX_L2NORM(e_l, sum, l0); + } + for (uint32_t i = 0; i < DD; i += 16, l += 16) + { + AVX_L2NORM(l, sum, l0); + AVX_L2NORM(l + 8, sum, l1); + } + _mm256_storeu_ps(unpack, sum); + result = unpack[0] + unpack[1] + unpack[2] + unpack[3] + unpack[4] + unpack[5] + unpack[6] + unpack[7]; +#else +#ifdef __SSE2__ +#define SSE_L2NORM(addr, dest, tmp) \ + tmp = _mm128_loadu_ps(addr); \ + tmp = _mm128_mul_ps(tmp, tmp); \ + dest = _mm128_add_ps(dest, tmp); + + __m128 sum; + __m128 l0, l1, l2, l3; + uint32_t D = (size + 3) & ~3U; + uint32_t DR = D % 16; + uint32_t DD = D - DR; + const float *l = a; + const float *e_l = l + DD; + float unpack[4] __attribute__((aligned(16))) = {0, 0, 0, 0}; + + sum = _mm_load_ps(unpack); + switch (DR) + { + case 12: + SSE_L2NORM(e_l + 8, sum, l2); + case 8: + SSE_L2NORM(e_l + 4, sum, l1); + case 4: + SSE_L2NORM(e_l, sum, l0); + default: + break; + } + for (uint32_t i = 0; i < DD; i += 16, l += 16) + { + SSE_L2NORM(l, sum, l0); + SSE_L2NORM(l + 4, sum, l1); + SSE_L2NORM(l + 8, sum, l2); + SSE_L2NORM(l + 12, sum, l3); + } + _mm_storeu_ps(unpack, sum); + result += unpack[0] + unpack[1] + unpack[2] + unpack[3]; +#elif __APPLE__ + vDSP_dotpr((float *)a, 1, (float *)a, 1, &result, size); +#else + float dot0, dot1, dot2, dot3; + const float *last = a + size; + const float *unroll_group = last - 3; + + /* Process 4 items with each loop for efficiency. */ + while (a < unroll_group) + { + dot0 = a[0] * a[0]; + dot1 = a[1] * a[1]; + dot2 = a[2] * a[2]; + dot3 = a[3] * a[3]; + result += dot0 + dot1 + dot2 + dot3; + a += 4; + } + /* Process last 0-3 pixels. Not needed for standard vector lengths. 
*/ + while (a < last) + { + result += (*a) * (*a); + a++; + } +#endif +#endif +#endif + return result; +} + +float AVXDistanceInnerProductFloat::compare(const float *a, const float *b, uint32_t size) const +{ + float result = 0.0f; +#ifdef __APPLE__ + vDSP_dotpr(a, (vDSP_Stride)1, b, (vDSP_Stride)1, &result, size); +#else +#define AVX_DOT(addr1, addr2, dest, tmp1, tmp2) \ + tmp1 = _mm256_loadu_ps(addr1); \ + tmp2 = _mm256_loadu_ps(addr2); \ + tmp1 = _mm256_mul_ps(tmp1, tmp2); \ + dest = _mm256_add_ps(dest, tmp1); + + __m256 sum; + __m256 l0, l1; + __m256 r0, r1; + uint32_t D = (size + 7) & ~7U; + uint32_t DR = D % 16; + uint32_t DD = D - DR; + const float *l = (float *)a; + const float *r = (float *)b; + const float *e_l = l + DD; + const float *e_r = r + DD; +#ifndef _WINDOWS + float unpack[8] __attribute__((aligned(32))) = {0, 0, 0, 0, 0, 0, 0, 0}; +#else + __declspec(align(32)) float unpack[8] = {0, 0, 0, 0, 0, 0, 0, 0}; +#endif + + sum = _mm256_loadu_ps(unpack); + if (DR) + { + AVX_DOT(e_l, e_r, sum, l0, r0); + } + + for (uint32_t i = 0; i < DD; i += 16, l += 16, r += 16) + { + AVX_DOT(l, r, sum, l0, r0); + AVX_DOT(l + 8, r + 8, sum, l1, r1); + } + _mm256_storeu_ps(unpack, sum); + result = unpack[0] + unpack[1] + unpack[2] + unpack[3] + unpack[4] + unpack[5] + unpack[6] + unpack[7]; +#endif + return -result; +} + +uint32_t AVXNormalizedCosineDistanceFloat::post_normalization_dimension(uint32_t orig_dimension) const +{ + return orig_dimension; +} +bool AVXNormalizedCosineDistanceFloat::preprocessing_required() const +{ + return true; +} +void AVXNormalizedCosineDistanceFloat::preprocess_base_points(float *original_data, const size_t orig_dim, + const size_t num_points) +{ + for (uint32_t i = 0; i < num_points; i++) + { + normalize((float *)(original_data + i * orig_dim), orig_dim); + } +} + +void AVXNormalizedCosineDistanceFloat::preprocess_query(const float *query_vec, const size_t query_dim, + float *query_scratch) +{ + normalize_and_copy(query_vec, (uint32_t)query_dim, query_scratch); +} + +void AVXNormalizedCosineDistanceFloat::normalize_and_copy(const float *query_vec, const uint32_t query_dim, + float *query_target) const +{ + float norm = get_norm(query_vec, query_dim); + + for (uint32_t i = 0; i < query_dim; i++) + { + query_target[i] = query_vec[i] / norm; + } +} + +// Get the right distance function for the given metric. +template <> diskann::Distance *get_distance_function(diskann::Metric m) +{ + if (m == diskann::Metric::L2) + { + if (Avx2SupportedCPU) + { + diskann::cout << "L2: Using AVX2 distance computation DistanceL2Float" << std::endl; + return new diskann::DistanceL2Float(); + } + else if (AvxSupportedCPU) + { + diskann::cout << "L2: AVX2 not supported. Using AVX distance computation" << std::endl; + return new diskann::AVXDistanceL2Float(); + } + else + { + diskann::cout << "L2: Older CPU. 
Using slow distance computation" << std::endl; + return new diskann::SlowDistanceL2(); + } + } + else if (m == diskann::Metric::COSINE) + { + diskann::cout << "Cosine: Using either AVX or AVX2 implementation" << std::endl; + return new diskann::DistanceCosineFloat(); + } + else if (m == diskann::Metric::INNER_PRODUCT) + { + diskann::cout << "Inner product: Using AVX2 implementation " + "AVXDistanceInnerProductFloat" + << std::endl; + return new diskann::AVXDistanceInnerProductFloat(); + } + else if (m == diskann::Metric::FAST_L2) + { + diskann::cout << "Fast_L2: Using AVX2 implementation with norm " + "memoization DistanceFastL2" + << std::endl; + return new diskann::DistanceFastL2(); + } + else + { + std::stringstream stream; + stream << "Only L2, cosine, and inner product supported for floating " + "point vectors as of now." + << std::endl; + diskann::cerr << stream.str() << std::endl; + throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); + } +} + +template <> diskann::Distance *get_distance_function(diskann::Metric m) +{ + if (m == diskann::Metric::L2) + { + if (Avx2SupportedCPU) + { + diskann::cout << "Using AVX2 distance computation DistanceL2Int8." << std::endl; + return new diskann::DistanceL2Int8(); + } + else if (AvxSupportedCPU) + { + diskann::cout << "AVX2 not supported. Using AVX distance computation" << std::endl; + return new diskann::AVXDistanceL2Int8(); + } + else + { + diskann::cout << "Older CPU. Using slow distance computation " + "SlowDistanceL2Int." + << std::endl; + return new diskann::SlowDistanceL2(); + } + } + else if (m == diskann::Metric::COSINE) + { + diskann::cout << "Using either AVX or AVX2 for Cosine similarity " + "DistanceCosineInt8." + << std::endl; + return new diskann::DistanceCosineInt8(); + } + else + { + std::stringstream stream; + stream << "Only L2 and cosine supported for signed byte vectors." << std::endl; + diskann::cerr << stream.str() << std::endl; + throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); + } +} + +template <> diskann::Distance *get_distance_function(diskann::Metric m) +{ + if (m == diskann::Metric::L2) + { +#ifdef _WINDOWS + diskann::cout << "WARNING: AVX/AVX2 distance function not defined for Uint8. " + "Using " + "slow version. " + "Contact gopalsr@microsoft.com if you need AVX/AVX2 support." + << std::endl; +#endif + return new diskann::DistanceL2UInt8(); + } + else if (m == diskann::Metric::COSINE) + { + diskann::cout << "AVX/AVX2 distance function not defined for Uint8. Using " + "slow version SlowDistanceCosineUint8() " + "Contact gopalsr@microsoft.com if you need AVX/AVX2 support." + << std::endl; + return new diskann::SlowDistanceCosineUInt8(); + } + else + { + std::stringstream stream; + stream << "Only L2 and cosine supported for uint32_t byte vectors." 
<< std::endl; + diskann::cerr << stream.str() << std::endl; + throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); + } +} + +template DISKANN_DLLEXPORT class DistanceInnerProduct; +template DISKANN_DLLEXPORT class DistanceInnerProduct; +template DISKANN_DLLEXPORT class DistanceInnerProduct; + +template DISKANN_DLLEXPORT class DistanceFastL2; +template DISKANN_DLLEXPORT class DistanceFastL2; +template DISKANN_DLLEXPORT class DistanceFastL2; + +template DISKANN_DLLEXPORT class SlowDistanceL2; +template DISKANN_DLLEXPORT class SlowDistanceL2; +template DISKANN_DLLEXPORT class SlowDistanceL2; + +// template DISKANN_DLLEXPORT Distance *get_distance_function(Metric m); +// template DISKANN_DLLEXPORT Distance *get_distance_function(Metric m); +// template DISKANN_DLLEXPORT Distance *get_distance_function(Metric m); + +} // namespace diskann diff --git a/packages/leann-backend-diskann/third_party/DiskANN/src/dll/CMakeLists.txt b/packages/leann-backend-diskann/third_party/DiskANN/src/dll/CMakeLists.txt new file mode 100644 index 0000000..096d1b7 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/src/dll/CMakeLists.txt @@ -0,0 +1,35 @@ +#Copyright(c) Microsoft Corporation.All rights reserved. +#Licensed under the MIT license. + +add_library(${PROJECT_NAME} SHARED dllmain.cpp ../abstract_data_store.cpp ../partition.cpp ../pq.cpp ../pq_flash_index.cpp ../logger.cpp ../utils.cpp + ../windows_aligned_file_reader.cpp ../distance.cpp ../pq_l2_distance.cpp ../memory_mapper.cpp ../index.cpp + ../in_mem_data_store.cpp ../pq_data_store.cpp ../in_mem_graph_store.cpp ../math_utils.cpp ../disk_utils.cpp ../filter_utils.cpp + ../ann_exception.cpp ../natural_number_set.cpp ../natural_number_map.cpp ../scratch.cpp ../index_factory.cpp ../abstract_index.cpp) + +set(TARGET_DIR "$<$:${CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG}>$<$:${CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELEASE}>") + +set(DISKANN_DLL_IMPLIB "${TARGET_DIR}/${PROJECT_NAME}.lib") + +if (NOT PYBIND) + target_compile_definitions(${PROJECT_NAME} PRIVATE DISKANN_RELEASE_UNUSED_TCMALLOC_MEMORY_AT_CHECKPOINTS DISKANN_BUILD) +endif() +target_compile_definitions(${PROJECT_NAME} PRIVATE _USRDLL _WINDLL) +target_compile_options(${PROJECT_NAME} PRIVATE /GL) +target_include_directories(${PROJECT_NAME} PRIVATE ${DISKANN_MKL_INCLUDE_DIRECTORIES}) + +target_link_options(${PROJECT_NAME} PRIVATE /DLL /IMPLIB:${DISKANN_DLL_IMPLIB} /LTCG) +target_link_libraries(${PROJECT_NAME} PRIVATE ${DISKANN_MKL_LINK_LIBRARIES}) +target_link_libraries(${PROJECT_NAME} PRIVATE synchronization.lib) + +if (DISKANN_DLL_TCMALLOC_LINK_OPTIONS) + target_link_libraries(${PROJECT_NAME} PUBLIC ${DISKANN_DLL_TCMALLOC_LINK_OPTIONS}) +endif() + +# Copy OpenMP DLL and PDB. +set(RUNTIME_FILES_TO_COPY ${OPENMP_WINDOWS_RUNTIME_FILES} ${TCMALLOC_WINDOWS_RUNTIME_FILES}) + +foreach(RUNTIME_FILE ${RUNTIME_FILES_TO_COPY}) + add_custom_command(TARGET ${PROJECT_NAME} + POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy "${RUNTIME_FILE}" "${TARGET_DIR}") +endforeach() \ No newline at end of file diff --git a/packages/leann-backend-diskann/third_party/DiskANN/src/dll/dllmain.cpp b/packages/leann-backend-diskann/third_party/DiskANN/src/dll/dllmain.cpp new file mode 100644 index 0000000..9f5ce44 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/src/dll/dllmain.cpp @@ -0,0 +1,15 @@ +// dllmain.cpp : Defines the entry point for the DLL application. 
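+// Note: this DllMain intentionally does no per-process or per-thread work;
+// every attach/detach case falls through and the function simply returns TRUE.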
+#include + +BOOL APIENTRY DllMain(HMODULE hModule, DWORD ul_reason_for_call, LPVOID lpReserved) +{ + switch (ul_reason_for_call) + { + case DLL_PROCESS_ATTACH: + case DLL_THREAD_ATTACH: + case DLL_THREAD_DETACH: + case DLL_PROCESS_DETACH: + break; + } + return TRUE; +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/src/filter_utils.cpp b/packages/leann-backend-diskann/third_party/DiskANN/src/filter_utils.cpp new file mode 100644 index 0000000..09d740e --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/src/filter_utils.cpp @@ -0,0 +1,355 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#include +#include +#include +#include +#include +#include + +#include +#include "filter_utils.h" +#include "index.h" +#include "parameters.h" +#include "utils.h" + +namespace diskann +{ +/* + * Using passed in parameters and files generated from step 3, + * builds a vanilla diskANN index for each label. + * + * Each index is saved under the following path: + * final_index_path_prefix + "_" + label + */ +template +void generate_label_indices(path input_data_path, path final_index_path_prefix, label_set all_labels, uint32_t R, + uint32_t L, float alpha, uint32_t num_threads) +{ + diskann::IndexWriteParameters label_index_build_parameters = diskann::IndexWriteParametersBuilder(L, R) + .with_saturate_graph(false) + .with_alpha(alpha) + .with_num_threads(num_threads) + .build(); + + std::cout << "Generating indices per label..." << std::endl; + // for each label, build an index on resp. points + double total_indexing_time = 0.0, indexing_percentage = 0.0; + std::cout.setstate(std::ios_base::failbit); + diskann::cout.setstate(std::ios_base::failbit); + for (const auto &lbl : all_labels) + { + path curr_label_input_data_path(input_data_path + "_" + lbl); + path curr_label_index_path(final_index_path_prefix + "_" + lbl); + + size_t number_of_label_points, dimension; + diskann::get_bin_metadata(curr_label_input_data_path, number_of_label_points, dimension); + + diskann::Index index(diskann::Metric::L2, dimension, number_of_label_points, + std::make_shared(label_index_build_parameters), nullptr, + 0, false, false, false, false, 0, false); + + auto index_build_timer = std::chrono::high_resolution_clock::now(); + index.build(curr_label_input_data_path.c_str(), number_of_label_points); + std::chrono::duration current_indexing_time = + std::chrono::high_resolution_clock::now() - index_build_timer; + + total_indexing_time += current_indexing_time.count(); + indexing_percentage += (1 / (double)all_labels.size()); + print_progress(indexing_percentage); + + index.save(curr_label_index_path.c_str()); + } + std::cout.clear(); + diskann::cout.clear(); + + std::cout << "\nDone. Generated per-label indices in " << total_indexing_time << " seconds\n" << std::endl; +} + +// for use on systems without writev (i.e. 
Windows) +template +tsl::robin_map> generate_label_specific_vector_files_compat( + path input_data_path, tsl::robin_map labels_to_number_of_points, + std::vector point_ids_to_labels, label_set all_labels) +{ + auto file_writing_timer = std::chrono::high_resolution_clock::now(); + std::ifstream input_data_stream(input_data_path); + + uint32_t number_of_points, dimension; + input_data_stream.read((char *)&number_of_points, sizeof(uint32_t)); + input_data_stream.read((char *)&dimension, sizeof(uint32_t)); + const uint32_t VECTOR_SIZE = dimension * sizeof(T); + if (number_of_points != point_ids_to_labels.size()) + { + std::cerr << "Error: number of points in labels file and data file differ." << std::endl; + throw; + } + + tsl::robin_map labels_to_vectors; + tsl::robin_map labels_to_curr_vector; + tsl::robin_map> label_id_to_orig_id; + + for (const auto &lbl : all_labels) + { + uint32_t number_of_label_pts = labels_to_number_of_points[lbl]; + char *vectors = (char *)malloc(number_of_label_pts * VECTOR_SIZE); + if (vectors == nullptr) + { + throw; + } + labels_to_vectors[lbl] = vectors; + labels_to_curr_vector[lbl] = 0; + label_id_to_orig_id[lbl].reserve(number_of_label_pts); + } + + for (uint32_t point_id = 0; point_id < number_of_points; point_id++) + { + char *curr_vector = (char *)malloc(VECTOR_SIZE); + input_data_stream.read(curr_vector, VECTOR_SIZE); + for (const auto &lbl : point_ids_to_labels[point_id]) + { + char *curr_label_vector_ptr = labels_to_vectors[lbl] + (labels_to_curr_vector[lbl] * VECTOR_SIZE); + memcpy(curr_label_vector_ptr, curr_vector, VECTOR_SIZE); + labels_to_curr_vector[lbl]++; + label_id_to_orig_id[lbl].push_back(point_id); + } + free(curr_vector); + } + + for (const auto &lbl : all_labels) + { + path curr_label_input_data_path(input_data_path + "_" + lbl); + uint32_t number_of_label_pts = labels_to_number_of_points[lbl]; + + std::ofstream label_file_stream; + label_file_stream.exceptions(std::ios::badbit | std::ios::failbit); + label_file_stream.open(curr_label_input_data_path, std::ios_base::binary); + label_file_stream.write((char *)&number_of_label_pts, sizeof(uint32_t)); + label_file_stream.write((char *)&dimension, sizeof(uint32_t)); + label_file_stream.write((char *)labels_to_vectors[lbl], number_of_label_pts * VECTOR_SIZE); + + label_file_stream.close(); + free(labels_to_vectors[lbl]); + } + input_data_stream.close(); + + std::chrono::duration file_writing_time = std::chrono::high_resolution_clock::now() - file_writing_timer; + std::cout << "generated " << all_labels.size() << " label-specific vector files for index building in time " + << file_writing_time.count() << "\n" + << std::endl; + + return label_id_to_orig_id; +} + +/* + * Manually loads a graph index in from a given file. + * + * Returns both the graph index and the size of the file in bytes. 
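+ * The on-disk layout parsed below is, in order:
+ *   uint64_t index_file_size; uint32_t max_observed_degree;
+ *   uint32_t entry_point; uint64_t num_frozen_points;
+ *   then, per node: uint32_t k; uint32_t neighbors[k];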
+ */ +load_label_index_return_values load_label_index(path label_index_path, uint32_t label_number_of_points) +{ + std::ifstream label_index_stream; + label_index_stream.exceptions(std::ios::badbit | std::ios::failbit); + label_index_stream.open(label_index_path, std::ios::binary); + + uint64_t index_file_size, index_num_frozen_points; + uint32_t index_max_observed_degree, index_entry_point; + const size_t INDEX_METADATA = 2 * sizeof(uint64_t) + 2 * sizeof(uint32_t); + label_index_stream.read((char *)&index_file_size, sizeof(uint64_t)); + label_index_stream.read((char *)&index_max_observed_degree, sizeof(uint32_t)); + label_index_stream.read((char *)&index_entry_point, sizeof(uint32_t)); + label_index_stream.read((char *)&index_num_frozen_points, sizeof(uint64_t)); + size_t bytes_read = INDEX_METADATA; + + std::vector> label_index(label_number_of_points); + uint32_t nodes_read = 0; + while (bytes_read != index_file_size) + { + uint32_t current_node_num_neighbors; + label_index_stream.read((char *)¤t_node_num_neighbors, sizeof(uint32_t)); + nodes_read++; + + std::vector current_node_neighbors(current_node_num_neighbors); + label_index_stream.read((char *)current_node_neighbors.data(), current_node_num_neighbors * sizeof(uint32_t)); + label_index[nodes_read - 1].swap(current_node_neighbors); + bytes_read += sizeof(uint32_t) * (current_node_num_neighbors + 1); + } + + return std::make_tuple(label_index, index_file_size); +} + +/* + * Parses the label datafile, which has comma-separated labels on + * each line. Line i corresponds to point id i. + * + * Returns three objects via std::tuple: + * 1. map: key is point id, value is vector of labels said point has + * 2. map: key is label, value is number of points with the label + * 3. the label universe as a set + */ +parse_label_file_return_values parse_label_file(path label_data_path, std::string universal_label) +{ + std::ifstream label_data_stream(label_data_path); + std::string line, token; + uint32_t line_cnt = 0; + + // allows us to reserve space for the points_to_labels vector + while (std::getline(label_data_stream, line)) + line_cnt++; + label_data_stream.clear(); + label_data_stream.seekg(0, std::ios::beg); + + // values to return + std::vector point_ids_to_labels(line_cnt); + tsl::robin_map labels_to_number_of_points; + label_set all_labels; + + std::vector points_with_universal_label; + line_cnt = 0; + while (std::getline(label_data_stream, line)) + { + std::istringstream current_labels_comma_separated(line); + label_set current_labels; + + // get point id + uint32_t point_id = line_cnt; + + // parse comma separated labels + bool current_universal_label_check = false; + while (getline(current_labels_comma_separated, token, ',')) + { + token.erase(std::remove(token.begin(), token.end(), '\n'), token.end()); + token.erase(std::remove(token.begin(), token.end(), '\r'), token.end()); + + // if token is empty, there's no labels for the point + if (token == universal_label) + { + points_with_universal_label.push_back(point_id); + current_universal_label_check = true; + } + else + { + all_labels.insert(token); + current_labels.insert(token); + labels_to_number_of_points[token]++; + } + } + + if (current_labels.size() <= 0 && !current_universal_label_check) + { + std::cerr << "Error: " << point_id << " has no labels." 
<< std::endl; + exit(-1); + } + point_ids_to_labels[point_id] = current_labels; + line_cnt++; + } + + // for every point with universal label, set its label set to all labels + // also, increment the count for number of points a label has + for (const auto &point_id : points_with_universal_label) + { + point_ids_to_labels[point_id] = all_labels; + for (const auto &lbl : all_labels) + labels_to_number_of_points[lbl]++; + } + + std::cout << "Identified " << all_labels.size() << " distinct label(s) for " << point_ids_to_labels.size() + << " points\n" + << std::endl; + + return std::make_tuple(point_ids_to_labels, labels_to_number_of_points, all_labels); +} + +/* + * A templated function to parse a file of labels that are already represented + * as either uint16_t or uint32_t + * + * Returns two objects via std::tuple: + * 1. a vector of vectors of labels, where the outer vector is indexed by point id + * 2. a set of all labels + */ +template +std::tuple>, tsl::robin_set> parse_formatted_label_file(std::string label_file) +{ + std::vector> pts_to_labels; + tsl::robin_set labels; + + // Format of Label txt file: filters with comma separators + std::ifstream infile(label_file); + if (infile.fail()) + { + throw diskann::ANNException(std::string("Failed to open file ") + label_file, -1); + } + + std::string line, token; + uint32_t line_cnt = 0; + + while (std::getline(infile, line)) + { + line_cnt++; + } + pts_to_labels.resize(line_cnt, std::vector()); + + infile.clear(); + infile.seekg(0, std::ios::beg); + line_cnt = 0; + + while (std::getline(infile, line)) + { + std::istringstream iss(line); + std::vector lbls(0); + getline(iss, token, '\t'); + std::istringstream new_iss(token); + while (getline(new_iss, token, ',')) + { + token.erase(std::remove(token.begin(), token.end(), '\n'), token.end()); + token.erase(std::remove(token.begin(), token.end(), '\r'), token.end()); + LabelT token_as_num = static_cast(std::stoul(token)); + lbls.push_back(token_as_num); + labels.insert(token_as_num); + } + if (lbls.size() <= 0) + { + diskann::cout << "No label found"; + exit(-1); + } + std::sort(lbls.begin(), lbls.end()); + pts_to_labels[line_cnt] = lbls; + line_cnt++; + } + diskann::cout << "Identified " << labels.size() << " distinct label(s)" << std::endl; + + return std::make_tuple(pts_to_labels, labels); +} + +template DISKANN_DLLEXPORT std::tuple>, tsl::robin_set> +parse_formatted_label_file(path label_file); + +template DISKANN_DLLEXPORT std::tuple>, tsl::robin_set> +parse_formatted_label_file(path label_file); + +template DISKANN_DLLEXPORT void generate_label_indices(path input_data_path, path final_index_path_prefix, + label_set all_labels, uint32_t R, uint32_t L, float alpha, + uint32_t num_threads); +template DISKANN_DLLEXPORT void generate_label_indices(path input_data_path, path final_index_path_prefix, + label_set all_labels, uint32_t R, uint32_t L, + float alpha, uint32_t num_threads); +template DISKANN_DLLEXPORT void generate_label_indices(path input_data_path, path final_index_path_prefix, + label_set all_labels, uint32_t R, uint32_t L, + float alpha, uint32_t num_threads); + +template DISKANN_DLLEXPORT tsl::robin_map> +generate_label_specific_vector_files_compat(path input_data_path, + tsl::robin_map labels_to_number_of_points, + std::vector point_ids_to_labels, label_set all_labels); +template DISKANN_DLLEXPORT tsl::robin_map> +generate_label_specific_vector_files_compat(path input_data_path, + tsl::robin_map labels_to_number_of_points, + std::vector point_ids_to_labels, label_set 
all_labels); +template DISKANN_DLLEXPORT tsl::robin_map> +generate_label_specific_vector_files_compat(path input_data_path, + tsl::robin_map labels_to_number_of_points, + std::vector point_ids_to_labels, label_set all_labels); + +} // namespace diskann \ No newline at end of file diff --git a/packages/leann-backend-diskann/third_party/DiskANN/src/in_mem_data_store.cpp b/packages/leann-backend-diskann/third_party/DiskANN/src/in_mem_data_store.cpp new file mode 100644 index 0000000..cc7acf6 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/src/in_mem_data_store.cpp @@ -0,0 +1,401 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#include +#include "abstract_scratch.h" +#include "in_mem_data_store.h" + +#include "utils.h" + +namespace diskann +{ + +template +InMemDataStore::InMemDataStore(const location_t num_points, const size_t dim, + std::unique_ptr> distance_fn) + : AbstractDataStore(num_points, dim), _distance_fn(std::move(distance_fn)) +{ + _aligned_dim = ROUND_UP(dim, _distance_fn->get_required_alignment()); + alloc_aligned(((void **)&_data), this->_capacity * _aligned_dim * sizeof(data_t), 8 * sizeof(data_t)); + std::memset(_data, 0, this->_capacity * _aligned_dim * sizeof(data_t)); +} + +template InMemDataStore::~InMemDataStore() +{ + if (_data != nullptr) + { + aligned_free(this->_data); + } +} + +template size_t InMemDataStore::get_aligned_dim() const +{ + return _aligned_dim; +} + +template size_t InMemDataStore::get_alignment_factor() const +{ + return _distance_fn->get_required_alignment(); +} + +template location_t InMemDataStore::load(const std::string &filename) +{ + return load_impl(filename); +} + +#ifdef EXEC_ENV_OLS +template location_t InMemDataStore::load_impl(AlignedFileReader &reader) +{ + size_t file_dim, file_num_points; + + diskann::get_bin_metadata(reader, file_num_points, file_dim); + + if (file_dim != this->_dim) + { + std::stringstream stream; + stream << "ERROR: Driver requests loading " << this->_dim << " dimension," + << "but file has " << file_dim << " dimension." << std::endl; + diskann::cerr << stream.str() << std::endl; + aligned_free(_data); + throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); + } + + if (file_num_points > this->capacity()) + { + this->resize((location_t)file_num_points); + } + copy_aligned_data_from_file(reader, _data, file_num_points, file_dim, _aligned_dim); + + return (location_t)file_num_points; +} +#endif + +template location_t InMemDataStore::load_impl(const std::string &filename) +{ + size_t file_dim, file_num_points; + if (!file_exists(filename)) + { + std::stringstream stream; + stream << "ERROR: data file " << filename << " does not exist." << std::endl; + diskann::cerr << stream.str() << std::endl; + aligned_free(_data); + throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); + } + diskann::get_bin_metadata(filename, file_num_points, file_dim); + + if (file_dim != this->_dim) + { + std::stringstream stream; + stream << "ERROR: Driver requests loading " << this->_dim << " dimension," + << "but file has " << file_dim << " dimension." 
<< std::endl; + diskann::cerr << stream.str() << std::endl; + aligned_free(_data); + throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); + } + + if (file_num_points > this->capacity()) + { + this->resize((location_t)file_num_points); + } + + copy_aligned_data_from_file(filename.c_str(), _data, file_num_points, file_dim, _aligned_dim); + + return (location_t)file_num_points; +} + +template size_t InMemDataStore::save(const std::string &filename, const location_t num_points) +{ + return save_data_in_base_dimensions(filename, _data, num_points, this->get_dims(), this->get_aligned_dim(), 0U); +} + +template void InMemDataStore::populate_data(const data_t *vectors, const location_t num_pts) +{ + memset(_data, 0, _aligned_dim * sizeof(data_t) * num_pts); + for (location_t i = 0; i < num_pts; i++) + { + std::memmove(_data + i * _aligned_dim, vectors + i * this->_dim, this->_dim * sizeof(data_t)); + } + + if (_distance_fn->preprocessing_required()) + { + _distance_fn->preprocess_base_points(_data, this->_aligned_dim, num_pts); + } +} + +template void InMemDataStore::populate_data(const std::string &filename, const size_t offset) +{ + size_t npts, ndim; + copy_aligned_data_from_file(filename.c_str(), _data, npts, ndim, _aligned_dim, offset); + + if ((location_t)npts > this->capacity()) + { + std::stringstream ss; + ss << "Number of points in the file: " << filename + << " is greater than the capacity of data store: " << this->capacity() + << ". Must invoke resize before calling populate_data()" << std::endl; + throw diskann::ANNException(ss.str(), -1); + } + + if ((location_t)ndim != this->get_dims()) + { + std::stringstream ss; + ss << "Number of dimensions of a point in the file: " << filename + << " is not equal to dimensions of data store: " << this->capacity() << "." << std::endl; + throw diskann::ANNException(ss.str(), -1); + } + + if (_distance_fn->preprocessing_required()) + { + _distance_fn->preprocess_base_points(_data, this->_aligned_dim, this->capacity()); + } +} + +template +void InMemDataStore::extract_data_to_bin(const std::string &filename, const location_t num_points) +{ + save_data_in_base_dimensions(filename, _data, num_points, this->get_dims(), this->get_aligned_dim(), 0U); +} + +template void InMemDataStore::get_vector(const location_t i, data_t *dest) const +{ + // REFACTOR TODO: Should we denormalize and return values? 
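+ // Only the first _dim values are copied out; the zero padding used to
+ // reach _aligned_dim stays internal to the data store.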
+ memcpy(dest, _data + i * _aligned_dim, this->_dim * sizeof(data_t)); +} + +template void InMemDataStore::set_vector(const location_t loc, const data_t *const vector) +{ + size_t offset_in_data = loc * _aligned_dim; + memset(_data + offset_in_data, 0, _aligned_dim * sizeof(data_t)); + memcpy(_data + offset_in_data, vector, this->_dim * sizeof(data_t)); + if (_distance_fn->preprocessing_required()) + { + _distance_fn->preprocess_base_points(_data + offset_in_data, _aligned_dim, 1); + } +} + +template void InMemDataStore::prefetch_vector(const location_t loc) +{ + diskann::prefetch_vector((const char *)_data + _aligned_dim * (size_t)loc * sizeof(data_t), + sizeof(data_t) * _aligned_dim); +} + +template +void InMemDataStore::preprocess_query(const data_t *query, AbstractScratch *query_scratch) const +{ + if (query_scratch != nullptr) + { + memcpy(query_scratch->aligned_query_T(), query, sizeof(data_t) * this->get_dims()); + } + else + { + std::stringstream ss; + ss << "In InMemDataStore::preprocess_query: Query scratch is null"; + diskann::cerr << ss.str() << std::endl; + throw diskann::ANNException(ss.str(), -1); + } +} + +template float InMemDataStore::get_distance(const data_t *query, const location_t loc) const +{ + return _distance_fn->compare(query, _data + _aligned_dim * loc, (uint32_t)_aligned_dim); +} + +template +void InMemDataStore::get_distance(const data_t *query, const location_t *locations, + const uint32_t location_count, float *distances, + AbstractScratch *scratch_space) const +{ + for (location_t i = 0; i < location_count; i++) + { + distances[i] = _distance_fn->compare(query, _data + locations[i] * _aligned_dim, (uint32_t)this->_aligned_dim); + } +} + +template +float InMemDataStore::get_distance(const location_t loc1, const location_t loc2) const +{ + return _distance_fn->compare(_data + loc1 * _aligned_dim, _data + loc2 * _aligned_dim, + (uint32_t)this->_aligned_dim); +} + +template +void InMemDataStore::get_distance(const data_t *preprocessed_query, const std::vector &ids, + std::vector &distances, AbstractScratch *scratch_space) const +{ + for (int i = 0; i < ids.size(); i++) + { + distances[i] = + _distance_fn->compare(preprocessed_query, _data + ids[i] * _aligned_dim, (uint32_t)this->_aligned_dim); + } +} + +template location_t InMemDataStore::expand(const location_t new_size) +{ + if (new_size == this->capacity()) + { + return this->capacity(); + } + else if (new_size < this->capacity()) + { + std::stringstream ss; + ss << "Cannot 'expand' datastore when new capacity (" << new_size << ") < existing capacity(" + << this->capacity() << ")" << std::endl; + throw diskann::ANNException(ss.str(), -1); + } +#ifndef _WINDOWS + data_t *new_data; + alloc_aligned((void **)&new_data, new_size * _aligned_dim * sizeof(data_t), 8 * sizeof(data_t)); + memcpy(new_data, _data, this->capacity() * _aligned_dim * sizeof(data_t)); + aligned_free(_data); + _data = new_data; +#else + realloc_aligned((void **)&_data, new_size * _aligned_dim * sizeof(data_t), 8 * sizeof(data_t)); +#endif + this->_capacity = new_size; + return this->_capacity; +} + +template location_t InMemDataStore::shrink(const location_t new_size) +{ + if (new_size == this->capacity()) + { + return this->capacity(); + } + else if (new_size > this->capacity()) + { + std::stringstream ss; + ss << "Cannot 'shrink' datastore when new capacity (" << new_size << ") > existing capacity(" + << this->capacity() << ")" << std::endl; + throw diskann::ANNException(ss.str(), -1); + } +#ifndef _WINDOWS + data_t *new_data; + 
alloc_aligned((void **)&new_data, new_size * _aligned_dim * sizeof(data_t), 8 * sizeof(data_t)); + memcpy(new_data, _data, new_size * _aligned_dim * sizeof(data_t)); + aligned_free(_data); + _data = new_data; +#else + realloc_aligned((void **)&_data, new_size * _aligned_dim * sizeof(data_t), 8 * sizeof(data_t)); +#endif + this->_capacity = new_size; + return this->_capacity; +} + +template +void InMemDataStore::move_vectors(const location_t old_location_start, const location_t new_location_start, + const location_t num_locations) +{ + if (num_locations == 0 || old_location_start == new_location_start) + { + return; + } + + /* // Update pointers to the moved nodes. Note: the computation is correct + even + // when new_location_start < old_location_start given the C++ uint32_t + // integer arithmetic rules. + const uint32_t location_delta = new_location_start - old_location_start; + */ + // The [start, end) interval which will contain obsolete points to be + // cleared. + uint32_t mem_clear_loc_start = old_location_start; + uint32_t mem_clear_loc_end_limit = old_location_start + num_locations; + + if (new_location_start < old_location_start) + { + // If ranges are overlapping, make sure not to clear the newly copied + // data. + if (mem_clear_loc_start < new_location_start + num_locations) + { + // Clear only after the end of the new range. + mem_clear_loc_start = new_location_start + num_locations; + } + } + else + { + // If ranges are overlapping, make sure not to clear the newly copied + // data. + if (mem_clear_loc_end_limit > new_location_start) + { + // Clear only up to the beginning of the new range. + mem_clear_loc_end_limit = new_location_start; + } + } + + // Use memmove to handle overlapping ranges. + copy_vectors(old_location_start, new_location_start, num_locations); + memset(_data + _aligned_dim * mem_clear_loc_start, 0, + sizeof(data_t) * _aligned_dim * (mem_clear_loc_end_limit - mem_clear_loc_start)); +} + +template +void InMemDataStore::copy_vectors(const location_t from_loc, const location_t to_loc, + const location_t num_points) +{ + assert(from_loc < this->_capacity); + assert(to_loc < this->_capacity); + assert(num_points < this->_capacity); + memmove(_data + _aligned_dim * to_loc, _data + _aligned_dim * from_loc, num_points * _aligned_dim * sizeof(data_t)); +} + +template location_t InMemDataStore::calculate_medoid() const +{ + // allocate and init centroid + float *center = new float[_aligned_dim]; + for (size_t j = 0; j < _aligned_dim; j++) + center[j] = 0; + + for (size_t i = 0; i < this->capacity(); i++) + for (size_t j = 0; j < _aligned_dim; j++) + center[j] += (float)_data[i * _aligned_dim + j]; + + for (size_t j = 0; j < _aligned_dim; j++) + center[j] /= (float)this->capacity(); + + // compute all to one distance + float *distances = new float[this->capacity()]; + + // TODO: REFACTOR. Removing pragma might make this slow. Must revisit. + // Problem is that we need to pass num_threads here, it is not clear + // if data store must be aware of threads! 
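+ // In short: the medoid is the stored point closest (in L2) to the centroid
+ // computed above; the loop below fills `distances` and the scan after it
+ // picks the argmin.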
+ // #pragma omp parallel for schedule(static, 65536) + for (int64_t i = 0; i < (int64_t)this->capacity(); i++) + { + // extract point and distance reference + float &dist = distances[i]; + const data_t *cur_vec = _data + (i * (size_t)_aligned_dim); + dist = 0; + float diff = 0; + for (size_t j = 0; j < _aligned_dim; j++) + { + diff = (center[j] - (float)cur_vec[j]) * (center[j] - (float)cur_vec[j]); + dist += diff; + } + } + // find imin + uint32_t min_idx = 0; + float min_dist = distances[0]; + for (uint32_t i = 1; i < this->capacity(); i++) + { + if (distances[i] < min_dist) + { + min_idx = i; + min_dist = distances[i]; + } + } + + delete[] distances; + delete[] center; + return min_idx; +} + +template Distance *InMemDataStore::get_dist_fn() const +{ + return this->_distance_fn.get(); +} + +template DISKANN_DLLEXPORT class InMemDataStore; +template DISKANN_DLLEXPORT class InMemDataStore; +template DISKANN_DLLEXPORT class InMemDataStore; + +} // namespace diskann \ No newline at end of file diff --git a/packages/leann-backend-diskann/third_party/DiskANN/src/in_mem_graph_store.cpp b/packages/leann-backend-diskann/third_party/DiskANN/src/in_mem_graph_store.cpp new file mode 100644 index 0000000..c12b251 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/src/in_mem_graph_store.cpp @@ -0,0 +1,242 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#include "in_mem_graph_store.h" +#include "utils.h" + +namespace diskann +{ +InMemGraphStore::InMemGraphStore(const size_t total_pts, const size_t reserve_graph_degree) + : AbstractGraphStore(total_pts, reserve_graph_degree) +{ + this->resize_graph(total_pts); + for (size_t i = 0; i < total_pts; i++) + { + _graph[i].reserve(reserve_graph_degree); + } +} + +std::tuple InMemGraphStore::load(const std::string &index_path_prefix, + const size_t num_points) +{ + return load_impl(index_path_prefix, num_points); +} +int InMemGraphStore::store(const std::string &index_path_prefix, const size_t num_points, + const size_t num_frozen_points, const uint32_t start) +{ + return save_graph(index_path_prefix, num_points, num_frozen_points, start); +} +const std::vector &InMemGraphStore::get_neighbours(const location_t i) const +{ + return _graph.at(i); +} + +void InMemGraphStore::add_neighbour(const location_t i, location_t neighbour_id) +{ + _graph[i].emplace_back(neighbour_id); + if (_max_observed_degree < _graph[i].size()) + { + _max_observed_degree = (uint32_t)(_graph[i].size()); + } +} + +void InMemGraphStore::clear_neighbours(const location_t i) +{ + _graph[i].clear(); +}; +void InMemGraphStore::swap_neighbours(const location_t a, location_t b) +{ + _graph[a].swap(_graph[b]); +}; + +void InMemGraphStore::set_neighbours(const location_t i, std::vector &neighbours) +{ + _graph[i].assign(neighbours.begin(), neighbours.end()); + if (_max_observed_degree < neighbours.size()) + { + _max_observed_degree = (uint32_t)(neighbours.size()); + } +} + +size_t InMemGraphStore::resize_graph(const size_t new_size) +{ + _graph.resize(new_size); + set_total_points(new_size); + return _graph.size(); +} + +void InMemGraphStore::clear_graph() +{ + _graph.clear(); +} + +#ifdef EXEC_ENV_OLS +std::tuple InMemGraphStore::load_impl(AlignedFileReader &reader, size_t expected_num_points) +{ + size_t expected_file_size; + size_t file_frozen_pts; + uint32_t start; + + auto max_points = get_max_points(); + int header_size = 2 * sizeof(size_t) + 2 * sizeof(uint32_t); + std::unique_ptr header = 
std::make_unique(header_size); + read_array(reader, header.get(), header_size); + + expected_file_size = *((size_t *)header.get()); + _max_observed_degree = *((uint32_t *)(header.get() + sizeof(size_t))); + start = *((uint32_t *)(header.get() + sizeof(size_t) + sizeof(uint32_t))); + file_frozen_pts = *((size_t *)(header.get() + sizeof(size_t) + sizeof(uint32_t) + sizeof(uint32_t))); + + diskann::cout << "From graph header, expected_file_size: " << expected_file_size + << ", _max_observed_degree: " << _max_observed_degree << ", _start: " << start + << ", file_frozen_pts: " << file_frozen_pts << std::endl; + + diskann::cout << "Loading vamana graph from reader..." << std::flush; + + // If user provides more points than max_points + // resize the _graph to the larger size. + if (get_total_points() < expected_num_points) + { + diskann::cout << "resizing graph to " << expected_num_points << std::endl; + this->resize_graph(expected_num_points); + } + + uint32_t nodes_read = 0; + size_t cc = 0; + size_t graph_offset = header_size; + while (nodes_read < expected_num_points) + { + uint32_t k; + read_value(reader, k, graph_offset); + graph_offset += sizeof(uint32_t); + std::vector tmp(k); + tmp.reserve(k); + read_array(reader, tmp.data(), k, graph_offset); + graph_offset += k * sizeof(uint32_t); + cc += k; + _graph[nodes_read].swap(tmp); + nodes_read++; + if (nodes_read % 1000000 == 0) + { + diskann::cout << "." << std::flush; + } + if (k > _max_range_of_graph) + { + _max_range_of_graph = k; + } + } + + diskann::cout << "done. Index has " << nodes_read << " nodes and " << cc << " out-edges, _start is set to " << start + << std::endl; + return std::make_tuple(nodes_read, start, file_frozen_pts); +} +#endif + +std::tuple InMemGraphStore::load_impl(const std::string &filename, + size_t expected_num_points) +{ + size_t expected_file_size; + size_t file_frozen_pts; + uint32_t start; + size_t file_offset = 0; // will need this for single file format support + + std::ifstream in; + in.exceptions(std::ios::badbit | std::ios::failbit); + in.open(filename, std::ios::binary); + in.seekg(file_offset, in.beg); + in.read((char *)&expected_file_size, sizeof(size_t)); + in.read((char *)&_max_observed_degree, sizeof(uint32_t)); + in.read((char *)&start, sizeof(uint32_t)); + in.read((char *)&file_frozen_pts, sizeof(size_t)); + size_t vamana_metadata_size = sizeof(size_t) + sizeof(uint32_t) + sizeof(uint32_t) + sizeof(size_t); + + diskann::cout << "From graph header, expected_file_size: " << expected_file_size + << ", _max_observed_degree: " << _max_observed_degree << ", _start: " << start + << ", file_frozen_pts: " << file_frozen_pts << std::endl; + + diskann::cout << "Loading vamana graph " << filename << "..." << std::flush; + + // If user provides more points than max_points + // resize the _graph to the larger size. 
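+ // Each node record below is a uint32_t out-degree k followed by k uint32_t
+ // neighbour ids; a node with k == 0 is logged as an error but loading
+ // continues, and bytes_read advances by (k + 1) * sizeof(uint32_t) until it
+ // reaches expected_file_size.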
+ if (get_total_points() < expected_num_points) + { + diskann::cout << "resizing graph to " << expected_num_points << std::endl; + this->resize_graph(expected_num_points); + } + + size_t bytes_read = vamana_metadata_size; + size_t cc = 0; + uint32_t nodes_read = 0; + while (bytes_read != expected_file_size) + { + uint32_t k; + in.read((char *)&k, sizeof(uint32_t)); + + if (k == 0) + { + diskann::cerr << "ERROR: Point found with no out-neighbours, point#" << nodes_read << std::endl; + } + + cc += k; + ++nodes_read; + std::vector tmp(k); + tmp.reserve(k); + in.read((char *)tmp.data(), k * sizeof(uint32_t)); + _graph[nodes_read - 1].swap(tmp); + bytes_read += sizeof(uint32_t) * ((size_t)k + 1); + if (nodes_read % 10000000 == 0) + diskann::cout << "." << std::flush; + if (k > _max_range_of_graph) + { + _max_range_of_graph = k; + } + } + + diskann::cout << "done. Index has " << nodes_read << " nodes and " << cc << " out-edges, _start is set to " << start + << std::endl; + return std::make_tuple(nodes_read, start, file_frozen_pts); +} + +int InMemGraphStore::save_graph(const std::string &index_path_prefix, const size_t num_points, + const size_t num_frozen_points, const uint32_t start) +{ + std::ofstream out; + open_file_to_write(out, index_path_prefix); + + size_t file_offset = 0; + out.seekp(file_offset, out.beg); + size_t index_size = 24; + uint32_t max_degree = 0; + out.write((char *)&index_size, sizeof(uint64_t)); + out.write((char *)&_max_observed_degree, sizeof(uint32_t)); + uint32_t ep_u32 = start; + out.write((char *)&ep_u32, sizeof(uint32_t)); + out.write((char *)&num_frozen_points, sizeof(size_t)); + + // Note: num_points = _nd + _num_frozen_points + for (uint32_t i = 0; i < num_points; i++) + { + uint32_t GK = (uint32_t)_graph[i].size(); + out.write((char *)&GK, sizeof(uint32_t)); + out.write((char *)_graph[i].data(), GK * sizeof(uint32_t)); + max_degree = _graph[i].size() > max_degree ? (uint32_t)_graph[i].size() : max_degree; + index_size += (size_t)(sizeof(uint32_t) * (GK + 1)); + } + out.seekp(file_offset, out.beg); + out.write((char *)&index_size, sizeof(uint64_t)); + out.write((char *)&max_degree, sizeof(uint32_t)); + out.close(); + return (int)index_size; +} + +size_t InMemGraphStore::get_max_range_of_graph() +{ + return _max_range_of_graph; +} + +uint32_t InMemGraphStore::get_max_observed_degree() +{ + return _max_observed_degree; +} + +} // namespace diskann diff --git a/packages/leann-backend-diskann/third_party/DiskANN/src/index.cpp b/packages/leann-backend-diskann/third_party/DiskANN/src/index.cpp new file mode 100644 index 0000000..7f26288 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/src/index.cpp @@ -0,0 +1,3524 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
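+// index.cpp: core in-memory Index implementation (construction, graph/data/tag
+// persistence, delete lists, and loading of filter label metadata).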
+ +#include + +#include + +#include "boost/dynamic_bitset.hpp" +#include "index_factory.h" +#include "memory_mapper.h" +#include "timer.h" +#include "tsl/robin_map.h" +#include "tsl/robin_set.h" +#include "windows_customizations.h" +#include "tag_uint128.h" +#if defined(DISKANN_RELEASE_UNUSED_TCMALLOC_MEMORY_AT_CHECKPOINTS) && defined(DISKANN_BUILD) +#include "gperftools/malloc_extension.h" +#endif + +#ifdef _WINDOWS +#include +#endif + +#include "index.h" + +#define MAX_POINTS_FOR_USING_BITSET 10000000 + +namespace diskann +{ +// Initialize an index with metric m, load the data of type T with filename +// (bin), and initialize max_points +template +Index::Index(const IndexConfig &index_config, std::shared_ptr> data_store, + std::unique_ptr graph_store, + std::shared_ptr> pq_data_store) + : _dist_metric(index_config.metric), _dim(index_config.dimension), _max_points(index_config.max_points), + _num_frozen_pts(index_config.num_frozen_pts), _dynamic_index(index_config.dynamic_index), + _enable_tags(index_config.enable_tags), _indexingMaxC(DEFAULT_MAXC), _query_scratch(nullptr), + _pq_dist(index_config.pq_dist_build), _use_opq(index_config.use_opq), + _filtered_index(index_config.filtered_index), _num_pq_chunks(index_config.num_pq_chunks), + _delete_set(new tsl::robin_set), _conc_consolidate(index_config.concurrent_consolidate) +{ + if (_dynamic_index && !_enable_tags) + { + throw ANNException("ERROR: Dynamic Indexing must have tags enabled.", -1, __FUNCSIG__, __FILE__, __LINE__); + } + + if (_pq_dist) + { + if (_dynamic_index) + throw ANNException("ERROR: Dynamic Indexing not supported with PQ distance based " + "index construction", + -1, __FUNCSIG__, __FILE__, __LINE__); + if (_dist_metric == diskann::Metric::INNER_PRODUCT) + throw ANNException("ERROR: Inner product metrics not yet supported " + "with PQ distance " + "base index", + -1, __FUNCSIG__, __FILE__, __LINE__); + } + + if (_dynamic_index && _num_frozen_pts == 0) + { + _num_frozen_pts = 1; + } + // Sanity check. While logically it is correct, max_points = 0 causes + // downstream problems. 
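+ // (both total_internal_points and _start below are derived from _max_points,
+ // so it is clamped to a minimum of 1 here).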
+ if (_max_points == 0) + { + _max_points = 1; + } + const size_t total_internal_points = _max_points + _num_frozen_pts; + + _start = (uint32_t)_max_points; + + _data_store = data_store; + _pq_data_store = pq_data_store; + _graph_store = std::move(graph_store); + + _locks = std::vector(total_internal_points); + if (_enable_tags) + { + _location_to_tag.reserve(total_internal_points); + _tag_to_location.reserve(total_internal_points); + } + + if (_dynamic_index) + { + this->enable_delete(); // enable delete by default for dynamic index + if (_filtered_index) + { + _location_to_labels.resize(total_internal_points); + } + } + + if (index_config.index_write_params != nullptr) + { + _indexingQueueSize = index_config.index_write_params->search_list_size; + _indexingRange = index_config.index_write_params->max_degree; + _indexingMaxC = index_config.index_write_params->max_occlusion_size; + _indexingAlpha = index_config.index_write_params->alpha; + _filterIndexingQueueSize = index_config.index_write_params->filter_list_size; + _indexingThreads = index_config.index_write_params->num_threads; + _saturate_graph = index_config.index_write_params->saturate_graph; + + if (index_config.index_search_params != nullptr) + { + uint32_t num_scratch_spaces = index_config.index_search_params->num_search_threads + _indexingThreads; + initialize_query_scratch(num_scratch_spaces, index_config.index_search_params->initial_search_list_size, + _indexingQueueSize, _indexingRange, _indexingMaxC, _data_store->get_dims()); + } + } +} + +template +Index::Index(Metric m, const size_t dim, const size_t max_points, + const std::shared_ptr index_parameters, + const std::shared_ptr index_search_params, const size_t num_frozen_pts, + const bool dynamic_index, const bool enable_tags, const bool concurrent_consolidate, + const bool pq_dist_build, const size_t num_pq_chunks, const bool use_opq, + const bool filtered_index) + : Index( + IndexConfigBuilder() + .with_metric(m) + .with_dimension(dim) + .with_max_points(max_points) + .with_index_write_params(index_parameters) + .with_index_search_params(index_search_params) + .with_num_frozen_pts(num_frozen_pts) + .is_dynamic_index(dynamic_index) + .is_enable_tags(enable_tags) + .is_concurrent_consolidate(concurrent_consolidate) + .is_pq_dist_build(pq_dist_build) + .with_num_pq_chunks(num_pq_chunks) + .is_use_opq(use_opq) + .is_filtered(filtered_index) + .with_data_type(diskann_type_to_name()) + .build(), + IndexFactory::construct_datastore(DataStoreStrategy::MEMORY, + (max_points == 0 ? (size_t)1 : max_points) + + (dynamic_index && num_frozen_pts == 0 ? (size_t)1 : num_frozen_pts), + dim, m), + IndexFactory::construct_graphstore(GraphStoreStrategy::MEMORY, + (max_points == 0 ? (size_t)1 : max_points) + + (dynamic_index && num_frozen_pts == 0 ? (size_t)1 : num_frozen_pts), + (size_t)((index_parameters == nullptr ? 
0 : index_parameters->max_degree) * + defaults::GRAPH_SLACK_FACTOR * 1.05))) +{ + if (_pq_dist) + { + _pq_data_store = IndexFactory::construct_pq_datastore(DataStoreStrategy::MEMORY, max_points + num_frozen_pts, + dim, m, num_pq_chunks, use_opq); + } + else + { + _pq_data_store = _data_store; + } +} + +template Index::~Index() +{ + // Ensure that no other activity is happening before dtor() + std::unique_lock ul(_update_lock); + std::unique_lock cl(_consolidate_lock); + std::unique_lock tl(_tag_lock); + std::unique_lock dl(_delete_lock); + + for (auto &lock : _locks) + { + LockGuard lg(lock); + } + + if (_opt_graph != nullptr) + { + delete[] _opt_graph; + } + + if (!_query_scratch.empty()) + { + ScratchStoreManager> manager(_query_scratch); + manager.destroy(); + } +} + +template +void Index::initialize_query_scratch(uint32_t num_threads, uint32_t search_l, uint32_t indexing_l, + uint32_t r, uint32_t maxc, size_t dim) +{ + for (uint32_t i = 0; i < num_threads; i++) + { + auto scratch = new InMemQueryScratch(search_l, indexing_l, r, maxc, dim, _data_store->get_aligned_dim(), + _data_store->get_alignment_factor(), _pq_dist); + _query_scratch.push(scratch); + } +} + +template size_t Index::save_tags(std::string tags_file) +{ + if (!_enable_tags) + { + diskann::cout << "Not saving tags as they are not enabled." << std::endl; + return 0; + } + + size_t tag_bytes_written; + TagT *tag_data = new TagT[_nd + _num_frozen_pts]; + for (uint32_t i = 0; i < _nd; i++) + { + TagT tag; + if (_location_to_tag.try_get(i, tag)) + { + tag_data[i] = tag; + } + else + { + // catering to future when tagT can be any type. + std::memset((char *)&tag_data[i], 0, sizeof(TagT)); + } + } + if (_num_frozen_pts > 0) + { + std::memset((char *)&tag_data[_start], 0, sizeof(TagT) * _num_frozen_pts); + } + try + { + tag_bytes_written = save_bin(tags_file, tag_data, _nd + _num_frozen_pts, 1); + } + catch (std::system_error &e) + { + throw FileException(tags_file, e, __FUNCSIG__, __FILE__, __LINE__); + } + delete[] tag_data; + return tag_bytes_written; +} + +template size_t Index::save_data(std::string data_file) +{ + // Note: at this point, either _nd == _max_points or any frozen points have + // been temporarily moved to _nd, so _nd + _num_frozen_pts is the valid + // location limit. + return _data_store->save(data_file, (location_t)(_nd + _num_frozen_pts)); +} + +// save the graph index on a file as an adjacency list. 
For each point, +// first store the number of neighbors, and then the neighbor list (each as +// 4 byte uint32_t) +template size_t Index::save_graph(std::string graph_file) +{ + return _graph_store->store(graph_file, _nd + _num_frozen_pts, _num_frozen_pts, _start); +} + +template +size_t Index::save_delete_list(const std::string &filename) +{ + if (_delete_set->size() == 0) + { + return 0; + } + std::unique_ptr delete_list = std::make_unique(_delete_set->size()); + uint32_t i = 0; + for (auto &del : *_delete_set) + { + delete_list[i++] = del; + } + return save_bin(filename, delete_list.get(), _delete_set->size(), 1); +} + +template +void Index::save(const char *filename, bool compact_before_save) +{ + diskann::Timer timer; + + std::unique_lock ul(_update_lock); + std::unique_lock cl(_consolidate_lock); + std::unique_lock tl(_tag_lock); + std::unique_lock dl(_delete_lock); + + if (compact_before_save) + { + compact_data(); + compact_frozen_point(); + } + else + { + if (!_data_compacted) + { + throw ANNException("Index save for non-compacted index is not yet implemented", -1, __FUNCSIG__, __FILE__, + __LINE__); + } + } + + if (!_save_as_one_file) + { + if (_filtered_index) + { + if (_label_to_start_id.size() > 0) + { + std::ofstream medoid_writer(std::string(filename) + "_labels_to_medoids.txt"); + if (medoid_writer.fail()) + { + throw diskann::ANNException(std::string("Failed to open file ") + filename, -1); + } + for (auto iter : _label_to_start_id) + { + medoid_writer << iter.first << ", " << iter.second << std::endl; + } + medoid_writer.close(); + } + + if (_use_universal_label) + { + std::ofstream universal_label_writer(std::string(filename) + "_universal_label.txt"); + assert(universal_label_writer.is_open()); + universal_label_writer << _universal_label << std::endl; + universal_label_writer.close(); + } + + if (_location_to_labels.size() > 0) + { + std::ofstream label_writer(std::string(filename) + "_labels.txt"); + assert(label_writer.is_open()); + for (uint32_t i = 0; i < _nd + _num_frozen_pts; i++) + { + for (uint32_t j = 0; j + 1 < _location_to_labels[i].size(); j++) + { + label_writer << _location_to_labels[i][j] << ","; + } + if (_location_to_labels[i].size() != 0) + label_writer << _location_to_labels[i][_location_to_labels[i].size() - 1]; + + label_writer << std::endl; + } + label_writer.close(); + + // write compacted raw_labels if data hence _location_to_labels was also compacted + if (compact_before_save && _dynamic_index) + { + _label_map = load_label_map(std::string(filename) + "_labels_map.txt"); + std::unordered_map mapped_to_raw_labels; + // invert label map + for (const auto &[key, value] : _label_map) + { + mapped_to_raw_labels.insert({value, key}); + } + + // write updated labels + std::ofstream raw_label_writer(std::string(filename) + "_raw_labels.txt"); + assert(raw_label_writer.is_open()); + for (uint32_t i = 0; i < _nd + _num_frozen_pts; i++) + { + for (uint32_t j = 0; j + 1 < _location_to_labels[i].size(); j++) + { + raw_label_writer << mapped_to_raw_labels[_location_to_labels[i][j]] << ","; + } + if (_location_to_labels[i].size() != 0) + raw_label_writer + << mapped_to_raw_labels[_location_to_labels[i][_location_to_labels[i].size() - 1]]; + + raw_label_writer << std::endl; + } + raw_label_writer.close(); + } + } + } + + std::string graph_file = std::string(filename); + std::string tags_file = std::string(filename) + ".tags"; + std::string data_file = std::string(filename) + ".data"; + std::string delete_list_file = std::string(filename) + ".del"; + + // 
Because the save_* functions use append mode, ensure that + // the files are deleted before save. Ideally, we should check + // the error code for delete_file, but will ignore now because + // delete should succeed if save will succeed. + delete_file(graph_file); + save_graph(graph_file); + delete_file(data_file); + save_data(data_file); + delete_file(tags_file); + save_tags(tags_file); + delete_file(delete_list_file); + save_delete_list(delete_list_file); + } + else + { + diskann::cout << "Save index in a single file currently not supported. " + "Not saving the index." + << std::endl; + } + + // If frozen points were temporarily compacted to _nd, move back to + // _max_points. + reposition_frozen_point_to_end(); + + diskann::cout << "Time taken for save: " << timer.elapsed() / 1000000.0 << "s." << std::endl; +} + +#ifdef EXEC_ENV_OLS +template +size_t Index::load_tags(AlignedFileReader &reader) +{ +#else +template +size_t Index::load_tags(const std::string tag_filename) +{ + if (_enable_tags && !file_exists(tag_filename)) + { + diskann::cerr << "Tag file " << tag_filename << " does not exist!" << std::endl; + throw diskann::ANNException("Tag file " + tag_filename + " does not exist!", -1, __FUNCSIG__, __FILE__, + __LINE__); + } +#endif + if (!_enable_tags) + { + diskann::cout << "Tags not loaded as tags not enabled." << std::endl; + return 0; + } + + size_t file_dim, file_num_points; + TagT *tag_data; +#ifdef EXEC_ENV_OLS + load_bin(reader, tag_data, file_num_points, file_dim); +#else + load_bin(std::string(tag_filename), tag_data, file_num_points, file_dim); +#endif + + if (file_dim != 1) + { + std::stringstream stream; + stream << "ERROR: Found " << file_dim << " dimensions for tags," + << "but tag file must have 1 dimension." << std::endl; + diskann::cerr << stream.str() << std::endl; + delete[] tag_data; + throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); + } + + const size_t num_data_points = file_num_points - _num_frozen_pts; + _location_to_tag.reserve(num_data_points); + _tag_to_location.reserve(num_data_points); + for (uint32_t i = 0; i < (uint32_t)num_data_points; i++) + { + TagT tag = *(tag_data + i); + if (_delete_set->find(i) == _delete_set->end()) + { + _location_to_tag.set(i, tag); + _tag_to_location[tag] = i; + } + } + diskann::cout << "Tags loaded." << std::endl; + delete[] tag_data; + return file_num_points; +} + +template +#ifdef EXEC_ENV_OLS +size_t Index::load_data(AlignedFileReader &reader) +{ +#else +size_t Index::load_data(std::string filename) +{ +#endif + size_t file_dim, file_num_points; +#ifdef EXEC_ENV_OLS + diskann::get_bin_metadata(reader, file_num_points, file_dim); +#else + if (!file_exists(filename)) + { + std::stringstream stream; + stream << "ERROR: data file " << filename << " does not exist." << std::endl; + diskann::cerr << stream.str() << std::endl; + throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); + } + diskann::get_bin_metadata(filename, file_num_points, file_dim); +#endif + + // since we are loading a new dataset, _empty_slots must be cleared + _empty_slots.clear(); + + if (file_dim != _dim) + { + std::stringstream stream; + stream << "ERROR: Driver requests loading " << _dim << " dimension," + << "but file has " << file_dim << " dimension." 
<< std::endl; + diskann::cerr << stream.str() << std::endl; + throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); + } + + if (file_num_points > _max_points + _num_frozen_pts) + { + // update and tag lock acquired in load() before calling load_data + resize(file_num_points - _num_frozen_pts); + } + +#ifdef EXEC_ENV_OLS + // REFACTOR TODO: Must figure out how to support aligned reader in a clean + // manner. + copy_aligned_data_from_file(reader, _data, file_num_points, file_dim, _data_store->get_aligned_dim()); +#else + _data_store->load(filename); // offset == 0. +#endif + return file_num_points; +} + +#ifdef EXEC_ENV_OLS +template +size_t Index::load_delete_set(AlignedFileReader &reader) +{ +#else +template +size_t Index::load_delete_set(const std::string &filename) +{ +#endif + std::unique_ptr delete_list; + size_t npts, ndim; + +#ifdef EXEC_ENV_OLS + diskann::load_bin(reader, delete_list, npts, ndim); +#else + diskann::load_bin(filename, delete_list, npts, ndim); +#endif + assert(ndim == 1); + for (uint32_t i = 0; i < npts; i++) + { + _delete_set->insert(delete_list[i]); + } + return npts; +} + +// load the index from file and update the max_degree, cur (navigating +// node loc), and _final_graph (adjacency list) +template +#ifdef EXEC_ENV_OLS +void Index::load(AlignedFileReader &reader, uint32_t num_threads, uint32_t search_l) +{ +#else +void Index::load(const char *filename, uint32_t num_threads, uint32_t search_l) +{ +#endif + std::unique_lock ul(_update_lock); + std::unique_lock cl(_consolidate_lock); + std::unique_lock tl(_tag_lock); + std::unique_lock dl(_delete_lock); + + _has_built = true; + + size_t tags_file_num_pts = 0, graph_num_pts = 0, data_file_num_pts = 0, label_num_pts = 0; + + std::string mem_index_file(filename); + std::string labels_file = mem_index_file + "_labels.txt"; + std::string labels_to_medoids = mem_index_file + "_labels_to_medoids.txt"; + std::string labels_map_file = mem_index_file + "_labels_map.txt"; + + if (!_save_as_one_file) + { + // For DLVS Store, we will not support saving the index in multiple + // files. +#ifndef EXEC_ENV_OLS + std::string data_file = std::string(filename) + ".data"; + std::string tags_file = std::string(filename) + ".tags"; + std::string delete_set_file = std::string(filename) + ".del"; + std::string graph_file = std::string(filename); + data_file_num_pts = load_data(data_file); + if (file_exists(delete_set_file)) + { + load_delete_set(delete_set_file); + } + if (_enable_tags) + { + tags_file_num_pts = load_tags(tags_file); + } + graph_num_pts = load_graph(graph_file, data_file_num_pts); +#endif + } + else + { + diskann::cout << "Single index file saving/loading support not yet " + "enabled. Not loading the index." + << std::endl; + return; + } + + if (data_file_num_pts != graph_num_pts || (data_file_num_pts != tags_file_num_pts && _enable_tags)) + { + std::stringstream stream; + stream << "ERROR: When loading index, loaded " << data_file_num_pts << " points from datafile, " + << graph_num_pts << " from graph, and " << tags_file_num_pts + << " tags, with num_frozen_pts being set to " << _num_frozen_pts << " in constructor." 
<< std::endl; + diskann::cerr << stream.str() << std::endl; + throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); + } + + if (file_exists(labels_file)) + { + _label_map = load_label_map(labels_map_file); + parse_label_file(labels_file, label_num_pts); + assert(label_num_pts == data_file_num_pts - _num_frozen_pts); + if (file_exists(labels_to_medoids)) + { + std::ifstream medoid_stream(labels_to_medoids); + std::string line, token; + uint32_t line_cnt = 0; + + _label_to_start_id.clear(); + + while (std::getline(medoid_stream, line)) + { + std::istringstream iss(line); + uint32_t cnt = 0; + uint32_t medoid = 0; + LabelT label; + while (std::getline(iss, token, ',')) + { + token.erase(std::remove(token.begin(), token.end(), '\n'), token.end()); + token.erase(std::remove(token.begin(), token.end(), '\r'), token.end()); + LabelT token_as_num = (LabelT)std::stoul(token); + if (cnt == 0) + label = token_as_num; + else + medoid = token_as_num; + cnt++; + } + _label_to_start_id[label] = medoid; + line_cnt++; + } + } + + std::string universal_label_file(filename); + universal_label_file += "_universal_label.txt"; + if (file_exists(universal_label_file)) + { + std::ifstream universal_label_reader(universal_label_file); + universal_label_reader >> _universal_label; + _use_universal_label = true; + universal_label_reader.close(); + } + } + + _nd = data_file_num_pts - _num_frozen_pts; + _empty_slots.clear(); + _empty_slots.reserve(_max_points); + for (auto i = _nd; i < _max_points; i++) + { + _empty_slots.insert((uint32_t)i); + } + + reposition_frozen_point_to_end(); + diskann::cout << "Num frozen points:" << _num_frozen_pts << " _nd: " << _nd << " _start: " << _start + << " size(_location_to_tag): " << _location_to_tag.size() + << " size(_tag_to_location):" << _tag_to_location.size() << " Max points: " << _max_points + << std::endl; + + // For incremental index, _query_scratch is initialized in the constructor. + // For the bulk index, the params required to initialize _query_scratch + // are known only at load time, hence this check and the call to + // initialize_q_s(). 
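+    // A minimal usage sketch (illustrative only; `index` and the paths are
+    // hypothetical and assume an Index instance constructed and built elsewhere):
+    //
+    //     index.save("/tmp/mem_index", /*compact_before_save=*/false);
+    //     // writes /tmp/mem_index (graph), plus .data, .tags and .del, as in save()
+    //     index.load("/tmp/mem_index", /*num_threads=*/8, /*search_l=*/100);
+    //
+    // load() restores _nd, _start, the tag maps and the adjacency lists, and only
+    // then performs the scratch-space check below.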
+ if (_query_scratch.size() == 0) + { + initialize_query_scratch(num_threads, search_l, search_l, (uint32_t)_graph_store->get_max_range_of_graph(), + _indexingMaxC, _dim); + } +} + +#ifndef EXEC_ENV_OLS +template +size_t Index::get_graph_num_frozen_points(const std::string &graph_file) +{ + size_t expected_file_size; + uint32_t max_observed_degree, start; + size_t file_frozen_pts; + + std::ifstream in; + in.exceptions(std::ios::badbit | std::ios::failbit); + + in.open(graph_file, std::ios::binary); + in.read((char *)&expected_file_size, sizeof(size_t)); + in.read((char *)&max_observed_degree, sizeof(uint32_t)); + in.read((char *)&start, sizeof(uint32_t)); + in.read((char *)&file_frozen_pts, sizeof(size_t)); + + return file_frozen_pts; +} +#endif + +#ifdef EXEC_ENV_OLS +template +size_t Index::load_graph(AlignedFileReader &reader, size_t expected_num_points) +{ +#else + +template +size_t Index::load_graph(std::string filename, size_t expected_num_points) +{ +#endif + auto res = _graph_store->load(filename, expected_num_points); + _start = std::get<1>(res); + _num_frozen_pts = std::get<2>(res); + return std::get<0>(res); +} + +template +int Index::_get_vector_by_tag(TagType &tag, DataType &vec) +{ + try + { + TagT tag_val = std::any_cast(tag); + T *vec_val = std::any_cast(vec); + return this->get_vector_by_tag(tag_val, vec_val); + } + catch (const std::bad_any_cast &e) + { + throw ANNException("Error: bad any cast while performing _get_vector_by_tags() " + std::string(e.what()), -1); + } + catch (const std::exception &e) + { + throw ANNException("Error: " + std::string(e.what()), -1); + } +} + +template int Index::get_vector_by_tag(TagT &tag, T *vec) +{ + std::shared_lock lock(_tag_lock); + if (_tag_to_location.find(tag) == _tag_to_location.end()) + { + diskann::cout << "Tag " << get_tag_string(tag) << " does not exist" << std::endl; + return -1; + } + + location_t location = _tag_to_location[tag]; + _data_store->get_vector(location, vec); + + return 0; +} + +template uint32_t Index::calculate_entry_point() +{ + // REFACTOR TODO: This function does not support multi-threaded calculation of medoid. + // Must revisit if perf is a concern. + return _data_store->calculate_medoid(); +} + +template std::vector Index::get_init_ids() +{ + std::vector init_ids; + init_ids.reserve(1 + _num_frozen_pts); + + init_ids.emplace_back(_start); + + for (uint32_t frozen = (uint32_t)_max_points; frozen < _max_points + _num_frozen_pts; frozen++) + { + if (frozen != _start) + { + init_ids.emplace_back(frozen); + } + } + + return init_ids; +} + +// Find common filter between a node's labels and a given set of labels, while +// taking into account universal label +template +bool Index::detect_common_filters(uint32_t point_id, bool search_invocation, + const std::vector &incoming_labels) +{ + auto &curr_node_labels = _location_to_labels[point_id]; + std::vector common_filters; + std::set_intersection(incoming_labels.begin(), incoming_labels.end(), curr_node_labels.begin(), + curr_node_labels.end(), std::back_inserter(common_filters)); + if (common_filters.size() > 0) + { + // This is to reduce the repetitive calls. 
If common_filters size is > 0 , + // we dont need to check further for universal label + return true; + } + if (_use_universal_label) + { + if (!search_invocation) + { + if (std::find(incoming_labels.begin(), incoming_labels.end(), _universal_label) != incoming_labels.end() || + std::find(curr_node_labels.begin(), curr_node_labels.end(), _universal_label) != curr_node_labels.end()) + common_filters.push_back(_universal_label); + } + else + { + if (std::find(curr_node_labels.begin(), curr_node_labels.end(), _universal_label) != curr_node_labels.end()) + common_filters.push_back(_universal_label); + } + } + return (common_filters.size() > 0); +} + +template +std::pair Index::iterate_to_fixed_point( + InMemQueryScratch *scratch, const uint32_t Lsize, const std::vector &init_ids, bool use_filter, + const std::vector &filter_labels, bool search_invocation) +{ + std::vector &expanded_nodes = scratch->pool(); + NeighborPriorityQueue &best_L_nodes = scratch->best_l_nodes(); + best_L_nodes.reserve(Lsize); + tsl::robin_set &inserted_into_pool_rs = scratch->inserted_into_pool_rs(); + boost::dynamic_bitset<> &inserted_into_pool_bs = scratch->inserted_into_pool_bs(); + std::vector &id_scratch = scratch->id_scratch(); + std::vector &dist_scratch = scratch->dist_scratch(); + assert(id_scratch.size() == 0); + + T *aligned_query = scratch->aligned_query(); + + float *pq_dists = nullptr; + + _pq_data_store->preprocess_query(aligned_query, scratch); + + if (expanded_nodes.size() > 0 || id_scratch.size() > 0) + { + throw ANNException("ERROR: Clear scratch space before passing.", -1, __FUNCSIG__, __FILE__, __LINE__); + } + + // Decide whether to use bitset or robin set to mark visited nodes + auto total_num_points = _max_points + _num_frozen_pts; + bool fast_iterate = total_num_points <= MAX_POINTS_FOR_USING_BITSET; + + if (fast_iterate) + { + if (inserted_into_pool_bs.size() < total_num_points) + { + // hopefully using 2X will reduce the number of allocations. + auto resize_size = + 2 * total_num_points > MAX_POINTS_FOR_USING_BITSET ? MAX_POINTS_FOR_USING_BITSET : 2 * total_num_points; + inserted_into_pool_bs.resize(resize_size); + } + } + + // Lambda to determine if a node has been visited + auto is_not_visited = [this, fast_iterate, &inserted_into_pool_bs, &inserted_into_pool_rs](const uint32_t id) { + return fast_iterate ? 
inserted_into_pool_bs[id] == 0 + : inserted_into_pool_rs.find(id) == inserted_into_pool_rs.end(); + }; + + // Lambda to batch compute query<-> node distances in PQ space + auto compute_dists = [this, scratch, pq_dists](const std::vector &ids, std::vector &dists_out) { + _pq_data_store->get_distance(scratch->aligned_query(), ids, dists_out, scratch); + }; + + // Initialize the candidate pool with starting points + for (auto id : init_ids) + { + if (id >= _max_points + _num_frozen_pts) + { + diskann::cerr << "Out of range loc found as an edge : " << id << std::endl; + throw diskann::ANNException(std::string("Wrong loc") + std::to_string(id), -1, __FUNCSIG__, __FILE__, + __LINE__); + } + + if (use_filter) + { + if (!detect_common_filters(id, search_invocation, filter_labels)) + continue; + } + + if (is_not_visited(id)) + { + if (fast_iterate) + { + inserted_into_pool_bs[id] = 1; + } + else + { + inserted_into_pool_rs.insert(id); + } + + float distance; + uint32_t ids[] = {id}; + float distances[] = {std::numeric_limits::max()}; + _pq_data_store->get_distance(aligned_query, ids, 1, distances, scratch); + distance = distances[0]; + + Neighbor nn = Neighbor(id, distance); + best_L_nodes.insert(nn); + } + } + + uint32_t hops = 0; + uint32_t cmps = 0; + + while (best_L_nodes.has_unexpanded_node()) + { + auto nbr = best_L_nodes.closest_unexpanded(); + auto n = nbr.id; + + // Add node to expanded nodes to create pool for prune later + if (!search_invocation) + { + if (!use_filter) + { + expanded_nodes.emplace_back(nbr); + } + else + { // in filter based indexing, the same point might invoke + // multiple iterate_to_fixed_points, so need to be careful + // not to add the same item to pool multiple times. + if (std::find(expanded_nodes.begin(), expanded_nodes.end(), nbr) == expanded_nodes.end()) + { + expanded_nodes.emplace_back(nbr); + } + } + } + + // Find which of the nodes in des have not been visited before + id_scratch.clear(); + dist_scratch.clear(); + if (_dynamic_index) + { + LockGuard guard(_locks[n]); + for (auto id : _graph_store->get_neighbours(n)) + { + assert(id < _max_points + _num_frozen_pts); + + if (use_filter) + { + // NOTE: NEED TO CHECK IF THIS CORRECT WITH NEW LOCKS. + if (!detect_common_filters(id, search_invocation, filter_labels)) + continue; + } + + if (is_not_visited(id)) + { + id_scratch.push_back(id); + } + } + } + else + { + _locks[n].lock(); + auto nbrs = _graph_store->get_neighbours(n); + _locks[n].unlock(); + for (auto id : nbrs) + { + assert(id < _max_points + _num_frozen_pts); + + if (use_filter) + { + // NOTE: NEED TO CHECK IF THIS CORRECT WITH NEW LOCKS. 
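+                    // detect_common_filters() admits a neighbour only if it shares at
+                    // least one label with the incoming filter set (with special-casing
+                    // for the universal label).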
+ if (!detect_common_filters(id, search_invocation, filter_labels)) + continue; + } + + if (is_not_visited(id)) + { + id_scratch.push_back(id); + } + } + } + + // Mark nodes visited + for (auto id : id_scratch) + { + if (fast_iterate) + { + inserted_into_pool_bs[id] = 1; + } + else + { + inserted_into_pool_rs.insert(id); + } + } + + assert(dist_scratch.capacity() >= id_scratch.size()); + compute_dists(id_scratch, dist_scratch); + cmps += (uint32_t)id_scratch.size(); + + // Insert pairs into the pool of candidates + for (size_t m = 0; m < id_scratch.size(); ++m) + { + best_L_nodes.insert(Neighbor(id_scratch[m], dist_scratch[m])); + } + } + return std::make_pair(hops, cmps); +} + +template +void Index::search_for_point_and_prune(int location, uint32_t Lindex, + std::vector &pruned_list, + InMemQueryScratch *scratch, bool use_filter, + uint32_t filteredLindex) +{ + const std::vector init_ids = get_init_ids(); + const std::vector unused_filter_label; + + if (!use_filter) + { + _data_store->get_vector(location, scratch->aligned_query()); + iterate_to_fixed_point(scratch, Lindex, init_ids, false, unused_filter_label, false); + } + else + { + std::shared_lock tl(_tag_lock, std::defer_lock); + if (_dynamic_index) + tl.lock(); + std::vector filter_specific_start_nodes; + for (auto &x : _location_to_labels[location]) + filter_specific_start_nodes.emplace_back(_label_to_start_id[x]); + + if (_dynamic_index) + tl.unlock(); + + _data_store->get_vector(location, scratch->aligned_query()); + iterate_to_fixed_point(scratch, filteredLindex, filter_specific_start_nodes, true, + _location_to_labels[location], false); + + // combine candidate pools obtained with filter and unfiltered criteria. + std::set best_candidate_pool; + for (auto filtered_neighbor : scratch->pool()) + { + best_candidate_pool.insert(filtered_neighbor); + } + + // clear scratch for finding unfiltered candidates + scratch->clear(); + + _data_store->get_vector(location, scratch->aligned_query()); + iterate_to_fixed_point(scratch, Lindex, init_ids, false, unused_filter_label, false); + + for (auto unfiltered_neighbour : scratch->pool()) + { + // insert if this neighbour is not already in best_candidate_pool + if (best_candidate_pool.find(unfiltered_neighbour) == best_candidate_pool.end()) + { + best_candidate_pool.insert(unfiltered_neighbour); + } + } + + scratch->pool().clear(); + std::copy(best_candidate_pool.begin(), best_candidate_pool.end(), std::back_inserter(scratch->pool())); + } + + auto &pool = scratch->pool(); + + for (uint32_t i = 0; i < pool.size(); i++) + { + if (pool[i].id == (uint32_t)location) + { + pool.erase(pool.begin() + i); + i--; + } + } + + if (pruned_list.size() > 0) + { + throw diskann::ANNException("ERROR: non-empty pruned_list passed", -1, __FUNCSIG__, __FILE__, __LINE__); + } + + prune_neighbors(location, pool, pruned_list, scratch); + + assert(!pruned_list.empty()); + assert(_graph_store->get_total_points() == _max_points + _num_frozen_pts); +} + +template +void Index::occlude_list(const uint32_t location, std::vector &pool, const float alpha, + const uint32_t degree, const uint32_t maxc, std::vector &result, + InMemQueryScratch *scratch, + const tsl::robin_set *const delete_set_ptr) +{ + if (pool.size() == 0) + return; + + // Truncate pool at maxc and initialize scratch spaces + assert(std::is_sorted(pool.begin(), pool.end())); + assert(result.size() == 0); + if (pool.size() > maxc) + pool.resize(maxc); + std::vector &occlude_factor = scratch->occlude_factor(); + // occlude_list can be called with the same 
scratch more than once by + // search_for_point_and_add_link through inter_insert. + occlude_factor.clear(); + // Initialize occlude_factor to pool.size() many 0.0f values for correctness + occlude_factor.insert(occlude_factor.end(), pool.size(), 0.0f); + + float cur_alpha = 1; + while (cur_alpha <= alpha && result.size() < degree) + { + // used for MIPS, where we store a value of eps in cur_alpha to + // denote pruned out entries which we can skip in later rounds. + float eps = cur_alpha + 0.01f; + + for (auto iter = pool.begin(); result.size() < degree && iter != pool.end(); ++iter) + { + if (occlude_factor[iter - pool.begin()] > cur_alpha) + { + continue; + } + // Set the entry to float::max so that is not considered again + occlude_factor[iter - pool.begin()] = std::numeric_limits::max(); + // Add the entry to the result if its not been deleted, and doesn't + // add a self loop + if (delete_set_ptr == nullptr || delete_set_ptr->find(iter->id) == delete_set_ptr->end()) + { + if (iter->id != location) + { + result.push_back(iter->id); + } + } + + // Update occlude factor for points from iter+1 to pool.end() + for (auto iter2 = iter + 1; iter2 != pool.end(); iter2++) + { + auto t = iter2 - pool.begin(); + if (occlude_factor[t] > alpha) + continue; + + bool prune_allowed = true; + if (_filtered_index) + { + uint32_t a = iter->id; + uint32_t b = iter2->id; + if (_location_to_labels.size() < b || _location_to_labels.size() < a) + continue; + for (auto &x : _location_to_labels[b]) + { + if (std::find(_location_to_labels[a].begin(), _location_to_labels[a].end(), x) == + _location_to_labels[a].end()) + { + prune_allowed = false; + } + if (!prune_allowed) + break; + } + } + if (!prune_allowed) + continue; + + float djk = _data_store->get_distance(iter2->id, iter->id); + if (_dist_metric == diskann::Metric::L2 || _dist_metric == diskann::Metric::COSINE) + { + occlude_factor[t] = (djk == 0) ? std::numeric_limits::max() + : std::max(occlude_factor[t], iter2->distance / djk); + } + else if (_dist_metric == diskann::Metric::INNER_PRODUCT) + { + // Improvization for flipping max and min dist for MIPS + float x = -iter2->distance; + float y = -djk; + if (y > cur_alpha * x) + { + occlude_factor[t] = std::max(occlude_factor[t], eps); + } + } + } + } + cur_alpha *= 1.2f; + } +} + +template +void Index::prune_neighbors(const uint32_t location, std::vector &pool, + std::vector &pruned_list, InMemQueryScratch *scratch) +{ + prune_neighbors(location, pool, _indexingRange, _indexingMaxC, _indexingAlpha, pruned_list, scratch); +} + +template +void Index::prune_neighbors(const uint32_t location, std::vector &pool, const uint32_t range, + const uint32_t max_candidate_size, const float alpha, + std::vector &pruned_list, InMemQueryScratch *scratch) +{ + if (pool.size() == 0) + { + // if the pool is empty, behave like a noop + pruned_list.clear(); + return; + } + + // If using _pq_build, over-write the PQ distances with actual distances + // REFACTOR PQ: TODO: How to get rid of this!? 
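+    // The occlude_list() call below applies the alpha-based pruning rule for the
+    // L2/cosine metrics (MIPS uses the flipped criterion coded above): a sorted
+    // candidate j is passed over while d(location, j) / d(i, j) > cur_alpha for some
+    // already selected neighbour i, and dropped for good once that ratio exceeds the
+    // final alpha. Worked example with illustrative numbers: for alpha = 1.2, a
+    // candidate at distance 10 from `location` is occluded by a kept neighbour i with
+    // d(i, j) = 7, since 10 / 7 ~ 1.43 > 1.2, but stays eligible when d(i, j) = 9,
+    // since 10 / 9 ~ 1.11 <= 1.2. cur_alpha starts at 1 and is relaxed by a factor of
+    // 1.2 per round until `alpha` is reached or `degree` neighbours have been kept.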
+ if (_pq_dist) + { + for (auto &ngh : pool) + ngh.distance = _data_store->get_distance(ngh.id, location); + } + + // sort the pool based on distance to query and prune it with occlude_list + std::sort(pool.begin(), pool.end()); + pruned_list.clear(); + pruned_list.reserve(range); + + occlude_list(location, pool, alpha, range, max_candidate_size, pruned_list, scratch); + assert(pruned_list.size() <= range); + + if (_saturate_graph && alpha > 1) + { + for (const auto &node : pool) + { + if (pruned_list.size() >= range) + break; + if ((std::find(pruned_list.begin(), pruned_list.end(), node.id) == pruned_list.end()) && + node.id != location) + pruned_list.push_back(node.id); + } + } +} + +template +void Index::inter_insert(uint32_t n, std::vector &pruned_list, const uint32_t range, + InMemQueryScratch *scratch) +{ + const auto &src_pool = pruned_list; + + assert(!src_pool.empty()); + + for (auto des : src_pool) + { + // des.loc is the loc of the neighbors of n + assert(des < _max_points + _num_frozen_pts); + // des_pool contains the neighbors of the neighbors of n + std::vector copy_of_neighbors; + bool prune_needed = false; + { + LockGuard guard(_locks[des]); + auto &des_pool = _graph_store->get_neighbours(des); + if (std::find(des_pool.begin(), des_pool.end(), n) == des_pool.end()) + { + if (des_pool.size() < (uint64_t)(defaults::GRAPH_SLACK_FACTOR * range)) + { + // des_pool.emplace_back(n); + _graph_store->add_neighbour(des, n); + prune_needed = false; + } + else + { + copy_of_neighbors.reserve(des_pool.size() + 1); + copy_of_neighbors = des_pool; + copy_of_neighbors.push_back(n); + prune_needed = true; + } + } + } // des lock is released by this point + + if (prune_needed) + { + tsl::robin_set dummy_visited(0); + std::vector dummy_pool(0); + + size_t reserveSize = (size_t)(std::ceil(1.05 * defaults::GRAPH_SLACK_FACTOR * range)); + dummy_visited.reserve(reserveSize); + dummy_pool.reserve(reserveSize); + + for (auto cur_nbr : copy_of_neighbors) + { + if (dummy_visited.find(cur_nbr) == dummy_visited.end() && cur_nbr != des) + { + float dist = _data_store->get_distance(des, cur_nbr); + dummy_pool.emplace_back(Neighbor(cur_nbr, dist)); + dummy_visited.insert(cur_nbr); + } + } + std::vector new_out_neighbors; + prune_neighbors(des, dummy_pool, new_out_neighbors, scratch); + { + LockGuard guard(_locks[des]); + + _graph_store->set_neighbours(des, new_out_neighbors); + } + } + } +} + +template +void Index::inter_insert(uint32_t n, std::vector &pruned_list, InMemQueryScratch *scratch) +{ + inter_insert(n, pruned_list, _indexingRange, scratch); +} + +template void Index::link() +{ + uint32_t num_threads = _indexingThreads; + if (num_threads != 0) + omp_set_num_threads(num_threads); + + /* visit_order is a vector that is initialized to the entire graph */ + std::vector visit_order; + std::vector pool, tmp; + tsl::robin_set visited; + visit_order.reserve(_nd + _num_frozen_pts); + for (uint32_t i = 0; i < (uint32_t)_nd; i++) + { + visit_order.emplace_back(i); + } + + // If there are any frozen points, add them all. 
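+    // Frozen points occupy slots [_max_points, _max_points + _num_frozen_pts) and,
+    // when present, the first of them becomes _start, the fixed entry point returned
+    // by get_init_ids() for search and insertion.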
+ for (uint32_t frozen = (uint32_t)_max_points; frozen < _max_points + _num_frozen_pts; frozen++) + { + visit_order.emplace_back(frozen); + } + + // if there are frozen points, the first such one is set to be the _start + if (_num_frozen_pts > 0) + _start = (uint32_t)_max_points; + else + _start = calculate_entry_point(); + + diskann::Timer link_timer; + +#pragma omp parallel for schedule(dynamic, 2048) + for (int64_t node_ctr = 0; node_ctr < (int64_t)(visit_order.size()); node_ctr++) + { + auto node = visit_order[node_ctr]; + + // Find and add appropriate graph edges + ScratchStoreManager> manager(_query_scratch); + auto scratch = manager.scratch_space(); + std::vector pruned_list; + if (_filtered_index) + { + search_for_point_and_prune(node, _indexingQueueSize, pruned_list, scratch, true, _filterIndexingQueueSize); + } + else + { + search_for_point_and_prune(node, _indexingQueueSize, pruned_list, scratch); + } + assert(pruned_list.size() > 0); + + { + LockGuard guard(_locks[node]); + + _graph_store->set_neighbours(node, pruned_list); + assert(_graph_store->get_neighbours((location_t)node).size() <= _indexingRange); + } + + inter_insert(node, pruned_list, scratch); + + if (node_ctr % 100000 == 0) + { + diskann::cout << "\r" << (100.0 * node_ctr) / (visit_order.size()) << "% of index build completed." + << std::flush; + } + } + + if (_nd > 0) + { + diskann::cout << "Starting final cleanup.." << std::flush; + } +#pragma omp parallel for schedule(dynamic, 2048) + for (int64_t node_ctr = 0; node_ctr < (int64_t)(visit_order.size()); node_ctr++) + { + auto node = visit_order[node_ctr]; + if (_graph_store->get_neighbours((location_t)node).size() > _indexingRange) + { + ScratchStoreManager> manager(_query_scratch); + auto scratch = manager.scratch_space(); + + tsl::robin_set dummy_visited(0); + std::vector dummy_pool(0); + std::vector new_out_neighbors; + + for (auto cur_nbr : _graph_store->get_neighbours((location_t)node)) + { + if (dummy_visited.find(cur_nbr) == dummy_visited.end() && cur_nbr != node) + { + float dist = _data_store->get_distance(node, cur_nbr); + dummy_pool.emplace_back(Neighbor(cur_nbr, dist)); + dummy_visited.insert(cur_nbr); + } + } + prune_neighbors(node, dummy_pool, new_out_neighbors, scratch); + + _graph_store->clear_neighbours((location_t)node); + _graph_store->set_neighbours((location_t)node, new_out_neighbors); + } + } + if (_nd > 0) + { + diskann::cout << "done. 
Link time: " << ((double)link_timer.elapsed() / (double)1000000) << "s" << std::endl; + } +} + +template +void Index::prune_all_neighbors(const uint32_t max_degree, const uint32_t max_occlusion_size, + const float alpha) +{ + const uint32_t range = max_degree; + const uint32_t maxc = max_occlusion_size; + + _filtered_index = true; + + diskann::Timer timer; +#pragma omp parallel for + for (int64_t node = 0; node < (int64_t)(_max_points + _num_frozen_pts); node++) + { + if ((size_t)node < _nd || (size_t)node >= _max_points) + { + if (_graph_store->get_neighbours((location_t)node).size() > range) + { + tsl::robin_set dummy_visited(0); + std::vector dummy_pool(0); + std::vector new_out_neighbors; + + ScratchStoreManager> manager(_query_scratch); + auto scratch = manager.scratch_space(); + + for (auto cur_nbr : _graph_store->get_neighbours((location_t)node)) + { + if (dummy_visited.find(cur_nbr) == dummy_visited.end() && cur_nbr != node) + { + float dist = _data_store->get_distance((location_t)node, (location_t)cur_nbr); + dummy_pool.emplace_back(Neighbor(cur_nbr, dist)); + dummy_visited.insert(cur_nbr); + } + } + + prune_neighbors((uint32_t)node, dummy_pool, range, maxc, alpha, new_out_neighbors, scratch); + _graph_store->clear_neighbours((location_t)node); + _graph_store->set_neighbours((location_t)node, new_out_neighbors); + } + } + } + + diskann::cout << "Prune time : " << timer.elapsed() / 1000 << "ms" << std::endl; + size_t max = 0, min = 1 << 30, total = 0, cnt = 0; + for (size_t i = 0; i < _max_points + _num_frozen_pts; i++) + { + if (i < _nd || i >= _max_points) + { + const std::vector &pool = _graph_store->get_neighbours((location_t)i); + max = (std::max)(max, pool.size()); + min = (std::min)(min, pool.size()); + total += pool.size(); + if (pool.size() < 2) + cnt++; + } + } + if (min > max) + min = max; + if (_nd > 0) + { + diskann::cout << "Index built with degree: max:" << max + << " avg:" << (float)total / (float)(_nd + _num_frozen_pts) << " min:" << min + << " count(deg<2):" << cnt << std::endl; + } +} + +// REFACTOR +template +void Index::set_start_points(const T *data, size_t data_count) +{ + std::unique_lock ul(_update_lock); + std::unique_lock tl(_tag_lock); + if (_nd > 0) + throw ANNException("Can not set starting point for a non-empty index", -1, __FUNCSIG__, __FILE__, __LINE__); + + if (data_count != _num_frozen_pts * _dim) + throw ANNException("Invalid number of points", -1, __FUNCSIG__, __FILE__, __LINE__); + + // memcpy(_data + _aligned_dim * _max_points, data, _aligned_dim * + // sizeof(T) * _num_frozen_pts); + for (location_t i = 0; i < _num_frozen_pts; i++) + { + _data_store->set_vector((location_t)(i + _max_points), data + i * _dim); + } + _has_built = true; + diskann::cout << "Index start points set: #" << _num_frozen_pts << std::endl; +} + +template +void Index::_set_start_points_at_random(DataType radius, uint32_t random_seed) +{ + try + { + T radius_to_use = std::any_cast(radius); + this->set_start_points_at_random(radius_to_use, random_seed); + } + catch (const std::bad_any_cast &e) + { + throw ANNException( + "Error: bad any cast while performing _set_start_points_at_random() " + std::string(e.what()), -1); + } + catch (const std::exception &e) + { + throw ANNException("Error: " + std::string(e.what()), -1); + } +} + +template +void Index::set_start_points_at_random(T radius, uint32_t random_seed) +{ + std::mt19937 gen{random_seed}; + std::normal_distribution<> d{0.0, 1.0}; + + std::vector points_data; + points_data.reserve(_dim * _num_frozen_pts); + 
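+    // Each frozen start point below is drawn from an isotropic Gaussian and then
+    // rescaled so that its L2 norm equals `radius`.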
std::vector real_vec(_dim); + + for (size_t frozen_point = 0; frozen_point < _num_frozen_pts; frozen_point++) + { + double norm_sq = 0.0; + for (size_t i = 0; i < _dim; ++i) + { + auto r = d(gen); + real_vec[i] = r; + norm_sq += r * r; + } + + const double norm = std::sqrt(norm_sq); + for (auto iter : real_vec) + points_data.push_back(static_cast(iter * radius / norm)); + } + + set_start_points(points_data.data(), points_data.size()); +} + +template +void Index::build_with_data_populated(const std::vector &tags) +{ + diskann::cout << "Starting index build with " << _nd << " points... " << std::endl; + + if (_nd < 1) + throw ANNException("Error: Trying to build an index with 0 points", -1, __FUNCSIG__, __FILE__, __LINE__); + + if (_enable_tags && tags.size() != _nd) + { + std::stringstream stream; + stream << "ERROR: Driver requests loading " << _nd << " points from file," + << "but tags vector is of size " << tags.size() << "." << std::endl; + diskann::cerr << stream.str() << std::endl; + throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); + } + if (_enable_tags) + { + for (size_t i = 0; i < tags.size(); ++i) + { + _tag_to_location[tags[i]] = (uint32_t)i; + _location_to_tag.set(static_cast(i), tags[i]); + } + } + + uint32_t index_R = _indexingRange; + uint32_t num_threads_index = _indexingThreads; + uint32_t index_L = _indexingQueueSize; + uint32_t maxc = _indexingMaxC; + + if (_query_scratch.size() == 0) + { + initialize_query_scratch(5 + num_threads_index, index_L, index_L, index_R, maxc, + _data_store->get_aligned_dim()); + } + + generate_frozen_point(); + link(); + + size_t max = 0, min = SIZE_MAX, total = 0, cnt = 0; + for (size_t i = 0; i < _nd; i++) + { + auto &pool = _graph_store->get_neighbours((location_t)i); + max = std::max(max, pool.size()); + min = std::min(min, pool.size()); + total += pool.size(); + if (pool.size() < 2) + cnt++; + } + diskann::cout << "Index built with degree: max:" << max << " avg:" << (float)total / (float)(_nd + _num_frozen_pts) + << " min:" << min << " count(deg<2):" << cnt << std::endl; + + _has_built = true; +} +template +void Index::_build(const DataType &data, const size_t num_points_to_load, TagVector &tags) +{ + try + { + this->build(std::any_cast(data), num_points_to_load, tags.get>()); + } + catch (const std::bad_any_cast &e) + { + throw ANNException("Error: bad any cast in while building index. 
" + std::string(e.what()), -1); + } + catch (const std::exception &e) + { + throw ANNException("Error" + std::string(e.what()), -1); + } +} +template +void Index::build(const T *data, const size_t num_points_to_load, const std::vector &tags) +{ + if (num_points_to_load == 0) + { + throw ANNException("Do not call build with 0 points", -1, __FUNCSIG__, __FILE__, __LINE__); + } + if (_pq_dist) + { + throw ANNException("ERROR: DO not use this build interface with PQ distance", -1, __FUNCSIG__, __FILE__, + __LINE__); + } + + std::unique_lock ul(_update_lock); + + { + std::unique_lock tl(_tag_lock); + _nd = num_points_to_load; + + _data_store->populate_data(data, (location_t)num_points_to_load); + } + + build_with_data_populated(tags); +} + +template +void Index::build(const char *filename, const size_t num_points_to_load, const std::vector &tags) +{ + // idealy this should call build_filtered_index based on params passed + + std::unique_lock ul(_update_lock); + + // error checks + if (num_points_to_load == 0) + throw ANNException("Do not call build with 0 points", -1, __FUNCSIG__, __FILE__, __LINE__); + + if (!file_exists(filename)) + { + std::stringstream stream; + stream << "ERROR: Data file " << filename << " does not exist." << std::endl; + diskann::cerr << stream.str() << std::endl; + throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); + } + + size_t file_num_points, file_dim; + if (filename == nullptr) + { + throw diskann::ANNException("Can not build with an empty file", -1, __FUNCSIG__, __FILE__, __LINE__); + } + + diskann::get_bin_metadata(filename, file_num_points, file_dim); + if (file_num_points > _max_points) + { + std::stringstream stream; + stream << "ERROR: Driver requests loading " << num_points_to_load << " points and file has " << file_num_points + << " points, but " + << "index can support only " << _max_points << " points as specified in constructor." << std::endl; + + throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); + } + + if (num_points_to_load > file_num_points) + { + std::stringstream stream; + stream << "ERROR: Driver requests loading " << num_points_to_load << " points and file has only " + << file_num_points << " points." << std::endl; + + throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); + } + + if (file_dim != _dim) + { + std::stringstream stream; + stream << "ERROR: Driver requests loading " << _dim << " dimension," + << "but file has " << file_dim << " dimension." << std::endl; + diskann::cerr << stream.str() << std::endl; + + throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); + } + + // REFACTOR PQ TODO: We can remove this if and add a check in the InMemDataStore + // to not populate_data if it has been called once. + if (_pq_dist) + { +#ifdef EXEC_ENV_OLS + std::stringstream ss; + ss << "PQ Build is not supported in DLVS environment (i.e. if EXEC_ENV_OLS is defined)" << std::endl; + diskann::cerr << ss.str() << std::endl; + throw ANNException(ss.str(), -1, __FUNCSIG__, __FILE__, __LINE__); +#else + // REFACTOR TODO: Both in the previous code and in the current PQDataStore, + // we are writing the PQ files in the same path as the input file. Now we + // may not have write permissions to that folder, but we will always have + // write permissions to the output folder. So we should write the PQ files + // there. The problem is that the Index class gets the output folder prefix + // only at the time of save(), by which time we are too late. 
So leaving it + // as-is for now. + _pq_data_store->populate_data(filename, 0U); +#endif + } + + _data_store->populate_data(filename, 0U); + diskann::cout << "Using only first " << num_points_to_load << " from file.. " << std::endl; + + { + std::unique_lock tl(_tag_lock); + _nd = num_points_to_load; + } + build_with_data_populated(tags); +} + +template +void Index::build(const char *filename, const size_t num_points_to_load, const char *tag_filename) +{ + std::vector tags; + + if (_enable_tags) + { + std::unique_lock tl(_tag_lock); + if (tag_filename == nullptr) + { + throw ANNException("Tag filename is null, while _enable_tags is set", -1, __FUNCSIG__, __FILE__, __LINE__); + } + else + { + if (file_exists(tag_filename)) + { + diskann::cout << "Loading tags from " << tag_filename << " for vamana index build" << std::endl; + TagT *tag_data = nullptr; + size_t npts, ndim; + diskann::load_bin(tag_filename, tag_data, npts, ndim); + if (npts < num_points_to_load) + { + std::stringstream sstream; + sstream << "Loaded " << npts << " tags, insufficient to populate tags for " << num_points_to_load + << " points to load"; + throw diskann::ANNException(sstream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); + } + for (size_t i = 0; i < num_points_to_load; i++) + { + tags.push_back(tag_data[i]); + } + delete[] tag_data; + } + else + { + throw diskann::ANNException(std::string("Tag file") + tag_filename + " does not exist", -1, __FUNCSIG__, + __FILE__, __LINE__); + } + } + } + build(filename, num_points_to_load, tags); +} + +template +void Index::build(const std::string &data_file, const size_t num_points_to_load, + IndexFilterParams &filter_params) +{ + size_t points_to_load = num_points_to_load == 0 ? _max_points : num_points_to_load; + + auto s = std::chrono::high_resolution_clock::now(); + if (filter_params.label_file == "") + { + this->build(data_file.c_str(), points_to_load); + } + else + { + // TODO: this should ideally happen in save() + std::string labels_file_to_use = filter_params.save_path_prefix + "_label_formatted.txt"; + std::string mem_labels_int_map_file = filter_params.save_path_prefix + "_labels_map.txt"; + convert_labels_string_to_int(filter_params.label_file, labels_file_to_use, mem_labels_int_map_file, + filter_params.universal_label); + if (filter_params.universal_label != "") + { + LabelT unv_label_as_num = 0; + this->set_universal_label(unv_label_as_num); + } + this->build_filtered_index(data_file.c_str(), labels_file_to_use, points_to_load); + } + std::chrono::duration diff = std::chrono::high_resolution_clock::now() - s; + std::cout << "Indexing time: " << diff.count() << "\n"; +} + +template +std::unordered_map Index::load_label_map(const std::string &labels_map_file) +{ + std::unordered_map string_to_int_mp; + std::ifstream map_reader(labels_map_file); + std::string line, token; + LabelT token_as_num; + std::string label_str; + while (std::getline(map_reader, line)) + { + std::istringstream iss(line); + getline(iss, token, '\t'); + label_str = token; + getline(iss, token, '\t'); + token_as_num = (LabelT)std::stoul(token); + string_to_int_mp[label_str] = token_as_num; + } + return string_to_int_mp; +} + +template +LabelT Index::get_converted_label(const std::string &raw_label) +{ + if (_label_map.find(raw_label) != _label_map.end()) + { + return _label_map[raw_label]; + } + if (_use_universal_label) + { + return _universal_label; + } + std::stringstream stream; + stream << "Unable to find label in the Label Map"; + diskann::cerr << stream.str() << std::endl; + throw 
diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); +} + +template +void Index::parse_label_file(const std::string &label_file, size_t &num_points) +{ + // Format of Label txt file: filters with comma separators + + std::ifstream infile(label_file); + if (infile.fail()) + { + throw diskann::ANNException(std::string("Failed to open file ") + label_file, -1); + } + + std::string line, token; + uint32_t line_cnt = 0; + + while (std::getline(infile, line)) + { + line_cnt++; + } + _location_to_labels.resize(line_cnt, std::vector()); + + infile.clear(); + infile.seekg(0, std::ios::beg); + line_cnt = 0; + + while (std::getline(infile, line)) + { + std::istringstream iss(line); + std::vector lbls(0); + getline(iss, token, '\t'); + std::istringstream new_iss(token); + while (getline(new_iss, token, ',')) + { + token.erase(std::remove(token.begin(), token.end(), '\n'), token.end()); + token.erase(std::remove(token.begin(), token.end(), '\r'), token.end()); + LabelT token_as_num = (LabelT)std::stoul(token); + lbls.push_back(token_as_num); + _labels.insert(token_as_num); + } + + std::sort(lbls.begin(), lbls.end()); + _location_to_labels[line_cnt] = lbls; + line_cnt++; + } + num_points = (size_t)line_cnt; + diskann::cout << "Identified " << _labels.size() << " distinct label(s)" << std::endl; +} + +template +void Index::_set_universal_label(const LabelType universal_label) +{ + this->set_universal_label(std::any_cast(universal_label)); +} + +template +void Index::set_universal_label(const LabelT &label) +{ + _use_universal_label = true; + _universal_label = label; +} + +template +void Index::build_filtered_index(const char *filename, const std::string &label_file, + const size_t num_points_to_load, const std::vector &tags) +{ + _filtered_index = true; + _label_to_start_id.clear(); + size_t num_points_labels = 0; + + parse_label_file(label_file, + num_points_labels); // determines medoid for each label and identifies + // the points to label mapping + + std::unordered_map> label_to_points; + + for (uint32_t point_id = 0; point_id < num_points_to_load; point_id++) + { + for (auto label : _location_to_labels[point_id]) + { + if (label != _universal_label) + { + label_to_points[label].emplace_back(point_id); + } + else + { + for (typename tsl::robin_set::size_type lbl = 0; lbl < _labels.size(); lbl++) + { + auto itr = _labels.begin(); + std::advance(itr, lbl); + auto &x = *itr; + label_to_points[x].emplace_back(point_id); + } + } + } + } + + uint32_t num_cands = 25; + for (auto itr = _labels.begin(); itr != _labels.end(); itr++) + { + uint32_t best_medoid_count = std::numeric_limits::max(); + auto &curr_label = *itr; + uint32_t best_medoid; + auto labeled_points = label_to_points[curr_label]; + for (uint32_t cnd = 0; cnd < num_cands; cnd++) + { + uint32_t cur_cnd = labeled_points[rand() % labeled_points.size()]; + uint32_t cur_cnt = std::numeric_limits::max(); + if (_medoid_counts.find(cur_cnd) == _medoid_counts.end()) + { + _medoid_counts[cur_cnd] = 0; + cur_cnt = 0; + } + else + { + cur_cnt = _medoid_counts[cur_cnd]; + } + if (cur_cnt < best_medoid_count) + { + best_medoid_count = cur_cnt; + best_medoid = cur_cnd; + } + } + _label_to_start_id[curr_label] = best_medoid; + _medoid_counts[best_medoid]++; + } + + this->build(filename, num_points_to_load, tags); +} + +template +std::pair Index::_search(const DataType &query, const size_t K, const uint32_t L, + std::any &indices, float *distances) +{ + try + { + auto typed_query = std::any_cast(query); + if (typeid(uint32_t *) == 
indices.type()) + { + auto u32_ptr = std::any_cast(indices); + return this->search(typed_query, K, L, u32_ptr, distances); + } + else if (typeid(uint64_t *) == indices.type()) + { + auto u64_ptr = std::any_cast(indices); + return this->search(typed_query, K, L, u64_ptr, distances); + } + else + { + throw ANNException("Error: indices type can only be uint64_t or uint32_t.", -1); + } + } + catch (const std::bad_any_cast &e) + { + throw ANNException("Error: bad any cast while searching. " + std::string(e.what()), -1); + } + catch (const std::exception &e) + { + throw ANNException("Error: " + std::string(e.what()), -1); + } +} + +template +template +std::pair Index::search(const T *query, const size_t K, const uint32_t L, + IdType *indices, float *distances) +{ + if (K > (uint64_t)L) + { + throw ANNException("Set L to a value of at least K", -1, __FUNCSIG__, __FILE__, __LINE__); + } + + ScratchStoreManager> manager(_query_scratch); + auto scratch = manager.scratch_space(); + + if (L > scratch->get_L()) + { + diskann::cout << "Attempting to expand query scratch_space. Was created " + << "with Lsize: " << scratch->get_L() << " but search L is: " << L << std::endl; + scratch->resize_for_new_L(L); + diskann::cout << "Resize completed. New scratch->L is " << scratch->get_L() << std::endl; + } + + const std::vector unused_filter_label; + const std::vector init_ids = get_init_ids(); + + std::shared_lock lock(_update_lock); + + _data_store->preprocess_query(query, scratch); + + auto retval = iterate_to_fixed_point(scratch, L, init_ids, false, unused_filter_label, true); + + NeighborPriorityQueue &best_L_nodes = scratch->best_l_nodes(); + + size_t pos = 0; + for (size_t i = 0; i < best_L_nodes.size(); ++i) + { + if (best_L_nodes[i].id < _max_points) + { + // safe because Index uses uint32_t ids internally + // and IDType will be uint32_t or uint64_t + indices[pos] = (IdType)best_L_nodes[i].id; + if (distances != nullptr) + { +#ifdef EXEC_ENV_OLS + // DLVS expects negative distances + distances[pos] = best_L_nodes[i].distance; +#else + distances[pos] = _dist_metric == diskann::Metric::INNER_PRODUCT ? 
-1 * best_L_nodes[i].distance + : best_L_nodes[i].distance; +#endif + } + pos++; + } + if (pos == K) + break; + } + if (pos < K) + { + diskann::cerr << "Found pos: " << pos << "fewer than K elements " << K << " for query" << std::endl; + } + + return retval; +} + +template +std::pair Index::_search_with_filters(const DataType &query, + const std::string &raw_label, const size_t K, + const uint32_t L, std::any &indices, + float *distances) +{ + auto converted_label = this->get_converted_label(raw_label); + if (typeid(uint64_t *) == indices.type()) + { + auto ptr = std::any_cast(indices); + return this->search_with_filters(std::any_cast(query), converted_label, K, L, ptr, distances); + } + else if (typeid(uint32_t *) == indices.type()) + { + auto ptr = std::any_cast(indices); + return this->search_with_filters(std::any_cast(query), converted_label, K, L, ptr, distances); + } + else + { + throw ANNException("Error: Id type can only be uint64_t or uint32_t.", -1); + } +} + +template +template +std::pair Index::search_with_filters(const T *query, const LabelT &filter_label, + const size_t K, const uint32_t L, + IdType *indices, float *distances) +{ + if (K > (uint64_t)L) + { + throw ANNException("Set L to a value of at least K", -1, __FUNCSIG__, __FILE__, __LINE__); + } + + ScratchStoreManager> manager(_query_scratch); + auto scratch = manager.scratch_space(); + + if (L > scratch->get_L()) + { + diskann::cout << "Attempting to expand query scratch_space. Was created " + << "with Lsize: " << scratch->get_L() << " but search L is: " << L << std::endl; + scratch->resize_for_new_L(L); + diskann::cout << "Resize completed. New scratch->L is " << scratch->get_L() << std::endl; + } + + std::vector filter_vec; + std::vector init_ids = get_init_ids(); + + std::shared_lock lock(_update_lock); + std::shared_lock tl(_tag_lock, std::defer_lock); + if (_dynamic_index) + tl.lock(); + + if (_label_to_start_id.find(filter_label) != _label_to_start_id.end()) + { + init_ids.emplace_back(_label_to_start_id[filter_label]); + } + else + { + diskann::cout << "No filtered medoid found. exitting " + << std::endl; // RKNOTE: If universal label found start there + throw diskann::ANNException("No filtered medoid found. exitting ", -1); + } + if (_dynamic_index) + tl.unlock(); + + filter_vec.emplace_back(filter_label); + + _data_store->preprocess_query(query, scratch); + auto retval = iterate_to_fixed_point(scratch, L, init_ids, true, filter_vec, true); + + auto best_L_nodes = scratch->best_l_nodes(); + + size_t pos = 0; + for (size_t i = 0; i < best_L_nodes.size(); ++i) + { + if (best_L_nodes[i].id < _max_points) + { + indices[pos] = (IdType)best_L_nodes[i].id; + + if (distances != nullptr) + { +#ifdef EXEC_ENV_OLS + // DLVS expects negative distances + distances[pos] = best_L_nodes[i].distance; +#else + distances[pos] = _dist_metric == diskann::Metric::INNER_PRODUCT ? 
-1 * best_L_nodes[i].distance + : best_L_nodes[i].distance; +#endif + } + pos++; + } + if (pos == K) + break; + } + if (pos < K) + { + diskann::cerr << "Found fewer than K elements for query" << std::endl; + } + + return retval; +} + +template +size_t Index::_search_with_tags(const DataType &query, const uint64_t K, const uint32_t L, + const TagType &tags, float *distances, DataVector &res_vectors, + bool use_filters, const std::string filter_label) +{ + try + { + return this->search_with_tags(std::any_cast(query), K, L, std::any_cast(tags), distances, + res_vectors.get>(), use_filters, filter_label); + } + catch (const std::bad_any_cast &e) + { + throw ANNException("Error: bad any cast while performing _search_with_tags() " + std::string(e.what()), -1); + } + catch (const std::exception &e) + { + throw ANNException("Error: " + std::string(e.what()), -1); + } +} + +template +size_t Index::search_with_tags(const T *query, const uint64_t K, const uint32_t L, TagT *tags, + float *distances, std::vector &res_vectors, bool use_filters, + const std::string filter_label) +{ + if (K > (uint64_t)L) + { + throw ANNException("Set L to a value of at least K", -1, __FUNCSIG__, __FILE__, __LINE__); + } + ScratchStoreManager> manager(_query_scratch); + auto scratch = manager.scratch_space(); + + if (L > scratch->get_L()) + { + diskann::cout << "Attempting to expand query scratch_space. Was created " + << "with Lsize: " << scratch->get_L() << " but search L is: " << L << std::endl; + scratch->resize_for_new_L(L); + diskann::cout << "Resize completed. New scratch->L is " << scratch->get_L() << std::endl; + } + + std::shared_lock ul(_update_lock); + + const std::vector init_ids = get_init_ids(); + + //_distance->preprocess_query(query, _data_store->get_dims(), + // scratch->aligned_query()); + _data_store->preprocess_query(query, scratch); + if (!use_filters) + { + const std::vector unused_filter_label; + iterate_to_fixed_point(scratch, L, init_ids, false, unused_filter_label, true); + } + else + { + std::vector filter_vec; + auto converted_label = this->get_converted_label(filter_label); + filter_vec.push_back(converted_label); + iterate_to_fixed_point(scratch, L, init_ids, true, filter_vec, true); + } + + NeighborPriorityQueue &best_L_nodes = scratch->best_l_nodes(); + assert(best_L_nodes.size() <= L); + + std::shared_lock tl(_tag_lock); + + size_t pos = 0; + for (size_t i = 0; i < best_L_nodes.size(); ++i) + { + auto node = best_L_nodes[i]; + + TagT tag; + if (_location_to_tag.try_get(node.id, tag)) + { + tags[pos] = tag; + + if (res_vectors.size() > 0) + { + _data_store->get_vector(node.id, res_vectors[pos]); + } + + if (distances != nullptr) + { +#ifdef EXEC_ENV_OLS + distances[pos] = node.distance; // DLVS expects negative distances +#else + distances[pos] = _dist_metric == INNER_PRODUCT ? -1 * node.distance : node.distance; +#endif + } + pos++; + // If res_vectors.size() < k, clip at the value. 
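+            // Only locations that still map to a tag reach this point; everything
+            // else (e.g. frozen points) is skipped by the try_get() check above.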
+ if (pos == K || pos == res_vectors.size()) + break; + } + } + + return pos; +} + +template size_t Index::get_num_points() +{ + std::shared_lock tl(_tag_lock); + return _nd; +} + +template size_t Index::get_max_points() +{ + std::shared_lock tl(_tag_lock); + return _max_points; +} + +template void Index::generate_frozen_point() +{ + if (_num_frozen_pts == 0) + return; + + if (_num_frozen_pts > 1) + { + throw ANNException("More than one frozen point not supported in generate_frozen_point", -1, __FUNCSIG__, + __FILE__, __LINE__); + } + + if (_nd == 0) + { + throw ANNException("ERROR: Can not pick a frozen point since nd=0", -1, __FUNCSIG__, __FILE__, __LINE__); + } + size_t res = calculate_entry_point(); + + // REFACTOR PQ: Not sure if we should do this for both stores. + if (_pq_dist) + { + // copy the PQ data corresponding to the point returned by + // calculate_entry_point + // memcpy(_pq_data + _max_points * _num_pq_chunks, + // _pq_data + res * _num_pq_chunks, + // _num_pq_chunks * DIV_ROUND_UP(NUM_PQ_BITS, 8)); + _pq_data_store->copy_vectors((location_t)res, (location_t)_max_points, 1); + } + else + { + _data_store->copy_vectors((location_t)res, (location_t)_max_points, 1); + } + _frozen_pts_used++; +} + +template int Index::enable_delete() +{ + assert(_enable_tags); + + if (!_enable_tags) + { + diskann::cerr << "Tags must be instantiated for deletions" << std::endl; + return -2; + } + + if (this->_deletes_enabled) + { + return 0; + } + + std::unique_lock ul(_update_lock); + std::unique_lock tl(_tag_lock); + std::unique_lock dl(_delete_lock); + + if (_data_compacted) + { + for (uint32_t slot = (uint32_t)_nd; slot < _max_points; ++slot) + { + _empty_slots.insert(slot); + } + } + this->_deletes_enabled = true; + return 0; +} + +template +inline void Index::process_delete(const tsl::robin_set &old_delete_set, size_t loc, + const uint32_t range, const uint32_t maxc, const float alpha, + InMemQueryScratch *scratch) +{ + tsl::robin_set &expanded_nodes_set = scratch->expanded_nodes_set(); + std::vector &expanded_nghrs_vec = scratch->expanded_nodes_vec(); + + // If this condition were not true, deadlock could result + assert(old_delete_set.find((uint32_t)loc) == old_delete_set.end()); + + std::vector adj_list; + { + // Acquire and release lock[loc] before acquiring locks for neighbors + std::unique_lock adj_list_lock; + if (_conc_consolidate) + adj_list_lock = std::unique_lock(_locks[loc]); + adj_list = _graph_store->get_neighbours((location_t)loc); + } + + bool modify = false; + for (auto ngh : adj_list) + { + if (old_delete_set.find(ngh) == old_delete_set.end()) + { + expanded_nodes_set.insert(ngh); + } + else + { + modify = true; + + std::unique_lock ngh_lock; + if (_conc_consolidate) + ngh_lock = std::unique_lock(_locks[ngh]); + for (auto j : _graph_store->get_neighbours((location_t)ngh)) + if (j != loc && old_delete_set.find(j) == old_delete_set.end()) + expanded_nodes_set.insert(j); + } + } + + if (modify) + { + if (expanded_nodes_set.size() <= range) + { + std::unique_lock adj_list_lock(_locks[loc]); + _graph_store->clear_neighbours((location_t)loc); + for (auto &ngh : expanded_nodes_set) + _graph_store->add_neighbour((location_t)loc, ngh); + } + else + { + // Create a pool of Neighbor candidates from the expanded_nodes_set + expanded_nghrs_vec.reserve(expanded_nodes_set.size()); + for (auto &ngh : expanded_nodes_set) + { + expanded_nghrs_vec.emplace_back(ngh, _data_store->get_distance((location_t)loc, (location_t)ngh)); + } + std::sort(expanded_nghrs_vec.begin(), 
expanded_nghrs_vec.end()); + std::vector &occlude_list_output = scratch->occlude_list_output(); + occlude_list((uint32_t)loc, expanded_nghrs_vec, alpha, range, maxc, occlude_list_output, scratch, + &old_delete_set); + std::unique_lock adj_list_lock(_locks[loc]); + _graph_store->set_neighbours((location_t)loc, occlude_list_output); + } + } +} + +// Returns number of live points left after consolidation +template +consolidation_report Index::consolidate_deletes(const IndexWriteParameters ¶ms) +{ + if (!_enable_tags) + throw diskann::ANNException("Point tag array not instantiated", -1, __FUNCSIG__, __FILE__, __LINE__); + + { + std::shared_lock ul(_update_lock); + std::shared_lock tl(_tag_lock); + std::shared_lock dl(_delete_lock); + if (_empty_slots.size() + _nd != _max_points) + { + std::string err = "#empty slots + nd != max points"; + diskann::cerr << err << std::endl; + throw ANNException(err, -1, __FUNCSIG__, __FILE__, __LINE__); + } + + if (_location_to_tag.size() + _delete_set->size() != _nd) + { + diskann::cerr << "Error: _location_to_tag.size (" << _location_to_tag.size() << ") + _delete_set->size (" + << _delete_set->size() << ") != _nd(" << _nd << ") "; + return consolidation_report(diskann::consolidation_report::status_code::INCONSISTENT_COUNT_ERROR, 0, 0, 0, + 0, 0, 0, 0); + } + + if (_location_to_tag.size() != _tag_to_location.size()) + { + throw diskann::ANNException("_location_to_tag and _tag_to_location not of same size", -1, __FUNCSIG__, + __FILE__, __LINE__); + } + } + + std::unique_lock update_lock(_update_lock, std::defer_lock); + if (!_conc_consolidate) + update_lock.lock(); + + std::unique_lock cl(_consolidate_lock, std::defer_lock); + if (!cl.try_lock()) + { + diskann::cerr << "Consildate delete function failed to acquire consolidate lock" << std::endl; + return consolidation_report(diskann::consolidation_report::status_code::LOCK_FAIL, 0, 0, 0, 0, 0, 0, 0); + } + + diskann::cout << "Starting consolidate_deletes... "; + + std::unique_ptr> old_delete_set(new tsl::robin_set); + { + std::unique_lock dl(_delete_lock); + std::swap(_delete_set, old_delete_set); + } + + if (old_delete_set->find(_start) != old_delete_set->end()) + { + throw diskann::ANNException("ERROR: start node has been deleted", -1, __FUNCSIG__, __FILE__, __LINE__); + } + + const uint32_t range = params.max_degree; + const uint32_t maxc = params.max_occlusion_size; + const float alpha = params.alpha; + const uint32_t num_threads = params.num_threads == 0 ? 
omp_get_num_procs() : params.num_threads; + + uint32_t num_calls_to_process_delete = 0; + diskann::Timer timer; +#pragma omp parallel for num_threads(num_threads) schedule(dynamic, 8192) reduction(+ : num_calls_to_process_delete) + for (int64_t loc = 0; loc < (int64_t)_max_points; loc++) + { + if (old_delete_set->find((uint32_t)loc) == old_delete_set->end() && !_empty_slots.is_in_set((uint32_t)loc)) + { + ScratchStoreManager> manager(_query_scratch); + auto scratch = manager.scratch_space(); + process_delete(*old_delete_set, loc, range, maxc, alpha, scratch); + num_calls_to_process_delete += 1; + } + } + for (int64_t loc = _max_points; loc < (int64_t)(_max_points + _num_frozen_pts); loc++) + { + ScratchStoreManager> manager(_query_scratch); + auto scratch = manager.scratch_space(); + process_delete(*old_delete_set, loc, range, maxc, alpha, scratch); + num_calls_to_process_delete += 1; + } + + std::unique_lock tl(_tag_lock); + size_t ret_nd = release_locations(*old_delete_set); + size_t max_points = _max_points; + size_t empty_slots_size = _empty_slots.size(); + + std::shared_lock dl(_delete_lock); + size_t delete_set_size = _delete_set->size(); + size_t old_delete_set_size = old_delete_set->size(); + + if (!_conc_consolidate) + { + update_lock.unlock(); + } + + double duration = timer.elapsed() / 1000000.0; + diskann::cout << " done in " << duration << " seconds." << std::endl; + return consolidation_report(diskann::consolidation_report::status_code::SUCCESS, ret_nd, max_points, + empty_slots_size, old_delete_set_size, delete_set_size, num_calls_to_process_delete, + duration); +} + +template void Index::compact_frozen_point() +{ + if (_nd < _max_points && _num_frozen_pts > 0) + { + reposition_points((uint32_t)_max_points, (uint32_t)_nd, (uint32_t)_num_frozen_pts); + _start = (uint32_t)_nd; + + if (_filtered_index && _dynamic_index) + { + // update medoid id's as frozen points are treated as medoid + for (auto &[label, medoid_id] : _label_to_start_id) + { + /* if (label == _universal_label) + continue;*/ + _label_to_start_id[label] = (uint32_t)_nd + (medoid_id - (uint32_t)_max_points); + } + } + } +} + +// Should be called after acquiring _update_lock +template void Index::compact_data() +{ + if (!_dynamic_index) + throw ANNException("Can not compact a non-dynamic index", -1, __FUNCSIG__, __FILE__, __LINE__); + + if (_data_compacted) + { + diskann::cerr << "Warning! Calling compact_data() when _data_compacted is true!" 
<< std::endl; + return; + } + + if (_delete_set->size() > 0) + { + throw ANNException("Can not compact data when index has non-empty _delete_set of " + "size: " + + std::to_string(_delete_set->size()), + -1, __FUNCSIG__, __FILE__, __LINE__); + } + + diskann::Timer timer; + + std::vector new_location = std::vector(_max_points + _num_frozen_pts, UINT32_MAX); + + uint32_t new_counter = 0; + std::set empty_locations; + for (uint32_t old_location = 0; old_location < _max_points; old_location++) + { + if (_location_to_tag.contains(old_location)) + { + new_location[old_location] = new_counter; + new_counter++; + } + else + { + empty_locations.insert(old_location); + } + } + for (uint32_t old_location = (uint32_t)_max_points; old_location < _max_points + _num_frozen_pts; old_location++) + { + new_location[old_location] = old_location; + } + + // If start node is removed, throw an exception + if (_start < _max_points && !_location_to_tag.contains(_start)) + { + throw diskann::ANNException("ERROR: Start node deleted.", -1, __FUNCSIG__, __FILE__, __LINE__); + } + + size_t num_dangling = 0; + for (uint32_t old = 0; old < _max_points + _num_frozen_pts; ++old) + { + // compact _final_graph + std::vector new_adj_list; + + if ((new_location[old] < _max_points) // If point continues to exist + || (old >= _max_points && old < _max_points + _num_frozen_pts)) + { + new_adj_list.reserve(_graph_store->get_neighbours((location_t)old).size()); + for (auto ngh_iter : _graph_store->get_neighbours((location_t)old)) + { + if (empty_locations.find(ngh_iter) != empty_locations.end()) + { + ++num_dangling; + diskann::cerr << "Error in compact_data(). _final_graph[" << old << "] has neighbor " << ngh_iter + << " which is a location not associated with any tag." << std::endl; + } + else + { + new_adj_list.push_back(new_location[ngh_iter]); + } + } + //_graph_store->get_neighbours((location_t)old).swap(new_adj_list); + _graph_store->set_neighbours((location_t)old, new_adj_list); + + // Move the data and adj list to the correct position + if (new_location[old] != old) + { + assert(new_location[old] < old); + _graph_store->swap_neighbours(new_location[old], (location_t)old); + + if (_filtered_index) + { + _location_to_labels[new_location[old]].swap(_location_to_labels[old]); + } + + _data_store->copy_vectors(old, new_location[old], 1); + } + } + else + { + _graph_store->clear_neighbours((location_t)old); + } + } + diskann::cerr << "#dangling references after data compaction: " << num_dangling << std::endl; + + _tag_to_location.clear(); + for (auto pos = _location_to_tag.find_first(); pos.is_valid(); pos = _location_to_tag.find_next(pos)) + { + const auto tag = _location_to_tag.get(pos); + _tag_to_location[tag] = new_location[pos._key]; + } + _location_to_tag.clear(); + for (const auto &iter : _tag_to_location) + { + _location_to_tag.set(iter.second, iter.first); + } + // remove all cleared up old + for (size_t old = _nd; old < _max_points; ++old) + { + _graph_store->clear_neighbours((location_t)old); + } + if (_filtered_index) + { + for (size_t old = _nd; old < _max_points; old++) + { + _location_to_labels[old].clear(); + } + } + + _empty_slots.clear(); + // mark all slots after _nd as empty + for (auto i = _nd; i < _max_points; i++) + { + _empty_slots.insert((uint32_t)i); + } + _data_compacted = true; + diskann::cout << "Time taken for compact_data: " << timer.elapsed() / 1000000. << "s." 
<< std::endl; +} + +// +// Caller must hold unique _tag_lock and _delete_lock before calling this +// +template int Index::reserve_location() +{ + if (_nd >= _max_points) + { + return -1; + } + uint32_t location; + if (_data_compacted && _empty_slots.is_empty()) + { + // This code path is encountered when enable_delete hasn't been + // called yet, so no points have been deleted and _empty_slots + // hasn't been filled in. In that case, just keep assigning + // consecutive locations. + location = (uint32_t)_nd; + } + else + { + assert(_empty_slots.size() != 0); + assert(_empty_slots.size() + _nd == _max_points); + + location = _empty_slots.pop_any(); + _delete_set->erase(location); + } + ++_nd; + return location; +} + +template size_t Index::release_location(int location) +{ + if (_empty_slots.is_in_set(location)) + throw ANNException("Trying to release location, but location already in empty slots", -1, __FUNCSIG__, __FILE__, + __LINE__); + _empty_slots.insert(location); + + _nd--; + return _nd; +} + +template +size_t Index::release_locations(const tsl::robin_set &locations) +{ + for (auto location : locations) + { + if (_empty_slots.is_in_set(location)) + throw ANNException("Trying to release location, but location " + "already in empty slots", + -1, __FUNCSIG__, __FILE__, __LINE__); + _empty_slots.insert(location); + + _nd--; + } + + if (_empty_slots.size() + _nd != _max_points) + throw ANNException("#empty slots + nd != max points", -1, __FUNCSIG__, __FILE__, __LINE__); + + return _nd; +} + +template +void Index::reposition_points(uint32_t old_location_start, uint32_t new_location_start, + uint32_t num_locations) +{ + if (num_locations == 0 || old_location_start == new_location_start) + { + return; + } + + // Update pointers to the moved nodes. Note: the computation is correct even + // when new_location_start < old_location_start given the C++ uint32_t + // integer arithmetic rules. + const uint32_t location_delta = new_location_start - old_location_start; + + std::vector updated_neighbours_location; + for (uint32_t i = 0; i < _max_points + _num_frozen_pts; i++) + { + auto &i_neighbours = _graph_store->get_neighbours((location_t)i); + std::vector i_neighbours_copy(i_neighbours.begin(), i_neighbours.end()); + for (auto &loc : i_neighbours_copy) + { + if (loc >= old_location_start && loc < old_location_start + num_locations) + loc += location_delta; + } + _graph_store->set_neighbours(i, i_neighbours_copy); + } + + // The [start, end) interval which will contain obsolete points to be + // cleared. + uint32_t mem_clear_loc_start = old_location_start; + uint32_t mem_clear_loc_end_limit = old_location_start + num_locations; + + // Move the adjacency lists. Make sure that overlapping ranges are handled + // correctly. + if (new_location_start < old_location_start) + { + // New location before the old location: copy the entries in order + // to avoid modifying locations that are yet to be copied. + for (uint32_t loc_offset = 0; loc_offset < num_locations; loc_offset++) + { + assert(_graph_store->get_neighbours(new_location_start + loc_offset).empty()); + _graph_store->swap_neighbours(new_location_start + loc_offset, old_location_start + loc_offset); + if (_dynamic_index && _filtered_index) + { + _location_to_labels[new_location_start + loc_offset].swap( + _location_to_labels[old_location_start + loc_offset]); + } + } + // If ranges are overlapping, make sure not to clear the newly copied + // data. 
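+ // [Editor's note] A short worked example of the unsigned-delta rewrite above,
+ // added for clarity; the numbers are illustrative and not part of the original
+ // source.  With old_location_start = 100, new_location_start = 40 and
+ // num_locations = 10:
+ //
+ //   location_delta = 40u - 100u == 0xFFFFFFC4   (that is, -60 modulo 2^32)
+ //   a neighbour id loc = 105 becomes loc + location_delta == 45 == loc - 60,
+ //   because uint32_t addition wraps modulo 2^32.
+ //
+ // The copy loops follow the same rule as memmove(): when the destination range
+ // starts before the source range, slots are copied front-to-back, otherwise
+ // back-to-front, so an overlapping slot is never overwritten before it is read.
+ // The check below then shrinks the range of obsolete slots to clear, so the
+ // freshly copied entries are preserved.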
+ if (mem_clear_loc_start < new_location_start + num_locations) + { + // Clear only after the end of the new range. + mem_clear_loc_start = new_location_start + num_locations; + } + } + else + { + // Old location after the new location: copy from the end of the range + // to avoid modifying locations that are yet to be copied. + for (uint32_t loc_offset = num_locations; loc_offset > 0; loc_offset--) + { + assert(_graph_store->get_neighbours(new_location_start + loc_offset - 1u).empty()); + _graph_store->swap_neighbours(new_location_start + loc_offset - 1u, old_location_start + loc_offset - 1u); + if (_dynamic_index && _filtered_index) + { + _location_to_labels[new_location_start + loc_offset - 1u].swap( + _location_to_labels[old_location_start + loc_offset - 1u]); + } + } + + // If ranges are overlapping, make sure not to clear the newly copied + // data. + if (mem_clear_loc_end_limit > new_location_start) + { + // Clear only up to the beginning of the new range. + mem_clear_loc_end_limit = new_location_start; + } + } + _data_store->move_vectors(old_location_start, new_location_start, num_locations); +} + +template void Index::reposition_frozen_point_to_end() +{ + if (_num_frozen_pts == 0) + return; + + if (_nd == _max_points) + { + diskann::cout << "Not repositioning frozen point as it is already at the end." << std::endl; + return; + } + + reposition_points((uint32_t)_nd, (uint32_t)_max_points, (uint32_t)_num_frozen_pts); + _start = (uint32_t)_max_points; + + // update medoid id's as frozen points are treated as medoid + if (_filtered_index && _dynamic_index) + { + for (auto &[label, medoid_id] : _label_to_start_id) + { + /*if (label == _universal_label) + continue;*/ + _label_to_start_id[label] = (uint32_t)_max_points + (medoid_id - (uint32_t)_nd); + } + } +} + +template void Index::resize(size_t new_max_points) +{ + const size_t new_internal_points = new_max_points + _num_frozen_pts; + auto start = std::chrono::high_resolution_clock::now(); + assert(_empty_slots.size() == 0); // should not resize if there are empty slots. 
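+ // [Editor's note] Illustrative summary, not part of the original source: resize()
+ // grows every capacity-dependent structure in lock-step -- the data store, the
+ // graph store and the per-node lock vector -- then repositions the frozen points
+ // so they remain at the end of the enlarged address space, before the new slots
+ // [_nd, new_max_points) are handed back to _empty_slots.  The EXPAND_IF_FULL
+ // branch of insert_point() drives it roughly as:
+ //
+ //   if (_nd >= _max_points) {
+ //       auto new_max_points = (size_t)(_max_points * INDEX_GROWTH_FACTOR); // 1.5x
+ //       resize(new_max_points);
+ //   }
+ //
+ // so capacity grows geometrically rather than one slot at a time.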
+ + _data_store->resize((location_t)new_internal_points); + _graph_store->resize_graph(new_internal_points); + _locks = std::vector(new_internal_points); + + if (_num_frozen_pts != 0) + { + reposition_points((uint32_t)_max_points, (uint32_t)new_max_points, (uint32_t)_num_frozen_pts); + _start = (uint32_t)new_max_points; + } + + _max_points = new_max_points; + _empty_slots.reserve(_max_points); + for (auto i = _nd; i < _max_points; i++) + { + _empty_slots.insert((uint32_t)i); + } + + auto stop = std::chrono::high_resolution_clock::now(); + diskann::cout << "Resizing took: " << std::chrono::duration(stop - start).count() << "s" << std::endl; +} + +template +int Index::_insert_point(const DataType &point, const TagType tag) +{ + try + { + return this->insert_point(std::any_cast(point), std::any_cast(tag)); + } + catch (const std::bad_any_cast &anycast_e) + { + throw new ANNException("Error:Trying to insert invalid data type" + std::string(anycast_e.what()), -1); + } + catch (const std::exception &e) + { + throw new ANNException("Error:" + std::string(e.what()), -1); + } +} + +template +int Index::_insert_point(const DataType &point, const TagType tag, Labelvector &labels) +{ + try + { + return this->insert_point(std::any_cast(point), std::any_cast(tag), + labels.get>()); + } + catch (const std::bad_any_cast &anycast_e) + { + throw new ANNException("Error:Trying to insert invalid data type" + std::string(anycast_e.what()), -1); + } + catch (const std::exception &e) + { + throw new ANNException("Error:" + std::string(e.what()), -1); + } +} + +template +int Index::insert_point(const T *point, const TagT tag) +{ + std::vector no_labels{0}; + return insert_point(point, tag, no_labels); +} + +template +int Index::insert_point(const T *point, const TagT tag, const std::vector &labels) +{ + + assert(_has_built); + if (tag == 0) + { + throw diskann::ANNException("Do not insert point with tag 0. That is " + "reserved for points hidden " + "from the user.", + -1, __FUNCSIG__, __FILE__, __LINE__); + } + + std::shared_lock shared_ul(_update_lock); + std::unique_lock tl(_tag_lock); + std::unique_lock dl(_delete_lock); + + auto location = reserve_location(); + if (_filtered_index) + { + if (labels.empty()) + { + release_location(location); + std::cerr << "Error: Can't insert point with tag " + get_tag_string(tag) + + " . there are no labels for the point." 
+ << std::endl; + return -1; + } + + _location_to_labels[location] = labels; + + for (LabelT label : labels) + { + if (_labels.find(label) == _labels.end()) + { + if (_frozen_pts_used >= _num_frozen_pts) + { + throw ANNException( + "Error: For dynamic filtered index, the number of frozen points should be atleast equal " + "to number of unique labels.", + -1); + } + + auto fz_location = (int)(_max_points) + _frozen_pts_used; // as first _fz_point + _labels.insert(label); + _label_to_start_id[label] = (uint32_t)fz_location; + _location_to_labels[fz_location] = {label}; + _data_store->set_vector((location_t)fz_location, point); + _frozen_pts_used++; + } + } + } + + if (location == -1) + { +#if EXPAND_IF_FULL + dl.unlock(); + tl.unlock(); + shared_ul.unlock(); + + { + std::unique_lock ul(_update_lock); + tl.lock(); + dl.lock(); + + if (_nd >= _max_points) + { + auto new_max_points = (size_t)(_max_points * INDEX_GROWTH_FACTOR); + resize(new_max_points); + } + + dl.unlock(); + tl.unlock(); + ul.unlock(); + } + + shared_ul.lock(); + tl.lock(); + dl.lock(); + + location = reserve_location(); + if (location == -1) + { + throw diskann::ANNException("Cannot reserve location even after " + "expanding graph. Terminating.", + -1, __FUNCSIG__, __FILE__, __LINE__); + } +#else + return -1; +#endif + } // cant insert as active pts >= max_pts + dl.unlock(); + + // Insert tag and mapping to location + if (_enable_tags) + { + // if tags are enabled and tag is already inserted. so we can't reuse that tag. + if (_tag_to_location.find(tag) != _tag_to_location.end()) + { + release_location(location); + return -1; + } + + _tag_to_location[tag] = location; + _location_to_tag.set(location, tag); + } + tl.unlock(); + + _data_store->set_vector(location, point); // update datastore + + // Find and add appropriate graph edges + ScratchStoreManager> manager(_query_scratch); + auto scratch = manager.scratch_space(); + std::vector pruned_list; // it is the set best candidates to connect to this point + if (_filtered_index) + { + // when filtered the best_candidates will share the same label ( label_present > distance) + search_for_point_and_prune(location, _indexingQueueSize, pruned_list, scratch, true, _filterIndexingQueueSize); + } + else + { + search_for_point_and_prune(location, _indexingQueueSize, pruned_list, scratch); + } + assert(pruned_list.size() > 0); // should find atleast one neighbour (i.e frozen point acting as medoid) + + { + std::shared_lock tlock(_tag_lock, std::defer_lock); + if (_conc_consolidate) + tlock.lock(); + + LockGuard guard(_locks[location]); + _graph_store->clear_neighbours(location); + + std::vector neighbor_links; + for (auto link : pruned_list) + { + if (_conc_consolidate) + if (!_location_to_tag.contains(link)) + continue; + neighbor_links.emplace_back(link); + } + _graph_store->set_neighbours(location, neighbor_links); + assert(_graph_store->get_neighbours(location).size() <= _indexingRange); + + if (_conc_consolidate) + tlock.unlock(); + } + + inter_insert(location, pruned_list, scratch); + + return 0; +} + +template int Index::_lazy_delete(const TagType &tag) +{ + try + { + return lazy_delete(std::any_cast(tag)); + } + catch (const std::bad_any_cast &e) + { + throw ANNException(std::string("Error: ") + e.what(), -1); + } +} + +template +void Index::_lazy_delete(TagVector &tags, TagVector &failed_tags) +{ + try + { + this->lazy_delete(tags.get>(), failed_tags.get>()); + } + catch (const std::bad_any_cast &e) + { + throw ANNException("Error: bad any cast while performing _lazy_delete() " + 
std::string(e.what()), -1); + } + catch (const std::exception &e) + { + throw ANNException("Error: " + std::string(e.what()), -1); + } +} + +template int Index::lazy_delete(const TagT &tag) +{ + std::shared_lock ul(_update_lock); + std::unique_lock tl(_tag_lock); + std::unique_lock dl(_delete_lock); + _data_compacted = false; + + if (_tag_to_location.find(tag) == _tag_to_location.end()) + { + diskann::cerr << "Delete tag not found " << get_tag_string(tag) << std::endl; + return -1; + } + assert(_tag_to_location[tag] < _max_points); + + const auto location = _tag_to_location[tag]; + _delete_set->insert(location); + _location_to_tag.erase(location); + _tag_to_location.erase(tag); + return 0; +} + +template +void Index::lazy_delete(const std::vector &tags, std::vector &failed_tags) +{ + if (failed_tags.size() > 0) + { + throw ANNException("failed_tags should be passed as an empty list", -1, __FUNCSIG__, __FILE__, __LINE__); + } + std::shared_lock ul(_update_lock); + std::unique_lock tl(_tag_lock); + std::unique_lock dl(_delete_lock); + _data_compacted = false; + + for (auto tag : tags) + { + if (_tag_to_location.find(tag) == _tag_to_location.end()) + { + failed_tags.push_back(tag); + } + else + { + const auto location = _tag_to_location[tag]; + _delete_set->insert(location); + _location_to_tag.erase(location); + _tag_to_location.erase(tag); + } + } +} + +template bool Index::is_index_saved() +{ + return _is_saved; +} + +template +void Index::_get_active_tags(TagRobinSet &active_tags) +{ + try + { + this->get_active_tags(active_tags.get>()); + } + catch (const std::bad_any_cast &e) + { + throw ANNException("Error: bad_any cast while performing _get_active_tags() " + std::string(e.what()), -1); + } + catch (const std::exception &e) + { + throw ANNException("Error :" + std::string(e.what()), -1); + } +} + +template +void Index::get_active_tags(tsl::robin_set &active_tags) +{ + active_tags.clear(); + std::shared_lock tl(_tag_lock); + for (auto iter : _tag_to_location) + { + active_tags.insert(iter.first); + } +} + +template +void Index::get_degree_stats(size_t &max_deg, size_t &min_deg, size_t &avg_deg, size_t &cnt_deg) +{ + max_deg = 0; + min_deg = SIZE_MAX; + avg_deg = 0; + cnt_deg = 0; + size_t total = 0; + for (size_t i = 0; i < _nd; i++) + { + auto &pool = _graph_store->get_neighbours((location_t)i); + cnt_deg += (pool.size() < 2); + max_deg = std::max(max_deg, pool.size()); + min_deg = std::min(min_deg, pool.size()); + total += pool.size(); + } + avg_deg = total / _nd; +} + +template +void Index::dump_degree_stats(std::string filename) +{ + std::ofstream file(filename); + if (!file.is_open()) + { + std::cerr << "Error: Could not open file " << filename << " for writing" << std::endl; + return; + } + + // Write each node's degree to the file, one per line + for (size_t i = 0; i < _nd; i++) + { + auto &pool = _graph_store->get_neighbours((location_t)i); + file << pool.size() << std::endl; + } + + file.close(); +} + +template void Index::print_status() +{ + std::shared_lock ul(_update_lock); + std::shared_lock cl(_consolidate_lock); + std::shared_lock tl(_tag_lock); + std::shared_lock dl(_delete_lock); + + diskann::cout << "------------------- Index object: " << (uint64_t)this << " -------------------" << std::endl; + diskann::cout << "Number of points: " << _nd << std::endl; + diskann::cout << "Graph size: " << _graph_store->get_total_points() << std::endl; + diskann::cout << "Location to tag size: " << _location_to_tag.size() << std::endl; + diskann::cout << "Tag to location size: " << 
_tag_to_location.size() << std::endl; + diskann::cout << "Number of empty slots: " << _empty_slots.size() << std::endl; + diskann::cout << std::boolalpha << "Data compacted: " << this->_data_compacted << std::endl; + diskann::cout << "---------------------------------------------------------" + "------------" + << std::endl; +} + +template void Index::count_nodes_at_bfs_levels() +{ + std::unique_lock ul(_update_lock); + + boost::dynamic_bitset<> visited(_max_points + _num_frozen_pts); + + size_t MAX_BFS_LEVELS = 32; + auto bfs_sets = new tsl::robin_set[MAX_BFS_LEVELS]; + + bfs_sets[0].insert(_start); + visited.set(_start); + + for (uint32_t i = (uint32_t)_max_points; i < _max_points + _num_frozen_pts; ++i) + { + if (i != _start) + { + bfs_sets[0].insert(i); + visited.set(i); + } + } + + for (size_t l = 0; l < MAX_BFS_LEVELS - 1; ++l) + { + diskann::cout << "Number of nodes at BFS level " << l << " is " << bfs_sets[l].size() << std::endl; + if (bfs_sets[l].size() == 0) + break; + for (auto node : bfs_sets[l]) + { + for (auto nghbr : _graph_store->get_neighbours((location_t)node)) + { + if (!visited.test(nghbr)) + { + visited.set(nghbr); + bfs_sets[l + 1].insert(nghbr); + } + } + } + } + + delete[] bfs_sets; +} + +// REFACTOR: This should be an OptimizedDataStore class +template void Index::optimize_index_layout() +{ // use after build or load + if (_dynamic_index) + { + throw diskann::ANNException("Optimize_index_layout not implemented for dyanmic indices", -1, __FUNCSIG__, + __FILE__, __LINE__); + } + + float *cur_vec = new float[_data_store->get_aligned_dim()]; + std::memset(cur_vec, 0, _data_store->get_aligned_dim() * sizeof(float)); + _data_len = (_data_store->get_aligned_dim() + 1) * sizeof(float); + _neighbor_len = (_graph_store->get_max_observed_degree() + 1) * sizeof(uint32_t); + _node_size = _data_len + _neighbor_len; + _opt_graph = new char[_node_size * _nd]; + auto dist_fast = (DistanceFastL2 *)(_data_store->get_dist_fn()); + for (uint32_t i = 0; i < _nd; i++) + { + char *cur_node_offset = _opt_graph + i * _node_size; + _data_store->get_vector(i, (T *)cur_vec); + float cur_norm = dist_fast->norm((T *)cur_vec, (uint32_t)_data_store->get_aligned_dim()); + std::memcpy(cur_node_offset, &cur_norm, sizeof(float)); + std::memcpy(cur_node_offset + sizeof(float), cur_vec, _data_len - sizeof(float)); + + cur_node_offset += _data_len; + uint32_t k = (uint32_t)_graph_store->get_neighbours(i).size(); + std::memcpy(cur_node_offset, &k, sizeof(uint32_t)); + std::memcpy(cur_node_offset + sizeof(uint32_t), _graph_store->get_neighbours(i).data(), k * sizeof(uint32_t)); + // std::vector().swap(_graph_store->get_neighbours(i)); + _graph_store->clear_neighbours(i); + } + _graph_store->clear_graph(); + _graph_store->resize_graph(0); + delete[] cur_vec; +} + +template +void Index::_search_with_optimized_layout(const DataType &query, size_t K, size_t L, uint32_t *indices) +{ + try + { + return this->search_with_optimized_layout(std::any_cast(query), K, L, indices); + } + catch (const std::bad_any_cast &e) + { + throw ANNException("Error: bad any cast while performing " + "_search_with_optimized_layout() " + + std::string(e.what()), + -1); + } + catch (const std::exception &e) + { + throw ANNException("Error: " + std::string(e.what()), -1); + } +} + +template +void Index::search_with_optimized_layout(const T *query, size_t K, size_t L, uint32_t *indices) +{ + DistanceFastL2 *dist_fast = (DistanceFastL2 *)(_data_store->get_dist_fn()); + + NeighborPriorityQueue retset(L); + std::vector init_ids(L); + + 
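+ // [Editor's note] Sketch of the flat node layout produced by optimize_index_layout(),
+ // added for readability; the offsets come from that function and the names below
+ // are descriptive only:
+ //
+ //   node i starts at  _opt_graph + i * _node_size, where
+ //     _data_len     = (aligned_dim + 1) * sizeof(float)             // [norm][vector ...]
+ //     _neighbor_len = (max_observed_degree + 1) * sizeof(uint32_t)  // [degree][neighbour ids ...]
+ //     _node_size    = _data_len + _neighbor_len
+ //
+ //   float     norm       = *(float *)(node);
+ //   const T  *vector     = (const T *)(node + sizeof(float));
+ //   uint32_t  degree     = *(uint32_t *)(node + _data_len);
+ //   uint32_t *neighbours = (uint32_t *)(node + _data_len + sizeof(uint32_t));
+ //
+ // The search loop below walks this packed layout directly (prefetching whole
+ // nodes with _mm_prefetch) instead of going through _data_store and _graph_store.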
boost::dynamic_bitset<> flags{_nd, 0}; + uint32_t tmp_l = 0; + uint32_t *neighbors = (uint32_t *)(_opt_graph + _node_size * _start + _data_len); + uint32_t MaxM_ep = *neighbors; + neighbors++; + + for (; tmp_l < L && tmp_l < MaxM_ep; tmp_l++) + { + init_ids[tmp_l] = neighbors[tmp_l]; + flags[init_ids[tmp_l]] = true; + } + + while (tmp_l < L) + { + uint32_t id = rand() % _nd; + if (flags[id]) + continue; + flags[id] = true; + init_ids[tmp_l] = id; + tmp_l++; + } + + for (uint32_t i = 0; i < init_ids.size(); i++) + { + uint32_t id = init_ids[i]; + if (id >= _nd) + continue; + _mm_prefetch(_opt_graph + _node_size * id, _MM_HINT_T0); + } + L = 0; + for (uint32_t i = 0; i < init_ids.size(); i++) + { + uint32_t id = init_ids[i]; + if (id >= _nd) + continue; + T *x = (T *)(_opt_graph + _node_size * id); + float norm_x = *x; + x++; + float dist = dist_fast->compare(x, query, norm_x, (uint32_t)_data_store->get_aligned_dim()); + retset.insert(Neighbor(id, dist)); + flags[id] = true; + L++; + } + + while (retset.has_unexpanded_node()) + { + auto nbr = retset.closest_unexpanded(); + auto n = nbr.id; + _mm_prefetch(_opt_graph + _node_size * n + _data_len, _MM_HINT_T0); + neighbors = (uint32_t *)(_opt_graph + _node_size * n + _data_len); + uint32_t MaxM = *neighbors; + neighbors++; + for (uint32_t m = 0; m < MaxM; ++m) + _mm_prefetch(_opt_graph + _node_size * neighbors[m], _MM_HINT_T0); + for (uint32_t m = 0; m < MaxM; ++m) + { + uint32_t id = neighbors[m]; + if (flags[id]) + continue; + flags[id] = 1; + T *data = (T *)(_opt_graph + _node_size * id); + float norm = *data; + data++; + float dist = dist_fast->compare(query, data, norm, (uint32_t)_data_store->get_aligned_dim()); + Neighbor nn(id, dist); + retset.insert(nn); + } + } + + for (size_t i = 0; i < K; i++) + { + indices[i] = retset[i].id; + } +} + +/* Internals of the library */ +template const float Index::INDEX_GROWTH_FACTOR = 1.5f; + +// EXPORTS +template DISKANN_DLLEXPORT class Index; +template DISKANN_DLLEXPORT class Index; +template DISKANN_DLLEXPORT class Index; +template DISKANN_DLLEXPORT class Index; +template DISKANN_DLLEXPORT class Index; +template DISKANN_DLLEXPORT class Index; +template DISKANN_DLLEXPORT class Index; +template DISKANN_DLLEXPORT class Index; +template DISKANN_DLLEXPORT class Index; +template DISKANN_DLLEXPORT class Index; +template DISKANN_DLLEXPORT class Index; +template DISKANN_DLLEXPORT class Index; +template DISKANN_DLLEXPORT class Index; +template DISKANN_DLLEXPORT class Index; +template DISKANN_DLLEXPORT class Index; +// Label with short int 2 byte +template DISKANN_DLLEXPORT class Index; +template DISKANN_DLLEXPORT class Index; +template DISKANN_DLLEXPORT class Index; +template DISKANN_DLLEXPORT class Index; +template DISKANN_DLLEXPORT class Index; +template DISKANN_DLLEXPORT class Index; +template DISKANN_DLLEXPORT class Index; +template DISKANN_DLLEXPORT class Index; +template DISKANN_DLLEXPORT class Index; +template DISKANN_DLLEXPORT class Index; +template DISKANN_DLLEXPORT class Index; +template DISKANN_DLLEXPORT class Index; +template DISKANN_DLLEXPORT class Index; +template DISKANN_DLLEXPORT class Index; +template DISKANN_DLLEXPORT class Index; + +template DISKANN_DLLEXPORT std::pair Index::search( + const float *query, const size_t K, const uint32_t L, uint64_t *indices, float *distances); +template DISKANN_DLLEXPORT std::pair Index::search( + const float *query, const size_t K, const uint32_t L, uint32_t *indices, float *distances); +template DISKANN_DLLEXPORT std::pair Index::search( + const uint8_t 
*query, const size_t K, const uint32_t L, uint64_t *indices, float *distances); +template DISKANN_DLLEXPORT std::pair Index::search( + const uint8_t *query, const size_t K, const uint32_t L, uint32_t *indices, float *distances); +template DISKANN_DLLEXPORT std::pair Index::search( + const int8_t *query, const size_t K, const uint32_t L, uint64_t *indices, float *distances); +template DISKANN_DLLEXPORT std::pair Index::search( + const int8_t *query, const size_t K, const uint32_t L, uint32_t *indices, float *distances); +// TagT==uint32_t +template DISKANN_DLLEXPORT std::pair Index::search( + const float *query, const size_t K, const uint32_t L, uint64_t *indices, float *distances); +template DISKANN_DLLEXPORT std::pair Index::search( + const float *query, const size_t K, const uint32_t L, uint32_t *indices, float *distances); +template DISKANN_DLLEXPORT std::pair Index::search( + const uint8_t *query, const size_t K, const uint32_t L, uint64_t *indices, float *distances); +template DISKANN_DLLEXPORT std::pair Index::search( + const uint8_t *query, const size_t K, const uint32_t L, uint32_t *indices, float *distances); +template DISKANN_DLLEXPORT std::pair Index::search( + const int8_t *query, const size_t K, const uint32_t L, uint64_t *indices, float *distances); +template DISKANN_DLLEXPORT std::pair Index::search( + const int8_t *query, const size_t K, const uint32_t L, uint32_t *indices, float *distances); + +template DISKANN_DLLEXPORT std::pair Index::search_with_filters< + uint64_t>(const float *query, const uint32_t &filter_label, const size_t K, const uint32_t L, uint64_t *indices, + float *distances); +template DISKANN_DLLEXPORT std::pair Index::search_with_filters< + uint32_t>(const float *query, const uint32_t &filter_label, const size_t K, const uint32_t L, uint32_t *indices, + float *distances); +template DISKANN_DLLEXPORT std::pair Index::search_with_filters< + uint64_t>(const uint8_t *query, const uint32_t &filter_label, const size_t K, const uint32_t L, uint64_t *indices, + float *distances); +template DISKANN_DLLEXPORT std::pair Index::search_with_filters< + uint32_t>(const uint8_t *query, const uint32_t &filter_label, const size_t K, const uint32_t L, uint32_t *indices, + float *distances); +template DISKANN_DLLEXPORT std::pair Index::search_with_filters< + uint64_t>(const int8_t *query, const uint32_t &filter_label, const size_t K, const uint32_t L, uint64_t *indices, + float *distances); +template DISKANN_DLLEXPORT std::pair Index::search_with_filters< + uint32_t>(const int8_t *query, const uint32_t &filter_label, const size_t K, const uint32_t L, uint32_t *indices, + float *distances); +// TagT==uint32_t +template DISKANN_DLLEXPORT std::pair Index::search_with_filters< + uint64_t>(const float *query, const uint32_t &filter_label, const size_t K, const uint32_t L, uint64_t *indices, + float *distances); +template DISKANN_DLLEXPORT std::pair Index::search_with_filters< + uint32_t>(const float *query, const uint32_t &filter_label, const size_t K, const uint32_t L, uint32_t *indices, + float *distances); +template DISKANN_DLLEXPORT std::pair Index::search_with_filters< + uint64_t>(const uint8_t *query, const uint32_t &filter_label, const size_t K, const uint32_t L, uint64_t *indices, + float *distances); +template DISKANN_DLLEXPORT std::pair Index::search_with_filters< + uint32_t>(const uint8_t *query, const uint32_t &filter_label, const size_t K, const uint32_t L, uint32_t *indices, + float *distances); +template DISKANN_DLLEXPORT std::pair Index::search_with_filters< + 
uint64_t>(const int8_t *query, const uint32_t &filter_label, const size_t K, const uint32_t L, uint64_t *indices, + float *distances); +template DISKANN_DLLEXPORT std::pair Index::search_with_filters< + uint32_t>(const int8_t *query, const uint32_t &filter_label, const size_t K, const uint32_t L, uint32_t *indices, + float *distances); + +template DISKANN_DLLEXPORT std::pair Index::search( + const float *query, const size_t K, const uint32_t L, uint64_t *indices, float *distances); +template DISKANN_DLLEXPORT std::pair Index::search( + const float *query, const size_t K, const uint32_t L, uint32_t *indices, float *distances); +template DISKANN_DLLEXPORT std::pair Index::search( + const uint8_t *query, const size_t K, const uint32_t L, uint64_t *indices, float *distances); +template DISKANN_DLLEXPORT std::pair Index::search( + const uint8_t *query, const size_t K, const uint32_t L, uint32_t *indices, float *distances); +template DISKANN_DLLEXPORT std::pair Index::search( + const int8_t *query, const size_t K, const uint32_t L, uint64_t *indices, float *distances); +template DISKANN_DLLEXPORT std::pair Index::search( + const int8_t *query, const size_t K, const uint32_t L, uint32_t *indices, float *distances); +// TagT==uint32_t +template DISKANN_DLLEXPORT std::pair Index::search( + const float *query, const size_t K, const uint32_t L, uint64_t *indices, float *distances); +template DISKANN_DLLEXPORT std::pair Index::search( + const float *query, const size_t K, const uint32_t L, uint32_t *indices, float *distances); +template DISKANN_DLLEXPORT std::pair Index::search( + const uint8_t *query, const size_t K, const uint32_t L, uint64_t *indices, float *distances); +template DISKANN_DLLEXPORT std::pair Index::search( + const uint8_t *query, const size_t K, const uint32_t L, uint32_t *indices, float *distances); +template DISKANN_DLLEXPORT std::pair Index::search( + const int8_t *query, const size_t K, const uint32_t L, uint64_t *indices, float *distances); +template DISKANN_DLLEXPORT std::pair Index::search( + const int8_t *query, const size_t K, const uint32_t L, uint32_t *indices, float *distances); + +template DISKANN_DLLEXPORT std::pair Index::search_with_filters< + uint64_t>(const float *query, const uint16_t &filter_label, const size_t K, const uint32_t L, uint64_t *indices, + float *distances); +template DISKANN_DLLEXPORT std::pair Index::search_with_filters< + uint32_t>(const float *query, const uint16_t &filter_label, const size_t K, const uint32_t L, uint32_t *indices, + float *distances); +template DISKANN_DLLEXPORT std::pair Index::search_with_filters< + uint64_t>(const uint8_t *query, const uint16_t &filter_label, const size_t K, const uint32_t L, uint64_t *indices, + float *distances); +template DISKANN_DLLEXPORT std::pair Index::search_with_filters< + uint32_t>(const uint8_t *query, const uint16_t &filter_label, const size_t K, const uint32_t L, uint32_t *indices, + float *distances); +template DISKANN_DLLEXPORT std::pair Index::search_with_filters< + uint64_t>(const int8_t *query, const uint16_t &filter_label, const size_t K, const uint32_t L, uint64_t *indices, + float *distances); +template DISKANN_DLLEXPORT std::pair Index::search_with_filters< + uint32_t>(const int8_t *query, const uint16_t &filter_label, const size_t K, const uint32_t L, uint32_t *indices, + float *distances); +// TagT==uint32_t +template DISKANN_DLLEXPORT std::pair Index::search_with_filters< + uint64_t>(const float *query, const uint16_t &filter_label, const size_t K, const uint32_t L, uint64_t *indices, 
+ float *distances); +template DISKANN_DLLEXPORT std::pair Index::search_with_filters< + uint32_t>(const float *query, const uint16_t &filter_label, const size_t K, const uint32_t L, uint32_t *indices, + float *distances); +template DISKANN_DLLEXPORT std::pair Index::search_with_filters< + uint64_t>(const uint8_t *query, const uint16_t &filter_label, const size_t K, const uint32_t L, uint64_t *indices, + float *distances); +template DISKANN_DLLEXPORT std::pair Index::search_with_filters< + uint32_t>(const uint8_t *query, const uint16_t &filter_label, const size_t K, const uint32_t L, uint32_t *indices, + float *distances); +template DISKANN_DLLEXPORT std::pair Index::search_with_filters< + uint64_t>(const int8_t *query, const uint16_t &filter_label, const size_t K, const uint32_t L, uint64_t *indices, + float *distances); +template DISKANN_DLLEXPORT std::pair Index::search_with_filters< + uint32_t>(const int8_t *query, const uint16_t &filter_label, const size_t K, const uint32_t L, uint32_t *indices, + float *distances); + +} // namespace diskann diff --git a/packages/leann-backend-diskann/third_party/DiskANN/src/index_factory.cpp b/packages/leann-backend-diskann/third_party/DiskANN/src/index_factory.cpp new file mode 100644 index 0000000..35790f8 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/src/index_factory.cpp @@ -0,0 +1,213 @@ +#include "index_factory.h" +#include "pq_l2_distance.h" + +namespace diskann +{ + +IndexFactory::IndexFactory(const IndexConfig &config) : _config(std::make_unique(config)) +{ + check_config(); +} + +std::unique_ptr IndexFactory::create_instance() +{ + return create_instance(_config->data_type, _config->tag_type, _config->label_type); +} + +void IndexFactory::check_config() +{ + if (_config->dynamic_index && !_config->enable_tags) + { + throw ANNException("ERROR: Dynamic Indexing must have tags enabled.", -1, __FUNCSIG__, __FILE__, __LINE__); + } + + if (_config->pq_dist_build) + { + if (_config->dynamic_index) + throw ANNException("ERROR: Dynamic Indexing not supported with PQ distance based " + "index construction", + -1, __FUNCSIG__, __FILE__, __LINE__); + if (_config->metric == diskann::Metric::INNER_PRODUCT) + throw ANNException("ERROR: Inner product metrics not yet supported " + "with PQ distance " + "base index", + -1, __FUNCSIG__, __FILE__, __LINE__); + } + + if (_config->data_type != "float" && _config->data_type != "uint8" && _config->data_type != "int8") + { + throw ANNException("ERROR: invalid data type : + " + _config->data_type + + " is not supported. please select from [float, int8, uint8]", + -1); + } + + if (_config->tag_type != "int32" && _config->tag_type != "uint32" && _config->tag_type != "int64" && + _config->tag_type != "uint64") + { + throw ANNException("ERROR: invalid data type : + " + _config->tag_type + + " is not supported. 
please select from [int32, uint32, int64, uint64]", + -1); + } +} + +template Distance *IndexFactory::construct_inmem_distance_fn(Metric metric) +{ + if (metric == diskann::Metric::COSINE && std::is_same::value) + { + return (Distance *)new AVXNormalizedCosineDistanceFloat(); + } + else + { + return (Distance *)get_distance_function(metric); + } +} + +template +std::shared_ptr> IndexFactory::construct_datastore(DataStoreStrategy strategy, + size_t total_internal_points, size_t dimension, + Metric metric) +{ + std::unique_ptr> distance; + switch (strategy) + { + case DataStoreStrategy::MEMORY: + distance.reset(construct_inmem_distance_fn(metric)); + return std::make_shared>((location_t)total_internal_points, dimension, + std::move(distance)); + default: + break; + } + return nullptr; +} + +std::unique_ptr IndexFactory::construct_graphstore(const GraphStoreStrategy strategy, + const size_t size, + const size_t reserve_graph_degree) +{ + switch (strategy) + { + case GraphStoreStrategy::MEMORY: + return std::make_unique(size, reserve_graph_degree); + default: + throw ANNException("Error : Current GraphStoreStratagy is not supported.", -1); + } +} + +template +std::shared_ptr> IndexFactory::construct_pq_datastore(DataStoreStrategy strategy, size_t num_points, + size_t dimension, Metric m, size_t num_pq_chunks, + bool use_opq) +{ + std::unique_ptr> distance_fn; + std::unique_ptr> quantized_distance_fn; + + quantized_distance_fn = std::move(std::make_unique>((uint32_t)num_pq_chunks, use_opq)); + switch (strategy) + { + case DataStoreStrategy::MEMORY: + distance_fn.reset(construct_inmem_distance_fn(m)); + return std::make_shared>(dimension, (location_t)(num_points), num_pq_chunks, + std::move(distance_fn), std::move(quantized_distance_fn)); + default: + // REFACTOR TODO: We do support diskPQ - so we may need to add a new class for SSDPQDataStore! + break; + } + return nullptr; +} + +template +std::unique_ptr IndexFactory::create_instance() +{ + size_t num_points = _config->max_points + _config->num_frozen_pts; + size_t dim = _config->dimension; + // auto graph_store = construct_graphstore(_config->graph_strategy, num_points); + auto data_store = construct_datastore(_config->data_strategy, num_points, dim, _config->metric); + std::shared_ptr> pq_data_store = nullptr; + + if (_config->data_strategy == DataStoreStrategy::MEMORY && _config->pq_dist_build) + { + pq_data_store = + construct_pq_datastore(_config->data_strategy, num_points + _config->num_frozen_pts, dim, + _config->metric, _config->num_pq_chunks, _config->use_opq); + } + else + { + pq_data_store = data_store; + } + size_t max_reserve_degree = + (size_t)(defaults::GRAPH_SLACK_FACTOR * 1.05 * + (_config->index_write_params == nullptr ? 0 : _config->index_write_params->max_degree)); + std::unique_ptr graph_store = + construct_graphstore(_config->graph_strategy, num_points + _config->num_frozen_pts, max_reserve_degree); + + // REFACTOR TODO: Must construct in-memory PQDatastore if strategy == ONDISK and must construct + // in-mem and on-disk PQDataStore if strategy == ONDISK and diskPQ is required. 
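+ // [Editor's note] Wiring summary, added for orientation (not part of the original
+ // source): this overload assembles the three collaborators handed to the Index
+ // constructor below --
+ //
+ //   data_store    : in-memory vector storage plus a full-precision distance
+ //                   function (AVXNormalizedCosineDistanceFloat for float/COSINE,
+ //                   otherwise whatever get_distance_function(metric) returns);
+ //   pq_data_store : a PQDataStore when pq_dist_build is set, otherwise simply the
+ //                   same shared_ptr as data_store;
+ //   graph_store   : an in-memory adjacency store sized from
+ //                   num_points + num_frozen_pts, with per-node capacity reserved
+ //                   at GRAPH_SLACK_FACTOR * 1.05 * max_degree.
+ //
+ // Invalid combinations (a dynamic index without tags, PQ-distance builds with the
+ // inner-product metric, unsupported data/tag types) were already rejected by
+ // check_config().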
+ return std::make_unique>(*_config, data_store, + std::move(graph_store), pq_data_store); +} + +std::unique_ptr IndexFactory::create_instance(const std::string &data_type, const std::string &tag_type, + const std::string &label_type) +{ + if (data_type == std::string("float")) + { + return create_instance(tag_type, label_type); + } + else if (data_type == std::string("uint8")) + { + return create_instance(tag_type, label_type); + } + else if (data_type == std::string("int8")) + { + return create_instance(tag_type, label_type); + } + else + throw ANNException("Error: unsupported data_type please choose from [float/int8/uint8]", -1); +} + +template +std::unique_ptr IndexFactory::create_instance(const std::string &tag_type, const std::string &label_type) +{ + if (tag_type == std::string("int32")) + { + return create_instance(label_type); + } + else if (tag_type == std::string("uint32")) + { + return create_instance(label_type); + } + else if (tag_type == std::string("int64")) + { + return create_instance(label_type); + } + else if (tag_type == std::string("uint64")) + { + return create_instance(label_type); + } + else + throw ANNException("Error: unsupported tag_type please choose from [int32/uint32/int64/uint64]", -1); +} + +template +std::unique_ptr IndexFactory::create_instance(const std::string &label_type) +{ + if (label_type == std::string("uint16") || label_type == std::string("ushort")) + { + return create_instance(); + } + else if (label_type == std::string("uint32") || label_type == std::string("uint")) + { + return create_instance(); + } + else + throw ANNException("Error: unsupported label_type please choose from [uint/ushort]", -1); +} + +// template DISKANN_DLLEXPORT std::shared_ptr> IndexFactory::construct_datastore( +// DataStoreStrategy stratagy, size_t num_points, size_t dimension, Metric m); +// template DISKANN_DLLEXPORT std::shared_ptr> IndexFactory::construct_datastore( +// DataStoreStrategy stratagy, size_t num_points, size_t dimension, Metric m); +// template DISKANN_DLLEXPORT std::shared_ptr> IndexFactory::construct_datastore( +// DataStoreStrategy stratagy, size_t num_points, size_t dimension, Metric m); + +} // namespace diskann diff --git a/packages/leann-backend-diskann/third_party/DiskANN/src/linux_aligned_file_reader.cpp b/packages/leann-backend-diskann/third_party/DiskANN/src/linux_aligned_file_reader.cpp new file mode 100644 index 0000000..64e7eee --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/src/linux_aligned_file_reader.cpp @@ -0,0 +1,230 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
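+// [Editor's note] Overview added for orientation; this comment block is editorial
+// and the usage snippet is a hypothetical sketch, not code from this file.
+// LinuxAlignedFileReader is the libaio-based read path: every worker thread calls
+// register_thread() once, which runs io_setup(MAX_EVENTS, &ctx) and records the
+// io_context_t in a map keyed by std::thread::id; get_ctx() later hands that
+// per-thread context back.  read() forwards to execute_io(), which splits the
+// request vector into batches of at most MAX_EVENTS, prepares iocbs with
+// io_prep_pread(), submits them via io_submit() and waits on io_getevents().
+// The file is opened with O_DIRECT, so callers are expected to supply
+// sector-aligned requests (the DEBUG build asserts IS_ALIGNED(req.len, 512);
+// the buffer and offset need the same alignment in practice).  A caller might
+// look roughly like:
+//
+//   LinuxAlignedFileReader reader;
+//   reader.open("/path/to/index.bin");       // hypothetical path
+//   reader.register_thread();
+//   std::vector<AlignedRead> reqs;           // offset/len/buf all 512-aligned
+//   reqs.push_back(AlignedRead{offset, len, aligned_buf});
+//   reader.read(reqs, reader.get_ctx(), /*async=*/false);
+//   reader.deregister_thread();
+//   reader.close();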
+ +#include "linux_aligned_file_reader.h" +#ifndef __APPLE__ + +#include +#include +#include +#include "tsl/robin_map.h" +#include "utils.h" +#define MAX_EVENTS 1024 + +namespace +{ +typedef struct io_event io_event_t; +typedef struct iocb iocb_t; + +void execute_io(io_context_t ctx, int fd, std::vector &read_reqs, uint64_t n_retries = 0) +{ +#ifdef DEBUG + for (auto &req : read_reqs) + { + assert(IS_ALIGNED(req.len, 512)); + // std::cout << "request:"<= req.len); + } +#endif + + // break-up requests into chunks of size MAX_EVENTS each + uint64_t n_iters = ROUND_UP(read_reqs.size(), MAX_EVENTS) / MAX_EVENTS; + for (uint64_t iter = 0; iter < n_iters; iter++) + { + uint64_t n_ops = std::min((uint64_t)read_reqs.size() - (iter * MAX_EVENTS), (uint64_t)MAX_EVENTS); + std::vector cbs(n_ops, nullptr); + std::vector evts(n_ops); + std::vector cb(n_ops); + for (uint64_t j = 0; j < n_ops; j++) + { + io_prep_pread(cb.data() + j, fd, read_reqs[j + iter * MAX_EVENTS].buf, read_reqs[j + iter * MAX_EVENTS].len, + read_reqs[j + iter * MAX_EVENTS].offset); + } + + // initialize `cbs` using `cb` array + // + + for (uint64_t i = 0; i < n_ops; i++) + { + cbs[i] = cb.data() + i; + } + + uint64_t n_tries = 0; + while (n_tries <= n_retries) + { + // issue reads + int64_t ret = io_submit(ctx, (int64_t)n_ops, cbs.data()); + // if requests didn't get accepted + if (ret != (int64_t)n_ops) + { + std::cerr << "io_submit() failed; returned " << ret << ", expected=" << n_ops << ", ernno=" << errno + << "=" << ::strerror(-ret) << ", try #" << n_tries + 1; + std::cout << "ctx: " << ctx << "\n"; + exit(-1); + } + else + { + // wait on io_getevents + ret = io_getevents(ctx, (int64_t)n_ops, (int64_t)n_ops, evts.data(), nullptr); + // if requests didn't complete + if (ret != (int64_t)n_ops) + { + std::cerr << "io_getevents() failed; returned " << ret << ", expected=" << n_ops + << ", ernno=" << errno << "=" << ::strerror(-ret) << ", try #" << n_tries + 1; + exit(-1); + } + else + { + break; + } + } + } + // disabled since req.buf could be an offset into another buf + /* + for (auto &req : read_reqs) { + // corruption check + assert(malloc_usable_size(req.buf) >= req.len); + } + */ + } +} +} // namespace + +LinuxAlignedFileReader::LinuxAlignedFileReader() +{ + this->file_desc = -1; +} + +LinuxAlignedFileReader::~LinuxAlignedFileReader() +{ + int64_t ret; + // check to make sure file_desc is closed + ret = ::fcntl(this->file_desc, F_GETFD); + if (ret == -1) + { + if (errno != EBADF) + { + std::cerr << "close() not called" << std::endl; + // close file desc + ret = ::close(this->file_desc); + // error checks + if (ret == -1) + { + std::cerr << "close() failed; returned " << ret << ", errno=" << errno << ":" << ::strerror(errno) + << std::endl; + } + } + } +} + +io_context_t &LinuxAlignedFileReader::get_ctx() +{ + std::unique_lock lk(ctx_mut); + // perform checks only in DEBUG mode + if (ctx_map.find(std::this_thread::get_id()) == ctx_map.end()) + { + std::cerr << "bad thread access; returning -1 as io_context_t" << std::endl; + return this->bad_ctx; + } + else + { + return ctx_map[std::this_thread::get_id()]; + } +} + +void LinuxAlignedFileReader::register_thread() +{ + auto my_id = std::this_thread::get_id(); + std::unique_lock lk(ctx_mut); + if (ctx_map.find(my_id) != ctx_map.end()) + { + std::cerr << "multiple calls to register_thread from the same thread" << std::endl; + return; + } + io_context_t ctx = 0; + int ret = io_setup(MAX_EVENTS, &ctx); + if (ret != 0) + { + lk.unlock(); + if (ret == -EAGAIN) + { + std::cerr << 
"io_setup() failed with EAGAIN: Consider increasing /proc/sys/fs/aio-max-nr" << std::endl; + } + else + { + std::cerr << "io_setup() failed; returned " << ret << ": " << ::strerror(-ret) << std::endl; + } + } + else + { + diskann::cout << "allocating ctx: " << ctx << " to thread-id:" << my_id << std::endl; + ctx_map[my_id] = ctx; + } + lk.unlock(); +} + +void LinuxAlignedFileReader::deregister_thread() +{ + auto my_id = std::this_thread::get_id(); + std::unique_lock lk(ctx_mut); + assert(ctx_map.find(my_id) != ctx_map.end()); + + lk.unlock(); + io_context_t ctx = this->get_ctx(); + io_destroy(ctx); + // assert(ret == 0); + lk.lock(); + ctx_map.erase(my_id); + std::cerr << "returned ctx from thread-id:" << my_id << std::endl; + lk.unlock(); +} + +void LinuxAlignedFileReader::deregister_all_threads() +{ + std::unique_lock lk(ctx_mut); + for (auto x = ctx_map.begin(); x != ctx_map.end(); x++) + { + io_context_t ctx = x.value(); + io_destroy(ctx); + // assert(ret == 0); + // lk.lock(); + // ctx_map.erase(my_id); + // std::cerr << "returned ctx from thread-id:" << my_id << std::endl; + } + ctx_map.clear(); + // lk.unlock(); +} + +void LinuxAlignedFileReader::open(const std::string &fname) +{ + int flags = O_DIRECT | O_RDONLY | O_LARGEFILE; + this->file_desc = ::open(fname.c_str(), flags); + // error checks + assert(this->file_desc != -1); + std::cerr << "Opened file : " << fname << std::endl; +} + +void LinuxAlignedFileReader::close() +{ + // int64_t ret; + + // check to make sure file_desc is closed + ::fcntl(this->file_desc, F_GETFD); + // assert(ret != -1); + + ::close(this->file_desc); + // assert(ret != -1); +} + +void LinuxAlignedFileReader::read(std::vector &read_reqs, io_context_t &ctx, bool async) +{ + if (async == true) + { + diskann::cout << "Async currently not supported in linux." << std::endl; + } + assert(this->file_desc != -1); + execute_io(ctx, this->file_desc, read_reqs); +} +#endif diff --git a/packages/leann-backend-diskann/third_party/DiskANN/src/logger.cpp b/packages/leann-backend-diskann/third_party/DiskANN/src/logger.cpp new file mode 100644 index 0000000..052f548 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/src/logger.cpp @@ -0,0 +1,97 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#include +#include + +#include "logger_impl.h" +#include "windows_customizations.h" + +namespace diskann +{ + +#ifdef ENABLE_CUSTOM_LOGGER +DISKANN_DLLEXPORT ANNStreamBuf coutBuff(stdout); +DISKANN_DLLEXPORT ANNStreamBuf cerrBuff(stderr); + +DISKANN_DLLEXPORT std::basic_ostream cout(&coutBuff); +DISKANN_DLLEXPORT std::basic_ostream cerr(&cerrBuff); +std::function g_logger; + +void SetCustomLogger(std::function logger) +{ + g_logger = logger; + diskann::cout << "Set Custom Logger" << std::endl; +} + +ANNStreamBuf::ANNStreamBuf(FILE *fp) +{ + if (fp == nullptr) + { + throw diskann::ANNException("File pointer passed to ANNStreamBuf() cannot be null", -1); + } + if (fp != stdout && fp != stderr) + { + throw diskann::ANNException("The custom logger only supports stdout and stderr.", -1); + } + _fp = fp; + _logLevel = (_fp == stdout) ? LogLevel::LL_Info : LogLevel::LL_Error; + _buf = new char[BUFFER_SIZE + 1]; // See comment in the header + + std::memset(_buf, 0, (BUFFER_SIZE) * sizeof(char)); + setp(_buf, _buf + BUFFER_SIZE - 1); +} + +ANNStreamBuf::~ANNStreamBuf() +{ + sync(); + _fp = nullptr; // we'll not close because we can't. 
+ delete[] _buf; +} + +int ANNStreamBuf::overflow(int c) +{ + std::lock_guard lock(_mutex); + if (c != EOF) + { + *pptr() = (char)c; + pbump(1); + } + flush(); + return c; +} + +int ANNStreamBuf::sync() +{ + std::lock_guard lock(_mutex); + flush(); + return 0; +} + +int ANNStreamBuf::underflow() +{ + throw diskann::ANNException("Attempt to read on streambuf meant only for writing.", -1); +} + +int ANNStreamBuf::flush() +{ + const int num = (int)(pptr() - pbase()); + logImpl(pbase(), num); + pbump(-num); + return num; +} +void ANNStreamBuf::logImpl(char *str, int num) +{ + str[num] = '\0'; // Safe. See the c'tor. + // Invoke the OLS custom logging function. + if (g_logger) + { + g_logger(_logLevel, str); + } +} +#else +using std::cerr; +using std::cout; +#endif + +} // namespace diskann diff --git a/packages/leann-backend-diskann/third_party/DiskANN/src/math_utils.cpp b/packages/leann-backend-diskann/third_party/DiskANN/src/math_utils.cpp new file mode 100644 index 0000000..d8fcda3 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/src/math_utils.cpp @@ -0,0 +1,465 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#include +#include +#ifdef __APPLE__ +#include +#else +#include +#endif +#include "logger.h" +#include "utils.h" + +namespace math_utils +{ + +#ifdef __APPLE__ +typedef int MKL_INT; +#endif + +float calc_distance(float *vec_1, float *vec_2, size_t dim) +{ + float dist = 0; + for (size_t j = 0; j < dim; j++) + { + dist += (vec_1[j] - vec_2[j]) * (vec_1[j] - vec_2[j]); + } + return dist; +} + +// compute l2-squared norms of data stored in row major num_points * dim, +// needs +// to be pre-allocated +void compute_vecs_l2sq(float *vecs_l2sq, float *data, const size_t num_points, const size_t dim) +{ +#pragma omp parallel for schedule(static, 8192) + for (int64_t n_iter = 0; n_iter < (int64_t)num_points; n_iter++) + { + vecs_l2sq[n_iter] = cblas_snrm2((MKL_INT)dim, (data + (n_iter * dim)), 1); + vecs_l2sq[n_iter] *= vecs_l2sq[n_iter]; + } +} + +void rotate_data_randomly(float *data, size_t num_points, size_t dim, float *rot_mat, float *&new_mat, + bool transpose_rot) +{ + CBLAS_TRANSPOSE transpose = CblasNoTrans; + if (transpose_rot) + { + diskann::cout << "Transposing rotation matrix.." << std::flush; + transpose = CblasTrans; + } + diskann::cout << "done Rotating data with random matrix.." << std::flush; + + cblas_sgemm(CblasRowMajor, CblasNoTrans, transpose, (MKL_INT)num_points, (MKL_INT)dim, (MKL_INT)dim, 1.0, data, + (MKL_INT)dim, rot_mat, (MKL_INT)dim, 0, new_mat, (MKL_INT)dim); + + diskann::cout << "done." 
<< std::endl; +} + +// calculate k closest centers to data of num_points * dim (row major) +// centers is num_centers * dim (row major) +// data_l2sq has pre-computed squared norms of data +// centers_l2sq has pre-computed squared norms of centers +// pre-allocated center_index will contain id of nearest center +// pre-allocated dist_matrix shound be num_points * num_centers and contain +// squared distances +// Default value of k is 1 + +// Ideally used only by compute_closest_centers +void compute_closest_centers_in_block(const float *const data, const size_t num_points, const size_t dim, + const float *const centers, const size_t num_centers, + const float *const docs_l2sq, const float *const centers_l2sq, + uint32_t *center_index, float *const dist_matrix, size_t k) +{ + if (k > num_centers) + { + diskann::cout << "ERROR: k (" << k << ") > num_center(" << num_centers << ")" << std::endl; + return; + } + + float *ones_a = new float[num_centers]; + float *ones_b = new float[num_points]; + + for (size_t i = 0; i < num_centers; i++) + { + ones_a[i] = 1.0; + } + for (size_t i = 0; i < num_points; i++) + { + ones_b[i] = 1.0; + } + + cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans, (MKL_INT)num_points, (MKL_INT)num_centers, (MKL_INT)1, 1.0f, + docs_l2sq, (MKL_INT)1, ones_a, (MKL_INT)1, 0.0f, dist_matrix, (MKL_INT)num_centers); + + cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans, (MKL_INT)num_points, (MKL_INT)num_centers, (MKL_INT)1, 1.0f, + ones_b, (MKL_INT)1, centers_l2sq, (MKL_INT)1, 1.0f, dist_matrix, (MKL_INT)num_centers); + + cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans, (MKL_INT)num_points, (MKL_INT)num_centers, (MKL_INT)dim, -2.0f, + data, (MKL_INT)dim, centers, (MKL_INT)dim, 1.0f, dist_matrix, (MKL_INT)num_centers); + + if (k == 1) + { +#pragma omp parallel for schedule(static, 8192) + for (int64_t i = 0; i < (int64_t)num_points; i++) + { + float min = std::numeric_limits::max(); + float *current = dist_matrix + (i * num_centers); + for (size_t j = 0; j < num_centers; j++) + { + if (current[j] < min) + { + center_index[i] = (uint32_t)j; + min = current[j]; + } + } + } + } + else + { +#pragma omp parallel for schedule(static, 8192) + for (int64_t i = 0; i < (int64_t)num_points; i++) + { + std::priority_queue top_k_queue; + float *current = dist_matrix + (i * num_centers); + for (size_t j = 0; j < num_centers; j++) + { + PivotContainer this_piv(j, current[j]); + top_k_queue.push(this_piv); + } + for (size_t j = 0; j < k; j++) + { + PivotContainer this_piv = top_k_queue.top(); + center_index[i * k + j] = (uint32_t)this_piv.piv_id; + top_k_queue.pop(); + } + } + } + delete[] ones_a; + delete[] ones_b; +} + +// Given data in num_points * new_dim row major +// Pivots stored in full_pivot_data as num_centers * new_dim row major +// Calculate the k closest pivot for each point and store it in vector +// closest_centers_ivf (row major, num_points*k) (which needs to be allocated +// outside) Additionally, if inverted index is not null (and pre-allocated), +// it +// will return inverted index for each center, assuming each of the inverted +// indices is an empty vector. 
Additionally, if pts_norms_squared is not null, +// then it will assume that point norms are pre-computed and use those values + +void compute_closest_centers(float *data, size_t num_points, size_t dim, float *pivot_data, size_t num_centers, + size_t k, uint32_t *closest_centers_ivf, std::vector *inverted_index, + float *pts_norms_squared) +{ + if (k > num_centers) + { + diskann::cout << "ERROR: k (" << k << ") > num_center(" << num_centers << ")" << std::endl; + return; + } + + bool is_norm_given_for_pts = (pts_norms_squared != NULL); + + float *pivs_norms_squared = new float[num_centers]; + if (!is_norm_given_for_pts) + pts_norms_squared = new float[num_points]; + + size_t PAR_BLOCK_SIZE = num_points; + size_t N_BLOCKS = + (num_points % PAR_BLOCK_SIZE) == 0 ? (num_points / PAR_BLOCK_SIZE) : (num_points / PAR_BLOCK_SIZE) + 1; + + if (!is_norm_given_for_pts) + math_utils::compute_vecs_l2sq(pts_norms_squared, data, num_points, dim); + math_utils::compute_vecs_l2sq(pivs_norms_squared, pivot_data, num_centers, dim); + uint32_t *closest_centers = new uint32_t[PAR_BLOCK_SIZE * k]; + float *distance_matrix = new float[num_centers * PAR_BLOCK_SIZE]; + + for (size_t cur_blk = 0; cur_blk < N_BLOCKS; cur_blk++) + { + float *data_cur_blk = data + cur_blk * PAR_BLOCK_SIZE * dim; + size_t num_pts_blk = std::min(PAR_BLOCK_SIZE, num_points - cur_blk * PAR_BLOCK_SIZE); + float *pts_norms_blk = pts_norms_squared + cur_blk * PAR_BLOCK_SIZE; + + math_utils::compute_closest_centers_in_block(data_cur_blk, num_pts_blk, dim, pivot_data, num_centers, + pts_norms_blk, pivs_norms_squared, closest_centers, + distance_matrix, k); + +#pragma omp parallel for schedule(static, 1) + for (int64_t j = cur_blk * PAR_BLOCK_SIZE; + j < std::min((int64_t)num_points, (int64_t)((cur_blk + 1) * PAR_BLOCK_SIZE)); j++) + { + for (size_t l = 0; l < k; l++) + { + size_t this_center_id = closest_centers[(j - cur_blk * PAR_BLOCK_SIZE) * k + l]; + closest_centers_ivf[j * k + l] = (uint32_t)this_center_id; + if (inverted_index != NULL) + { +#pragma omp critical + inverted_index[this_center_id].push_back(j); + } + } + } + } + delete[] closest_centers; + delete[] distance_matrix; + delete[] pivs_norms_squared; + if (!is_norm_given_for_pts) + delete[] pts_norms_squared; +} + +// if to_subtract is 1, will subtract nearest center from each row. Else will +// add. Output will be in data_load iself. +// Nearest centers need to be provided in closst_centers. +void process_residuals(float *data_load, size_t num_points, size_t dim, float *cur_pivot_data, size_t num_centers, + uint32_t *closest_centers, bool to_subtract) +{ + diskann::cout << "Processing residuals of " << num_points << " points in " << dim << " dimensions using " + << num_centers << " centers " << std::endl; +#pragma omp parallel for schedule(static, 8192) + for (int64_t n_iter = 0; n_iter < (int64_t)num_points; n_iter++) + { + for (size_t d_iter = 0; d_iter < dim; d_iter++) + { + if (to_subtract == 1) + data_load[n_iter * dim + d_iter] = + data_load[n_iter * dim + d_iter] - cur_pivot_data[closest_centers[n_iter] * dim + d_iter]; + else + data_load[n_iter * dim + d_iter] = + data_load[n_iter * dim + d_iter] + cur_pivot_data[closest_centers[n_iter] * dim + d_iter]; + } + } +} + +} // namespace math_utils + +namespace kmeans +{ + +// run Lloyds one iteration +// Given data in row major num_points * dim, and centers in row major +// num_centers * dim And squared lengths of data points, output the closest +// center to each data point, update centers, and also return inverted index. 
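+// [Editor's note] Editorial aside for readers of this file: the closest-centre
+// step this iteration relies on (compute_closest_centers_in_block above) never
+// computes distances pointwise.  It expands
+//
+//   ||x - c||^2 = ||x||^2 + ||c||^2 - 2 * x.c
+//
+// and fills the num_points x num_centers distance matrix with three cblas_sgemm
+// calls: two rank-1 products against all-ones vectors broadcast the squared norms
+// of the points and of the centres, and a final -2 * data * centers^T adds the
+// cross term.  The per-row argmin (k == 1) or a small priority queue (k > 1) then
+// yields the closest centres.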
+// If +// closest_centers == NULL, will allocate memory and return. Similarly, if +// closest_docs == NULL, will allocate memory and return. + +float lloyds_iter(float *data, size_t num_points, size_t dim, float *centers, size_t num_centers, float *docs_l2sq, + std::vector *closest_docs, uint32_t *&closest_center) +{ + bool compute_residual = true; + // Timer timer; + + if (closest_center == NULL) + closest_center = new uint32_t[num_points]; + if (closest_docs == NULL) + closest_docs = new std::vector[num_centers]; + else + for (size_t c = 0; c < num_centers; ++c) + closest_docs[c].clear(); + + math_utils::compute_closest_centers(data, num_points, dim, centers, num_centers, 1, closest_center, closest_docs, + docs_l2sq); + + memset(centers, 0, sizeof(float) * (size_t)num_centers * (size_t)dim); + +#pragma omp parallel for schedule(static, 1) + for (int64_t c = 0; c < (int64_t)num_centers; ++c) + { + float *center = centers + (size_t)c * (size_t)dim; + double *cluster_sum = new double[dim]; + for (size_t i = 0; i < dim; i++) + cluster_sum[i] = 0.0; + for (size_t i = 0; i < closest_docs[c].size(); i++) + { + float *current = data + ((closest_docs[c][i]) * dim); + for (size_t j = 0; j < dim; j++) + { + cluster_sum[j] += (double)current[j]; + } + } + if (closest_docs[c].size() > 0) + { + for (size_t i = 0; i < dim; i++) + center[i] = (float)(cluster_sum[i] / ((double)closest_docs[c].size())); + } + delete[] cluster_sum; + } + + float residual = 0.0; + if (compute_residual) + { + size_t BUF_PAD = 32; + size_t CHUNK_SIZE = 2 * 8192; + size_t nchunks = num_points / CHUNK_SIZE + (num_points % CHUNK_SIZE == 0 ? 0 : 1); + std::vector residuals(nchunks * BUF_PAD, 0.0); + +#pragma omp parallel for schedule(static, 32) + for (int64_t chunk = 0; chunk < (int64_t)nchunks; ++chunk) + for (size_t d = chunk * CHUNK_SIZE; d < num_points && d < (chunk + 1) * CHUNK_SIZE; ++d) + residuals[chunk * BUF_PAD] += + math_utils::calc_distance(data + (d * dim), centers + (size_t)closest_center[d] * (size_t)dim, dim); + + for (size_t chunk = 0; chunk < nchunks; ++chunk) + residual += residuals[chunk * BUF_PAD]; + } + + return residual; +} + +// Run Lloyds until max_reps or stopping criterion +// If you pass NULL for closest_docs and closest_center, it will NOT return +// the +// results, else it will assume appriate allocation as closest_docs = new +// vector [num_centers], and closest_center = new size_t[num_points] +// Final centers are output in centers as row major num_centers * dim +// +float run_lloyds(float *data, size_t num_points, size_t dim, float *centers, const size_t num_centers, + const size_t max_reps, std::vector *closest_docs, uint32_t *closest_center) +{ + float residual = std::numeric_limits::max(); + bool ret_closest_docs = true; + bool ret_closest_center = true; + if (closest_docs == NULL) + { + closest_docs = new std::vector[num_centers]; + ret_closest_docs = false; + } + if (closest_center == NULL) + { + closest_center = new uint32_t[num_points]; + ret_closest_center = false; + } + + float *docs_l2sq = new float[num_points]; + math_utils::compute_vecs_l2sq(docs_l2sq, data, num_points, dim); + + float old_residual; + // Timer timer; + for (size_t i = 0; i < max_reps; ++i) + { + old_residual = residual; + + residual = lloyds_iter(data, num_points, dim, centers, num_centers, docs_l2sq, closest_docs, closest_center); + + if (((i != 0) && ((old_residual - residual) / residual) < 0.00001) || + (residual < std::numeric_limits::epsilon())) + { + diskann::cout << "Residuals unchanged: " << old_residual << 
" becomes " << residual + << ". Early termination." << std::endl; + break; + } + } + delete[] docs_l2sq; + if (!ret_closest_docs) + delete[] closest_docs; + if (!ret_closest_center) + delete[] closest_center; + return residual; +} + +// assumes memory allocated for pivot_data as new +// float[num_centers*dim] +// and select randomly num_centers points as pivots +void selecting_pivots(float *data, size_t num_points, size_t dim, float *pivot_data, size_t num_centers) +{ + // pivot_data = new float[num_centers * dim]; + + std::vector picked; + std::random_device rd; + auto x = rd(); + std::mt19937 generator(x); + std::uniform_int_distribution distribution(0, num_points - 1); + + size_t tmp_pivot; + for (size_t j = 0; j < num_centers; j++) + { + tmp_pivot = distribution(generator); + if (std::find(picked.begin(), picked.end(), tmp_pivot) != picked.end()) + continue; + picked.push_back(tmp_pivot); + std::memcpy(pivot_data + j * dim, data + tmp_pivot * dim, dim * sizeof(float)); + } +} + +void kmeanspp_selecting_pivots(float *data, size_t num_points, size_t dim, float *pivot_data, size_t num_centers) +{ + if (num_points > 1 << 23) + { + diskann::cout << "ERROR: n_pts " << num_points + << " currently not supported for k-means++, maximum is " + "8388608. Falling back to random pivot " + "selection." + << std::endl; + selecting_pivots(data, num_points, dim, pivot_data, num_centers); + return; + } + + std::vector picked; + std::random_device rd; + auto x = rd(); + std::mt19937 generator(x); + std::uniform_real_distribution<> distribution(0, 1); + std::uniform_int_distribution int_dist(0, num_points - 1); + size_t init_id = int_dist(generator); + size_t num_picked = 1; + + picked.push_back(init_id); + std::memcpy(pivot_data, data + init_id * dim, dim * sizeof(float)); + + float *dist = new float[num_points]; + +#pragma omp parallel for schedule(static, 8192) + for (int64_t i = 0; i < (int64_t)num_points; i++) + { + dist[i] = math_utils::calc_distance(data + i * dim, data + init_id * dim, dim); + } + + double dart_val; + size_t tmp_pivot; + bool sum_flag = false; + + while (num_picked < num_centers) + { + dart_val = distribution(generator); + + double sum = 0; + for (size_t i = 0; i < num_points; i++) + { + sum = sum + dist[i]; + } + if (sum == 0) + sum_flag = true; + + dart_val *= sum; + + double prefix_sum = 0; + for (size_t i = 0; i < (num_points); i++) + { + tmp_pivot = i; + if (dart_val >= prefix_sum && dart_val < prefix_sum + dist[i]) + { + break; + } + + prefix_sum += dist[i]; + } + + if (std::find(picked.begin(), picked.end(), tmp_pivot) != picked.end() && (sum_flag == false)) + continue; + picked.push_back(tmp_pivot); + std::memcpy(pivot_data + num_picked * dim, data + tmp_pivot * dim, dim * sizeof(float)); + +#pragma omp parallel for schedule(static, 8192) + for (int64_t i = 0; i < (int64_t)num_points; i++) + { + dist[i] = (std::min)(dist[i], math_utils::calc_distance(data + i * dim, data + tmp_pivot * dim, dim)); + } + num_picked++; + } + delete[] dist; +} + +} // namespace kmeans diff --git a/packages/leann-backend-diskann/third_party/DiskANN/src/memory_mapper.cpp b/packages/leann-backend-diskann/third_party/DiskANN/src/memory_mapper.cpp new file mode 100644 index 0000000..d1c5ef9 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/src/memory_mapper.cpp @@ -0,0 +1,107 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
+ +#include "logger.h" +#include "memory_mapper.h" +#include +#include + +using namespace diskann; + +MemoryMapper::MemoryMapper(const std::string &filename) : MemoryMapper(filename.c_str()) +{ +} + +MemoryMapper::MemoryMapper(const char *filename) +{ +#ifndef _WINDOWS + _fd = open(filename, O_RDONLY); + if (_fd <= 0) + { + std::cerr << "Inner vertices file not found" << std::endl; + return; + } + struct stat sb; + if (fstat(_fd, &sb) != 0) + { + std::cerr << "Inner vertices file not dound. " << std::endl; + return; + } + _fileSize = sb.st_size; + diskann::cout << "File Size: " << _fileSize << std::endl; + _buf = (char *)mmap(NULL, _fileSize, PROT_READ, MAP_PRIVATE, _fd, 0); +#else + _bareFile = + CreateFileA(filename, GENERIC_READ | GENERIC_EXECUTE, 0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); + if (_bareFile == nullptr) + { + std::ostringstream message; + message << "CreateFileA(" << filename << ") failed with error " << GetLastError() << std::endl; + std::cerr << message.str(); + throw std::exception(message.str().c_str()); + } + + _fd = CreateFileMapping(_bareFile, NULL, PAGE_EXECUTE_READ, 0, 0, NULL); + if (_fd == nullptr) + { + std::ostringstream message; + message << "CreateFileMapping(" << filename << ") failed with error " << GetLastError() << std::endl; + std::cerr << message.str() << std::endl; + throw std::exception(message.str().c_str()); + } + + _buf = (char *)MapViewOfFile(_fd, FILE_MAP_READ, 0, 0, 0); + if (_buf == nullptr) + { + std::ostringstream message; + message << "MapViewOfFile(" << filename << ") failed with error: " << GetLastError() << std::endl; + std::cerr << message.str() << std::endl; + throw std::exception(message.str().c_str()); + } + + LARGE_INTEGER fSize; + if (TRUE == GetFileSizeEx(_bareFile, &fSize)) + { + _fileSize = fSize.QuadPart; // take the 64-bit value + diskann::cout << "File Size: " << _fileSize << std::endl; + } + else + { + std::cerr << "Failed to get size of file " << filename << std::endl; + } +#endif +} +char *MemoryMapper::getBuf() +{ + return _buf; +} + +size_t MemoryMapper::getFileSize() +{ + return _fileSize; +} + +MemoryMapper::~MemoryMapper() +{ +#ifndef _WINDOWS + if (munmap(_buf, _fileSize) != 0) + std::cerr << "ERROR unmapping. CHECK!" << std::endl; + close(_fd); +#else + if (FALSE == UnmapViewOfFile(_buf)) + { + std::cerr << "Unmap view of file failed. Error: " << GetLastError() << std::endl; + } + + if (FALSE == CloseHandle(_fd)) + { + std::cerr << "Failed to close memory mapped file. Error: " << GetLastError() << std::endl; + } + + if (FALSE == CloseHandle(_bareFile)) + { + std::cerr << "Failed to close file: " << _fileName << " Error: " << GetLastError() << std::endl; + } + +#endif +} diff --git a/packages/leann-backend-diskann/third_party/DiskANN/src/natural_number_map.cpp b/packages/leann-backend-diskann/third_party/DiskANN/src/natural_number_map.cpp new file mode 100644 index 0000000..a996dcf --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/src/natural_number_map.cpp @@ -0,0 +1,116 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
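+
+// Usage sketch (hypothetical key/value types; the explicit instantiations at the
+// bottom of this file list the combinations actually compiled):
+//   diskann::natural_number_map<uint32_t, int32_t> tag_map;
+//   tag_map.set(42, -1);                  // grows the internal bitset/vector on demand
+//   int32_t v;
+//   bool found = tag_map.try_get(42, v);  // presence is tracked in a dynamic_bitset
+//   tag_map.erase(42);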
+ +#include +#include + +#include "natural_number_map.h" +#include "tag_uint128.h" + +namespace diskann +{ +static constexpr auto invalid_position = boost::dynamic_bitset<>::npos; + +template +natural_number_map::natural_number_map() + : _size(0), _values_bitset(std::make_unique>()) +{ +} + +template void natural_number_map::reserve(size_t count) +{ + _values_vector.reserve(count); + _values_bitset->reserve(count); +} + +template size_t natural_number_map::size() const +{ + return _size; +} + +template void natural_number_map::set(Key key, Value value) +{ + if (key >= _values_bitset->size()) + { + _values_bitset->resize(static_cast(key) + 1); + _values_vector.resize(_values_bitset->size()); + } + + _values_vector[key] = value; + const bool was_present = _values_bitset->test_set(key, true); + + if (!was_present) + { + ++_size; + } +} + +template void natural_number_map::erase(Key key) +{ + if (key < _values_bitset->size()) + { + const bool was_present = _values_bitset->test_set(key, false); + + if (was_present) + { + --_size; + } + } +} + +template bool natural_number_map::contains(Key key) const +{ + return key < _values_bitset->size() && _values_bitset->test(key); +} + +template bool natural_number_map::try_get(Key key, Value &value) const +{ + if (!contains(key)) + { + return false; + } + + value = _values_vector[key]; + return true; +} + +template +typename natural_number_map::position natural_number_map::find_first() const +{ + return position{_size > 0 ? _values_bitset->find_first() : invalid_position, 0}; +} + +template +typename natural_number_map::position natural_number_map::find_next( + const position &after_position) const +{ + return position{after_position._keys_already_enumerated < _size ? _values_bitset->find_next(after_position._key) + : invalid_position, + after_position._keys_already_enumerated + 1}; +} + +template bool natural_number_map::position::is_valid() const +{ + return _key != invalid_position; +} + +template Value natural_number_map::get(const position &pos) const +{ + assert(pos.is_valid()); + return _values_vector[pos._key]; +} + +template void natural_number_map::clear() +{ + _size = 0; + _values_vector.clear(); + _values_bitset->clear(); +} + +// Instantiate used templates. +template class natural_number_map; +template class natural_number_map; +template class natural_number_map; +template class natural_number_map; +template class natural_number_map; +} // namespace diskann diff --git a/packages/leann-backend-diskann/third_party/DiskANN/src/natural_number_set.cpp b/packages/leann-backend-diskann/third_party/DiskANN/src/natural_number_set.cpp new file mode 100644 index 0000000..b36cb52 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/src/natural_number_set.cpp @@ -0,0 +1,70 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
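+
+// Usage sketch (hypothetical element type; the explicit instantiation at the end
+// of this file shows the type actually compiled):
+//   diskann::natural_number_set<uint32_t> free_slots;
+//   free_slots.insert(7);                    // recorded in both a vector and a bitset
+//   bool present = free_slots.is_in_set(7);
+//   uint32_t id = free_slots.pop_any();      // throws ANNException when the set is empty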
+ +#include + +#include "ann_exception.h" +#include "natural_number_set.h" + +namespace diskann +{ +template +natural_number_set::natural_number_set() : _values_bitset(std::make_unique>()) +{ +} + +template bool natural_number_set::is_empty() const +{ + return _values_vector.empty(); +} + +template void natural_number_set::reserve(size_t count) +{ + _values_vector.reserve(count); + _values_bitset->reserve(count); +} + +template void natural_number_set::insert(T id) +{ + _values_vector.emplace_back(id); + + if (id >= _values_bitset->size()) + _values_bitset->resize(static_cast(id) + 1); + + _values_bitset->set(id, true); +} + +template T natural_number_set::pop_any() +{ + if (_values_vector.empty()) + { + throw diskann::ANNException("No values available", -1, __FUNCSIG__, __FILE__, __LINE__); + } + + const T id = _values_vector.back(); + _values_vector.pop_back(); + + _values_bitset->set(id, false); + + return id; +} + +template void natural_number_set::clear() +{ + _values_vector.clear(); + _values_bitset->clear(); +} + +template size_t natural_number_set::size() const +{ + return _values_vector.size(); +} + +template bool natural_number_set::is_in_set(T id) const +{ + return _values_bitset->test(id); +} + +// Instantiate used templates. +template class natural_number_set; +} // namespace diskann diff --git a/packages/leann-backend-diskann/third_party/DiskANN/src/partition.cpp b/packages/leann-backend-diskann/third_party/DiskANN/src/partition.cpp new file mode 100644 index 0000000..7e100ad --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/src/partition.cpp @@ -0,0 +1,657 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#include +#include +#include +#include +#include + +#include +#include "tsl/robin_map.h" +#include "tsl/robin_set.h" + +#if defined(DISKANN_RELEASE_UNUSED_TCMALLOC_MEMORY_AT_CHECKPOINTS) && defined(DISKANN_BUILD) +#include "gperftools/malloc_extension.h" +#endif + +#include "utils.h" +#include "math_utils.h" +#include "index.h" +#include "parameters.h" +#include "memory_mapper.h" +#include "partition.h" +#ifdef _WINDOWS +#include +#endif + +// block size for reading/ processing large files and matrices in blocks +#define BLOCK_SIZE 5000000 + +// #define SAVE_INFLATED_PQ true + +template +void gen_random_slice(const std::string base_file, const std::string output_prefix, double sampling_rate) +{ + size_t read_blk_size = 64 * 1024 * 1024; + cached_ifstream base_reader(base_file.c_str(), read_blk_size); + std::ofstream sample_writer(std::string(output_prefix + "_data.bin").c_str(), std::ios::binary); + std::ofstream sample_id_writer(std::string(output_prefix + "_ids.bin").c_str(), std::ios::binary); + + std::random_device rd; // Will be used to obtain a seed for the random number engine + auto x = rd(); + std::mt19937 generator(x); // Standard mersenne_twister_engine seeded with rd() + std::uniform_real_distribution distribution(0, 1); + + size_t npts, nd; + uint32_t npts_u32, nd_u32; + uint32_t num_sampled_pts_u32 = 0; + uint32_t one_const = 1; + + base_reader.read((char *)&npts_u32, sizeof(uint32_t)); + base_reader.read((char *)&nd_u32, sizeof(uint32_t)); + diskann::cout << "Loading base " << base_file << ". #points: " << npts_u32 << ". #dim: " << nd_u32 << "." 
+ << std::endl; + sample_writer.write((char *)&num_sampled_pts_u32, sizeof(uint32_t)); + sample_writer.write((char *)&nd_u32, sizeof(uint32_t)); + sample_id_writer.write((char *)&num_sampled_pts_u32, sizeof(uint32_t)); + sample_id_writer.write((char *)&one_const, sizeof(uint32_t)); + + npts = npts_u32; + nd = nd_u32; + std::unique_ptr cur_row = std::make_unique(nd); + + for (size_t i = 0; i < npts; i++) + { + base_reader.read((char *)cur_row.get(), sizeof(T) * nd); + float sample = distribution(generator); + if (sample < sampling_rate) + { + sample_writer.write((char *)cur_row.get(), sizeof(T) * nd); + uint32_t cur_i_u32 = (uint32_t)i; + sample_id_writer.write((char *)&cur_i_u32, sizeof(uint32_t)); + num_sampled_pts_u32++; + } + } + sample_writer.seekp(0, std::ios::beg); + sample_writer.write((char *)&num_sampled_pts_u32, sizeof(uint32_t)); + sample_id_writer.seekp(0, std::ios::beg); + sample_id_writer.write((char *)&num_sampled_pts_u32, sizeof(uint32_t)); + sample_writer.close(); + sample_id_writer.close(); + diskann::cout << "Wrote " << num_sampled_pts_u32 << " points to sample file: " << output_prefix + "_data.bin" + << std::endl; +} + +// streams data from the file, and samples each vector with probability p_val +// and returns a matrix of size slice_size* ndims as floating point type. +// the slice_size and ndims are set inside the function. + +/*********************************** + * Reimplement using gen_random_slice(const T* inputdata,...) + ************************************/ + +template +void gen_random_slice(const std::string data_file, double p_val, float *&sampled_data, size_t &slice_size, + size_t &ndims) +{ + size_t npts; + uint32_t npts32, ndims32; + std::vector> sampled_vectors; + + // amount to read in one shot + size_t read_blk_size = 64 * 1024 * 1024; + // create cached reader + writer + cached_ifstream base_reader(data_file.c_str(), read_blk_size); + + // metadata: npts, ndims + base_reader.read((char *)&npts32, sizeof(uint32_t)); + base_reader.read((char *)&ndims32, sizeof(uint32_t)); + npts = npts32; + ndims = ndims32; + + std::unique_ptr cur_vector_T = std::make_unique(ndims); + p_val = p_val < 1 ? p_val : 1; + + std::random_device rd; // Will be used to obtain a seed for the random number + size_t x = rd(); + std::mt19937 generator((uint32_t)x); + std::uniform_real_distribution distribution(0, 1); + + for (size_t i = 0; i < npts; i++) + { + base_reader.read((char *)cur_vector_T.get(), ndims * sizeof(T)); + float rnd_val = distribution(generator); + if (rnd_val < p_val) + { + std::vector cur_vector_float; + for (size_t d = 0; d < ndims; d++) + cur_vector_float.push_back(cur_vector_T[d]); + sampled_vectors.push_back(cur_vector_float); + } + } + slice_size = sampled_vectors.size(); + sampled_data = new float[slice_size * ndims]; + for (size_t i = 0; i < slice_size; i++) + { + for (size_t j = 0; j < ndims; j++) + { + sampled_data[i * ndims + j] = sampled_vectors[i][j]; + } + } +} + +// same as above, but samples from the matrix inputdata instead of a file of +// npts*ndims to return sampled_data of size slice_size*ndims. +template +void gen_random_slice(const T *inputdata, size_t npts, size_t ndims, double p_val, float *&sampled_data, + size_t &slice_size) +{ + std::vector> sampled_vectors; + const T *cur_vector_T; + + p_val = p_val < 1 ? 
p_val : 1; + + std::random_device rd; // Will be used to obtain a seed for the random number engine + size_t x = rd(); + std::mt19937 generator((uint32_t)x); // Standard mersenne_twister_engine seeded with rd() + std::uniform_real_distribution distribution(0, 1); + + for (size_t i = 0; i < npts; i++) + { + cur_vector_T = inputdata + ndims * i; + float rnd_val = distribution(generator); + if (rnd_val < p_val) + { + std::vector cur_vector_float; + for (size_t d = 0; d < ndims; d++) + cur_vector_float.push_back(cur_vector_T[d]); + sampled_vectors.push_back(cur_vector_float); + } + } + slice_size = sampled_vectors.size(); + sampled_data = new float[slice_size * ndims]; + for (size_t i = 0; i < slice_size; i++) + { + for (size_t j = 0; j < ndims; j++) + { + sampled_data[i * ndims + j] = sampled_vectors[i][j]; + } + } +} + +int estimate_cluster_sizes(float *test_data_float, size_t num_test, float *pivots, const size_t num_centers, + const size_t test_dim, const size_t k_base, std::vector &cluster_sizes) +{ + cluster_sizes.clear(); + + size_t *shard_counts = new size_t[num_centers]; + + for (size_t i = 0; i < num_centers; i++) + { + shard_counts[i] = 0; + } + + size_t block_size = num_test <= BLOCK_SIZE ? num_test : BLOCK_SIZE; + uint32_t *block_closest_centers = new uint32_t[block_size * k_base]; + float *block_data_float; + + size_t num_blocks = DIV_ROUND_UP(num_test, block_size); + + for (size_t block = 0; block < num_blocks; block++) + { + size_t start_id = block * block_size; + size_t end_id = (std::min)((block + 1) * block_size, num_test); + size_t cur_blk_size = end_id - start_id; + + block_data_float = test_data_float + start_id * test_dim; + + math_utils::compute_closest_centers(block_data_float, cur_blk_size, test_dim, pivots, num_centers, k_base, + block_closest_centers); + + for (size_t p = 0; p < cur_blk_size; p++) + { + for (size_t p1 = 0; p1 < k_base; p1++) + { + size_t shard_id = block_closest_centers[p * k_base + p1]; + shard_counts[shard_id]++; + } + } + } + + diskann::cout << "Estimated cluster sizes: "; + for (size_t i = 0; i < num_centers; i++) + { + uint32_t cur_shard_count = (uint32_t)shard_counts[i]; + cluster_sizes.push_back((size_t)cur_shard_count); + diskann::cout << cur_shard_count << " "; + } + diskann::cout << std::endl; + delete[] shard_counts; + delete[] block_closest_centers; + return 0; +} + +template +int shard_data_into_clusters(const std::string data_file, float *pivots, const size_t num_centers, const size_t dim, + const size_t k_base, std::string prefix_path) +{ + size_t read_blk_size = 64 * 1024 * 1024; + // uint64_t write_blk_size = 64 * 1024 * 1024; + // create cached reader + writer + cached_ifstream base_reader(data_file, read_blk_size); + uint32_t npts32; + uint32_t basedim32; + base_reader.read((char *)&npts32, sizeof(uint32_t)); + base_reader.read((char *)&basedim32, sizeof(uint32_t)); + size_t num_points = npts32; + if (basedim32 != dim) + { + diskann::cout << "Error. 
dimensions dont match for train set and base set" << std::endl; + return -1; + } + + std::unique_ptr shard_counts = std::make_unique(num_centers); + std::vector shard_data_writer(num_centers); + std::vector shard_idmap_writer(num_centers); + uint32_t dummy_size = 0; + uint32_t const_one = 1; + + for (size_t i = 0; i < num_centers; i++) + { + std::string data_filename = prefix_path + "_subshard-" + std::to_string(i) + ".bin"; + std::string idmap_filename = prefix_path + "_subshard-" + std::to_string(i) + "_ids_uint32.bin"; + shard_data_writer[i] = std::ofstream(data_filename.c_str(), std::ios::binary); + shard_idmap_writer[i] = std::ofstream(idmap_filename.c_str(), std::ios::binary); + shard_data_writer[i].write((char *)&dummy_size, sizeof(uint32_t)); + shard_data_writer[i].write((char *)&basedim32, sizeof(uint32_t)); + shard_idmap_writer[i].write((char *)&dummy_size, sizeof(uint32_t)); + shard_idmap_writer[i].write((char *)&const_one, sizeof(uint32_t)); + shard_counts[i] = 0; + } + + size_t block_size = num_points <= BLOCK_SIZE ? num_points : BLOCK_SIZE; + std::unique_ptr block_closest_centers = std::make_unique(block_size * k_base); + std::unique_ptr block_data_T = std::make_unique(block_size * dim); + std::unique_ptr block_data_float = std::make_unique(block_size * dim); + + size_t num_blocks = DIV_ROUND_UP(num_points, block_size); + + for (size_t block = 0; block < num_blocks; block++) + { + size_t start_id = block * block_size; + size_t end_id = (std::min)((block + 1) * block_size, num_points); + size_t cur_blk_size = end_id - start_id; + + base_reader.read((char *)block_data_T.get(), sizeof(T) * (cur_blk_size * dim)); + diskann::convert_types(block_data_T.get(), block_data_float.get(), cur_blk_size, dim); + + math_utils::compute_closest_centers(block_data_float.get(), cur_blk_size, dim, pivots, num_centers, k_base, + block_closest_centers.get()); + + for (size_t p = 0; p < cur_blk_size; p++) + { + for (size_t p1 = 0; p1 < k_base; p1++) + { + size_t shard_id = block_closest_centers[p * k_base + p1]; + uint32_t original_point_map_id = (uint32_t)(start_id + p); + shard_data_writer[shard_id].write((char *)(block_data_T.get() + p * dim), sizeof(T) * dim); + shard_idmap_writer[shard_id].write((char *)&original_point_map_id, sizeof(uint32_t)); + shard_counts[shard_id]++; + } + } + } + + size_t total_count = 0; + diskann::cout << "Actual shard sizes: " << std::flush; + for (size_t i = 0; i < num_centers; i++) + { + uint32_t cur_shard_count = (uint32_t)shard_counts[i]; + total_count += cur_shard_count; + diskann::cout << cur_shard_count << " "; + shard_data_writer[i].seekp(0); + shard_data_writer[i].write((char *)&cur_shard_count, sizeof(uint32_t)); + shard_data_writer[i].close(); + shard_idmap_writer[i].seekp(0); + shard_idmap_writer[i].write((char *)&cur_shard_count, sizeof(uint32_t)); + shard_idmap_writer[i].close(); + } + + diskann::cout << "\n Partitioned " << num_points << " with replication factor " << k_base << " to get " + << total_count << " points across " << num_centers << " shards " << std::endl; + return 0; +} + +// useful for partitioning large dataset. we first generate only the IDS for +// each shard, and retrieve the actual vectors on demand. 
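+// For every center i this writes <prefix_path>_subshard-<i>_ids_uint32.bin, whose
+// payload is the list of base point ids assigned to that shard (with replication
+// factor k_base); retrieve_shard_data_from_ids() further below streams the base
+// file once to materialize the vectors of a single shard from such an id file.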
+template +int shard_data_into_clusters_only_ids(const std::string data_file, float *pivots, const size_t num_centers, + const size_t dim, const size_t k_base, std::string prefix_path) +{ + size_t read_blk_size = 64 * 1024 * 1024; + // uint64_t write_blk_size = 64 * 1024 * 1024; + // create cached reader + writer + cached_ifstream base_reader(data_file, read_blk_size); + uint32_t npts32; + uint32_t basedim32; + base_reader.read((char *)&npts32, sizeof(uint32_t)); + base_reader.read((char *)&basedim32, sizeof(uint32_t)); + size_t num_points = npts32; + if (basedim32 != dim) + { + diskann::cout << "Error. dimensions dont match for train set and base set" << std::endl; + return -1; + } + + std::unique_ptr shard_counts = std::make_unique(num_centers); + + std::vector shard_idmap_writer(num_centers); + uint32_t dummy_size = 0; + uint32_t const_one = 1; + + for (size_t i = 0; i < num_centers; i++) + { + std::string idmap_filename = prefix_path + "_subshard-" + std::to_string(i) + "_ids_uint32.bin"; + shard_idmap_writer[i] = std::ofstream(idmap_filename.c_str(), std::ios::binary); + shard_idmap_writer[i].write((char *)&dummy_size, sizeof(uint32_t)); + shard_idmap_writer[i].write((char *)&const_one, sizeof(uint32_t)); + shard_counts[i] = 0; + } + + size_t block_size = num_points <= BLOCK_SIZE ? num_points : BLOCK_SIZE; + std::unique_ptr block_closest_centers = std::make_unique(block_size * k_base); + std::unique_ptr block_data_T = std::make_unique(block_size * dim); + std::unique_ptr block_data_float = std::make_unique(block_size * dim); + + size_t num_blocks = DIV_ROUND_UP(num_points, block_size); + + for (size_t block = 0; block < num_blocks; block++) + { + size_t start_id = block * block_size; + size_t end_id = (std::min)((block + 1) * block_size, num_points); + size_t cur_blk_size = end_id - start_id; + + base_reader.read((char *)block_data_T.get(), sizeof(T) * (cur_blk_size * dim)); + diskann::convert_types(block_data_T.get(), block_data_float.get(), cur_blk_size, dim); + + math_utils::compute_closest_centers(block_data_float.get(), cur_blk_size, dim, pivots, num_centers, k_base, + block_closest_centers.get()); + + for (size_t p = 0; p < cur_blk_size; p++) + { + for (size_t p1 = 0; p1 < k_base; p1++) + { + size_t shard_id = block_closest_centers[p * k_base + p1]; + uint32_t original_point_map_id = (uint32_t)(start_id + p); + shard_idmap_writer[shard_id].write((char *)&original_point_map_id, sizeof(uint32_t)); + shard_counts[shard_id]++; + } + } + } + + size_t total_count = 0; + diskann::cout << "Actual shard sizes: " << std::flush; + for (size_t i = 0; i < num_centers; i++) + { + uint32_t cur_shard_count = (uint32_t)shard_counts[i]; + total_count += cur_shard_count; + diskann::cout << cur_shard_count << " "; + shard_idmap_writer[i].seekp(0); + shard_idmap_writer[i].write((char *)&cur_shard_count, sizeof(uint32_t)); + shard_idmap_writer[i].close(); + } + + diskann::cout << "\n Partitioned " << num_points << " with replication factor " << k_base << " to get " + << total_count << " points across " << num_centers << " shards " << std::endl; + return 0; +} + +template +int retrieve_shard_data_from_ids(const std::string data_file, std::string idmap_filename, std::string data_filename) +{ + size_t read_blk_size = 64 * 1024 * 1024; + // uint64_t write_blk_size = 64 * 1024 * 1024; + // create cached reader + writer + cached_ifstream base_reader(data_file, read_blk_size); + uint32_t npts32; + uint32_t basedim32; + base_reader.read((char *)&npts32, sizeof(uint32_t)); + base_reader.read((char 
*)&basedim32, sizeof(uint32_t)); + size_t num_points = npts32; + size_t dim = basedim32; + + uint32_t dummy_size = 0; + + std::ofstream shard_data_writer(data_filename.c_str(), std::ios::binary); + shard_data_writer.write((char *)&dummy_size, sizeof(uint32_t)); + shard_data_writer.write((char *)&basedim32, sizeof(uint32_t)); + + uint32_t *shard_ids; + size_t shard_size, tmp; + diskann::load_bin(idmap_filename, shard_ids, shard_size, tmp); + + uint32_t cur_pos = 0; + uint32_t num_written = 0; + std::cout << "Shard has " << shard_size << " points" << std::endl; + + size_t block_size = num_points <= BLOCK_SIZE ? num_points : BLOCK_SIZE; + std::unique_ptr block_data_T = std::make_unique(block_size * dim); + + size_t num_blocks = DIV_ROUND_UP(num_points, block_size); + + for (size_t block = 0; block < num_blocks; block++) + { + size_t start_id = block * block_size; + size_t end_id = (std::min)((block + 1) * block_size, num_points); + size_t cur_blk_size = end_id - start_id; + + base_reader.read((char *)block_data_T.get(), sizeof(T) * (cur_blk_size * dim)); + + for (size_t p = 0; p < cur_blk_size; p++) + { + uint32_t original_point_map_id = (uint32_t)(start_id + p); + if (cur_pos == shard_size) + break; + if (original_point_map_id == shard_ids[cur_pos]) + { + cur_pos++; + shard_data_writer.write((char *)(block_data_T.get() + p * dim), sizeof(T) * dim); + num_written++; + } + } + if (cur_pos == shard_size) + break; + } + + diskann::cout << "Written file with " << num_written << " points" << std::endl; + + shard_data_writer.seekp(0); + shard_data_writer.write((char *)&num_written, sizeof(uint32_t)); + shard_data_writer.close(); + delete[] shard_ids; + return 0; +} + +// partitions a large base file into many shards using k-means hueristic +// on a random sample generated using sampling_rate probability. After this, it +// assignes each base point to the closest k_base nearest centers and creates +// the shards. +// The total number of points across all shards will be k_base * num_points. + +template +int partition(const std::string data_file, const float sampling_rate, size_t num_parts, size_t max_k_means_reps, + const std::string prefix_path, size_t k_base) +{ + size_t train_dim; + size_t num_train; + float *train_data_float; + + gen_random_slice(data_file, sampling_rate, train_data_float, num_train, train_dim); + + float *pivot_data; + + std::string cur_file = std::string(prefix_path); + std::string output_file; + + // kmeans_partitioning on training data + + // cur_file = cur_file + "_kmeans_partitioning-" + + // std::to_string(num_parts); + output_file = cur_file + "_centroids.bin"; + + pivot_data = new float[num_parts * train_dim]; + + // Process Global k-means for kmeans_partitioning Step + diskann::cout << "Processing global k-means (kmeans_partitioning Step)" << std::endl; + kmeans::kmeanspp_selecting_pivots(train_data_float, num_train, train_dim, pivot_data, num_parts); + + kmeans::run_lloyds(train_data_float, num_train, train_dim, pivot_data, num_parts, max_k_means_reps, NULL, NULL); + + diskann::cout << "Saving global k-center pivots" << std::endl; + diskann::save_bin(output_file.c_str(), pivot_data, (size_t)num_parts, train_dim); + + // now pivots are ready. need to stream base points and assign them to + // closest clusters. 
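+ // (With k_base > 1 every base point is written to its k_base closest centers,
+ // so the shards overlap and together hold k_base * num_points points.)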
+ + shard_data_into_clusters(data_file, pivot_data, num_parts, train_dim, k_base, prefix_path); + delete[] pivot_data; + delete[] train_data_float; + return 0; +} + +template +int partition_with_ram_budget(const std::string data_file, const double sampling_rate, double ram_budget, + size_t graph_degree, const std::string prefix_path, size_t k_base) +{ + size_t train_dim; + size_t num_train; + float *train_data_float; + size_t max_k_means_reps = 10; + + int num_parts = 3; + bool fit_in_ram = false; + + gen_random_slice(data_file, sampling_rate, train_data_float, num_train, train_dim); + + size_t test_dim; + size_t num_test; + float *test_data_float; + gen_random_slice(data_file, sampling_rate, test_data_float, num_test, test_dim); + + float *pivot_data = nullptr; + + std::string cur_file = std::string(prefix_path); + std::string output_file; + + // kmeans_partitioning on training data + + // cur_file = cur_file + "_kmeans_partitioning-" + + // std::to_string(num_parts); + output_file = cur_file + "_centroids.bin"; + + while (!fit_in_ram) + { + fit_in_ram = true; + + double max_ram_usage = 0; + if (pivot_data != nullptr) + delete[] pivot_data; + + pivot_data = new float[num_parts * train_dim]; + // Process Global k-means for kmeans_partitioning Step + diskann::cout << "Processing global k-means (kmeans_partitioning Step)" << std::endl; + kmeans::kmeanspp_selecting_pivots(train_data_float, num_train, train_dim, pivot_data, num_parts); + + kmeans::run_lloyds(train_data_float, num_train, train_dim, pivot_data, num_parts, max_k_means_reps, NULL, NULL); + + // now pivots are ready. need to stream base points and assign them to + // closest clusters. + + std::vector cluster_sizes; + estimate_cluster_sizes(test_data_float, num_test, pivot_data, num_parts, train_dim, k_base, cluster_sizes); + + for (auto &p : cluster_sizes) + { + // to account for the fact that p is the size of the shard over the + // testing sample. 
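+ // (Dividing by the sampling rate extrapolates the sampled shard size to the
+ // full base set before estimate_ram_usage() is applied; if any shard's estimate
+ // exceeds the budget, num_parts is increased by 2 and k-means is rerun.)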
+ p = (uint64_t)(p / sampling_rate); + double cur_shard_ram_estimate = + diskann::estimate_ram_usage(p, (uint32_t)train_dim, sizeof(T), (uint32_t)graph_degree); + + if (cur_shard_ram_estimate > max_ram_usage) + max_ram_usage = cur_shard_ram_estimate; + } + diskann::cout << "With " << num_parts + << " parts, max estimated RAM usage: " << max_ram_usage / (1024 * 1024 * 1024) + << "GB, budget given is " << ram_budget << std::endl; + if (max_ram_usage > 1024 * 1024 * 1024 * ram_budget) + { + fit_in_ram = false; + num_parts += 2; + } + } + + diskann::cout << "Saving global k-center pivots" << std::endl; + diskann::save_bin(output_file.c_str(), pivot_data, (size_t)num_parts, train_dim); + + shard_data_into_clusters_only_ids(data_file, pivot_data, num_parts, train_dim, k_base, prefix_path); + delete[] pivot_data; + delete[] train_data_float; + delete[] test_data_float; + return num_parts; +} + +// Instantations of supported templates + +template void DISKANN_DLLEXPORT gen_random_slice(const std::string base_file, const std::string output_prefix, + double sampling_rate); +template void DISKANN_DLLEXPORT gen_random_slice(const std::string base_file, const std::string output_prefix, + double sampling_rate); +template void DISKANN_DLLEXPORT gen_random_slice(const std::string base_file, const std::string output_prefix, + double sampling_rate); + +template void DISKANN_DLLEXPORT gen_random_slice(const float *inputdata, size_t npts, size_t ndims, double p_val, + float *&sampled_data, size_t &slice_size); +template void DISKANN_DLLEXPORT gen_random_slice(const uint8_t *inputdata, size_t npts, size_t ndims, + double p_val, float *&sampled_data, size_t &slice_size); +template void DISKANN_DLLEXPORT gen_random_slice(const int8_t *inputdata, size_t npts, size_t ndims, + double p_val, float *&sampled_data, size_t &slice_size); + +template void DISKANN_DLLEXPORT gen_random_slice(const std::string data_file, double p_val, float *&sampled_data, + size_t &slice_size, size_t &ndims); +template void DISKANN_DLLEXPORT gen_random_slice(const std::string data_file, double p_val, + float *&sampled_data, size_t &slice_size, size_t &ndims); +template void DISKANN_DLLEXPORT gen_random_slice(const std::string data_file, double p_val, + float *&sampled_data, size_t &slice_size, size_t &ndims); + +template DISKANN_DLLEXPORT int partition(const std::string data_file, const float sampling_rate, + size_t num_centers, size_t max_k_means_reps, + const std::string prefix_path, size_t k_base); +template DISKANN_DLLEXPORT int partition(const std::string data_file, const float sampling_rate, + size_t num_centers, size_t max_k_means_reps, + const std::string prefix_path, size_t k_base); +template DISKANN_DLLEXPORT int partition(const std::string data_file, const float sampling_rate, + size_t num_centers, size_t max_k_means_reps, + const std::string prefix_path, size_t k_base); + +template DISKANN_DLLEXPORT int partition_with_ram_budget(const std::string data_file, + const double sampling_rate, double ram_budget, + size_t graph_degree, const std::string prefix_path, + size_t k_base); +template DISKANN_DLLEXPORT int partition_with_ram_budget(const std::string data_file, + const double sampling_rate, double ram_budget, + size_t graph_degree, const std::string prefix_path, + size_t k_base); +template DISKANN_DLLEXPORT int partition_with_ram_budget(const std::string data_file, const double sampling_rate, + double ram_budget, size_t graph_degree, + const std::string prefix_path, size_t k_base); + +template DISKANN_DLLEXPORT int 
retrieve_shard_data_from_ids(const std::string data_file, + std::string idmap_filename, + std::string data_filename); +template DISKANN_DLLEXPORT int retrieve_shard_data_from_ids(const std::string data_file, + std::string idmap_filename, + std::string data_filename); +template DISKANN_DLLEXPORT int retrieve_shard_data_from_ids(const std::string data_file, + std::string idmap_filename, + std::string data_filename); diff --git a/packages/leann-backend-diskann/third_party/DiskANN/src/pq.cpp b/packages/leann-backend-diskann/third_party/DiskANN/src/pq.cpp new file mode 100644 index 0000000..d8fbc7f --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/src/pq.cpp @@ -0,0 +1,1214 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#ifdef __APPLE__ +#include +#else +#include "mkl.h" +#if defined(DISKANN_RELEASE_UNUSED_TCMALLOC_MEMORY_AT_CHECKPOINTS) && defined(DISKANN_BUILD) +#include "gperftools/malloc_extension.h" +#endif +#endif +#include "pq.h" +#include "partition.h" +#include "math_utils.h" +#include "tsl/robin_map.h" + +// block size for reading/processing large files and matrices in blocks +#define BLOCK_SIZE 5000000 + +namespace diskann +{ + +#ifdef __APPLE__ +typedef long long int MKL_INT; +#endif + +FixedChunkPQTable::FixedChunkPQTable() +{ +} + +FixedChunkPQTable::~FixedChunkPQTable() +{ +#ifndef EXEC_ENV_OLS + if (tables != nullptr) + delete[] tables; + if (tables_tr != nullptr) + delete[] tables_tr; + if (chunk_offsets != nullptr) + delete[] chunk_offsets; + if (centroid != nullptr) + delete[] centroid; + if (rotmat_tr != nullptr) + delete[] rotmat_tr; +#endif +} + +#ifdef EXEC_ENV_OLS +void FixedChunkPQTable::load_pq_centroid_bin(MemoryMappedFiles &files, const char *pq_table_file, size_t num_chunks) +{ +#else +void FixedChunkPQTable::load_pq_centroid_bin(const char *pq_table_file, size_t num_chunks) +{ +#endif + + size_t nr, nc; + std::string rotmat_file = std::string(pq_table_file) + "_rotation_matrix.bin"; + +#ifdef EXEC_ENV_OLS + size_t *file_offset_data; // since load_bin only sets the pointer, no need + // to delete. + diskann::load_bin(files, pq_table_file, file_offset_data, nr, nc); +#else + std::unique_ptr file_offset_data; + diskann::load_bin(pq_table_file, file_offset_data, nr, nc); +#endif + + bool use_old_filetype = false; + + if (nr != 4 && nr != 5) + { + diskann::cout << "Error reading pq_pivots file " << pq_table_file + << ". Offsets dont contain correct metadata, # offsets = " << nr << ", but expecting " << 4 + << " or " << 5; + throw diskann::ANNException("Error reading pq_pivots file at offsets data.", -1, __FUNCSIG__, __FILE__, + __LINE__); + } + + if (nr == 4) + { + diskann::cout << "Offsets: " << file_offset_data[0] << " " << file_offset_data[1] << " " << file_offset_data[2] + << " " << file_offset_data[3] << std::endl; + } + else if (nr == 5) + { + use_old_filetype = true; + diskann::cout << "Offsets: " << file_offset_data[0] << " " << file_offset_data[1] << " " << file_offset_data[2] + << " " << file_offset_data[3] << file_offset_data[4] << std::endl; + } + else + { + throw diskann::ANNException("Wrong number of offsets in pq_pivots", -1, __FUNCSIG__, __FILE__, __LINE__); + } + +#ifdef EXEC_ENV_OLS + + diskann::load_bin(files, pq_table_file, tables, nr, nc, file_offset_data[0]); +#else + diskann::load_bin(pq_table_file, tables, nr, nc, file_offset_data[0]); +#endif + + if ((nr != NUM_PQ_CENTROIDS)) + { + diskann::cout << "Error reading pq_pivots file " << pq_table_file << ". 
file_num_centers = " << nr + << " but expecting " << NUM_PQ_CENTROIDS << " centers"; + throw diskann::ANNException("Error reading pq_pivots file at pivots data.", -1, __FUNCSIG__, __FILE__, + __LINE__); + } + + this->ndims = nc; + +#ifdef EXEC_ENV_OLS + diskann::load_bin(files, pq_table_file, centroid, nr, nc, file_offset_data[1]); +#else + diskann::load_bin(pq_table_file, centroid, nr, nc, file_offset_data[1]); +#endif + + if ((nr != this->ndims) || (nc != 1)) + { + diskann::cerr << "Error reading centroids from pq_pivots file " << pq_table_file << ". file_dim = " << nr + << ", file_cols = " << nc << " but expecting " << this->ndims << " entries in 1 dimension."; + throw diskann::ANNException("Error reading pq_pivots file at centroid data.", -1, __FUNCSIG__, __FILE__, + __LINE__); + } + + int chunk_offsets_index = 2; + if (use_old_filetype) + { + chunk_offsets_index = 3; + } +#ifdef EXEC_ENV_OLS + diskann::load_bin(files, pq_table_file, chunk_offsets, nr, nc, file_offset_data[chunk_offsets_index]); +#else + diskann::load_bin(pq_table_file, chunk_offsets, nr, nc, file_offset_data[chunk_offsets_index]); +#endif + + if (nc != 1 || (nr != num_chunks + 1 && num_chunks != 0)) + { + diskann::cerr << "Error loading chunk offsets file. numc: " << nc << " (should be 1). numr: " << nr + << " (should be " << num_chunks + 1 << " or 0 if we need to infer)" << std::endl; + throw diskann::ANNException("Error loading chunk offsets file", -1, __FUNCSIG__, __FILE__, __LINE__); + } + + this->n_chunks = nr - 1; + diskann::cout << "Loaded PQ Pivots: #ctrs: " << NUM_PQ_CENTROIDS << ", #dims: " << this->ndims + << ", #chunks: " << this->n_chunks << std::endl; + +#ifdef EXEC_ENV_OLS + if (files.fileExists(rotmat_file)) + { + diskann::load_bin(files, rotmat_file, (float *&)rotmat_tr, nr, nc); +#else + if (file_exists(rotmat_file)) + { + diskann::load_bin(rotmat_file, rotmat_tr, nr, nc); +#endif + if (nr != this->ndims || nc != this->ndims) + { + diskann::cerr << "Error loading rotation matrix file" << std::endl; + throw diskann::ANNException("Error loading rotation matrix file", -1, __FUNCSIG__, __FILE__, __LINE__); + } + use_rotation = true; + } + + // alloc and compute transpose + tables_tr = new float[256 * this->ndims]; + for (size_t i = 0; i < 256; i++) + { + for (size_t j = 0; j < this->ndims; j++) + { + tables_tr[j * 256 + i] = tables[i * this->ndims + j]; + } + } +} + +uint32_t FixedChunkPQTable::get_num_chunks() +{ + return static_cast(n_chunks); +} + +void FixedChunkPQTable::preprocess_query(float *query_vec) +{ + for (uint32_t d = 0; d < ndims; d++) + { + query_vec[d] -= centroid[d]; + } + std::vector tmp(ndims, 0); + if (use_rotation) + { + for (uint32_t d = 0; d < ndims; d++) + { + for (uint32_t d1 = 0; d1 < ndims; d1++) + { + tmp[d] += query_vec[d1] * rotmat_tr[d1 * ndims + d]; + } + } + std::memcpy(query_vec, tmp.data(), ndims * sizeof(float)); + } +} + +// assumes pre-processed query +void FixedChunkPQTable::populate_chunk_distances(const float *query_vec, float *dist_vec) +{ + memset(dist_vec, 0, 256 * n_chunks * sizeof(float)); + // chunk wise distance computation + for (size_t chunk = 0; chunk < n_chunks; chunk++) + { + // sum (q-c)^2 for the dimensions associated with this chunk + float *chunk_dists = dist_vec + (256 * chunk); + for (size_t j = chunk_offsets[chunk]; j < chunk_offsets[chunk + 1]; j++) + { + const float *centers_dim_vec = tables_tr + (256 * j); + for (size_t idx = 0; idx < 256; idx++) + { + double diff = centers_dim_vec[idx] - (query_vec[j]); + chunk_dists[idx] += (float)(diff * 
diff); + } + } + } +} + +float FixedChunkPQTable::l2_distance(const float *query_vec, uint8_t *base_vec) +{ + float res = 0; + for (size_t chunk = 0; chunk < n_chunks; chunk++) + { + for (size_t j = chunk_offsets[chunk]; j < chunk_offsets[chunk + 1]; j++) + { + const float *centers_dim_vec = tables_tr + (256 * j); + float diff = centers_dim_vec[base_vec[chunk]] - (query_vec[j]); + res += diff * diff; + } + } + return res; +} + +float FixedChunkPQTable::inner_product(const float *query_vec, uint8_t *base_vec) +{ + float res = 0; + for (size_t chunk = 0; chunk < n_chunks; chunk++) + { + for (size_t j = chunk_offsets[chunk]; j < chunk_offsets[chunk + 1]; j++) + { + const float *centers_dim_vec = tables_tr + (256 * j); + float diff = centers_dim_vec[base_vec[chunk]] * query_vec[j]; // assumes centroid is 0 to + // prevent translation errors + res += diff; + } + } + return -res; // returns negative value to simulate distances (max -> min + // conversion) +} + +// assumes no rotation is involved +void FixedChunkPQTable::inflate_vector(uint8_t *base_vec, float *out_vec) +{ + for (size_t chunk = 0; chunk < n_chunks; chunk++) + { + for (size_t j = chunk_offsets[chunk]; j < chunk_offsets[chunk + 1]; j++) + { + const float *centers_dim_vec = tables_tr + (256 * j); + out_vec[j] = centers_dim_vec[base_vec[chunk]] + centroid[j]; + } + } +} + +void FixedChunkPQTable::populate_chunk_inner_products(const float *query_vec, float *dist_vec) +{ + memset(dist_vec, 0, 256 * n_chunks * sizeof(float)); + // chunk wise distance computation + for (size_t chunk = 0; chunk < n_chunks; chunk++) + { + // sum (q-c)^2 for the dimensions associated with this chunk + float *chunk_dists = dist_vec + (256 * chunk); + for (size_t j = chunk_offsets[chunk]; j < chunk_offsets[chunk + 1]; j++) + { + const float *centers_dim_vec = tables_tr + (256 * j); + for (size_t idx = 0; idx < 256; idx++) + { + double prod = centers_dim_vec[idx] * query_vec[j]; // assumes that we are not + // shifting the vectors to + // mean zero, i.e., centroid + // array should be all zeros + chunk_dists[idx] -= (float)prod; // returning negative to keep the search code + // clean (max inner product vs min distance) + } + } + } +} + +void aggregate_coords(const std::vector &ids, const uint8_t *all_coords, const uint64_t ndims, uint8_t *out) +{ + for (size_t i = 0; i < ids.size(); i++) + { + memcpy(out + i * ndims, all_coords + ids[i] * ndims, ndims * sizeof(uint8_t)); + } +} + +void pq_dist_lookup(const uint8_t *pq_ids, const size_t n_pts, const size_t pq_nchunks, const float *pq_dists, + std::vector &dists_out) +{ + //_mm_prefetch((char*) dists_out, _MM_HINT_T0); + _mm_prefetch((char *)pq_ids, _MM_HINT_T0); + _mm_prefetch((char *)(pq_ids + 64), _MM_HINT_T0); + _mm_prefetch((char *)(pq_ids + 128), _MM_HINT_T0); + dists_out.clear(); + dists_out.resize(n_pts, 0); + for (size_t chunk = 0; chunk < pq_nchunks; chunk++) + { + const float *chunk_dists = pq_dists + 256 * chunk; + if (chunk < pq_nchunks - 1) + { + _mm_prefetch((char *)(chunk_dists + 256), _MM_HINT_T0); + } + for (size_t idx = 0; idx < n_pts; idx++) + { + uint8_t pq_centerid = pq_ids[pq_nchunks * idx + chunk]; + dists_out[idx] += chunk_dists[pq_centerid]; + } + } +} + +// Need to replace calls to these functions with calls to vector& based +// functions above +void aggregate_coords(const uint32_t *ids, const uint64_t n_ids, const uint8_t *all_coords, const uint64_t ndims, + uint8_t *out) +{ + for (size_t i = 0; i < n_ids; i++) + { + memcpy(out + i * ndims, all_coords + ids[i] * ndims, ndims * 
sizeof(uint8_t)); + } +} + +void pq_dist_lookup(const uint8_t *pq_ids, const size_t n_pts, const size_t pq_nchunks, const float *pq_dists, + float *dists_out) +{ + _mm_prefetch((char *)dists_out, _MM_HINT_T0); + _mm_prefetch((char *)pq_ids, _MM_HINT_T0); + _mm_prefetch((char *)(pq_ids + 64), _MM_HINT_T0); + _mm_prefetch((char *)(pq_ids + 128), _MM_HINT_T0); + memset(dists_out, 0, n_pts * sizeof(float)); + for (size_t chunk = 0; chunk < pq_nchunks; chunk++) + { + const float *chunk_dists = pq_dists + 256 * chunk; + if (chunk < pq_nchunks - 1) + { + _mm_prefetch((char *)(chunk_dists + 256), _MM_HINT_T0); + } + for (size_t idx = 0; idx < n_pts; idx++) + { + uint8_t pq_centerid = pq_ids[pq_nchunks * idx + chunk]; + dists_out[idx] += chunk_dists[pq_centerid]; + } + } +} + +// generate_pq_pivots_simplified is a simplified version of generate_pq_pivots. +// Input is provided in the in-memory buffer train_data. +// Output is stored in the in-memory buffer pivot_data_vector. +// Simplification is based on the following assumptions: +// dim % num_pq_chunks == 0 +// num_centers == 256 by default +// KMEANS_ITERS_FOR_PQ == 15 by default +// make_zero_mean is false by default. +// These assumptions allow to make the function much simpler and avoid storing +// array of chunk_offsets and centroids. +// The compiler pragma for multi-threading support is removed from this implementation +// for the purpose of integration into systems that strictly control resource allocation. +int generate_pq_pivots_simplified(const float *train_data, size_t num_train, size_t dim, size_t num_pq_chunks, + std::vector &pivot_data_vector) +{ + if (num_pq_chunks > dim || dim % num_pq_chunks != 0) + { + return -1; + } + + const size_t num_centers = 256; + const size_t cur_chunk_size = dim / num_pq_chunks; + const uint32_t KMEANS_ITERS_FOR_PQ = 15; + + pivot_data_vector.resize(num_centers * dim); + std::vector cur_pivot_data_vector(num_centers * cur_chunk_size); + std::vector cur_data_vector(num_train * cur_chunk_size); + std::vector closest_center_vector(num_train); + + float *pivot_data = &pivot_data_vector[0]; + float *cur_pivot_data = &cur_pivot_data_vector[0]; + float *cur_data = &cur_data_vector[0]; + uint32_t *closest_center = &closest_center_vector[0]; + + for (size_t i = 0; i < num_pq_chunks; i++) + { + size_t chunk_offset = cur_chunk_size * i; + + for (int32_t j = 0; j < num_train; j++) + { + std::memcpy(cur_data + j * cur_chunk_size, train_data + j * dim + chunk_offset, + cur_chunk_size * sizeof(float)); + } + + kmeans::kmeanspp_selecting_pivots(cur_data, num_train, cur_chunk_size, cur_pivot_data, num_centers); + + kmeans::run_lloyds(cur_data, num_train, cur_chunk_size, cur_pivot_data, num_centers, KMEANS_ITERS_FOR_PQ, NULL, + closest_center); + + for (uint64_t j = 0; j < num_centers; j++) + { + std::memcpy(pivot_data + j * dim + chunk_offset, cur_pivot_data + j * cur_chunk_size, + cur_chunk_size * sizeof(float)); + } + } + + return 0; +} + +// given training data in train_data of dimensions num_train * dim, generate +// PQ pivots using k-means algorithm to partition the co-ordinates into +// num_pq_chunks (if it divides dimension, else rounded) chunks, and runs +// k-means in each chunk to compute the PQ pivots and stores in bin format in +// file pq_pivots_path as a s num_centers*dim floating point binary file +int generate_pq_pivots(const float *const passed_train_data, size_t num_train, uint32_t dim, uint32_t num_centers, + uint32_t num_pq_chunks, uint32_t max_k_means_reps, std::string pq_pivots_path, + bool 
make_zero_mean) +{ + if (num_pq_chunks > dim) + { + diskann::cout << " Error: number of chunks more than dimension" << std::endl; + return -1; + } + + std::unique_ptr train_data = std::make_unique(num_train * dim); + std::memcpy(train_data.get(), passed_train_data, num_train * dim * sizeof(float)); + + std::unique_ptr full_pivot_data; + + if (file_exists(pq_pivots_path)) + { + size_t file_dim, file_num_centers; + diskann::load_bin(pq_pivots_path, full_pivot_data, file_num_centers, file_dim, METADATA_SIZE); + if (file_dim == dim && file_num_centers == num_centers) + { + diskann::cout << "PQ pivot file exists. Not generating again" << std::endl; + return -1; + } + } + + // Calculate centroid and center the training data + std::unique_ptr centroid = std::make_unique(dim); + for (uint64_t d = 0; d < dim; d++) + { + centroid[d] = 0; + } + if (make_zero_mean) + { // If we use L2 distance, there is an option to + // translate all vectors to make them centered and + // then compute PQ. This needs to be set to false + // when using PQ for MIPS as such translations dont + // preserve inner products. + for (uint64_t d = 0; d < dim; d++) + { + for (uint64_t p = 0; p < num_train; p++) + { + centroid[d] += train_data[p * dim + d]; + } + centroid[d] /= num_train; + } + + for (uint64_t d = 0; d < dim; d++) + { + for (uint64_t p = 0; p < num_train; p++) + { + train_data[p * dim + d] -= centroid[d]; + } + } + } + + std::vector chunk_offsets; + + size_t low_val = (size_t)std::floor((double)dim / (double)num_pq_chunks); + size_t high_val = (size_t)std::ceil((double)dim / (double)num_pq_chunks); + size_t max_num_high = dim - (low_val * num_pq_chunks); + size_t cur_num_high = 0; + size_t cur_bin_threshold = high_val; + + std::vector> bin_to_dims(num_pq_chunks); + tsl::robin_map dim_to_bin; + std::vector bin_loads(num_pq_chunks, 0); + + // Process dimensions not inserted by previous loop + for (uint32_t d = 0; d < dim; d++) + { + if (dim_to_bin.find(d) != dim_to_bin.end()) + continue; + auto cur_best = num_pq_chunks + 1; + float cur_best_load = std::numeric_limits::max(); + for (uint32_t b = 0; b < num_pq_chunks; b++) + { + if (bin_loads[b] < cur_best_load && bin_to_dims[b].size() < cur_bin_threshold) + { + cur_best = b; + cur_best_load = bin_loads[b]; + } + } + bin_to_dims[cur_best].push_back(d); + if (bin_to_dims[cur_best].size() == high_val) + { + cur_num_high++; + if (cur_num_high == max_num_high) + cur_bin_threshold = low_val; + } + } + + chunk_offsets.clear(); + chunk_offsets.push_back(0); + + for (uint32_t b = 0; b < num_pq_chunks; b++) + { + if (b > 0) + chunk_offsets.push_back(chunk_offsets[b - 1] + (uint32_t)bin_to_dims[b - 1].size()); + } + chunk_offsets.push_back(dim); + + full_pivot_data.reset(new float[num_centers * dim]); + + for (size_t i = 0; i < num_pq_chunks; i++) + { + size_t cur_chunk_size = chunk_offsets[i + 1] - chunk_offsets[i]; + + if (cur_chunk_size == 0) + continue; + std::unique_ptr cur_pivot_data = std::make_unique(num_centers * cur_chunk_size); + std::unique_ptr cur_data = std::make_unique(num_train * cur_chunk_size); + std::unique_ptr closest_center = std::make_unique(num_train); + + diskann::cout << "Processing chunk " << i << " with dimensions [" << chunk_offsets[i] << ", " + << chunk_offsets[i + 1] << ")" << std::endl; + +#pragma omp parallel for schedule(static, 65536) + for (int64_t j = 0; j < (int64_t)num_train; j++) + { + std::memcpy(cur_data.get() + j * cur_chunk_size, train_data.get() + j * dim + chunk_offsets[i], + cur_chunk_size * sizeof(float)); + } + + 
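+ // (The memcpy loop above gathers this chunk's coordinates contiguously; the
+ // chunk is then quantized independently: k-means++ seeding below followed by
+ // Lloyd's iterations yields its num_centers codewords.)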
kmeans::kmeanspp_selecting_pivots(cur_data.get(), num_train, cur_chunk_size, cur_pivot_data.get(), num_centers); + + kmeans::run_lloyds(cur_data.get(), num_train, cur_chunk_size, cur_pivot_data.get(), num_centers, + max_k_means_reps, NULL, closest_center.get()); + + for (uint64_t j = 0; j < num_centers; j++) + { + std::memcpy(full_pivot_data.get() + j * dim + chunk_offsets[i], cur_pivot_data.get() + j * cur_chunk_size, + cur_chunk_size * sizeof(float)); + } + } + + std::vector cumul_bytes(4, 0); + cumul_bytes[0] = METADATA_SIZE; + cumul_bytes[1] = cumul_bytes[0] + diskann::save_bin(pq_pivots_path.c_str(), full_pivot_data.get(), + (size_t)num_centers, dim, cumul_bytes[0]); + cumul_bytes[2] = cumul_bytes[1] + + diskann::save_bin(pq_pivots_path.c_str(), centroid.get(), (size_t)dim, 1, cumul_bytes[1]); + cumul_bytes[3] = cumul_bytes[2] + diskann::save_bin(pq_pivots_path.c_str(), chunk_offsets.data(), + chunk_offsets.size(), 1, cumul_bytes[2]); + diskann::save_bin(pq_pivots_path.c_str(), cumul_bytes.data(), cumul_bytes.size(), 1, 0); + + diskann::cout << "Saved pq pivot data to " << pq_pivots_path << " of size " << cumul_bytes[cumul_bytes.size() - 1] + << "B." << std::endl; + + return 0; +} + +int generate_opq_pivots(const float *passed_train_data, size_t num_train, uint32_t dim, uint32_t num_centers, + uint32_t num_pq_chunks, std::string opq_pivots_path, bool make_zero_mean) +{ + if (num_pq_chunks > dim) + { + diskann::cout << " Error: number of chunks more than dimension" << std::endl; + return -1; + } + + std::unique_ptr train_data = std::make_unique(num_train * dim); + std::memcpy(train_data.get(), passed_train_data, num_train * dim * sizeof(float)); + + std::unique_ptr rotated_train_data = std::make_unique(num_train * dim); + std::unique_ptr rotated_and_quantized_train_data = std::make_unique(num_train * dim); + + std::unique_ptr full_pivot_data; + + // rotation matrix for OPQ + std::unique_ptr rotmat_tr; + + // matrices for SVD + std::unique_ptr Umat = std::make_unique(dim * dim); + std::unique_ptr Vmat_T = std::make_unique(dim * dim); + std::unique_ptr singular_values = std::make_unique(dim); + std::unique_ptr correlation_matrix = std::make_unique(dim * dim); + + // Calculate centroid and center the training data + std::unique_ptr centroid = std::make_unique(dim); + for (uint64_t d = 0; d < dim; d++) + { + centroid[d] = 0; + } + if (make_zero_mean) + { // If we use L2 distance, there is an option to + // translate all vectors to make them centered and + // then compute PQ. This needs to be set to false + // when using PQ for MIPS as such translations dont + // preserve inner products. 
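+ // (The loops below compute the per-dimension mean of the training data and
+ // subtract it; centering happens before the OPQ rounds, which alternate
+ // chunk-wise k-means with updates to the rotation matrix via the SVD work
+ // buffers declared above.)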
+ for (uint64_t d = 0; d < dim; d++) + { + for (uint64_t p = 0; p < num_train; p++) + { + centroid[d] += train_data[p * dim + d]; + } + centroid[d] /= num_train; + } + for (uint64_t d = 0; d < dim; d++) + { + for (uint64_t p = 0; p < num_train; p++) + { + train_data[p * dim + d] -= centroid[d]; + } + } + } + + std::vector chunk_offsets; + + size_t low_val = (size_t)std::floor((double)dim / (double)num_pq_chunks); + size_t high_val = (size_t)std::ceil((double)dim / (double)num_pq_chunks); + size_t max_num_high = dim - (low_val * num_pq_chunks); + size_t cur_num_high = 0; + size_t cur_bin_threshold = high_val; + + std::vector> bin_to_dims(num_pq_chunks); + tsl::robin_map dim_to_bin; + std::vector bin_loads(num_pq_chunks, 0); + + // Process dimensions not inserted by previous loop + for (uint32_t d = 0; d < dim; d++) + { + if (dim_to_bin.find(d) != dim_to_bin.end()) + continue; + auto cur_best = num_pq_chunks + 1; + float cur_best_load = std::numeric_limits::max(); + for (uint32_t b = 0; b < num_pq_chunks; b++) + { + if (bin_loads[b] < cur_best_load && bin_to_dims[b].size() < cur_bin_threshold) + { + cur_best = b; + cur_best_load = bin_loads[b]; + } + } + bin_to_dims[cur_best].push_back(d); + if (bin_to_dims[cur_best].size() == high_val) + { + cur_num_high++; + if (cur_num_high == max_num_high) + cur_bin_threshold = low_val; + } + } + + chunk_offsets.clear(); + chunk_offsets.push_back(0); + + for (uint32_t b = 0; b < num_pq_chunks; b++) + { + if (b > 0) + chunk_offsets.push_back(chunk_offsets[b - 1] + (uint32_t)bin_to_dims[b - 1].size()); + } + chunk_offsets.push_back(dim); + + full_pivot_data.reset(new float[num_centers * dim]); + rotmat_tr.reset(new float[dim * dim]); + + std::memset(rotmat_tr.get(), 0, dim * dim * sizeof(float)); + for (uint32_t d1 = 0; d1 < dim; d1++) + *(rotmat_tr.get() + d1 * dim + d1) = 1; + + for (uint32_t rnd = 0; rnd < MAX_OPQ_ITERS; rnd++) + { + // rotate the training data using the current rotation matrix + cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, (MKL_INT)num_train, (MKL_INT)dim, (MKL_INT)dim, 1.0f, + train_data.get(), (MKL_INT)dim, rotmat_tr.get(), (MKL_INT)dim, 0.0f, rotated_train_data.get(), + (MKL_INT)dim); + + // compute the PQ pivots on the rotated space + for (size_t i = 0; i < num_pq_chunks; i++) + { + size_t cur_chunk_size = chunk_offsets[i + 1] - chunk_offsets[i]; + + if (cur_chunk_size == 0) + continue; + std::unique_ptr cur_pivot_data = std::make_unique(num_centers * cur_chunk_size); + std::unique_ptr cur_data = std::make_unique(num_train * cur_chunk_size); + std::unique_ptr closest_center = std::make_unique(num_train); + + diskann::cout << "Processing chunk " << i << " with dimensions [" << chunk_offsets[i] << ", " + << chunk_offsets[i + 1] << ")" << std::endl; + +#pragma omp parallel for schedule(static, 65536) + for (int64_t j = 0; j < (int64_t)num_train; j++) + { + std::memcpy(cur_data.get() + j * cur_chunk_size, rotated_train_data.get() + j * dim + chunk_offsets[i], + cur_chunk_size * sizeof(float)); + } + + if (rnd == 0) + { + kmeans::kmeanspp_selecting_pivots(cur_data.get(), num_train, cur_chunk_size, cur_pivot_data.get(), + num_centers); + } + else + { + for (uint64_t j = 0; j < num_centers; j++) + { + std::memcpy(cur_pivot_data.get() + j * cur_chunk_size, + full_pivot_data.get() + j * dim + chunk_offsets[i], cur_chunk_size * sizeof(float)); + } + } + + uint32_t num_lloyds_iters = 8; + kmeans::run_lloyds(cur_data.get(), num_train, cur_chunk_size, cur_pivot_data.get(), num_centers, + num_lloyds_iters, NULL, closest_center.get()); 
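+            // After this Lloyd's pass, the refreshed chunk pivots are copied back into
+            // full_pivot_data and each training vector's chunk is replaced by its closest
+            // pivot (rotated_and_quantized_train_data); the enclosing OPQ round then
+            // re-estimates the rotation from the SVD of the data/reconstruction
+            // correlation matrix as R^T = U V^T.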
+ + for (uint64_t j = 0; j < num_centers; j++) + { + std::memcpy(full_pivot_data.get() + j * dim + chunk_offsets[i], + cur_pivot_data.get() + j * cur_chunk_size, cur_chunk_size * sizeof(float)); + } + + for (size_t j = 0; j < num_train; j++) + { + std::memcpy(rotated_and_quantized_train_data.get() + j * dim + chunk_offsets[i], + cur_pivot_data.get() + (size_t)closest_center[j] * cur_chunk_size, + cur_chunk_size * sizeof(float)); + } + } + + // compute the correlation matrix between the original data and the + // quantized data to compute the new rotation + cblas_sgemm(CblasRowMajor, CblasTrans, CblasNoTrans, (MKL_INT)dim, (MKL_INT)dim, (MKL_INT)num_train, 1.0f, + train_data.get(), (MKL_INT)dim, rotated_and_quantized_train_data.get(), (MKL_INT)dim, 0.0f, + correlation_matrix.get(), (MKL_INT)dim); + + // compute the SVD of the correlation matrix to help determine the new + // rotation matrix + +#ifdef __APPLE__ + uint32_t errcode = (uint32_t)LAPACKE_sgesdd(LAPACK_ROW_MAJOR, 'A', (clp_int)dim, (clp_int)dim, + correlation_matrix.get(), (clp_int)dim, singular_values.get(), + Umat.get(), (clp_int)dim, Vmat_T.get(), (clp_int)dim); + +#else + uint32_t errcode = (uint32_t)LAPACKE_sgesdd(LAPACK_ROW_MAJOR, 'A', (MKL_INT)dim, (MKL_INT)dim, + correlation_matrix.get(), (MKL_INT)dim, singular_values.get(), + Umat.get(), (MKL_INT)dim, Vmat_T.get(), (MKL_INT)dim); +#endif + + if (errcode > 0) + { + std::cout << "SVD failed to converge." << std::endl; + exit(-1); + } + + // compute the new rotation matrix from the singular vectors as R^T = U + // V^T + cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, (MKL_INT)dim, (MKL_INT)dim, (MKL_INT)dim, 1.0f, + Umat.get(), (MKL_INT)dim, Vmat_T.get(), (MKL_INT)dim, 0.0f, rotmat_tr.get(), (MKL_INT)dim); + } + + std::vector cumul_bytes(4, 0); + cumul_bytes[0] = METADATA_SIZE; + cumul_bytes[1] = cumul_bytes[0] + diskann::save_bin(opq_pivots_path.c_str(), full_pivot_data.get(), + (size_t)num_centers, dim, cumul_bytes[0]); + cumul_bytes[2] = cumul_bytes[1] + + diskann::save_bin(opq_pivots_path.c_str(), centroid.get(), (size_t)dim, 1, cumul_bytes[1]); + cumul_bytes[3] = cumul_bytes[2] + diskann::save_bin(opq_pivots_path.c_str(), chunk_offsets.data(), + chunk_offsets.size(), 1, cumul_bytes[2]); + diskann::save_bin(opq_pivots_path.c_str(), cumul_bytes.data(), cumul_bytes.size(), 1, 0); + + diskann::cout << "Saved opq pivot data to " << opq_pivots_path << " of size " << cumul_bytes[cumul_bytes.size() - 1] + << "B." << std::endl; + + std::string rotmat_path = opq_pivots_path + "_rotation_matrix.bin"; + diskann::save_bin(rotmat_path.c_str(), rotmat_tr.get(), dim, dim); + + return 0; +} + +// generate_pq_data_from_pivots_simplified is a simplified version of generate_pq_data_from_pivots. +// Input is provided in the in-memory buffers data and pivot_data. +// Output is stored in the in-memory buffer pq. +// Simplification is based on the following assumptions: +// supporting only float data type +// dim % num_pq_chunks == 0, which results in a fixed chunk_size +// num_centers == 256 by default +// make_zero_mean is false by default. +// These assumptions allow to make the function much simpler and avoid using +// array of chunk_offsets and centroids. +// The compiler pragma for multi-threading support is removed from this implementation +// for the purpose of integration into systems that strictly control resource allocation. 
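+// Illustrative usage sketch (added comment; the concrete numbers are hypothetical and
+// the output vector is assumed to hold one-byte codes, as elsewhere in this file):
+// with dim = 128 and num_pq_chunks = 16, chunk_size is 8 and each 512-byte float
+// vector compresses to 16 one-byte codes. Given a trained codebook of 256 x dim
+// floats laid out centroid-major, a caller might do:
+//
+//   std::vector<uint8_t> pq_codes;
+//   int rc = generate_pq_data_from_pivots_simplified(
+//       data, /*num=*/num_points, pivot_data, /*pivots_num=*/256 * 128,
+//       /*dim=*/128, /*num_pq_chunks=*/16, pq_codes);
+//   // rc == 0 on success; pq_codes then holds num_points * 16 centroid ids.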
+int generate_pq_data_from_pivots_simplified(const float *data, const size_t num, const float *pivot_data, + const size_t pivots_num, const size_t dim, const size_t num_pq_chunks, + std::vector &pq) +{ + if (num_pq_chunks == 0 || num_pq_chunks > dim || dim % num_pq_chunks != 0) + { + return -1; + } + + const size_t num_centers = 256; + const size_t chunk_size = dim / num_pq_chunks; + + if (pivots_num != num_centers * dim) + { + return -1; + } + + pq.resize(num * num_pq_chunks); + + std::vector cur_pivot_vector(num_centers * chunk_size); + std::vector cur_data_vector(num * chunk_size); + std::vector closest_center_vector(num); + + float *cur_pivot_data = &cur_pivot_vector[0]; + float *cur_data = &cur_data_vector[0]; + uint32_t *closest_center = &closest_center_vector[0]; + + for (size_t i = 0; i < num_pq_chunks; i++) + { + const size_t chunk_offset = chunk_size * i; + + for (int j = 0; j < num_centers; j++) + { + std::memcpy(cur_pivot_data + j * chunk_size, pivot_data + j * dim + chunk_offset, + chunk_size * sizeof(float)); + } + + for (int j = 0; j < num; j++) + { + for (size_t k = 0; k < chunk_size; k++) + { + cur_data[j * chunk_size + k] = data[j * dim + chunk_offset + k]; + } + } + + math_utils::compute_closest_centers(cur_data, num, chunk_size, cur_pivot_data, num_centers, 1, closest_center); + + for (int j = 0; j < num; j++) + { + assert(closest_center[j] < num_centers); + pq[j * num_pq_chunks + i] = closest_center[j]; + } + } + + return 0; +} + +// streams the base file (data_file), and computes the closest centers in each +// chunk to generate the compressed data_file and stores it in +// pq_compressed_vectors_path. +// If the numbber of centers is < 256, it stores as byte vector, else as +// 4-byte vector in binary format. +template +int generate_pq_data_from_pivots(const std::string &data_file, uint32_t num_centers, uint32_t num_pq_chunks, + const std::string &pq_pivots_path, const std::string &pq_compressed_vectors_path, + bool use_opq) +{ + size_t read_blk_size = 64 * 1024 * 1024; + cached_ifstream base_reader(data_file, read_blk_size); + uint32_t npts32; + uint32_t basedim32; + base_reader.read((char *)&npts32, sizeof(uint32_t)); + base_reader.read((char *)&basedim32, sizeof(uint32_t)); + size_t num_points = npts32; + size_t dim = basedim32; + + std::unique_ptr full_pivot_data; + std::unique_ptr rotmat_tr; + std::unique_ptr centroid; + std::unique_ptr chunk_offsets; + + std::string inflated_pq_file = pq_compressed_vectors_path + "_inflated.bin"; + + if (!file_exists(pq_pivots_path)) + { + std::cout << "ERROR: PQ k-means pivot file not found" << std::endl; + throw diskann::ANNException("PQ k-means pivot file not found", -1); + } + else + { + size_t nr, nc; + std::unique_ptr file_offset_data; + + diskann::load_bin(pq_pivots_path.c_str(), file_offset_data, nr, nc, 0); + + if (nr != 4) + { + diskann::cout << "Error reading pq_pivots file " << pq_pivots_path + << ". Offsets dont contain correct metadata, # offsets = " << nr << ", but expecting 4."; + throw diskann::ANNException("Error reading pq_pivots file at offsets data.", -1, __FUNCSIG__, __FILE__, + __LINE__); + } + + diskann::load_bin(pq_pivots_path.c_str(), full_pivot_data, nr, nc, file_offset_data[0]); + + if ((nr != num_centers) || (nc != dim)) + { + diskann::cout << "Error reading pq_pivots file " << pq_pivots_path << ". 
file_num_centers = " << nr + << ", file_dim = " << nc << " but expecting " << num_centers << " centers in " << dim + << " dimensions."; + throw diskann::ANNException("Error reading pq_pivots file at pivots data.", -1, __FUNCSIG__, __FILE__, + __LINE__); + } + + diskann::load_bin(pq_pivots_path.c_str(), centroid, nr, nc, file_offset_data[1]); + + if ((nr != dim) || (nc != 1)) + { + diskann::cout << "Error reading pq_pivots file " << pq_pivots_path << ". file_dim = " << nr + << ", file_cols = " << nc << " but expecting " << dim << " entries in 1 dimension."; + throw diskann::ANNException("Error reading pq_pivots file at centroid data.", -1, __FUNCSIG__, __FILE__, + __LINE__); + } + + diskann::load_bin(pq_pivots_path.c_str(), chunk_offsets, nr, nc, file_offset_data[2]); + + if (nr != (uint64_t)num_pq_chunks + 1 || nc != 1) + { + diskann::cout << "Error reading pq_pivots file at chunk offsets; file has nr=" << nr << ",nc=" << nc + << ", expecting nr=" << num_pq_chunks + 1 << ", nc=1." << std::endl; + throw diskann::ANNException("Error reading pq_pivots file at chunk offsets.", -1, __FUNCSIG__, __FILE__, + __LINE__); + } + + if (use_opq) + { + std::string rotmat_path = pq_pivots_path + "_rotation_matrix.bin"; + diskann::load_bin(rotmat_path.c_str(), rotmat_tr, nr, nc); + if (nr != (uint64_t)dim || nc != dim) + { + diskann::cout << "Error reading rotation matrix file." << std::endl; + throw diskann::ANNException("Error reading rotation matrix file.", -1, __FUNCSIG__, __FILE__, __LINE__); + } + } + + diskann::cout << "Loaded PQ pivot information" << std::endl; + } + + std::ofstream compressed_file_writer(pq_compressed_vectors_path, std::ios::binary); + uint32_t num_pq_chunks_u32 = num_pq_chunks; + + compressed_file_writer.write((char *)&num_points, sizeof(uint32_t)); + compressed_file_writer.write((char *)&num_pq_chunks_u32, sizeof(uint32_t)); + + size_t block_size = num_points <= BLOCK_SIZE ? num_points : BLOCK_SIZE; + +#ifdef SAVE_INFLATED_PQ + std::ofstream inflated_file_writer(inflated_pq_file, std::ios::binary); + inflated_file_writer.write((char *)&num_points, sizeof(uint32_t)); + inflated_file_writer.write((char *)&basedim32, sizeof(uint32_t)); + + std::unique_ptr block_inflated_base = std::make_unique(block_size * dim); + std::memset(block_inflated_base.get(), 0, block_size * dim * sizeof(float)); +#endif + + std::unique_ptr block_compressed_base = + std::make_unique(block_size * (size_t)num_pq_chunks); + std::memset(block_compressed_base.get(), 0, block_size * (size_t)num_pq_chunks * sizeof(uint32_t)); + + std::unique_ptr block_data_T = std::make_unique(block_size * dim); + std::unique_ptr block_data_float = std::make_unique(block_size * dim); + std::unique_ptr block_data_tmp = std::make_unique(block_size * dim); + + size_t num_blocks = DIV_ROUND_UP(num_points, block_size); + + for (size_t block = 0; block < num_blocks; block++) + { + size_t start_id = block * block_size; + size_t end_id = (std::min)((block + 1) * block_size, num_points); + size_t cur_blk_size = end_id - start_id; + + base_reader.read((char *)(block_data_T.get()), sizeof(T) * (cur_blk_size * dim)); + diskann::convert_types(block_data_T.get(), block_data_tmp.get(), cur_blk_size, dim); + + diskann::cout << "Processing points [" << start_id << ", " << end_id << ").." 
<< std::flush; + + for (size_t p = 0; p < cur_blk_size; p++) + { + for (uint64_t d = 0; d < dim; d++) + { + block_data_tmp[p * dim + d] -= centroid[d]; + } + } + + for (size_t p = 0; p < cur_blk_size; p++) + { + for (uint64_t d = 0; d < dim; d++) + { + block_data_float[p * dim + d] = block_data_tmp[p * dim + d]; + } + } + + if (use_opq) + { + // rotate the current block with the trained rotation matrix before + // PQ + cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, (MKL_INT)cur_blk_size, (MKL_INT)dim, (MKL_INT)dim, + 1.0f, block_data_float.get(), (MKL_INT)dim, rotmat_tr.get(), (MKL_INT)dim, 0.0f, + block_data_tmp.get(), (MKL_INT)dim); + std::memcpy(block_data_float.get(), block_data_tmp.get(), cur_blk_size * dim * sizeof(float)); + } + + for (size_t i = 0; i < num_pq_chunks; i++) + { + size_t cur_chunk_size = chunk_offsets[i + 1] - chunk_offsets[i]; + if (cur_chunk_size == 0) + continue; + + std::unique_ptr cur_pivot_data = std::make_unique(num_centers * cur_chunk_size); + std::unique_ptr cur_data = std::make_unique(cur_blk_size * cur_chunk_size); + std::unique_ptr closest_center = std::make_unique(cur_blk_size); + +#pragma omp parallel for schedule(static, 8192) + for (int64_t j = 0; j < (int64_t)cur_blk_size; j++) + { + for (size_t k = 0; k < cur_chunk_size; k++) + cur_data[j * cur_chunk_size + k] = block_data_float[j * dim + chunk_offsets[i] + k]; + } + +#pragma omp parallel for schedule(static, 1) + for (int64_t j = 0; j < (int64_t)num_centers; j++) + { + std::memcpy(cur_pivot_data.get() + j * cur_chunk_size, + full_pivot_data.get() + j * dim + chunk_offsets[i], cur_chunk_size * sizeof(float)); + } + + math_utils::compute_closest_centers(cur_data.get(), cur_blk_size, cur_chunk_size, cur_pivot_data.get(), + num_centers, 1, closest_center.get()); + +#pragma omp parallel for schedule(static, 8192) + for (int64_t j = 0; j < (int64_t)cur_blk_size; j++) + { + block_compressed_base[j * num_pq_chunks + i] = closest_center[j]; +#ifdef SAVE_INFLATED_PQ + for (size_t k = 0; k < cur_chunk_size; k++) + block_inflated_base[j * dim + chunk_offsets[i] + k] = + cur_pivot_data[closest_center[j] * cur_chunk_size + k] + centroid[chunk_offsets[i] + k]; +#endif + } + } + + if (num_centers > 256) + { + compressed_file_writer.write((char *)(block_compressed_base.get()), + cur_blk_size * num_pq_chunks * sizeof(uint32_t)); + } + else + { + std::unique_ptr pVec = std::make_unique(cur_blk_size * num_pq_chunks); + diskann::convert_types(block_compressed_base.get(), pVec.get(), cur_blk_size, + num_pq_chunks); + compressed_file_writer.write((char *)(pVec.get()), cur_blk_size * num_pq_chunks * sizeof(uint8_t)); + } +#ifdef SAVE_INFLATED_PQ + inflated_file_writer.write((char *)(block_inflated_base.get()), cur_blk_size * dim * sizeof(float)); +#endif + diskann::cout << ".done." << std::endl; + } +// Gopal. Splitting diskann_dll into separate DLLs for search and build. +// This code should only be available in the "build" DLL. 
+#if defined(DISKANN_RELEASE_UNUSED_TCMALLOC_MEMORY_AT_CHECKPOINTS) && defined(DISKANN_BUILD) + MallocExtension::instance()->ReleaseFreeMemory(); +#endif + compressed_file_writer.close(); +#ifdef SAVE_INFLATED_PQ + inflated_file_writer.close(); +#endif + return 0; +} + +template +void generate_disk_quantized_data(const std::string &data_file_to_use, const std::string &disk_pq_pivots_path, + const std::string &disk_pq_compressed_vectors_path, diskann::Metric compareMetric, + const double p_val, size_t &disk_pq_dims) +{ + size_t train_size, train_dim; + float *train_data; + + // instantiates train_data with random sample updates train_size + gen_random_slice(data_file_to_use.c_str(), p_val, train_data, train_size, train_dim); + diskann::cout << "Training data with " << train_size << " samples loaded." << std::endl; + + if (disk_pq_dims > train_dim) + disk_pq_dims = train_dim; + + std::cout << "Compressing base for disk-PQ into " << disk_pq_dims << " chunks " << std::endl; + generate_pq_pivots(train_data, train_size, (uint32_t)train_dim, 256, (uint32_t)disk_pq_dims, NUM_KMEANS_REPS_PQ, + disk_pq_pivots_path, false); + if (compareMetric == diskann::Metric::INNER_PRODUCT) + generate_pq_data_from_pivots(data_file_to_use, 256, (uint32_t)disk_pq_dims, disk_pq_pivots_path, + disk_pq_compressed_vectors_path); + else + generate_pq_data_from_pivots(data_file_to_use, 256, (uint32_t)disk_pq_dims, disk_pq_pivots_path, + disk_pq_compressed_vectors_path); + + delete[] train_data; +} + +template +void generate_quantized_data(const std::string &data_file_to_use, const std::string &pq_pivots_path, + const std::string &pq_compressed_vectors_path, diskann::Metric compareMetric, + const double p_val, const uint64_t num_pq_chunks, const bool use_opq, + const std::string &codebook_prefix) +{ + size_t train_size, train_dim; + float *train_data; + if (!file_exists(codebook_prefix)) + { + // instantiates train_data with random sample updates train_size + gen_random_slice(data_file_to_use.c_str(), p_val, train_data, train_size, train_dim); + diskann::cout << "Training data with " << train_size << " samples loaded." << std::endl; + + bool make_zero_mean = true; + if (compareMetric == diskann::Metric::INNER_PRODUCT) + make_zero_mean = false; + if (use_opq) // we also do not center the data for OPQ + make_zero_mean = false; + + if (!use_opq) + { + generate_pq_pivots(train_data, train_size, (uint32_t)train_dim, NUM_PQ_CENTROIDS, (uint32_t)num_pq_chunks, + NUM_KMEANS_REPS_PQ, pq_pivots_path, make_zero_mean); + } + else + { + generate_opq_pivots(train_data, train_size, (uint32_t)train_dim, NUM_PQ_CENTROIDS, (uint32_t)num_pq_chunks, + pq_pivots_path, make_zero_mean); + } + delete[] train_data; + } + else + { + diskann::cout << "Skip Training with predefined pivots in: " << pq_pivots_path << std::endl; + if (!file_exists(pq_compressed_vectors_path)) + { + diskann::cout << "! 
Pivot exists, but compressed vectors do not exist, please check the file path" + << std::endl; + diskann::cout << "It's " << pq_compressed_vectors_path << " and " << pq_pivots_path << std::endl; + assert(false); + } + return; + } + generate_pq_data_from_pivots(data_file_to_use, NUM_PQ_CENTROIDS, (uint32_t)num_pq_chunks, pq_pivots_path, + pq_compressed_vectors_path, use_opq); +} + +// Instantations of supported templates + +template DISKANN_DLLEXPORT int generate_pq_data_from_pivots(const std::string &data_file, uint32_t num_centers, + uint32_t num_pq_chunks, + const std::string &pq_pivots_path, + const std::string &pq_compressed_vectors_path, + bool use_opq); +template DISKANN_DLLEXPORT int generate_pq_data_from_pivots(const std::string &data_file, uint32_t num_centers, + uint32_t num_pq_chunks, + const std::string &pq_pivots_path, + const std::string &pq_compressed_vectors_path, + bool use_opq); +template DISKANN_DLLEXPORT int generate_pq_data_from_pivots(const std::string &data_file, uint32_t num_centers, + uint32_t num_pq_chunks, + const std::string &pq_pivots_path, + const std::string &pq_compressed_vectors_path, + bool use_opq); + +template DISKANN_DLLEXPORT void generate_disk_quantized_data(const std::string &data_file_to_use, + const std::string &disk_pq_pivots_path, + const std::string &disk_pq_compressed_vectors_path, + diskann::Metric compareMetric, const double p_val, + size_t &disk_pq_dims); + +template DISKANN_DLLEXPORT void generate_disk_quantized_data( + const std::string &data_file_to_use, const std::string &disk_pq_pivots_path, + const std::string &disk_pq_compressed_vectors_path, diskann::Metric compareMetric, const double p_val, + size_t &disk_pq_dims); + +template DISKANN_DLLEXPORT void generate_disk_quantized_data(const std::string &data_file_to_use, + const std::string &disk_pq_pivots_path, + const std::string &disk_pq_compressed_vectors_path, + diskann::Metric compareMetric, const double p_val, + size_t &disk_pq_dims); + +template DISKANN_DLLEXPORT void generate_quantized_data(const std::string &data_file_to_use, + const std::string &pq_pivots_path, + const std::string &pq_compressed_vectors_path, + diskann::Metric compareMetric, const double p_val, + const uint64_t num_pq_chunks, const bool use_opq, + const std::string &codebook_prefix); + +template DISKANN_DLLEXPORT void generate_quantized_data(const std::string &data_file_to_use, + const std::string &pq_pivots_path, + const std::string &pq_compressed_vectors_path, + diskann::Metric compareMetric, const double p_val, + const uint64_t num_pq_chunks, const bool use_opq, + const std::string &codebook_prefix); + +template DISKANN_DLLEXPORT void generate_quantized_data(const std::string &data_file_to_use, + const std::string &pq_pivots_path, + const std::string &pq_compressed_vectors_path, + diskann::Metric compareMetric, const double p_val, + const uint64_t num_pq_chunks, const bool use_opq, + const std::string &codebook_prefix); +} // namespace diskann diff --git a/packages/leann-backend-diskann/third_party/DiskANN/src/pq_data_store.cpp b/packages/leann-backend-diskann/third_party/DiskANN/src/pq_data_store.cpp new file mode 100644 index 0000000..491975e --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/src/pq_data_store.cpp @@ -0,0 +1,260 @@ +#include + +#include "pq_data_store.h" +#include "pq.h" +#include "pq_scratch.h" +#include "utils.h" +#include "distance.h" + +namespace diskann +{ + +// REFACTOR TODO: Assuming that num_pq_chunks is known already. Must verify if +// this is true. 
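+// Layout note (added comment): this store keeps only the PQ codes, i.e. one byte per
+// chunk per point (plus the pivot table held by the PQ distance function), so its
+// resident size is roughly capacity() * num_pq_chunks bytes rather than
+// capacity() * dim * sizeof(data_t) for an uncompressed store.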
+template +PQDataStore::PQDataStore(size_t dim, location_t num_points, size_t num_pq_chunks, + std::unique_ptr> distance_fn, + std::unique_ptr> pq_distance_fn) + : AbstractDataStore(num_points, dim), _quantized_data(nullptr), _num_chunks(num_pq_chunks), + _distance_metric(distance_fn->get_metric()) +{ + if (num_pq_chunks > dim) + { + throw diskann::ANNException("ERROR: num_pq_chunks > dim", -1, __FUNCSIG__, __FILE__, __LINE__); + } + _distance_fn = std::move(distance_fn); + _pq_distance_fn = std::move(pq_distance_fn); +} + +template PQDataStore::~PQDataStore() +{ + if (_quantized_data != nullptr) + { + aligned_free(_quantized_data); + _quantized_data = nullptr; + } +} + +template location_t PQDataStore::load(const std::string &filename) +{ + return load_impl(filename); +} +template size_t PQDataStore::save(const std::string &filename, const location_t num_points) +{ + return diskann::save_bin(filename, _quantized_data, this->capacity(), _num_chunks, 0); +} + +template size_t PQDataStore::get_aligned_dim() const +{ + return this->get_dims(); +} + +// Populate quantized data from regular data. +template void PQDataStore::populate_data(const data_t *vectors, const location_t num_pts) +{ + throw std::logic_error("Not implemented yet"); +} + +template void PQDataStore::populate_data(const std::string &filename, const size_t offset) +{ + if (_quantized_data != nullptr) + { + aligned_free(_quantized_data); + } + + size_t file_num_points = 0, file_dim = 0; + get_bin_metadata(filename, file_num_points, file_dim, offset); + this->_capacity = (location_t)file_num_points; + this->_dim = file_dim; + + double p_val = std::min(1.0, ((double)MAX_PQ_TRAINING_SET_SIZE / (double)file_num_points)); + + auto pivots_file = _pq_distance_fn->get_pivot_data_filename(filename); + auto compressed_file = _pq_distance_fn->get_quantized_vectors_filename(filename); + + generate_quantized_data(filename, pivots_file, compressed_file, _distance_metric, p_val, _num_chunks, + _pq_distance_fn->is_opq()); + + // REFACTOR TODO: Not sure of the alignment. Just copying from index.cpp + alloc_aligned(((void **)&_quantized_data), file_num_points * _num_chunks * sizeof(uint8_t), 1); + copy_aligned_data_from_file(compressed_file.c_str(), _quantized_data, file_num_points, _num_chunks, + _num_chunks); +#ifdef EXEC_ENV_OLS + throw ANNException("load_pq_centroid_bin should not be called when " + "EXEC_ENV_OLS is defined.", + -1, __FUNCSIG__, __FILE__, __LINE__); +#else + _pq_distance_fn->load_pivot_data(pivots_file.c_str(), _num_chunks); +#endif +} + +template +void PQDataStore::extract_data_to_bin(const std::string &filename, const location_t num_pts) +{ + throw std::logic_error("Not implemented yet"); +} + +template void PQDataStore::get_vector(const location_t i, data_t *target) const +{ + // REFACTOR TODO: Should we inflate the compressed vector here? + if (i < this->capacity()) + { + throw std::logic_error("Not implemented yet."); + } + else + { + std::stringstream ss; + ss << "Requested vector " << i << " but only " << this->capacity() << " vectors are present"; + throw diskann::ANNException(ss.str(), -1); + } +} +template void PQDataStore::set_vector(const location_t i, const data_t *const vector) +{ + // REFACTOR TODO: Should we accept a normal vector and compress here? 
+ // memcpy (_data + i * _num_chunks, vector, _num_chunks * sizeof(data_t)); + throw std::logic_error("Not implemented yet"); +} + +template void PQDataStore::prefetch_vector(const location_t loc) +{ + const uint8_t *ptr = _quantized_data + ((size_t)loc) * _num_chunks * sizeof(data_t); + diskann::prefetch_vector((const char *)ptr, _num_chunks * sizeof(data_t)); +} + +template +void PQDataStore::move_vectors(const location_t old_location_start, const location_t new_location_start, + const location_t num_points) +{ + // REFACTOR TODO: Moving vectors is only for in-mem fresh. + throw std::logic_error("Not implemented yet"); +} + +template +void PQDataStore::copy_vectors(const location_t from_loc, const location_t to_loc, const location_t num_points) +{ + // REFACTOR TODO: Is the number of bytes correct? + memcpy(_quantized_data + to_loc * _num_chunks, _quantized_data + from_loc * _num_chunks, _num_chunks * num_points); +} + +// REFACTOR TODO: Currently, we take aligned_query as parameter, but this +// function should also do the alignment. +template +void PQDataStore::preprocess_query(const data_t *aligned_query, AbstractScratch *scratch) const +{ + if (scratch == nullptr) + { + throw diskann::ANNException("Scratch space is null", -1); + } + + PQScratch *pq_scratch = scratch->pq_scratch(); + + if (pq_scratch == nullptr) + { + throw diskann::ANNException("PQScratch space has not been set in the scratch object.", -1); + } + + _pq_distance_fn->preprocess_query(aligned_query, (location_t)this->get_dims(), *pq_scratch); +} + +template float PQDataStore::get_distance(const data_t *query, const location_t loc) const +{ + throw std::logic_error("Not implemented yet"); +} + +template float PQDataStore::get_distance(const location_t loc1, const location_t loc2) const +{ + throw std::logic_error("Not implemented yet"); +} + +template +void PQDataStore::get_distance(const data_t *preprocessed_query, const location_t *locations, + const uint32_t location_count, float *distances, + AbstractScratch *scratch_space) const +{ + if (scratch_space == nullptr) + { + throw diskann::ANNException("Scratch space is null", -1); + } + PQScratch *pq_scratch = scratch_space->pq_scratch(); + if (pq_scratch == nullptr) + { + throw diskann::ANNException("PQScratch not set in scratch space.", -1); + } + diskann::aggregate_coords(locations, location_count, _quantized_data, this->_num_chunks, + pq_scratch->aligned_pq_coord_scratch); + _pq_distance_fn->preprocessed_distance(*pq_scratch, location_count, distances); +} + +template +void PQDataStore::get_distance(const data_t *preprocessed_query, const std::vector &ids, + std::vector &distances, AbstractScratch *scratch_space) const +{ + if (scratch_space == nullptr) + { + throw diskann::ANNException("Scratch space is null", -1); + } + PQScratch *pq_scratch = scratch_space->pq_scratch(); + if (pq_scratch == nullptr) + { + throw diskann::ANNException("PQScratch not set in scratch space.", -1); + } + diskann::aggregate_coords(ids, _quantized_data, this->_num_chunks, pq_scratch->aligned_pq_coord_scratch); + _pq_distance_fn->preprocessed_distance(*pq_scratch, (location_t)ids.size(), distances); +} + +template location_t PQDataStore::calculate_medoid() const +{ + // REFACTOR TODO: Must calculate this just like we do with data store. 
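+    // Until then, a pseudo-random location is returned below; the two rand() calls are
+    // combined so the draw can exceed RAND_MAX before it is reduced modulo capacity().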
+ size_t r = (size_t)rand() * (size_t)RAND_MAX + (size_t)rand(); + return (uint32_t)(r % (size_t)this->capacity()); +} + +template size_t PQDataStore::get_alignment_factor() const +{ + return 1; +} + +template Distance *PQDataStore::get_dist_fn() const +{ + return _distance_fn.get(); +} + +template location_t PQDataStore::load_impl(const std::string &file_prefix) +{ + if (_quantized_data != nullptr) + { + aligned_free(_quantized_data); + } + auto quantized_vectors_file = _pq_distance_fn->get_quantized_vectors_filename(file_prefix); + + size_t num_points; + load_aligned_bin(quantized_vectors_file, _quantized_data, num_points, _num_chunks, _num_chunks); + this->_capacity = (location_t)num_points; + + auto pivots_file = _pq_distance_fn->get_pivot_data_filename(file_prefix); + _pq_distance_fn->load_pivot_data(pivots_file, _num_chunks); + + return this->_capacity; +} + +template location_t PQDataStore::expand(const location_t new_size) +{ + throw std::logic_error("Not implemented yet"); +} + +template location_t PQDataStore::shrink(const location_t new_size) +{ + throw std::logic_error("Not implemented yet"); +} + +#ifdef EXEC_ENV_OLS +template location_t PQDataStore::load_impl(AlignedFileReader &reader) +{ +} +#endif + +template DISKANN_DLLEXPORT class PQDataStore; +template DISKANN_DLLEXPORT class PQDataStore; +template DISKANN_DLLEXPORT class PQDataStore; + +} // namespace diskann \ No newline at end of file diff --git a/packages/leann-backend-diskann/third_party/DiskANN/src/pq_flash_index.cpp b/packages/leann-backend-diskann/third_party/DiskANN/src/pq_flash_index.cpp new file mode 100644 index 0000000..bfb0abb --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/src/pq_flash_index.cpp @@ -0,0 +1,2964 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
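+// High-level note (added comment): PQFlashIndex keeps the PQ-compressed vectors and a
+// small node cache in memory and serves cached_beam_search by fetching adjacency lists
+// and full-precision vectors from disk in SECTOR_LEN-aligned reads; in this fork,
+// adjacency reads can optionally be routed through a partitioned graph file (see
+// read_partition_info / load_graph_index below).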
+ +#include "common_includes.h" + +#include +#include +#include + +#include "timer.h" +#include "pq.h" +#include "pq_scratch.h" +#include "pq_flash_index.h" +#include "cosine_similarity.h" +#include "embedding.pb.h" // from embedding.proto -> embedding.pb.h +#include +#include +#include +#include +#include + +#ifdef _WINDOWS +#include "windows_aligned_file_reader.h" +#elif __APPLE__ +#include "apple_aligned_file_reader.h" +#else +#include "linux_aligned_file_reader.h" +#endif + +#define READ_U64(stream, val) stream.read((char *)&val, sizeof(uint64_t)) +#define READ_U32(stream, val) stream.read((char *)&val, sizeof(uint32_t)) +#define READ_UNSIGNED(stream, val) stream.read((char *)&val, sizeof(unsigned)) + +// sector # beyond the end of graph where data for id is present for reordering +#define VECTOR_SECTOR_NO(id) (((uint64_t)(id)) / _nvecs_per_sector + _reorder_data_start_sector) + +// sector # beyond the end of graph where data for id is present for reordering +#define VECTOR_SECTOR_OFFSET(id) ((((uint64_t)(id)) % _nvecs_per_sector) * _data_dim * sizeof(float)) + +namespace diskann +{ +static std::mutex log_file_mutex; +static std::atomic search_counter(0); + +template +PQFlashIndex::PQFlashIndex(std::shared_ptr &fileReader, + std::shared_ptr &graphReader, diskann::Metric m) + : reader(fileReader), graph_reader(graphReader), metric(m), _thread_data(nullptr) +{ + diskann::Metric metric_to_invoke = m; + if (m == diskann::Metric::COSINE || m == diskann::Metric::INNER_PRODUCT) + { + if (std::is_floating_point::value) + { + diskann::cout << "Since data is floating point, we assume that it has been appropriately pre-processed " + "(normalization for cosine, and convert-to-l2 by adding extra dimension for MIPS). So we " + "shall invoke an l2 distance function." + << std::endl; + metric_to_invoke = diskann::Metric::L2; + } + else + { + diskann::cerr << "WARNING: Cannot normalize integral data types." + << " This may result in erroneous results or poor recall." + << " Consider using L2 distance with integral data types." << std::endl; + } + } + + this->_dist_cmp.reset(diskann::get_distance_function(metric_to_invoke)); + this->_dist_cmp_float.reset(diskann::get_distance_function(metric_to_invoke)); +} + +template PQFlashIndex::~PQFlashIndex() +{ +#ifndef EXEC_ENV_OLS + if (data != nullptr) + { + delete[] data; + } +#endif + + if (_centroid_data != nullptr) + aligned_free(_centroid_data); + // delete backing bufs for nhood and coord cache + if (_nhood_cache_buf != nullptr) + { + delete[] _nhood_cache_buf; + diskann::aligned_free(_coord_cache_buf); + } + + if (_load_flag) + { + diskann::cout << "Clearing scratch" << std::endl; + ScratchStoreManager> manager(this->_thread_data); + manager.destroy(); + this->reader->deregister_all_threads(); + reader->close(); + } + if (_pts_to_label_offsets != nullptr) + { + delete[] _pts_to_label_offsets; + } + if (_pts_to_label_counts != nullptr) + { + delete[] _pts_to_label_counts; + } + if (_pts_to_labels != nullptr) + { + delete[] _pts_to_labels; + } + if (_medoids != nullptr) + { + delete[] _medoids; + } +} + +template inline uint64_t PQFlashIndex::get_node_sector(uint64_t node_id) +{ + return 1 + (_nnodes_per_sector > 0 ? node_id / _nnodes_per_sector + : node_id * DIV_ROUND_UP(_max_node_len, defaults::SECTOR_LEN)); +} + +template +inline char *PQFlashIndex::offset_to_node(char *sector_buf, uint64_t node_id) +{ + return sector_buf + (_nnodes_per_sector == 0 ? 
0 : (node_id % _nnodes_per_sector) * _max_node_len);
+}
+
+template inline uint32_t *PQFlashIndex::offset_to_node_nhood(char *node_buf)
+{
+    return (unsigned *)(node_buf + _disk_bytes_per_point);
+}
+
+template inline T *PQFlashIndex::offset_to_node_coords(char *node_buf)
+{
+    return (T *)(node_buf);
+}
+
+template
+void PQFlashIndex::setup_thread_data(uint64_t nthreads, uint64_t visited_reserve)
+{
+    diskann::cout << "Setting up thread-specific contexts for nthreads: " << nthreads << std::endl;
+// omp parallel for to generate unique thread IDs
+#pragma omp parallel for num_threads((int)nthreads)
+    for (int64_t thread = 0; thread < (int64_t)nthreads; thread++)
+    {
+#pragma omp critical
+        {
+            SSDThreadData *data = new SSDThreadData(this->_aligned_dim, visited_reserve);
+            this->reader->register_thread();
+            data->ctx = this->reader->get_ctx();
+            this->_thread_data.push(data);
+        }
+    }
+    _load_flag = true;
+}
+
+template
+std::vector PQFlashIndex::read_nodes(const std::vector &node_ids,
+                                     std::vector &coord_buffers,
+                                     std::vector> &nbr_buffers)
+{
+    std::vector read_reqs;
+    std::vector retval(node_ids.size(), true);
+
+    char *buf = nullptr;
+    auto num_sectors = _nnodes_per_sector > 0 ? 1 : DIV_ROUND_UP(_max_node_len, defaults::SECTOR_LEN);
+
+    // borrow thread data and issue reads
+    ScratchStoreManager> manager(this->_thread_data);
+    auto this_thread_data = manager.scratch_space();
+    IOContext &ctx = this_thread_data->ctx;
+
+#if 1
+    // -- If not partition_read, this is the normal DiskANN approach:
+    if (!_use_partition)
+    {
+#endif
+        // (1) read each node's 4 KB from offset = get_node_sector(node_id)*4096
+        alloc_aligned((void **)&buf, node_ids.size() * num_sectors * defaults::SECTOR_LEN, defaults::SECTOR_LEN);
+
+        // create read requests
+        for (size_t i = 0; i < node_ids.size(); ++i)
+        {
+            auto node_id = node_ids[i];
+
+            AlignedRead read;
+            read.len = num_sectors * defaults::SECTOR_LEN;
+            read.buf = buf + i * num_sectors * defaults::SECTOR_LEN;
+            read.offset = get_node_sector(node_id) * defaults::SECTOR_LEN;
+            read_reqs.push_back(read);
+        }
+
+        reader->read(read_reqs, ctx);
+
+        // copy reads into buffers
+        for (uint32_t i = 0; i < read_reqs.size(); i++)
+        {
+#if defined(_WINDOWS) && defined(USE_BING_INFRA) // this block is to handle failed reads in
+                                                 // production settings
+            if ((*ctx.m_pRequestsStatus)[i] != IOContext::READ_SUCCESS)
+            {
+                retval[i] = false;
+                continue;
+            }
+#endif
+
+            char *node_buf = offset_to_node((char *)read_reqs[i].buf, node_ids[i]);
+
+            if (coord_buffers[i] != nullptr)
+            {
+                T *node_coords = offset_to_node_coords(node_buf);
+                memcpy(coord_buffers[i], node_coords, _disk_bytes_per_point);
+            }
+
+            if (nbr_buffers[i].second != nullptr)
+            {
+                uint32_t *node_nhood = offset_to_node_nhood(node_buf);
+                auto num_nbrs = *node_nhood;
+                nbr_buffers[i].first = num_nbrs;
+                memcpy(nbr_buffers[i].second, node_nhood + 1, num_nbrs * sizeof(uint32_t));
+            }
+        }
+        aligned_free(buf);
+
+        if (!_use_partition)
+        {
+            // done with the normal path
+            return retval;
+        }
+#if 1
+    }
+#endif
+
+    {
+        // Compute the partition offset for each node
+        std::vector> offsets(node_ids.size());
+        std::vector valid_nodes(node_ids.size(), true);
+
+        // Group nodes by partition to avoid reading the same partition repeatedly
+        std::map> partition_to_indices;
+
+        // Walk over all nodes and look up their partition information
+        for (size_t i = 0; i < node_ids.size(); i++)
+        {
+            uint32_t node_id = node_ids[i];
+            if (nbr_buffers[i].second != nullptr)
+            {
+                // Use the same logic as read_neighbors to obtain the partition ID
+                uint32_t partition_id = _id2partition[node_id];
+                if (partition_id >= _num_partitions)
+                {
+                    valid_nodes[i] = false;
+                    retval[i] = false;
+                    continue;
+                }
+
+                // Group the node indices by partition ID
+                partition_to_indices[partition_id].push_back(i);
+            }
+        }
+
+        // Issue one read per partition
+        for (const auto &pair : partition_to_indices)
+        {
+            uint32_t partition_id = pair.first;
+            const auto &indices = pair.second;
+
+            // Compute the sector offset (same as in read_neighbors)
+            uint64_t sector_offset = (partition_id + 1) * defaults::SECTOR_LEN;
+
+            // Read the partition's sector
+            char *sector_buf = nullptr;
+            alloc_aligned((void **)&sector_buf, defaults::SECTOR_LEN, defaults::SECTOR_LEN);
+
+            AlignedRead read;
+            read.len = defaults::SECTOR_LEN;
+            read.buf = sector_buf;
+            read.offset = sector_offset;
+
+            std::vector single_read = {read};
+            graph_reader->read(single_read, ctx);
+
+            // Process all nodes that fall in this partition
+            for (size_t idx : indices)
+            {
+                uint32_t node_id = node_ids[idx];
+
+                // Find the node's position within the partition (same as in read_neighbors)
+                const auto &part_list = _graph_partitions[partition_id];
+                auto it = std::find(part_list.begin(), part_list.end(), node_id);
+                if (it == part_list.end())
+                {
+                    retval[idx] = false;
+                    continue;
+                }
+                size_t j = std::distance(part_list.begin(), it);
+
+                // Compute the node's offset within the sector (same as in read_neighbors)
+                uint64_t node_offset = j * _graph_node_len;
+                if (node_offset + 4 > defaults::SECTOR_LEN)
+                {
+                    retval[idx] = false;
+                    continue;
+                }
+
+                // Read the neighbor count
+                char *adjacency_ptr = sector_buf + node_offset;
+                uint32_t neighbor_count = *reinterpret_cast(adjacency_ptr);
+
+                // Check that the neighbor data does not run past the sector
+                size_t needed = neighbor_count * sizeof(uint32_t);
+                if (node_offset + 4 + needed > defaults::SECTOR_LEN)
+                {
+                    retval[idx] = false;
+                    continue;
+                }
+
+                // Copy the neighbor data
+                nbr_buffers[idx].first = neighbor_count;
+                memcpy(nbr_buffers[idx].second, adjacency_ptr + 4, needed);
+            }
+
+            aligned_free(sector_buf);
+        }
+    }
+
+    return retval;
+}
+
+template void PQFlashIndex::load_cache_list(std::vector &node_list)
+{
+    diskann::cout << "Loading the cache list into memory.." << std::flush;
+    size_t num_cached_nodes = node_list.size();
+
+    // Allocate space for neighborhood cache
+    _nhood_cache_buf = new uint32_t[num_cached_nodes * (_max_degree + 1)];
+    memset(_nhood_cache_buf, 0, num_cached_nodes * (_max_degree + 1));
+
+    // Allocate space for coordinate cache
+    size_t coord_cache_buf_len = num_cached_nodes * _aligned_dim;
+    diskann::alloc_aligned((void **)&_coord_cache_buf, coord_cache_buf_len * sizeof(T), 8 * sizeof(T));
+    memset(_coord_cache_buf, 0, coord_cache_buf_len * sizeof(T));
+
+    size_t BLOCK_SIZE = 8;
+    size_t num_blocks = DIV_ROUND_UP(num_cached_nodes, BLOCK_SIZE);
+    for (size_t block = 0; block < num_blocks; block++)
+    {
+        size_t start_idx = block * BLOCK_SIZE;
+        size_t end_idx = (std::min)(num_cached_nodes, (block + 1) * BLOCK_SIZE);
+
+        // Copy offset into buffers to read into
+        std::vector nodes_to_read;
+        std::vector coord_buffers;
+        std::vector> nbr_buffers;
+        for (size_t node_idx = start_idx; node_idx < end_idx; node_idx++)
+        {
+            nodes_to_read.push_back(node_list[node_idx]);
+            coord_buffers.push_back(_coord_cache_buf + node_idx * _aligned_dim);
+            nbr_buffers.emplace_back(0, _nhood_cache_buf + node_idx * (_max_degree + 1));
+        }
+
+        // issue the reads
+        auto read_status = read_nodes(nodes_to_read, coord_buffers, nbr_buffers);
+
+        // check for success and insert into the cache.
+        for (size_t i = 0; i < read_status.size(); i++)
+        {
+            if (read_status[i] == true)
+            {
+                _coord_cache.insert(std::make_pair(nodes_to_read[i], coord_buffers[i]));
+                _nhood_cache.insert(std::make_pair(nodes_to_read[i], nbr_buffers[i]));
+            }
+        }
+    }
+    diskann::cout << "..done."
<< std::endl; +} + +#ifdef EXEC_ENV_OLS +template +void PQFlashIndex::generate_cache_list_from_sample_queries(MemoryMappedFiles &files, std::string sample_bin, + uint64_t l_search, uint64_t beamwidth, + uint64_t num_nodes_to_cache, uint32_t nthreads, + std::vector &node_list) +{ +#else +template +void PQFlashIndex::generate_cache_list_from_sample_queries(std::string sample_bin, uint64_t l_search, + uint64_t beamwidth, uint64_t num_nodes_to_cache, + uint32_t nthreads, + std::vector &node_list) +{ +#endif + if (num_nodes_to_cache >= this->_num_points) + { + // for small num_points and big num_nodes_to_cache, use below way to get the node_list quickly + node_list.resize(this->_num_points); + for (uint32_t i = 0; i < this->_num_points; ++i) + { + node_list[i] = i; + } + return; + } + + this->_count_visited_nodes = true; + this->_node_visit_counter.clear(); + this->_node_visit_counter.resize(this->_num_points); + for (uint32_t i = 0; i < _node_visit_counter.size(); i++) + { + this->_node_visit_counter[i].first = i; + this->_node_visit_counter[i].second = 0; + } + + size_t sample_num, sample_dim, sample_aligned_dim; + T *samples; + +#ifdef EXEC_ENV_OLS + if (files.fileExists(sample_bin)) + { + diskann::load_aligned_bin(files, sample_bin, samples, sample_num, sample_dim, sample_aligned_dim); + } +#else + if (file_exists(sample_bin)) + { + diskann::load_aligned_bin(sample_bin, samples, sample_num, sample_dim, sample_aligned_dim); + } +#endif + else + { + diskann::cerr << "Sample bin file not found. Not generating cache." << std::endl; + return; + } + + std::vector tmp_result_ids_64(sample_num, 0); + std::vector tmp_result_dists(sample_num, 0); + + bool filtered_search = false; + std::vector random_query_filters(sample_num); + if (_filter_to_medoid_ids.size() != 0) + { + filtered_search = true; + generate_random_labels(random_query_filters, (uint32_t)sample_num, nthreads); + } + +#pragma omp parallel for schedule(dynamic, 1) num_threads(nthreads) + for (int64_t i = 0; i < (int64_t)sample_num; i++) + { + auto &label_for_search = random_query_filters[i]; + // run a search on the sample query with a random label (sampled from base label distribution), and it will + // concurrently update the node_visit_counter to track most visited nodes. The last false is to not use the + // "use_reorder_data" option which enables a final reranking if the disk index itself contains only PQ data. 
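+        // Once every sample query has been searched, _node_visit_counter is sorted by
+        // visit count below and the num_nodes_to_cache most-visited node ids are
+        // returned in node_list.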
+ cached_beam_search(samples + (i * sample_aligned_dim), 1, l_search, tmp_result_ids_64.data() + i, + tmp_result_dists.data() + i, beamwidth, filtered_search, label_for_search, false); + } + + std::sort(this->_node_visit_counter.begin(), _node_visit_counter.end(), + [](std::pair &left, std::pair &right) { + return left.second > right.second; + }); + node_list.clear(); + node_list.shrink_to_fit(); + num_nodes_to_cache = std::min((size_t)num_nodes_to_cache, this->_node_visit_counter.size()); + node_list.reserve(num_nodes_to_cache); + for (uint64_t i = 0; i < num_nodes_to_cache; i++) + { + node_list.push_back(this->_node_visit_counter[i].first); + } + this->_count_visited_nodes = false; + + diskann::aligned_free(samples); +} + +template +void PQFlashIndex::cache_bfs_levels(uint64_t num_nodes_to_cache, std::vector &node_list, + const bool shuffle) +{ + std::random_device rng; + std::mt19937 urng(rng()); + + tsl::robin_set node_set; + + // Do not cache more than 10% of the nodes in the index + uint64_t tenp_nodes = (uint64_t)(std::round(this->_num_points * 0.1)); + if (num_nodes_to_cache > tenp_nodes) + { + diskann::cout << "Reducing nodes to cache from: " << num_nodes_to_cache << " to: " << tenp_nodes + << "(10 percent of total nodes:" << this->_num_points << ")" << std::endl; + num_nodes_to_cache = tenp_nodes == 0 ? 1 : tenp_nodes; + } + diskann::cout << "Caching " << num_nodes_to_cache << "..." << std::endl; + + std::unique_ptr> cur_level, prev_level; + cur_level = std::make_unique>(); + prev_level = std::make_unique>(); + + for (uint64_t miter = 0; miter < _num_medoids && cur_level->size() < num_nodes_to_cache; miter++) + { + cur_level->insert(_medoids[miter]); + } + + if ((_filter_to_medoid_ids.size() > 0) && (cur_level->size() < num_nodes_to_cache)) + { + for (auto &x : _filter_to_medoid_ids) + { + for (auto &y : x.second) + { + cur_level->insert(y); + if (cur_level->size() == num_nodes_to_cache) + break; + } + if (cur_level->size() == num_nodes_to_cache) + break; + } + } + + uint64_t lvl = 1; + uint64_t prev_node_set_size = 0; + while ((node_set.size() + cur_level->size() < num_nodes_to_cache) && cur_level->size() != 0) + { + // swap prev_level and cur_level + std::swap(prev_level, cur_level); + // clear cur_level + cur_level->clear(); + + std::vector nodes_to_expand; + + for (const uint32_t &id : *prev_level) + { + if (node_set.find(id) != node_set.end()) + { + continue; + } + node_set.insert(id); + nodes_to_expand.push_back(id); + } + + if (shuffle) + std::shuffle(nodes_to_expand.begin(), nodes_to_expand.end(), urng); + else + std::sort(nodes_to_expand.begin(), nodes_to_expand.end()); + + diskann::cout << "Level: " << lvl << std::flush; + bool finish_flag = false; + + size_t BLOCK_SIZE = 1024; + size_t nblocks = DIV_ROUND_UP(nodes_to_expand.size(), BLOCK_SIZE); + for (size_t block = 0; block < nblocks && !finish_flag; block++) + { + diskann::cout << "." 
<< std::flush; + size_t start = block * BLOCK_SIZE; + size_t end = (std::min)((block + 1) * BLOCK_SIZE, nodes_to_expand.size()); + + std::vector nodes_to_read; + std::vector coord_buffers(end - start, nullptr); + std::vector> nbr_buffers; + + for (size_t cur_pt = start; cur_pt < end; cur_pt++) + { + nodes_to_read.push_back(nodes_to_expand[cur_pt]); + nbr_buffers.emplace_back(0, new uint32_t[_max_degree + 1]); + } + + // issue read requests + auto read_status = read_nodes(nodes_to_read, coord_buffers, nbr_buffers); + + // process each nhood buf + for (uint32_t i = 0; i < read_status.size(); i++) + { + if (read_status[i] == false) + { + continue; + } + else + { + uint32_t nnbrs = nbr_buffers[i].first; + uint32_t *nbrs = nbr_buffers[i].second; + + // explore next level + for (uint32_t j = 0; j < nnbrs && !finish_flag; j++) + { + if (node_set.find(nbrs[j]) == node_set.end()) + { + cur_level->insert(nbrs[j]); + } + if (cur_level->size() + node_set.size() >= num_nodes_to_cache) + { + finish_flag = true; + } + } + } + delete[] nbr_buffers[i].second; + } + } + + diskann::cout << ". #nodes: " << node_set.size() - prev_node_set_size + << ", #nodes thus far: " << node_set.size() << std::endl; + prev_node_set_size = node_set.size(); + lvl++; + } + + assert(node_set.size() + cur_level->size() == num_nodes_to_cache || cur_level->size() == 0); + + node_list.clear(); + node_list.reserve(node_set.size() + cur_level->size()); + for (auto node : node_set) + node_list.push_back(node); + for (auto node : *cur_level) + node_list.push_back(node); + + diskann::cout << "Level: " << lvl << std::flush; + diskann::cout << ". #nodes: " << node_list.size() - prev_node_set_size << ", #nodes thus far: " << node_list.size() + << std::endl; + diskann::cout << "done" << std::endl; +} + +template void PQFlashIndex::use_medoids_data_as_centroids() +{ + if (_centroid_data != nullptr) + aligned_free(_centroid_data); + alloc_aligned(((void **)&_centroid_data), _num_medoids * _aligned_dim * sizeof(float), 32); + std::memset(_centroid_data, 0, _num_medoids * _aligned_dim * sizeof(float)); + + diskann::cout << "Loading centroid data from medoids vector data of " << _num_medoids << " medoid(s)" << std::endl; + + std::vector nodes_to_read; + std::vector medoid_bufs; + std::vector> nbr_bufs; + + for (uint64_t cur_m = 0; cur_m < _num_medoids; cur_m++) + { + nodes_to_read.push_back(_medoids[cur_m]); + medoid_bufs.push_back(new T[_data_dim]); + nbr_bufs.emplace_back(0, nullptr); + } + + auto read_status = read_nodes(nodes_to_read, medoid_bufs, nbr_bufs); + + for (uint64_t cur_m = 0; cur_m < _num_medoids; cur_m++) + { + if (read_status[cur_m] == true) + { + if (!_use_disk_index_pq) + { + for (uint32_t i = 0; i < _data_dim; i++) + _centroid_data[cur_m * _aligned_dim + i] = medoid_bufs[cur_m][i]; + } + else + { + _disk_pq_table.inflate_vector((uint8_t *)medoid_bufs[cur_m], (_centroid_data + cur_m * _aligned_dim)); + } + } + else + { + throw ANNException("Unable to read a medoid", -1, __FUNCSIG__, __FILE__, __LINE__); + } + delete[] medoid_bufs[cur_m]; + } +} + +template +void PQFlashIndex::generate_random_labels(std::vector &labels, const uint32_t num_labels, + const uint32_t nthreads) +{ + std::random_device rd; + labels.clear(); + labels.resize(num_labels); + + uint64_t num_total_labels = _pts_to_label_offsets[_num_points - 1] + _pts_to_label_counts[_num_points - 1]; + std::mt19937 gen(rd()); + if (num_total_labels == 0) + { + std::stringstream stream; + stream << "No labels found in data. 
Not sampling random labels "; + diskann::cerr << stream.str() << std::endl; + throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); + } + std::uniform_int_distribution dis(0, num_total_labels - 1); + +#pragma omp parallel for schedule(dynamic, 1) num_threads(nthreads) + for (int64_t i = 0; i < num_labels; i++) + { + uint64_t rnd_loc = dis(gen); + labels[i] = (LabelT)_pts_to_labels[rnd_loc]; + } +} + +template +std::unordered_map PQFlashIndex::load_label_map(std::basic_istream &map_reader) +{ + std::unordered_map string_to_int_mp; + std::string line, token; + LabelT token_as_num; + std::string label_str; + while (std::getline(map_reader, line)) + { + std::istringstream iss(line); + getline(iss, token, '\t'); + label_str = token; + getline(iss, token, '\t'); + token_as_num = (LabelT)std::stoul(token); + string_to_int_mp[label_str] = token_as_num; + } + return string_to_int_mp; +} + +template +LabelT PQFlashIndex::get_converted_label(const std::string &filter_label) +{ + if (_label_map.find(filter_label) != _label_map.end()) + { + return _label_map[filter_label]; + } + if (_use_universal_label) + { + return _universal_filter_label; + } + std::stringstream stream; + stream << "Unable to find label in the Label Map"; + diskann::cerr << stream.str() << std::endl; + throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); +} + +template +void PQFlashIndex::reset_stream_for_reading(std::basic_istream &infile) +{ + infile.clear(); + infile.seekg(0); +} + +template +void PQFlashIndex::get_label_file_metadata(const std::string &fileContent, uint32_t &num_pts, + uint32_t &num_total_labels) +{ + num_pts = 0; + num_total_labels = 0; + + size_t file_size = fileContent.length(); + + std::string label_str; + size_t cur_pos = 0; + size_t next_pos = 0; + while (cur_pos < file_size && cur_pos != std::string::npos) + { + next_pos = fileContent.find('\n', cur_pos); + if (next_pos == std::string::npos) + { + break; + } + + size_t lbl_pos = cur_pos; + size_t next_lbl_pos = 0; + while (lbl_pos < next_pos && lbl_pos != std::string::npos) + { + next_lbl_pos = fileContent.find(',', lbl_pos); + if (next_lbl_pos == std::string::npos) // the last label + { + next_lbl_pos = next_pos; + } + + num_total_labels++; + + lbl_pos = next_lbl_pos + 1; + } + + cur_pos = next_pos + 1; + + num_pts++; + } + + diskann::cout << "Labels file metadata: num_points: " << num_pts << ", #total_labels: " << num_total_labels + << std::endl; +} + +template +inline bool PQFlashIndex::point_has_label(uint32_t point_id, LabelT label_id) +{ + uint32_t start_vec = _pts_to_label_offsets[point_id]; + uint32_t num_lbls = _pts_to_label_counts[point_id]; + bool ret_val = false; + for (uint32_t i = 0; i < num_lbls; i++) + { + if (_pts_to_labels[start_vec + i] == label_id) + { + ret_val = true; + break; + } + } + return ret_val; +} + +template +void PQFlashIndex::parse_label_file(std::basic_istream &infile, size_t &num_points_labels) +{ + infile.seekg(0, std::ios::end); + size_t file_size = infile.tellg(); + + std::string buffer(file_size, ' '); + + infile.seekg(0, std::ios::beg); + infile.read(&buffer[0], file_size); + + std::string line; + uint32_t line_cnt = 0; + + uint32_t num_pts_in_label_file; + uint32_t num_total_labels; + get_label_file_metadata(buffer, num_pts_in_label_file, num_total_labels); + + _pts_to_label_offsets = new uint32_t[num_pts_in_label_file]; + _pts_to_label_counts = new uint32_t[num_pts_in_label_file]; + _pts_to_labels = new LabelT[num_total_labels]; + uint32_t labels_seen_so_far = 
0; + + std::string label_str; + size_t cur_pos = 0; + size_t next_pos = 0; + while (cur_pos < file_size && cur_pos != std::string::npos) + { + next_pos = buffer.find('\n', cur_pos); + if (next_pos == std::string::npos) + { + break; + } + + _pts_to_label_offsets[line_cnt] = labels_seen_so_far; + uint32_t &num_lbls_in_cur_pt = _pts_to_label_counts[line_cnt]; + num_lbls_in_cur_pt = 0; + + size_t lbl_pos = cur_pos; + size_t next_lbl_pos = 0; + while (lbl_pos < next_pos && lbl_pos != std::string::npos) + { + next_lbl_pos = buffer.find(',', lbl_pos); + if (next_lbl_pos == std::string::npos) // the last label in the whole file + { + next_lbl_pos = next_pos; + } + + if (next_lbl_pos > next_pos) // the last label in one line, just read to the end + { + next_lbl_pos = next_pos; + } + + label_str.assign(buffer.c_str() + lbl_pos, next_lbl_pos - lbl_pos); + if (label_str[label_str.length() - 1] == '\t') // '\t' won't exist in label file? + { + label_str.erase(label_str.length() - 1); + } + + LabelT token_as_num = (LabelT)std::stoul(label_str); + _pts_to_labels[labels_seen_so_far++] = (LabelT)token_as_num; + num_lbls_in_cur_pt++; + + // move to next label + lbl_pos = next_lbl_pos + 1; + } + + // move to next line + cur_pos = next_pos + 1; + + if (num_lbls_in_cur_pt == 0) + { + diskann::cout << "No label found for point " << line_cnt << std::endl; + exit(-1); + } + + line_cnt++; + } + + num_points_labels = line_cnt; + reset_stream_for_reading(infile); +} + +template void PQFlashIndex::set_universal_label(const LabelT &label) +{ + _use_universal_label = true; + _universal_filter_label = label; +} + +#ifdef EXEC_ENV_OLS +template +int PQFlashIndex::load(MemoryMappedFiles &files, uint32_t num_threads, const char *index_prefix, + const char *pq_prefix) +{ +#else +template +int PQFlashIndex::load(uint32_t num_threads, const char *index_prefix, const char *pq_prefix, + const char *partition_prefix) +{ +#endif + if (pq_prefix == nullptr || strcmp(pq_prefix, "") == 0) + { + pq_prefix = index_prefix; + } + if (partition_prefix != nullptr && strcmp(partition_prefix, "") != 0) + { + _use_partition = true; + } + std::string pq_table_bin = std::string(pq_prefix) + "_pq_pivots.bin"; + std::string pq_compressed_vectors = std::string(pq_prefix) + "_pq_compressed.bin"; + std::string _disk_index_file = std::string(index_prefix) + "_disk.index"; + std::string graph_file = std::string(partition_prefix) + "_disk_graph.index"; + std::string partition_file = std::string(partition_prefix) + "_partition.bin"; +#ifdef EXEC_ENV_OLS + return load_from_separate_paths(files, num_threads, _disk_index_file.c_str(), pq_table_bin.c_str(), + pq_compressed_vectors.c_str(), graph_file.c_str(), partition_file.c_str()); +#else + return load_from_separate_paths(num_threads, _disk_index_file.c_str(), pq_table_bin.c_str(), + pq_compressed_vectors.c_str(), graph_file.c_str(), partition_file.c_str()); +#endif +} + +template +int PQFlashIndex::read_partition_info(const std::string &partition_bin) +{ + std::ifstream pf(partition_bin, std::ios::binary); + if (!pf.is_open()) + { + diskann::cout << "Cannot open partition.bin: " << partition_bin << std::endl; + return 1; + } + diskann::cout << "Loading partition info from " << partition_bin << std::endl; + uint64_t C, nd; + READ_U64(pf, C); + READ_U64(pf, _num_partitions); + READ_U64(pf, nd); + std::cout << "[partition.bin header] C=" << C << ", partition_nums=" << _num_partitions << ", nd=" << nd + << std::endl; + + // 读取分区节点列表 + _graph_partitions.resize(_num_partitions); + for (uint64_t i = 0; i < 
_num_partitions; i++)
+    {
+        uint32_t psize;
+        READ_U32(pf, psize);
+        _graph_partitions[i].resize(psize);
+        pf.read(reinterpret_cast(_graph_partitions[i].data()), psize * sizeof(uint32_t));
+    }
+    // Read _id2partition[node], which has size nd
+    _id2partition.resize(nd);
+    pf.read(reinterpret_cast(_id2partition.data()), nd * sizeof(uint32_t));
+    pf.close();
+    std::cout << "Done loading partition info.\n";
+
+    return 0;
+}
+
+template
+int PQFlashIndex::load_graph_index(const std::string &graph_index_file)
+{
+    std::ifstream gf(graph_index_file, std::ios::binary);
+    if (!gf.is_open())
+    {
+        diskann::cout << "Cannot open disk_graph.index: " << graph_index_file << std::endl;
+        return 1;
+    }
+    diskann::cout << "Loading graph index from " << graph_index_file << std::endl;
+
+    // (a) sector0 => read 2 ints for meta_n and meta_dim
+    int meta_n, meta_dim;
+    gf.read((char *)&meta_n, sizeof(int));
+    gf.read((char *)&meta_dim, sizeof(int));
+    diskann::cout << "[debug] meta_n=" << meta_n << ", meta_dim=" << meta_dim << "\n";
+
+    // (b) Read uint64_t meta_n times
+    std::vector meta_info(meta_n);
+    gf.read(reinterpret_cast(meta_info.data()), meta_n * sizeof(uint64_t));
+    for (int i = 0; i < meta_n; i++)
+    {
+        diskann::cout << " meta_info[" << i << "]= " << meta_info[i] << "\n";
+    }
+
+    size_t file_size = get_file_size(graph_index_file);
+    diskann::cout << "[disk_graph.index size] " << file_size << " bytes\n";
+
+    uint64_t nd_in_meta = meta_info[0];
+    uint64_t dim_in_meta = meta_info[1];
+    uint64_t max_node_len = meta_info[3];
+    uint64_t c_in_meta = meta_info[4];
+    uint64_t entire_file_sz = meta_info[8];
+
+    diskann::cout << "Based on meta_info:\n"
+                  << " nd_in_meta= " << nd_in_meta << ", dim_in_meta= " << dim_in_meta
+                  << ", max_node_len= " << max_node_len << ", c_in_meta= " << c_in_meta
+                  << ", entire_file_size= " << entire_file_sz << "\n";
+
+    uint64_t dim_size = dim_in_meta * sizeof(float);
+
+    _graph_node_len = max_node_len - dim_size;
+
+#if 0
+    assert(max_node_len == _max_node_len);
+    assert(dim_size == _disk_bytes_per_point);
+    assert(_graph_node_len / sizeof(float) == _max_degree + 1);
+#endif
+
+    // Compensate for the info missing from the old meta_info
+    _max_degree = _graph_node_len / sizeof(float) - 1;
+    _disk_bytes_per_point = dim_size;
+    _max_node_len = max_node_len;
+
+    diskann::cout << " => graph_node_len= " << _graph_node_len << "\n\n";
+
+    return 0;
+}
+
+#ifdef EXEC_ENV_OLS
+template
+int PQFlashIndex::load_from_separate_paths(diskann::MemoryMappedFiles &files, uint32_t num_threads,
+                                           const char *index_filepath, const char *pivots_filepath,
+                                           const char *compressed_filepath, const char *graph_filepath)
+{
+#else
+template
+int PQFlashIndex::load_from_separate_paths(uint32_t num_threads, const char *index_filepath,
+                                           const char *pivots_filepath, const char *compressed_filepath,
+                                           const char *graph_file, const char *partition_file)
+{
+#endif
+    std::string pq_table_bin = pivots_filepath;
+    std::string pq_compressed_vectors = compressed_filepath;
+    std::string _disk_index_file = index_filepath;
+    // medoids, etc.
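+    // Added illustrative note (not in the original source): with the naming scheme used by
+    // load() above, a hypothetical index_prefix of "/data/wiki" resolves _disk_index_file to
+    // "/data/wiki_disk.index", and the auxiliary files derived below would look like
+    //   "/data/wiki_disk.index_medoids.bin", "/data/wiki_disk.index_centroids.bin",
+    //   "/data/wiki_disk.index_labels.txt", "/data/wiki_disk.index_labels_map.txt".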
+ std::string medoids_file = std::string(_disk_index_file) + "_medoids.bin"; + std::string centroids_file = std::string(_disk_index_file) + "_centroids.bin"; + + std::string labels_file = std::string(_disk_index_file) + "_labels.txt"; + std::string labels_to_medoids = std::string(_disk_index_file) + "_labels_to_medoids.txt"; + std::string dummy_map_file = std::string(_disk_index_file) + "_dummy_map.txt"; + std::string labels_map_file = std::string(_disk_index_file) + "_labels_map.txt"; + + size_t num_pts_in_label_file = 0; + + size_t pq_file_dim = 0, pq_file_num_centroids = 0; +#ifdef EXEC_ENV_OLS + get_bin_metadata(files, pq_table_bin, pq_file_num_centroids, pq_file_dim, METADATA_SIZE); +#else + get_bin_metadata(pq_table_bin, pq_file_num_centroids, pq_file_dim, METADATA_SIZE); +#endif + + this->_disk_index_file = _disk_index_file; + + if (pq_file_num_centroids != 256) + { + diskann::cout << "Got " << pq_file_num_centroids << " PQ centroids, loading from " << pq_table_bin << std::endl; + diskann::cout << "Error. Number of PQ centroids is not 256. Exiting." << std::endl; + return -1; + } + + this->_data_dim = pq_file_dim; + // will change later if we use PQ on disk or if we are using + // inner product without PQ + this->_disk_bytes_per_point = this->_data_dim * sizeof(T); + this->_aligned_dim = ROUND_UP(pq_file_dim, 8); + + size_t npts_u64, nchunks_u64; +#ifdef EXEC_ENV_OLS + diskann::load_bin(files, pq_compressed_vectors, this->data, npts_u64, nchunks_u64); +#else + diskann::load_bin(pq_compressed_vectors, this->data, npts_u64, nchunks_u64); +#endif + + this->_num_points = npts_u64; + this->_n_chunks = nchunks_u64; +#ifdef EXEC_ENV_OLS + if (files.fileExists(labels_file)) + { + FileContent &content_labels = files.getContent(labels_file); + std::stringstream infile(std::string((const char *)content_labels._content, content_labels._size)); +#else + if (file_exists(labels_file)) + { + std::ifstream infile(labels_file, std::ios::binary); + if (infile.fail()) + { + throw diskann::ANNException(std::string("Failed to open file ") + labels_file, -1); + } +#endif + parse_label_file(infile, num_pts_in_label_file); + assert(num_pts_in_label_file == this->_num_points); + +#ifndef EXEC_ENV_OLS + infile.close(); +#endif + +#ifdef EXEC_ENV_OLS + FileContent &content_labels_map = files.getContent(labels_map_file); + std::stringstream map_reader(std::string((const char *)content_labels_map._content, content_labels_map._size)); +#else + std::ifstream map_reader(labels_map_file); +#endif + _label_map = load_label_map(map_reader); + +#ifndef EXEC_ENV_OLS + map_reader.close(); +#endif + +#ifdef EXEC_ENV_OLS + if (files.fileExists(labels_to_medoids)) + { + FileContent &content_labels_to_meoids = files.getContent(labels_to_medoids); + std::stringstream medoid_stream( + std::string((const char *)content_labels_to_meoids._content, content_labels_to_meoids._size)); +#else + if (file_exists(labels_to_medoids)) + { + std::ifstream medoid_stream(labels_to_medoids); + assert(medoid_stream.is_open()); +#endif + std::string line, token; + + _filter_to_medoid_ids.clear(); + try + { + while (std::getline(medoid_stream, line)) + { + std::istringstream iss(line); + uint32_t cnt = 0; + std::vector medoids; + LabelT label; + while (std::getline(iss, token, ',')) + { + if (cnt == 0) + label = (LabelT)std::stoul(token); + else + medoids.push_back((uint32_t)stoul(token)); + cnt++; + } + _filter_to_medoid_ids[label].swap(medoids); + } + } + catch (std::system_error &e) + { + throw FileException(labels_to_medoids, e, __FUNCSIG__, 
__FILE__, __LINE__); + } + } + std::string univ_label_file = std ::string(_disk_index_file) + "_universal_label.txt"; + +#ifdef EXEC_ENV_OLS + if (files.fileExists(univ_label_file)) + { + FileContent &content_univ_label = files.getContent(univ_label_file); + std::stringstream universal_label_reader( + std::string((const char *)content_univ_label._content, content_univ_label._size)); +#else + if (file_exists(univ_label_file)) + { + std::ifstream universal_label_reader(univ_label_file); + assert(universal_label_reader.is_open()); +#endif + std::string univ_label; + universal_label_reader >> univ_label; +#ifndef EXEC_ENV_OLS + universal_label_reader.close(); +#endif + LabelT label_as_num = (LabelT)std::stoul(univ_label); + set_universal_label(label_as_num); + } + +#ifdef EXEC_ENV_OLS + if (files.fileExists(dummy_map_file)) + { + FileContent &content_dummy_map = files.getContent(dummy_map_file); + std::stringstream dummy_map_stream( + std::string((const char *)content_dummy_map._content, content_dummy_map._size)); +#else + if (file_exists(dummy_map_file)) + { + std::ifstream dummy_map_stream(dummy_map_file); + assert(dummy_map_stream.is_open()); +#endif + std::string line, token; + + while (std::getline(dummy_map_stream, line)) + { + std::istringstream iss(line); + uint32_t cnt = 0; + uint32_t dummy_id; + uint32_t real_id; + while (std::getline(iss, token, ',')) + { + if (cnt == 0) + dummy_id = (uint32_t)stoul(token); + else + real_id = (uint32_t)stoul(token); + cnt++; + } + _dummy_pts.insert(dummy_id); + _has_dummy_pts.insert(real_id); + _dummy_to_real_map[dummy_id] = real_id; + + if (_real_to_dummy_map.find(real_id) == _real_to_dummy_map.end()) + _real_to_dummy_map[real_id] = std::vector(); + + _real_to_dummy_map[real_id].emplace_back(dummy_id); + } +#ifndef EXEC_ENV_OLS + dummy_map_stream.close(); +#endif + diskann::cout << "Loaded dummy map" << std::endl; + } + } + +#ifdef EXEC_ENV_OLS + _pq_table.load_pq_centroid_bin(files, pq_table_bin.c_str(), nchunks_u64); +#else + _pq_table.load_pq_centroid_bin(pq_table_bin.c_str(), nchunks_u64); +#endif + + diskann::cout << "Loaded PQ centroids and in-memory compressed vectors. #points: " << _num_points + << " #dim: " << _data_dim << " #aligned_dim: " << _aligned_dim << " #chunks: " << _n_chunks + << std::endl; + + if (_n_chunks > MAX_PQ_CHUNKS) + { + std::stringstream stream; + stream << "Error loading index. Ensure that max PQ bytes for in-memory " + "PQ data does not exceed " + << MAX_PQ_CHUNKS << std::endl; + throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); + } + + std::string disk_pq_pivots_path = this->_disk_index_file + "_pq_pivots.bin"; +#ifdef EXEC_ENV_OLS + if (files.fileExists(disk_pq_pivots_path)) + { + _use_disk_index_pq = true; + // giving 0 chunks to make the _pq_table infer from the + // chunk_offsets file the correct value + _disk_pq_table.load_pq_centroid_bin(files, disk_pq_pivots_path.c_str(), 0); +#else + if (file_exists(disk_pq_pivots_path)) + { + _use_disk_index_pq = true; + // giving 0 chunks to make the _pq_table infer from the + // chunk_offsets file the correct value + _disk_pq_table.load_pq_centroid_bin(disk_pq_pivots_path.c_str(), 0); +#endif + _disk_pq_n_chunks = _disk_pq_table.get_num_chunks(); + _disk_bytes_per_point = + _disk_pq_n_chunks * sizeof(uint8_t); // revising disk_bytes_per_point since DISK PQ is used. + diskann::cout << "Disk index uses PQ data compressed down to " << _disk_pq_n_chunks << " bytes per point." 
+ << std::endl; + } + +// read index metadata +#ifdef EXEC_ENV_OLS + // This is a bit tricky. We have to read the header from the + // disk_index_file. But this is now exclusively a preserve of the + // DiskPriorityIO class. So, we need to estimate how many + // bytes are needed to store the header and read in that many using our + // 'standard' aligned file reader approach. + reader->open(_disk_index_file); + this->setup_thread_data(num_threads); + this->_max_nthreads = num_threads; + + char *bytes = getHeaderBytes(); + ContentBuf buf(bytes, HEADER_SIZE); + std::basic_istream index_metadata(&buf); +#else + diskann::cout << "Loading index metadata from " << _disk_index_file << std::endl; + std::ifstream index_metadata(_disk_index_file, std::ios::binary); +#endif + + size_t medoid_id_on_file; +#if 1 + if (!_use_partition) + { +#endif + if (!index_metadata.is_open()) + { + diskann::cout << "Error: Could not open index metadata file: " << _disk_index_file << std::endl; + return -1; + } + + uint32_t nr, nc; // metadata itself is stored as bin format (nr is number of + // metadata, nc should be 1) + READ_U32(index_metadata, nr); + READ_U32(index_metadata, nc); + + uint64_t disk_nnodes; + uint64_t disk_ndims; // can be disk PQ dim if disk_PQ is set to true + READ_U64(index_metadata, disk_nnodes); + READ_U64(index_metadata, disk_ndims); + + if (disk_nnodes != _num_points) + { + diskann::cout << "Mismatch in #points for compressed data file and disk " + "index file: " + << disk_nnodes << " vs " << _num_points << std::endl; + return -1; + } + + READ_U64(index_metadata, medoid_id_on_file); + READ_U64(index_metadata, _max_node_len); + READ_U64(index_metadata, _nnodes_per_sector); + _max_degree = ((_max_node_len - _disk_bytes_per_point) / sizeof(uint32_t)) - 1; + + if (_max_degree > defaults::MAX_GRAPH_DEGREE) + { + std::stringstream stream; + stream << "Error loading index. Ensure that max graph degree (R) does " + "not exceed " + << defaults::MAX_GRAPH_DEGREE << std::endl; + throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); + } + + // setting up concept of frozen points in disk index for streaming-DiskANN + READ_U64(index_metadata, this->_num_frozen_points); + uint64_t file_frozen_id; + READ_U64(index_metadata, file_frozen_id); + if (this->_num_frozen_points == 1) + this->_frozen_location = file_frozen_id; + if (this->_num_frozen_points == 1) + { + diskann::cout << " Detected frozen point in index at location " << this->_frozen_location + << ". Will not output it at search time." 
<< std::endl; + } + + READ_U64(index_metadata, this->_reorder_data_exists); + if (this->_reorder_data_exists) + { + if (this->_use_disk_index_pq == false) + { + throw ANNException("Reordering is designed for used with disk PQ " + "compression option", + -1, __FUNCSIG__, __FILE__, __LINE__); + } + READ_U64(index_metadata, this->_reorder_data_start_sector); + READ_U64(index_metadata, this->_ndims_reorder_vecs); + READ_U64(index_metadata, this->_nvecs_per_sector); + } + + diskann::cout << "Disk-Index File Meta-data: "; + diskann::cout << "# nodes per sector: " << _nnodes_per_sector; + diskann::cout << ", max node len (bytes): " << _max_node_len; + diskann::cout << ", max node degree: " << _max_degree << std::endl; + +#ifdef EXEC_ENV_OLS + delete[] bytes; +#else + index_metadata.close(); +#endif + +#ifndef EXEC_ENV_OLS + // open AlignedFileReader handle to index_file + std::string index_fname(_disk_index_file); + reader->open(index_fname); + + diskann::cout << "Disk-Index Meta: nodes per sector: " << _nnodes_per_sector + << ", max node len: " << _max_node_len << ", max node degree: " << _max_degree << std::endl; + +#endif + +#if 1 + } +#endif + + this->setup_thread_data(num_threads); + this->_max_nthreads = num_threads; + +#ifdef EXEC_ENV_OLS + if (files.fileExists(medoids_file)) + { + size_t tmp_dim; + diskann::load_bin(files, norm_file, medoids_file, _medoids, _num_medoids, tmp_dim); +#else + if (file_exists(medoids_file)) + { + size_t tmp_dim; + diskann::load_bin(medoids_file, _medoids, _num_medoids, tmp_dim); +#endif + + if (tmp_dim != 1) + { + std::stringstream stream; + stream << "Error loading medoids file. Expected bin format of m times " + "1 vector of uint32_t." + << std::endl; + throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); + } +#ifdef EXEC_ENV_OLS + if (!files.fileExists(centroids_file)) + { +#else + if (!file_exists(centroids_file)) + { +#endif + diskann::cout << "Centroid data file not found. Using corresponding vectors " + "for the medoids " + << std::endl; + use_medoids_data_as_centroids(); + } + else + { + size_t num_centroids, aligned_tmp_dim; +#ifdef EXEC_ENV_OLS + diskann::load_aligned_bin(files, centroids_file, _centroid_data, num_centroids, tmp_dim, + aligned_tmp_dim); +#else + diskann::load_aligned_bin(centroids_file, _centroid_data, num_centroids, tmp_dim, aligned_tmp_dim); +#endif + if (aligned_tmp_dim != _aligned_dim || num_centroids != _num_medoids) + { + std::stringstream stream; + stream << "Error loading centroids data file. Expected bin format " + "of " + "m times data_dim vector of float, where m is number of " + "medoids " + "in medoids file."; + diskann::cerr << stream.str() << std::endl; + throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); + } + } + } + else + { + if (_use_partition) + { + assert(false); // We do not have a valid medoid id in the partition file. 
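+            // Descriptive note (added): when no medoids file exists, the fallback below uses the
+            // single medoid_id_on_file read from the index metadata header, which is only parsed
+            // in the non-partitioned path above; hence the assert for the partitioned case.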
+ } + _num_medoids = 1; + _medoids = new uint32_t[1]; + _medoids[0] = (uint32_t)(medoid_id_on_file); + use_medoids_data_as_centroids(); + } + + std::string norm_file = std::string(_disk_index_file) + "_max_base_norm.bin"; + +#ifdef EXEC_ENV_OLS + if (files.fileExists(norm_file) && metric == diskann::Metric::INNER_PRODUCT) + { + uint64_t dumr, dumc; + float *norm_val; + diskann::load_bin(files, norm_val, dumr, dumc); +#else + if (file_exists(norm_file) && metric == diskann::Metric::INNER_PRODUCT) + { + size_t dumr, dumc; + float *norm_val; + diskann::load_bin(norm_file, norm_val, dumr, dumc); +#endif + this->_max_base_norm = norm_val[0]; + diskann::cout << "Setting re-scaling factor of base vectors to " << this->_max_base_norm << std::endl; + delete[] norm_val; + } + + if (_use_partition) + { + read_partition_info(partition_file); + + this->_graph_index_file = graph_file; + graph_reader->open(this->_graph_index_file); + load_graph_index(this->_graph_index_file); + } + + diskann::cout << "load_from_separate_paths done." << std::endl; + return 0; +} + +#ifdef USE_BING_INFRA +bool getNextCompletedRequest(std::shared_ptr &reader, IOContext &ctx, size_t size, + int &completedIndex) +{ + if ((*ctx.m_pRequests)[0].m_callback) + { + bool waitsRemaining = false; + long completeCount = ctx.m_completeCount; + do + { + for (int i = 0; i < size; i++) + { + auto ithStatus = (*ctx.m_pRequestsStatus)[i]; + if (ithStatus == IOContext::Status::READ_SUCCESS) + { + completedIndex = i; + return true; + } + else if (ithStatus == IOContext::Status::READ_WAIT) + { + waitsRemaining = true; + } + } + + // if we didn't find one in READ_SUCCESS, wait for one to complete. + if (waitsRemaining) + { + WaitOnAddress(&ctx.m_completeCount, &completeCount, sizeof(completeCount), 100); + // this assumes the knowledge of the reader behavior (implicit + // contract). need better factoring? 
+ } + } while (waitsRemaining); + + completedIndex = -1; + return false; + } + else + { + reader->wait(ctx, completedIndex); + return completedIndex != -1; + } +} +#endif + +template +void PQFlashIndex::cached_beam_search(const T *query1, const uint64_t k_search, const uint64_t l_search, + uint64_t *indices, float *distances, const uint64_t beam_width, + const bool use_reorder_data, QueryStats *stats, + bool USE_DEFERRED_FETCH, bool skip_search_reorder, + bool recompute_beighbor_embeddings, bool dedup_node_dis, + float prune_ratio, const bool batch_recompute, bool global_pruning) +{ + cached_beam_search(query1, k_search, l_search, indices, distances, beam_width, std::numeric_limits::max(), + use_reorder_data, stats, USE_DEFERRED_FETCH, skip_search_reorder, recompute_beighbor_embeddings, + dedup_node_dis, prune_ratio, batch_recompute, global_pruning); +} + +template +void PQFlashIndex::cached_beam_search(const T *query1, const uint64_t k_search, const uint64_t l_search, + uint64_t *indices, float *distances, const uint64_t beam_width, + const bool use_filter, const LabelT &filter_label, + const bool use_reorder_data, QueryStats *stats, + bool USE_DEFERRED_FETCH, bool skip_search_reorder, + bool recompute_beighbor_embeddings, bool dedup_node_dis, + float prune_ratio, const bool batch_recompute, bool global_pruning) +{ + cached_beam_search(query1, k_search, l_search, indices, distances, beam_width, use_filter, filter_label, + std::numeric_limits::max(), use_reorder_data, stats, USE_DEFERRED_FETCH, + skip_search_reorder, recompute_beighbor_embeddings, dedup_node_dis, prune_ratio, batch_recompute, + global_pruning); +} + +template +void PQFlashIndex::cached_beam_search(const T *query1, const uint64_t k_search, const uint64_t l_search, + uint64_t *indices, float *distances, const uint64_t beam_width, + const uint32_t io_limit, const bool use_reorder_data, + QueryStats *stats, bool USE_DEFERRED_FETCH, bool skip_search_reorder, + bool recompute_beighbor_embeddings, bool dedup_node_dis, + float prune_ratio, const bool batch_recompute, bool global_pruning) +{ + LabelT dummy_filter = 0; + cached_beam_search(query1, k_search, l_search, indices, distances, beam_width, false, dummy_filter, io_limit, + use_reorder_data, stats, USE_DEFERRED_FETCH, skip_search_reorder, recompute_beighbor_embeddings, + dedup_node_dis, prune_ratio, batch_recompute, global_pruning); +} + +// A helper callback for cURL +static size_t WriteCallback(void *contents, size_t size, size_t nmemb, void *userp) +{ + ((std::string *)userp)->append((char *)contents, size * nmemb); + return size * nmemb; +} + +static void *g_zmq_context = zmq_ctx_new(); + +struct ZmqContextManager +{ + ~ZmqContextManager() + { + if (g_zmq_context) + { + zmq_ctx_destroy(g_zmq_context); + g_zmq_context = nullptr; + } + } +}; +static ZmqContextManager g_zmq_manager; + +bool fetch_embeddings_zmq(const std::vector &node_ids, std::vector> &out_embeddings, + int zmq_port) +{ + // 1. Protobuf 序列化:创建请求消息 + protoembedding::NodeEmbeddingRequest req_proto; + for (const auto id : node_ids) + { + req_proto.add_node_ids(id); + } + std::string req_str; + if (!req_proto.SerializeToString(&req_str)) + { + std::cerr << "ZMQ_FETCH_ERROR: Failed to serialize NodeEmbeddingRequest.\n"; + return false; + } + + // 2. 
使用线程本地(thread_local)的 Socket,实现连接复用 + // 每个线程将拥有自己独立的、持久化的 Socket + thread_local void *tl_socket = nullptr; + + // 如果当前线程的 Socket 还未创建,则初始化并连接 + if (tl_socket == nullptr) + { + // 从全局 Context 创建 Socket + tl_socket = zmq_socket(g_zmq_context, ZMQ_REQ); + if (!tl_socket) + { + std::cerr << "ZMQ_FETCH_ERROR: zmq_socket() failed: " << zmq_strerror(zmq_errno()) << "\n"; + return false; + } + + int timeout = 30000; // 30 秒超时 + zmq_setsockopt(tl_socket, ZMQ_RCVTIMEO, &timeout, sizeof(timeout)); + zmq_setsockopt(tl_socket, ZMQ_SNDTIMEO, &timeout, sizeof(timeout)); + + std::string endpoint = "tcp://127.0.0.1:" + std::to_string(zmq_port); + if (zmq_connect(tl_socket, endpoint.c_str()) != 0) + { + std::cerr << "ZMQ_FETCH_ERROR: zmq_connect() to " << endpoint << " failed: " << zmq_strerror(zmq_errno()) + << "\n"; + zmq_close(tl_socket); + tl_socket = nullptr; // 重置为空指针,以便下次调用时可以尝试重建 + return false; + } + } + + // 3. 使用已建立的连接发送请求 + if (zmq_send(tl_socket, req_str.data(), req_str.size(), 0) < 0) + { + std::cerr << "ZMQ_FETCH_ERROR: zmq_send() failed: " << zmq_strerror(zmq_errno()) << "\n"; + zmq_close(tl_socket); // 连接可能已失效,关闭它 + tl_socket = nullptr; // 重置,强制下次重建 + return false; + } + + // 4. 接收响应 + zmq_msg_t response_msg; + zmq_msg_init(&response_msg); + bool success = true; + + if (zmq_msg_recv(&response_msg, tl_socket, 0) < 0) + { + std::cerr << "ZMQ_FETCH_ERROR: zmq_msg_recv() failed: " << zmq_strerror(zmq_errno()) << "\n"; + zmq_close(tl_socket); // 同样,接收超时后连接也可能无效 + tl_socket = nullptr; // 重置,强制下次重建 + success = false; + } + else + { + // 5. Protobuf 反序列化并提取数据 + protoembedding::NodeEmbeddingResponse resp_proto; + if (!resp_proto.ParseFromArray(zmq_msg_data(&response_msg), static_cast(zmq_msg_size(&response_msg)))) + { + std::cerr << "ZMQ_FETCH_ERROR: Failed to parse NodeEmbeddingResponse from server.\n"; + success = false; + } + else + { + if (resp_proto.dimensions_size() == 2) + { + int batch_size = resp_proto.dimensions(0); + int embedding_dim = resp_proto.dimensions(1); + const std::string &emb_data = resp_proto.embeddings_data(); + size_t expected_bytes = (size_t)batch_size * embedding_dim * sizeof(float); + + if (batch_size >= 0 && emb_data.size() == expected_bytes) + { + out_embeddings.resize(batch_size); + if (batch_size > 0) + { + const float *float_data = reinterpret_cast(emb_data.data()); + for (int i = 0; i < batch_size; ++i) + { + out_embeddings[i].resize(embedding_dim); + std::memcpy(out_embeddings[i].data(), float_data + (size_t)i * embedding_dim, + embedding_dim * sizeof(float)); + } + } + } + else + { + std::cerr << "ZMQ_FETCH_ERROR: Embedding data size mismatch. Expected " << expected_bytes + << " bytes, got " << emb_data.size() << ".\n"; + success = false; + } + } + else + { + std::cerr << "ZMQ_FETCH_ERROR: Server response has invalid dimensions size.\n"; + success = false; + } + } + } + + // 6. 清理消息对象,但保持 Socket 和 Context 开放以备下次复用 + zmq_msg_close(&response_msg); + + return success; +} + +/** + * fetch_embeddings_http: Function for backward compatibility, now uses ZMQ exclusively + */ +bool fetch_embeddings_http(const std::vector &node_ids, std::vector> &out_embeddings) +{ + // Use ZMQ implementation exclusively + return fetch_embeddings_zmq(node_ids, out_embeddings, 5555); +} + +//! 
Use a thread-local (thread_local) socket so the connection can be reused
+    // Each thread owns its own independent, persistent socket
+    thread_local void *tl_socket = nullptr;
+
+    // If this thread's socket has not been created yet, initialize it and connect
+    if (tl_socket == nullptr)
+    {
+        // Create the socket from the global context
+        tl_socket = zmq_socket(g_zmq_context, ZMQ_REQ);
+        if (!tl_socket)
+        {
+            std::cerr << "ZMQ_FETCH_ERROR: zmq_socket() failed: " << zmq_strerror(zmq_errno()) << "\n";
+            return false;
+        }
+
+        int timeout = 30000; // 30-second timeout
+        zmq_setsockopt(tl_socket, ZMQ_RCVTIMEO, &timeout, sizeof(timeout));
+        zmq_setsockopt(tl_socket, ZMQ_SNDTIMEO, &timeout, sizeof(timeout));
+
+        std::string endpoint = "tcp://127.0.0.1:" + std::to_string(zmq_port);
+        if (zmq_connect(tl_socket, endpoint.c_str()) != 0)
+        {
+            std::cerr << "ZMQ_FETCH_ERROR: zmq_connect() to " << endpoint << " failed: " << zmq_strerror(zmq_errno())
+                      << "\n";
+            zmq_close(tl_socket);
+            tl_socket = nullptr; // Reset to nullptr so the next call can try to rebuild the socket
+            return false;
+        }
+    }
+
+    // 3. Send the request over the already-established connection
+    if (zmq_send(tl_socket, req_str.data(), req_str.size(), 0) < 0)
+    {
+        std::cerr << "ZMQ_FETCH_ERROR: zmq_send() failed: " << zmq_strerror(zmq_errno()) << "\n";
+        zmq_close(tl_socket); // The connection may be broken; close it
+        tl_socket = nullptr;  // Reset to force a rebuild on the next call
+        return false;
+    }
+
+    // 4. Receive the response
+    zmq_msg_t response_msg;
+    zmq_msg_init(&response_msg);
+    bool success = true;
+
+    if (zmq_msg_recv(&response_msg, tl_socket, 0) < 0)
+    {
+        std::cerr << "ZMQ_FETCH_ERROR: zmq_msg_recv() failed: " << zmq_strerror(zmq_errno()) << "\n";
+        zmq_close(tl_socket); // Likewise, the connection may be invalid after a receive timeout
+        tl_socket = nullptr;  // Reset to force a rebuild on the next call
+        success = false;
+    }
+    else
+    {
+        // 5. Deserialize the protobuf response and extract the data
+        protoembedding::NodeEmbeddingResponse resp_proto;
+        if (!resp_proto.ParseFromArray(zmq_msg_data(&response_msg), static_cast(zmq_msg_size(&response_msg))))
+        {
+            std::cerr << "ZMQ_FETCH_ERROR: Failed to parse NodeEmbeddingResponse from server.\n";
+            success = false;
+        }
+        else
+        {
+            if (resp_proto.dimensions_size() == 2)
+            {
+                int batch_size = resp_proto.dimensions(0);
+                int embedding_dim = resp_proto.dimensions(1);
+                const std::string &emb_data = resp_proto.embeddings_data();
+                size_t expected_bytes = (size_t)batch_size * embedding_dim * sizeof(float);
+
+                if (batch_size >= 0 && emb_data.size() == expected_bytes)
+                {
+                    out_embeddings.resize(batch_size);
+                    if (batch_size > 0)
+                    {
+                        const float *float_data = reinterpret_cast(emb_data.data());
+                        for (int i = 0; i < batch_size; ++i)
+                        {
+                            out_embeddings[i].resize(embedding_dim);
+                            std::memcpy(out_embeddings[i].data(), float_data + (size_t)i * embedding_dim,
+                                        embedding_dim * sizeof(float));
+                        }
+                    }
+                }
+                else
+                {
+                    std::cerr << "ZMQ_FETCH_ERROR: Embedding data size mismatch. Expected " << expected_bytes
+                              << " bytes, got " << emb_data.size() << ".\n";
+                    success = false;
+                }
+            }
+            else
+            {
+                std::cerr << "ZMQ_FETCH_ERROR: Server response has invalid dimensions size.\n";
+                success = false;
+            }
+        }
+    }
+
+    // 6. Clean up the message object, but keep the socket and context open for reuse
+    zmq_msg_close(&response_msg);
+
+    return success;
+}
+
+/**
+ * fetch_embeddings_http: Function for backward compatibility, now uses ZMQ exclusively
+ */
+bool fetch_embeddings_http(const std::vector &node_ids, std::vector> &out_embeddings)
+{
+    // Use ZMQ implementation exclusively
+    return fetch_embeddings_zmq(node_ids, out_embeddings, 5555);
+}
+
+//!
Should be aligned with utils.h::prepare_base_for_inner_products +void preprocess_fetched_embeddings(std::vector> &embeddings, diskann::Metric metric, + float max_base_norm, uint32_t data_dim) +{ + for (auto &emb : embeddings) + { + // Ensure embedding has correct size + if (emb.size() < data_dim - 1) + { + // Pad with zeros if needed + emb.resize(data_dim - 1, 0); + } + + if (metric == diskann::Metric::INNER_PRODUCT) + { + // For inner product, apply same preprocessing as in prepare_base_for_inner_products + + // Calculate original norm + float norm_sq = 0; + for (size_t i = 0; i < data_dim - 1; i++) + { + norm_sq += emb[i] * emb[i]; + } + + // Normalize by max_base_norm (same as in index construction) + for (size_t i = 0; i < data_dim - 1; i++) + { + emb[i] /= max_base_norm; + } + + // Add the extra coordinate for MIPS->L2 conversion + float res = 1 - (norm_sq / (max_base_norm * max_base_norm)); + res = res <= 0 ? 0 : std::sqrt(res); + emb.resize(data_dim, res); + } + else if (metric == diskann::Metric::COSINE) + { + // For cosine similarity, just normalize the vector + float norm = 0; + for (auto val : emb) + { + norm += val * val; + } + norm = std::sqrt(norm); + + if (norm > 0) + { + for (size_t i = 0; i < emb.size(); i++) + { + emb[i] /= norm; + } + } + } + // For L2, no preprocessing needed + } +} + +template +void PQFlashIndex::cached_beam_search(const T *query1, const uint64_t k_search, const uint64_t l_search, + uint64_t *indices, float *distances, const uint64_t beam_width, + const bool use_filter, const LabelT &filter_label, + const uint32_t io_limit, const bool use_reorder_data, + QueryStats *stats, bool USE_DEFERRED_FETCH, bool skip_search_reorder, + bool recompute_beighbor_embeddings, const bool dedup_node_dis, + float prune_ratio, const bool batch_recompute, bool global_pruning) +{ + // printf("cached_beam_search\n"); + // diskann::cout << "cached_beam_search" << std::endl; + // diskann out prune_ratio + prune_ratio = 1 - prune_ratio; + diskann::cout << "reserve ratio: " << prune_ratio << std::endl; + // prune_ratio = 0.8; + uint64_t num_sector_per_nodes = DIV_ROUND_UP(_max_node_len, defaults::SECTOR_LEN); + if (beam_width > num_sector_per_nodes * defaults::MAX_N_SECTOR_READS) + throw ANNException("Beamwidth can not be higher than defaults::MAX_N_SECTOR_READS", -1, __FUNCSIG__, __FILE__, + __LINE__); + + ScratchStoreManager> manager(this->_thread_data); + auto data = manager.scratch_space(); + IOContext &ctx = data->ctx; + auto query_scratch = &(data->scratch); + auto pq_query_scratch = query_scratch->pq_scratch(); + + // reset query scratch + query_scratch->reset(); + + // copy query to thread specific aligned and allocated memory (for distance + // calculations we need aligned data) + float query_norm = 0; + T *aligned_query_T = query_scratch->aligned_query_T(); + float *query_float = pq_query_scratch->aligned_query_float; + float *query_rotated = pq_query_scratch->rotated_query; + + // Add cache hit tracking variables + uint64_t total_nodes_requested = 0; + uint64_t total_nodes_from_cache = 0; + + // normalization step. for cosine, we simply normalize the query + // for mips, we normalize the first d-1 dims, and add a 0 for last dim, since an extra coordinate was used to + // convert MIPS to L2 search + if (metric == diskann::Metric::INNER_PRODUCT || metric == diskann::Metric::COSINE) + { + uint64_t inherent_dim = (metric == diskann::Metric::COSINE) ? 
this->_data_dim : (uint64_t)(this->_data_dim - 1); + for (size_t i = 0; i < inherent_dim; i++) + { + aligned_query_T[i] = query1[i]; + query_norm += query1[i] * query1[i]; + } + if (metric == diskann::Metric::INNER_PRODUCT) + aligned_query_T[this->_data_dim - 1] = 0; + + query_norm = std::sqrt(query_norm); + + for (size_t i = 0; i < inherent_dim; i++) + { + aligned_query_T[i] = (T)(aligned_query_T[i] / query_norm); + } + pq_query_scratch->initialize(this->_data_dim, aligned_query_T); + } + else + { + for (size_t i = 0; i < this->_data_dim; i++) + { + aligned_query_T[i] = query1[i]; + } + pq_query_scratch->initialize(this->_data_dim, aligned_query_T); + } + + // pointers to buffers for data + T *data_buf = query_scratch->coord_scratch; + _mm_prefetch((char *)data_buf, _MM_HINT_T1); + + // sector scratch + char *sector_scratch = query_scratch->sector_scratch; + size_t §or_scratch_idx = query_scratch->sector_idx; + const uint64_t num_sectors_per_node = + _nnodes_per_sector > 0 ? 1 : DIV_ROUND_UP(_max_node_len, defaults::SECTOR_LEN); + + // query <-> PQ chunk centers distances + _pq_table.preprocess_query(query_rotated); // center the query and rotate if + // we have a rotation matrix + float *pq_dists = pq_query_scratch->aligned_pqtable_dist_scratch; + _pq_table.populate_chunk_distances(query_rotated, pq_dists); + // Preprocess Distance b/w Query Vector and Centroids + // Chunk 1 | Chunk 2 | Chunk 3 + // Centroid 1 d[1][1] d[1][2] d[1][3] + // Centroid 2 + // Centroid 3 + // Centroid 4 + // Centroid 5 + // Centroid 6 + // Centroid 7 + // Centroid 8 + + // query <-> neighbor list + float *dist_scratch = pq_query_scratch->aligned_dist_scratch; + uint8_t *pq_coord_scratch = pq_query_scratch->aligned_pq_coord_scratch; + + std::map node_distances; + + // Lambda to batch compute query<->node distances in PQ space + auto compute_dists = [this, pq_coord_scratch, pq_dists, aligned_query_T, recompute_beighbor_embeddings, data_buf, + &node_distances, &total_nodes_requested, &total_nodes_from_cache, + dedup_node_dis](const uint32_t *ids, const uint64_t n_ids, float *dists_out) { + // Vector[0], {3, 6, 2} + // Distance = d[3][1] + d[6][2] + d[2][3] + // recompute_beighbor_embeddings = true; + if (!recompute_beighbor_embeddings) + { + diskann::aggregate_coords(ids, n_ids, this->data, this->_n_chunks, pq_coord_scratch); + diskann::pq_dist_lookup(pq_coord_scratch, n_ids, this->_n_chunks, pq_dists, dists_out); + } + else + { + // Fetch the embeddings from the embedding server using n_ids + std::vector node_ids; + + // Update total nodes requested counter + total_nodes_requested += n_ids; + + // Build a map from node_id to original position for O(1) lookup + // Handle deduplication if enabled + std::vector cached_node_idx(n_ids, false); + if (dedup_node_dis) + { + // First pass: use cached distances where available + for (size_t i = 0; i < n_ids; i++) + { + if (node_distances.find(ids[i]) != node_distances.end()) + { + // Use cached distance + dists_out[i] = node_distances[ids[i]]; + cached_node_idx[i] = true; + total_nodes_from_cache++; // Count cache hits + } + else + { + // Not in cache, need to compute + node_ids.push_back(ids[i]); + } + } + + // If all distances are cached, we can return early + if (node_ids.empty()) + return; + } + else + { + node_ids = std::vector(ids, ids + n_ids); + } + + // Fetch embeddings from the embedding server + std::vector> embeddings; + bool success = fetch_embeddings_http(node_ids, embeddings); + + if (!success || embeddings.size() != node_ids.size()) + { + diskann::cout << 
"Failed to fetch embeddings from the embedding server" << std::endl; + // Fallback to PQ-based distance computation if fetching fails + diskann::aggregate_coords(ids, n_ids, this->data, this->_n_chunks, pq_coord_scratch); + diskann::pq_dist_lookup(pq_coord_scratch, n_ids, this->_n_chunks, pq_dists, dists_out); + return; + } + + // Preprocess the fetched embeddings to match the format used in diskann + preprocess_fetched_embeddings(embeddings, this->metric, this->_max_base_norm, this->_data_dim); + + // Compute distances for fetched embeddings + if (dedup_node_dis) + { + // Process each node that needs computation + uint32_t idx = 0; + for (size_t i = 0; i < n_ids; i++) + { + if (cached_node_idx[i]) + { + continue; + } + // Prepare embedding for distance computation + embeddings[idx].resize(this->_aligned_dim, 0); + memcpy(data_buf, embeddings[idx].data(), this->_aligned_dim * sizeof(T)); + + // Compute distance + float distance = + this->_dist_cmp->compare(aligned_query_T, data_buf, static_cast(this->_aligned_dim)); + + // Store results + dists_out[i] = distance; + node_distances[node_ids[i]] = distance; + idx++; + } + } + else + { + // Without deduplication, embeddings match the original order + for (size_t i = 0; i < n_ids; i++) + { + // Prepare embedding for distance computation + embeddings[i].resize(this->_aligned_dim, 0); + memcpy(data_buf, embeddings[i].data(), this->_aligned_dim * sizeof(T)); + + // Compute distance + float distance = + this->_dist_cmp->compare(aligned_query_T, data_buf, static_cast(this->_aligned_dim)); + + // Store results + dists_out[i] = distance; + } + } + } + }; + + // Add logic of global pruning + // Using a priority queue to record the PQ distance - use min heap for nearest neighbors + std::priority_queue, std::vector>, + std::greater>> + aq_priority_queue; + tsl::robin_set &visited = query_scratch->visited; + + // TODO: implement this function + // 1. Based on some heristic to prune the node_nbrs and nnbrs that is not promising + // 1.1 heruistic 1: use higher compression PQ to prune the node_nbrs and nnbrs that is not promising in path + // /powerrag/scaling_out/embeddings/facebook/contriever-msmarco/rpj_wiki/compressed_2/ + // 1.2 heruistic 2: use a lightweight reranker to rerank the node_nbrs and nnbrs that is not promising + auto prune_node_nbrs = [this, pq_coord_scratch, pq_dists, recompute_beighbor_embeddings, dedup_node_dis, + prune_ratio, global_pruning, &aq_priority_queue, + &visited](uint32_t *&node_nbrs, uint64_t &nnbrs) { + if (!recompute_beighbor_embeddings) + { + return; + } + if (nnbrs <= 10) + { + // Don't prune if there are very few neighbors + return; + } + + // Allocate space for distance calculations + float *dists_out = new float[nnbrs]; + + // Compute distances using PQ directly instead of compute_dists + diskann::aggregate_coords(node_nbrs, nnbrs, this->data, this->_n_chunks, pq_coord_scratch); + diskann::pq_dist_lookup(pq_coord_scratch, nnbrs, this->_n_chunks, pq_dists, dists_out); + + if (global_pruning) + { + // Add the distance and node_id to the priority queue + for (uint64_t i = 0; i < nnbrs; i++) + { + aq_priority_queue.push(std::make_pair(dists_out[i], node_nbrs[i])); + } + // select all ratio=prune_ratio in aq_priority_queue but need to check if the node_id is already visited, + // dont need to pop + std::vector> promising_nodes; + + std::vector> roll_back_nodes; + // 1. 
visit top prune_ratio*length of aq_priority_queue nodes in aq_priority_queue and put the node_id not + // visited into a vector + uint64_t original_size = aq_priority_queue.size(); + for (uint64_t i = 0; i < prune_ratio * original_size; i++) + { + auto top_node = aq_priority_queue.top(); + roll_back_nodes.push_back(top_node); + aq_priority_queue.pop(); + if (visited.find(top_node.second) == visited.end()) + { + float distance = top_node.first; + uint32_t node_id = top_node.second; + promising_nodes.push_back(std::make_pair(distance, node_id)); + } + } + // push all roll_back_nodes back to aq_priority_queue + for (uint64_t i = 0; i < roll_back_nodes.size(); i++) + { + aq_priority_queue.push(roll_back_nodes[i]); + } + + // 2. assing the node_id and distance to node_nbrs and nnbrs + for (uint64_t i = 0; i < promising_nodes.size(); i++) + { + node_nbrs[i] = promising_nodes[i].second; + } + nnbrs = promising_nodes.size(); + // then return corresponding node_nbrs and nnbrs + + delete[] dists_out; + return; + } + // Create a vector of pairs (node_id, distance) + std::vector> scored_nbrs; + scored_nbrs.reserve(nnbrs); + + for (uint64_t i = 0; i < nnbrs; i++) + { + scored_nbrs.emplace_back(node_nbrs[i], dists_out[i]); + } + + // Sort by distance (lower is better) + std::sort(scored_nbrs.begin(), scored_nbrs.end(), + [](const std::pair &a, const std::pair &b) { + return a.second < b.second; + }); + + // Keep only the top portion of neighbors based on prune_ratio (or at least 10) + uint64_t new_nnbrs = std::max(10UL, static_cast(nnbrs * prune_ratio)); + if (new_nnbrs < nnbrs) + { + // Update the original node_nbrs array with pruned neighbors + for (uint64_t i = 0; i < new_nnbrs; i++) + { + node_nbrs[i] = scored_nbrs[i].first; + } + + // Update the count of neighbors + nnbrs = new_nnbrs; + } + + // Free the allocated memory + delete[] dists_out; + }; + Timer query_timer, io_timer, cpu_timer; + + NeighborPriorityQueue &retset = query_scratch->retset; + retset.reserve(l_search); + std::vector &full_retset = query_scratch->full_retset; + std::vector points_to_compute; // Store points for later embedding computation + +#if 0 + std::vector exact_dist_retset; + std::vector> exact_embeddings; +#endif + + uint32_t best_medoid = 0; + float best_dist = (std::numeric_limits::max)(); + if (!use_filter) + { + for (uint64_t cur_m = 0; cur_m < _num_medoids; cur_m++) + { + float cur_expanded_dist = + _dist_cmp_float->compare(query_float, _centroid_data + _aligned_dim * cur_m, (uint32_t)_aligned_dim); + if (cur_expanded_dist < best_dist) + { + best_medoid = _medoids[cur_m]; + best_dist = cur_expanded_dist; + } + } + } + else + { + if (_filter_to_medoid_ids.find(filter_label) != _filter_to_medoid_ids.end()) + { + const auto &medoid_ids = _filter_to_medoid_ids[filter_label]; + for (uint64_t cur_m = 0; cur_m < medoid_ids.size(); cur_m++) + { + // for filtered index, we dont store global centroid data as for unfiltered index, so we use PQ distance + // as approximation to decide closest medoid matching the query filter. 
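+                // Added clarification: compute_dists() below returns the PQ asymmetric distance,
+                // i.e. for a point whose PQ code is, say, {3, 6, 2} it sums the precomputed
+                // query-to-centroid distances d[3][1] + d[6][2] + d[2][3] from pq_dists, so the
+                // medoid chosen under a filter is an approximation rather than an exact match.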
+ compute_dists(&medoid_ids[cur_m], 1, dist_scratch); + float cur_expanded_dist = dist_scratch[0]; + if (cur_expanded_dist < best_dist) + { + best_medoid = medoid_ids[cur_m]; + best_dist = cur_expanded_dist; + } + } + } + else + { + throw ANNException("Cannot find medoid for specified filter.", -1, __FUNCSIG__, __FILE__, __LINE__); + } + } + + compute_dists(&best_medoid, 1, dist_scratch); + retset.insert(Neighbor(best_medoid, dist_scratch[0])); + visited.insert(best_medoid); + + uint32_t cmps = 0; + uint32_t hops = 0; + uint32_t num_ios = 0; + + // cleared every iteration + std::vector frontier; + frontier.reserve(2 * beam_width); + std::vector> frontier_nhoods; + frontier_nhoods.reserve(2 * beam_width); + std::vector frontier_read_reqs; + frontier_read_reqs.reserve(2 * beam_width); + std::vector>> cached_nhoods; + cached_nhoods.reserve(2 * beam_width); + + float *batched_dists = nullptr; + if (batch_recompute) + { + batched_dists = new float[_max_degree * beam_width + 5]; + } + + while (retset.has_unexpanded_node() && num_ios < io_limit) + { + // clear iteration state + frontier.clear(); + frontier_nhoods.clear(); + frontier_read_reqs.clear(); + cached_nhoods.clear(); + sector_scratch_idx = 0; + // find new beam + uint32_t num_seen = 0; + while (retset.has_unexpanded_node() && frontier.size() < beam_width && num_seen < beam_width) + { + auto nbr = retset.closest_unexpanded(); + num_seen++; + auto iter = _nhood_cache.find(nbr.id); + if (iter != _nhood_cache.end()) + { + cached_nhoods.push_back(std::make_pair(nbr.id, iter->second)); + if (stats != nullptr) + { + stats->n_cache_hits++; + } + } + else + { + frontier.push_back(nbr.id); + } + if (this->_count_visited_nodes) + { + reinterpret_cast &>(this->_node_visit_counter[nbr.id].second).fetch_add(1); + } + } + + std::vector graph_read_reqs; + std::map node_offsets; // id -> offset + std::map> node_nbrs_ori; + std::map> node_cords; + + // read nhoods of frontier ids + if (!frontier.empty()) + { + if (stats != nullptr) + stats->n_hops++; + + for (uint64_t i = 0; i < frontier.size(); i++) + { + auto id = frontier[i]; + std::pair fnhood; + fnhood.first = id; + fnhood.second = sector_scratch + num_sectors_per_node * sector_scratch_idx * defaults::SECTOR_LEN; + sector_scratch_idx++; + frontier_nhoods.push_back(fnhood); +#if 1 + if (!_use_partition) + { +#endif + frontier_read_reqs.emplace_back(get_node_sector((size_t)id) * defaults::SECTOR_LEN, + num_sectors_per_node * defaults::SECTOR_LEN, fnhood.second); +#if 1 + } +#endif + if (stats != nullptr) + { + stats->n_4k++; + stats->n_ios++; + } + num_ios++; + } + + if (_use_partition) + { + sector_scratch_idx = 0; + for (auto &frontier_nhood : frontier_nhoods) + { + uint32_t node_id = frontier_nhood.first; + uint32_t partition_id = _id2partition[node_id]; + if (partition_id >= _num_partitions) + { + diskann::cout << "Warning: partition_id is invalid: " << partition_id << std::endl; + assert(false); + } + + std::vector part_list = _graph_partitions[partition_id]; + auto it = std::find(part_list.begin(), part_list.end(), node_id); + if (it == part_list.end()) + { + diskann::cerr << "Error: node " << node_id << " not found in partition " << partition_id + << std::endl; + assert(false); + } + size_t j = std::distance(part_list.begin(), it); + node_offsets[node_id] = j; + + uint64_t sector_offset = (partition_id + 1) * defaults::SECTOR_LEN; + // ! 
Keep it same with frontier_nhood.second + char *sector_buffer = sector_scratch + sector_scratch_idx * defaults::SECTOR_LEN; + sector_scratch_idx++; + + AlignedRead partition_read; + partition_read.len = defaults::SECTOR_LEN; + partition_read.buf = sector_buffer; + partition_read.offset = sector_offset; + + graph_read_reqs.emplace_back(partition_read); + } + } + + io_timer.reset(); +#if 1 + if (!_use_partition) + { +#endif +#ifdef USE_BING_INFRA + reader->read(frontier_read_reqs, ctx, + true); // asynhronous reader for Bing. +#else + reader->read(frontier_read_reqs, ctx); // synchronous IO linux +#endif +#if 1 + } +#endif + +#if 0 + for (auto &[node_id, disk_buf] : frontier_nhoods) + { + char *node_disk_buf = offset_to_node(disk_buf, node_id); + uint32_t *nhood_buf = offset_to_node_nhood(node_disk_buf); + uint32_t neighbor_count = *nhood_buf; + node_nbrs_ori[node_id] = std::vector(nhood_buf + 1, nhood_buf + 1 + neighbor_count); + node_cords[node_id] = + std::vector(offset_to_node_coords(node_disk_buf), + offset_to_node_coords(node_disk_buf) + _disk_bytes_per_point / sizeof(float)); + } +#endif + if (_use_partition) + { + graph_reader->read(graph_read_reqs, ctx); + } + + if (stats != nullptr) + { + stats->io_us += (float)io_timer.elapsed(); + } + } + + // process cached nhoods + for (auto &cached_nhood : cached_nhoods) + { + auto global_cache_iter = _coord_cache.find(cached_nhood.first); + uint32_t node_id = cached_nhood.first; + T *node_fp_coords_copy = global_cache_iter->second; + float cur_expanded_dist; + float exact_expanded_dist = 0; + + if (skip_search_reorder) + { + compute_dists(&node_id, 1, dist_scratch); + cur_expanded_dist = dist_scratch[0]; + } + else if (USE_DEFERRED_FETCH) + { + cur_expanded_dist = 0.0f; + } + else if (!_use_disk_index_pq) + { + cur_expanded_dist = _dist_cmp->compare(aligned_query_T, node_fp_coords_copy, (uint32_t)_aligned_dim); + } + else + { + if (metric == diskann::Metric::INNER_PRODUCT) + cur_expanded_dist = _disk_pq_table.inner_product(query_float, (uint8_t *)node_fp_coords_copy); + else + cur_expanded_dist = _disk_pq_table.l2_distance( // disk_pq does not support OPQ yet + query_float, (uint8_t *)node_fp_coords_copy); + } + full_retset.push_back(Neighbor(node_id, cur_expanded_dist)); + +#if 0 + if (!_use_disk_index_pq) + { + exact_expanded_dist = _dist_cmp->compare(aligned_query_T, node_fp_coords_copy, (uint32_t)_aligned_dim); + } + else + { + if (metric == diskann::Metric::INNER_PRODUCT) + exact_expanded_dist = _disk_pq_table.inner_product(query_float, (uint8_t *)node_fp_coords_copy); + else + exact_expanded_dist = _disk_pq_table.l2_distance(query_float, (uint8_t *)node_fp_coords_copy); + } + exact_dist_retset.push_back(Neighbor(node_id, exact_expanded_dist)); + exact_embeddings.push_back(std::vector(node_fp_coords_copy, node_fp_coords_copy + _aligned_dim)); +#endif + + uint64_t nnbrs = cached_nhood.second.first; + uint32_t *node_nbrs = cached_nhood.second.second; + + // compute node_nbrs <-> query dists in PQ space + cpu_timer.reset(); + compute_dists(node_nbrs, nnbrs, dist_scratch); + if (stats != nullptr) + { + stats->n_cmps += (uint32_t)nnbrs; + stats->cpu_us += (float)cpu_timer.elapsed(); + } + + // process prefetched nhood + for (uint64_t m = 0; m < nnbrs; ++m) + { + uint32_t id = node_nbrs[m]; + if (visited.insert(id).second) + { + if (!use_filter && _dummy_pts.find(id) != _dummy_pts.end()) + continue; + + if (use_filter && !(point_has_label(id, filter_label)) && + (!_use_universal_label || !point_has_label(id, _universal_filter_label))) + 
continue; + cmps++; + float dist = dist_scratch[m]; + Neighbor nn(id, dist); + retset.insert(nn); + } + } + } +#ifdef USE_BING_INFRA + // process each frontier nhood - compute distances to unvisited nodes + int completedIndex = -1; + long requestCount = static_cast(frontier_read_reqs.size()); + // If we issued read requests and if a read is complete or there are + // reads in wait state, then enter the while loop. + while (requestCount > 0 && getNextCompletedRequest(reader, ctx, requestCount, completedIndex)) + { + assert(completedIndex >= 0); + auto &frontier_nhood = frontier_nhoods[completedIndex]; + (*ctx.m_pRequestsStatus)[completedIndex] = IOContext::PROCESS_COMPLETE; +#else + std::vector batched_node_ids; + + for (auto &frontier_nhood : frontier_nhoods) + { +#endif + uint32_t node_id = frontier_nhood.first; + char *disk_buf = frontier_nhood.second; + char *node_disk_buf = offset_to_node(disk_buf, node_id); + + float cur_expanded_dist; + + // If skip_reorder is true, compute both PQ distance and exact distance + if (skip_search_reorder) + { + compute_dists(&node_id, 1, dist_scratch); + cur_expanded_dist = dist_scratch[0]; + } + else if (USE_DEFERRED_FETCH) + { + cur_expanded_dist = 0.0f; + } + else if (recompute_beighbor_embeddings && dedup_node_dis && _use_partition) + { + // For _use_partition = True, we must rely on node_distances to get the distance + // Since we are using graph-structure only reading. + // ! Use node_distances to get the distance + cur_expanded_dist = node_distances[node_id]; + } + else + { +#if 0 + if (node_cords.find(node_id) == node_cords.end()) + { + diskann::cout << "Warning: node " << node_id << " not found in node_cords" << std::endl; + diskann::cout << "Are you using deferred fetch for detached graph?" << std::endl; + assert(false); + } + // ! As for DEBUG mode and partition_read = True, we are overriding the node_disk_buf + // ! with our graph-structure only reading. So we need to use node_cords to get the correct + // ! coordinates. 
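+                // Added note: this compiled-out block documents the detached-graph (partition_read)
+                // case, in which the sector buffer holds only adjacency data, so full-precision
+                // coordinates would have to come from node_cords rather than from node_disk_buf.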
+ T *node_fp_coords = reinterpret_cast(node_cords[node_id].data()); + // T *node_fp_coords = offset_to_node_coords(node_disk_buf); +#endif + T *node_fp_coords = offset_to_node_coords(node_disk_buf); + memcpy(data_buf, node_fp_coords, _disk_bytes_per_point); + if (!_use_disk_index_pq) + { + cur_expanded_dist = _dist_cmp->compare(aligned_query_T, data_buf, (uint32_t)_aligned_dim); + } + else + { + if (metric == diskann::Metric::INNER_PRODUCT) + cur_expanded_dist = _disk_pq_table.inner_product(query_float, (uint8_t *)data_buf); + else + cur_expanded_dist = _disk_pq_table.l2_distance(query_float, (uint8_t *)data_buf); + } + } + full_retset.push_back(Neighbor(node_id, cur_expanded_dist)); + +#if 0 + T *node_fp_coords = offset_to_node_coords(node_disk_buf); + memcpy(data_buf, node_fp_coords, _disk_bytes_per_point); + float exact_expanded_dist = 0; + if (!_use_disk_index_pq) + { + exact_expanded_dist = _dist_cmp->compare(aligned_query_T, data_buf, (uint32_t)_aligned_dim); + } + else + { + if (metric == diskann::Metric::INNER_PRODUCT) + exact_expanded_dist = _disk_pq_table.inner_product(query_float, (uint8_t *)data_buf); + else + exact_expanded_dist = _disk_pq_table.l2_distance(query_float, (uint8_t *)data_buf); + } + exact_dist_retset.push_back(Neighbor(node_id, exact_expanded_dist)); + exact_embeddings.push_back(std::vector(data_buf, data_buf + _aligned_dim)); +#endif + + uint32_t *node_nbrs; + uint64_t nnbrs; + + if (!_use_partition) + { + auto node_buf = offset_to_node_nhood(node_disk_buf); + nnbrs = (uint64_t)(*node_buf); + node_nbrs = (node_buf + 1); + } + +#if 0 + auto node_nbrs_vec = node_nbrs_ori[node_id]; + nnbrs = node_nbrs_vec.size(); + node_nbrs = node_nbrs_vec.data(); +#endif + if (_use_partition) + { + char *sector_buffer = frontier_nhood.second; + int j = node_offsets[node_id]; + uint64_t node_offset = j * _graph_node_len; + if (node_offset + 4 > defaults::SECTOR_LEN) + { + diskann::cerr << "Error: node offset out of range: " << node_offset << " (+4) > " + << defaults::SECTOR_LEN << " for node " << node_id << std::endl; + assert(false); + } + + char *adjacency_ptr = sector_buffer + node_offset; + uint32_t neighbor_count = *reinterpret_cast(adjacency_ptr); + + if (neighbor_count > 10000) + { + diskann::cerr << "Error: suspicious neighbor count: " << neighbor_count << " for node " << node_id + << std::endl; + assert(false); + } + + size_t needed = neighbor_count * sizeof(uint32_t); + if (node_offset + 4 + needed > defaults::SECTOR_LEN) + { + diskann::cerr << "Error: neighbor data out of range: " << (node_offset + 4 + needed) << " > " + << defaults::SECTOR_LEN << " for node " << node_id << std::endl; + assert(false); + } + +#if 0 + if (neighbor_count != nnbrs) + { + diskann::cout << "Warning: neighbor_count != nnbrs: " << neighbor_count << " != " << nnbrs + << std::endl; + assert(false); + } +#endif + + nnbrs = neighbor_count; + +#if 0 + uint32_t *our_node_nbrs = (uint32_t *)(adjacency_ptr + 4); + for (uint32_t i = 0; i < nnbrs; i++) + { + if (our_node_nbrs[i] != node_nbrs[i]) + { + diskann::cout << "Warning: our_node_nbrs[" << i << "] != node_nbrs[" << i + << "]: " << our_node_nbrs[i] << " != " << node_nbrs[i] << std::endl; + assert(false); + } + } +#endif + + node_nbrs = reinterpret_cast(adjacency_ptr + 4); + } + + // compute node_nbrs <-> query dist in PQ space + cpu_timer.reset(); + // have a function to prune the node_nbrs and nnbrs + + // prune_node_nbrs(node_nbrs, nnbrs); + + if (!batch_recompute) + { + prune_node_nbrs(node_nbrs, nnbrs); + compute_dists(node_nbrs, nnbrs, 
dist_scratch); + if (stats != nullptr) + { + stats->n_cmps += (uint32_t)nnbrs; + stats->cpu_us += (float)cpu_timer.elapsed(); + } + + cpu_timer.reset(); + // process prefetch-ed nhood + for (uint64_t m = 0; m < nnbrs; ++m) + { + uint32_t id = node_nbrs[m]; + if (visited.insert(id).second) + { + if (!use_filter && _dummy_pts.find(id) != _dummy_pts.end()) + continue; + + if (use_filter && !(point_has_label(id, filter_label)) && + (!_use_universal_label || !point_has_label(id, _universal_filter_label))) + continue; + cmps++; + float dist = dist_scratch[m]; + if (stats != nullptr) + { + stats->n_cmps++; + } + + Neighbor nn(id, dist); + retset.insert(nn); + } + } + + if (stats != nullptr) + { + stats->cpu_us += (float)cpu_timer.elapsed(); + } + } + else + { + // add all the node_nbrs to the batch_requests + batched_node_ids.insert(batched_node_ids.end(), node_nbrs, node_nbrs + nnbrs); + } + } + + if (batch_recompute) + { + auto nnbrs = batched_node_ids.size(); + uint32_t *batched_data_ptr = batched_node_ids.data(); // Get pointer to data + prune_node_nbrs(batched_data_ptr, nnbrs); // Prune using the pointer, nnbrs is updated + + compute_dists(batched_data_ptr, nnbrs, batched_dists); // Compute dists for the pruned set + // ! Not sure if dist_scratch has enough space + + // process prefetch-ed nhood + for (uint64_t m = 0; m < nnbrs; ++m) + { + uint32_t id = batched_node_ids[m]; + if (visited.insert(id).second) + { + if (!use_filter && _dummy_pts.find(id) != _dummy_pts.end()) + continue; + + if (use_filter && !(point_has_label(id, filter_label)) && + (!_use_universal_label || !point_has_label(id, _universal_filter_label))) + continue; + cmps++; + float dist = batched_dists[m]; + if (stats != nullptr) + { + stats->n_cmps++; + } + + Neighbor nn(id, dist); + retset.insert(nn); + } + } + } + // } + // } + hops++; + } + + delete[] batched_dists; + + diskann::cout << "Graph traversal completed, hops: " << hops << std::endl; + + if (USE_DEFERRED_FETCH) + { + diskann::cout << "hops: " << hops << std::endl; + + std::vector node_ids; + node_ids.reserve(full_retset.size()); + for (auto &nr : full_retset) + { + node_ids.push_back(nr.id); + } + + Timer fetch_timer; + std::vector> real_embeddings; + bool success = fetch_embeddings_http(node_ids, real_embeddings); + if (!success) + { + throw ANNException("Failed to fetch embeddings", -1, __FUNCSIG__, __FILE__, __LINE__); + } + + diskann::cout << "Fetched " << real_embeddings.size() << " embeddings in " << fetch_timer.elapsed() << " us" + << std::endl; + + // compute real-dist + Timer compute_timer; + // preprocess the real embedding to match the format of nomarlized version of diskann + preprocess_fetched_embeddings(real_embeddings, metric, _max_base_norm, this->_data_dim); + +#if 0 + assert(real_embeddings.size() == full_retset.size()); + assert(real_embeddings.size() == exact_dist_retset.size()); + assert(real_embeddings.size() == exact_embeddings.size()); +#endif + + for (int i = 0; i < real_embeddings.size(); i++) + { + // padding real_embeddings[i] to _aligned_dim + real_embeddings[i].resize(_aligned_dim, 0); +#if 0 + // compare real_embeddings[i] with exact_embeddings[i] + if (real_embeddings[i].size() != exact_embeddings[i].size()) + { + diskann::cout << "real_embeddings[i].size(): " << real_embeddings[i].size() << std::endl; + diskann::cout << "exact_embeddings[i].size(): " << exact_embeddings[i].size() << std::endl; + + // dumping to files + std::ofstream diff_file("./diff_embeddings.txt"); + diff_file << "real_embeddings[i].size(): " << 
real_embeddings[i].size() << std::endl; + diff_file << "exact_embeddings[i].size(): " << exact_embeddings[i].size() << std::endl; + for (int j = 0; j < real_embeddings[i].size(); j++) + { + diff_file << real_embeddings[i][j] << " "; + } + diff_file << std::endl; + for (int j = 0; j < exact_embeddings[i].size(); j++) + { + diff_file << exact_embeddings[i][j] << " "; + } + diff_file << std::endl; + assert(false); + } + for (int j = 0; j < real_embeddings[i].size(); j++) + { + if (abs(real_embeddings[i][j] - exact_embeddings[i][j]) > 5e-4) + { + diskann::cout << "Difference found at node_id: " << full_retset[i].id << " and dimension: " << j + << std::endl; + diskann::cout << "real_embeddings[i][j]: " << real_embeddings[i][j] << std::endl; + diskann::cout << "exact_embeddings[i][j]: " << exact_embeddings[i][j] << std::endl; + assert(false); + } + } +#endif + + float dist; + assert(!_use_disk_index_pq); + memcpy(data_buf, real_embeddings[i].data(), real_embeddings[0].size() * sizeof(T)); + dist = _dist_cmp->compare(aligned_query_T, data_buf, (uint32_t)_aligned_dim); + + full_retset[i].distance = dist; + +#if 0 + if (abs(dist - exact_dist_retset[i].distance) > 5e-4) + { + diskann::cout << "Difference found at node_id: " << full_retset[i].id << std::endl; + diskann::cout << "dist: " << dist << std::endl; + diskann::cout << "exact_dist_retset[i].distance: " << exact_dist_retset[i].distance << std::endl; + assert(false); + } +#endif + } + diskann::cout << "compute_timer.elapsed(): " << compute_timer.elapsed() << std::endl; + } + + std::sort(full_retset.begin(), full_retset.end()); + +// Compare PQ results with exact results when skip_search_reorder is true +#if 0 + if (skip_search_reorder) + { + // Sort the exact distance results + std::sort(exact_dist_retset.begin(), exact_dist_retset.end()); + + // Create a map to find positions of IDs in the PQ-sorted list + std::unordered_map pq_positions; + for (size_t i = 0; i < full_retset.size(); i++) + { + pq_positions[full_retset[i].id] = i; + } + + int current_search_id = search_counter.fetch_add(1); + int thread_id = omp_get_thread_num(); + + std::lock_guard lock(log_file_mutex); + + std::ofstream log_file("./top3_positions_log.txt", std::ios::app); + // Write header if file is empty + log_file.seekp(0, std::ios::end); + if (log_file.tellp() == 0) + { + diskann::cout << "Saved top3 distributions to " << std::filesystem::canonical("./top3_positions_log.txt") + << std::endl; + log_file << "Search#,ThreadID,FullSetSize,Rank,ID,PQ_Rank,PQ_Distance,Exact_Distance" << std::endl; + } + + // Log the top-k results from exact distance sorting and their positions in PQ-sorted list + size_t top_k = std::min((size_t)k_search, exact_dist_retset.size()); + for (size_t i = 0; i < top_k; i++) + { + uint32_t id = exact_dist_retset[i].id; + float exact_dist = exact_dist_retset[i].distance; + + // Find this ID's position in the PQ-sorted list + size_t pq_pos = pq_positions.count(id) ? pq_positions[id] : full_retset.size(); + float pq_dist = (pq_pos < full_retset.size()) ? 
full_retset[pq_pos].distance : -1; + + log_file << current_search_id << "," << thread_id << "," << full_retset.size() << "," << i + 1 << "," << id + << "," << pq_pos + 1 << "," << pq_dist << "," << exact_dist << std::endl; + } + + log_file.close(); + } +#endif + + if (use_reorder_data) + { + if (!(this->_reorder_data_exists)) + { + throw ANNException("Requested use of reordering data which does " + "not exist in index " + "file", + -1, __FUNCSIG__, __FILE__, __LINE__); + } + + std::vector vec_read_reqs; + + if (full_retset.size() > k_search * FULL_PRECISION_REORDER_MULTIPLIER) + full_retset.erase(full_retset.begin() + k_search * FULL_PRECISION_REORDER_MULTIPLIER, full_retset.end()); + + for (size_t i = 0; i < full_retset.size(); ++i) + { + // MULTISECTORFIX + vec_read_reqs.emplace_back(VECTOR_SECTOR_NO(((size_t)full_retset[i].id)) * defaults::SECTOR_LEN, + defaults::SECTOR_LEN, sector_scratch + i * defaults::SECTOR_LEN); + + if (stats != nullptr) + { + stats->n_4k++; + stats->n_ios++; + } + } + + io_timer.reset(); +#ifdef USE_BING_INFRA + reader->read(vec_read_reqs, ctx, true); // async reader windows. +#else + reader->read(vec_read_reqs, ctx); // synchronous IO linux +#endif + if (stats != nullptr) + { + stats->io_us += io_timer.elapsed(); + } + + for (size_t i = 0; i < full_retset.size(); ++i) + { + auto id = full_retset[i].id; + // MULTISECTORFIX + auto location = (sector_scratch + i * defaults::SECTOR_LEN) + VECTOR_SECTOR_OFFSET(id); + full_retset[i].distance = _dist_cmp->compare(aligned_query_T, (T *)location, (uint32_t)this->_data_dim); + } + + std::sort(full_retset.begin(), full_retset.end()); + } + + // copy k_search values + for (uint64_t i = 0; i < k_search; i++) + { + indices[i] = full_retset[i].id; + auto key = (uint32_t)indices[i]; + if (_dummy_pts.find(key) != _dummy_pts.end()) + { + indices[i] = _dummy_to_real_map[key]; + } + + if (distances != nullptr) + { + distances[i] = full_retset[i].distance; + if (metric == diskann::Metric::INNER_PRODUCT) + { + // flip the sign to convert min to max + distances[i] = (-distances[i]); + // rescale to revert back to original norms (cancelling the + // effect of base and query pre-processing) + if (_max_base_norm != 0) + distances[i] *= (_max_base_norm * query_norm); + } + } + } + +#ifdef USE_BING_INFRA + ctx.m_completeCount = 0; +#endif + + if (stats != nullptr) + { + stats->total_us = (float)query_timer.elapsed(); + } + + // After search is complete, print cache hit rate statistics + if (recompute_beighbor_embeddings && dedup_node_dis && total_nodes_requested > 0) + { + float cache_hit_rate = static_cast(total_nodes_from_cache) / total_nodes_requested * 100.0f; + diskann::cout << "Node distance cache statistics:" << std::endl; + diskann::cout << " Total nodes requested: " << total_nodes_requested << std::endl; + diskann::cout << " Nodes served from cache: " << total_nodes_from_cache << std::endl; + diskann::cout << " Cache hit rate: " << cache_hit_rate << "%" << std::endl; + } +} + +// range search returns results of all neighbors within distance of range. +// indices and distances need to be pre-allocated of size l_search and the +// return value is the number of matching hits. 
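+// Usage sketch added for illustration (variable names are hypothetical; the template arguments
+// follow the explicit instantiations at the bottom of this file):
+//   std::vector<uint64_t> ids;
+//   std::vector<float> dists;
+//   uint32_t hits = index.range_search(query, /*range=*/0.5, /*min_l_search=*/32,
+//                                      /*max_l_search=*/512, ids, dists,
+//                                      /*min_beam_width=*/2, /*stats=*/nullptr);
+// Internally the candidate-list size doubles each round until fewer than half of the candidates
+// fall within the requested radius or max_l_search is reached.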
+template +uint32_t PQFlashIndex::range_search(const T *query1, const double range, const uint64_t min_l_search, + const uint64_t max_l_search, std::vector &indices, + std::vector &distances, const uint64_t min_beam_width, + QueryStats *stats) +{ + uint32_t res_count = 0; + + bool stop_flag = false; + + uint32_t l_search = (uint32_t)min_l_search; // starting size of the candidate list + while (!stop_flag) + { + indices.resize(l_search); + distances.resize(l_search); + uint64_t cur_bw = min_beam_width > (l_search / 5) ? min_beam_width : l_search / 5; + cur_bw = (cur_bw > 100) ? 100 : cur_bw; + for (auto &x : distances) + x = std::numeric_limits::max(); + this->cached_beam_search(query1, l_search, l_search, indices.data(), distances.data(), cur_bw, false, stats); + for (uint32_t i = 0; i < l_search; i++) + { + if (distances[i] > (float)range) + { + res_count = i; + break; + } + else if (i == l_search - 1) + res_count = l_search; + } + if (res_count < (uint32_t)(l_search / 2.0)) + stop_flag = true; + l_search = l_search * 2; + if (l_search > max_l_search) + stop_flag = true; + } + indices.resize(res_count); + distances.resize(res_count); + return res_count; +} + +template uint64_t PQFlashIndex::get_data_dim() +{ + return _data_dim; +} + +template diskann::Metric PQFlashIndex::get_metric() +{ + return this->metric; +} + +#ifdef EXEC_ENV_OLS +template char *PQFlashIndex::getHeaderBytes() +{ + IOContext &ctx = reader->get_ctx(); + AlignedRead readReq; + readReq.buf = new char[PQFlashIndex::HEADER_SIZE]; + readReq.len = PQFlashIndex::HEADER_SIZE; + readReq.offset = 0; + + std::vector readReqs; + readReqs.push_back(readReq); + + reader->read(readReqs, ctx, false); + + return (char *)readReq.buf; +} +#endif + +template +std::vector PQFlashIndex::get_pq_vector(std::uint64_t vid) +{ + std::uint8_t *pqVec = &this->data[vid * this->_n_chunks]; + return std::vector(pqVec, pqVec + this->_n_chunks); +} + +template std::uint64_t PQFlashIndex::get_num_points() +{ + return _num_points; +} + +// instantiations +template class PQFlashIndex; +template class PQFlashIndex; +template class PQFlashIndex; +template class PQFlashIndex; +template class PQFlashIndex; +template class PQFlashIndex; + +} // namespace diskann diff --git a/packages/leann-backend-diskann/third_party/DiskANN/src/pq_l2_distance.cpp b/packages/leann-backend-diskann/third_party/DiskANN/src/pq_l2_distance.cpp new file mode 100644 index 0000000..9bd5311 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/src/pq_l2_distance.cpp @@ -0,0 +1,284 @@ + +#include "pq.h" +#include "pq_l2_distance.h" +#include "pq_scratch.h" + +// block size for reading/processing large files and matrices in blocks +#define BLOCK_SIZE 5000000 + +namespace diskann +{ + +template +PQL2Distance::PQL2Distance(uint32_t num_chunks, bool use_opq) : _num_chunks(num_chunks), _is_opq(use_opq) +{ +} + +template PQL2Distance::~PQL2Distance() +{ +#ifndef EXEC_ENV_OLS + if (_tables != nullptr) + delete[] _tables; + if (_chunk_offsets != nullptr) + delete[] _chunk_offsets; + if (_centroid != nullptr) + delete[] _centroid; + if (_rotmat_tr != nullptr) + delete[] _rotmat_tr; +#endif + if (_tables_tr != nullptr) + delete[] _tables_tr; +} + +template bool PQL2Distance::is_opq() const +{ + return this->_is_opq; +} + +template +std::string PQL2Distance::get_quantized_vectors_filename(const std::string &prefix) const +{ + if (_num_chunks == 0) + { + throw diskann::ANNException("Must set num_chunks before calling get_quantized_vectors_filename", -1, + __FUNCSIG__, __FILE__, 
__LINE__); + } + return diskann::get_quantized_vectors_filename(prefix, _is_opq, (uint32_t)_num_chunks); +} +template std::string PQL2Distance::get_pivot_data_filename(const std::string &prefix) const +{ + if (_num_chunks == 0) + { + throw diskann::ANNException("Must set num_chunks before calling get_pivot_data_filename", -1, __FUNCSIG__, + __FILE__, __LINE__); + } + return diskann::get_pivot_data_filename(prefix, _is_opq, (uint32_t)_num_chunks); +} +template +std::string PQL2Distance::get_rotation_matrix_suffix(const std::string &pq_pivots_filename) const +{ + return diskann::get_rotation_matrix_suffix(pq_pivots_filename); +} + +#ifdef EXEC_ENV_OLS +template +void PQL2Distance::load_pivot_data(MemoryMappedFiles &files, const std::string &pq_table_file, + size_t num_chunks) +{ +#else +template +void PQL2Distance::load_pivot_data(const std::string &pq_table_file, size_t num_chunks) +{ +#endif + size_t nr, nc; + // std::string rotmat_file = get_opq_rot_matrix_filename(pq_table_file, + // false); + +#ifdef EXEC_ENV_OLS + size_t *file_offset_data; // since load_bin only sets the pointer, no need + // to delete. + diskann::load_bin(files, pq_table_file, file_offset_data, nr, nc); +#else + std::unique_ptr file_offset_data; + diskann::load_bin(pq_table_file, file_offset_data, nr, nc); +#endif + + bool use_old_filetype = false; + + if (nr != 4 && nr != 5) + { + diskann::cout << "Error reading pq_pivots file " << pq_table_file + << ". Offsets dont contain correct metadata, # offsets = " << nr << ", but expecting " << 4 + << " or " << 5; + throw diskann::ANNException("Error reading pq_pivots file at offsets data.", -1, __FUNCSIG__, __FILE__, + __LINE__); + } + + if (nr == 4) + { + diskann::cout << "Offsets: " << file_offset_data[0] << " " << file_offset_data[1] << " " << file_offset_data[2] + << " " << file_offset_data[3] << std::endl; + } + else if (nr == 5) + { + use_old_filetype = true; + diskann::cout << "Offsets: " << file_offset_data[0] << " " << file_offset_data[1] << " " << file_offset_data[2] + << " " << file_offset_data[3] << file_offset_data[4] << std::endl; + } + else + { + throw diskann::ANNException("Wrong number of offsets in pq_pivots", -1, __FUNCSIG__, __FILE__, __LINE__); + } + +#ifdef EXEC_ENV_OLS + diskann::load_bin(files, pq_table_file, tables, nr, nc, file_offset_data[0]); +#else + diskann::load_bin(pq_table_file, _tables, nr, nc, file_offset_data[0]); +#endif + + if ((nr != NUM_PQ_CENTROIDS)) + { + diskann::cout << "Error reading pq_pivots file " << pq_table_file << ". file_num_centers = " << nr + << " but expecting " << NUM_PQ_CENTROIDS << " centers"; + throw diskann::ANNException("Error reading pq_pivots file at pivots data.", -1, __FUNCSIG__, __FILE__, + __LINE__); + } + + this->_ndims = nc; + +#ifdef EXEC_ENV_OLS + diskann::load_bin(files, pq_table_file, centroid, nr, nc, file_offset_data[1]); +#else + diskann::load_bin(pq_table_file, _centroid, nr, nc, file_offset_data[1]); +#endif + + if ((nr != this->_ndims) || (nc != 1)) + { + diskann::cerr << "Error reading centroids from pq_pivots file " << pq_table_file << ". 
file_dim = " << nr + << ", file_cols = " << nc << " but expecting " << this->_ndims << " entries in 1 dimension."; + throw diskann::ANNException("Error reading pq_pivots file at centroid data.", -1, __FUNCSIG__, __FILE__, + __LINE__); + } + + int chunk_offsets_index = 2; + if (use_old_filetype) + { + chunk_offsets_index = 3; + } +#ifdef EXEC_ENV_OLS + diskann::load_bin(files, pq_table_file, chunk_offsets, nr, nc, file_offset_data[chunk_offsets_index]); +#else + diskann::load_bin(pq_table_file, _chunk_offsets, nr, nc, file_offset_data[chunk_offsets_index]); +#endif + + if (nc != 1 || (nr != num_chunks + 1 && num_chunks != 0)) + { + diskann::cerr << "Error loading chunk offsets file. numc: " << nc << " (should be 1). numr: " << nr + << " (should be " << num_chunks + 1 << " or 0 if we need to infer)" << std::endl; + throw diskann::ANNException("Error loading chunk offsets file", -1, __FUNCSIG__, __FILE__, __LINE__); + } + + this->_num_chunks = nr - 1; + diskann::cout << "Loaded PQ Pivots: #ctrs: " << NUM_PQ_CENTROIDS << ", #dims: " << this->_ndims + << ", #chunks: " << this->_num_chunks << std::endl; + + // For OPQ there will be a rotation matrix to load. + if (this->_is_opq) + { + std::string rotmat_file = get_rotation_matrix_suffix(pq_table_file); +#ifdef EXEC_ENV_OLS + diskann::load_bin(files, rotmat_file, (float *&)rotmat_tr, nr, nc); +#else + diskann::load_bin(rotmat_file, _rotmat_tr, nr, nc); +#endif + if (nr != this->_ndims || nc != this->_ndims) + { + diskann::cerr << "Error loading rotation matrix file" << std::endl; + throw diskann::ANNException("Error loading rotation matrix file", -1, __FUNCSIG__, __FILE__, __LINE__); + } + } + + // alloc and compute transpose + _tables_tr = new float[256 * this->_ndims]; + for (size_t i = 0; i < 256; i++) + { + for (size_t j = 0; j < this->_ndims; j++) + { + _tables_tr[j * 256 + i] = _tables[i * this->_ndims + j]; + } + } +} + +template uint32_t PQL2Distance::get_num_chunks() const +{ + return static_cast(_num_chunks); +} + +// REFACTOR: Instead of doing half the work in the caller and half in this +// function, we let this function +// do all of the work, making it easier for the caller. 
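// Editorial note (sketch, not upstream code): the methods below implement the
// standard asymmetric PQ distance computation. preprocess_query() centers the
// query (and rotates it for OPQ), then prepopulate_chunkwise_distances() fills
// a 256-entry table per chunk holding the squared distance from the query's
// sub-vector to each of that chunk's 256 centroids. The approximate distance
// to a point encoded with PQ codes c[0..num_chunks-1] is then a pure table
// lookup,
//
//   dist(q, x) ~= sum over chunks of table[chunk * 256 + c[chunk]],
//
// which is what preprocessed_distance() delegates to pq_dist_lookup().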
+template +void PQL2Distance::preprocess_query(const data_t *aligned_query, uint32_t dim, PQScratch &scratch) +{ + // Copy query vector to float and then to "rotated" query + for (size_t d = 0; d < dim; d++) + { + scratch.aligned_query_float[d] = (float)aligned_query[d]; + } + scratch.initialize(dim, aligned_query); + + for (uint32_t d = 0; d < _ndims; d++) + { + scratch.rotated_query[d] -= _centroid[d]; + } + std::vector tmp(_ndims, 0); + if (_is_opq) + { + for (uint32_t d = 0; d < _ndims; d++) + { + for (uint32_t d1 = 0; d1 < _ndims; d1++) + { + tmp[d] += scratch.rotated_query[d1] * _rotmat_tr[d1 * _ndims + d]; + } + } + std::memcpy(scratch.rotated_query, tmp.data(), _ndims * sizeof(float)); + } + this->prepopulate_chunkwise_distances(scratch.rotated_query, scratch.aligned_pqtable_dist_scratch); +} + +template +void PQL2Distance::preprocessed_distance(PQScratch &pq_scratch, const uint32_t n_ids, float *dists_out) +{ + pq_dist_lookup(pq_scratch.aligned_pq_coord_scratch, n_ids, _num_chunks, pq_scratch.aligned_pqtable_dist_scratch, + dists_out); +} + +template +void PQL2Distance::preprocessed_distance(PQScratch &pq_scratch, const uint32_t n_ids, + std::vector &dists_out) +{ + pq_dist_lookup(pq_scratch.aligned_pq_coord_scratch, n_ids, _num_chunks, pq_scratch.aligned_pqtable_dist_scratch, + dists_out); +} + +template float PQL2Distance::brute_force_distance(const float *query_vec, uint8_t *base_vec) +{ + float res = 0; + for (size_t chunk = 0; chunk < _num_chunks; chunk++) + { + for (size_t j = _chunk_offsets[chunk]; j < _chunk_offsets[chunk + 1]; j++) + { + const float *centers_dim_vec = _tables_tr + (256 * j); + float diff = centers_dim_vec[base_vec[chunk]] - (query_vec[j]); + res += diff * diff; + } + } + return res; +} + +template +void PQL2Distance::prepopulate_chunkwise_distances(const float *query_vec, float *dist_vec) +{ + memset(dist_vec, 0, 256 * _num_chunks * sizeof(float)); + // chunk wise distance computation + for (size_t chunk = 0; chunk < _num_chunks; chunk++) + { + // sum (q-c)^2 for the dimensions associated with this chunk + float *chunk_dists = dist_vec + (256 * chunk); + for (size_t j = _chunk_offsets[chunk]; j < _chunk_offsets[chunk + 1]; j++) + { + const float *centers_dim_vec = _tables_tr + (256 * j); + for (size_t idx = 0; idx < 256; idx++) + { + double diff = centers_dim_vec[idx] - (query_vec[j]); + chunk_dists[idx] += (float)(diff * diff); + } + } + } +} + +template DISKANN_DLLEXPORT class PQL2Distance; +template DISKANN_DLLEXPORT class PQL2Distance; +template DISKANN_DLLEXPORT class PQL2Distance; + +} // namespace diskann \ No newline at end of file diff --git a/packages/leann-backend-diskann/third_party/DiskANN/src/restapi/search_wrapper.cpp b/packages/leann-backend-diskann/third_party/DiskANN/src/restapi/search_wrapper.cpp new file mode 100644 index 0000000..001e36d --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/src/restapi/search_wrapper.cpp @@ -0,0 +1,217 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
+ +#include +#include +#include + +#include "utils.h" +#include + +#ifndef _WINDOWS +#include +#include +#include +#include "linux_aligned_file_reader.h" +#else +#ifdef USE_BING_INFRA +#include "bing_aligned_file_reader.h" +#else +#include "windows_aligned_file_reader.h" +#endif +#endif + +namespace diskann +{ +const unsigned int DEFAULT_W = 1; + +SearchResult::SearchResult(unsigned int K, unsigned int elapsed_time_in_ms, const unsigned *const indices, + const float *const distances, const std::string *const tags, + const unsigned *const partitions) + : _K(K), _search_time_in_ms(elapsed_time_in_ms) +{ + for (unsigned i = 0; i < K; ++i) + { + this->_indices.push_back(indices[i]); + this->_distances.push_back(distances[i]); + if (tags != NULL) + this->_tags.push_back(tags[i]); + if (partitions != NULL) + this->_partitions.push_back(partitions[i]); + } + if (tags != nullptr) + this->_tags_enabled = true; + else + this->_tags_enabled = false; + + if (partitions != nullptr) + this->_partitions_enabled = true; + else + this->_partitions_enabled = false; +} + +BaseSearch::BaseSearch(const std::string &tagsFile) +{ + if (tagsFile.size() != 0) + { + std::ifstream in(tagsFile); + + if (!in.is_open()) + { + std::cerr << "Could not open " << tagsFile << std::endl; + } + + std::string tag; + while (std::getline(in, tag)) + { + _tags_str.push_back(tag); + } + + _tags_enabled = true; + + std::cout << "Loaded " << _tags_str.size() << " tags from " << tagsFile << std::endl; + } + else + { + _tags_enabled = false; + } +} + +void BaseSearch::lookup_tags(const unsigned K, const unsigned *indices, std::string *ret_tags) +{ + if (_tags_enabled == false) + throw std::runtime_error("Can not look up tags as they are not enabled."); + else + { + for (unsigned k = 0; k < K; ++k) + { + if (indices[k] > _tags_str.size()) + throw std::runtime_error("In tag lookup, index exceeded the number of tags"); + else + ret_tags[k] = _tags_str[indices[k]]; + } + } +} + +template +InMemorySearch::InMemorySearch(const std::string &baseFile, const std::string &indexFile, + const std::string &tagsFile, Metric m, uint32_t num_threads, uint32_t search_l) + : BaseSearch(tagsFile) +{ + size_t dimensions, total_points = 0; + diskann::get_bin_metadata(baseFile, total_points, dimensions); + auto search_params = diskann::IndexSearchParams(search_l, num_threads); + _index = std::unique_ptr>( + new diskann::Index(m, dimensions, total_points, nullptr, search_params, 0, false)); + + _index->load(indexFile.c_str(), num_threads, search_l); +} + +template +SearchResult InMemorySearch::search(const T *query, const unsigned int dimensions, const unsigned int K, + const unsigned int Ls) +{ + unsigned int *indices = new unsigned int[K]; + float *distances = new float[K]; + + auto startTime = std::chrono::high_resolution_clock::now(); + _index->search(query, K, Ls, indices, distances); + auto duration = + std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - startTime) + .count(); + + std::string *tags = nullptr; + if (_tags_enabled) + { + tags = new std::string[K]; + lookup_tags(K, indices, tags); + } + + SearchResult result(K, (unsigned int)duration, indices, distances, tags); + + delete[] indices; + delete[] distances; + return result; +} + +template InMemorySearch::~InMemorySearch() +{ +} + +template +PQFlashSearch::PQFlashSearch(const std::string &indexPrefix, const unsigned num_nodes_to_cache, + const unsigned num_threads, const std::string &tagsFile, Metric m) + : BaseSearch(tagsFile) +{ +#ifdef _WINDOWS +#ifndef USE_BING_INFRA + 
reader.reset(new WindowsAlignedFileReader()); +#else + reader.reset(new diskann::BingAlignedFileReader()); +#endif +#else + auto ptr = new LinuxAlignedFileReader(); + reader.reset(ptr); +#endif + + std::string index_prefix_path(indexPrefix); + std::string disk_index_file = index_prefix_path + "_disk.index"; + std::string warmup_query_file = index_prefix_path + "_sample_data.bin"; + + _index = std::unique_ptr>(new diskann::PQFlashIndex(reader, m)); + + int res = _index->load(num_threads, index_prefix_path.c_str()); + + if (res != 0) + { + std::cerr << "Unable to load index. Status code: " << res << "." << std::endl; + } + + std::vector node_list; + std::cout << "Caching " << num_nodes_to_cache << " BFS nodes around medoid(s)" << std::endl; + _index->cache_bfs_levels(num_nodes_to_cache, node_list); + _index->load_cache_list(node_list); + omp_set_num_threads(num_threads); +} + +template +SearchResult PQFlashSearch::search(const T *query, const unsigned int dimensions, const unsigned int K, + const unsigned int Ls) +{ + uint64_t *indices_u64 = new uint64_t[K]; + unsigned *indices = new unsigned[K]; + float *distances = new float[K]; + + auto startTime = std::chrono::high_resolution_clock::now(); + _index->cached_beam_search(query, K, Ls, indices_u64, distances, DEFAULT_W); + auto duration = + std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - startTime) + .count(); + for (unsigned k = 0; k < K; ++k) + indices[k] = indices_u64[k]; + + std::string *tags = nullptr; + if (_tags_enabled) + { + tags = new std::string[K]; + lookup_tags(K, indices, tags); + } + SearchResult result(K, (unsigned int)duration, indices, distances, tags); + delete[] indices_u64; + delete[] indices; + delete[] distances; + return result; +} + +template PQFlashSearch::~PQFlashSearch() +{ +} + +template class InMemorySearch; +template class InMemorySearch; +template class InMemorySearch; + +template class PQFlashSearch; +template class PQFlashSearch; +template class PQFlashSearch; +} // namespace diskann diff --git a/packages/leann-backend-diskann/third_party/DiskANN/src/restapi/server.cpp b/packages/leann-backend-diskann/third_party/DiskANN/src/restapi/server.cpp new file mode 100644 index 0000000..f79b0af --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/src/restapi/server.cpp @@ -0,0 +1,271 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace diskann +{ + +Server::Server(web::uri &uri, std::vector> &multi_searcher, + const std::string &typestring) + : _multi_search(multi_searcher.size() > 1 ? 
true : false) +{ + for (auto &searcher : multi_searcher) + _multi_searcher.push_back(std::move(searcher)); + + _listener = std::unique_ptr( + new web::http::experimental::listener::http_listener(uri)); + if (typestring == std::string("float")) + { + _listener->support(std::bind(&Server::handle_post, this, std::placeholders::_1)); + } + else if (typestring == std::string("int8_t")) + { + _listener->support(web::http::methods::POST, + std::bind(&Server::handle_post, this, std::placeholders::_1)); + } + else if (typestring == std::string("uint8_t")) + { + _listener->support(web::http::methods::POST, + std::bind(&Server::handle_post, this, std::placeholders::_1)); + } + else + { + throw "Unsupported type in server constuctor"; + } +} + +Server::~Server() +{ +} + +pplx::task Server::open() +{ + return _listener->open(); +} +pplx::task Server::close() +{ + return _listener->close(); +} + +diskann::SearchResult Server::aggregate_results(const unsigned K, const std::vector &results) +{ + if (_multi_search) + { + auto best_indices = new unsigned[K]; + auto best_distances = new float[K]; + auto best_partitions = new unsigned[K]; + auto best_tags = results[0].tags_enabled() ? new std::string[K] : nullptr; + + auto numsearchers = _multi_searcher.size(); + std::vector pos(numsearchers, 0); + + for (size_t k = 0; k < K; ++k) + { + float best_distance = std::numeric_limits::max(); + unsigned best_partition = 0; + + for (size_t i = 0; i < numsearchers; ++i) + { + if (results[i].get_distances()[pos[i]] < best_distance) + { + best_distance = results[i].get_distances()[pos[i]]; + best_partition = i; + } + } + best_distances[k] = best_distance; + best_indices[k] = results[best_partition].get_indices()[pos[best_partition]]; + best_partitions[k] = best_partition; + if (results[best_partition].tags_enabled()) + best_tags[k] = results[best_partition].get_tags()[pos[best_partition]]; + std::cout << best_partition << " " << pos[best_partition] << std::endl; + pos[best_partition]++; + } + + unsigned int total_time = 0; + for (size_t i = 0; i < numsearchers; ++i) + total_time += results[i].get_time(); + diskann::SearchResult result = + SearchResult(K, total_time, best_indices, best_distances, best_tags, best_partitions); + + delete[] best_indices; + delete[] best_distances; + delete[] best_partitions; + delete[] best_tags; + + return result; + } + else + { + return results[0]; + } +} + +template void Server::handle_post(web::http::http_request message) +{ + message.extract_string(true) + .then([=](utility::string_t body) { + int64_t queryId = -1; + unsigned int K = 0; + try + { + T *queryVector = nullptr; + unsigned int dimensions = 0; + unsigned int Ls; + parseJson(body, K, queryId, queryVector, dimensions, Ls); + + auto startTime = std::chrono::high_resolution_clock::now(); + std::vector results; + + for (auto &searcher : _multi_searcher) + results.push_back(searcher->search(queryVector, dimensions, (unsigned int)K, Ls)); + diskann::SearchResult result = aggregate_results(K, results); + diskann::aligned_free(queryVector); + web::json::value response = prepareResponse(queryId, K); + response[INDICES_KEY] = idsToJsonArray(result); + response[DISTANCES_KEY] = distancesToJsonArray(result); + if (result.tags_enabled()) + response[TAGS_KEY] = tagsToJsonArray(result); + if (result.partitions_enabled()) + response[PARTITION_KEY] = partitionsToJsonArray(result); + + response[TIME_TAKEN_KEY] = std::chrono::duration_cast( + std::chrono::high_resolution_clock::now() - startTime) + .count(); + + std::cout << "Responding to: " 
<< queryId << std::endl; + return std::make_pair(web::http::status_codes::OK, response); + } + catch (const std::exception &ex) + { + std::cerr << "Exception while processing query: " << queryId << ":" << ex.what() << std::endl; + web::json::value response = prepareResponse(queryId, K); + response[ERROR_MESSAGE_KEY] = web::json::value::string(ex.what()); + return std::make_pair(web::http::status_codes::InternalError, response); + } + catch (...) + { + std::cerr << "Uncaught exception while processing query: " << queryId; + web::json::value response = prepareResponse(queryId, K); + response[ERROR_MESSAGE_KEY] = web::json::value::string(UNKNOWN_ERROR); + return std::make_pair(web::http::status_codes::InternalError, response); + } + }) + .then([=](std::pair response_status) { + try + { + message.reply(response_status.first, response_status.second).wait(); + } + catch (const std::exception &ex) + { + std::cerr << "Exception while processing reply: " << ex.what() << std::endl; + }; + }); +} + +web::json::value Server::prepareResponse(const int64_t &queryId, const int k) +{ + web::json::value response = web::json::value::object(); + response[QUERY_ID_KEY] = queryId; + response[K_KEY] = k; + + return response; +} + +template +void Server::parseJson(const utility::string_t &body, unsigned int &k, int64_t &queryId, T *&queryVector, + unsigned int &dimensions, unsigned &Ls) +{ + std::cout << body << std::endl; + web::json::value val = web::json::value::parse(body); + web::json::array queryArr = val.at(VECTOR_KEY).as_array(); + queryId = val.has_field(QUERY_ID_KEY) ? val.at(QUERY_ID_KEY).as_number().to_int64() : -1; + Ls = val.has_field(L_KEY) ? val.at(L_KEY).as_number().to_uint32() : DEFAULT_L; + k = val.at(K_KEY).as_integer(); + + if (k <= 0 || k > Ls) + { + throw new std::invalid_argument("Num of expected NN (k) must be greater than zero and less than or " + "equal to Ls."); + } + if (queryArr.size() == 0) + { + throw new std::invalid_argument("Query vector has zero elements."); + } + + dimensions = static_cast(queryArr.size()); + unsigned new_dim = ROUND_UP(dimensions, 8); + diskann::alloc_aligned((void **)&queryVector, new_dim * sizeof(T), 8 * sizeof(T)); + memset(queryVector, 0, new_dim * sizeof(float)); + for (size_t i = 0; i < queryArr.size(); i++) + { + queryVector[i] = (float)queryArr[i].as_double(); + } +} + +template +web::json::value Server::toJsonArray(const std::vector &v, std::function valConverter) +{ + web::json::value rslts = web::json::value::array(); + for (size_t i = 0; i < v.size(); i++) + { + auto jsonVal = valConverter(v[i]); + rslts[i] = jsonVal; + } + return rslts; +} + +web::json::value Server::idsToJsonArray(const diskann::SearchResult &result) +{ + web::json::value idArray = web::json::value::array(); + auto ids = result.get_indices(); + for (size_t i = 0; i < ids.size(); i++) + { + auto idVal = web::json::value::number(ids[i]); + idArray[i] = idVal; + } + std::cout << "Vector size: " << ids.size() << std::endl; + return idArray; +} + +web::json::value Server::distancesToJsonArray(const diskann::SearchResult &result) +{ + web::json::value distArray = web::json::value::array(); + auto distances = result.get_distances(); + for (size_t i = 0; i < distances.size(); i++) + { + distArray[i] = web::json::value::number(distances[i]); + } + return distArray; +} + +web::json::value Server::tagsToJsonArray(const diskann::SearchResult &result) +{ + web::json::value tagArray = web::json::value::array(); + auto tags = result.get_tags(); + for (size_t i = 0; i < tags.size(); i++) + { + 
tagArray[i] = web::json::value::string(tags[i]); + } + return tagArray; +} + +web::json::value Server::partitionsToJsonArray(const diskann::SearchResult &result) +{ + web::json::value partitionArray = web::json::value::array(); + auto partitions = result.get_partitions(); + for (size_t i = 0; i < partitions.size(); i++) + { + partitionArray[i] = web::json::value::number(partitions[i]); + } + return partitionArray; +} +}; // namespace diskann \ No newline at end of file diff --git a/packages/leann-backend-diskann/third_party/DiskANN/src/scratch.cpp b/packages/leann-backend-diskann/third_party/DiskANN/src/scratch.cpp new file mode 100644 index 0000000..1f8a34b --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/src/scratch.cpp @@ -0,0 +1,182 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#include +#include + +#include "scratch.h" +#include "pq_scratch.h" + +namespace diskann +{ +// +// Functions to manage scratch space for in-memory index based search +// +template +InMemQueryScratch::InMemQueryScratch(uint32_t search_l, uint32_t indexing_l, uint32_t r, uint32_t maxc, size_t dim, + size_t aligned_dim, size_t alignment_factor, bool init_pq_scratch) + : _L(0), _R(r), _maxc(maxc) +{ + if (search_l == 0 || indexing_l == 0 || r == 0 || dim == 0) + { + std::stringstream ss; + ss << "In InMemQueryScratch, one of search_l = " << search_l << ", indexing_l = " << indexing_l + << ", dim = " << dim << " or r = " << r << " is zero." << std::endl; + throw diskann::ANNException(ss.str(), -1); + } + + alloc_aligned(((void **)&this->_aligned_query_T), aligned_dim * sizeof(T), alignment_factor * sizeof(T)); + memset(this->_aligned_query_T, 0, aligned_dim * sizeof(T)); + + if (init_pq_scratch) + this->_pq_scratch = new PQScratch(defaults::MAX_GRAPH_DEGREE, aligned_dim); + else + this->_pq_scratch = nullptr; + + _occlude_factor.reserve(maxc); + _inserted_into_pool_bs = new boost::dynamic_bitset<>(); + _id_scratch.reserve((size_t)std::ceil(1.5 * defaults::GRAPH_SLACK_FACTOR * _R)); + _dist_scratch.reserve((size_t)std::ceil(1.5 * defaults::GRAPH_SLACK_FACTOR * _R)); + + resize_for_new_L(std::max(search_l, indexing_l)); +} + +template void InMemQueryScratch::clear() +{ + _pool.clear(); + _best_l_nodes.clear(); + _occlude_factor.clear(); + + _inserted_into_pool_rs.clear(); + _inserted_into_pool_bs->reset(); + + _id_scratch.clear(); + _dist_scratch.clear(); + + _expanded_nodes_set.clear(); + _expanded_nghrs_vec.clear(); + _occlude_list_output.clear(); +} + +template void InMemQueryScratch::resize_for_new_L(uint32_t new_l) +{ + if (new_l > _L) + { + _L = new_l; + _pool.reserve(3 * _L + _R); + _best_l_nodes.reserve(_L); + + _inserted_into_pool_rs.reserve(20 * _L); + } +} + +template InMemQueryScratch::~InMemQueryScratch() +{ + if (this->_aligned_query_T != nullptr) + { + aligned_free(this->_aligned_query_T); + this->_aligned_query_T = nullptr; + } + + delete this->_pq_scratch; + delete _inserted_into_pool_bs; +} + +// +// Functions to manage scratch space for SSD based search +// +template void SSDQueryScratch::reset() +{ + sector_idx = 0; + visited.clear(); + retset.clear(); + full_retset.clear(); +} + +template SSDQueryScratch::SSDQueryScratch(size_t aligned_dim, size_t visited_reserve) +{ + size_t coord_alloc_size = ROUND_UP(sizeof(T) * aligned_dim, 256); + + diskann::alloc_aligned((void **)&coord_scratch, coord_alloc_size, 256); + diskann::alloc_aligned((void **)§or_scratch, defaults::MAX_N_SECTOR_READS * defaults::SECTOR_LEN, + 
defaults::SECTOR_LEN); + diskann::alloc_aligned((void **)&this->_aligned_query_T, aligned_dim * sizeof(T), 8 * sizeof(T)); + + this->_pq_scratch = new PQScratch(defaults::MAX_GRAPH_DEGREE, aligned_dim); + + memset(coord_scratch, 0, coord_alloc_size); + memset(this->_aligned_query_T, 0, aligned_dim * sizeof(T)); + + visited.reserve(visited_reserve); + full_retset.reserve(visited_reserve); +} + +template SSDQueryScratch::~SSDQueryScratch() +{ + diskann::aligned_free((void *)coord_scratch); + diskann::aligned_free((void *)sector_scratch); + diskann::aligned_free((void *)this->_aligned_query_T); + + delete this->_pq_scratch; +} + +template +SSDThreadData::SSDThreadData(size_t aligned_dim, size_t visited_reserve) : scratch(aligned_dim, visited_reserve) +{ +} + +template void SSDThreadData::clear() +{ + scratch.reset(); +} + +template PQScratch::PQScratch(size_t graph_degree, size_t aligned_dim) +{ + diskann::alloc_aligned((void **)&aligned_pq_coord_scratch, + (size_t)graph_degree * (size_t)MAX_PQ_CHUNKS * sizeof(uint8_t), 256); + diskann::alloc_aligned((void **)&aligned_pqtable_dist_scratch, 256 * (size_t)MAX_PQ_CHUNKS * sizeof(float), 256); + diskann::alloc_aligned((void **)&aligned_dist_scratch, (size_t)graph_degree * sizeof(float), 256); + diskann::alloc_aligned((void **)&aligned_query_float, aligned_dim * sizeof(float), 8 * sizeof(float)); + diskann::alloc_aligned((void **)&rotated_query, aligned_dim * sizeof(float), 8 * sizeof(float)); + + memset(aligned_query_float, 0, aligned_dim * sizeof(float)); + memset(rotated_query, 0, aligned_dim * sizeof(float)); +} + +template PQScratch::~PQScratch() +{ + diskann::aligned_free((void *)aligned_pq_coord_scratch); + diskann::aligned_free((void *)aligned_pqtable_dist_scratch); + diskann::aligned_free((void *)aligned_dist_scratch); + diskann::aligned_free((void *)aligned_query_float); + diskann::aligned_free((void *)rotated_query); +} + +template void PQScratch::initialize(size_t dim, const T *query, const float norm) +{ + for (size_t d = 0; d < dim; ++d) + { + if (norm != 1.0f) + rotated_query[d] = aligned_query_float[d] = static_cast(query[d]) / norm; + else + rotated_query[d] = aligned_query_float[d] = static_cast(query[d]); + } +} + +template DISKANN_DLLEXPORT class InMemQueryScratch; +template DISKANN_DLLEXPORT class InMemQueryScratch; +template DISKANN_DLLEXPORT class InMemQueryScratch; + +template DISKANN_DLLEXPORT class SSDQueryScratch; +template DISKANN_DLLEXPORT class SSDQueryScratch; +template DISKANN_DLLEXPORT class SSDQueryScratch; + +template DISKANN_DLLEXPORT class PQScratch; +template DISKANN_DLLEXPORT class PQScratch; +template DISKANN_DLLEXPORT class PQScratch; + +template DISKANN_DLLEXPORT class SSDThreadData; +template DISKANN_DLLEXPORT class SSDThreadData; +template DISKANN_DLLEXPORT class SSDThreadData; + +} // namespace diskann diff --git a/packages/leann-backend-diskann/third_party/DiskANN/src/utils.cpp b/packages/leann-backend-diskann/third_party/DiskANN/src/utils.cpp new file mode 100644 index 0000000..3773cda --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/src/utils.cpp @@ -0,0 +1,477 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
+ +#include "utils.h" + +#include + +#ifdef EXEC_ENV_OLS +#include "aligned_file_reader.h" +#endif + +const uint32_t MAX_REQUEST_SIZE = 1024 * 1024 * 1024; // 64MB +const uint32_t MAX_SIMULTANEOUS_READ_REQUESTS = 128; + +#ifdef _WINDOWS +#include + +// Taken from: +// https://insufficientlycomplicated.wordpress.com/2011/11/07/detecting-intel-advanced-vector-extensions-avx-in-visual-studio/ +bool cpuHasAvxSupport() +{ + bool avxSupported = false; + + // Checking for AVX requires 3 things: + // 1) CPUID indicates that the OS uses XSAVE and XRSTORE + // instructions (allowing saving YMM registers on context + // switch) + // 2) CPUID indicates support for AVX + // 3) XGETBV indicates the AVX registers will be saved and + // restored on context switch + // + // Note that XGETBV is only available on 686 or later CPUs, so + // the instruction needs to be conditionally run. + int cpuInfo[4]; + __cpuid(cpuInfo, 1); + + bool osUsesXSAVE_XRSTORE = cpuInfo[2] & (1 << 27) || false; + bool cpuAVXSuport = cpuInfo[2] & (1 << 28) || false; + + if (osUsesXSAVE_XRSTORE && cpuAVXSuport) + { + // Check if the OS will save the YMM registers + unsigned long long xcrFeatureMask = _xgetbv(_XCR_XFEATURE_ENABLED_MASK); + avxSupported = (xcrFeatureMask & 0x6) || false; + } + + return avxSupported; +} + +bool cpuHasAvx2Support() +{ + int cpuInfo[4]; + __cpuid(cpuInfo, 0); + int n = cpuInfo[0]; + if (n >= 7) + { + __cpuidex(cpuInfo, 7, 0); + static int avx2Mask = 0x20; + return (cpuInfo[1] & avx2Mask) > 0; + } + return false; +} + +bool AvxSupportedCPU = cpuHasAvxSupport(); +bool Avx2SupportedCPU = cpuHasAvx2Support(); + +#else + +bool Avx2SupportedCPU = true; +bool AvxSupportedCPU = false; +#endif + +namespace diskann +{ + +void block_convert(std::ofstream &writr, std::ifstream &readr, float *read_buf, size_t npts, size_t ndims) +{ + readr.read((char *)read_buf, npts * ndims * sizeof(float)); + uint32_t ndims_u32 = (uint32_t)ndims; +#pragma omp parallel for + for (int64_t i = 0; i < (int64_t)npts; i++) + { + float norm_pt = std::numeric_limits::epsilon(); + for (uint32_t dim = 0; dim < ndims_u32; dim++) + { + norm_pt += *(read_buf + i * ndims + dim) * *(read_buf + i * ndims + dim); + } + norm_pt = std::sqrt(norm_pt); + for (uint32_t dim = 0; dim < ndims_u32; dim++) + { + *(read_buf + i * ndims + dim) = *(read_buf + i * ndims + dim) / norm_pt; + } + } + writr.write((char *)read_buf, npts * ndims * sizeof(float)); +} + +void normalize_data_file(const std::string &inFileName, const std::string &outFileName) +{ + std::ifstream readr(inFileName, std::ios::binary); + std::ofstream writr(outFileName, std::ios::binary); + + int npts_s32, ndims_s32; + readr.read((char *)&npts_s32, sizeof(int32_t)); + readr.read((char *)&ndims_s32, sizeof(int32_t)); + + writr.write((char *)&npts_s32, sizeof(int32_t)); + writr.write((char *)&ndims_s32, sizeof(int32_t)); + + size_t npts = (size_t)npts_s32; + size_t ndims = (size_t)ndims_s32; + diskann::cout << "Normalizing FLOAT vectors in file: " << inFileName << std::endl; + diskann::cout << "Dataset: #pts = " << npts << ", # dims = " << ndims << std::endl; + + size_t blk_size = 131072; + size_t nblks = ROUND_UP(npts, blk_size) / blk_size; + diskann::cout << "# blks: " << nblks << std::endl; + + float *read_buf = new float[npts * ndims]; + for (size_t i = 0; i < nblks; i++) + { + size_t cblk_size = std::min(npts - i * blk_size, blk_size); + block_convert(writr, readr, read_buf, cblk_size, ndims); + } + delete[] read_buf; + + diskann::cout << "Wrote normalized points to file: " << outFileName 
<< std::endl; +} + +double calculate_recall(uint32_t num_queries, uint32_t *gold_std, float *gs_dist, uint32_t dim_gs, + uint32_t *our_results, uint32_t dim_or, uint32_t recall_at) +{ + double total_recall = 0; + std::set gt, res; + + for (size_t i = 0; i < num_queries; i++) + { + gt.clear(); + res.clear(); + uint32_t *gt_vec = gold_std + dim_gs * i; + uint32_t *res_vec = our_results + dim_or * i; + size_t tie_breaker = recall_at; + if (gs_dist != nullptr) + { + tie_breaker = recall_at - 1; + float *gt_dist_vec = gs_dist + dim_gs * i; + while (tie_breaker < dim_gs && gt_dist_vec[tie_breaker] == gt_dist_vec[recall_at - 1]) + tie_breaker++; + } + + gt.insert(gt_vec, gt_vec + tie_breaker); + res.insert(res_vec, + res_vec + recall_at); // change to recall_at for recall k@k + // or dim_or for k@dim_or + uint32_t cur_recall = 0; + for (auto &v : gt) + { + if (res.find(v) != res.end()) + { + cur_recall++; + } + } + total_recall += cur_recall; + } + return total_recall / (num_queries) * (100.0 / recall_at); +} + +double calculate_recall(uint32_t num_queries, uint32_t *gold_std, float *gs_dist, uint32_t dim_gs, + uint32_t *our_results, uint32_t dim_or, uint32_t recall_at, + const tsl::robin_set &active_tags) +{ + double total_recall = 0; + std::set gt, res; + bool printed = false; + for (size_t i = 0; i < num_queries; i++) + { + gt.clear(); + res.clear(); + uint32_t *gt_vec = gold_std + dim_gs * i; + uint32_t *res_vec = our_results + dim_or * i; + size_t tie_breaker = recall_at; + uint32_t active_points_count = 0; + uint32_t cur_counter = 0; + while (active_points_count < recall_at && cur_counter < dim_gs) + { + if (active_tags.find(*(gt_vec + cur_counter)) != active_tags.end()) + { + active_points_count++; + } + cur_counter++; + } + if (active_tags.empty()) + cur_counter = recall_at; + + if ((active_points_count < recall_at && !active_tags.empty()) && !printed) + { + diskann::cout << "Warning: Couldn't find enough closest neighbors " << active_points_count << "/" + << recall_at + << " from " + "truthset for query # " + << i << ". Will result in under-reported value of recall." << std::endl; + printed = true; + } + if (gs_dist != nullptr) + { + tie_breaker = cur_counter - 1; + float *gt_dist_vec = gs_dist + dim_gs * i; + while (tie_breaker < dim_gs && gt_dist_vec[tie_breaker] == gt_dist_vec[cur_counter - 1]) + tie_breaker++; + } + + gt.insert(gt_vec, gt_vec + tie_breaker); + res.insert(res_vec, res_vec + recall_at); + uint32_t cur_recall = 0; + for (auto &v : res) + { + if (gt.find(v) != gt.end()) + { + cur_recall++; + } + } + total_recall += cur_recall; + } + return ((double)(total_recall / (num_queries))) * ((double)(100.0 / recall_at)); +} + +double calculate_range_search_recall(uint32_t num_queries, std::vector> &groundtruth, + std::vector> &our_results) +{ + double total_recall = 0; + std::set gt, res; + + for (size_t i = 0; i < num_queries; i++) + { + gt.clear(); + res.clear(); + + gt.insert(groundtruth[i].begin(), groundtruth[i].end()); + res.insert(our_results[i].begin(), our_results[i].end()); + uint32_t cur_recall = 0; + for (auto &v : gt) + { + if (res.find(v) != res.end()) + { + cur_recall++; + } + } + if (gt.size() != 0) + total_recall += ((100.0 * cur_recall) / gt.size()); + else + total_recall += 100; + } + return total_recall / (num_queries); +} + +#ifdef EXEC_ENV_OLS +void get_bin_metadata(AlignedFileReader &reader, size_t &npts, size_t &ndim, size_t offset) +{ + std::vector readReqs; + AlignedRead readReq; + uint32_t buf[2]; // npts/ndim are uint32_ts. 
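// Editorial comment: the DiskANN .bin layout begins with two uint32_t metadata
// fields (number of points, then dimension) followed by npts * ndim elements,
// so the single 8-byte read issued below at `offset` recovers both counts.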
+ + readReq.buf = buf; + readReq.offset = offset; + readReq.len = 2 * sizeof(uint32_t); + readReqs.push_back(readReq); + + IOContext &ctx = reader.get_ctx(); + reader.read(readReqs, ctx); // synchronous + if ((*(ctx.m_pRequestsStatus))[0] == IOContext::READ_SUCCESS) + { + npts = buf[0]; + ndim = buf[1]; + diskann::cout << "File has: " << npts << " points, " << ndim << " dimensions at offset: " << offset + << std::endl; + } + else + { + std::stringstream str; + str << "Could not read binary metadata from index file at offset: " << offset << std::endl; + throw diskann::ANNException(str.str(), -1, __FUNCSIG__, __FILE__, __LINE__); + } +} + +template void load_bin(AlignedFileReader &reader, T *&data, size_t &npts, size_t &ndim, size_t offset) +{ + // Code assumes that the reader is already setup correctly. + get_bin_metadata(reader, npts, ndim, offset); + data = new T[npts * ndim]; + + size_t data_size = npts * ndim * sizeof(T); + size_t write_offset = 0; + size_t read_start = offset + 2 * sizeof(uint32_t); + + // BingAlignedFileReader can only read uint32_t bytes of data. So, + // we limit ourselves even more to reading 1GB at a time. + std::vector readReqs; + while (data_size > 0) + { + AlignedRead readReq; + readReq.buf = data + write_offset; + readReq.offset = read_start + write_offset; + readReq.len = data_size > MAX_REQUEST_SIZE ? MAX_REQUEST_SIZE : data_size; + readReqs.push_back(readReq); + // in the corner case, the loop will not execute + data_size -= readReq.len; + write_offset += readReq.len; + } + IOContext &ctx = reader.get_ctx(); + reader.read(readReqs, ctx); + for (int i = 0; i < readReqs.size(); i++) + { + // Since we are making sync calls, no request will be in the + // READ_WAIT state. + if ((*(ctx.m_pRequestsStatus))[i] != IOContext::READ_SUCCESS) + { + std::stringstream str; + str << "Could not read binary data from index file at offset: " << readReqs[i].offset << std::endl; + throw diskann::ANNException(str.str(), -1, __FUNCSIG__, __FILE__, __LINE__); + } + } +} +template +void load_bin(AlignedFileReader &reader, std::unique_ptr &data, size_t &npts, size_t &ndim, size_t offset) +{ + T *ptr = nullptr; + load_bin(reader, ptr, npts, ndim, offset); + data.reset(ptr); +} + +template +void copy_aligned_data_from_file(AlignedFileReader &reader, T *&data, size_t &npts, size_t &ndim, + const size_t &rounded_dim, size_t offset) +{ + if (data == nullptr) + { + diskann::cerr << "Memory was not allocated for " << data << " before calling the load function. Exiting..." + << std::endl; + throw diskann::ANNException("Null pointer passed to copy_aligned_data_from_file()", -1, __FUNCSIG__, __FILE__, + __LINE__); + } + + size_t pts, dim; + get_bin_metadata(reader, pts, dim, offset); + + if (ndim != dim || npts != pts) + { + std::stringstream ss; + ss << "Either file dimension: " << dim << " is != passed dimension: " << ndim << " or file #pts: " << pts + << " is != passed #pts: " << npts << std::endl; + throw diskann::ANNException(ss.str(), -1, __FUNCSIG__, __FILE__, __LINE__); + } + + // Instead of reading one point of ndim size and setting (rounded_dim - dim) + // values to zero We'll set everything to zero and read in chunks of data at + // the appropriate locations. 
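// Editorial example (hypothetical numbers): with dim = 100 and rounded_dim = 104,
// the buffer of npts * 104 entries is zeroed once up front, and point i is then
// read with a single request of 100 * sizeof(T) bytes placed at data + i * 104,
// so the 4 trailing padding entries of every point stay zero with no extra writes.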
+ size_t read_offset = offset + 2 * sizeof(uint32_t); + memset(data, 0, npts * rounded_dim * sizeof(T)); + int i = 0; + std::vector read_requests; + + while (i < npts) + { + int j = 0; + read_requests.clear(); + while (j < MAX_SIMULTANEOUS_READ_REQUESTS && i < npts) + { + AlignedRead read_req; + read_req.buf = data + i * rounded_dim; + read_req.len = dim * sizeof(T); + read_req.offset = read_offset + i * dim * sizeof(T); + read_requests.push_back(read_req); + i++; + j++; + } + IOContext &ctx = reader.get_ctx(); + reader.read(read_requests, ctx); + for (int k = 0; k < read_requests.size(); k++) + { + if ((*ctx.m_pRequestsStatus)[k] != IOContext::READ_SUCCESS) + { + throw diskann::ANNException("Load data from file using AlignedReader failed.", -1, __FUNCSIG__, + __FILE__, __LINE__); + } + } + } +} + +// Unlike load_bin, assumes that data is already allocated 'size' entries +template void read_array(AlignedFileReader &reader, T *data, size_t size, size_t offset) +{ + if (data == nullptr) + { + throw diskann::ANNException("read_array requires an allocated buffer.", -1); + } + + if (size * sizeof(T) > MAX_REQUEST_SIZE) + { + std::stringstream ss; + ss << "Cannot read more than " << MAX_REQUEST_SIZE << " bytes. Current request size: " << std::to_string(size) + << " sizeof(T): " << sizeof(T) << std::endl; + throw diskann::ANNException(ss.str(), -1, __FUNCSIG__, __FILE__, __LINE__); + } + std::vector read_requests; + AlignedRead read_req; + read_req.buf = data; + read_req.len = size * sizeof(T); + read_req.offset = offset; + read_requests.push_back(read_req); + IOContext &ctx = reader.get_ctx(); + reader.read(read_requests, ctx); + + if ((*(ctx.m_pRequestsStatus))[0] != IOContext::READ_SUCCESS) + { + std::stringstream ss; + ss << "Failed to read_array() of size: " << size * sizeof(T) << " at offset: " << offset << " from reader. 
" + << std::endl; + throw diskann::ANNException(ss.str(), -1, __FUNCSIG__, __FILE__, __LINE__); + } +} + +template void read_value(AlignedFileReader &reader, T &value, size_t offset) +{ + read_array(reader, &value, 1, offset); +} + +template DISKANN_DLLEXPORT void load_bin(AlignedFileReader &reader, std::unique_ptr &data, + size_t &npts, size_t &ndim, size_t offset); +template DISKANN_DLLEXPORT void load_bin(AlignedFileReader &reader, std::unique_ptr &data, + size_t &npts, size_t &ndim, size_t offset); +template DISKANN_DLLEXPORT void load_bin(AlignedFileReader &reader, std::unique_ptr &data, + size_t &npts, size_t &ndim, size_t offset); +template DISKANN_DLLEXPORT void load_bin(AlignedFileReader &reader, std::unique_ptr &data, + size_t &npts, size_t &ndim, size_t offset); +template DISKANN_DLLEXPORT void load_bin(AlignedFileReader &reader, std::unique_ptr &data, + size_t &npts, size_t &ndim, size_t offset); +template DISKANN_DLLEXPORT void load_bin(AlignedFileReader &reader, std::unique_ptr &data, size_t &npts, + size_t &ndim, size_t offset); + +template DISKANN_DLLEXPORT void load_bin(AlignedFileReader &reader, uint8_t *&data, size_t &npts, size_t &ndim, + size_t offset); +template DISKANN_DLLEXPORT void load_bin(AlignedFileReader &reader, int64_t *&data, size_t &npts, size_t &ndim, + size_t offset); +template DISKANN_DLLEXPORT void load_bin(AlignedFileReader &reader, uint64_t *&data, size_t &npts, + size_t &ndim, size_t offset); +template DISKANN_DLLEXPORT void load_bin(AlignedFileReader &reader, uint32_t *&data, size_t &npts, + size_t &ndim, size_t offset); +template DISKANN_DLLEXPORT void load_bin(AlignedFileReader &reader, int32_t *&data, size_t &npts, size_t &ndim, + size_t offset); + +template DISKANN_DLLEXPORT void copy_aligned_data_from_file(AlignedFileReader &reader, uint8_t *&data, + size_t &npts, size_t &dim, + const size_t &rounded_dim, size_t offset); +template DISKANN_DLLEXPORT void copy_aligned_data_from_file(AlignedFileReader &reader, int8_t *&data, + size_t &npts, size_t &dim, + const size_t &rounded_dim, size_t offset); +template DISKANN_DLLEXPORT void copy_aligned_data_from_file(AlignedFileReader &reader, float *&data, + size_t &npts, size_t &dim, const size_t &rounded_dim, + size_t offset); + +template DISKANN_DLLEXPORT void read_array(AlignedFileReader &reader, char *data, size_t size, size_t offset); + +template DISKANN_DLLEXPORT void read_array(AlignedFileReader &reader, uint8_t *data, size_t size, + size_t offset); +template DISKANN_DLLEXPORT void read_array(AlignedFileReader &reader, int8_t *data, size_t size, size_t offset); +template DISKANN_DLLEXPORT void read_array(AlignedFileReader &reader, uint32_t *data, size_t size, + size_t offset); +template DISKANN_DLLEXPORT void read_array(AlignedFileReader &reader, float *data, size_t size, size_t offset); + +template DISKANN_DLLEXPORT void read_value(AlignedFileReader &reader, uint8_t &value, size_t offset); +template DISKANN_DLLEXPORT void read_value(AlignedFileReader &reader, int8_t &value, size_t offset); +template DISKANN_DLLEXPORT void read_value(AlignedFileReader &reader, float &value, size_t offset); +template DISKANN_DLLEXPORT void read_value(AlignedFileReader &reader, uint32_t &value, size_t offset); +template DISKANN_DLLEXPORT void read_value(AlignedFileReader &reader, uint64_t &value, size_t offset); + +#endif + +} // namespace diskann diff --git a/packages/leann-backend-diskann/third_party/DiskANN/src/windows_aligned_file_reader.cpp 
b/packages/leann-backend-diskann/third_party/DiskANN/src/windows_aligned_file_reader.cpp new file mode 100644 index 0000000..3650b92 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/src/windows_aligned_file_reader.cpp @@ -0,0 +1,189 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#ifdef _WINDOWS +#ifndef USE_BING_INFRA +#include "windows_aligned_file_reader.h" +#include +#include "utils.h" +#include + +#define SECTOR_LEN 4096 + +void WindowsAlignedFileReader::open(const std::string &fname) +{ +#ifdef UNICODE + m_filename = std::wstring(fname.begin(), fname.end()); +#else + m_filename = fname; +#endif + + this->register_thread(); +} + +void WindowsAlignedFileReader::close() +{ + for (auto &k_v : ctx_map) + { + IOContext ctx = ctx_map[k_v.first]; + CloseHandle(ctx.fhandle); + } +} + +void WindowsAlignedFileReader::register_thread() +{ + std::unique_lock lk(this->ctx_mut); + if (this->ctx_map.find(std::this_thread::get_id()) != ctx_map.end()) + { + diskann::cout << "Warning:: Duplicate registration for thread_id : " << std::this_thread::get_id() << std::endl; + } + + IOContext ctx; + ctx.fhandle = CreateFile( + m_filename.c_str(), GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, + FILE_ATTRIBUTE_READONLY | FILE_FLAG_NO_BUFFERING | FILE_FLAG_OVERLAPPED | FILE_FLAG_RANDOM_ACCESS, NULL); + if (ctx.fhandle == INVALID_HANDLE_VALUE) + { + const size_t c_max_filepath_len = 256; + size_t actual_len = 0; + char filePath[c_max_filepath_len]; + if (wcstombs_s(&actual_len, filePath, c_max_filepath_len, m_filename.c_str(), m_filename.length()) == 0) + { + diskann::cout << "Error opening " << filePath << " -- error=" << GetLastError() << std::endl; + } + else + { + diskann::cout << "Error converting wchar to char -- error=" << GetLastError() << std::endl; + } + } + + // create IOCompletionPort + ctx.iocp = CreateIoCompletionPort(ctx.fhandle, ctx.iocp, 0, 0); + + // create MAX_DEPTH # of reqs + for (uint64_t i = 0; i < MAX_IO_DEPTH; i++) + { + OVERLAPPED os; + memset(&os, 0, sizeof(OVERLAPPED)); + // os.hEvent = CreateEventA(NULL, TRUE, FALSE, NULL); + ctx.reqs.push_back(os); + } + this->ctx_map.insert(std::make_pair(std::this_thread::get_id(), ctx)); +} + +IOContext &WindowsAlignedFileReader::get_ctx() +{ + std::unique_lock lk(this->ctx_mut); + if (ctx_map.find(std::this_thread::get_id()) == ctx_map.end()) + { + std::stringstream stream; + stream << "unable to find IOContext for thread_id : " << std::this_thread::get_id() << "\n"; + throw diskann::ANNException(stream.str(), -2, __FUNCSIG__, __FILE__, __LINE__); + } + IOContext &ctx = ctx_map[std::this_thread::get_id()]; + lk.unlock(); + return ctx; +} + +void WindowsAlignedFileReader::read(std::vector &read_reqs, IOContext &ctx, bool async) +{ + using namespace std::chrono_literals; + // execute each request sequentially + size_t n_reqs = read_reqs.size(); + uint64_t n_batches = ROUND_UP(n_reqs, MAX_IO_DEPTH) / MAX_IO_DEPTH; + for (uint64_t i = 0; i < n_batches; i++) + { + // reset all OVERLAPPED objects + for (auto &os : ctx.reqs) + { + // HANDLE evt = os.hEvent; + memset(&os, 0, sizeof(os)); + // os.hEvent = evt; + + /* + if (ResetEvent(os.hEvent) == 0) { + diskann::cerr << "ResetEvent failed" << std::endl; + exit(-3); + } + */ + } + + // batch start/end + uint64_t batch_start = MAX_IO_DEPTH * i; + uint64_t batch_size = std::min((uint64_t)(n_reqs - batch_start), (uint64_t)MAX_IO_DEPTH); + + // fill OVERLAPPED and issue them + for (uint64_t j = 0; j < batch_size; j++) + { + 
AlignedRead &req = read_reqs[batch_start + j]; + OVERLAPPED &os = ctx.reqs[j]; + + uint64_t offset = req.offset; + uint64_t nbytes = req.len; + char *read_buf = (char *)req.buf; + assert(IS_ALIGNED(read_buf, SECTOR_LEN)); + assert(IS_ALIGNED(offset, SECTOR_LEN)); + assert(IS_ALIGNED(nbytes, SECTOR_LEN)); + + // fill in OVERLAPPED struct + os.Offset = offset & 0xffffffff; + os.OffsetHigh = (offset >> 32); + + BOOL ret = ReadFile(ctx.fhandle, read_buf, (DWORD)nbytes, NULL, &os); + if (ret == FALSE) + { + auto error = GetLastError(); + if (error != ERROR_IO_PENDING) + { + diskann::cerr << "Error queuing IO -- " << error << "\n"; + } + } + else + { + diskann::cerr << "Error queueing IO -- ReadFile returned TRUE" << std::endl; + } + } + DWORD n_read = 0; + uint64_t n_complete = 0; + ULONG_PTR completion_key = 0; + OVERLAPPED *lp_os; + while (n_complete < batch_size) + { + if (GetQueuedCompletionStatus(ctx.iocp, &n_read, &completion_key, &lp_os, INFINITE) != 0) + { + // successfully dequeued a completed I/O + n_complete++; + } + else + { + // failed to dequeue OR dequeued failed I/O + if (lp_os == NULL) + { + DWORD error = GetLastError(); + if (error != WAIT_TIMEOUT) + { + diskann::cerr << "GetQueuedCompletionStatus() failed " + "with error = " + << error << std::endl; + throw diskann::ANNException("GetQueuedCompletionStatus failed with error: ", error, __FUNCSIG__, + __FILE__, __LINE__); + } + // no completion packet dequeued ==> sleep for 5us and try + // again + std::this_thread::sleep_for(5us); + } + else + { + // completion packet for failed IO dequeued + auto op_idx = lp_os - ctx.reqs.data(); + std::stringstream stream; + stream << "I/O failed , offset: " << read_reqs[op_idx].offset + << "with error code: " << GetLastError() << std::endl; + throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); + } + } + } + } +} +#endif +#endif diff --git a/packages/leann-backend-diskann/third_party/DiskANN/tests/CMakeLists.txt b/packages/leann-backend-diskann/third_party/DiskANN/tests/CMakeLists.txt new file mode 100644 index 0000000..6af8405 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/tests/CMakeLists.txt @@ -0,0 +1,41 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT license. + +set(CMAKE_COMPILE_WARNING_AS_ERROR ON) + +find_package(Boost COMPONENTS unit_test_framework) + +# For Windows, fall back to nuget version if find_package didn't find it. +if (MSVC AND NOT Boost_FOUND) + set(DISKANN_BOOST_INCLUDE "${DISKANN_MSVC_PACKAGES}/boost/lib/native/include") + # Multi-threaded static library. 
+ set(UNIT_TEST_FRAMEWORK_LIB_PATTERN "${DISKANN_MSVC_PACKAGES}/boost_unit_test_framework-vc${MSVC_TOOLSET_VERSION}/lib/native/libboost_unit_test_framework-vc${MSVC_TOOLSET_VERSION}-mt-x64-*.lib") + file(GLOB DISKANN_BOOST_UNIT_TEST_FRAMEWORK_LIB ${UNIT_TEST_FRAMEWORK_LIB_PATTERN}) + + set(UNIT_TEST_FRAMEWORK_DLIB_PATTERN "${DISKANN_MSVC_PACKAGES}/boost_unit_test_framework-vc${MSVC_TOOLSET_VERSION}/lib/native/libboost_unit_test_framework-vc${MSVC_TOOLSET_VERSION}-mt-gd-x64-*.lib") + file(GLOB DISKANN_BOOST_UNIT_TEST_FRAMEWORK_DLIB ${UNIT_TEST_FRAMEWORK_DLIB_PATTERN}) + + if (EXISTS ${DISKANN_BOOST_INCLUDE} AND EXISTS ${DISKANN_BOOST_UNIT_TEST_FRAMEWORK_LIB} AND EXISTS ${DISKANN_BOOST_UNIT_TEST_FRAMEWORK_DLIB}) + set(Boost_FOUND ON) + set(Boost_INCLUDE_DIR ${DISKANN_BOOST_INCLUDE}) + add_library(Boost::unit_test_framework STATIC IMPORTED) + set_target_properties(Boost::unit_test_framework PROPERTIES IMPORTED_LOCATION_RELEASE "${DISKANN_BOOST_UNIT_TEST_FRAMEWORK_LIB}") + set_target_properties(Boost::unit_test_framework PROPERTIES IMPORTED_LOCATION_DEBUG "${DISKANN_BOOST_UNIT_TEST_FRAMEWORK_DLIB}") + message(STATUS "Falling back to using Boost from the nuget package") + else() + message(WARNING "Couldn't find Boost. Was looking for ${DISKANN_BOOST_INCLUDE} and ${UNIT_TEST_FRAMEWORK_LIB_PATTERN}") + endif() +endif() + +if (NOT Boost_FOUND) + message(FATAL_ERROR "Couldn't find Boost dependency") +endif() + + +set(DISKANN_UNIT_TEST_SOURCES main.cpp index_write_parameters_builder_tests.cpp) + +add_executable(${PROJECT_NAME}_unit_tests ${DISKANN_SOURCES} ${DISKANN_UNIT_TEST_SOURCES}) +target_link_libraries(${PROJECT_NAME}_unit_tests ${PROJECT_NAME} ${DISKANN_TOOLS_TCMALLOC_LINK_OPTIONS} Boost::unit_test_framework) + +add_test(NAME ${PROJECT_NAME}_unit_tests COMMAND ${PROJECT_NAME}_unit_tests) + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/tests/README.md b/packages/leann-backend-diskann/third_party/DiskANN/tests/README.md new file mode 100644 index 0000000..113c998 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/tests/README.md @@ -0,0 +1,11 @@ +# Unit Test project + +This unit test project is based on the [boost unit test framework](https://www.boost.org/doc/libs/1_78_0/libs/test/doc/html/index.html). Below are the simple steps to add new unit test, you could find more usage from the [boost unit test document](https://www.boost.org/doc/libs/1_78_0/libs/test/doc/html/index.html). 
+ +## How to add unit test + +- Create new [BOOST_AUTO_TEST_SUITE](https://www.boost.org/doc/libs/1_78_0/libs/test/doc/html/boost_test/utf_reference/test_org_reference/test_org_boost_auto_test_suite.html) for each class in an individual cpp file + +- Add [BOOST_AUTO_TEST_CASE](https://www.boost.org/doc/libs/1_78_0/libs/test/doc/html/boost_test/utf_reference/test_org_reference/test_org_boost_auto_test_case.html) for each test case in the [BOOST_AUTO_TEST_SUITE](https://www.boost.org/doc/libs/1_78_0/libs/test/doc/html/boost_test/utf_reference/test_org_reference/test_org_boost_auto_test_suite.html) + +- Update the [CMakeLists.txt](CMakeLists.txt) file to add the new cpp file to the test project \ No newline at end of file diff --git a/packages/leann-backend-diskann/third_party/DiskANN/tests/index_write_parameters_builder_tests.cpp b/packages/leann-backend-diskann/third_party/DiskANN/tests/index_write_parameters_builder_tests.cpp new file mode 100644 index 0000000..0aa798d --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/tests/index_write_parameters_builder_tests.cpp @@ -0,0 +1,58 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#include + +#include "parameters.h" + +BOOST_AUTO_TEST_SUITE(IndexWriteParametersBuilder_tests) + +BOOST_AUTO_TEST_CASE(test_build) +{ + uint32_t search_list_size = rand(); + uint32_t max_degree = rand(); + float alpha = (float)rand(); + uint32_t filter_list_size = rand(); + uint32_t max_occlusion_size = rand(); + bool saturate_graph = true; + + diskann::IndexWriteParametersBuilder builder(search_list_size, max_degree); + + builder.with_alpha(alpha) + .with_filter_list_size(filter_list_size) + .with_max_occlusion_size(max_occlusion_size) + .with_num_threads(0) + .with_saturate_graph(saturate_graph); + + { + auto parameters = builder.build(); + + BOOST_TEST(search_list_size == parameters.search_list_size); + BOOST_TEST(max_degree == parameters.max_degree); + BOOST_TEST(alpha == parameters.alpha); + BOOST_TEST(filter_list_size == parameters.filter_list_size); + BOOST_TEST(max_occlusion_size == parameters.max_occlusion_size); + BOOST_TEST(saturate_graph == parameters.saturate_graph); + + BOOST_TEST(parameters.num_threads > (uint32_t)0); + } + + { + uint32_t num_threads = rand() + 1; + saturate_graph = false; + builder.with_num_threads(num_threads).with_saturate_graph(saturate_graph); + + auto parameters = builder.build(); + + BOOST_TEST(search_list_size == parameters.search_list_size); + BOOST_TEST(max_degree == parameters.max_degree); + BOOST_TEST(alpha == parameters.alpha); + BOOST_TEST(filter_list_size == parameters.filter_list_size); + BOOST_TEST(max_occlusion_size == parameters.max_occlusion_size); + BOOST_TEST(saturate_graph == parameters.saturate_graph); + + BOOST_TEST(num_threads == parameters.num_threads); + } +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/packages/leann-backend-diskann/third_party/DiskANN/tests/main.cpp b/packages/leann-backend-diskann/third_party/DiskANN/tests/main.cpp new file mode 100644 index 0000000..53440a1 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/tests/main.cpp @@ -0,0 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
+ +#define BOOST_TEST_MODULE diskann_unit_tests + +#include diff --git a/packages/leann-backend-diskann/third_party/DiskANN/windows/packages.config.in b/packages/leann-backend-diskann/third_party/DiskANN/windows/packages.config.in new file mode 100644 index 0000000..f8eecf0 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/windows/packages.config.in @@ -0,0 +1,11 @@ + + + + + + + + + + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/windows/packages_restapi.config.in b/packages/leann-backend-diskann/third_party/DiskANN/windows/packages_restapi.config.in new file mode 100644 index 0000000..6d1a60c --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/windows/packages_restapi.config.in @@ -0,0 +1,4 @@ + + + + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/workflows/SSD_index.md b/packages/leann-backend-diskann/third_party/DiskANN/workflows/SSD_index.md new file mode 100644 index 0000000..3144528 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/workflows/SSD_index.md @@ -0,0 +1,74 @@ +**Usage for SSD-based indices** +=============================== + +To generate an SSD-friendly index, use the `apps/build_disk_index` program. +---------------------------------------------------------------------------- + +The arguments are as follows: + +1. **--data_type**: The type of dataset you wish to build an index on. float(32 bit), signed int8 and unsigned uint8 are supported. +2. **--dist_fn**: Three distance functions are supported: cosine distance, minimum Euclidean distance (l2) and maximum inner product (mips). +3. **--data_file**: The input data over which to build an index, in .bin format. The first 4 bytes represent number of points as an integer. The next 4 bytes represent the dimension of data as an integer. The following `n*d*sizeof(T)` bytes contain the contents of the data one data point in time. `sizeof(T)` is 1 for byte indices, and 4 for float indices. This will be read by the program as int8_t for signed indices, uint8_t for unsigned indices or float for float indices. +4. **--index_path_prefix**: the index will span a few files, all beginning with the specified prefix path. For example, if you provide `~/index_test` as the prefix path, build generates files such as `~/index_test_pq_pivots.bin, ~/index_test_pq_compressed.bin, ~/index_test_disk.index, ...`. There may be between 8 and 10 files generated with this prefix depending on how the index is constructed. +5. **-R (--max_degree)** (default is 64): the degree of the graph index, typically between 60 and 150. Larger R will result in larger indices and longer indexing times, but better search quality. +6. **-L (--Lbuild)** (default is 100): the size of search list during index build. Typical values are between 75 to 200. Larger values will take more time to build but result in indices that provide higher recall for the same search complexity. Use a value for L value that is at least the value of R unless you need to build indices really quickly and can somewhat compromise on quality. +7. **-B (--search_DRAM_budget)**: bound on the memory footprint of the index at search time in GB. Once built, the index will use up only the specified RAM limit, the rest will reside on disk. This will dictate how aggressively we compress the data vectors to store in memory. Larger will yield better performance at search time. 
For an n point index, to use b byte PQ compressed representation in memory, use `B = ((n * b) / 2^30 + (250000*(4*R + sizeof(T)*ndim)) / 2^30)`. The second term in the summation is to allow some buffer for caching about 250,000 nodes from the graph in memory while serving. If you are not sure about this term, add 0.25GB to the first term. +8. **-M (--build_DRAM_budget)**: Limit on the memory allowed for building the index in GB. If you specify a value less than what is required to build the index in one pass, the index is built using a divide and conquer approach so that sub-graphs will fit in the RAM budget. The sub-graphs are overlayed to build the overall index. This approach can be upto 1.5 times slower than building the index in one shot. Allocate as much memory as your RAM allows. +9. **-T (--num_threads)** (default is to get_omp_num_procs()): number of threads used by the index build process. Since the code is highly parallel, the indexing time improves almost linearly with the number of threads (subject to the cores available on the machine and DRAM bandwidth). +10. **--PQ_disk_bytes** (default is 0): Use 0 to store uncompressed data on SSD. This allows the index to asymptote to 100% recall. If your vectors are too large to store in SSD, this parameter provides the option to compress the vectors using PQ for storing on SSD. This will trade off recall. You would also want this to be greater than the number of bytes used for the PQ compressed data stored in-memory +11. **--build_PQ_bytes** (default is 0): Set to a positive value less than the dimensionality of the data to enable faster index build with PQ based distance comparisons. +12. **--use_opq**: use the flag to use OPQ rather than PQ compression. OPQ is more space efficient for some high dimensional datasets, but also needs a bit more build time. + +To search the SSD-index, use the `apps/search_disk_index` program. +------------------------------------------------------------------- + +The arguments are as follows: + +1. **--data_type**: The type of dataset you wish to build an index on. float(32 bit), signed int8 and unsigned uint8 are supported. Use the same data type as in arg (1) above used in building the index. +2. **--dist_fn**: There are two distance functions supported: minimum Euclidean distance (l2) and maximum inner product (mips). Use the same distance as in arg (2) above used in building the index. +3. **--index_path_prefix**: same as the prefix used in building the index (see arg 4 above). +4. **--num_nodes_to_cache** (default is 0): While serving the index, the entire graph is stored on SSD. For faster search performance, you can cache a few frequently accessed nodes in memory. +5. **-T (--num_threads)** (default is to get_omp_num_procs()): The number of threads used for searching. Threads run in parallel and one thread handles one query at a time. More threads will result in higher aggregate query throughput, but will also use more IOs/second across the system, which may lead to higher per-query latency. So find the balance depending on the maximum number of IOPs supported by the SSD. +6. **-W (--beamwidth)** (default is 2): The beamwidth to be used for search. This is the maximum number of IO requests each query will issue per iteration of search code. Larger beamwidth will result in fewer IO round-trips per query, but might result in slightly higher total number of IO requests to SSD per query. For the highest query throughput with a fixed SSD IOps rating, use `W=1`. 
For best latency, use `W=4,8` or higher complexity search. Specifying 0 will optimize the beamwidth depending on the number of threads performing search, but will involve some tuning overhead. +7. **--query_file**: The queries to be searched on in same binary file format as the data file in arg (2) above. The query file must be the same type as argument (1). +8. **--gt_file**: The ground truth file for the queries in arg (7) and data file used in index construction. The binary file must start with *n*, the number of queries (4 bytes), followed by *d*, the number of ground truth elements per query (4 bytes), followed by `n*d` entries per query representing the d closest IDs per query in integer format, followed by `n*d` entries representing the corresponding distances (float). Total file size is `8 + 4*n*d + 4*n*d` bytes. The groundtruth file, if not available, can be calculated using the program `apps/utils/compute_groundtruth`. Use "null" if you do not have this file and if you do not want to compute recall. +9. **K**: search for *K* neighbors and measure *K*-recall@*K*, meaning the intersection between the retrieved top-*K* nearest neighbors and ground truth *K* nearest neighbors. +10. **result_output_prefix**: Search results will be stored in files with specified prefix, in bin format. +11. **-L (--search_list)**: A list of search_list sizes to perform search with. Larger parameters will result in slower latencies, but higher accuracies. Must be at least the value of *K* in arg (9). + + +Example with BIGANN: +-------------------- + +This example demonstrates the use of the commands above on a 100K slice of the [BIGANN dataset](http://corpus-texmex.irisa.fr/) with 128 dimensional SIFT descriptors applied to images. + +Download the base and query set and convert the data to binary format +```bash +mkdir -p DiskANN/build/data && cd DiskANN/build/data +wget ftp://ftp.irisa.fr/local/texmex/corpus/sift.tar.gz +tar -xf sift.tar.gz +cd .. +./apps/utils/fvecs_to_bin float data/sift/sift_learn.fvecs data/sift/sift_learn.fbin +./apps/utils/fvecs_to_bin float data/sift/sift_query.fvecs data/sift/sift_query.fbin +``` + +Now build and search the index and measure the recall using ground truth computed using brutefoce. +```bash +./apps/utils/compute_groundtruth --data_type float --dist_fn l2 --base_file data/sift/sift_learn.fbin --query_file data/sift/sift_query.fbin --gt_file data/sift/sift_query_learn_gt100 --K 100 +# Using 0.003GB search memory budget for 100K vectors implies 32 byte PQ compression +./apps/build_disk_index --data_type float --dist_fn l2 --data_path data/sift/sift_learn.fbin --index_path_prefix data/sift/disk_index_sift_learn_R32_L50_A1.2 -R 32 -L50 -B 0.003 -M 1 + ./apps/search_disk_index --data_type float --dist_fn l2 --index_path_prefix data/sift/disk_index_sift_learn_R32_L50_A1.2 --query_file data/sift/sift_query.fbin --gt_file data/sift/sift_query_learn_gt100 -K 10 -L 10 20 30 40 50 100 --result_path data/sift/res --num_nodes_to_cache 10000 + ``` + +The search might be slower on machine with remote SSDs. The output lists the query throughput, the mean and 99.9pc latency in microseconds and mean number of 4KB IOs to disk for each `L` parameter provided. 
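The Recall@10 column in the table below is the *K*-recall@*K* described in arg (9): the average overlap between the retrieved top-*K* IDs and the ground-truth top-*K* IDs. Purely as an illustration (this is not part of DiskANN's tooling), the following Python sketch reads the ground-truth layout from arg (8) and computes recall for an `n x K` array of retrieved IDs obtained by other means; only the ground-truth path comes from the example above, and the retrieved IDs are a placeholder.

```python
import numpy as np

def load_groundtruth(path):
    # Ground-truth layout from arg (8): n and d as int32, then n*d int32 IDs,
    # then n*d float32 distances.
    with open(path, "rb") as f:
        n, d = np.fromfile(f, dtype=np.int32, count=2)
        ids = np.fromfile(f, dtype=np.int32, count=n * d).reshape(n, d)
        dists = np.fromfile(f, dtype=np.float32, count=n * d).reshape(n, d)
    return ids, dists

def recall_at_k(retrieved_ids, gt_ids, k):
    # K-recall@K: mean size of the intersection of the retrieved and
    # ground-truth top-K sets, divided by K.
    hits = sum(len(set(r[:k]) & set(g[:k])) for r, g in zip(retrieved_ids, gt_ids))
    return hits / (len(retrieved_ids) * k)

gt_ids, _ = load_groundtruth("data/sift/sift_query_learn_gt100")
# recall = recall_at_k(my_retrieved_ids, gt_ids, 10)   # my_retrieved_ids: placeholder
```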
+
+```
+   L  Beamwidth       QPS  Mean Latency  99.9 Latency  Mean IOs  CPU (s)  Recall@10
+======================================================================================================================
+  10          2  27723.95       2271.92       4700.00      8.81    40.47      81.79
+  20          2  15369.23       4121.04       7576.00     15.93    61.60      96.42
+  30          2  10335.75       6147.14      11424.00     23.30    74.96      98.78
+  40          2   7684.18       8278.83      14714.00     30.78    94.27      99.40
+  50          2   6421.66       9913.28      16550.00     38.35   116.86      99.63
+ 100          2   3337.98      19107.81      29292.00     76.59   226.88      99.91
+```
diff --git a/packages/leann-backend-diskann/third_party/DiskANN/workflows/dynamic_index.md b/packages/leann-backend-diskann/third_party/DiskANN/workflows/dynamic_index.md new file mode 100644 index 0000000..17c3fb3 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/workflows/dynamic_index.md @@ -0,0 +1,187 @@
+
+
+**Usage for dynamic indices**
+================================
+
+A "dynamic" index refers to an index which supports insertion of new points into a (possibly previously built) index as well as deletions of points.
+While eager deletes can be supported by DiskANN, `lazy_deletes` are the preferred method.
+A sequence of lazy deletions must be followed by an invocation of the `consolidate_deletes` method that frees up slots in the index and edits the graph to maintain good recall.
+
+The program `apps/test_insert_deletes_consolidate` demonstrates this functionality. It allows the user to specify which points from the data file will be used
+to initially build the index, which points will be deleted from the index, and which points will be inserted into the index.
+Insertions, searches and lazy deletions can be performed concurrently.
+Consolidation of lazy deletes can be performed synchronously or concurrently with insertions and deletions.
+When modifying the index sequentially, the user has the ability to take *snapshots*--
+that is, save the index to memory for every *m* insertions or deletions instead of only at the end of the build.
+
+The program `apps/test_streaming_scenario` simulates a scenario where the index actively maintains a sliding window of active points from a larger dataset.
+The program starts with an index build over the first `active_window` set of points from a data file.
+The program then simultaneously inserts newer points drawn from the file and deletes older points from the index
+in chunks of `consolidate_interval` points, so that the number of active points in the index stays at approximately `active_window`.
+It terminates when the end of the data file is reached, and the final index has `active_window + consolidate_interval` points.
+
+The index also supports filters on a streaming index: you can use the `insert_point` function overloads to either insert points as before or insert points with labels.
+Additional options are added to support this in `apps/test_streaming_scenario`; please refer to the program arguments for more details.
+
+---
+> Note
+* The index does not support a mix of labeled and unlabeled points: either all points have labels or none do.
+* You can search the built filter index (one built with filters) without filters as well.
+
+> WARNING: Deleting points from a filtered build may cause the quality of the index to degrade and affect recall.
+---
+
+`apps/test_insert_deletes_consolidate` to try inserting, lazy deletes and consolidate_delete
+---------------------------------------------------------------------------------------------
+
+The arguments are as follows:
+
+1. 
**--data_type**: The type of dataset you wish to build an index on. float(32 bit), signed int8 and unsigned uint8 are supported. +2. **--dist_fn**: There are two distance functions supported: minimum Euclidean distance (l2) and maximum inner product (mips). +3. **--data_file**: The input data over which to build an index, in .bin format. The first 4 bytes represent number of points as integer. The next 4 bytes represent the dimension of data as integer. The following `n*d*sizeof(T)` bytes contain the contents of the data one data point in time. sizeof(T) is 1 for byte indices, and 4 for float indices. This will be read by the program as int8_t for signed indices, uint8_t for unsigned indices or float for float indices. +4. **--index_path_prefix**: The constructed index components will be saved to this path prefix. +5. **-R (--max_degree)** (default is 64): the degree of the graph index, typically between 32 and 150. Larger R will result in larger indices and longer indexing times, but might yield better search quality. +6. **-L (--Lbuild)** (default is 100): the size of search list we maintain during index building. Typical values are between 75 to 400. Larger values will take more time to build but result in indices that provide higher recall for the same search complexity. Ensure that value of L is at least that of R value unless you need to build indices really quickly and can somewhat compromise on quality. +7. **--alpha** (default is 1.2): A float value between 1.0 and 1.5 which determines the diameter of the graph, which will be approximately *log n* to the base alpha. Typical values are between 1 to 1.5. 1 will yield the sparsest graph, 1.5 will yield denser graphs. +8. **T (--num_threads)** (default is to get_omp_num_procs()): number of threads used by the index build process. Since the code is highly parallel, the indexing time improves almost linearly with the number of threads (subject to the cores available on the machine and DRAM bandwidth). +9. **--points_to_skip**: number of points to skip from the beginning of the data file. +10. **--max_points_to_insert**: the maximum size of the index. +11. **--beginning_index_size**: how many points to build the initial index with. The number of points inserted dynamically will be max_points_to_insert - beginning_index_size. +12. **--points_per_checkpoint**: when inserting and deleting sequentially, each update is handled in points_per_checkpoint batches. When updating concurrently, insertions are handled in points_per_checkpoint batches but deletions are always processed in a single batch. +13. **--checkpoints_per_snapshot**: when inserting and deleting sequentially, the graph is saved to memory every checkpoints_per_snapshot checkpoints. This is not currently supported for concurrent updates. +14. **--points_to_delete_from_beginning**: how many points to delete from the index, starting in order of insertion. If deletions are concurrent with insertions, points_to_delete_from_beginning cannot be larger than beginning_index_size. +15. **--start_point_norm**: Set the starting node to a random point on a sphere of this radius. A reasonable choice is to set this to the average norm of the data set. Use when starting an index with zero points. +16. **--do_concurrent** (default false): whether to perform conslidate_deletes and other updates concurrently or sequentially. If concurrent is specified, half the threads are used for insertions and half the threads are used for processing deletes. 
Note that insertions are performed before deletions if this flag is set to false, so in this case is possible to delete more than beginning_index_size points. + +`apps/test_streaming_scenario` to try inserting, lazy deletes and consolidate_delete +--------------------------------------------------------------------------------------------- + +The arguments are as follows: + +1. **--data_type**: The type of dataset you wish to build an index on. float(32 bit), signed int8 and unsigned uint8 are supported. +2. **--dist_fn**: There are two distance functions supported: minimum Euclidean distance (l2) and maximum inner product (mips). +3. **--data_file**: The input data over which to build an index, in .bin format. The first 4 bytes represent number of points as integer. The next 4 bytes represent the dimension of data as integer. The following `n*d*sizeof(T)` bytes contain the contents of the data one data point in time. sizeof(T) is 1 for byte indices, and 4 for float indices. This will be read by the program as int8_t for signed indices, uint8_t for unsigned indices or float for float indices. +4. **--index_path_prefix**: The constructed index components will be saved to this path prefix. +5. **-R (--max_degree)** (default is 64): the degree of the graph index, typically between 32 and 150. Larger R will result in larger indices and longer indexing times, but might yield better search quality. +6. **-L (--Lbuild)** (default is 100): the size of search list we maintain during index building. Typical values are between 75 to 400. Larger values will take more time to build but result in indices that provide higher recall for the same search complexity. Ensure that value of L is at least that of R value unless you need to build indices really quickly and can somewhat compromise on quality. +7. **--alpha** (default is 1.2): A float value between 1.0 and 1.5 which determines the diameter of the graph, which will be approximately *log n* to the base alpha. Typical values are between 1 to 1.5. 1 will yield the sparsest graph, 1.5 will yield denser graphs. +8. **--insert_threads**: number of threads used for inserting points in to the index. +9. **--consolidate_threads**: number of threads used for consolidating deletes to the index. +10. **--max_points_to_insert**: Maximum number of points from the data file to insert in to the index. +11. **--active_window**: Approximate number of points in the index at any point. +12. **--consolidate_interval**: Granularity at which insert and delete functions are called. +13. **--start_point_norm**: Set the starting node to a random point on a sphere of this radius. A reasonable choice is to set this to the average norm of the data stream. + +** To build with filters add these optional parameters. + +14. **--label_file**: Filter data for each point, in `.txt` format. Line `i` of the file consists of a comma-separated list of labels corresponding to point `i` in the file passed via `--data_file`. +15. **--FilteredLbuild**: If building a filtered index, we maintain a separate search list from the one provided by `--Lbuild/-L`. +16. **--num_start_points**: number of frozen points in this case should be more then number of unique labels. +17. **--universal_label**: Optionally, the label data may contain a special "universal" label. A point with the universal label can be matched against a query with any label. Note that if a point has the universal label, then the filter data must only have the universal label on the line corresponding. +18. 
**--label_type**: Optionally, type of label to be use its either uint or short, defaulted to `uint`. + +To search the generated index, use the `apps/search_memory_index` program: +--------------------------------------------------------------------------- + + +The arguments are as follows: + +1. **data_type**: The type of dataset you built the index on. float(32 bit), signed int8 and unsigned uint8 are supported. Use the same data type as in arg (1) above used in building the index. +2. **dist_fn**: There are two distance functions supported: l2 and mips. There is an additional *fast_l2* implementation that could provide faster results for small (about a million-sized) indices. Use the same distance as in arg (2) above used in building the index. +3. **memory_index_path**: index built above in argument (4). +4. **T**: The number of threads used for searching. Threads run in parallel and one thread handles one query at a time. More threads will result in higher aggregate query throughput, but may lead to higher per-query latency, especially if the DRAM bandwidth is a bottleneck. So find the balance depending on throughput and latency required for your application. +5. **query_bin**: The queries to be searched on in same binary file format as the data file (ii) above. The query file must be the same type as in argument (1). +6. **truthset.bin**: The ground truth file for the queries in arg (7) and data file used in index construction. The binary file must start with *n*, the number of queries (4 bytes), followed by *d*, the number of ground truth elements per query (4 bytes), followed by `n*d` entries per query representing the d closest IDs per query in integer format, followed by `n*d` entries representing the corresponding distances (float). Total file size is `8 + 4*n*d + 4*n*d` bytes. The groundtruth file, if not available, can be calculated using the program `apps/utils/compute_groundtruth`. Use "null" if you do not have this file and if you do not want to compute recall. +7. **K**: search for *K* neighbors and measure *K*-recall@*K*, meaning the intersection between the retrieved top-*K* nearest neighbors and ground truth *K* nearest neighbors. +8. **result_output_prefix**: search results will be stored in files, one per L value (see next arg), with specified prefix, in binary format. +9. **-L (--search_list)**: A list of search_list sizes to perform search with. Larger parameters will result in slower latencies, but higher accuracies. Must be at least the value of *K* in (7). +10. **--dynamic** (default false): whether the index being searched is dynamic or not. +11. **--tags** (default false): whether to search with tags. This should be used if point *i* in the ground truth file does not correspond the point in the *i*th position in the loaded index. + +** to search with filters add these + +12. **--filter_label**: Filter for each query. For each query, a search is performed with this filter. + +Example with BIGANN: +-------------------- + +This example demonstrates the use of the commands above on a 100K slice of the [BIGANN dataset](http://corpus-texmex.irisa.fr/) with 128 dimensional SIFT descriptors applied to images. + +Download the base and query set and convert the data to binary format +```bash +mkdir -p DiskANN/build/data && cd DiskANN/build/data +wget ftp://ftp.irisa.fr/local/texmex/corpus/sift.tar.gz +tar -xf sift.tar.gz +cd .. 
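+# Note: fvecs_to_bin converts the TexMex .fvecs layout (a 4-byte dimension
+# prefix stored before every vector) into the .bin/.fbin layout described in
+# the --data_file argument above: a 4-byte point count, a 4-byte dimension,
+# then the raw float32 values.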
+./apps/utils/fvecs_to_bin float data/sift/sift_learn.fvecs data/sift/sift_learn.fbin +./apps/utils/fvecs_to_bin float data/sift/sift_query.fvecs data/sift/sift_query.fbin +``` + +The example below tests the following scenario: using a file with 100000 points, the index is incrementally constructed point by point. After the first 50000 ponts are inserted, another concurrent job deletes the first 25000 points from the index and consolidates the index (edit the graph and cleans up resources). At the same time an additional 25000 points (i.e. points 50001 to 75000) are concurrently inserted into the index. Note that the index should be built **before** calculating the ground truth, since the memory index returns the slice of the sift100K dataset that was used to build the final graph (that is, points 25001-75000 in the original index). +```bash +type='float' +data='data/sift/sift_learn.fbin' +query='data/sift/sift_query.fbin' +index_prefix='data/sift/index' +result='data/sift/res' +deletes=25000 +inserts=75000 +deletes_after=50000 +pts_per_checkpoint=10000 +begin=0 +thr=64 +index=${index_prefix}.after-concurrent-delete-del${deletes}-${inserts} +gt_file=data/sift/gt100_learn-conc-${deletes}-${inserts} + + ~/DiskANN/build/apps/test_insert_deletes_consolidate --data_type ${type} --dist_fn l2 --data_path ${data} --index_path_prefix ${index_prefix} -R 64 -L 300 --alpha 1.2 -T ${thr} --points_to_skip 0 --max_points_to_insert ${inserts} --beginning_index_size ${begin} --points_per_checkpoint ${pts_per_checkpoint} --checkpoints_per_snapshot 0 --points_to_delete_from_beginning ${deletes} --start_deletes_after ${deletes_after} --do_concurrent true; + + ~/DiskANN/build/apps/utils/compute_groundtruth --data_type ${type} --dist_fn l2 --base_file ${index}.data --query_file ${query} --K 100 --gt_file ${gt_file} --tags_file ${index}.tags + +~/DiskANN/build/apps/search_memory_index --data_type ${type} --dist_fn l2 --index_path_prefix ${index} --result_path ${result} --query_file ${query} --gt_file ${gt_file} -K 10 -L 20 40 60 80 100 -T ${thr} --dynamic true --tags 1 + ``` + + The example below tests the following scenario: using a file with 100000 points, insert 10000 points at a time. After the first 40000 +are inserted, start deleting the first 10000 points while inserting points 40000--50000. Then delete points 10000--20000 while inserting +points 50000--60000 and so until the index is left with points 60000-100000. + + +Generate labels for filtered build like this. Generating 50 unique labels zipf's distributed for 100K point dataset. 
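(The generator command itself follows immediately below.) For reference, the file it writes is the plain-text `--label_file` format from the argument list above: line *i* holds a comma-separated list of labels for point *i*. The following sketch only illustrates that layout; it is hypothetical, uses uniformly random single labels rather than a Zipf distribution, and writes to a made-up path that the commands below do not use.

```python
import random

num_points, num_labels = 100_000, 50
with open("data/example_labels.txt", "w") as f:  # hypothetical path, for illustration only
    for _ in range(num_points):
        # each line may contain several comma-separated labels; here every point gets one
        f.write(f"{random.randint(1, num_labels)}\n")
```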
+``` +~/DiskANN/build/apps/utils/generate_synthetic_labels --num_labels 50 --num_points 100000 --output_file data/zipf_labels_50_100K.txt --distribution_type zipf +``` + +```bash +type='float' +data='data/sift/sift_learn.fbin' +query='data/sift/sift_query.fbin' +index_prefix='data/sift/idx_learn_str' +result='data/sift/res' +ins_thr=16 +cons_thr=16 +inserts=100000 +active=20000 +cons_int=10000 +index=${index_prefix}.after-streaming-act${active}-cons${cons_int}-max${inserts} +gt=data/sift/gt100_learn-act${active}-cons${cons_int}-max${inserts} +filter_label=1 + +## filter options +universal_label = '0' +label_file = 'data/zipf_labels_50_100K.txt' +num_start_points = 50 +gt_filtered= data/sift/gt100_learn-act${active}-cons${cons_int}-max${inserts}_wlabel_${filter_label} + + +# Without Filters (build and search) +./apps/test_streaming_scenario --data_type ${type} --dist_fn l2 --data_path ${data} --index_path_prefix ${index_prefix} -R 64 -L 600 --alpha 1.2 --insert_threads ${ins_thr} --consolidate_threads ${cons_thr} --max_points_to_insert ${inserts} --active_window ${active} --consolidate_interval ${cons_int} --start_point_norm 508; +./apps/utils/compute_groundtruth --data_type ${type} --dist_fn l2 --base_file ${index}.data --query_file ${query} --K 100 --gt_file ${gt} --tags_file ${index}.tags +./apps/search_memory_index --data_type ${type} --dist_fn l2 --index_path_prefix ${index} --result_path ${result} --query_file ${query} --gt_file ${gt} -K 10 -L 20 40 60 80 100 -T 64 --dynamic true --tags 1 + +# With filters (build and search) + +./apps/test_streaming_scenario --data_type ${type} --num_start_points ${num_start_points} --label_file ${label_file} --universal_label {universal_label} --dist_fn l2 --data_path ${data} --index_path_prefix ${index_prefix} -R 64 -L 600 --alpha 1.2 --insert_threads ${ins_thr} --consolidate_threads ${cons_thr} --max_points_to_insert ${inserts} --active_window ${active} --consolidate_interval ${cons_int} --start_point_norm 508; +./apps/utils/compute_groundtruth_for_filters --data_type ${type} --dist_fn l2 --base_file ${index}.data --query_file ${query} --K 100 --gt_file ${gt_filtered} --label_file ${label_file} --universal_label {universal_label} --filter_label {filter_label} +./apps/search_memory_index --data_type ${type} --filter_label {filter_label} --dist_fn l2 --index_path_prefix ${index} --result_path ${result} --query_file ${query} --gt_file ${gt_filtered} -K 10 -L 20 40 60 80 100 -T 64 --dynamic true --tags 1 +``` diff --git a/packages/leann-backend-diskann/third_party/DiskANN/workflows/filtered_in_memory.md b/packages/leann-backend-diskann/third_party/DiskANN/workflows/filtered_in_memory.md new file mode 100644 index 0000000..fe34b80 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/workflows/filtered_in_memory.md @@ -0,0 +1,126 @@ +**Usage for filtered indices** +================================ +## Building a filtered Index +DiskANN provides two algorithms for building an index with filters support: filtered-vamana and stitched-vamana. Here, we describe the parameters for building both. `apps/build_memory_index.cpp` and `apps/build_stitched_index.cpp` are respectively used to build each kind of index. + +### 1. filtered-vamana + +1. **`--data_type`**: The type of dataset you wish to build an index on. float(32 bit), signed int8 and unsigned uint8 are supported. +2. **`--dist_fn`**: There are two distance functions supported: minimum Euclidean distance (l2) and maximum inner product (mips). +3. 
**`--data_file`**: The input data over which to build an index, in .bin format. The first 4 bytes represent number of points as integer. The next 4 bytes represent the dimension of data as integer. The following `n*d*sizeof(T)` bytes contain the contents of the data one data point in time. sizeof(T) is 1 for byte indices, and 4 for float indices. This will be read by the program as int8_t for signed indices, uint8_t for unsigned indices or float for float indices. +4. **`--index_path_prefix`**: The constructed index components will be saved to this path prefix. +5. **`-R (--max_degree)`** (default is 64): the degree of the graph index, typically between 32 and 150. Larger R will result in larger indices and longer indexing times, but might yield better search quality. +6. **`-L (--Lbuild)`** (default is 100): the size of search list we maintain during index building. Typical values are between 75 to 400. Larger values will take more time to build but result in indices that provide higher recall for the same search complexity. Ensure that value of L is at least that of R value unless you need to build indices really quickly and can somewhat compromise on quality. Note that this is to be used only for building an unfiltered index. The corresponding search list parameter for a filtered index is managed by `--FilteredLbuild`. +7. **`--alpha`** (default is 1.2): A float value between 1.0 and 1.5 which determines the diameter of the graph, which will be approximately *log n* to the base alpha. Typical values are between 1 to 1.5. 1 will yield the sparsest graph, 1.5 will yield denser graphs. +8. **`-T (--num_threads)`** (default is to get_omp_num_procs()): number of threads used by the index build process. Since the code is highly parallel, the indexing time improves almost linearly with the number of threads (subject to the cores available on the machine and DRAM bandwidth). +9. **`--build_PQ_bytes`** (default is 0): Set to a positive value less than the dimensionality of the data to enable faster index build with PQ based distance comparisons. Defaults to using full precision vectors for distance comparisons. +10. **`--use_opq`**: use the flag to use OPQ rather than PQ compression. OPQ is more space efficient for some high dimensional datasets, but also needs a bit more build time. +11. **`--label_file`**: Filter data for each point, in `.txt` format. Line `i` of the file consists of a comma-separated list of filters corresponding to point `i` in the file passed via `--data_file`. +12. **`--universal_label`**: Optionally, the the filter data may contain a "wild-card" filter corresponding to all filters. This is referred to as a universal label. Note that if a point has the universal label, then the filter data must only have the universal label on the line corresponding to said point. +13. **`--FilteredLbuild`**: If building a filtered index, we maintain a separate search list from the one provided by `--Lbuild`. + +### 2. stitched-vamana +1. **`--data_type`**: The type of dataset you wish to build an index on. float(32 bit), signed int8 and unsigned uint8 are supported. +2. **`--data_path`**: The input data over which to build an index, in .bin format. The first 4 bytes represent number of points as integer. The next 4 bytes represent the dimension of data as integer. The following `n*d*sizeof(T)` bytes contain the contents of the data one data point in time. sizeof(T) is 1 for byte indices, and 4 for float indices. 
This will be read by the program as int8_t for signed indices, uint8_t for unsigned indices or float for float indices. +3. **`--index_path_prefix`**: The constructed index components will be saved to this path prefix. +4. **`-R (--max_degree)`** (default is 64): Recall that stitched-vamana first builds a sub-index for each filter. This parameter sets the max degree for each sub-index. +5. **`-L (--Lbuild)`** (default is 100): the size of search list we maintain during sub-index building. Typical values are between 75 to 400. Larger values will take more time to build but result in indices that provide higher recall for the same search complexity. Ensure that value of L is at least that of R value unless you need to build indices really quickly and can somewhat compromise on quality. +6. **`--alpha`** (default is 1.2): A float value between 1.0 and 1.5 which determines the diameter of the graph, which will be approximately *log n* to the base alpha. Typical values are between 1 to 1.5. 1 will yield the sparsest graph, 1.5 will yield denser graphs. +7. **`-T (--num_threads)`** (default is to get_omp_num_procs()): number of threads used by the index build process. Since the code is highly parallel, the indexing time improves almost linearly with the number of threads (subject to the cores available on the machine and DRAM bandwidth). +8. **`--label_file`**: Filter data for each point, in `.txt` format. Line `i` of the file consists of a comma-separated list of filters corresponding to point `i` in the file passed via `--data_file`. +9. **`--universal_label`**: Optionally, the the filter data may contain a "wild-card" filter corresponding to all filters. This is referred to as a universal label. Note that if a point has the universal label, then the filter data must only have the universal label on the line corresponding to said point. +10. **`--Stitched_R`**: Once all sub-indices are "stitched" together, we prune the resulting graph down to the degree given by this parameter. + +## Computing a groundtruth file for a filtered index +In order to evaluate the performance of our algorithms, we can compare its results (i.e. the top `k` neighbors found for each query) against the results found by an exact nearest neighbor search. We provide the program `apps/utils/compute_groundtruth.cpp` to provide the results for the latter: + +1. **`--data_type`** The type of dataset you built an index with. float(32 bit), signed int8 and unsigned uint8 are supported. +2. **`--dist_fn`**: There are two distance functions supported: l2 and mips. +3. **`--base_file`**: The input data over which to build an index, in .bin format. Corresponds to the `--data_path` argument from above. +4. **`--query_file`**: The queries to be searched on, which are stored in the same .bin format. +5. **`--label_file`**: Filter data for each point, in `.txt` format. Line `i` of the file consists of a comma-separated list of filters corresponding to point `i` in the file passed via `--data_file`. +6. **`--filter_label`**: Filter for each query. For each query, a search is performed with this filter. +7. **`--universal_label`**: Corresponds to the universal label passed when building an index with filter support. +8. **`--gt_file`**: File to output results to. 
The binary file starts with `n`, the number of queries (4 bytes), followed by `d`, the number of ground truth elements per query (4 bytes), followed by `n*d` entries per query representing the `d` closest IDs per query in integer format, followed by `n*d` entries representing the corresponding distances (float). Total file size is `8 + 4*n*d + 4*n*d` bytes. +9. **`-K`**: The number of nearest neighbors to compute for each query. + + + +## Searching a Filtered Index + +Searching a filtered index uses the `apps/search_memory_index.cpp`: + +1. **`--data_type`**: The type of dataset you built the index on. float(32 bit), signed int8 and unsigned uint8 are supported. Use the same data type as in arg (1) above used in building the index. +2. **`--dist_fn`**: There are two distance functions supported: l2 and mips. There is an additional *fast_l2* implementation that could provide faster results for small (about a million-sized) indices. Use the same distance as in arg (2) above used in building the index. Note that stitched-vamana only supports l2. +3. **`--index_path_prefix`**: index built above in argument (4). +4. **`--result_path`**: search results will be stored in files, one per L value (see last arg), with specified prefix, in binary format. +5. **`-T (--num_threads)`**: The number of threads used for searching. Threads run in parallel and one thread handles one query at a time. More threads will result in higher aggregate query throughput, but may lead to higher per-query latency, especially if the DRAM bandwidth is a bottleneck. So find the balance depending on throughput and latency required for your application. +6. **`--query_file`**: The queries to be searched on in same binary file format as the data file (ii) above. The query file must be the same type as in argument (1). +7. **`--filter_label`**: The filter to be used when searching an index with filters. For each query, a search is performed with this filter. +8. **`--gt_file`**: The ground truth file for the queries and data file used in index construction. Use "null" if you do not have this file and if you do not want to compute recall. Note that if building a filtered index, a special groundtruth must be computed, as described above. +9. **`-K`**: search for *K* neighbors and measure *K*-recall@*K*, meaning the intersection between the retrieved top-*K* nearest neighbors and ground truth *K* nearest neighbors. +10. **`-L (--search_list)`**: A list of search_list sizes to perform search with. Larger parameters will result in slower latencies, but higher accuracies. Must be atleast the value of *K* in (7). + +Example with SIFT10K: +-------------------- +We demonstrate how to work through this pipeline using the SIFT10K dataset (http://corpus-texmex.irisa.fr/). 
Before starting, make sure you have compiled diskANN according to the instructions in the README and can see the following binaries (paths with respect to repository root): +- `build/apps/utils/compute_groundtruth` +- `build/apps/utils/fvecs_to_bin` +- `build/apps/build_memory_index` +- `build/apps/build_stitched_index` +- `build/apps/search_memory_index` + +Now, download the base and query set and convert the data to binary format: +```bash +wget ftp://ftp.irisa.fr/local/texmex/corpus/siftsmall.tar.gz +tar -zxvf siftsmall.tar.gz +build/apps/utils/fvecs_to_bin float siftsmall/siftsmall_base.fvecs siftsmall/siftsmall_base.bin +build/apps/utils/fvecs_to_bin float siftsmall/siftsmall_query.fvecs siftsmall/siftsmall_query.bin +``` + +We now need to make label file for our vectors. For convenience, we've included a synthetic label generator through which we can generate label file as follow +```bash + build/apps/utils/generate_synthetic_labels --num_labels 50 --num_points 10000 --output_file ./rand_labels_50_10K.txt --distribution_type zipf +``` +Note : `distribution_type` can be `rand` or `zipf` + +This will genearate label file with 10000 data points with 50 distinct labels, ranging from 1 to 50 assigned using zipf distribution (0 is the universal label). + +Label count for each unique label in the generated label file can be printed with help of following command +```bash + build/apps/utils/stats_label_data.exe --labels_file ./rand_labels_50_10K.txt --universal_label 0 +``` + +Note that neither approach is designed for use with random synthetic labels, which will lead to unpredictable accuracy at search time. + +Now build and search the index and measure the recall using ground truth computed using bruteforce. We search for results with the filter 35. +```bash +build/apps/utils/compute_groundtruth --data_type float --dist_fn l2 --base_file siftsmall/siftsmall_base.bin --query_file siftsmall/siftsmall_query.bin --gt_file siftsmall/siftsmall_gt_35.bin --K 100 --label_file ./rand_labels_50_10K.txt --filter_label 35 --universal_label 0 +build/apps/build_memory_index --data_type float --dist_fn l2 --data_path siftsmall/siftsmall_base.bin --index_path_prefix siftsmall/siftsmall_R32_L50_filtered_index -R 32 --FilteredLbuild 50 --alpha 1.2 --label_file ./rand_labels_50_10K.txt --universal_label 0 +build/apps/build_stitched_index --data_type float --data_path siftsmall/siftsmall_base.bin --index_path_prefix siftsmall/siftsmall_R20_L40_SR32_stitched_index -R 20 -L 40 --stitched_R 32 --alpha 1.2 --label_file ./rand_labels_50_10K.txt --universal_label 0 +build/apps/search_memory_index --data_type float --dist_fn l2 --index_path_prefix data/sift/siftsmall_R20_L40_SR32_filtered_index --query_file siftsmall/siftsmall_query.bin --gt_file siftsmall/siftsmall_gt_35.bin --filter_label 35 -K 10 -L 10 20 30 40 50 100 --result_path siftsmall/filtered_search_results +build/apps/search_memory_index --data_type float --dist_fn l2 --index_path_prefix data/sift/siftsmall_R20_L40_SR32_stitched_index --query_file siftsmall/siftsmall_query.bin --gt_file siftsmall/siftsmall_gt_35.bin --filter_label 35 -K 10 -L 10 20 30 40 50 100 --result_path siftsmall/stitched_search_results +``` + + The output of both searches is listed below. The throughput (Queries/sec) as well as mean and 99.9 latency in microseconds for each `L` parameter provided. 
(Measured on a physical machine with a Intel(R) Xeon(R) W-2145 CPU and 64 GB RAM) + ``` + Stitched Index + Ls QPS Avg dist cmps Mean Latency (mus) 99.9 Latency Recall@10 +================================================================================= + 10 31324.39 37.33 116.79 311.90 17.80 + 20 91357.57 44.36 193.06 1042.30 17.90 + 30 69314.48 49.89 258.09 1398.00 18.20 + 40 61421.29 60.52 289.08 1515.00 18.60 + 50 54203.48 70.27 294.26 685.10 19.40 + 100 52904.45 79.00 336.26 1018.80 19.50 + +Filtered Index + Ls QPS Avg dist cmps Mean Latency (mus) 99.9 Latency Recall@10 +================================================================================= + 10 69671.84 21.48 45.25 146.20 11.60 + 20 168577.20 38.94 100.54 547.90 18.20 + 30 127129.41 52.95 126.83 768.40 19.70 + 40 106349.04 62.38 167.23 899.10 20.90 + 50 89952.33 70.95 189.12 1070.80 22.10 + 100 56899.00 112.26 304.67 636.60 23.80 + ``` diff --git a/packages/leann-backend-diskann/third_party/DiskANN/workflows/filtered_ssd_index.md b/packages/leann-backend-diskann/third_party/DiskANN/workflows/filtered_ssd_index.md new file mode 100644 index 0000000..7457d8c --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/workflows/filtered_ssd_index.md @@ -0,0 +1,103 @@ +**Usage for filtered indices** +================================ + +To generate an SSD-friendly index, use the `apps/build_disk_index` program. +---------------------------------------------------------------------------- + +## Building a SSD based filtered Index + +### filtered-vamana SSD Index + +1. **--data_type**: The type of dataset you wish to build an index on. float(32 bit), signed int8 and unsigned uint8 are supported. +2. **--dist_fn**: There are two distance functions supported: minimum Euclidean distance (l2) and maximum inner product (mips). +3. **--data_file**: The input data over which to build an index, in .bin format. The first 4 bytes represent number of points as an integer. The next 4 bytes represent the dimension of data as an integer. The following `n*d*sizeof(T)` bytes contain the contents of the data one data point in time. `sizeof(T)` is 1 for byte indices, and 4 for float indices. This will be read by the program as int8_t for signed indices, uint8_t for unsigned indices or float for float indices. +4. **--index_path_prefix**: the index will span a few files, all beginning with the specified prefix path. For example, if you provide `~/index_test` as the prefix path, build generates files such as `~/index_test_pq_pivots.bin, ~/index_test_pq_compressed.bin, ~/index_test_disk.index, ...`. There may be between 8 and 10 files generated with this prefix depending on how the index is constructed. +5. **-R (--max_degree)** (default is 64): the degree of the graph index, typically between 60 and 150. Larger R will result in larger indices and longer indexing times, but better search quality. +6. **-L (--Lbuild)** (default is 100): the size of search listduring index build. Typical values are between 75 to 200. Larger values will take more time to build but result in indices that provide higher recall for the same search complexity. Use a value for L value that is at least the value of R unless you need to build indices really quickly and can somewhat compromise on quality. Note that this is to be used only for building an unfiltered index. The corresponding search list parameter for a filtered index is managed by `--FilteredLbuild`. +7. **-B (--search_DRAM_budget)**: bound on the memory footprint of the index at search time in GB. 
Once built, the index will use up only the specified RAM limit, the rest will reside on disk. This will dictate how aggressively we compress the data vectors to store in memory. Larger will yield better performance at search time. For an n point index, to use b byte PQ compressed representation in memory, use `B = ((n * b) / 2^30 + (250000*(4*R + sizeof(T)*ndim)) / 2^30)`. The second term in the summation is to allow some buffer for caching about 250,000 nodes from the graph in memory while serving. If you are not sure about this term, add 0.25GB to the first term. +8. **-M (--build_DRAM_budget)**: Limit on the memory allowed for building the index in GB. If you specify a value less than what is required to build the index in one pass, the index is built using a divide and conquer approach so that sub-graphs will fit in the RAM budget. The sub-graphs are overlayed to build the overall index. This approach can be upto 1.5 times slower than building the index in one shot. Allocate as much memory as your RAM allows. +9. **-T (--num_threads)** (default is to get_omp_num_procs()): number of threads used by the index build process. Since the code is highly parallel, the indexing time improves almost linearly with the number of threads (subject to the cores available on the machine and DRAM bandwidth). +10. **--PQ_disk_bytes** (default is 0): Use 0 to store uncompressed data on SSD. This allows the index to asymptote to 100% recall. If your vectors are too large to store in SSD, this parameter provides the option to compress the vectors using PQ for storing on SSD. This will trade off recall. You would also want this to be greater than the number of bytes used for the PQ compressed data stored in-memory +11. **--build_PQ_bytes** (default is 0): Set to a positive value less than the dimensionality of the data to enable faster index build with PQ based distance comparisons. +12. **--use_opq**: use the flag to use OPQ rather than PQ compression. OPQ is more space efficient for some high dimensional datasets, but also needs a bit more build time. +13. **--label_file**: Filter data for each point, in `.txt` format. Line `i` of the file consists of a comma-separated list of filters corresponding to point `i` in the file passed via `--data_file`. +14. **--universal_label**: Optionally, the label data may contain a special "universal" label. A point with the universal label can be matched against a query with any label. Note that if a point has the universal label, then the filter data must only have the universal label on the line corresponding. +15. **--FilteredLbuild**: If building a filtered index, we maintain a separate search list from the one provided by `--Lbuild`. +16. **--filter_threshold**: Threshold to break up the existing nodes to generate new graph internally by breaking dense points where each node will have a maximum F labels. Default value is zero where no break up happens for the dense points. + + +## Computing a groundtruth file for a filtered index +In order to evaluate the performance of our algorithms, we can compare its results (i.e. the top `k` neighbors found for each query) against the results found by an exact nearest neighbor search. We provide the program `apps/utils/compute_groundtruth.cpp` to provide the results for the latter: + +1. **`--data_type`** The type of dataset you built an index with. float(32 bit), signed int8 and unsigned uint8 are supported. +2. **`--dist_fn`**: There are two distance functions supported: l2 and mips. +3. 
**`--base_file`**: The input data over which to build an index, in .bin format. Corresponds to the `--data_path` argument from above. +4. **`--query_file`**: The queries to be searched on, which are stored in the same .bin format. +5. **`--label_file`**: Filter data for each point, in `.txt` format. Line `i` of the file consists of a comma-separated list of filters corresponding to point `i` in the file passed via `--data_file`. +6. **`--filter_label`**: Filter for each query. For each query, a search is performed with this filter. +7. **`--universal_label`**: Corresponds to the universal label passed when building an index with filter support. +8. **`--gt_file`**: File to output results to. The binary file starts with `n`, the number of queries (4 bytes), followed by `d`, the number of ground truth elements per query (4 bytes), followed by `n*d` entries per query representing the `d` closest IDs per query in integer format, followed by `n*d` entries representing the corresponding distances (float). Total file size is `8 + 4*n*d + 4*n*d` bytes. +9. **`-K`**: The number of nearest neighbors to compute for each query. + +## Searching a Filtered Index + +Searching a filtered index uses the `apps/search_disk_index.cpp`: + +1. **--data_type**: The type of dataset you wish to build an index on. float(32 bit), signed int8 and unsigned uint8 are supported. Use the same data type as in arg (1) above used in building the index. +2. **--dist_fn**: There are two distance functions supported: minimum Euclidean distance (l2) and maximum inner product (mips). Use the same distance as in arg (2) above used in building the index. +3. **--index_path_prefix**: same as the prefix used in building the index (see arg 4 above). +4. **--num_nodes_to_cache** (default is 0): While serving the index, the entire graph is stored on SSD. For faster search performance, you can cache a few frequently accessed nodes in memory. +5. **-T (--num_threads)** (default is to get_omp_num_procs()): The number of threads used for searching. Threads run in parallel and one thread handles one query at a time. More threads will result in higher aggregate query throughput, but will also use more IOs/second across the system, which may lead to higher per-query latency. So find the balance depending on the maximum number of IOPs supported by the SSD. +6. **-W (--beamwidth)** (default is 2): The beamwidth to be used for search. This is the maximum number of IO requests each query will issue per iteration of search code. Larger beamwidth will result in fewer IO round-trips per query, but might result in slightly higher total number of IO requests to SSD per query. For the highest query throughput with a fixed SSD IOps rating, use `W=1`. For best latency, use `W=4,8` or higher complexity search. Specifying 0 will optimize the beamwidth depending on the number of threads performing search, but will involve some tuning overhead. +7. **--query_file**: The queries to be searched on in same binary file format as the data file in arg (2) above. The query file must be the same type as argument (1). +8. **--gt_file**: The ground truth file for the queries in arg (7) and data file used in index construction. The binary file must start with *n*, the number of queries (4 bytes), followed by *d*, the number of ground truth elements per query (4 bytes), followed by `n*d` entries per query representing the d closest IDs per query in integer format, followed by `n*d` entries representing the corresponding distances (float). 
Total file size is `8 + 4*n*d + 4*n*d` bytes. The groundtruth file, if not available, can be calculated using the program `apps/utils/compute_groundtruth`. Use "null" if you do not have this file and if you do not want to compute recall. +9. **-K**: search for *K* neighbors and measure *K*-recall@*K*, meaning the intersection between the retrieved top-*K* nearest neighbors and ground truth *K* nearest neighbors. +10. **--result_path**: Search results will be stored in files with specified prefix, in bin format. +11. **-L (--search_list)**: A list of search_list sizes to perform search with. Larger parameters will result in slower latencies, but higher accuracies. Must be atleast the value of *K* in arg (9). +12. **--filter_label**: The filter to be used when searching an index with filters. For each query, a search is performed with this filter. + + +Example with SIFT10K: +-------------------- +We demonstrate how to work through this pipeline using the SIFT10K dataset (http://corpus-texmex.irisa.fr/). Before starting, make sure you have compiled diskANN according to the instructions in the README and can see the following binaries (paths with respect to repository root): +- `build/apps/utils/compute_groundtruth` +- `build/apps/utils/fvecs_to_bin` +- `build/apps/build_disk_index` +- `build/apps/search_disk_index` + +Now, download the base and query set and convert the data to binary format: +```bash +wget ftp://ftp.irisa.fr/local/texmex/corpus/siftsmall.tar.gz +tar -zxvf siftsmall.tar.gz +build/apps/utils/fvecs_to_bin float siftsmall/siftsmall_base.fvecs siftsmall/siftsmall_base.bin +build/apps/utils/fvecs_to_bin float siftsmall/siftsmall_query.fvecs siftsmall/siftsmall_query.bin +``` + +We now need to make label file for our vectors. For convenience, we've included a synthetic label generator through which we can generate label file as follow +```bash + build/apps/utils/generate_synthetic_labels --num_labels 50 --num_points 10000 --output_file ./rand_labels_50_10K.txt --distribution_type zipf +``` +Note : `distribution_type` can be `rand` or `zipf` + +This will genearate label file with 10000 data points with 50 distinct labels, ranging from 1 to 50 assigned using zipf distribution (0 is the universal label). + +Now build and search the index and measure the recall using ground truth computed using bruteforce. We search for results with the filter 35. +```bash +build/apps/utils/compute_groundtruth --data_type float --dist_fn l2 --base_file siftsmall/siftsmall_base.bin --query_file siftsmall/siftsmall_query.bin --gt_file siftsmall_gt_35.bin --K 100 --label_file rand_labels_50_10K.txt --filter_label 35 --universal_label 0 +build/apps/build_disk_index --data_type float --dist_fn l2 --data_path siftsmall/siftsmall_base.bin --index_path_prefix data/sift/siftsmall_R32_L50_filtered -R 32 --FilteredLbuild 50 -B 1 -M 1 --label_file rand_labels_50_10K.txt --universal_label 0 -F 0 +build/apps/search_disk_index --data_type float --dist_fn l2 --index_path_prefix data/sift/siftsmall_R32_L50_filtered --result_path siftsmall/search_35 --query_file siftsmall/siftsmall_query.bin --gt_file siftsmall_gt_35.bin -K 10 -L 10 20 30 40 50 100 --filter_label 35 -W 4 -T 8 +``` + + The output of both searches is listed below. The throughput (Queries/sec) as well as mean and 99.9 latency in microseconds for each `L` parameter provided. 
(Measured on a physical machine with a 11th Gen Intel(R) Core(TM) i7-1185G7 CPU and 32 GB RAM) + + ``` +Filtered Disk Index + L Beamwidth QPS Mean Latency 99.9 Latency Mean IOs CPU (s) Recall@10 +================================================================================================================== + 10 4 1922.02 4062.19 12849.00 15.49 66.19 11.80 + 20 4 4609.91 1618.68 3438.00 30.66 140.48 17.20 + 30 4 3377.83 2250.22 4631.00 42.70 202.39 20.70 + 40 4 2707.77 2817.21 4889.00 51.46 267.03 22.00 + 50 4 2191.56 3509.43 5943.00 60.80 349.10 23.50 +100 4 1257.92 6113.45 7321.00 109.08 609.42 23.90 +``` \ No newline at end of file diff --git a/packages/leann-backend-diskann/third_party/DiskANN/workflows/in_memory_index.md b/packages/leann-backend-diskann/third_party/DiskANN/workflows/in_memory_index.md new file mode 100644 index 0000000..6d78320 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/workflows/in_memory_index.md @@ -0,0 +1,73 @@ +**Usage for in-memory indices** +================================ + +To generate index, use the `apps/build_memory_index` program. +-------------------------------------------------------------- + +The arguments are as follows: + +1. **--data_type**: The type of dataset you wish to build an index on. float(32 bit), signed int8 and unsigned uint8 are supported. +2. **--dist_fn**: There are two distance functions supported: minimum Euclidean distance (l2) and maximum inner product (mips). +3. **--data_file**: The input data over which to build an index, in .bin format. The first 4 bytes represent number of points as integer. The next 4 bytes represent the dimension of data as integer. The following `n*d*sizeof(T)` bytes contain the contents of the data one data point in time. sizeof(T) is 1 for byte indices, and 4 for float indices. This will be read by the program as int8_t for signed indices, uint8_t for unsigned indices or float for float indices. +4. **--index_path_prefix**: The constructed index components will be saved to this path prefix. +5. **-R (--max_degree)** (default is 64): the degree of the graph index, typically between 32 and 150. Larger R will result in larger indices and longer indexing times, but might yield better search quality. +6. **-L (--Lbuild)** (default is 100): the size of search list we maintain during index building. Typical values are between 75 to 400. Larger values will take more time to build but result in indices that provide higher recall for the same search complexity. Ensure that value of L is at least that of R value unless you need to build indices really quickly and can somewhat compromise on quality. +7. **--alpha** (default is 1.2): A float value between 1.0 and 1.5 which determines the diameter of the graph, which will be approximately *log n* to the base alpha. Typical values are between 1 to 1.5. 1 will yield the sparsest graph, 1.5 will yield denser graphs. +8. **T (--num_threads)** (default is to get_omp_num_procs()): number of threads used by the index build process. Since the code is highly parallel, the indexing time improves almost linearly with the number of threads (subject to the cores available on the machine and DRAM bandwidth). +9. **--build_PQ_bytes** (default is 0): Set to a positive value less than the dimensionality of the data to enable faster index build with PQ based distance comparisons. Defaults to using full precision vectors for distance comparisons. +10.**--use_opq**: use the flag to use OPQ rather than PQ compression. 
OPQ is more space efficient for some high dimensional datasets, but also needs a bit more build time.
+
+
+To search the generated index, use the `apps/search_memory_index` program:
+---------------------------------------------------------------------------
+
+
+The arguments are as follows:
+
+1. **data_type**: The type of dataset you built the index on. float (32 bit), signed int8 and unsigned uint8 are supported. Use the same data type as in arg (1) above used in building the index.
+2. **dist_fn**: There are two distance functions supported: l2 and mips. There is an additional *fast_l2* implementation that could provide faster results for small (about a million-sized) indices. Use the same distance as in arg (2) above used in building the index.
+3. **memory_index_path**: the index built above (the path prefix passed as argument (4) when building).
+4. **T**: The number of threads used for searching. Threads run in parallel and one thread handles one query at a time. More threads will result in higher aggregate query throughput, but may lead to higher per-query latency, especially if the DRAM bandwidth is a bottleneck. So find the balance depending on the throughput and latency required for your application.
+5. **query_bin**: The queries to be searched on, in the same binary file format as the data file used to build the index. The query file must be the same type as in argument (1).
+6. **truthset.bin**: The ground truth file for the queries in arg (5) and the data file used in index construction. The binary file must start with *n*, the number of queries (4 bytes), followed by *d*, the number of ground truth elements per query (4 bytes), followed by `n*d` entries per query representing the d closest IDs per query in integer format, followed by `n*d` entries representing the corresponding distances (float). Total file size is `8 + 4*n*d + 4*n*d` bytes. The groundtruth file, if not available, can be calculated using the program `apps/utils/compute_groundtruth`. Use "null" if you do not have this file and if you do not want to compute recall.
+7. **K**: search for *K* neighbors and measure *K*-recall@*K*, meaning the intersection between the retrieved top-*K* nearest neighbors and ground truth *K* nearest neighbors.
+8. **result_output_prefix**: search results will be stored in files, one per L value (see next arg), with the specified prefix, in binary format.
+9. **-L (--search_list)**: A list of search_list sizes to perform search with. Larger parameters will result in slower latencies, but higher accuracies. Must be at least the value of *K* in (7).
+
+
+Example with BIGANN:
+--------------------
+
+This example demonstrates the use of the commands above on a 100K slice of the [BIGANN dataset](http://corpus-texmex.irisa.fr/) with 128 dimensional SIFT descriptors applied to images.
+
+Download the base and query set and convert the data to binary format:
+```bash
+mkdir -p DiskANN/build/data && cd DiskANN/build/data
+wget ftp://ftp.irisa.fr/local/texmex/corpus/sift.tar.gz
+tar -xf sift.tar.gz
+cd ..
+./apps/utils/fvecs_to_bin float data/sift/sift_learn.fvecs data/sift/sift_learn.fbin
+./apps/utils/fvecs_to_bin float data/sift/sift_query.fvecs data/sift/sift_query.fbin
+```
+
+Now build and search the index and measure the recall using ground truth computed using bruteforce, with the commands shown below.
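+Before running those commands, it can help to sanity-check the converted files. The `.fbin` and groundtruth layouts described above are simple enough to read directly; the following is a minimal NumPy sketch (an illustrative aside, not part of the DiskANN tooling; the file names are just the ones produced in this example, and unsigned 32-bit ids are assumed for the groundtruth entries):
+
+```python
+import numpy as np
+
+def read_fbin(path: str) -> np.ndarray:
+    # Layout: [num_points: int32][dim: int32][num_points * dim float32 values]
+    with open(path, "rb") as f:
+        n, d = np.fromfile(f, dtype=np.int32, count=2)
+        return np.fromfile(f, dtype=np.float32, count=n * d).reshape(n, d)
+
+def read_groundtruth(path: str):
+    # Layout: [n: int32][d: int32][n*d 4-byte integer ids][n*d float32 distances]
+    with open(path, "rb") as f:
+        n, d = np.fromfile(f, dtype=np.int32, count=2)
+        ids = np.fromfile(f, dtype=np.uint32, count=n * d).reshape(n, d)
+        dists = np.fromfile(f, dtype=np.float32, count=n * d).reshape(n, d)
+    return ids, dists
+
+base = read_fbin("data/sift/sift_learn.fbin")
+queries = read_fbin("data/sift/sift_query.fbin")
+print(base.shape, queries.shape)  # the learn slice should be (100000, 128)
+```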
+```bash +./apps/utils/compute_groundtruth --data_type float --dist_fn l2 --base_file data/sift/sift_learn.fbin --query_file data/sift/sift_query.fbin --gt_file data/sift/sift_query_learn_gt100 --K 100 +./apps/build_memory_index --data_type float --dist_fn l2 --data_path data/sift/sift_learn.fbin --index_path_prefix data/sift/index_sift_learn_R32_L50_A1.2 -R 32 -L 50 --alpha 1.2 + ./apps/search_memory_index --data_type float --dist_fn l2 --index_path_prefix data/sift/index_sift_learn_R32_L50_A1.2 --query_file data/sift/sift_query.fbin --gt_file data/sift/sift_query_learn_gt100 -K 10 -L 10 20 30 40 50 100 --result_path data/sift/res + ``` + + + The output of search lists the throughput (Queries/sec) as well as mean and 99.9 latency in microseconds for each `L` parameter provided. (We measured on a 32-core 64-vCPU D-series Azure VM) + ``` + Ls QPS Avg dist cmps Mean Latency (mus) 99.9 Latency Recall@10 +================================================================================= + 10 319901.78 348.93 174.51 4943.35 97.80 + 20 346572.72 525.85 183.36 376.60 98.93 + 30 292060.12 688.86 217.73 421.60 99.30 + 40 248945.22 841.74 255.41 476.80 99.45 + 50 215888.81 986.67 294.62 542.21 99.56 + 100 129711.39 1631.94 490.58 848.61 99.88 + ``` + + diff --git a/packages/leann-backend-diskann/third_party/DiskANN/workflows/python.md b/packages/leann-backend-diskann/third_party/DiskANN/workflows/python.md new file mode 100644 index 0000000..d009cd7 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/workflows/python.md @@ -0,0 +1,133 @@ +# `diskannpy` + +We publish (sporadic) builds of `diskann` with python bindings to `pypi.org`, which you can install via `pip install diskannpy`. + +#### Caveats +Native python modules with cffi need to be built for *every* version of Python and *every* OS and *every* native-integration-library. + +This makes for a complicated build matrix that only `(ana)conda` is properly fit to solve. However, we do build wheels +for python 3.9-3.11, across linux, Windows, and macOS (x86_64). These versions are also built against `numpy` 1.25 - +which makes for a hard runtime requirement that can be challenging to use if you are using older or newer versions of numpy. + +There *are* instructions for building against other versions of numpy +[documented in this issue response](https://github.com/microsoft/DiskANN/issues/544#issuecomment-2103437976) if you require a different build. + +# Basic Usage + +`diskannpy` provides access to both building and reading `DiskANN` indices. In all cases, the _lingua franca_ is numpy +ndarrays. Currently, the only supported dtypes are `np.float32`, `np.int8`, and `np.uint8`. + +`diskannpy` provides a number of helpful functions, like reading or writing `diskann` style vector binary files via the +`vectors_to_file` and `vectors_from_file` functions. For a full suite of python functions and their documentation, +please be sure to read the latest documentation @ [https://microsoft.github.io/](https://microsoft.github.io/DiskANN/docs/python/latest/diskannpy.html). + + +## Scenarios +The following scenarios are supported via the `diskannpy` api. + + +### Commonalities +```python +my_dtype = np.float32 # or np.uint8 or np.int8 ONLY +my_set_of_vectors: np.typing.NDArray[my_dtype] = ... # your vectors come from somewhere - you need to bring these! +index_to_identifiers_map: np.typing.NDArray[str] = ... 
# your vectors likely have some kind of external identifier -
+# you need to keep track of the external identifier -> index relationship somehow
+identifiers_to_index_map: dict[str, np.uint32|np.uint64] = ... # your map of your external id to the `diskannpy` internal id
+# diskannpy `query` responses will contain the _internal id only_, and if you don't have these maps you won't be able to
+# know what this relates to
+```
+
+### Build Disk Index
+A disk index is a memory-mapped [vamana](https://proceedings.neurips.cc/paper_files/paper/2019/file/09853c7fb1d3f8ee67a61b6bf4a7f8e6-Paper.pdf)
+index that heavily leans into the hardware speeds of modern NVMe based solid state storage.
+
+This means you can build performant ANN indices that are larger than the plausibly available system memory!
+
+```python
+import numpy as np
+import diskannpy as dap
+
+# useful if your intention is to rank by a directionless cosine angle distance
+vecs = my_set_of_vectors / np.linalg.norm(my_set_of_vectors, axis=1, keepdims=True)
+
+dap.build_disk_index(
+    data=vecs,
+    distance_metric="l2", # can also be cosine, especially if you don't normalize your vectors like above
+    index_directory="/tmp/my_index",
+    complexity=128, # the larger this is, the more candidate points we consider when ranking
+    graph_degree=64, # the beauty of a vamana index is its ability to shard and to transfer long distances across the graph without navigating the whole thing. the larger this value is, the higher quality your results, but the longer it will take to build
+    search_memory_maximum=16.0, # a floating point number to represent how much memory in GB we want to optimize for @ query time
+    build_memory_maximum=100.0, # a floating point number to represent how much memory in GB we are allocating for the index building process
+    num_threads=0, # 0 means use all available threads - but if you are in a shared environment you may need to restrict how greedy you are
+    vector_dtype=my_dtype, # we specified this in the Commonalities section above
+    index_prefix="ann", # ann is the default anyway. all files generated will have the prefix `ann_`, in the form of `f"{index_prefix}_"`
+    pq_disk_bytes=0 # using product quantization of your vectors can still achieve excellent recall characteristics at a fraction of the latency, but we'll do it without PQ for now
+)
+```
+
+### Search Disk Index
+
+Now we want to search our disk index - using a completely different set of vectors that aren't necessarily guaranteed to
+be in our index. We will call this set of vectors `q`, and it is *critical* that they are the same dtype and
+dimensionality as the disk index we have just built.
+
+**Note**: If you manually normalized your indexed vectors prior to building the index, you will *also* need to normalize
+them prior to query!
+
+#### Common index query setup
+
+```python
+index = dap.StaticDiskIndex(
+    index_directory="/tmp/my_index",
+    num_threads=0,
+    num_nodes_to_cache=1_000_000,
+    index_prefix="ann"
+)
+```
+
+#### Individual Vectors
+```python
+some_index: np.uint32 = ... # the index in our `q` array of points that we will be using to query on an individual basis
+my_query_vector: np.typing.NDArray[my_dtype] = q[some_index] # make sure this is a 1-d array of the same dimensionality as your index!
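+# (illustrative addition, not from the original diskannpy docs) a cheap sanity check
+# that the query is a 1-d vector of the dtype we indexed with, per the note above:
+assert my_query_vector.dtype == my_dtype and my_query_vector.ndim == 1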
+# normalize if required by my_query_vector /= np.linalg.norm(my_query_vector) +internal_indices, distances = index.search( + query=my_query_vector, + k_neighbors=25, + complexity=50, # must be as big or bigger than `k_neighbors` +) +``` + +#### Mapping to our External Ids +The internal IDs that diskann returns via query aren't necessarily directly useful to you, and the onus is on you +to figure out what they actually link to via your `index_to_identifiers_map` map. +```python +actual_identifiers = index_to_identifiers_map[internal_indices] # using np fancy indexing (advanced indexing?) to map them all to ids you actually understand +``` + +#### Batch Vectors +```python +import multiprocessing + +internal_indices, distances = index.batch_search( + queries=q, + k_neighbors=25, + complexity=50, + num_threads=multiprocessing.cpu_count(), # there's a current bug where this is not handling the value 0 properly + beam_width=8 # beamwidth is the parameter that indicates our parallelism of individual searches, whereas num_threads + # indicates the number of threads *per* query item in the batch +) +# note that in batch_query form, our internal_indices and distances are 2d arrays +``` + +#### Mapping to our External Ids +Unlike the previous entry, I have yet to get the fancy awesome advanced indexing to work in one shot, we will have +to do this the not-numpy-paragon way. + +```python +actual_neighbors = np.full(shape=internal_indices.shape, dtype=str, fill_value="") +for row in range(internal_indices.shape[0]): + actual_neighbors[row] = index_to_identifiers_map[internal_indices[row]] +``` + +This is only scratching the surface of what `diskannpy` can offer. Please read the API documentation @ [https://microsoft.github.io/](https://microsoft.github.io/DiskANN/docs/python/latest/diskannpy.html) +for more details. diff --git a/packages/leann-backend-diskann/third_party/DiskANN/workflows/rest_api.md b/packages/leann-backend-diskann/third_party/DiskANN/workflows/rest_api.md new file mode 100644 index 0000000..b735fbe --- /dev/null +++ b/packages/leann-backend-diskann/third_party/DiskANN/workflows/rest_api.md @@ -0,0 +1,72 @@ + +**REST service set up for serving DiskANN indices and query interface** +======================================================================= + +Install dependencies on Ubuntu and compile +------------------------------------------ +In addition to the common dependencies in the [README](/README.md), install [Microsoft C++ REST SDK](https://github.com/Microsoft/cpprestsdk). + +```bash +sudo apt install libcpprest-dev +mkdir -p build && cd build +cmake -DRESTAPI=True -DCMAKE_BUILD_TYPE=Release .. +make -j +``` + +Starting an index hosting service +--------------------------------- +Follow the instructions for [building an in-memory DiskANN index](/workflows/in_memory_index.md) or [building an SSD DiskANN index](/workflows/SSD_index.md). Then start a service bound at the appropriate IP:port. For querying from the local machine, you may want to use `http://127.0.0.1:port`. For serving queries originating from remote machines, you may want to use `http://0.0.0.0:port`. + +```bash +# To start serving an in-memory index +./apps/restapi/inmem_server --address --data_type --data_file --index_path_prefix --num_threads --l_search --tags_file [tags_file] + +# To start serving an SSD-based index. 
+./apps/restapi/ssd_server --address --data_type --index_path_prefix --num_nodes_to_cache --num_threads --tags_file [tags_file]
+```
+The `data_type` and the `data_file` should be the same as those used in the construction of the index. The server returns the ids and distances of the vectors in the index closest to the query. The ids are implicitly defined by the order of the vectors in the data file. If you wish to assign a different numbering or GUID or URL to the vectors in the index, use the optional `tags_file`. This should be a file which lists a "tag" string for each vector in the index. The file should contain one string per line. The string on line `n` is considered the tag corresponding to vector `n` in the index (in the implicit order defined in the `data_file`).
+
+For an SSD-based index, specify the number of nodes to cache in memory to make queries faster. For large indices with over 100 million vectors, a typical value for `num_nodes_to_cache` could be 500000. Increase or decrease based on the DRAM footprint desired.
+
+For an SSD-based index, also specify the number of threads used for search by setting the `num_threads` parameter.
+
+You can also query multiple SSD-based indices by listing the prefix of each index in a file (one prefix per line) and passing it through the `index_prefix_paths` parameter to the following command.
+```bash
+multiple_ssdserver --address --data_type --index_prefix_paths --num_nodes_to_cache --num_threads --tags_file [tags_file]
+```
+The service searches each of the indices and aggregates the results based on distances to find the closest neighbors across all indices.
+
+Querying the service
+--------------------
+Issue a json query with the following fields:
+- "k" : The number of nearest neighbors needed
+- "query" : The query vector with a listing of co-ordinates.
+- "query_id" : An id to track the query. Use a unique number to keep track of queries, or "0" if you do not want to keep track.
+- "Ls" : query complexity. Higher Ls takes more milliseconds to process but offers higher recall. Default to 256 if you don't want to tune this.
+
+**Post a json query using python**
+
+```python
+import requests
+jsonquery = {"Ls": 256,
+             "query_id": 1234,
+             "query": [0.00407, 0.01534, 0.02498, ...],
+             "k": 10}
+
+response = requests.post('http://ip_addr:port', json=jsonquery)
+print(response.text)
+```
+
+The response might look like the following. The partition array indicates the ID of the index from which the result was found in the case of a multi-index setup. For a single-index setup, the response would not contain the information on partitions. The response may or may not contain `tags`, depending on whether the server was started with a `tags_file`.
+```json +{"distances":[1.6947,1.6954,1.6972,1.6985,1.6991,1.7003,1.7008,1.7014,1.7021,1.7039],"indices":[8976853,8221762,30909336,13100282,30514543,11537860,7133262,34074869,50512601,17983301],"k":10,"partition":[20,7,20,20,6,6,11,6,6,20],"query_id":1234,"tags":["https://xyz1", "https://xyz2", "https://xyz3", "https://xyz4", "https://xyz5", "https://xyz6", "https://xyz7", "https://xyz8", "https://xyz9", "https://xyz10"],"time_taken_in_us":3245} +``` + +**Command line interface to issue multiple queries from a file** + +To issue `num_queries` queries from `query_file`, run the following command +```bash +client ip_addr:port data_type query_file num_queries Ls" +``` + diff --git a/packages/leann-backend-diskann/third_party/embedding.pb.cc b/packages/leann-backend-diskann/third_party/embedding.pb.cc new file mode 100644 index 0000000..2b10e23 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/embedding.pb.cc @@ -0,0 +1,613 @@ +// Generated by the protocol buffer compiler. DO NOT EDIT! +// source: embedding.proto + +#include "embedding.pb.h" + +#include + +#include +#include +#include +#include +#include +#include +#include +// @@protoc_insertion_point(includes) +#include +namespace protoembedding { +class NodeEmbeddingRequestDefaultTypeInternal { + public: + ::PROTOBUF_NAMESPACE_ID::internal::ExplicitlyConstructed _instance; +} _NodeEmbeddingRequest_default_instance_; +class NodeEmbeddingResponseDefaultTypeInternal { + public: + ::PROTOBUF_NAMESPACE_ID::internal::ExplicitlyConstructed _instance; +} _NodeEmbeddingResponse_default_instance_; +} // namespace protoembedding +static void InitDefaultsscc_info_NodeEmbeddingRequest_embedding_2eproto() { + GOOGLE_PROTOBUF_VERIFY_VERSION; + + { + void* ptr = &::protoembedding::_NodeEmbeddingRequest_default_instance_; + new (ptr) ::protoembedding::NodeEmbeddingRequest(); + ::PROTOBUF_NAMESPACE_ID::internal::OnShutdownDestroyMessage(ptr); + } + ::protoembedding::NodeEmbeddingRequest::InitAsDefaultInstance(); +} + +::PROTOBUF_NAMESPACE_ID::internal::SCCInfo<0> scc_info_NodeEmbeddingRequest_embedding_2eproto = + {{ATOMIC_VAR_INIT(::PROTOBUF_NAMESPACE_ID::internal::SCCInfoBase::kUninitialized), 0, 0, InitDefaultsscc_info_NodeEmbeddingRequest_embedding_2eproto}, {}}; + +static void InitDefaultsscc_info_NodeEmbeddingResponse_embedding_2eproto() { + GOOGLE_PROTOBUF_VERIFY_VERSION; + + { + void* ptr = &::protoembedding::_NodeEmbeddingResponse_default_instance_; + new (ptr) ::protoembedding::NodeEmbeddingResponse(); + ::PROTOBUF_NAMESPACE_ID::internal::OnShutdownDestroyMessage(ptr); + } + ::protoembedding::NodeEmbeddingResponse::InitAsDefaultInstance(); +} + +::PROTOBUF_NAMESPACE_ID::internal::SCCInfo<0> scc_info_NodeEmbeddingResponse_embedding_2eproto = + {{ATOMIC_VAR_INIT(::PROTOBUF_NAMESPACE_ID::internal::SCCInfoBase::kUninitialized), 0, 0, InitDefaultsscc_info_NodeEmbeddingResponse_embedding_2eproto}, {}}; + +static ::PROTOBUF_NAMESPACE_ID::Metadata file_level_metadata_embedding_2eproto[2]; +static constexpr ::PROTOBUF_NAMESPACE_ID::EnumDescriptor const** file_level_enum_descriptors_embedding_2eproto = nullptr; +static constexpr ::PROTOBUF_NAMESPACE_ID::ServiceDescriptor const** file_level_service_descriptors_embedding_2eproto = nullptr; + +const ::PROTOBUF_NAMESPACE_ID::uint32 TableStruct_embedding_2eproto::offsets[] PROTOBUF_SECTION_VARIABLE(protodesc_cold) = { + ~0u, // no _has_bits_ + PROTOBUF_FIELD_OFFSET(::protoembedding::NodeEmbeddingRequest, _internal_metadata_), + ~0u, // no _extensions_ + ~0u, // no _oneof_case_ + ~0u, // no 
_weak_field_map_ + PROTOBUF_FIELD_OFFSET(::protoembedding::NodeEmbeddingRequest, node_ids_), + ~0u, // no _has_bits_ + PROTOBUF_FIELD_OFFSET(::protoembedding::NodeEmbeddingResponse, _internal_metadata_), + ~0u, // no _extensions_ + ~0u, // no _oneof_case_ + ~0u, // no _weak_field_map_ + PROTOBUF_FIELD_OFFSET(::protoembedding::NodeEmbeddingResponse, embeddings_data_), + PROTOBUF_FIELD_OFFSET(::protoembedding::NodeEmbeddingResponse, dimensions_), + PROTOBUF_FIELD_OFFSET(::protoembedding::NodeEmbeddingResponse, missing_ids_), +}; +static const ::PROTOBUF_NAMESPACE_ID::internal::MigrationSchema schemas[] PROTOBUF_SECTION_VARIABLE(protodesc_cold) = { + { 0, -1, sizeof(::protoembedding::NodeEmbeddingRequest)}, + { 6, -1, sizeof(::protoembedding::NodeEmbeddingResponse)}, +}; + +static ::PROTOBUF_NAMESPACE_ID::Message const * const file_default_instances[] = { + reinterpret_cast(&::protoembedding::_NodeEmbeddingRequest_default_instance_), + reinterpret_cast(&::protoembedding::_NodeEmbeddingResponse_default_instance_), +}; + +const char descriptor_table_protodef_embedding_2eproto[] PROTOBUF_SECTION_VARIABLE(protodesc_cold) = + "\n\017embedding.proto\022\016protoembedding\"(\n\024Nod" + "eEmbeddingRequest\022\020\n\010node_ids\030\001 \003(\r\"Y\n\025N" + "odeEmbeddingResponse\022\027\n\017embeddings_data\030" + "\001 \001(\014\022\022\n\ndimensions\030\002 \003(\005\022\023\n\013missing_ids" + "\030\003 \003(\rb\006proto3" + ; +static const ::PROTOBUF_NAMESPACE_ID::internal::DescriptorTable*const descriptor_table_embedding_2eproto_deps[1] = { +}; +static ::PROTOBUF_NAMESPACE_ID::internal::SCCInfoBase*const descriptor_table_embedding_2eproto_sccs[2] = { + &scc_info_NodeEmbeddingRequest_embedding_2eproto.base, + &scc_info_NodeEmbeddingResponse_embedding_2eproto.base, +}; +static ::PROTOBUF_NAMESPACE_ID::internal::once_flag descriptor_table_embedding_2eproto_once; +const ::PROTOBUF_NAMESPACE_ID::internal::DescriptorTable descriptor_table_embedding_2eproto = { + false, false, descriptor_table_protodef_embedding_2eproto, "embedding.proto", 174, + &descriptor_table_embedding_2eproto_once, descriptor_table_embedding_2eproto_sccs, descriptor_table_embedding_2eproto_deps, 2, 0, + schemas, file_default_instances, TableStruct_embedding_2eproto::offsets, + file_level_metadata_embedding_2eproto, 2, file_level_enum_descriptors_embedding_2eproto, file_level_service_descriptors_embedding_2eproto, +}; + +// Force running AddDescriptors() at dynamic initialization time. 
+static bool dynamic_init_dummy_embedding_2eproto = (static_cast(::PROTOBUF_NAMESPACE_ID::internal::AddDescriptors(&descriptor_table_embedding_2eproto)), true); +namespace protoembedding { + +// =================================================================== + +void NodeEmbeddingRequest::InitAsDefaultInstance() { +} +class NodeEmbeddingRequest::_Internal { + public: +}; + +NodeEmbeddingRequest::NodeEmbeddingRequest(::PROTOBUF_NAMESPACE_ID::Arena* arena) + : ::PROTOBUF_NAMESPACE_ID::Message(arena), + node_ids_(arena) { + SharedCtor(); + RegisterArenaDtor(arena); + // @@protoc_insertion_point(arena_constructor:protoembedding.NodeEmbeddingRequest) +} +NodeEmbeddingRequest::NodeEmbeddingRequest(const NodeEmbeddingRequest& from) + : ::PROTOBUF_NAMESPACE_ID::Message(), + node_ids_(from.node_ids_) { + _internal_metadata_.MergeFrom<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(from._internal_metadata_); + // @@protoc_insertion_point(copy_constructor:protoembedding.NodeEmbeddingRequest) +} + +void NodeEmbeddingRequest::SharedCtor() { +} + +NodeEmbeddingRequest::~NodeEmbeddingRequest() { + // @@protoc_insertion_point(destructor:protoembedding.NodeEmbeddingRequest) + SharedDtor(); + _internal_metadata_.Delete<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(); +} + +void NodeEmbeddingRequest::SharedDtor() { + GOOGLE_DCHECK(GetArena() == nullptr); +} + +void NodeEmbeddingRequest::ArenaDtor(void* object) { + NodeEmbeddingRequest* _this = reinterpret_cast< NodeEmbeddingRequest* >(object); + (void)_this; +} +void NodeEmbeddingRequest::RegisterArenaDtor(::PROTOBUF_NAMESPACE_ID::Arena*) { +} +void NodeEmbeddingRequest::SetCachedSize(int size) const { + _cached_size_.Set(size); +} +const NodeEmbeddingRequest& NodeEmbeddingRequest::default_instance() { + ::PROTOBUF_NAMESPACE_ID::internal::InitSCC(&::scc_info_NodeEmbeddingRequest_embedding_2eproto.base); + return *internal_default_instance(); +} + + +void NodeEmbeddingRequest::Clear() { +// @@protoc_insertion_point(message_clear_start:protoembedding.NodeEmbeddingRequest) + ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; + // Prevent compiler warnings about cached_has_bits being unused + (void) cached_has_bits; + + node_ids_.Clear(); + _internal_metadata_.Clear<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(); +} + +const char* NodeEmbeddingRequest::_InternalParse(const char* ptr, ::PROTOBUF_NAMESPACE_ID::internal::ParseContext* ctx) { +#define CHK_(x) if (PROTOBUF_PREDICT_FALSE(!(x))) goto failure + ::PROTOBUF_NAMESPACE_ID::Arena* arena = GetArena(); (void)arena; + while (!ctx->Done(&ptr)) { + ::PROTOBUF_NAMESPACE_ID::uint32 tag; + ptr = ::PROTOBUF_NAMESPACE_ID::internal::ReadTag(ptr, &tag); + CHK_(ptr); + switch (tag >> 3) { + // repeated uint32 node_ids = 1; + case 1: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 10)) { + ptr = ::PROTOBUF_NAMESPACE_ID::internal::PackedUInt32Parser(_internal_mutable_node_ids(), ptr, ctx); + CHK_(ptr); + } else if (static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 8) { + _internal_add_node_ids(::PROTOBUF_NAMESPACE_ID::internal::ReadVarint32(&ptr)); + CHK_(ptr); + } else goto handle_unusual; + continue; + default: { + handle_unusual: + if ((tag & 7) == 4 || tag == 0) { + ctx->SetLastTag(tag); + goto success; + } + ptr = UnknownFieldParse(tag, + _internal_metadata_.mutable_unknown_fields<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(), + ptr, ctx); + CHK_(ptr != nullptr); + continue; + } + } // switch + } // while +success: + return ptr; +failure: + ptr = nullptr; + goto success; +#undef CHK_ +} + 
+::PROTOBUF_NAMESPACE_ID::uint8* NodeEmbeddingRequest::_InternalSerialize( + ::PROTOBUF_NAMESPACE_ID::uint8* target, ::PROTOBUF_NAMESPACE_ID::io::EpsCopyOutputStream* stream) const { + // @@protoc_insertion_point(serialize_to_array_start:protoembedding.NodeEmbeddingRequest) + ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; + (void) cached_has_bits; + + // repeated uint32 node_ids = 1; + { + int byte_size = _node_ids_cached_byte_size_.load(std::memory_order_relaxed); + if (byte_size > 0) { + target = stream->WriteUInt32Packed( + 1, _internal_node_ids(), byte_size, target); + } + } + + if (PROTOBUF_PREDICT_FALSE(_internal_metadata_.have_unknown_fields())) { + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormat::InternalSerializeUnknownFieldsToArray( + _internal_metadata_.unknown_fields<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(::PROTOBUF_NAMESPACE_ID::UnknownFieldSet::default_instance), target, stream); + } + // @@protoc_insertion_point(serialize_to_array_end:protoembedding.NodeEmbeddingRequest) + return target; +} + +size_t NodeEmbeddingRequest::ByteSizeLong() const { +// @@protoc_insertion_point(message_byte_size_start:protoembedding.NodeEmbeddingRequest) + size_t total_size = 0; + + ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; + // Prevent compiler warnings about cached_has_bits being unused + (void) cached_has_bits; + + // repeated uint32 node_ids = 1; + { + size_t data_size = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite:: + UInt32Size(this->node_ids_); + if (data_size > 0) { + total_size += 1 + + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::Int32Size( + static_cast<::PROTOBUF_NAMESPACE_ID::int32>(data_size)); + } + int cached_size = ::PROTOBUF_NAMESPACE_ID::internal::ToCachedSize(data_size); + _node_ids_cached_byte_size_.store(cached_size, + std::memory_order_relaxed); + total_size += data_size; + } + + if (PROTOBUF_PREDICT_FALSE(_internal_metadata_.have_unknown_fields())) { + return ::PROTOBUF_NAMESPACE_ID::internal::ComputeUnknownFieldsSize( + _internal_metadata_, total_size, &_cached_size_); + } + int cached_size = ::PROTOBUF_NAMESPACE_ID::internal::ToCachedSize(total_size); + SetCachedSize(cached_size); + return total_size; +} + +void NodeEmbeddingRequest::MergeFrom(const ::PROTOBUF_NAMESPACE_ID::Message& from) { +// @@protoc_insertion_point(generalized_merge_from_start:protoembedding.NodeEmbeddingRequest) + GOOGLE_DCHECK_NE(&from, this); + const NodeEmbeddingRequest* source = + ::PROTOBUF_NAMESPACE_ID::DynamicCastToGenerated( + &from); + if (source == nullptr) { + // @@protoc_insertion_point(generalized_merge_from_cast_fail:protoembedding.NodeEmbeddingRequest) + ::PROTOBUF_NAMESPACE_ID::internal::ReflectionOps::Merge(from, this); + } else { + // @@protoc_insertion_point(generalized_merge_from_cast_success:protoembedding.NodeEmbeddingRequest) + MergeFrom(*source); + } +} + +void NodeEmbeddingRequest::MergeFrom(const NodeEmbeddingRequest& from) { +// @@protoc_insertion_point(class_specific_merge_from_start:protoembedding.NodeEmbeddingRequest) + GOOGLE_DCHECK_NE(&from, this); + _internal_metadata_.MergeFrom<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(from._internal_metadata_); + ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; + (void) cached_has_bits; + + node_ids_.MergeFrom(from.node_ids_); +} + +void NodeEmbeddingRequest::CopyFrom(const ::PROTOBUF_NAMESPACE_ID::Message& from) { +// @@protoc_insertion_point(generalized_copy_from_start:protoembedding.NodeEmbeddingRequest) + if (&from == this) return; + Clear(); + MergeFrom(from); +} + +void 
NodeEmbeddingRequest::CopyFrom(const NodeEmbeddingRequest& from) { +// @@protoc_insertion_point(class_specific_copy_from_start:protoembedding.NodeEmbeddingRequest) + if (&from == this) return; + Clear(); + MergeFrom(from); +} + +bool NodeEmbeddingRequest::IsInitialized() const { + return true; +} + +void NodeEmbeddingRequest::InternalSwap(NodeEmbeddingRequest* other) { + using std::swap; + _internal_metadata_.Swap<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(&other->_internal_metadata_); + node_ids_.InternalSwap(&other->node_ids_); +} + +::PROTOBUF_NAMESPACE_ID::Metadata NodeEmbeddingRequest::GetMetadata() const { + return GetMetadataStatic(); +} + + +// =================================================================== + +void NodeEmbeddingResponse::InitAsDefaultInstance() { +} +class NodeEmbeddingResponse::_Internal { + public: +}; + +NodeEmbeddingResponse::NodeEmbeddingResponse(::PROTOBUF_NAMESPACE_ID::Arena* arena) + : ::PROTOBUF_NAMESPACE_ID::Message(arena), + dimensions_(arena), + missing_ids_(arena) { + SharedCtor(); + RegisterArenaDtor(arena); + // @@protoc_insertion_point(arena_constructor:protoembedding.NodeEmbeddingResponse) +} +NodeEmbeddingResponse::NodeEmbeddingResponse(const NodeEmbeddingResponse& from) + : ::PROTOBUF_NAMESPACE_ID::Message(), + dimensions_(from.dimensions_), + missing_ids_(from.missing_ids_) { + _internal_metadata_.MergeFrom<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(from._internal_metadata_); + embeddings_data_.UnsafeSetDefault(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited()); + if (!from._internal_embeddings_data().empty()) { + embeddings_data_.Set(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited(), from._internal_embeddings_data(), + GetArena()); + } + // @@protoc_insertion_point(copy_constructor:protoembedding.NodeEmbeddingResponse) +} + +void NodeEmbeddingResponse::SharedCtor() { + ::PROTOBUF_NAMESPACE_ID::internal::InitSCC(&scc_info_NodeEmbeddingResponse_embedding_2eproto.base); + embeddings_data_.UnsafeSetDefault(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited()); +} + +NodeEmbeddingResponse::~NodeEmbeddingResponse() { + // @@protoc_insertion_point(destructor:protoembedding.NodeEmbeddingResponse) + SharedDtor(); + _internal_metadata_.Delete<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(); +} + +void NodeEmbeddingResponse::SharedDtor() { + GOOGLE_DCHECK(GetArena() == nullptr); + embeddings_data_.DestroyNoArena(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited()); +} + +void NodeEmbeddingResponse::ArenaDtor(void* object) { + NodeEmbeddingResponse* _this = reinterpret_cast< NodeEmbeddingResponse* >(object); + (void)_this; +} +void NodeEmbeddingResponse::RegisterArenaDtor(::PROTOBUF_NAMESPACE_ID::Arena*) { +} +void NodeEmbeddingResponse::SetCachedSize(int size) const { + _cached_size_.Set(size); +} +const NodeEmbeddingResponse& NodeEmbeddingResponse::default_instance() { + ::PROTOBUF_NAMESPACE_ID::internal::InitSCC(&::scc_info_NodeEmbeddingResponse_embedding_2eproto.base); + return *internal_default_instance(); +} + + +void NodeEmbeddingResponse::Clear() { +// @@protoc_insertion_point(message_clear_start:protoembedding.NodeEmbeddingResponse) + ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; + // Prevent compiler warnings about cached_has_bits being unused + (void) cached_has_bits; + + dimensions_.Clear(); + missing_ids_.Clear(); + embeddings_data_.ClearToEmpty(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited(), GetArena()); + 
_internal_metadata_.Clear<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(); +} + +const char* NodeEmbeddingResponse::_InternalParse(const char* ptr, ::PROTOBUF_NAMESPACE_ID::internal::ParseContext* ctx) { +#define CHK_(x) if (PROTOBUF_PREDICT_FALSE(!(x))) goto failure + ::PROTOBUF_NAMESPACE_ID::Arena* arena = GetArena(); (void)arena; + while (!ctx->Done(&ptr)) { + ::PROTOBUF_NAMESPACE_ID::uint32 tag; + ptr = ::PROTOBUF_NAMESPACE_ID::internal::ReadTag(ptr, &tag); + CHK_(ptr); + switch (tag >> 3) { + // bytes embeddings_data = 1; + case 1: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 10)) { + auto str = _internal_mutable_embeddings_data(); + ptr = ::PROTOBUF_NAMESPACE_ID::internal::InlineGreedyStringParser(str, ptr, ctx); + CHK_(ptr); + } else goto handle_unusual; + continue; + // repeated int32 dimensions = 2; + case 2: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 18)) { + ptr = ::PROTOBUF_NAMESPACE_ID::internal::PackedInt32Parser(_internal_mutable_dimensions(), ptr, ctx); + CHK_(ptr); + } else if (static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 16) { + _internal_add_dimensions(::PROTOBUF_NAMESPACE_ID::internal::ReadVarint64(&ptr)); + CHK_(ptr); + } else goto handle_unusual; + continue; + // repeated uint32 missing_ids = 3; + case 3: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 26)) { + ptr = ::PROTOBUF_NAMESPACE_ID::internal::PackedUInt32Parser(_internal_mutable_missing_ids(), ptr, ctx); + CHK_(ptr); + } else if (static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 24) { + _internal_add_missing_ids(::PROTOBUF_NAMESPACE_ID::internal::ReadVarint32(&ptr)); + CHK_(ptr); + } else goto handle_unusual; + continue; + default: { + handle_unusual: + if ((tag & 7) == 4 || tag == 0) { + ctx->SetLastTag(tag); + goto success; + } + ptr = UnknownFieldParse(tag, + _internal_metadata_.mutable_unknown_fields<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(), + ptr, ctx); + CHK_(ptr != nullptr); + continue; + } + } // switch + } // while +success: + return ptr; +failure: + ptr = nullptr; + goto success; +#undef CHK_ +} + +::PROTOBUF_NAMESPACE_ID::uint8* NodeEmbeddingResponse::_InternalSerialize( + ::PROTOBUF_NAMESPACE_ID::uint8* target, ::PROTOBUF_NAMESPACE_ID::io::EpsCopyOutputStream* stream) const { + // @@protoc_insertion_point(serialize_to_array_start:protoembedding.NodeEmbeddingResponse) + ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; + (void) cached_has_bits; + + // bytes embeddings_data = 1; + if (this->embeddings_data().size() > 0) { + target = stream->WriteBytesMaybeAliased( + 1, this->_internal_embeddings_data(), target); + } + + // repeated int32 dimensions = 2; + { + int byte_size = _dimensions_cached_byte_size_.load(std::memory_order_relaxed); + if (byte_size > 0) { + target = stream->WriteInt32Packed( + 2, _internal_dimensions(), byte_size, target); + } + } + + // repeated uint32 missing_ids = 3; + { + int byte_size = _missing_ids_cached_byte_size_.load(std::memory_order_relaxed); + if (byte_size > 0) { + target = stream->WriteUInt32Packed( + 3, _internal_missing_ids(), byte_size, target); + } + } + + if (PROTOBUF_PREDICT_FALSE(_internal_metadata_.have_unknown_fields())) { + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormat::InternalSerializeUnknownFieldsToArray( + _internal_metadata_.unknown_fields<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(::PROTOBUF_NAMESPACE_ID::UnknownFieldSet::default_instance), target, stream); + } + // 
@@protoc_insertion_point(serialize_to_array_end:protoembedding.NodeEmbeddingResponse) + return target; +} + +size_t NodeEmbeddingResponse::ByteSizeLong() const { +// @@protoc_insertion_point(message_byte_size_start:protoembedding.NodeEmbeddingResponse) + size_t total_size = 0; + + ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; + // Prevent compiler warnings about cached_has_bits being unused + (void) cached_has_bits; + + // repeated int32 dimensions = 2; + { + size_t data_size = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite:: + Int32Size(this->dimensions_); + if (data_size > 0) { + total_size += 1 + + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::Int32Size( + static_cast<::PROTOBUF_NAMESPACE_ID::int32>(data_size)); + } + int cached_size = ::PROTOBUF_NAMESPACE_ID::internal::ToCachedSize(data_size); + _dimensions_cached_byte_size_.store(cached_size, + std::memory_order_relaxed); + total_size += data_size; + } + + // repeated uint32 missing_ids = 3; + { + size_t data_size = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite:: + UInt32Size(this->missing_ids_); + if (data_size > 0) { + total_size += 1 + + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::Int32Size( + static_cast<::PROTOBUF_NAMESPACE_ID::int32>(data_size)); + } + int cached_size = ::PROTOBUF_NAMESPACE_ID::internal::ToCachedSize(data_size); + _missing_ids_cached_byte_size_.store(cached_size, + std::memory_order_relaxed); + total_size += data_size; + } + + // bytes embeddings_data = 1; + if (this->embeddings_data().size() > 0) { + total_size += 1 + + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::BytesSize( + this->_internal_embeddings_data()); + } + + if (PROTOBUF_PREDICT_FALSE(_internal_metadata_.have_unknown_fields())) { + return ::PROTOBUF_NAMESPACE_ID::internal::ComputeUnknownFieldsSize( + _internal_metadata_, total_size, &_cached_size_); + } + int cached_size = ::PROTOBUF_NAMESPACE_ID::internal::ToCachedSize(total_size); + SetCachedSize(cached_size); + return total_size; +} + +void NodeEmbeddingResponse::MergeFrom(const ::PROTOBUF_NAMESPACE_ID::Message& from) { +// @@protoc_insertion_point(generalized_merge_from_start:protoembedding.NodeEmbeddingResponse) + GOOGLE_DCHECK_NE(&from, this); + const NodeEmbeddingResponse* source = + ::PROTOBUF_NAMESPACE_ID::DynamicCastToGenerated( + &from); + if (source == nullptr) { + // @@protoc_insertion_point(generalized_merge_from_cast_fail:protoembedding.NodeEmbeddingResponse) + ::PROTOBUF_NAMESPACE_ID::internal::ReflectionOps::Merge(from, this); + } else { + // @@protoc_insertion_point(generalized_merge_from_cast_success:protoembedding.NodeEmbeddingResponse) + MergeFrom(*source); + } +} + +void NodeEmbeddingResponse::MergeFrom(const NodeEmbeddingResponse& from) { +// @@protoc_insertion_point(class_specific_merge_from_start:protoembedding.NodeEmbeddingResponse) + GOOGLE_DCHECK_NE(&from, this); + _internal_metadata_.MergeFrom<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(from._internal_metadata_); + ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; + (void) cached_has_bits; + + dimensions_.MergeFrom(from.dimensions_); + missing_ids_.MergeFrom(from.missing_ids_); + if (from.embeddings_data().size() > 0) { + _internal_set_embeddings_data(from._internal_embeddings_data()); + } +} + +void NodeEmbeddingResponse::CopyFrom(const ::PROTOBUF_NAMESPACE_ID::Message& from) { +// @@protoc_insertion_point(generalized_copy_from_start:protoembedding.NodeEmbeddingResponse) + if (&from == this) return; + Clear(); + MergeFrom(from); +} + +void NodeEmbeddingResponse::CopyFrom(const 
NodeEmbeddingResponse& from) { +// @@protoc_insertion_point(class_specific_copy_from_start:protoembedding.NodeEmbeddingResponse) + if (&from == this) return; + Clear(); + MergeFrom(from); +} + +bool NodeEmbeddingResponse::IsInitialized() const { + return true; +} + +void NodeEmbeddingResponse::InternalSwap(NodeEmbeddingResponse* other) { + using std::swap; + _internal_metadata_.Swap<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(&other->_internal_metadata_); + dimensions_.InternalSwap(&other->dimensions_); + missing_ids_.InternalSwap(&other->missing_ids_); + embeddings_data_.Swap(&other->embeddings_data_, &::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited(), GetArena()); +} + +::PROTOBUF_NAMESPACE_ID::Metadata NodeEmbeddingResponse::GetMetadata() const { + return GetMetadataStatic(); +} + + +// @@protoc_insertion_point(namespace_scope) +} // namespace protoembedding +PROTOBUF_NAMESPACE_OPEN +template<> PROTOBUF_NOINLINE ::protoembedding::NodeEmbeddingRequest* Arena::CreateMaybeMessage< ::protoembedding::NodeEmbeddingRequest >(Arena* arena) { + return Arena::CreateMessageInternal< ::protoembedding::NodeEmbeddingRequest >(arena); +} +template<> PROTOBUF_NOINLINE ::protoembedding::NodeEmbeddingResponse* Arena::CreateMaybeMessage< ::protoembedding::NodeEmbeddingResponse >(Arena* arena) { + return Arena::CreateMessageInternal< ::protoembedding::NodeEmbeddingResponse >(arena); +} +PROTOBUF_NAMESPACE_CLOSE + +// @@protoc_insertion_point(global_scope) +#include diff --git a/packages/leann-backend-diskann/third_party/embedding.proto b/packages/leann-backend-diskann/third_party/embedding.proto new file mode 100644 index 0000000..98e7713 --- /dev/null +++ b/packages/leann-backend-diskann/third_party/embedding.proto @@ -0,0 +1,13 @@ +syntax = "proto3"; + +package protoembedding; + +message NodeEmbeddingRequest { + repeated uint32 node_ids = 1; +} + +message NodeEmbeddingResponse { + bytes embeddings_data = 1; // All embedded binary datas + repeated int32 dimensions = 2; // Shape [batch_size, embedding_dim] + repeated uint32 missing_ids = 3; // Missing node ids +} \ No newline at end of file diff --git a/packages/leann-backend-hnsw/CMakeLists.txt b/packages/leann-backend-hnsw/CMakeLists.txt new file mode 100644 index 0000000..6865da3 --- /dev/null +++ b/packages/leann-backend-hnsw/CMakeLists.txt @@ -0,0 +1,12 @@ +# 最终简化版 +cmake_minimum_required(VERSION 3.24) +project(leann_backend_hnsw_wrapper) + +set(FAISS_ENABLE_PYTHON ON CACHE BOOL "" FORCE) +set(FAISS_ENABLE_GPU OFF CACHE BOOL "" FORCE) +set(FAISS_ENABLE_EXTRAS OFF CACHE BOOL "" FORCE) +set(BUILD_TESTING OFF CACHE BOOL "" FORCE) +set(FAISS_ENABLE_C_API OFF CACHE BOOL "" FORCE) +set(FAISS_OPT_LEVEL "generic" CACHE STRING "" FORCE) + +add_subdirectory(third_party/faiss) \ No newline at end of file diff --git a/packages/leann-backend-hnsw/leann_backend_hnsw/__init__.py b/packages/leann-backend-hnsw/leann_backend_hnsw/__init__.py new file mode 100644 index 0000000..53fd67a --- /dev/null +++ b/packages/leann-backend-hnsw/leann_backend_hnsw/__init__.py @@ -0,0 +1 @@ +from . 
import hnsw_backend diff --git a/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_backend.py b/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_backend.py new file mode 100644 index 0000000..53e7e91 --- /dev/null +++ b/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_backend.py @@ -0,0 +1,313 @@ +import numpy as np +import os +import json +import struct +from pathlib import Path +from typing import Dict +import contextlib +import threading +import time +import atexit +import socket +import subprocess +import sys + +# 文件: packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_backend.py + +# ... (其他 import 保持不变) ... + +from leann.registry import register_backend +from leann.interface import ( + LeannBackendFactoryInterface, + LeannBackendBuilderInterface, + LeannBackendSearcherInterface +) + +def get_metric_map(): + from . import faiss + return { + "mips": faiss.METRIC_INNER_PRODUCT, + "l2": faiss.METRIC_L2, + "cosine": faiss.METRIC_INNER_PRODUCT, # Will need normalization + } + +def _check_port(port: int) -> bool: + """Check if a port is in use""" + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + return s.connect_ex(('localhost', port)) == 0 + +class HNSWEmbeddingServerManager: + """ + HNSW-specific embedding server manager that handles the lifecycle of the embedding server process. + Mirrors the DiskANN EmbeddingServerManager architecture. + """ + def __init__(self): + self.server_process = None + self.server_port = None + atexit.register(self.stop_server) + + def start_server(self, port=5556, model_name="sentence-transformers/all-mpnet-base-v2", passages_file=None): + """ + Start the HNSW embedding server process. + + Args: + port: ZMQ port for the server + model_name: Name of the embedding model to use + passages_file: Optional path to passages JSON file + """ + if self.server_process and self.server_process.poll() is None: + print(f"INFO: Reusing existing HNSW server process for this session (PID {self.server_process.pid})") + return True + + # Check if port is already in use + if _check_port(port): + print(f"WARNING: Port {port} is already in use. 
Assuming an external HNSW server is running and connecting to it.") + return True + + print(f"INFO: Starting session-level HNSW embedding server as a background process...") + + try: + command = [ + sys.executable, + "-m", "packages.leann-backend-hnsw.src.leann_backend_hnsw.hnsw_embedding_server", + "--zmq-port", str(port), + "--model-name", model_name + ] + + # Add passages file if provided + if passages_file: + command.extend(["--passages-file", str(passages_file)]) + + project_root = Path(__file__).parent.parent.parent.parent + print(f"INFO: Running HNSW command from project root: {project_root}") + + self.server_process = subprocess.Popen( + command, + cwd=project_root, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + encoding='utf-8' + ) + self.server_port = port + print(f"INFO: HNSW server process started with PID: {self.server_process.pid}") + + max_wait, wait_interval = 30, 0.5 + for _ in range(int(max_wait / wait_interval)): + if _check_port(port): + print(f"✅ HNSW embedding server is up and ready for this session.") + log_thread = threading.Thread(target=self._log_monitor, daemon=True) + log_thread.start() + return True + if self.server_process.poll() is not None: + print("❌ ERROR: HNSW server process terminated unexpectedly during startup.") + self._log_monitor() + return False + time.sleep(wait_interval) + + print(f"❌ ERROR: HNSW server process failed to start listening within {max_wait} seconds.") + self.stop_server() + return False + + except Exception as e: + print(f"❌ ERROR: Failed to start HNSW embedding server process: {e}") + return False + + def _log_monitor(self): + """Monitor server logs""" + if not self.server_process: + return + try: + if self.server_process.stdout: + for line in iter(self.server_process.stdout.readline, ''): + print(f"[HNSWEmbeddingServer LOG]: {line.strip()}") + self.server_process.stdout.close() + if self.server_process.stderr: + for line in iter(self.server_process.stderr.readline, ''): + print(f"[HNSWEmbeddingServer ERROR]: {line.strip()}") + self.server_process.stderr.close() + except Exception as e: + print(f"HNSW Log monitor error: {e}") + + def stop_server(self): + """Stop the HNSW embedding server process""" + if self.server_process and self.server_process.poll() is None: + print(f"INFO: Terminating HNSW session server process (PID: {self.server_process.pid})...") + self.server_process.terminate() + try: + self.server_process.wait(timeout=5) + print("INFO: HNSW server process terminated.") + except subprocess.TimeoutExpired: + print("WARNING: HNSW server process did not terminate gracefully, killing it.") + self.server_process.kill() + self.server_process = None + +@register_backend("hnsw") +class HNSWBackend(LeannBackendFactoryInterface): + @staticmethod + def builder(**kwargs) -> LeannBackendBuilderInterface: + return HNSWBuilder(**kwargs) + + @staticmethod + def searcher(index_path: str, **kwargs) -> LeannBackendSearcherInterface: + path = Path(index_path) + meta_path = path.parent / f"{path.stem}.hnsw.meta.json" + if not meta_path.exists(): + raise FileNotFoundError(f"Leann metadata file not found at {meta_path}. 
Cannot infer vector dimension for searcher.") + + with open(meta_path, 'r') as f: + meta = json.load(f) + + try: + from sentence_transformers import SentenceTransformer + model = SentenceTransformer(meta.get("embedding_model")) + dimensions = model.get_sentence_embedding_dimension() + kwargs['dimensions'] = dimensions + except ImportError: + raise ImportError("sentence-transformers is required to infer embedding dimensions. Please install it.") + except Exception as e: + raise RuntimeError(f"Could not load SentenceTransformer model to get dimension: {e}") + + return HNSWSearcher(index_path, **kwargs) + +class HNSWBuilder(LeannBackendBuilderInterface): + def __init__(self, **kwargs): + self.build_params = kwargs + + def build(self, data: np.ndarray, index_path: str, **kwargs): + """Build HNSW index using FAISS""" + from . import faiss + + path = Path(index_path) + index_dir = path.parent + index_prefix = path.stem + + index_dir.mkdir(parents=True, exist_ok=True) + + if data.dtype != np.float32: + data = data.astype(np.float32) + if not data.flags['C_CONTIGUOUS']: + data = np.ascontiguousarray(data) + + build_kwargs = {**self.build_params, **kwargs} + metric_str = build_kwargs.get("distance_metric", "mips").lower() + metric_enum = get_metric_map().get(metric_str) + if metric_enum is None: + raise ValueError(f"Unsupported distance_metric '{metric_str}'.") + + # HNSW parameters + M = build_kwargs.get("M", 32) # Max connections per layer + efConstruction = build_kwargs.get("efConstruction", 200) # Size of the dynamic candidate list for construction + dim = data.shape[1] + + print(f"INFO: Building HNSW index for {data.shape[0]} vectors with metric {metric_enum}...") + + try: + # Create HNSW index + if metric_enum == faiss.METRIC_INNER_PRODUCT: + index = faiss.IndexHNSWFlat(dim, M, metric_enum) + else: # L2 + index = faiss.IndexHNSWFlat(dim, M, metric_enum) + + # Set construction parameters + index.hnsw.efConstruction = efConstruction + + # Normalize vectors if using cosine similarity + if metric_str == "cosine": + faiss.normalize_L2(data) + + # Add vectors to index + index.add(data.shape[0], faiss.swig_ptr(data)) + + # Save index + index_file = index_dir / f"{index_prefix}.index" + faiss.write_index(index, str(index_file)) + + print(f"✅ HNSW index built successfully at '{index_file}'") + + except Exception as e: + print(f"💥 ERROR: HNSW index build failed. Exception: {e}") + raise + +class HNSWSearcher(LeannBackendSearcherInterface): + def __init__(self, index_path: str, **kwargs): + from . import faiss + path = Path(index_path) + index_dir = path.parent + index_prefix = path.stem + + metric_str = kwargs.get("distance_metric", "mips").lower() + metric_enum = get_metric_map().get(metric_str) + if metric_enum is None: + raise ValueError(f"Unsupported distance_metric '{metric_str}'.") + + dimensions = kwargs.get("dimensions") + if not dimensions: + raise ValueError("Vector dimension not provided to HNSWSearcher.") + + try: + # Load FAISS HNSW index + index_file = index_dir / f"{index_prefix}.index" + if not index_file.exists(): + raise FileNotFoundError(f"HNSW index file not found at {index_file}") + + self._index = faiss.read_index(str(index_file)) + self.metric_str = metric_str + self.embedding_server_manager = HNSWEmbeddingServerManager() + print("✅ HNSW index loaded successfully.") + + except Exception as e: + print(f"💥 ERROR: Failed to load HNSW index. 
Exception: {e}") + raise + + def search(self, query: np.ndarray, top_k: int, **kwargs) -> Dict[str, any]: + """Search using HNSW index with optional recompute functionality""" + ef = kwargs.get("ef", 200) # Size of the dynamic candidate list for search + + # Recompute parameters + recompute_neighbor_embeddings = kwargs.get("recompute_neighbor_embeddings", False) + zmq_port = kwargs.get("zmq_port", 5556) + embedding_model = kwargs.get("embedding_model", "sentence-transformers/all-mpnet-base-v2") + passages_file = kwargs.get("passages_file", None) + + if recompute_neighbor_embeddings: + print(f"INFO: HNSW ZMQ mode enabled - ensuring embedding server is running") + + if not self.embedding_server_manager.start_server(zmq_port, embedding_model, passages_file): + print(f"WARNING: Failed to start HNSW embedding server, falling back to standard search") + kwargs['recompute_neighbor_embeddings'] = False + + if query.dtype != np.float32: + query = query.astype(np.float32) + if query.ndim == 1: + query = np.expand_dims(query, axis=0) + + # Normalize query if using cosine similarity + if self.metric_str == "cosine": + faiss.normalize_L2(query) + + try: + # Set search parameter + self._index.hnsw.efSearch = ef + + if recompute_neighbor_embeddings: + # Use custom search with recompute + # This would require implementing custom HNSW search logic + # For now, we'll fall back to standard search + print("WARNING: Recompute functionality for HNSW not yet implemented, using standard search") + distances, labels = self._index.search(query, top_k) + else: + # Standard FAISS search + distances, labels = self._index.search(query, top_k) + + return {"labels": labels, "distances": distances} + + except Exception as e: + print(f"💥 ERROR: HNSW search failed. Exception: {e}") + batch_size = query.shape[0] + return {"labels": np.full((batch_size, top_k), -1, dtype=np.int64), + "distances": np.full((batch_size, top_k), float('inf'), dtype=np.float32)} + + def __del__(self): + if hasattr(self, 'embedding_server_manager'): + self.embedding_server_manager.stop_server() \ No newline at end of file diff --git a/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_embedding_server.py b/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_embedding_server.py new file mode 100644 index 0000000..4c1aee0 --- /dev/null +++ b/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_embedding_server.py @@ -0,0 +1,583 @@ +#!/usr/bin/env python3 +""" +HNSW-specific embedding server with removed config.py dependencies +Based on DiskANN embedding server architecture +""" + +import pickle +import argparse +import threading +import time +from transformers import AutoTokenizer, AutoModel +import os +from contextlib import contextmanager +import zmq +import numpy as np +import msgpack +import json +from pathlib import Path +from typing import Dict, Any, Optional, Union + +RED = "\033[91m" +RESET = "\033[0m" + +def is_similarity_metric(): + """ + Check if the metric type is similarity-based (like inner product). 
+ 0 = L2 (distance metric), 1 = Inner Product (similarity metric) + """ + return True # 1 is METRIC_INNER_PRODUCT in FAISS + +# Function for E5-style average pooling +import torch +from torch import Tensor +import torch.nn.functional as F + +def e5_average_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor: + last_hidden = last_hidden_states.masked_fill(~attention_mask[..., None].bool(), 0.0) + return last_hidden.sum(dim=1) / attention_mask.sum(dim=1)[..., None] + +class SimplePassageLoader: + """ + Simple passage loader that replaces config.py dependencies + """ + def __init__(self, passages_data: Optional[Dict[str, Any]] = None): + self.passages_data = passages_data or {} + + def __getitem__(self, passage_id: Union[str, int]) -> Dict[str, str]: + """Get passage by ID""" + str_id = str(passage_id) + if str_id in self.passages_data: + return {"text": self.passages_data[str_id]} + else: + # Return empty text for missing passages + return {"text": ""} + + def __len__(self) -> int: + return len(self.passages_data) + +def load_passages_from_file(passages_file: str) -> SimplePassageLoader: + """ + Load passages from a JSON file + Expected format: {"passage_id": "passage_text", ...} + """ + if not os.path.exists(passages_file): + print(f"Warning: Passages file {passages_file} not found. Using empty loader.") + return SimplePassageLoader() + + try: + with open(passages_file, 'r', encoding='utf-8') as f: + passages_data = json.load(f) + print(f"Loaded {len(passages_data)} passages from {passages_file}") + return SimplePassageLoader(passages_data) + except Exception as e: + print(f"Error loading passages from {passages_file}: {e}") + return SimplePassageLoader() + +def create_hnsw_embedding_server( + passages_file: Optional[str] = None, + passages_data: Optional[Dict[str, str]] = None, + embeddings_file: Optional[str] = None, + use_fp16: bool = True, + use_int8: bool = False, + use_cuda_graphs: bool = False, + zmq_port: int = 5555, + max_batch_size: int = 128, + model_name: str = "sentence-transformers/all-mpnet-base-v2", + custom_max_length_param: Optional[int] = None, +): + """ + Create and start a ZMQ-based embedding server for HNSW backend. 
+ + Args: + passages_file: Path to JSON file containing passage ID -> text mapping + passages_data: Direct passage data dict (alternative to passages_file) + embeddings_file: Path to pre-computed embeddings file (optional) + use_fp16: Whether to use FP16 precision + use_int8: Whether to use INT8 quantization + use_cuda_graphs: Whether to use CUDA graphs + zmq_port: ZMQ port to bind to + max_batch_size: Maximum batch size for processing + model_name: Transformer model name + custom_max_length_param: Custom max sequence length + """ + tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True) + + # Device setup + mps_available = hasattr(torch.backends, 'mps') and torch.backends.mps.is_available() + cuda_available = torch.cuda.is_available() + + print(f"MPS available: {mps_available}") + print(f"CUDA available: {cuda_available}") + + if cuda_available: + device = torch.device("cuda") + print("Using CUDA device") + elif mps_available: + device = torch.device("mps") + print("Using MPS device (Apple Silicon)") + else: + device = torch.device("cpu") + print("Using CPU device (no GPU acceleration available)") + + # Load model to the appropriate device + print(f"Starting HNSW server on port {zmq_port} with model {model_name}") + model = AutoModel.from_pretrained(model_name).to(device).eval() + + # Check port availability + import socket + def check_port(port): + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + return s.connect_ex(('localhost', port)) == 0 + + if check_port(zmq_port): + print(f"{RED}Port {zmq_port} is already in use{RESET}") + return + + # Apply model optimizations (similar to DiskANN version) + if use_fp16 and (cuda_available or mps_available): + model = model.half() + model = torch.compile(model) + print(f"Using FP16 precision with model: {model_name}") + elif use_int8: + print("- Using TorchAO for Int8 dynamic activation and Int8 weight quantization") + from torchao.quantization import quantize_, Int8DynamicActivationInt8WeightConfig + quantize_(model, Int8DynamicActivationInt8WeightConfig()) + model = torch.compile(model) + model.eval() + print("- Model successfully quantized and compiled") + + # Load passages + if passages_data: + passages = SimplePassageLoader(passages_data) + print(f"Using provided passages data: {len(passages)} passages") + elif passages_file: + passages = load_passages_from_file(passages_file) + else: + passages = SimplePassageLoader() + print("No passages provided, using empty loader") + + # Load embeddings if provided + _embeddings = None + if embeddings_file and os.path.exists(embeddings_file): + try: + with open(embeddings_file, "rb") as f: + _embeddings = pickle.load(f) + print(f"Loaded embeddings from {embeddings_file}") + except Exception as e: + print(f"Error loading embeddings: {e}") + + class DeviceTimer: + """Device event-based timer for accurate timing.""" + def __init__(self, name="", device=device): + self.name = name + self.device = device + self.start_time = 0 + self.end_time = 0 + + if cuda_available: + self.start_event = torch.cuda.Event(enable_timing=True) + self.end_event = torch.cuda.Event(enable_timing=True) + else: + self.start_event = None + self.end_event = None + + @contextmanager + def timing(self): + self.start() + yield + self.end() + + def start(self): + if cuda_available: + torch.cuda.synchronize() + self.start_event.record() + else: + if self.device.type == "mps": + torch.mps.synchronize() + self.start_time = time.time() + + def end(self): + if cuda_available: + self.end_event.record() + 
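+                # Synchronize right after recording the end event so the event has completed
+                # on the device before elapsed_time() is queried.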
torch.cuda.synchronize() + else: + if self.device.type == "mps": + torch.mps.synchronize() + self.end_time = time.time() + + def elapsed_time(self): + if cuda_available: + return self.start_event.elapsed_time(self.end_event) / 1000.0 + else: + return self.end_time - self.start_time + + def print_elapsed(self): + return # Disabled for now + + def process_batch(texts_batch, ids_batch, missing_ids): + """Process a batch of texts and return embeddings""" + _is_e5_model = "e5" in model_name.lower() + batch_size = len(texts_batch) + + # E5 model preprocessing + if _is_e5_model: + processed_texts_batch = [f"passage: {text}" for text in texts_batch] + else: + processed_texts_batch = texts_batch + + # Set max length + if _is_e5_model: + current_max_length = custom_max_length_param if custom_max_length_param is not None else 512 + else: + current_max_length = custom_max_length_param if custom_max_length_param is not None else 256 + + tokenize_timer = DeviceTimer("tokenization (batch)", device) + to_device_timer = DeviceTimer("transfer to device (batch)", device) + embed_timer = DeviceTimer("embedding (batch)", device) + pool_timer = DeviceTimer("pooling (batch)", device) + norm_timer = DeviceTimer("normalization (batch)", device) + + with tokenize_timer.timing(): + encoded_batch = tokenizer( + processed_texts_batch, + padding="max_length", + truncation=True, + max_length=current_max_length, + return_tensors="pt", + return_token_type_ids=False, + ) + + seq_length = encoded_batch["input_ids"].size(1) + + with to_device_timer.timing(): + enc = {k: v.to(device) for k, v in encoded_batch.items()} + + with torch.no_grad(): + with embed_timer.timing(): + out = model(enc["input_ids"], enc["attention_mask"]) + + with pool_timer.timing(): + if not hasattr(out, 'last_hidden_state'): + if isinstance(out, torch.Tensor) and len(out.shape) == 2: + pooled_embeddings = out + else: + print(f"{RED}ERROR: Cannot determine how to pool. Output shape: {out.shape if isinstance(out, torch.Tensor) else 'N/A'}{RESET}") + hidden_dim = getattr(model.config, 'hidden_size', 384 if _is_e5_model else 768) + pooled_embeddings = torch.zeros((batch_size, hidden_dim), device=device, dtype=enc["input_ids"].dtype if hasattr(enc["input_ids"], "dtype") else torch.float32) + elif _is_e5_model: + pooled_embeddings = e5_average_pool(out.last_hidden_state, enc['attention_mask']) + else: + hidden_states = out.last_hidden_state + mask_expanded = enc["attention_mask"].unsqueeze(-1).expand(hidden_states.size()).float() + sum_embeddings = torch.sum(hidden_states * mask_expanded, 1) + sum_mask = torch.clamp(mask_expanded.sum(1), min=1e-9) + pooled_embeddings = sum_embeddings / sum_mask + + final_embeddings = pooled_embeddings + if _is_e5_model: + with norm_timer.timing(): + final_embeddings = F.normalize(pooled_embeddings, p=2, dim=1) + + if torch.isnan(final_embeddings).any() or torch.isinf(final_embeddings).any(): + print(f"{RED}!!! In process_batch: NaN or Inf detected in final_embeddings! " + f"Model: {model_name}, E5: {_is_e5_model}. 
IDs (sample): {ids_batch[:5]}...{RESET}") + dim_size = final_embeddings.shape[-1] + error_output = torch.zeros((batch_size, dim_size), device='cpu', dtype=torch.float32).numpy() + print(f"{RED}Returning zero embeddings of shape ({batch_size}, {dim_size}) due to NaN/Inf.{RESET}") + return error_output + + return final_embeddings.cpu().numpy() + + def client_warmup(zmq_port): + """Perform client-side warmup""" + time.sleep(2) + print(f"Performing client-side warmup with model {model_name}...") + sample_ids = ["1", "2", "3", "4", "5"] + + try: + context = zmq.Context() + socket = context.socket(zmq.REQ) + socket.connect(f"tcp://localhost:{zmq_port}") + socket.setsockopt(zmq.RCVTIMEO, 30000) + socket.setsockopt(zmq.SNDTIMEO, 30000) + + try: + ids_to_send = [int(x) for x in sample_ids] + except ValueError: + ids_to_send = [] + + if not ids_to_send: + print("Skipping warmup send.") + return + + request_payload = [ids_to_send] + request_bytes = msgpack.packb(request_payload) + + for i in range(3): + print(f"Sending warmup request {i+1}/3 via ZMQ (MessagePack)...") + socket.send(request_bytes) + response_bytes = socket.recv() + + response_payload = msgpack.unpackb(response_bytes) + dimensions = response_payload[0] + embeddings_count = dimensions[0] if dimensions and len(dimensions) > 0 else 0 + print(f"Warmup request {i+1}/3 successful, received {embeddings_count} embeddings") + time.sleep(0.1) + + print("Client-side MessagePack ZMQ warmup complete") + socket.close() + context.term() + except Exception as e: + print(f"Error during MessagePack ZMQ warmup: {e}") + + def zmq_server_thread(): + """ZMQ server thread""" + context = zmq.Context() + socket = context.socket(zmq.REP) + socket.bind(f"tcp://*:{zmq_port}") + print(f"HNSW ZMQ server listening on port {zmq_port}") + + socket.setsockopt(zmq.RCVTIMEO, 300000) + socket.setsockopt(zmq.SNDTIMEO, 300000) + + while True: + try: + message_bytes = socket.recv() + print(f"Received ZMQ request of size {len(message_bytes)} bytes") + + e2e_start = time.time() + lookup_timer = DeviceTimer("text lookup", device) + + try: + request_payload = msgpack.unpackb(message_bytes) + + # Handle distance calculation requests + if isinstance(request_payload, list) and len(request_payload) == 2 and isinstance(request_payload[0], list) and isinstance(request_payload[1], list): + node_ids = request_payload[0] + query_vector = np.array(request_payload[1], dtype=np.float32) + + print(f"Request for distance calculation: {len(node_ids)} nodes, query vector dim: {len(query_vector)}") + + # Get embeddings for node IDs + texts = [] + missing_ids = [] + with lookup_timer.timing(): + for nid in node_ids: + txtinfo = passages[nid] + if txtinfo is None or txtinfo["text"] == "": + print(f"Warning: Passage with ID {nid} not found") + missing_ids.append(nid) + txt = "" + else: + txt = txtinfo["text"] + texts.append(txt) + lookup_timer.print_elapsed() + + # Process embeddings in chunks if needed + all_node_embeddings = [] + total_size = len(texts) + + if total_size > max_batch_size: + for i in range(0, total_size, max_batch_size): + end_idx = min(i + max_batch_size, total_size) + chunk_texts = texts[i:end_idx] + chunk_ids = node_ids[i:end_idx] + + embeddings_chunk = process_batch(chunk_texts, chunk_ids, missing_ids) + all_node_embeddings.append(embeddings_chunk) + + if cuda_available: + torch.cuda.empty_cache() + elif device.type == "mps": + torch.mps.empty_cache() + + node_embeddings = np.vstack(all_node_embeddings) + else: + node_embeddings = process_batch(texts, node_ids, missing_ids) + 
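+                        # The branch below turns the recomputed node embeddings into traversal
+                        # distances: negated inner products for the similarity metric, squared L2
+                        # otherwise, so smaller values always mean "closer" to the query.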
+ # Calculate distances + query_tensor = torch.tensor(query_vector, device=device).float() + node_embeddings_tensor = torch.tensor(node_embeddings, device=device).float() + + calc_timer = DeviceTimer("distance calculation", device) + with calc_timer.timing(): + with torch.no_grad(): + if is_similarity_metric(): + node_embeddings_np = node_embeddings_tensor.cpu().numpy() + query_np = query_tensor.cpu().numpy() + distances = -np.dot(node_embeddings_np, query_np) + else: + node_embeddings_np = node_embeddings_tensor.cpu().numpy().astype(np.float32) + query_np = query_tensor.cpu().numpy().astype(np.float32) + distances = np.sum(np.square(node_embeddings_np - query_np.reshape(1, -1)), axis=1) + calc_timer.print_elapsed() + + try: + response_payload = distances.flatten().tolist() + response_bytes = msgpack.packb([response_payload], use_single_float=True) + print(f"Sending distance response with {len(distances)} distances") + except Exception as pack_error: + print(f"Error packing MessagePack distance response: {pack_error}") + response_bytes = msgpack.packb([[]]) + + socket.send(response_bytes) + + if device.type == "cuda": + torch.cuda.synchronize() + elif device.type == "mps": + torch.mps.synchronize() + e2e_end = time.time() + print(f"Distance calculation E2E time: {e2e_end - e2e_start:.6f} seconds") + continue + + # Standard embedding request + if not isinstance(request_payload, list) or len(request_payload) != 1 or not isinstance(request_payload[0], list): + print(f"Error: Invalid MessagePack request format. Expected [[ids...]], got: {type(request_payload)}") + socket.send(msgpack.packb([[], []])) + continue + + node_ids = request_payload[0] + print(f"Request for {len(node_ids)} node embeddings") + + except Exception as unpack_error: + print(f"Error unpacking MessagePack request: {unpack_error}") + socket.send(msgpack.packb([[], []])) + continue + + # Look up texts by node IDs + texts = [] + missing_ids = [] + with lookup_timer.timing(): + for nid in node_ids: + txtinfo = passages[nid] + if txtinfo is None or txtinfo["text"] == "": + print(f"Warning: Passage with ID {nid} not found") + missing_ids.append(nid) + txt = "" + else: + txt = txtinfo["text"] + texts.append(txt) + lookup_timer.print_elapsed() + + if missing_ids: + print(f"Missing passages for IDs: {missing_ids}") + + # Process in chunks + total_size = len(texts) + print(f"Total batch size: {total_size}, max_batch_size: {max_batch_size}") + + all_embeddings = [] + + if total_size > max_batch_size: + print(f"Splitting batch of size {total_size} into chunks of {max_batch_size}") + for i in range(0, total_size, max_batch_size): + end_idx = min(i + max_batch_size, total_size) + print(f"Processing chunk {i//max_batch_size + 1}/{(total_size + max_batch_size - 1)//max_batch_size}: items {i} to {end_idx-1}") + + chunk_texts = texts[i:end_idx] + chunk_ids = node_ids[i:end_idx] + + embeddings_chunk = process_batch(chunk_texts, chunk_ids, missing_ids) + all_embeddings.append(embeddings_chunk) + + if cuda_available: + torch.cuda.empty_cache() + elif device.type == "mps": + torch.mps.empty_cache() + + hidden = np.vstack(all_embeddings) + print(f"Combined embeddings shape: {hidden.shape}") + else: + hidden = process_batch(texts, node_ids, missing_ids) + + # Serialization and response + ser_start = time.time() + + print(f"DEBUG zmq_server_thread: Final 'hidden' array | Shape: {hidden.shape} | Dtype: {hidden.dtype} | Has NaN/Inf: {np.isnan(hidden).any() or np.isinf(hidden).any()}") + if np.isnan(hidden).any() or np.isinf(hidden).any(): + 
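+                    # Fail fast: NaN/Inf embeddings would corrupt every distance computed downstream;
+                    # the assertion below is caught by the outer handler, which replies with an empty payload.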
print(f"{RED}!!! ERROR: NaN or Inf detected in final 'hidden' numpy array BEFORE sending! " + f"Requested IDs (sample): {node_ids[:5]}...{RESET}") + assert False + + try: + hidden_contiguous_f32 = np.ascontiguousarray(hidden, dtype=np.float32) + response_payload = [ + list(hidden_contiguous_f32.shape), + hidden_contiguous_f32.flatten().tolist() + ] + response_bytes = msgpack.packb(response_payload, use_single_float=True) + except Exception as pack_error: + print(f"Error packing MessagePack response: {pack_error}") + response_bytes = msgpack.packb([[], []]) + + socket.send(response_bytes) + ser_end = time.time() + + print(f"Serialize time: {ser_end - ser_start:.6f} seconds") + + if device.type == "cuda": + torch.cuda.synchronize() + elif device.type == "mps": + torch.mps.synchronize() + e2e_end = time.time() + print(f"ZMQ E2E time: {e2e_end - e2e_start:.6f} seconds") + + except zmq.Again: + print("ZMQ socket timeout, continuing to listen") + continue + except Exception as e: + print(f"Error in ZMQ server loop: {e}") + import traceback + traceback.print_exc() + try: + socket.send(msgpack.packb([[], []])) + except: + pass + + # Start warmup and server threads + if len(passages) > 0: + warmup_thread = threading.Thread(target=client_warmup, args=(zmq_port,)) + warmup_thread.daemon = True + warmup_thread.start() + + zmq_thread = threading.Thread(target=zmq_server_thread, daemon=True) + zmq_thread.start() + print(f"Started HNSW ZMQ server thread on port {zmq_port}") + + # Keep the main thread alive + try: + while True: + time.sleep(1) + except KeyboardInterrupt: + print("HNSW Server shutting down...") + return + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="HNSW Embedding service") + parser.add_argument("--zmq-port", type=int, default=5555, help="ZMQ port to run on") + parser.add_argument("--passages-file", type=str, help="JSON file containing passage ID to text mapping") + parser.add_argument("--embeddings-file", type=str, help="Pickle file containing pre-computed embeddings") + parser.add_argument("--use-fp16", action="store_true", default=False) + parser.add_argument("--use-int8", action="store_true", default=False) + parser.add_argument("--use-cuda-graphs", action="store_true", default=False) + parser.add_argument("--max-batch-size", type=int, default=128, help="Maximum batch size before splitting") + parser.add_argument("--model-name", type=str, default="sentence-transformers/all-mpnet-base-v2", + help="Embedding model name") + parser.add_argument("--custom-max-length", type=int, default=None, help="Override model's default max sequence length") + + args = parser.parse_args() + + # Create and start the HNSW embedding server + create_hnsw_embedding_server( + passages_file=args.passages_file, + embeddings_file=args.embeddings_file, + use_fp16=args.use_fp16, + use_int8=args.use_int8, + use_cuda_graphs=args.use_cuda_graphs, + zmq_port=args.zmq_port, + max_batch_size=args.max_batch_size, + model_name=args.model_name, + custom_max_length_param=args.custom_max_length, + ) \ No newline at end of file diff --git a/packages/leann-backend-hnsw/pyproject.toml b/packages/leann-backend-hnsw/pyproject.toml new file mode 100644 index 0000000..2201403 --- /dev/null +++ b/packages/leann-backend-hnsw/pyproject.toml @@ -0,0 +1,18 @@ +# 文件: packages/leann-backend-hnsw/pyproject.toml + +[build-system] +requires = ["scikit-build-core>=0.10", "numpy", "swig"] +build-backend = "scikit_build_core.build" + +[project] +name = "leann-backend-hnsw" +version = "0.1.0" +description = 
"Custom-built HNSW (Faiss) backend for the Leann toolkit." +dependencies = ["leann-core==0.1.0", "numpy"] + +# 回归到最标准的 scikit-build-core 配置 +[tool.scikit-build] +wheel.packages = ["leann_backend_hnsw"] +editable.mode = "redirect" +cmake.build-type = "Debug" +build.verbose = true \ No newline at end of file diff --git a/packages/leann-backend-hnsw/third_party/faiss/.clang-format b/packages/leann-backend-hnsw/third_party/faiss/.clang-format new file mode 100644 index 0000000..1fe6508 --- /dev/null +++ b/packages/leann-backend-hnsw/third_party/faiss/.clang-format @@ -0,0 +1,88 @@ +--- +AccessModifierOffset: -1 +AlignAfterOpenBracket: AlwaysBreak +AlignConsecutiveAssignments: false +AlignConsecutiveDeclarations: false +AlignEscapedNewlinesLeft: true +AlignOperands: false +AlignTrailingComments: true +AllowAllParametersOfDeclarationOnNextLine: false +AllowShortBlocksOnASingleLine: false +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: Empty +AllowShortIfStatementsOnASingleLine: false +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: true +AlwaysBreakTemplateDeclarations: true +BinPackArguments: false # at some point, set this to true +BinPackParameters: false # at some point, set this to true +BraceWrapping: + AfterClass: false + AfterControlStatement: false + AfterEnum: false + AfterFunction: false + AfterNamespace: false + AfterObjCDeclaration: false + AfterStruct: false + AfterUnion: false + BeforeCatch: false + BeforeElse: false + IndentBraces: false +BreakBeforeBinaryOperators: None +BreakBeforeBraces: Attach +BreakBeforeTernaryOperators: true +BreakConstructorInitializersBeforeComma: false +BreakAfterJavaFieldAnnotations: false +BreakStringLiterals: false +ColumnLimit: 80 +CommentPragmas: '^ IWYU pragma:' +CompactNamespaces: false +ConstructorInitializerAllOnOneLineOrOnePerLine: true +ConstructorInitializerIndentWidth: 8 +ContinuationIndentWidth: 8 +Cpp11BracedListStyle: true +DerivePointerAlignment: false +DisableFormat: false +ForEachMacros: [ FOR_EACH_RANGE, FOR_EACH, ] +IncludeCategories: + - Regex: '^<.*\.h(pp)?>' + Priority: 1 + - Regex: '^<.*' + Priority: 2 + - Regex: '.*' + Priority: 3 +IndentCaseLabels: true +IndentWidth: 4 +IndentWrappedFunctionNames: false +KeepEmptyLinesAtTheStartOfBlocks: false +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +ObjCBlockIndentWidth: 4 +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: false +PenaltyBreakBeforeFirstCallParameter: 1 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakString: 1000 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 2000000 +PointerAlignment: Left +ReflowComments: true +SortIncludes: true +SpaceAfterCStyleCast: false +SpaceBeforeAssignmentOperators: true +SpaceBeforeParens: ControlStatements +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 1 +SpacesInAngles: false +SpacesInContainerLiterals: true +SpacesInCStyleCastParentheses: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +Standard: Cpp11 +TabWidth: 8 +UseTab: Never +... 
diff --git a/packages/leann-backend-hnsw/third_party/faiss/.dockerignore b/packages/leann-backend-hnsw/third_party/faiss/.dockerignore new file mode 100644 index 0000000..7763a51 --- /dev/null +++ b/packages/leann-backend-hnsw/third_party/faiss/.dockerignore @@ -0,0 +1 @@ +sift1M \ No newline at end of file diff --git a/packages/leann-backend-hnsw/third_party/faiss/.github/ISSUE_TEMPLATE.md b/packages/leann-backend-hnsw/third_party/faiss/.github/ISSUE_TEMPLATE.md new file mode 100644 index 0000000..132be64 --- /dev/null +++ b/packages/leann-backend-hnsw/third_party/faiss/.github/ISSUE_TEMPLATE.md @@ -0,0 +1,33 @@ +# Summary + + + +# Platform + + + +OS: + +Faiss version: + +Installed from: + +Faiss compilation options: + +Running on: +- [ ] CPU +- [ ] GPU + +Interface: +- [ ] C++ +- [ ] Python + +# Reproduction instructions + + + + diff --git a/packages/leann-backend-hnsw/third_party/faiss/.github/actions/build_cmake/action.yml b/packages/leann-backend-hnsw/third_party/faiss/.github/actions/build_cmake/action.yml new file mode 100644 index 0000000..6251519 --- /dev/null +++ b/packages/leann-backend-hnsw/third_party/faiss/.github/actions/build_cmake/action.yml @@ -0,0 +1,189 @@ +name: Build cmake +inputs: + opt_level: + description: 'Compile options / optimization level.' + required: false + default: generic + gpu: + description: 'Enable GPU support.' + required: false + default: OFF + cuvs: + description: 'Enable cuVS support.' + required: false + default: OFF + rocm: + description: 'Enable ROCm support.' + required: false + default: OFF +runs: + using: composite + steps: + - name: Setup miniconda + uses: conda-incubator/setup-miniconda@v3 + with: + python-version: '3.11' + miniforge-version: latest # ensures conda-forge channel is used. + channels: conda-forge + conda-remove-defaults: 'true' + # Set to aarch64 if we're on arm64 because there's no miniforge ARM64 package, just aarch64. + # They are the same thing, just named differently. + architecture: ${{ runner.arch == 'ARM64' && 'aarch64' || runner.arch }} + - name: Configure build environment + shell: bash + run: | + # initialize Conda + conda config --set solver libmamba + # Ensure starting packages are from conda-forge. 
+ conda list --show-channel-urls + conda update -y -q conda + echo "$CONDA/bin" >> $GITHUB_PATH + + conda install -y -q python=3.11 cmake=3.26 make=4.2 swig=4.0 "numpy<2" scipy=1.14 pytest=7.4 gflags=2.2 + + # install base packages for ARM64 + if [ "${{ runner.arch }}" = "ARM64" ]; then + conda install -y -q -c conda-forge openblas=0.3.29 gxx_linux-aarch64=14.2 sysroot_linux-aarch64=2.17 + fi + + # install base packages for X86_64 + if [ "${{ runner.arch }}" = "X64" ]; then + # TODO: merge this with ARM64 + conda install -y -q -c conda-forge gxx_linux-64=14.2 sysroot_linux-64=2.17 + conda install -y -q mkl=2022.2.1 mkl-devel=2022.2.1 + fi + + # no CUDA needed for ROCm so skip this + if [ "${{ inputs.rocm }}" = "ON" ]; then + : + # regular CUDA for GPU builds + elif [ "${{ inputs.gpu }}" = "ON" ] && [ "${{ inputs.cuvs }}" = "OFF" ]; then + conda install -y -q cuda-toolkit=12.4 -c "nvidia/label/cuda-12.4.0" + # and CUDA from cuVS channel for cuVS builds + elif [ "${{ inputs.cuvs }}" = "ON" ]; then + conda install -y -q libcuvs=24.12 'cuda-version>=12.0,<=12.5' cuda-toolkit=12.4.1 gxx_linux-64=12.4 -c rapidsai -c conda-forge + fi + + # install test packages + if [ "${{ inputs.rocm }}" = "ON" ]; then + : # skip torch install via conda, we need to install via pip to get + # ROCm-enabled version until it's supported in conda by PyTorch + elif [ "${{ inputs.gpu }}" = "ON" ]; then + conda install -y -q "pytorch<2.5" pytorch-cuda=12.4 -c pytorch -c "nvidia/label/cuda-12.4.0" + else + conda install -y -q "pytorch<2.5" -c pytorch + fi + - name: ROCm - Install dependencies + if: inputs.rocm == 'ON' + shell: bash + run: | + # Update repos and install kmod, wget, gpg + sudo apt-get -qq update >/dev/null + sudo apt-get -qq install -y kmod wget gpg >/dev/null + + # Get UBUNTU version name + UBUNTU_VERSION_NAME=`cat /etc/os-release | grep UBUNTU_CODENAME | awk -F= '{print $2}'` + + # Set ROCm version + ROCM_VERSION="6.2" + + # Download, prepare, and install the package signing key + mkdir --parents --mode=0755 /etc/apt/keyrings + wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | gpg --dearmor | sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null + + # Add rocm repository + wget -qO - http://repo.radeon.com/rocm/rocm.gpg.key | sudo apt-key add - + rocm_baseurl="http://repo.radeon.com/rocm/apt/${ROCM_VERSION}" + echo "deb [arch=amd64] ${rocm_baseurl} ${UBUNTU_VERSION_NAME} main" | sudo tee /etc/apt/sources.list.d/rocm.list + sudo apt-get -qq update --allow-insecure-repositories >/dev/null + sudo apt-get -qq install -y --allow-unauthenticated \ + "rocm-dev${ROCM_VERSION}" "rocm-utils${ROCM_VERSION}" \ + "rocm-libs${ROCM_VERSION}" >/dev/null + + # Fake presence of MI200-class accelerators + echo "gfx90a" | sudo tee /opt/rocm/bin/target.lst + + # Cleanup + sudo apt-get -qq autoclean >/dev/null + sudo apt-get -qq clean >/dev/null + sudo rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* + - name: Symblink system dependencies + if: inputs.rocm == 'ON' + shell: bash + run: | + # symblink system libraries for HIP compiler + sudo ln -s /lib/x86_64-linux-gnu/libc.so.6 /lib64/libc.so.6 + sudo ln -s /lib/x86_64-linux-gnu/libc_nonshared.a /usr/lib64/libc_nonshared.a + sudo ln -s /usr/lib/x86_64-linux-gnu/libpthread.so.0 /lib64/libpthread.so.0 + sudo ln -s $HOME/miniconda3/x86_64-conda-linux-gnu/sysroot/usr/lib64/libpthread_nonshared.a /usr/lib64/libpthread_nonshared.a + - name: Build all targets + shell: bash + run: | + eval "$(conda shell.bash hook)" + conda activate + cmake -B build \ + -DBUILD_TESTING=ON \ + 
-DBUILD_SHARED_LIBS=ON \ + -DFAISS_ENABLE_GPU=${{ inputs.gpu }} \ + -DFAISS_ENABLE_CUVS=${{ inputs.cuvs }} \ + -DFAISS_ENABLE_ROCM=${{ inputs.rocm }} \ + -DFAISS_OPT_LEVEL=${{ inputs.opt_level }} \ + -DFAISS_ENABLE_C_API=ON \ + -DPYTHON_EXECUTABLE=$CONDA/bin/python \ + -DCMAKE_BUILD_TYPE=Release \ + -DBLA_VENDOR=${{ runner.arch == 'X64' && 'Intel10_64_dyn' || '' }} \ + -DCMAKE_CUDA_FLAGS=${{ runner.arch == 'X64' && '"-gencode arch=compute_75,code=sm_75"' || '' }} \ + . + make -k -C build -j$(nproc) + - name: C++ tests + shell: bash + run: | + export GTEST_OUTPUT="xml:$(realpath .)/test-results/googletest/" + make -C build test + - name: C++ perf benchmarks + shell: bash + if: inputs.rocm == 'OFF' + run: | + find ./build/perf_tests/ -executable -type f -name "bench*" -exec '{}' -v \; + - name: Install Python extension + shell: bash + working-directory: build/faiss/python + run: | + $CONDA/bin/python setup.py install + - name: ROCm - install ROCm-enabled torch via pip + if: inputs.rocm == 'ON' + shell: bash + run: | + pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.1 + - name: Python tests (CPU only) + if: inputs.gpu == 'OFF' + shell: bash + run: | + pytest --junitxml=test-results/pytest/results.xml tests/test_*.py + pytest --junitxml=test-results/pytest/results-torch.xml tests/torch_*.py + - name: Python tests (CPU + GPU) + if: inputs.gpu == 'ON' + shell: bash + run: | + pytest --junitxml=test-results/pytest/results.xml tests/test_*.py + pytest --junitxml=test-results/pytest/results-torch.xml tests/torch_*.py + cp tests/common_faiss_tests.py faiss/gpu/test + pytest --junitxml=test-results/pytest/results-gpu.xml faiss/gpu/test/test_*.py + pytest --junitxml=test-results/pytest/results-gpu-torch.xml faiss/gpu/test/torch_*.py + - name: Test avx2 loading + if: inputs.opt_level == 'avx2' + shell: bash + run: | + FAISS_DISABLE_CPU_FEATURES=AVX2 LD_DEBUG=libs $CONDA/bin/python -c "import faiss" 2>&1 | grep faiss.so + LD_DEBUG=libs $CONDA/bin/python -c "import faiss" 2>&1 | grep faiss_avx2.so + - name: Upload test results + if: always() + uses: actions/upload-artifact@v4 + with: + name: test-results-arch=${{ runner.arch }}-opt=${{ inputs.opt_level }}-gpu=${{ inputs.gpu }}-cuvs=${{ inputs.cuvs }}-rocm=${{ inputs.rocm }} + path: test-results + - name: Check installed packages channel + shell: bash + run: | + # Shows that all installed packages are from conda-forge. + conda list --show-channel-urls diff --git a/packages/leann-backend-hnsw/third_party/faiss/.github/actions/build_conda/action.yml b/packages/leann-backend-hnsw/third_party/faiss/.github/actions/build_conda/action.yml new file mode 100644 index 0000000..14c2270 --- /dev/null +++ b/packages/leann-backend-hnsw/third_party/faiss/.github/actions/build_conda/action.yml @@ -0,0 +1,107 @@ +name: Conda build +description: Builds Faiss inside a Conda environment and uploads to repository when label is provided. +inputs: + label: + description: "The label to be used for uploads to Conda." + default: "" + required: false + cuda: + description: "CUDA toolkit version to use." + default: "" + required: false + cuvs: + description: "Enable cuVS support." 
+ default: "" + required: false +runs: + using: composite + steps: + - name: Choose shell + shell: bash + id: choose_shell + run: | + # Use pwsh on Windows; bash everywhere else + if [ "${{ runner.os }}" != "Windows" ]; then + echo "shell=bash" >> "$GITHUB_OUTPUT" + else + echo "shell=pwsh" >> "$GITHUB_OUTPUT" + fi + - name: Setup miniconda + uses: conda-incubator/setup-miniconda@v3 + with: + python-version: '3.11' + miniforge-version: latest # ensures conda-forge channel is used. + channels: conda-forge + conda-remove-defaults: 'true' + # Set to runner.arch=aarch64 if we're on arm64 because + # there's no miniforge ARM64 package, just aarch64. + # They are the same thing, just named differently. + # However there is an ARM64 for macOS, so exclude that. + architecture: ${{ (runner.arch == 'ARM64' && runner.os != 'macOS') && 'aarch64' || runner.arch }} + - name: Install conda build tools + shell: ${{ steps.choose_shell.outputs.shell }} + run: | + # Ensure starting packages are from conda-forge. + conda list --show-channel-urls + conda install -y -q "conda!=24.11.0" + conda install -y -q "conda-build!=24.11.0" "liblief=0.14.1" + conda list --show-channel-urls + - name: Enable anaconda uploads + if: inputs.label != '' + shell: ${{ steps.choose_shell.outputs.shell }} + env: + PACKAGE_TYPE: ${{ inputs.label }} + run: | + conda install -y -q anaconda-client + conda config --set anaconda_upload yes + - name: Conda build (CPU) + if: inputs.label == '' && inputs.cuda == '' + shell: ${{ steps.choose_shell.outputs.shell }} + working-directory: conda + run: | + conda build faiss --python 3.11 -c pytorch + - name: Conda build (CPU) w/ anaconda upload + if: inputs.label != '' && inputs.cuda == '' + shell: ${{ steps.choose_shell.outputs.shell }} + working-directory: conda + env: + PACKAGE_TYPE: ${{ inputs.label }} + run: | + conda build faiss --user pytorch --label ${{ inputs.label }} -c pytorch + - name: Conda build (GPU) + if: inputs.label == '' && inputs.cuda != '' && inputs.cuvs == '' + shell: ${{ steps.choose_shell.outputs.shell }} + working-directory: conda + run: | + conda build faiss-gpu --variants '{ "cudatoolkit": "${{ inputs.cuda }}" }' \ + -c pytorch -c nvidia/label/cuda-${{ inputs.cuda }} -c nvidia + - name: Conda build (GPU) w/ anaconda upload + if: inputs.label != '' && inputs.cuda != '' && inputs.cuvs == '' + shell: ${{ steps.choose_shell.outputs.shell }} + working-directory: conda + env: + PACKAGE_TYPE: ${{ inputs.label }} + run: | + conda build faiss-gpu --variants '{ "cudatoolkit": "${{ inputs.cuda }}" }' \ + --user pytorch --label ${{ inputs.label }} -c pytorch -c nvidia/label/cuda-${{ inputs.cuda }} -c nvidia + - name: Conda build (GPU w/ cuVS) + if: inputs.label == '' && inputs.cuda != '' && inputs.cuvs != '' + shell: ${{ steps.choose_shell.outputs.shell }} + working-directory: conda + run: | + conda build faiss-gpu-cuvs --variants '{ "cudatoolkit": "${{ inputs.cuda }}" }' \ + -c pytorch -c rapidsai -c rapidsai-nightly -c conda-forge -c nvidia + - name: Conda build (GPU w/ cuVS) w/ anaconda upload + if: inputs.label != '' && inputs.cuda != '' && inputs.cuvs != '' + shell: ${{ steps.choose_shell.outputs.shell }} + working-directory: conda + env: + PACKAGE_TYPE: ${{ inputs.label }} + run: | + conda build faiss-gpu-cuvs --variants '{ "cudatoolkit": "${{ inputs.cuda }}" }' \ + --user pytorch --label ${{ inputs.label }} -c pytorch -c rapidsai -c rapidsai-nightly -c conda-forge -c nvidia + - name: Check installed packages channel + shell: ${{ steps.choose_shell.outputs.shell }} + run: | + 
# Shows that all installed packages are from conda-forge. + conda list --show-channel-urls diff --git a/packages/leann-backend-hnsw/third_party/faiss/.github/workflows/autoclose.yml b/packages/leann-backend-hnsw/third_party/faiss/.github/workflows/autoclose.yml new file mode 100644 index 0000000..41a5827 --- /dev/null +++ b/packages/leann-backend-hnsw/third_party/faiss/.github/workflows/autoclose.yml @@ -0,0 +1,23 @@ +name: Close Inactive Issues +on: + schedule: + - cron: "30 1 * * *" + +jobs: + close-issues: + runs-on: ubuntu-latest + permissions: + issues: write + pull-requests: write + steps: + - uses: actions/stale@v5 + with: + only-labels: autoclose + days-before-issue-stale: 7 + days-before-issue-close: 7 + stale-issue-label: "stale" + stale-issue-message: "This issue is stale because it has been open for 7 days with no activity." + close-issue-message: "This issue was closed because it has been inactive for 7 days since being marked as stale." + days-before-pr-stale: -1 + days-before-pr-close: -1 + repo-token: ${{ secrets.GITHUB_TOKEN }} diff --git a/packages/leann-backend-hnsw/third_party/faiss/.github/workflows/build-pull-request.yml b/packages/leann-backend-hnsw/third_party/faiss/.github/workflows/build-pull-request.yml new file mode 100644 index 0000000..bc0d2d6 --- /dev/null +++ b/packages/leann-backend-hnsw/third_party/faiss/.github/workflows/build-pull-request.yml @@ -0,0 +1,169 @@ +on: + workflow_call: +env: + OMP_NUM_THREADS: '10' + MKL_THREADING_LAYER: GNU +jobs: + format: + name: Format + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Install clang-format + run: | + sudo apt-get update -y + sudo apt-get install -y wget + sudo apt install -y lsb-release wget software-properties-common gnupg + wget https://apt.llvm.org/llvm.sh + chmod u+x llvm.sh + sudo ./llvm.sh 18 + sudo apt-get install -y git-core clang-format-18 + - name: Verify clang-format + run: | + git ls-files | grep -E '\.(cpp|h|cu|cuh)$' | xargs clang-format-18 -i + if git diff --quiet; then + echo "Formatting OK!" + else + echo "Formatting not OK!" 
+ echo "------------------" + git --no-pager diff --color + exit 1 + fi + linux-x86_64-cmake: + name: Linux x86_64 (cmake) + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Build and Test (cmake) + uses: ./.github/actions/build_cmake + linux-x86_64-AVX2-cmake: + name: Linux x86_64 AVX2 (cmake) + needs: linux-x86_64-cmake + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Build and Test (cmake) + uses: ./.github/actions/build_cmake + with: + opt_level: avx2 + linux-x86_64-AVX512-cmake: + name: Linux x86_64 AVX512 (cmake) + needs: linux-x86_64-cmake + runs-on: faiss-aws-m7i.large + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Build and Test (cmake) + uses: ./.github/actions/build_cmake + with: + opt_level: avx512 + linux-x86_64-AVX512_SPR-cmake: + name: Linux x86_64 AVX512_SPR (cmake) + needs: linux-x86_64-cmake + runs-on: faiss-aws-m7i.large + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Build and Test (cmake) + uses: ./.github/actions/build_cmake + with: + opt_level: avx512_spr + linux-x86_64-GPU-cmake: + name: Linux x86_64 GPU (cmake) + needs: linux-x86_64-cmake + runs-on: 4-core-ubuntu-gpu-t4 + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Build and Test (cmake) + uses: ./.github/actions/build_cmake + with: + gpu: ON + linux-x86_64-GPU-w-CUVS-cmake: + name: Linux x86_64 GPU w/ cuVS (cmake) + needs: linux-x86_64-cmake + runs-on: 4-core-ubuntu-gpu-t4 + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Build and Test (cmake) + uses: ./.github/actions/build_cmake + with: + gpu: ON + cuvs: ON + linux-x86_64-GPU-w-ROCm-cmake: + name: Linux x86_64 GPU w/ ROCm (cmake) + needs: linux-x86_64-cmake + runs-on: faiss-amd-MI200 + container: + image: ubuntu:22.04 + options: --device=/dev/kfd --device=/dev/dri --ipc=host --shm-size 16G --group-add video --cap-add=SYS_PTRACE --cap-add=SYS_ADMIN + steps: + - name: Container setup + run: | + if [ -f /.dockerenv ]; then + apt-get update && apt-get install -y sudo && apt-get install -y git + git config --global --add safe.directory '*' + else + echo 'Skipping. Current job is not running inside a container.' 
+ fi + - name: Checkout + uses: actions/checkout@v4 + - name: Build and Test (cmake) + uses: ./.github/actions/build_cmake + with: + gpu: ON + rocm: ON + linux-arm64-SVE-cmake: + name: Linux arm64 SVE (cmake) + needs: linux-x86_64-cmake + runs-on: faiss-aws-r8g.large + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Build and Test (cmake) + uses: ./.github/actions/build_cmake + with: + opt_level: sve + env: + # Context: https://github.com/facebookresearch/faiss/wiki/Troubleshooting#surprising-faiss-openmp-and-openblas-interaction + OPENBLAS_NUM_THREADS: '1' + linux-x86_64-conda: + name: Linux x86_64 (conda) + needs: linux-x86_64-cmake + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + fetch-tags: true + - name: Build and Package (conda) + uses: ./.github/actions/build_conda + windows-x86_64-conda: + name: Windows x86_64 (conda) + needs: linux-x86_64-cmake + runs-on: windows-2019 + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + fetch-tags: true + - name: Build and Package (conda) + uses: ./.github/actions/build_conda + linux-arm64-conda: + name: Linux arm64 (conda) + needs: linux-x86_64-cmake + runs-on: 2-core-ubuntu-arm + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + fetch-tags: true + - name: Build and Package (conda) + uses: ./.github/actions/build_conda diff --git a/packages/leann-backend-hnsw/third_party/faiss/.github/workflows/build-release.yml b/packages/leann-backend-hnsw/third_party/faiss/.github/workflows/build-release.yml new file mode 100644 index 0000000..b5b02f2 --- /dev/null +++ b/packages/leann-backend-hnsw/third_party/faiss/.github/workflows/build-release.yml @@ -0,0 +1,144 @@ +on: + workflow_call: + secrets: + ANACONDA_API_TOKEN: + required: true +env: + OMP_NUM_THREADS: '10' + MKL_THREADING_LAYER: GNU +jobs: + linux-x86_64-packages: + name: Linux x86_64 packages + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + fetch-tags: true + - name: Build and Package (conda) + uses: ./.github/actions/build_conda + env: + ANACONDA_API_TOKEN: ${{ secrets.ANACONDA_API_TOKEN }} + with: + label: main + linux-x86_64-GPU-packages-CUDA-11-4-4: + name: Linux x86_64 GPU packages (CUDA 11.4.4) + runs-on: 4-core-ubuntu-gpu-t4 + env: + CUDA_ARCHS: "60-real;61-real;62-real;70-real;72-real;75-real;80;86-real" + FAISS_FLATTEN_CONDA_INCLUDES: "1" + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + fetch-tags: true + - name: Build and Package (conda) + uses: ./.github/actions/build_conda + env: + ANACONDA_API_TOKEN: ${{ secrets.ANACONDA_API_TOKEN }} + with: + label: main + cuda: "11.4.4" + linux-x86_64-GPU-CUVS-packages-CUDA11-8-0: + name: Linux x86_64 GPU w/ cuVS packages (CUDA 11.8.0) + runs-on: 4-core-ubuntu-gpu-t4 + env: + CUDA_ARCHS: "70-real;72-real;75-real;80;86-real" + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + fetch-tags: true + - name: Build and Package (conda) + uses: ./.github/actions/build_conda + env: + ANACONDA_API_TOKEN: ${{ secrets.ANACONDA_API_TOKEN }} + with: + label: main + cuvs: "ON" + cuda: "11.8.0" + linux-x86_64-GPU-packages-CUDA-12-1-1: + name: Linux x86_64 GPU packages (CUDA 12.1.1) + runs-on: 4-core-ubuntu-gpu-t4 + env: + CUDA_ARCHS: "70-real;72-real;75-real;80;86-real" + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + fetch-tags: true + - name: Build and Package (conda) + uses: 
./.github/actions/build_conda + env: + ANACONDA_API_TOKEN: ${{ secrets.ANACONDA_API_TOKEN }} + with: + label: main + cuda: "12.1.1" + linux-x86_64-GPU-CUVS-packages-CUDA12-4-0: + name: Linux x86_64 GPU w/ cuVS packages (CUDA 12.4.0) + runs-on: 4-core-ubuntu-gpu-t4 + env: + CUDA_ARCHS: "70-real;72-real;75-real;80;86-real" + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + fetch-tags: true + - name: Build and Package (conda) + uses: ./.github/actions/build_conda + env: + ANACONDA_API_TOKEN: ${{ secrets.ANACONDA_API_TOKEN }} + with: + label: main + cuvs: "ON" + cuda: "12.4.0" + windows-x86_64-packages: + name: Windows x86_64 packages + runs-on: windows-2019 + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + fetch-tags: true + - name: Build and Package (conda) + uses: ./.github/actions/build_conda + env: + ANACONDA_API_TOKEN: ${{ secrets.ANACONDA_API_TOKEN }} + with: + label: main + osx-arm64-packages: + name: OSX arm64 packages + runs-on: macos-14 + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + fetch-tags: true + - name: Build and Package (conda) + uses: ./.github/actions/build_conda + env: + ANACONDA_API_TOKEN: ${{ secrets.ANACONDA_API_TOKEN }} + with: + label: main + linux-arm64-packages: + name: Linux arm64 packages + runs-on: 2-core-ubuntu-arm + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + fetch-tags: true + - name: Build and Package (conda) + uses: ./.github/actions/build_conda + env: + ANACONDA_API_TOKEN: ${{ secrets.ANACONDA_API_TOKEN }} + with: + label: main diff --git a/packages/leann-backend-hnsw/third_party/faiss/.github/workflows/build.yml b/packages/leann-backend-hnsw/third_party/faiss/.github/workflows/build.yml new file mode 100644 index 0000000..82792cb --- /dev/null +++ b/packages/leann-backend-hnsw/third_party/faiss/.github/workflows/build.yml @@ -0,0 +1,17 @@ +name: Build +on: + workflow_dispatch: + pull_request: + branches: + - main + push: + tags: + - 'v*' +jobs: + build-pull-request: + uses: ./.github/workflows/build-pull-request.yml + build-release: + uses: ./.github/workflows/build-release.yml + secrets: + ANACONDA_API_TOKEN: ${{ secrets.ANACONDA_API_TOKEN }} + if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') diff --git a/packages/leann-backend-hnsw/third_party/faiss/.github/workflows/nightly.yml b/packages/leann-backend-hnsw/third_party/faiss/.github/workflows/nightly.yml new file mode 100644 index 0000000..ef1e8d2 --- /dev/null +++ b/packages/leann-backend-hnsw/third_party/faiss/.github/workflows/nightly.yml @@ -0,0 +1,148 @@ +name: Nightly +on: + schedule: + - cron: '10 6 * * *' +env: + OMP_NUM_THREADS: '10' + MKL_THREADING_LAYER: GNU +jobs: + linux-x86_64-nightly: + name: Linux x86_64 nightlies + runs-on: 4-core-ubuntu + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + fetch-tags: true + - uses: ./.github/actions/build_conda + env: + ANACONDA_API_TOKEN: ${{ secrets.ANACONDA_API_TOKEN }} + with: + label: nightly + linux-x86_64-GPU-CUDA-11-4-4-nightly: + name: Linux x86_64 GPU nightlies (CUDA 11.4.4) + runs-on: 4-core-ubuntu-gpu-t4 + env: + CUDA_ARCHS: "60-real;61-real;62-real;70-real;72-real;75-real;80;86-real" + FAISS_FLATTEN_CONDA_INCLUDES: "1" + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + fetch-tags: true + - uses: ./.github/actions/build_conda + env: + ANACONDA_API_TOKEN: ${{ secrets.ANACONDA_API_TOKEN }} + with: + label: nightly + cuda: 
"11.4.4" + linux-x86_64-GPU-CUVS-CUDA11-8-0-nightly: + name: Linux x86_64 GPU w/ cuVS nightlies (CUDA 11.8.0) + runs-on: 4-core-ubuntu-gpu-t4 + env: + CUDA_ARCHS: "70-real;72-real;75-real;80;86-real" + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + fetch-tags: true + - uses: ./.github/actions/build_conda + env: + ANACONDA_API_TOKEN: ${{ secrets.ANACONDA_API_TOKEN }} + with: + label: nightly + cuvs: "ON" + cuda: "11.8.0" + linux-x86_64-GPU-CUDA-12-1-1-nightly: + name: Linux x86_64 GPU nightlies (CUDA 12.1.1) + runs-on: 4-core-ubuntu-gpu-t4 + env: + CUDA_ARCHS: "70-real;72-real;75-real;80;86-real" + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + fetch-tags: true + - uses: ./.github/actions/build_conda + env: + ANACONDA_API_TOKEN: ${{ secrets.ANACONDA_API_TOKEN }} + with: + label: nightly + cuda: "12.1.1" + linux-x86_64-GPU-CUVS-CUDA12-4-0-nightly: + name: Linux x86_64 GPU w/ cuVS nightlies (CUDA 12.4.0) + runs-on: 4-core-ubuntu-gpu-t4 + env: + CUDA_ARCHS: "70-real;72-real;75-real;80;86-real" + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + fetch-tags: true + - uses: ./.github/actions/build_conda + env: + ANACONDA_API_TOKEN: ${{ secrets.ANACONDA_API_TOKEN }} + with: + label: nightly + cuvs: "ON" + cuda: "12.4.0" + windows-x86_64-nightly: + name: Windows x86_64 nightlies + runs-on: windows-2019 + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + fetch-tags: true + - uses: ./.github/actions/build_conda + env: + ANACONDA_API_TOKEN: ${{ secrets.ANACONDA_API_TOKEN }} + with: + label: nightly + osx-arm64-nightly: + name: OSX arm64 nightlies + runs-on: macos-14 + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + fetch-tags: true + - uses: ./.github/actions/build_conda + env: + ANACONDA_API_TOKEN: ${{ secrets.ANACONDA_API_TOKEN }} + with: + label: nightly + linux-arm64-nightly: + name: Linux arm64 nightlies + runs-on: 2-core-ubuntu-arm + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + fetch-tags: true + - uses: ./.github/actions/build_conda + env: + ANACONDA_API_TOKEN: ${{ secrets.ANACONDA_API_TOKEN }} + with: + label: nightly + auto-retry: + name: Auto retry on failure + if: fromJSON(github.run_attempt) < 2 + runs-on: ubuntu-latest + steps: + - name: Start rerun workflow + env: + GH_REPO: ${{ github.repository }} + GH_TOKEN: ${{ github.token }} + GH_DEBUG: api + run: | + gh workflow run retry_build.yml \ + -F run_id=${{ github.run_id }} diff --git a/packages/leann-backend-hnsw/third_party/faiss/.github/workflows/publish-docs.yml b/packages/leann-backend-hnsw/third_party/faiss/.github/workflows/publish-docs.yml new file mode 100644 index 0000000..a75c485 --- /dev/null +++ b/packages/leann-backend-hnsw/third_party/faiss/.github/workflows/publish-docs.yml @@ -0,0 +1,44 @@ +name: Publish Docs +on: + page_build: + branches: + - gh-pages + paths-ignore: + - 'docs/**' + workflow_run: + workflows: [update-doxygen] + types: + - completed +jobs: + build_and_publish: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: 3.8 + - name: Checkout gh-pages + run: | + git fetch origin gh-pages + git checkout gh-pages + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + - name: Generate html + run: | + make html + git rm -rf docs + mv _build/html docs + touch docs/.nojekyll 
+ - name: Push changes + run: | + git config --global user.email "$GITHUB_ACTOR@users.noreply.github.com" + git config --global user.name "$GITHUB_ACTOR" + git add docs + if [ -n "$(git status --porcelain)" ] + then + git commit docs -m "Sphinx rebuild ($(git rev-parse --short gh-pages))." + git push origin gh-pages + fi diff --git a/packages/leann-backend-hnsw/third_party/faiss/.github/workflows/retry_build.yml b/packages/leann-backend-hnsw/third_party/faiss/.github/workflows/retry_build.yml new file mode 100644 index 0000000..45c07ff --- /dev/null +++ b/packages/leann-backend-hnsw/third_party/faiss/.github/workflows/retry_build.yml @@ -0,0 +1,33 @@ +name: Retry Build +on: + workflow_dispatch: + inputs: + run_id: + required: true +jobs: + rerun-on-failure: + permissions: write-all + runs-on: ubuntu-latest + steps: + - name: rerun ${{ inputs.run_id }} + env: + GH_REPO: ${{ github.repository }} + GH_TOKEN: ${{ github.token }} + GH_DEBUG: api + run: | + # status can be one of "queued", "in_progress", "completed", "waiting", "requested", "pending" + # https://docs.github.com/en/rest/checks/runs + # while not completed, sleep for 10 minutes + while gh run view ${{ inputs.run_id }} --json status | grep -v completed + do + echo Workflow in progress - sleeping for 10 minutes then checking again + sleep 10m + done + + # Only retry if there are failed jobs + if gh run view ${{ inputs.run_id }} --exit-status; then + echo Workflow succeeded - no retry necessary. + else + echo Workflow failed - initiating retry. + gh run rerun ${{ inputs.run_id }} --failed + fi diff --git a/packages/leann-backend-hnsw/third_party/faiss/.github/workflows/update-doxygen.yml b/packages/leann-backend-hnsw/third_party/faiss/.github/workflows/update-doxygen.yml new file mode 100644 index 0000000..64d9435 --- /dev/null +++ b/packages/leann-backend-hnsw/third_party/faiss/.github/workflows/update-doxygen.yml @@ -0,0 +1,40 @@ +name: Update Doxygen +on: + push: + branches: + - main + paths: + - 'faiss/**' +jobs: + doxygen: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: 3.8 + - name: Install dependencies + run: | + sudo apt-get install -y doxygen + python -m pip install --upgrade pip + pip install breathe + - name: Generate doxygen xml + run: doxygen + - name: Push changes + run: | + git config --global user.email "$GITHUB_ACTOR@users.noreply.github.com" + git config --global user.name "$GITHUB_ACTOR" + mkdir ./tmp + mv xml ./tmp/xml + git fetch origin gh-pages + git checkout gh-pages + git rm -rf xml cpp_api + mv ./tmp/xml ./xml + breathe-apidoc -o cpp_api xml + git add xml cpp_api + if [ -n "$(git status --porcelain)" ] + then + git commit -m "Update API docs ($(git rev-parse --short main))." 
+ git push origin gh-pages + fi diff --git a/packages/leann-backend-hnsw/third_party/faiss/.gitignore b/packages/leann-backend-hnsw/third_party/faiss/.gitignore new file mode 100644 index 0000000..2d5a8dc --- /dev/null +++ b/packages/leann-backend-hnsw/third_party/faiss/.gitignore @@ -0,0 +1,26 @@ +*.swp +*.swo +*.o +*.a +*.dSYM +*.so +*.dylib +*.pyc +*~ +/build/ +/config.* +/aclocal.m4 +/autom4te.cache/ +/makefile.inc +/bin/ +/c_api/bin/ +/c_api/gpu/bin/ +/tests/test +/tests/gtest/ +faiss/python/swigfaiss_avx2.swig +faiss/python/swigfaiss_avx512.swig +faiss/python/swigfaiss_avx512_spr.swig +faiss/python/swigfaiss_sve.swig +.cache/ +compile_commands.json +sift/ \ No newline at end of file diff --git a/packages/leann-backend-hnsw/third_party/faiss/.vscode/launch.json b/packages/leann-backend-hnsw/third_party/faiss/.vscode/launch.json new file mode 100644 index 0000000..d6087f1 --- /dev/null +++ b/packages/leann-backend-hnsw/third_party/faiss/.vscode/launch.json @@ -0,0 +1,19 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Build Demo", + "type": "lldb", + "request": "launch", + "program": "${workspaceFolder}/../.venv/bin/python", + "console": "integratedTerminal", + "cwd": "${workspaceFolder}", + "args": [ + "${workspaceFolder}/demo/build_demo.py" + ], + }, + ] +} \ No newline at end of file diff --git a/packages/leann-backend-hnsw/third_party/faiss/CHANGELOG.md b/packages/leann-backend-hnsw/third_party/faiss/CHANGELOG.md new file mode 100644 index 0000000..c1771f2 --- /dev/null +++ b/packages/leann-backend-hnsw/third_party/faiss/CHANGELOG.md @@ -0,0 +1,482 @@ +# Changelog +All notable changes to this project will be documented in this file. + +## [Unreleased] + +## [1.10.0] - 2025-01-30 + + +Added +- Add desc_name to dataset descriptor (#3935) +- implement ST_norm_from_LUT for the ResidualQuantizer (#3917) +- Add example of how to build, link, and test an external SWIG module (#3922) +- add copyright header (#3948) +- Add some SVE implementations (#3933) +- Enable linting: lint config changes plus arc lint command (#3966) +- Re-add example of how to build, link, and test an external SWIG module (#3981) +- demo: IndexPQ: separate codes from codebook (#3987) +- add all wrapped indexes to the index_read (#3988) +- add validity check AlignedTableTightAlloc clear method (#3997) +- Add index binary to telemetry (#4001) +- Add VectorTransform read from filename to the C API (#3970) +- Added IndexLSH to the demo (#4009) +- write distributed_kmeans centroids and assignments to hive tables (#4017) +- introduce data splits in dataset descriptor (#4012) +- Faiss GPU: bfloat16 brute-force kNN support (#4018) +- ROCm support for bfloat16 (#4039) +- Unit tests for distances_simd.cpp (#4058) +- add cuda-toolkit for GPU (#4057) +- Add more unit testing for IndexHNSW [1/n] (#4054) +- Add more unit testing for IndexHNSW [2/n] (#4056) +- Add more unit testing for HNSW [3/n] (#4059) +- Add more unit testing for HNSW [4/n] (#4061) +- Add more unit tests for index_read and index_write (#4068) +- Add testing for utils/hamming.cpp (#4079) +- Test sa_decode methd on IndexIVFFlat (#4098) +- Conditionally compile extras like benchmarks and demos (#4094) +- Add a new architecture mode: 'avx512_spr'. (#4025) +- Use _mm512_popcnt_epi64 to speedup hamming distance evaluation. 
(#4020) +- PQ with pytorch (#4116) +- add range_search() to IndexRefine (#4022) +- Expose accumulate_to_mem from faiss interface (#4099) +- Windows Arm64 support (#4087) +- add test to cover GPU (#4130) +- Added support for building without MKL (#4147) + +Changed +- Move train, build and search to their respective operators (#3934) +- PQFS into Index trainer (#3941) +- Place a useful cmake function 'link_to_faiss_lib' into a separate file (#3939) +- Cache device major version value to avoid multiple calls of getCudaDeviceProperties (#3950) +- Consolidate set_target_properties() calls in faiss/CMakeLists.txt (#3973) +- Removing Manual Hipify Build Step (#3962) +- Allow to replace graph structure for NSG graphs (#3975) +- Adjust nightly build (#3978) +- Update RAFT CI with pytorch 2.4.1 (#3980) +- Moved add_sa_codes, sa_code_size to Index, IndexBinary base classes (#3989) +- Update autoclose.yml (#4000) +- Migrate from RAFT to CUVS (#3549) +- Pin to numpy<2 (#4033) +- (1/n) - Preload datasets in manifold so that subsequent stages of training, indexing and search can use those instead of each trainer or indexer downloading data. (#4034) +- Constrain conda version for Windows build (#4040) +- Updates to faiss-gpu-cuvs nightly pkg (#4032) +- pin the dependecies version for x86_64 (#4046) +- pin arm64 dependency (#4060) +- Pin conda build (#4062) +- Improve naming due to codemod (#4063) +- Improve naming due to codemod (#4064) +- Improve naming due to codemod (#4065) +- separare the github build into two conditions (#4066) +- Improve naming due to codemod (#4070) +- improve naming due to codemod (#4067) +- improve naming due to codemod (#4071) +- improve naming due to codemod (#4072) +- fix nightily build (#4080) +- Change github action workflows name (#4083) +- Resolve Packaging Issues (#4044) +- Update __init__.py (#4086) +- Exhaustive IVF probing in scalar quantizer tests (#4075) +- Pin Nightlies with testing on PR (#4088) +- Update benchmarking library code to work for IdMap index as well (#4093) +- Update action.yml (#4100) +- Upgrade CUVS to 24.12 (#4021) +- Link cuVS Docs (#4084) +- Set KnnDescriptor.desc_name in the Benchmarking core framework in FAISS like other descriptors (#4109) +- enable quiet mode for conda install (#4112) +- Disable retry build (#4124) +- Add ngpu default argument to knn_ground_truth (#4123) +- Update code comment to reflect the range of IF from [1, k] (#4139) +- Reenable auto retry workflow (#4140) +- Migration off defaults to conda-forge channel (#4126) +- Benchmarking Scripts for cuVS Index, more docs updates (#4117) + +Fixed +- Fix total_rows (#3942) +- Fix INSTALL.md due to failure of conflict resolving (#3915) +- Back out "Add example of how to build, link, and test an external SWIG module" (#3954) +- Fix shadowed variable in faiss/IndexPQ.cpp (#3959) +- Fix shadowed variable in faiss/IndexIVFAdditiveQuantizer.cpp (#3958) +- Fix shadowed variable in faiss/impl/HNSW.cpp (#3961) +- Fix shadowed variable in faiss/impl/simd_result_handlers.h (#3960) +- Fix shadowed variable in faiss/utils/NeuralNet.cpp (#3952) +- Resolve "incorrect-portions-license" errors: add no license lint to top of GPU files with both licenses (#3965) +- Resolve "duplicate-license-header": Find and replace duplicate license headers (#3967) +- fix some more nvidia licenses that get erased (#3977) +- fix merge_flat_ondisk stress run failures (#3999) +- Fix reverse_index_factory formatting of ScalarQuantizers (#4003) +- Fix shadowed variable in faiss/IndexAdditiveQuantizer.cpp (#4011) +- 
facebook-unused-include-check in fbcode/faiss (#4029) +- fix linter (#4035) +- Some chore fixes (#4010) +- Fix unused variable compilation error (#4041) +- stop dealloc of coarse quantizer when it is deleted (#4045) +- Fix SCD Table test flakiness (#4069) +- Fix IndexIVFFastScan reconstruct_from_offset method (#4095) +- more fast-scan reconstruction (#4128) +- Fix nightly cuVS 11.8.0 failure (#4149) +- Correct capitalization of FAISS to Faiss (#4155) +- Fix cuVS 12.4.0 nightly failure (#4153) + +Deprecated +- Remove unused-variable in dumbo/backup/dumbo/service/tests/ChainReplicatorTests.cpp (#4024) +- remove inconsistent oom exception test (#4052) +- Remove unused(and wrong) io macro (#4122) + + +## [1.9.0] - 2024-10-04 +### Added +- Add AVX-512 implementation for the distance and scalar quantizer functions. (#3853) +- Allow k and M suffixes in IVF indexes (#3812) +- add reconstruct support to additive quantizers (#3752) +- introduce options for reducing the overhead for a clustering procedure (#3731) +- Add hnsw search params for bounded queue option (#3748) +- ROCm support (#3462) +- Add sve targets (#2886) +- add get_version() for c_api (#3688) +- QINCo implementation in CPU Faiss (#3608) +- Add search functionality to FlatCodes (#3611) +- add dispatcher for VectorDistance and ResultHandlers (#3627) +- Add SQ8bit signed quantization (#3501) +- Add ABS_INNER_PRODUCT metric (#3524) +- Interop between CAGRA and HNSW (#3252) +- add skip_storage flag to HNSW (#3487) +- QT_bf16 for scalar quantizer for bfloat16 (#3444) +- Implement METRIC.NaNEuclidean (#3414) +- TimeoutCallback C++ and Python (#3417) +- support big-endian machines (#3361) +- Support for Remove ids from IVFPQFastScan index (#3354) +- Implement reconstruct_n for GPU IVFFlat indexes (#3338) +- Support of skip_ids in merge_from_multiple function of OnDiskInvertedLists (#3327) +- Add the ability to clone and read binary indexes to the C API. (#3318) +- AVX512 for PQFastScan (#3276) + +### Changed +- faster hnsw CPU index training (#3822) +- Some small improvements. (#3692) +- First attempt at LSH matching with nbits (#3679) +- Set verbosoe before train (#3619) +- Remove duplicate NegativeDistanceComputer instances (#3450) +- interrupt for NNDescent (#3432) +- Get rid of redundant instructions in ScalarQuantizer (#3430) +- PowerPC, improve code generation for function fvec_L2sqr (#3416) +- Unroll loop in lookup_2_lanes (#3364) +- Improve filtering & search parameters propagation (#3304) +- Change index_cpu_to_gpu to throw for indices not implemented on GPU (#3336) +- Throw when attempting to move IndexPQ to GPU (#3328) +- Skip HNSWPQ sdc init with new io flag (#3250) + +### Fixed +- FIx a bug for a non-simdlib code of ResidualQuantizer (#3868) +- assign_index should default to null (#3855) +- Fix an incorrectly counted the number of computed distances for HNSW (#3840) +- Add error for overflowing nbits during PQ construction (#3833) +- Fix radius search with HSNW and IP (#3698) +- fix algorithm of spreading vectors over shards (#3374) +- Fix IndexBinary.assign Python method (#3384) +- Few fixes in bench_fw to enable IndexFromCodec (#3383) +- Fix the endianness issue in AIX while running the benchmark. (#3345) +- Fix faiss swig build with version > 4.2.x (#3315) +- Fix problems when using 64-bit integers. 
(#3322) +- Fix IVFPQFastScan decode function (#3312) +- Handling FaissException in few destructors of ResultHandler.h (#3311) +- Fix HNSW stats (#3309) +- AIX compilation fix for io classes (#3275) + + +## [1.8.0] - 2024-02-27 +### Added +- Added a new conda package faiss-gpu-raft alongside faiss-cpu and faiss-gpu +- Integrated IVF-Flat and IVF-PQ implementations in faiss-gpu-raft from RAFT by Nvidia [thanks Corey Nolet and Tarang Jain] +- Added a context parameter to InvertedLists and InvertedListsIterator +- Added Faiss on Rocksdb demo to showing how inverted lists can be persisted in a key-value store +- Introduced Offline IVF framework powered by Faiss big batch search +- Added SIMD NEON Optimization for QT_FP16 in Scalar Quantizer. [thanks Naveen Tatikonda] +- Generalized ResultHandler and supported range search for HNSW and FastScan +- Introduced avx512 optimization mode and FAISS_OPT_LEVEL env variable [thanks Alexandr Ghuzva] +- Added search parameters for IndexRefine::search() and IndexRefineFlat::search() +- Supported large two-level clustering +- Added support for Python 3.11 and 3.12 +- Added support for CUDA 12 + +### Changed +- Used the benchmark to find Pareto optimal indices. Intentionally limited to IVF(Flat|HNSW),PQ|SQ indices +- Splitted off RQ encoding steps to another file +- Supported better NaN handling +- HNSW speedup + Distance 4 points [thanks Alexandr Ghuzva] + +### Fixed +- Fixed DeviceVector reallocations in Faiss GPU +- Used efSearch from params if provided in HNSW search +- Fixed warp synchronous behavior in Faiss GPU CUDA 12 + + +## [1.7.4] - 2023-04-12 +### Added +- Added big batch IVF search for conducting efficient search with big batches of queries +- Checkpointing in big batch search support +- Precomputed centroids support +- Support for iterable inverted lists for eg. key value stores +- 64-bit indexing arithmetic support in FAISS GPU +- IndexIVFShards now handle IVF indexes with a common quantizer +- Jaccard distance support +- CodePacker for non-contiguous code layouts +- Approximate evaluation of top-k distances for ResidualQuantizer and IndexBinaryFlat +- Added support for 12-bit PQ / IVFPQ fine quantizer decoders for standalone vector codecs (faiss/cppcontrib) +- Conda packages for osx-arm64 (Apple M1) and linux-aarch64 (ARM64) architectures +- Support for Python 3.10 + +### Removed +- CUDA 10 is no longer supported in precompiled packages +- Removed Python 3.7 support for precompiled packages +- Removed constraint for using fine quantizer with no greater than 8 bits for IVFPQ, for example, now it is possible to use IVF256,PQ10x12 for a CPU index + +### Changed +- Various performance optimizations for PQ / IVFPQ for AVX2 and ARM for training (fused distance+nearest kernel), search (faster kernels for distance_to_code() and scan_list_*()) and vector encoding +- A magnitude faster CPU code for LSQ/PLSQ training and vector encoding (reworked code) +- Performance improvements for Hamming Code computations for AVX2 and ARM (reworked code) +- Improved auto-vectorization support for IP and L2 distance computations (better handling of pragmas) +- Improved ResidualQuantizer vector encoding (pooling memory allocations, avoid r/w to a temporary buffer) + +### Fixed +- HSNW bug fixed which improves the recall rate! Special thanks to zh Wang @hhy3 for this. 
+- Faiss GPU IVF large query batch fix +- Faiss + Torch fixes, re-enable k = 2048 +- Fix the number of distance computations to match max_codes parameter +- Fix decoding of large fast_scan blocks + + +## [1.7.3] - 2022-11-3 +### Added +- Added sparse k-means routines and moved the generic kmeans to contrib +- Added FlatDistanceComputer for all FlatCodes indexes +- Support for fast accumulation of 4-bit LSQ and RQ +- Added product additive quantization +- Support per-query search parameters for many indexes + filtering by ids +- write_VectorTransform and read_vectorTransform were added to the public API (by @AbdelrahmanElmeniawy) +- Support for IDMap2 in index_factory by adding "IDMap2" to prefix or suffix of the input String (by @AbdelrahmanElmeniawy) +- Support for merging all IndexFlatCodes descendants (by @AbdelrahmanElmeniawy) +- Remove and merge features for IndexFastScan (by @AbdelrahmanElmeniawy) +- Performance improvements: 1) specialized the AVX2 pieces of code speeding up certain hotspots, 2) specialized kernels for vector codecs (this can be found in faiss/cppcontrib) + + +### Fixed +- Fixed memory leak in OnDiskInvertedLists::do_mmap when the file is not closed (by @AbdelrahmanElmeniawy) +- LSH correctly throws error for metric types other than METRIC_L2 (by @AbdelrahmanElmeniawy) + +## [1.7.2] - 2021-12-15 +### Added +- Support LSQ on GPU (by @KinglittleQ) +- Support for exact 1D kmeans (by @KinglittleQ) + +## [1.7.1] - 2021-05-27 +### Added +- Support for building C bindings through the `FAISS_ENABLE_C_API` CMake option. +- Serializing the indexes with the python pickle module +- Support for the NNDescent k-NN graph building method (by @KinglittleQ) +- Support for the NSG graph indexing method (by @KinglittleQ) +- Residual quantizers: support as codec and unoptimized search +- Support for 4-bit PQ implementation for ARM (by @vorj, @n-miyamoto-fixstars, @LWisteria, and @matsui528) +- Implementation of Local Search Quantization (by @KinglittleQ) + +### Changed +- The order of xb an xq was different between `faiss.knn` and `faiss.knn_gpu`. +Also the metric argument was called distance_type. +- The typed vectors (LongVector, LongLongVector, etc.) of the SWIG interface have +been deprecated. They have been replaced with Int32Vector, Int64Vector, etc. (by h-vetinari) + +### Fixed +- Fixed a bug causing kNN search functions for IndexBinaryHash and +IndexBinaryMultiHash to return results in a random order. +- Copy constructor of AlignedTable had a bug leading to crashes when cloning +IVFPQ indices. + +## [1.7.0] - 2021-01-27 + +## [1.6.5] - 2020-11-22 + +## [1.6.4] - 2020-10-12 +### Added +- Arbitrary dimensions per sub-quantizer now allowed for `GpuIndexIVFPQ`. +- Brute-force kNN on GPU (`bfKnn`) now accepts `int32` indices. +- Nightly conda builds now available (for CPU). +- Faiss is now supported on Windows. + +## [1.6.3] - 2020-03-24 +### Added +- Support alternative distances on GPU for GpuIndexFlat, including L1, Linf and +Lp metrics. +- Support METRIC_INNER_PRODUCT for GpuIndexIVFPQ. +- Support float16 coarse quantizer for GpuIndexIVFFlat and GpuIndexIVFPQ. GPU +Tensor Core operations (mixed-precision arithmetic) are enabled on supported +hardware when operating with float16 data. +- Support k-means clustering with encoded vectors. This makes it possible to +train on larger datasets without decompressing them in RAM, and is especially +useful for binary datasets (see https://github.com/facebookresearch/faiss/blob/main/tests/test_build_blocks.py#L92). 
+- Support weighted k-means. Weights can be associated to each training point +(see https://github.com/facebookresearch/faiss/blob/main/tests/test_build_blocks.py). +- Serialize callback in python, to write to pipes or sockets (see +https://github.com/facebookresearch/faiss/wiki/Index-IO,-cloning-and-hyper-parameter-tuning). +- Reconstruct arbitrary ids from IndexIVF + efficient remove of a small number +of ids. This avoids 2 inefficiencies: O(ntotal) removal of vectors and +IndexIDMap2 on top of indexIVF. Documentation here: +https://github.com/facebookresearch/faiss/wiki/Special-operations-on-indexes. +- Support inner product as a metric in IndexHNSW (see +https://github.com/facebookresearch/faiss/blob/main/tests/test_index.py#L490). +- Support PQ of sizes other than 8 bit in IndexIVFPQ. +- Demo on how to perform searches sequentially on an IVF index. This is useful +for an OnDisk index with a very large batch of queries. In that case, it is +worthwhile to scan the index sequentially (see +https://github.com/facebookresearch/faiss/blob/main/tests/test_ivflib.py#L62). +- Range search support for most binary indexes. +- Support for hashing-based binary indexes (see +https://github.com/facebookresearch/faiss/wiki/Binary-indexes). + +### Changed +- Replaced obj table in Clustering object: now it is a ClusteringIterationStats +structure that contains additional statistics. + +### Removed +- Removed support for useFloat16Accumulator for accumulators on GPU (all +accumulations are now done in float32, regardless of whether float16 or float32 +input data is used). + +### Fixed +- Some python3 fixes in benchmarks. +- Fixed GpuCloner (some fields were not copied, default to no precomputed tables +with IndexIVFPQ). +- Fixed support for new pytorch versions. +- Serialization bug with alternative distances. +- Removed test on multiple-of-4 dimensions when switching between blas and AVX +implementations. + +## [1.6.2] - 2020-03-10 + +## [1.6.1] - 2019-12-04 + +## [1.6.0] - 2019-09-24 +### Added +- Faiss as a codec: We introduce a new API within Faiss to encode fixed-size +vectors into fixed-size codes. The encoding is lossy and the tradeoff between +compression and reconstruction accuracy can be adjusted. +- ScalarQuantizer support for GPU, see gpu/GpuIndexIVFScalarQuantizer.h. This is +particularly useful as GPU memory is often less abundant than CPU. +- Added easy-to-use serialization functions for indexes to byte arrays in Python +(faiss.serialize_index, faiss.deserialize_index). +- The Python KMeans object can be used to use the GPU directly, just add +gpu=True to the constuctor see gpu/test/test_gpu_index.py test TestGPUKmeans. + +### Changed +- Change in the code layout: many C++ sources are now in subdirectories impl/ +and utils/. + +## [1.5.3] - 2019-06-24 +### Added +- Basic support for 6 new metrics in CPU IndexFlat and IndexHNSW (https://github.com/facebookresearch/faiss/issues/848). +- Support for IndexIDMap/IndexIDMap2 with binary indexes (https://github.com/facebookresearch/faiss/issues/780). + +### Changed +- Throw python exception for OOM (https://github.com/facebookresearch/faiss/issues/758). +- Make DistanceComputer available for all random access indexes. +- Gradually moving from long to uint64_t for portability. + +### Fixed +- Slow scanning of inverted lists (https://github.com/facebookresearch/faiss/issues/836). + +## [1.5.2] - 2019-05-28 +### Added +- Support for searching several inverted lists in parallel (parallel_mode != 0). 
+- Better support for PQ codes where nbit != 8 or 16. +- IVFSpectralHash implementation: spectral hash codes inside an IVF. +- 6-bit per component scalar quantizer (4 and 8 bit were already supported). +- Combinations of inverted lists: HStackInvertedLists and VStackInvertedLists. +- Configurable number of threads for OnDiskInvertedLists prefetching (including +0=no prefetch). +- More test and demo code compatible with Python 3 (print with parentheses). + +### Changed +- License was changed from BSD+Patents to MIT. +- Exceptions raised in sub-indexes of IndexShards and IndexReplicas are now +propagated. +- Refactored benchmark code: data loading is now in a single file. + +## [1.5.1] - 2019-04-05 +### Added +- MatrixStats object, which reports useful statistics about a dataset. +- Option to round coordinates during k-means optimization. +- An alternative option for search in HNSW. +- Support for range search in IVFScalarQuantizer. +- Support for direct uint_8 codec in ScalarQuantizer. +- Better support for PQ code assignment with external index. +- Support for IMI2x16 (4B virtual centroids). +- Support for k = 2048 search on GPU (instead of 1024). +- Support for renaming an ondisk invertedlists. +- Support for nterrupting computations with interrupt signal (ctrl-C) in python. +- Simplified build system (with --with-cuda/--with-cuda-arch options). + +### Changed +- Moved stats() and imbalance_factor() from IndexIVF to InvertedLists object. +- Renamed IndexProxy to IndexReplicas. +- Most CUDA mem alloc failures now throw exceptions instead of terminating on an +assertion. +- Updated example Dockerfile. +- Conda packages now depend on the cudatoolkit packages, which fixes some +interferences with pytorch. Consequentially, faiss-gpu should now be installed +by conda install -c pytorch faiss-gpu cudatoolkit=10.0. + +## [1.5.0] - 2018-12-19 +### Added +- New GpuIndexBinaryFlat index. +- New IndexBinaryHNSW index. + +## [1.4.0] - 2018-08-30 +### Added +- Automatic tracking of C++ references in Python. +- Support for non-intel platforms, some functions optimized for ARM. +- Support for overriding nprobe for concurrent searches. +- Support for floating-point quantizers in binary indices. + +### Fixed +- No more segfaults due to Python's GC. +- GpuIndexIVFFlat issues for float32 with 64 / 128 dims. +- Sharding of flat indexes on GPU with index_cpu_to_gpu_multiple. + +## [1.3.0] - 2018-07-10 +### Added +- Support for binary indexes (IndexBinaryFlat, IndexBinaryIVF). +- Support fp16 encoding in scalar quantizer. +- Support for deduplication in IndexIVFFlat. +- Support for index serialization. + +### Fixed +- MMAP bug for normal indices. +- Propagation of io_flags in read func. +- k-selection for CUDA 9. +- Race condition in OnDiskInvertedLists. + +## [1.2.1] - 2018-02-28 +### Added +- Support for on-disk storage of IndexIVF data. +- C bindings. +- Extended tutorial to GPU indices. 
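The 1.6.0 and 1.7.1 changelog entries above mention Python-side index serialization (faiss.serialize_index / faiss.deserialize_index, later extended with pickle support). A minimal sketch of how those calls are typically used, assuming a small IndexFlatL2 built from random vectors; the dimensionality and data below are illustrative and not taken from this repo:

import faiss
import numpy as np

d = 64                                   # vector dimensionality (illustrative)
xb = np.random.rand(1000, d).astype("float32")

index = faiss.IndexFlatL2(d)             # exact L2 index
index.add(xb)

buf = faiss.serialize_index(index)       # numpy uint8 array holding the index bytes
restored = faiss.deserialize_index(buf)  # rebuild an equivalent index from the byte array

D, I = restored.search(xb[:5], 3)        # sanity check: each query should find itself first

faiss.write_index / faiss.read_index do the same round-trip against a file on disk, and the pickle support added in 1.7.1 builds on the same byte-array serialization, so pickled indexes can be stored or sent over sockets like any other Python object.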
+ +[Unreleased]: https://github.com/facebookresearch/faiss/compare/v1.9.0...HEAD +[1.9.0]: https://github.com/facebookresearch/faiss/compare/v1.8.0...v1.9.0 +[1.8.0]: https://github.com/facebookresearch/faiss/compare/v1.7.4...v1.8.0 +[1.7.4]: https://github.com/facebookresearch/faiss/compare/v1.7.3...v1.7.4 +[1.7.3]: https://github.com/facebookresearch/faiss/compare/v1.7.2...v1.7.3 +[1.7.2]: https://github.com/facebookresearch/faiss/compare/v1.7.1...v1.7.2 +[1.7.1]: https://github.com/facebookresearch/faiss/compare/v1.7.0...v1.7.1 +[1.7.0]: https://github.com/facebookresearch/faiss/compare/v1.6.5...v1.7.0 +[1.6.5]: https://github.com/facebookresearch/faiss/compare/v1.6.4...v1.6.5 +[1.6.4]: https://github.com/facebookresearch/faiss/compare/v1.6.3...v1.6.4 +[1.6.3]: https://github.com/facebookresearch/faiss/compare/v1.6.2...v1.6.3 +[1.6.2]: https://github.com/facebookresearch/faiss/compare/v1.6.1...v1.6.2 +[1.6.1]: https://github.com/facebookresearch/faiss/compare/v1.6.0...v1.6.1 +[1.6.0]: https://github.com/facebookresearch/faiss/compare/v1.5.3...v1.6.0 +[1.5.3]: https://github.com/facebookresearch/faiss/compare/v1.5.2...v1.5.3 +[1.5.2]: https://github.com/facebookresearch/faiss/compare/v1.5.1...v1.5.2 +[1.5.1]: https://github.com/facebookresearch/faiss/compare/v1.5.0...v1.5.1 +[1.5.0]: https://github.com/facebookresearch/faiss/compare/v1.4.0...v1.5.0 +[1.4.0]: https://github.com/facebookresearch/faiss/compare/v1.3.0...v1.4.0 +[1.3.0]: https://github.com/facebookresearch/faiss/compare/v1.2.1...v1.3.0 +[1.2.1]: https://github.com/facebookresearch/faiss/releases/tag/v1.2.1 diff --git a/packages/leann-backend-hnsw/third_party/faiss/CMakeLists.txt b/packages/leann-backend-hnsw/third_party/faiss/CMakeLists.txt new file mode 100644 index 0000000..4a70aaf --- /dev/null +++ b/packages/leann-backend-hnsw/third_party/faiss/CMakeLists.txt @@ -0,0 +1,126 @@ +# @lint-ignore-every LICENSELINT +# Copyright (c) Facebook, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +# ============================================================================= +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. 
+# ============================================================================= + +cmake_minimum_required(VERSION 3.24.0 FATAL_ERROR) + +set(FAISS_LANGUAGES CXX) + +if(FAISS_ENABLE_GPU) + if (FAISS_ENABLE_ROCM) + list(APPEND FAISS_LANGUAGES HIP) + list(PREPEND CMAKE_MODULE_PATH "/opt/rocm/lib/cmake") + list(PREPEND CMAKE_PREFIX_PATH "/opt/rocm") + else() + list(APPEND FAISS_LANGUAGES CUDA) + endif() +endif() + +if(FAISS_ENABLE_CUVS) +include(cmake/thirdparty/fetch_rapids.cmake) +include(rapids-cmake) +include(rapids-cpm) +include(rapids-cuda) +include(rapids-export) +include(rapids-find) + +rapids_cuda_init_architectures(faiss) +rapids_cuda_init_architectures(pyfaiss) +rapids_cuda_init_architectures(faiss_c_library) +endif() + +project(faiss + VERSION 1.10.0 + DESCRIPTION "A library for efficient similarity search and clustering of dense vectors." + HOMEPAGE_URL "https://github.com/facebookresearch/faiss" + LANGUAGES ${FAISS_LANGUAGES}) +include(GNUInstallDirs) + +set(CMAKE_CXX_STANDARD 17) + +list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake") + +# Valid values are "generic", "avx2", "avx512", "avx512_spr", "sve". +option(FAISS_OPT_LEVEL "" "generic") +option(FAISS_ENABLE_GPU "Enable support for GPU indexes." ON) +option(FAISS_ENABLE_CUVS "Enable cuVS for GPU indexes." OFF) +option(FAISS_ENABLE_ROCM "Enable ROCm for GPU indexes." OFF) +option(FAISS_ENABLE_MKL "Enable MKL." ON) +option(FAISS_ENABLE_PYTHON "Build Python extension." ON) +option(FAISS_ENABLE_C_API "Build C API." OFF) +option(FAISS_ENABLE_EXTRAS "Build extras like benchmarks and demos" ON) +option(FAISS_USE_LTO "Enable Link-Time optimization" OFF) + +if(FAISS_ENABLE_GPU) + if(FAISS_ENABLE_ROCM) + enable_language(HIP) + add_definitions(-DUSE_AMD_ROCM) + find_package(HIP REQUIRED) + find_package(hipBLAS REQUIRED) + set(GPU_EXT_PREFIX "hip") + execute_process(COMMAND ${PROJECT_SOURCE_DIR}/faiss/gpu/hipify.sh) + else () + set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER}) + enable_language(CUDA) + set(GPU_EXT_PREFIX "cu") + endif() +endif() + +if(FAISS_ENABLE_CUVS AND NOT TARGET cuvs::cuvs) + find_package(cuvs) + endif() + +add_subdirectory(faiss) + +if(FAISS_ENABLE_GPU) + if(FAISS_ENABLE_ROCM) + add_subdirectory(faiss/gpu-rocm) + else() + add_subdirectory(faiss/gpu) + endif() +endif() + +if(FAISS_ENABLE_PYTHON) + add_subdirectory(faiss/python) +endif() + +if(FAISS_ENABLE_C_API) + add_subdirectory(c_api) +endif() + +if(FAISS_ENABLE_EXTRAS) + add_subdirectory(demos) + add_subdirectory(benchs) + add_subdirectory(tutorial/cpp) +endif() + +# CTest must be included in the top level to enable `make test` target. +include(CTest) +if(BUILD_TESTING) + add_subdirectory(tests) + add_subdirectory(perf_tests) + if(FAISS_ENABLE_GPU) + if(FAISS_ENABLE_ROCM) + add_subdirectory(faiss/gpu-rocm/test) + else() + add_subdirectory(faiss/gpu/test) + endif() + endif() +endif() diff --git a/packages/leann-backend-hnsw/third_party/faiss/CODE_OF_CONDUCT.md b/packages/leann-backend-hnsw/third_party/faiss/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..ac27d8a --- /dev/null +++ b/packages/leann-backend-hnsw/third_party/faiss/CODE_OF_CONDUCT.md @@ -0,0 +1,2 @@ +# Code of Conduct +Facebook has adopted a Code of Conduct that we expect project participants to adhere to. Please [read the full text](https://code.fb.com/codeofconduct) so that you can understand what actions will and will not be tolerated. 
\ No newline at end of file diff --git a/packages/leann-backend-hnsw/third_party/faiss/CONTRIBUTING.md b/packages/leann-backend-hnsw/third_party/faiss/CONTRIBUTING.md new file mode 100644 index 0000000..10fc815 --- /dev/null +++ b/packages/leann-backend-hnsw/third_party/faiss/CONTRIBUTING.md @@ -0,0 +1,52 @@ +# Contributing to Faiss + +We want to make contributing to this project as easy and transparent as +possible. + +## Our Development Process + +We mainly develop Faiss within Facebook. Sometimes, we will sync the +github version of Faiss with the internal state. + +## Pull Requests + +We welcome pull requests that add significant value to Faiss. If you plan to do +a major development and contribute it back to Faiss, please contact us first before +putting too much effort into it. + +1. Fork the repo and create your branch from `main`. +2. If you've added code that should be tested, add tests. +3. If you've changed APIs, update the documentation. +4. Ensure the test suite passes. +5. Make sure your code lints. +6. If you haven't already, complete the Contributor License Agreement ("CLA"). + +There is a Facebook internal test suite for Faiss, and we need to run +all changes to Faiss through it. + +## Contributor License Agreement ("CLA") + +In order to accept your pull request, we need you to submit a CLA. You only need +to do this once to work on any of Facebook's open source projects. + +Complete your CLA here: + +## Issues + +We use GitHub issues to track public bugs. Please ensure your description is +clear and has sufficient instructions to be able to reproduce the issue. + +Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe +disclosure of security bugs. In those cases, please go through the process +outlined on that page and do not file a public issue. + +## Coding Style + +* 4 spaces for indentation in C++ (no tabs) +* 80 character line length (both for C++ and Python) +* C++ language level: C++17 + +## License + +By contributing to Faiss, you agree that your contributions will be licensed +under the LICENSE file in the root directory of this source tree. diff --git a/packages/leann-backend-hnsw/third_party/faiss/Doxyfile b/packages/leann-backend-hnsw/third_party/faiss/Doxyfile new file mode 100644 index 0000000..3a112d0 --- /dev/null +++ b/packages/leann-backend-hnsw/third_party/faiss/Doxyfile @@ -0,0 +1,2282 @@ + + +# Doxyfile 1.8.5 + +# This file describes the settings to be used by the documentation system +# doxygen (www.doxygen.org) for a project. +# +# All text after a double hash (##) is considered a comment and is placed in +# front of the TAG it is preceding. +# +# All text after a single hash (#) is considered a comment and will be ignored. +# The format is: +# TAG = value [value, ...] +# For lists, items can also be appended using: +# TAG += value [value, ...] +# Values that contain spaces should be placed between quotes (\" \"). + +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- + +# This tag specifies the encoding used for all characters in the config file +# that follow. The default is UTF-8 which is also the encoding used for all text +# before the first occurrence of this tag. Doxygen uses libiconv (or the iconv +# built into libc) for the transcoding. See http://www.gnu.org/software/libiconv +# for the list of possible encodings. +# The default value is: UTF-8. 
+ +DOXYFILE_ENCODING = UTF-8 + +# The PROJECT_NAME tag is a single word (or a sequence of words surrounded by +# double-quotes, unless you are using Doxywizard) that should identify the +# project for which the documentation is generated. This name is used in the +# title of most generated pages and in a few other places. +# The default value is: My Project. + +PROJECT_NAME = "Faiss" + +# The PROJECT_NUMBER tag can be used to enter a project or revision number. This +# could be handy for archiving the generated documentation or if some version +# control system is used. + +PROJECT_NUMBER = + +# Using the PROJECT_BRIEF tag one can provide an optional one line description +# for a project that appears at the top of each page and should give viewer a +# quick idea about the purpose of the project. Keep the description short. + +PROJECT_BRIEF = + +# With the PROJECT_LOGO tag one can specify an logo or icon that is included in +# the documentation. The maximum height of the logo should not exceed 55 pixels +# and the maximum width should not exceed 200 pixels. Doxygen will copy the logo +# to the output directory. + +PROJECT_LOGO = + +# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path +# into which the generated documentation will be written. If a relative path is +# entered, it will be relative to the location where doxygen was started. If +# left blank the current directory will be used. + +OUTPUT_DIRECTORY = + +# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create 4096 sub- +# directories (in 2 levels) under the output directory of each output format and +# will distribute the generated files over these directories. Enabling this +# option can be useful when feeding doxygen a huge amount of source files, where +# putting all generated files in the same directory would otherwise causes +# performance problems for the file system. +# The default value is: NO. + +CREATE_SUBDIRS = NO + +# The OUTPUT_LANGUAGE tag is used to specify the language in which all +# documentation generated by doxygen is written. Doxygen will use this +# information to generate all constant output in the proper language. +# Possible values are: Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese- +# Traditional, Croatian, Czech, Danish, Dutch, English, Esperanto, Farsi, +# Finnish, French, German, Greek, Hungarian, Italian, Japanese, Japanese-en, +# Korean, Korean-en, Latvian, Norwegian, Macedonian, Persian, Polish, +# Portuguese, Romanian, Russian, Serbian, Slovak, Slovene, Spanish, Swedish, +# Turkish, Ukrainian and Vietnamese. +# The default value is: English. + +OUTPUT_LANGUAGE = English + +# If the BRIEF_MEMBER_DESC tag is set to YES doxygen will include brief member +# descriptions after the members that are listed in the file and class +# documentation (similar to Javadoc). Set to NO to disable this. +# The default value is: YES. + +BRIEF_MEMBER_DESC = YES + +# If the REPEAT_BRIEF tag is set to YES doxygen will prepend the brief +# description of a member or function before the detailed description +# +# Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the +# brief descriptions will be completely suppressed. +# The default value is: YES. + +REPEAT_BRIEF = YES + +# This tag implements a quasi-intelligent brief description abbreviator that is +# used to form the text in various listings. 
Each string in this list, if found +# as the leading text of the brief description, will be stripped from the text +# and the result, after processing the whole list, is used as the annotated +# text. Otherwise, the brief description is used as-is. If left blank, the +# following values are used ($name is automatically replaced with the name of +# the entity):The $name class, The $name widget, The $name file, is, provides, +# specifies, contains, represents, a, an and the. + +ABBREVIATE_BRIEF = + +# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then +# doxygen will generate a detailed section even if there is only a brief +# description. +# The default value is: NO. + +ALWAYS_DETAILED_SEC = NO + +# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all +# inherited members of a class in the documentation of that class as if those +# members were ordinary class members. Constructors, destructors and assignment +# operators of the base classes will not be shown. +# The default value is: NO. + +INLINE_INHERITED_MEMB = YES + +# If the FULL_PATH_NAMES tag is set to YES doxygen will prepend the full path +# before files name in the file list and in the header files. If set to NO the +# shortest path that makes the file name unique will be used +# The default value is: YES. + +FULL_PATH_NAMES = YES + +# The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path. +# Stripping is only done if one of the specified strings matches the left-hand +# part of the path. The tag can be used to show relative paths in the file list. +# If left blank the directory from which doxygen is run is used as the path to +# strip. +# +# Note that you can specify absolute paths here, but also relative paths, which +# will be relative from the directory where doxygen is started. +# This tag requires that the tag FULL_PATH_NAMES is set to YES. + +STRIP_FROM_PATH = + +# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the +# path mentioned in the documentation of a class, which tells the reader which +# header file to include in order to use a class. If left blank only the name of +# the header file containing the class definition is used. Otherwise one should +# specify the list of include paths that are normally passed to the compiler +# using the -I flag. + +STRIP_FROM_INC_PATH = + +# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but +# less readable) file names. This can be useful is your file systems doesn't +# support long names like on DOS, Mac, or CD-ROM. +# The default value is: NO. + +SHORT_NAMES = NO + +# If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the +# first line (until the first dot) of a Javadoc-style comment as the brief +# description. If set to NO, the Javadoc-style will behave just like regular Qt- +# style comments (thus requiring an explicit @brief command for a brief +# description.) +# The default value is: NO. + +JAVADOC_AUTOBRIEF = NO + +# If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first +# line (until the first dot) of a Qt-style comment as the brief description. If +# set to NO, the Qt-style will behave just like regular Qt-style comments (thus +# requiring an explicit \brief command for a brief description.) +# The default value is: NO. + +QT_AUTOBRIEF = NO + +# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a +# multi-line C++ special comment block (i.e. a block of //! 
or /// comments) as +# a brief description. This used to be the default behavior. The new default is +# to treat a multi-line C++ comment block as a detailed description. Set this +# tag to YES if you prefer the old behavior instead. +# +# Note that setting this tag to YES also means that rational rose comments are +# not recognized any more. +# The default value is: NO. + +MULTILINE_CPP_IS_BRIEF = NO + +# If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the +# documentation from any documented member that it re-implements. +# The default value is: YES. + +INHERIT_DOCS = YES + +# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce a +# new page for each member. If set to NO, the documentation of a member will be +# part of the file/class/namespace that contains it. +# The default value is: NO. + +SEPARATE_MEMBER_PAGES = NO + +# The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen +# uses this value to replace tabs by spaces in code fragments. +# Minimum value: 1, maximum value: 16, default value: 4. + +TAB_SIZE = 4 + +# This tag can be used to specify a number of aliases that act as commands in +# the documentation. An alias has the form: +# name=value +# For example adding +# "sideeffect=@par Side Effects:\n" +# will allow you to put the command \sideeffect (or @sideeffect) in the +# documentation, which will result in a user-defined paragraph with heading +# "Side Effects:". You can put \n's in the value part of an alias to insert +# newlines. + +ALIASES = + +# This tag can be used to specify a number of word-keyword mappings (TCL only). +# A mapping has the form "name=value". For example adding "class=itcl::class" +# will allow you to use the command class in the itcl::class meaning. + +TCL_SUBST = + +# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources +# only. Doxygen will then generate output that is more tailored for C. For +# instance, some of the names that are used will be different. The list of all +# members will be omitted, etc. +# The default value is: NO. + +OPTIMIZE_OUTPUT_FOR_C = NO + +# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or +# Python sources only. Doxygen will then generate output that is more tailored +# for that language. For instance, namespaces will be presented as packages, +# qualified scopes will look different, etc. +# The default value is: NO. + +OPTIMIZE_OUTPUT_JAVA = NO + +# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran +# sources. Doxygen will then generate output that is tailored for Fortran. +# The default value is: NO. + +OPTIMIZE_FOR_FORTRAN = NO + +# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL +# sources. Doxygen will then generate output that is tailored for VHDL. +# The default value is: NO. + +OPTIMIZE_OUTPUT_VHDL = NO + +# Doxygen selects the parser to use depending on the extension of the files it +# parses. With this tag you can assign which parser to use for a given +# extension. Doxygen has a built-in mapping, but you can override or extend it +# using this tag. The format is ext=language, where ext is a file extension, and +# language is one of the parsers supported by doxygen: IDL, Java, Javascript, +# C#, C, C++, D, PHP, Objective-C, Python, Fortran, VHDL. For instance to make +# doxygen treat .inc files as Fortran files (default is PHP), and .f files as C +# (default is Fortran), use: inc=Fortran f=C. 
+# +# Note For files without extension you can use no_extension as a placeholder. +# +# Note that for custom extensions you also need to set FILE_PATTERNS otherwise +# the files are not read by doxygen. + +EXTENSION_MAPPING = + +# If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments +# according to the Markdown format, which allows for more readable +# documentation. See http://daringfireball.net/projects/markdown/ for details. +# The output of markdown processing is further processed by doxygen, so you can +# mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in +# case of backward compatibilities issues. +# The default value is: YES. + +MARKDOWN_SUPPORT = YES + +# When enabled doxygen tries to link words that correspond to documented +# classes, or namespaces to their corresponding documentation. Such a link can +# be prevented in individual cases by by putting a % sign in front of the word +# or globally by setting AUTOLINK_SUPPORT to NO. +# The default value is: YES. + +AUTOLINK_SUPPORT = YES + +# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want +# to include (a tag file for) the STL sources as input, then you should set this +# tag to YES in order to let doxygen match functions declarations and +# definitions whose arguments contain STL classes (e.g. func(std::string); +# versus func(std::string) {}). This also make the inheritance and collaboration +# diagrams that involve STL classes more complete and accurate. +# The default value is: NO. + +BUILTIN_STL_SUPPORT = NO + +# If you use Microsoft's C++/CLI language, you should set this option to YES to +# enable parsing support. +# The default value is: NO. + +CPP_CLI_SUPPORT = NO + +# Set the SIP_SUPPORT tag to YES if your project consists of sip (see: +# http://www.riverbankcomputing.co.uk/software/sip/intro) sources only. Doxygen +# will parse them like normal C++ but will assume all classes use public instead +# of private inheritance when no explicit protection keyword is present. +# The default value is: NO. + +SIP_SUPPORT = NO + +# For Microsoft's IDL there are propget and propput attributes to indicate +# getter and setter methods for a property. Setting this option to YES will make +# doxygen to replace the get and set methods by a property in the documentation. +# This will only work if the methods are indeed getting or setting a simple +# type. If this is not the case, or you want to show the methods anyway, you +# should set this option to NO. +# The default value is: YES. + +IDL_PROPERTY_SUPPORT = YES + +# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC +# tag is set to YES, then doxygen will reuse the documentation of the first +# member in the group (if any) for the other members of the group. By default +# all members of a group must be documented explicitly. +# The default value is: NO. + +DISTRIBUTE_GROUP_DOC = NO + +# Set the SUBGROUPING tag to YES to allow class member groups of the same type +# (for instance a group of public functions) to be put as a subgroup of that +# type (e.g. under the Public Functions section). Set it to NO to prevent +# subgrouping. Alternatively, this can be done per class using the +# \nosubgrouping command. +# The default value is: YES. + +SUBGROUPING = YES + +# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions +# are shown inside the group in which they are included (e.g. 
using \ingroup) +# instead of on a separate page (for HTML and Man pages) or section (for LaTeX +# and RTF). +# +# Note that this feature does not work in combination with +# SEPARATE_MEMBER_PAGES. +# The default value is: NO. + +INLINE_GROUPED_CLASSES = NO + +# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions +# with only public data fields or simple typedef fields will be shown inline in +# the documentation of the scope in which they are defined (i.e. file, +# namespace, or group documentation), provided this scope is documented. If set +# to NO, structs, classes, and unions are shown on a separate page (for HTML and +# Man pages) or section (for LaTeX and RTF). +# The default value is: NO. + +INLINE_SIMPLE_STRUCTS = NO + +# When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or +# enum is documented as struct, union, or enum with the name of the typedef. So +# typedef struct TypeS {} TypeT, will appear in the documentation as a struct +# with name TypeT. When disabled the typedef will appear as a member of a file, +# namespace, or class. And the struct will be named TypeS. This can typically be +# useful for C code in case the coding convention dictates that all compound +# types are typedef'ed and only the typedef is referenced, never the tag name. +# The default value is: NO. + +TYPEDEF_HIDES_STRUCT = NO + +# The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This +# cache is used to resolve symbols given their name and scope. Since this can be +# an expensive process and often the same symbol appears multiple times in the +# code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small +# doxygen will become slower. If the cache is too large, memory is wasted. The +# cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range +# is 0..9, the default is 0, corresponding to a cache size of 2^16=65536 +# symbols. At the end of a run doxygen will report the cache usage and suggest +# the optimal cache size from a speed point of view. +# Minimum value: 0, maximum value: 9, default value: 0. + +LOOKUP_CACHE_SIZE = 0 + +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- + +# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in +# documentation are documented, even if no documentation was available. Private +# class members and static file members will be hidden unless the +# EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES. +# Note: This will also disable the warnings about undocumented members that are +# normally produced when WARNINGS is set to YES. +# The default value is: NO. + +EXTRACT_ALL = NO + +# If the EXTRACT_PRIVATE tag is set to YES all private members of a class will +# be included in the documentation. +# The default value is: NO. + +EXTRACT_PRIVATE = NO + +# If the EXTRACT_PACKAGE tag is set to YES all members with package or internal +# scope will be included in the documentation. +# The default value is: NO. + +EXTRACT_PACKAGE = NO + +# If the EXTRACT_STATIC tag is set to YES all static members of a file will be +# included in the documentation. +# The default value is: NO. + +EXTRACT_STATIC = NO + +# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) defined +# locally in source files will be included in the documentation. 
If set to NO +# only classes defined in header files are included. Does not have any effect +# for Java sources. +# The default value is: YES. + +EXTRACT_LOCAL_CLASSES = NO + +# This flag is only useful for Objective-C code. When set to YES local methods, +# which are defined in the implementation section but not in the interface are +# included in the documentation. If set to NO only methods in the interface are +# included. +# The default value is: NO. + +EXTRACT_LOCAL_METHODS = NO + +# If this flag is set to YES, the members of anonymous namespaces will be +# extracted and appear in the documentation as a namespace called +# 'anonymous_namespace{file}', where file will be replaced with the base name of +# the file that contains the anonymous namespace. By default anonymous namespace +# are hidden. +# The default value is: NO. + +EXTRACT_ANON_NSPACES = NO + +# If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all +# undocumented members inside documented classes or files. If set to NO these +# members will be included in the various overviews, but no documentation +# section is generated. This option has no effect if EXTRACT_ALL is enabled. +# The default value is: NO. + +HIDE_UNDOC_MEMBERS = NO + +# If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all +# undocumented classes that are normally visible in the class hierarchy. If set +# to NO these classes will be included in the various overviews. This option has +# no effect if EXTRACT_ALL is enabled. +# The default value is: NO. + +HIDE_UNDOC_CLASSES = NO + +# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend +# (class|struct|union) declarations. If set to NO these declarations will be +# included in the documentation. +# The default value is: NO. + +HIDE_FRIEND_COMPOUNDS = NO + +# If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any +# documentation blocks found inside the body of a function. If set to NO these +# blocks will be appended to the function's detailed documentation block. +# The default value is: NO. + +HIDE_IN_BODY_DOCS = NO + +# The INTERNAL_DOCS tag determines if documentation that is typed after a +# \internal command is included. If the tag is set to NO then the documentation +# will be excluded. Set it to YES to include the internal documentation. +# The default value is: NO. + +INTERNAL_DOCS = NO + +# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file +# names in lower-case letters. If set to YES upper-case letters are also +# allowed. This is useful if you have classes or files whose names only differ +# in case and if your file system supports case sensitive file names. Windows +# and Mac users are advised to set this option to NO. +# The default value is: system dependent. + +CASE_SENSE_NAMES = YES + +# If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with +# their full class and namespace scopes in the documentation. If set to YES the +# scope will be hidden. +# The default value is: NO. + +HIDE_SCOPE_NAMES = NO + +# If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of +# the files that are included by a file in the documentation of that file. +# The default value is: YES. + +SHOW_INCLUDE_FILES = YES + +# If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include +# files with double quotes in the documentation rather than with sharp brackets. +# The default value is: NO. 
+ +FORCE_LOCAL_INCLUDES = NO + +# If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the +# documentation for inline members. +# The default value is: YES. + +INLINE_INFO = YES + +# If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the +# (detailed) documentation of file and class members alphabetically by member +# name. If set to NO the members will appear in declaration order. +# The default value is: YES. + +SORT_MEMBER_DOCS = YES + +# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief +# descriptions of file, namespace and class members alphabetically by member +# name. If set to NO the members will appear in declaration order. +# The default value is: NO. + +SORT_BRIEF_DOCS = NO + +# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the +# (brief and detailed) documentation of class members so that constructors and +# destructors are listed first. If set to NO the constructors will appear in the +# respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS. +# Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief +# member documentation. +# Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting +# detailed member documentation. +# The default value is: NO. + +SORT_MEMBERS_CTORS_1ST = NO + +# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy +# of group names into alphabetical order. If set to NO the group names will +# appear in their defined order. +# The default value is: NO. + +SORT_GROUP_NAMES = NO + +# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by +# fully-qualified names, including namespaces. If set to NO, the class list will +# be sorted only by class name, not including the namespace part. +# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. +# Note: This option applies only to the class list, not to the alphabetical +# list. +# The default value is: NO. + +SORT_BY_SCOPE_NAME = NO + +# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper +# type resolution of all parameters of a function it will reject a match between +# the prototype and the implementation of a member function even if there is +# only one candidate or it is obvious which candidate to choose by doing a +# simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still +# accept a match between prototype and implementation in such cases. +# The default value is: NO. + +STRICT_PROTO_MATCHING = NO + +# The GENERATE_TODOLIST tag can be used to enable ( YES) or disable ( NO) the +# todo list. This list is created by putting \todo commands in the +# documentation. +# The default value is: YES. + +GENERATE_TODOLIST = YES + +# The GENERATE_TESTLIST tag can be used to enable ( YES) or disable ( NO) the +# test list. This list is created by putting \test commands in the +# documentation. +# The default value is: YES. + +GENERATE_TESTLIST = YES + +# The GENERATE_BUGLIST tag can be used to enable ( YES) or disable ( NO) the bug +# list. This list is created by putting \bug commands in the documentation. +# The default value is: YES. + +GENERATE_BUGLIST = YES + +# The GENERATE_DEPRECATEDLIST tag can be used to enable ( YES) or disable ( NO) +# the deprecated list. This list is created by putting \deprecated commands in +# the documentation. +# The default value is: YES. 
+ +GENERATE_DEPRECATEDLIST= YES + +# The ENABLED_SECTIONS tag can be used to enable conditional documentation +# sections, marked by \if ... \endif and \cond +# ... \endcond blocks. + +ENABLED_SECTIONS = + +# The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the +# initial value of a variable or macro / define can have for it to appear in the +# documentation. If the initializer consists of more lines than specified here +# it will be hidden. Use a value of 0 to hide initializers completely. The +# appearance of the value of individual variables and macros / defines can be +# controlled using \showinitializer or \hideinitializer command in the +# documentation regardless of this setting. +# Minimum value: 0, maximum value: 10000, default value: 30. + +MAX_INITIALIZER_LINES = 30 + +# Set the SHOW_USED_FILES tag to NO to disable the list of files generated at +# the bottom of the documentation of classes and structs. If set to YES the list +# will mention the files that were used to generate the documentation. +# The default value is: YES. + +SHOW_USED_FILES = YES + +# Set the SHOW_FILES tag to NO to disable the generation of the Files page. This +# will remove the Files entry from the Quick Index and from the Folder Tree View +# (if specified). +# The default value is: YES. + +SHOW_FILES = YES + +# Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces +# page. This will remove the Namespaces entry from the Quick Index and from the +# Folder Tree View (if specified). +# The default value is: YES. + +SHOW_NAMESPACES = YES + +# The FILE_VERSION_FILTER tag can be used to specify a program or script that +# doxygen should invoke to get the current version for each file (typically from +# the version control system). Doxygen will invoke the program by executing (via +# popen()) the command command input-file, where command is the value of the +# FILE_VERSION_FILTER tag, and input-file is the name of an input file provided +# by doxygen. Whatever the program writes to standard output is used as the file +# version. For an example see the documentation. + +FILE_VERSION_FILTER = + +# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed +# by doxygen. The layout file controls the global structure of the generated +# output files in an output format independent way. To create the layout file +# that represents doxygen's defaults, run doxygen with the -l option. You can +# optionally specify a file name after the option, if omitted DoxygenLayout.xml +# will be used as the name of the layout file. +# +# Note that if you run doxygen from a directory containing a file called +# DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE +# tag is left empty. + +LAYOUT_FILE = + +# The CITE_BIB_FILES tag can be used to specify one or more bib files containing +# the reference definitions. This must be a list of .bib files. The .bib +# extension is automatically appended if omitted. This requires the bibtex tool +# to be installed. See also http://en.wikipedia.org/wiki/BibTeX for more info. +# For LaTeX the style of the bibliography can be controlled using +# LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the +# search path. Do not use file names with spaces, bibtex cannot handle them. See +# also \cite for info how to create references. 
+ +CITE_BIB_FILES = + +#--------------------------------------------------------------------------- +# Configuration options related to warning and progress messages +#--------------------------------------------------------------------------- + +# The QUIET tag can be used to turn on/off the messages that are generated to +# standard output by doxygen. If QUIET is set to YES this implies that the +# messages are off. +# The default value is: NO. + +QUIET = NO + +# The WARNINGS tag can be used to turn on/off the warning messages that are +# generated to standard error ( stderr) by doxygen. If WARNINGS is set to YES +# this implies that the warnings are on. +# +# Tip: Turn warnings on while writing the documentation. +# The default value is: YES. + +WARNINGS = YES + +# If the WARN_IF_UNDOCUMENTED tag is set to YES, then doxygen will generate +# warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag +# will automatically be disabled. +# The default value is: YES. + +WARN_IF_UNDOCUMENTED = YES + +# If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for +# potential errors in the documentation, such as not documenting some parameters +# in a documented function, or documenting parameters that don't exist or using +# markup commands wrongly. +# The default value is: YES. + +WARN_IF_DOC_ERROR = YES + +# This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that +# are documented, but have no documentation for their parameters or return +# value. If set to NO doxygen will only warn about wrong or incomplete parameter +# documentation, but not about the absence of documentation. +# The default value is: NO. + +WARN_NO_PARAMDOC = NO + +# The WARN_FORMAT tag determines the format of the warning messages that doxygen +# can produce. The string should contain the $file, $line, and $text tags, which +# will be replaced by the file and line number from which the warning originated +# and the warning text. Optionally the format may contain $version, which will +# be replaced by the version of the file (if it could be obtained via +# FILE_VERSION_FILTER) +# The default value is: $file:$line: $text. + +WARN_FORMAT = "$file:$line: $text" + +# The WARN_LOGFILE tag can be used to specify a file to which warning and error +# messages should be written. If left blank the output is written to standard +# error (stderr). + +WARN_LOGFILE = + +#--------------------------------------------------------------------------- +# Configuration options related to the input files +#--------------------------------------------------------------------------- + +# The INPUT tag is used to specify the files and/or directories that contain +# documented source files. You may enter file names like myfile.cpp or +# directories like /usr/src/myproject. Separate the files or directories with +# spaces. +# Note: If this tag is empty the current directory is searched. + +INPUT = ./faiss + +# This tag can be used to specify the character encoding of the source files +# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses +# libiconv (or the iconv built into libc) for the transcoding. See the libiconv +# documentation (see: http://www.gnu.org/software/libiconv) for the list of +# possible encodings. +# The default value is: UTF-8. 
+ +INPUT_ENCODING = UTF-8 + +# If the value of the INPUT tag contains directories, you can use the +# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and +# *.h) to filter out the source-files in the directories. If left blank the +# following patterns are tested:*.c, *.cc, *.cxx, *.cpp, *.c++, *.java, *.ii, +# *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, *.hh, *.hxx, *.hpp, +# *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, *.m, *.markdown, +# *.md, *.mm, *.dox, *.py, *.f90, *.f, *.for, *.tcl, *.vhd, *.vhdl, *.ucf, +# *.qsf, *.as and *.js. + +FILE_PATTERNS = *.h *.cuh + +# The RECURSIVE tag can be used to specify whether or not subdirectories should +# be searched for input files as well. +# The default value is: NO. + +RECURSIVE = YES + +# The EXCLUDE tag can be used to specify files and/or directories that should be +# excluded from the INPUT source files. This way you can easily exclude a +# subdirectory from a directory tree whose root is specified with the INPUT tag. +# +# Note that relative paths are relative to the directory from which doxygen is +# run. + +EXCLUDE = gpu/test + +# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or +# directories that are symbolic links (a Unix file system feature) are excluded +# from the input. +# The default value is: NO. + +EXCLUDE_SYMLINKS = NO + +# If the value of the INPUT tag contains directories, you can use the +# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude +# certain files from those directories. +# +# Note that the wildcards are matched against the file with absolute path, so to +# exclude all test directories for example use the pattern */test/* + +EXCLUDE_PATTERNS = + +# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names +# (namespaces, classes, functions, etc.) that should be excluded from the +# output. The symbol name can be a fully qualified name, a word, or if the +# wildcard * is used, a substring. Examples: ANamespace, AClass, +# AClass::ANamespace, ANamespace::*Test +# +# Note that the wildcards are matched against the file with absolute path, so to +# exclude all test directories use the pattern */test/* + +EXCLUDE_SYMBOLS = + +# The EXAMPLE_PATH tag can be used to specify one or more files or directories +# that contain example code fragments that are included (see the \include +# command). + +EXAMPLE_PATH = + +# If the value of the EXAMPLE_PATH tag contains directories, you can use the +# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and +# *.h) to filter out the source-files in the directories. If left blank all +# files are included. + +EXAMPLE_PATTERNS = + +# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be +# searched for input files to be used with the \include or \dontinclude commands +# irrespective of the value of the RECURSIVE tag. +# The default value is: NO. + +EXAMPLE_RECURSIVE = NO + +# The IMAGE_PATH tag can be used to specify one or more files or directories +# that contain images that are to be included in the documentation (see the +# \image command). + +IMAGE_PATH = + +# The INPUT_FILTER tag can be used to specify a program that doxygen should +# invoke to filter for each input file. Doxygen will invoke the filter program +# by executing (via popen()) the command: +# +# +# +# where is the value of the INPUT_FILTER tag, and is the +# name of an input file. Doxygen will then use the output that the filter +# program writes to standard output. 
If FILTER_PATTERNS is specified, this tag +# will be ignored. +# +# Note that the filter must not add or remove lines; it is applied before the +# code is scanned, but not when the output code is generated. If lines are added +# or removed, the anchors will not be placed correctly. + +INPUT_FILTER = + +# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern +# basis. Doxygen will compare the file name with each pattern and apply the +# filter if there is a match. The filters are a list of the form: pattern=filter +# (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how +# filters are used. If the FILTER_PATTERNS tag is empty or if none of the +# patterns match the file name, INPUT_FILTER is applied. + +FILTER_PATTERNS = + +# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using +# INPUT_FILTER ) will also be used to filter the input files that are used for +# producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES). +# The default value is: NO. + +FILTER_SOURCE_FILES = NO + +# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file +# pattern. A pattern will override the setting for FILTER_PATTERN (if any) and +# it is also possible to disable source filtering for a specific pattern using +# *.ext= (so without naming a filter). +# This tag requires that the tag FILTER_SOURCE_FILES is set to YES. + +FILTER_SOURCE_PATTERNS = + +# If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that +# is part of the input, its contents will be placed on the main page +# (index.html). This can be useful if you have a project on for instance GitHub +# and want to reuse the introduction page also for the doxygen output. + +USE_MDFILE_AS_MAINPAGE = + +#--------------------------------------------------------------------------- +# Configuration options related to source browsing +#--------------------------------------------------------------------------- + +# If the SOURCE_BROWSER tag is set to YES then a list of source files will be +# generated. Documented entities will be cross-referenced with these sources. +# +# Note: To get rid of all source code in the generated output, make sure that +# also VERBATIM_HEADERS is set to NO. +# The default value is: NO. + +SOURCE_BROWSER = YES + +# Setting the INLINE_SOURCES tag to YES will include the body of functions, +# classes and enums directly into the documentation. +# The default value is: NO. + +INLINE_SOURCES = NO + +# Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any +# special comment blocks from generated source code fragments. Normal C, C++ and +# Fortran comments will always remain visible. +# The default value is: YES. + +STRIP_CODE_COMMENTS = NO + +# If the REFERENCED_BY_RELATION tag is set to YES then for each documented +# function all documented functions referencing it will be listed. +# The default value is: NO. + +REFERENCED_BY_RELATION = NO + +# If the REFERENCES_RELATION tag is set to YES then for each documented function +# all documented entities called/used by that function will be listed. +# The default value is: NO. + +REFERENCES_RELATION = NO + +# If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set +# to YES, then the hyperlinks from functions in REFERENCES_RELATION and +# REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will +# link to the documentation. +# The default value is: YES. 
+ +REFERENCES_LINK_SOURCE = YES + +# If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the +# source code will show a tooltip with additional information such as prototype, +# brief description and links to the definition and documentation. Since this +# will make the HTML file larger and loading of large files a bit slower, you +# can opt to disable this feature. +# The default value is: YES. +# This tag requires that the tag SOURCE_BROWSER is set to YES. + +SOURCE_TOOLTIPS = YES + +# If the USE_HTAGS tag is set to YES then the references to source code will +# point to the HTML generated by the htags(1) tool instead of doxygen built-in +# source browser. The htags tool is part of GNU's global source tagging system +# (see http://www.gnu.org/software/global/global.html). You will need version +# 4.8.6 or higher. +# +# To use it do the following: +# - Install the latest version of global +# - Enable SOURCE_BROWSER and USE_HTAGS in the config file +# - Make sure the INPUT points to the root of the source tree +# - Run doxygen as normal +# +# Doxygen will invoke htags (and that will in turn invoke gtags), so these +# tools must be available from the command line (i.e. in the search path). +# +# The result: instead of the source browser generated by doxygen, the links to +# source code will now point to the output of htags. +# The default value is: NO. +# This tag requires that the tag SOURCE_BROWSER is set to YES. + +USE_HTAGS = NO + +# If the VERBATIM_HEADERS tag is set the YES then doxygen will generate a +# verbatim copy of the header file for each class for which an include is +# specified. Set to NO to disable this. +# See also: Section \class. +# The default value is: YES. + +VERBATIM_HEADERS = YES + +#--------------------------------------------------------------------------- +# Configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- + +# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all +# compounds will be generated. Enable this if the project contains a lot of +# classes, structs, unions or interfaces. +# The default value is: YES. + +ALPHABETICAL_INDEX = YES + +# The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in +# which the alphabetical index list will be split. +# Minimum value: 1, maximum value: 20, default value: 5. +# This tag requires that the tag ALPHABETICAL_INDEX is set to YES. + +COLS_IN_ALPHA_INDEX = 5 + +# In case all classes in a project start with a common prefix, all classes will +# be put under the same header in the alphabetical index. The IGNORE_PREFIX tag +# can be used to specify a prefix (or a list of prefixes) that should be ignored +# while generating the index headers. +# This tag requires that the tag ALPHABETICAL_INDEX is set to YES. + +IGNORE_PREFIX = + +#--------------------------------------------------------------------------- +# Configuration options related to the HTML output +#--------------------------------------------------------------------------- + +# If the GENERATE_HTML tag is set to YES doxygen will generate HTML output +# The default value is: YES. + +GENERATE_HTML = NO + +# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a +# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of +# it. +# The default directory is: html. +# This tag requires that the tag GENERATE_HTML is set to YES. 
+ +HTML_OUTPUT = html + +# The HTML_FILE_EXTENSION tag can be used to specify the file extension for each +# generated HTML page (for example: .htm, .php, .asp). +# The default value is: .html. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_FILE_EXTENSION = .html + +# The HTML_HEADER tag can be used to specify a user-defined HTML header file for +# each generated HTML page. If the tag is left blank doxygen will generate a +# standard header. +# +# To get valid HTML the header file that includes any scripts and style sheets +# that doxygen needs, which is dependent on the configuration options used (e.g. +# the setting GENERATE_TREEVIEW). It is highly recommended to start with a +# default header using +# doxygen -w html new_header.html new_footer.html new_stylesheet.css +# YourConfigFile +# and then modify the file new_header.html. See also section "Doxygen usage" +# for information on how to generate the default header that doxygen normally +# uses. +# Note: The header is subject to change so you typically have to regenerate the +# default header when upgrading to a newer version of doxygen. For a description +# of the possible markers and block names see the documentation. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_HEADER = + +# The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each +# generated HTML page. If the tag is left blank doxygen will generate a standard +# footer. See HTML_HEADER for more information on how to generate a default +# footer and what special commands can be used inside the footer. See also +# section "Doxygen usage" for information on how to generate the default footer +# that doxygen normally uses. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_FOOTER = + +# The HTML_STYLESHEET tag can be used to specify a user-defined cascading style +# sheet that is used by each HTML page. It can be used to fine-tune the look of +# the HTML output. If left blank doxygen will generate a default style sheet. +# See also section "Doxygen usage" for information on how to generate the style +# sheet that doxygen normally uses. +# Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as +# it is more robust and this tag (HTML_STYLESHEET) will in the future become +# obsolete. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_STYLESHEET = + +# The HTML_EXTRA_STYLESHEET tag can be used to specify an additional user- +# defined cascading style sheet that is included after the standard style sheets +# created by doxygen. Using this option one can overrule certain style aspects. +# This is preferred over using HTML_STYLESHEET since it does not replace the +# standard style sheet and is therefor more robust against future updates. +# Doxygen will copy the style sheet file to the output directory. For an example +# see the documentation. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_EXTRA_STYLESHEET = + +# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or +# other source files which should be copied to the HTML output directory. Note +# that these files will be copied to the base HTML output directory. Use the +# $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these +# files. In the HTML_STYLESHEET file, use the file name only. Also note that the +# files will be copied as-is; there are no commands or markers available. +# This tag requires that the tag GENERATE_HTML is set to YES. 
+ +HTML_EXTRA_FILES = + +# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen +# will adjust the colors in the stylesheet and background images according to +# this color. Hue is specified as an angle on a colorwheel, see +# http://en.wikipedia.org/wiki/Hue for more information. For instance the value +# 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 +# purple, and 360 is red again. +# Minimum value: 0, maximum value: 359, default value: 220. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE_HUE = 220 + +# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors +# in the HTML output. For a value of 0 the output will use grayscales only. A +# value of 255 will produce the most vivid colors. +# Minimum value: 0, maximum value: 255, default value: 100. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE_SAT = 100 + +# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the +# luminance component of the colors in the HTML output. Values below 100 +# gradually make the output lighter, whereas values above 100 make the output +# darker. The value divided by 100 is the actual gamma applied, so 80 represents +# a gamma of 0.8, The value 220 represents a gamma of 2.2, and 100 does not +# change the gamma. +# Minimum value: 40, maximum value: 240, default value: 80. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE_GAMMA = 80 + +# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML +# page will contain the date and time when the page was generated. Setting this +# to NO can help when comparing the output of multiple runs. +# The default value is: YES. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_TIMESTAMP = NO + +# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML +# documentation will contain sections that can be hidden and shown after the +# page has loaded. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_DYNAMIC_SECTIONS = NO + +# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries +# shown in the various tree structured indices initially; the user can expand +# and collapse entries dynamically later on. Doxygen will expand the tree to +# such a level that at most the specified number of entries are visible (unless +# a fully collapsed tree already exceeds this amount). So setting the number of +# entries 1 will produce a full collapsed tree by default. 0 is a special value +# representing an infinite number of entries and will result in a full expanded +# tree by default. +# Minimum value: 0, maximum value: 9999, default value: 100. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_INDEX_NUM_ENTRIES = 100 + +# If the GENERATE_DOCSET tag is set to YES, additional index files will be +# generated that can be used as input for Apple's Xcode 3 integrated development +# environment (see: http://developer.apple.com/tools/xcode/), introduced with +# OSX 10.5 (Leopard). To create a documentation set, doxygen will generate a +# Makefile in the HTML output directory. Running make will produce the docset in +# that directory and running make install will install the docset in +# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at +# startup. 
See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html +# for more information. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_DOCSET = NO + +# This tag determines the name of the docset feed. A documentation feed provides +# an umbrella under which multiple documentation sets from a single provider +# (such as a company or product suite) can be grouped. +# The default value is: Doxygen generated docs. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_FEEDNAME = "Doxygen generated docs" + +# This tag specifies a string that should uniquely identify the documentation +# set bundle. This should be a reverse domain-name style string, e.g. +# com.mycompany.MyDocSet. Doxygen will append .docset to the name. +# The default value is: org.doxygen.Project. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_BUNDLE_ID = org.doxygen.Project + +# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify +# the documentation publisher. This should be a reverse domain-name style +# string, e.g. com.mycompany.MyDocSet.documentation. +# The default value is: org.doxygen.Publisher. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_PUBLISHER_ID = org.doxygen.Publisher + +# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. +# The default value is: Publisher. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_PUBLISHER_NAME = Publisher + +# If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three +# additional HTML index files: index.hhp, index.hhc, and index.hhk. The +# index.hhp is a project file that can be read by Microsoft's HTML Help Workshop +# (see: http://www.microsoft.com/en-us/download/details.aspx?id=21138) on +# Windows. +# +# The HTML Help Workshop contains a compiler that can convert all HTML output +# generated by doxygen into a single compiled HTML file (.chm). Compiled HTML +# files are now used as the Windows 98 help format, and will replace the old +# Windows help format (.hlp) on all Windows platforms in the future. Compressed +# HTML files also contain an index, a table of contents, and you can search for +# words in the documentation. The HTML workshop also contains a viewer for +# compressed HTML files. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_HTMLHELP = NO + +# The CHM_FILE tag can be used to specify the file name of the resulting .chm +# file. You can add a path in front of the file if the result should not be +# written to the html output directory. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +CHM_FILE = + +# The HHC_LOCATION tag can be used to specify the location (absolute path +# including file name) of the HTML help compiler ( hhc.exe). If non-empty +# doxygen will try to run the HTML help compiler on the generated index.hhp. +# The file has to be specified with full path. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +HHC_LOCATION = + +# The GENERATE_CHI flag controls if a separate .chi index file is generated ( +# YES) or that it should be included in the master .chm file ( NO). +# The default value is: NO. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +GENERATE_CHI = NO + +# The CHM_INDEX_ENCODING is used to encode HtmlHelp index ( hhk), content ( hhc) +# and project file content. 
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +CHM_INDEX_ENCODING = + +# The BINARY_TOC flag controls whether a binary table of contents is generated ( +# YES) or a normal table of contents ( NO) in the .chm file. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +BINARY_TOC = NO + +# The TOC_EXPAND flag can be set to YES to add extra items for group members to +# the table of contents of the HTML help documentation and to the tree view. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +TOC_EXPAND = NO + +# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and +# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that +# can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help +# (.qch) of the generated HTML documentation. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_QHP = NO + +# If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify +# the file name of the resulting .qch file. The path specified is relative to +# the HTML output folder. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QCH_FILE = + +# The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help +# Project output. For more information please see Qt Help Project / Namespace +# (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#namespace). +# The default value is: org.doxygen.Project. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_NAMESPACE = org.doxygen.Project + +# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt +# Help Project output. For more information please see Qt Help Project / Virtual +# Folders (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#virtual- +# folders). +# The default value is: doc. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_VIRTUAL_FOLDER = doc + +# If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom +# filter to add. For more information please see Qt Help Project / Custom +# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- +# filters). +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_CUST_FILTER_NAME = + +# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the +# custom filter to add. For more information please see Qt Help Project / Custom +# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- +# filters). +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_CUST_FILTER_ATTRS = + +# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this +# project's filter section matches. Qt Help Project / Filter Attributes (see: +# http://qt-project.org/doc/qt-4.8/qthelpproject.html#filter-attributes). +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_SECT_FILTER_ATTRS = + +# The QHG_LOCATION tag can be used to specify the location of Qt's +# qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the +# generated .qhp file. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHG_LOCATION = + +# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be +# generated, together with the HTML files, they form an Eclipse help plugin. 
To +# install this plugin and make it available under the help contents menu in +# Eclipse, the contents of the directory containing the HTML and XML files needs +# to be copied into the plugins directory of eclipse. The name of the directory +# within the plugins directory should be the same as the ECLIPSE_DOC_ID value. +# After copying Eclipse needs to be restarted before the help appears. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_ECLIPSEHELP = NO + +# A unique identifier for the Eclipse help plugin. When installing the plugin +# the directory name containing the HTML and XML files should also have this +# name. Each documentation set should have its own identifier. +# The default value is: org.doxygen.Project. +# This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES. + +ECLIPSE_DOC_ID = org.doxygen.Project + +# If you want full control over the layout of the generated HTML pages it might +# be necessary to disable the index and replace it with your own. The +# DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top +# of each HTML page. A value of NO enables the index and the value YES disables +# it. Since the tabs in the index contain the same information as the navigation +# tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +DISABLE_INDEX = NO + +# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index +# structure should be generated to display hierarchical information. If the tag +# value is set to YES, a side panel will be generated containing a tree-like +# index structure (just like the one that is generated for HTML Help). For this +# to work a browser that supports JavaScript, DHTML, CSS and frames is required +# (i.e. any modern browser). Windows users are probably better off using the +# HTML help feature. Via custom stylesheets (see HTML_EXTRA_STYLESHEET) one can +# further fine-tune the look of the index. As an example, the default style +# sheet generated by doxygen has an example that shows how to put an image at +# the root of the tree instead of the PROJECT_NAME. Since the tree basically has +# the same information as the tab index, you could consider setting +# DISABLE_INDEX to YES when enabling this option. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_TREEVIEW = NO + +# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that +# doxygen will group on one line in the generated HTML documentation. +# +# Note that a value of 0 will completely suppress the enum values from appearing +# in the overview section. +# Minimum value: 0, maximum value: 20, default value: 4. +# This tag requires that the tag GENERATE_HTML is set to YES. + +ENUM_VALUES_PER_LINE = 4 + +# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used +# to set the initial width (in pixels) of the frame in which the tree is shown. +# Minimum value: 0, maximum value: 1500, default value: 250. +# This tag requires that the tag GENERATE_HTML is set to YES. + +TREEVIEW_WIDTH = 250 + +# When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open links to +# external symbols imported via tag files in a separate window. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. 
+ +EXT_LINKS_IN_WINDOW = NO + +# Use this tag to change the font size of LaTeX formulas included as images in +# the HTML documentation. When you change the font size after a successful +# doxygen run you need to manually remove any form_*.png images from the HTML +# output directory to force them to be regenerated. +# Minimum value: 8, maximum value: 50, default value: 10. +# This tag requires that the tag GENERATE_HTML is set to YES. + +FORMULA_FONTSIZE = 10 + +# Use the FORMULA_TRANPARENT tag to determine whether or not the images +# generated for formulas are transparent PNGs. Transparent PNGs are not +# supported properly for IE 6.0, but are supported on all modern browsers. +# +# Note that when changing this option you need to delete any form_*.png files in +# the HTML output directory before the changes have effect. +# The default value is: YES. +# This tag requires that the tag GENERATE_HTML is set to YES. + +FORMULA_TRANSPARENT = YES + +# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see +# http://www.mathjax.org) which uses client side Javascript for the rendering +# instead of using prerendered bitmaps. Use this if you do not have LaTeX +# installed or if you want to formulas look prettier in the HTML output. When +# enabled you may also need to install MathJax separately and configure the path +# to it using the MATHJAX_RELPATH option. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +USE_MATHJAX = NO + +# When MathJax is enabled you can set the default output format to be used for +# the MathJax output. See the MathJax site (see: +# http://docs.mathjax.org/en/latest/output.html) for more details. +# Possible values are: HTML-CSS (which is slower, but has the best +# compatibility), NativeMML (i.e. MathML) and SVG. +# The default value is: HTML-CSS. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_FORMAT = HTML-CSS + +# When MathJax is enabled you need to specify the location relative to the HTML +# output directory using the MATHJAX_RELPATH option. The destination directory +# should contain the MathJax.js script. For instance, if the mathjax directory +# is located at the same level as the HTML output directory, then +# MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax +# Content Delivery Network so you can quickly see the result without installing +# MathJax. However, it is strongly recommended to install a local copy of +# MathJax from http://www.mathjax.org before deployment. +# The default value is: http://cdn.mathjax.org/mathjax/latest. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest + +# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax +# extension names that should be enabled during MathJax rendering. For example +# MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_EXTENSIONS = + +# The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces +# of code that will be used on startup of the MathJax code. See the MathJax site +# (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an +# example see the documentation. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_CODEFILE = + +# When the SEARCHENGINE tag is enabled doxygen will generate a search box for +# the HTML output. 
The underlying search engine uses javascript and DHTML and +# should work on any modern browser. Note that when using HTML help +# (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET) +# there is already a search function so this one should typically be disabled. +# For large projects the javascript based search engine can be slow, then +# enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to +# search using the keyboard; to jump to the search box use + S +# (what the is depends on the OS and browser, but it is typically +# , /