Compare commits
47 Commits
| Author | SHA1 | Date |
|---|---|---|
|  | cea1f6f87c |  |
|  | 6c0e39372b |  |
|  | 2bec67d2b6 |  |
|  | 133e715832 |  |
|  | 95cf2f16e2 |  |
|  | 47a4c153eb |  |
|  | faf5ae3533 |  |
|  | a44dccecac |  |
|  | 9cf9358b9c |  |
|  | de252fef31 |  |
|  | 9076bc27b8 |  |
|  | 50686c0819 |  |
|  | 1614203786 |  |
|  | 3d4c75a56c |  |
|  | 2684ee71dc |  |
|  | 1d321953ba |  |
|  | b3cb251369 |  |
|  | 0a17d2c9d8 |  |
|  | e3defbca84 |  |
|  | e407f63977 |  |
|  | 7add391b2c |  |
|  | efd6373b32 |  |
|  | d502fa24b0 |  |
|  | 258a9a5c7f |  |
|  | 5d41ac6115 |  |
|  | 2a0fdb49b8 |  |
|  | 9d1b7231b6 |  |
|  | ed3095b478 |  |
|  | 88eca75917 |  |
|  | 42de27e16a |  |
|  | c083bda5b7 |  |
|  | e86da38726 |  |
|  | 99076e38bc |  |
|  | 9698c1a02c |  |
|  | 851f0f04c3 |  |
|  | ae16d9d888 |  |
|  | 6e1af2eb0c |  |
|  | 7695dd0d50 |  |
|  | c2065473ad |  |
|  | 5f3870564d |  |
|  | c214b2e33e |  |
|  | 2420c5fd35 |  |
|  | f48f526f0a |  |
|  | 5dd74982ba |  |
|  | e07aaf52a7 |  |
|  | 30e5f12616 |  |
|  | 594427bf87 |  |
.github/workflows/build-and-publish.yml (vendored, new file, 262 lines)
@@ -0,0 +1,262 @@
name: CI - Build Multi-Platform Packages

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]
  workflow_dispatch:
    inputs:
      publish:
        description: 'Publish to PyPI (only use for emergency fixes)'
        required: true
        default: 'false'
        type: choice
        options:
          - 'false'
          - 'test'
          - 'prod'

jobs:
  # Build pure Python package: leann-core
  build-core:
    name: Build leann-core
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install uv
        uses: astral-sh/setup-uv@v4

      - name: Install build dependencies
        run: |
          uv pip install --system build twine

      - name: Build package
        run: |
          cd packages/leann-core
          uv build

      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: leann-core-dist
          path: packages/leann-core/dist/

  # Build binary package: leann-backend-hnsw (default backend)
  build-hnsw:
    name: Build leann-backend-hnsw
    strategy:
      matrix:
        os: [ubuntu-latest, macos-latest]
        python-version: ['3.9', '3.10', '3.11', '3.12']
    runs-on: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install uv
        uses: astral-sh/setup-uv@v4

      - name: Install system dependencies (Ubuntu)
        if: runner.os == 'Linux'
        run: |
          sudo apt-get update
          sudo apt-get install -y libomp-dev libboost-all-dev libzmq3-dev \
            pkg-config libopenblas-dev patchelf

      - name: Install system dependencies (macOS)
        if: runner.os == 'macOS'
        run: |
          brew install libomp boost zeromq

      - name: Install build dependencies
        run: |
          uv pip install --system scikit-build-core numpy swig
          uv pip install --system auditwheel delocate

      - name: Build wheel
        run: |
          cd packages/leann-backend-hnsw
          uv build --wheel --python python

      - name: Repair wheel (Linux)
        if: runner.os == 'Linux'
        run: |
          cd packages/leann-backend-hnsw
          auditwheel repair dist/*.whl -w dist_repaired
          rm -rf dist
          mv dist_repaired dist

      - name: Repair wheel (macOS)
        if: runner.os == 'macOS'
        run: |
          cd packages/leann-backend-hnsw
          delocate-wheel -w dist_repaired -v dist/*.whl
          rm -rf dist
          mv dist_repaired dist

      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: hnsw-${{ matrix.os }}-py${{ matrix.python-version }}
          path: packages/leann-backend-hnsw/dist/

  # Build binary package: leann-backend-diskann (multi-platform)
  build-diskann:
    name: Build leann-backend-diskann
    strategy:
      matrix:
        os: [ubuntu-latest, macos-latest]
        python-version: ['3.9', '3.10', '3.11', '3.12']
    runs-on: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install uv
        uses: astral-sh/setup-uv@v4

      - name: Install system dependencies (Ubuntu)
        if: runner.os == 'Linux'
        run: |
          sudo apt-get update
          sudo apt-get install -y libomp-dev libboost-all-dev libaio-dev libzmq3-dev \
            protobuf-compiler libprotobuf-dev libabsl-dev patchelf

          # Install Intel MKL using Intel's installer
          wget https://registrationcenter-download.intel.com/akdlm/IRC_NAS/79153e0f-74d7-45af-b8c2-258941adf58a/intel-onemkl-2025.0.0.940.sh
          sudo sh intel-onemkl-2025.0.0.940.sh -a --components intel.oneapi.lin.mkl.devel --action install --eula accept -s
          source /opt/intel/oneapi/setvars.sh
          echo "MKLROOT=/opt/intel/oneapi/mkl/latest" >> $GITHUB_ENV
          echo "LD_LIBRARY_PATH=/opt/intel/oneapi/mkl/latest/lib/intel64:$LD_LIBRARY_PATH" >> $GITHUB_ENV

      - name: Install system dependencies (macOS)
        if: runner.os == 'macOS'
        run: |
          brew install libomp boost zeromq protobuf
          # MKL is not available on Homebrew, but DiskANN can work without it

      - name: Install build dependencies
        run: |
          uv pip install --system scikit-build-core numpy Cython pybind11
          if [[ "$RUNNER_OS" == "Linux" ]]; then
            uv pip install --system auditwheel
          else
            uv pip install --system delocate
          fi

      - name: Build wheel
        run: |
          cd packages/leann-backend-diskann
          uv build --wheel --python python

      - name: Repair wheel (Linux)
        if: runner.os == 'Linux'
        run: |
          cd packages/leann-backend-diskann
          auditwheel repair dist/*.whl -w dist_repaired
          rm -rf dist
          mv dist_repaired dist

      - name: Repair wheel (macOS)
        if: runner.os == 'macOS'
        run: |
          cd packages/leann-backend-diskann
          delocate-wheel -w dist_repaired -v dist/*.whl
          rm -rf dist
          mv dist_repaired dist

      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: diskann-${{ matrix.os }}-py${{ matrix.python-version }}
          path: packages/leann-backend-diskann/dist/

  # Build meta-package: leann (build last)
  build-meta:
    name: Build leann meta-package
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install uv
        uses: astral-sh/setup-uv@v4

      - name: Install build dependencies
        run: |
          uv pip install --system build

      - name: Build package
        run: |
          cd packages/leann
          uv build

      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: leann-meta-dist
          path: packages/leann/dist/

  # Publish to PyPI (only for emergency fixes or manual triggers)
  publish:
    name: Publish to PyPI (Emergency)
    needs: [build-core, build-hnsw, build-diskann, build-meta]
    runs-on: ubuntu-latest
    if: github.event_name == 'workflow_dispatch' && github.event.inputs.publish != 'false'

    steps:
      - name: Download all artifacts
        uses: actions/download-artifact@v4
        with:
          path: dist

      - name: Flatten directory structure
        run: |
          mkdir -p all_wheels
          find dist -name "*.whl" -exec cp {} all_wheels/ \;
          find dist -name "*.tar.gz" -exec cp {} all_wheels/ \;

      - name: Show what will be published
        run: |
          echo "📦 Packages to be published:"
          ls -la all_wheels/

      - name: Publish to Test PyPI
        if: github.event.inputs.publish == 'test'
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          password: ${{ secrets.TEST_PYPI_API_TOKEN }}
          repository-url: https://test.pypi.org/legacy/
          packages-dir: all_wheels/
          skip-existing: true

      - name: Publish to PyPI
        if: github.event.inputs.publish == 'prod'
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          password: ${{ secrets.PYPI_API_TOKEN }}
          packages-dir: all_wheels/
          skip-existing: true
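The `workflow_dispatch` trigger above means the emergency publish path can also be driven from the command line. A minimal sketch with the GitHub CLI, assuming the workflow keeps the file name shown in this diff and that the publishing secrets are already configured in the repository:

```bash
# Hypothetical manual trigger of the emergency publish path (gh must be authenticated).
# publish=false only builds; publish=test targets TestPyPI; publish=prod targets PyPI.
gh workflow run build-and-publish.yml -f publish=test

# Follow the dispatched run interactively.
gh run watch
```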
.github/workflows/release-manual.yml (vendored, new file, 206 lines)
@@ -0,0 +1,206 @@
name: Manual Release

on:
  workflow_dispatch:
    inputs:
      version:
        description: 'Version to release (e.g., 0.1.1)'
        required: true
        type: string
      test_pypi:
        description: 'Test on TestPyPI first'
        required: false
        type: boolean
        default: true

jobs:
  validate-and-release:
    runs-on: ubuntu-latest
    permissions:
      contents: write
      actions: read

    steps:
      - uses: actions/checkout@v4
        with:
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Check CI status
        run: |
          echo "ℹ️ This workflow will download build artifacts from the latest CI run."
          echo "   CI must have completed successfully on the current commit."
          echo ""

      - name: Validate version format
        run: |
          if ! [[ "${{ inputs.version }}" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
            echo "❌ Invalid version format. Use semantic versioning (e.g., 0.1.1)"
            exit 1
          fi
          echo "✅ Version format valid: ${{ inputs.version }}"

      - name: Check if version already exists
        run: |
          if git tag | grep -q "^v${{ inputs.version }}$"; then
            echo "❌ Version v${{ inputs.version }} already exists!"
            exit 1
          fi
          echo "✅ Version is new"

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.13'

      - name: Install uv
        run: |
          curl -LsSf https://astral.sh/uv/install.sh | sh
          echo "$HOME/.cargo/bin" >> $GITHUB_PATH

      - name: Update versions
        run: |
          ./scripts/bump_version.sh ${{ inputs.version }}
          git config user.name "GitHub Actions"
          git config user.email "actions@github.com"
          git add packages/*/pyproject.toml
          git commit -m "chore: release v${{ inputs.version }}"

      - name: Get CI run ID
        id: get-ci-run
        run: |
          # Get the latest successful CI run on the previous commit (before version bump)
          COMMIT_SHA=$(git rev-parse HEAD~1)
          RUN_ID=$(gh run list \
            --workflow="CI - Build Multi-Platform Packages" \
            --status=success \
            --commit=$COMMIT_SHA \
            --json databaseId \
            --jq '.[0].databaseId')

          if [ -z "$RUN_ID" ]; then
            echo "❌ No successful CI run found for commit $COMMIT_SHA"
            echo ""
            echo "This usually means:"
            echo "1. CI hasn't run on the latest commit yet"
            echo "2. CI failed on the latest commit"
            echo ""
            echo "Please ensure CI passes on main branch before releasing."
            exit 1
          fi

          echo "✅ Found CI run: $RUN_ID"
          echo "run-id=$RUN_ID" >> $GITHUB_OUTPUT
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      - name: Download artifacts from CI run
        run: |
          echo "📦 Downloading artifacts from CI run ${{ steps.get-ci-run.outputs.run-id }}..."

          # Download all artifacts (not just wheels-*)
          gh run download ${{ steps.get-ci-run.outputs.run-id }} \
            --dir ./dist-downloads

          # Consolidate all wheels into packages/*/dist/
          mkdir -p packages/leann-core/dist
          mkdir -p packages/leann-backend-hnsw/dist
          mkdir -p packages/leann-backend-diskann/dist
          mkdir -p packages/leann/dist

          find ./dist-downloads -name "*.whl" -exec cp {} ./packages/ \;

          # Move wheels to correct package directories
          for wheel in packages/*.whl; do
            if [[ $wheel == *"leann_core"* ]]; then
              mv "$wheel" packages/leann-core/dist/
            elif [[ $wheel == *"leann_backend_hnsw"* ]]; then
              mv "$wheel" packages/leann-backend-hnsw/dist/
            elif [[ $wheel == *"leann_backend_diskann"* ]]; then
              mv "$wheel" packages/leann-backend-diskann/dist/
            elif [[ $wheel == *"leann-"* ]] && [[ $wheel != *"backend"* ]] && [[ $wheel != *"core"* ]]; then
              mv "$wheel" packages/leann/dist/
            fi
          done

          # List downloaded wheels
          echo "✅ Downloaded wheels:"
          find packages/*/dist -name "*.whl" -type f | sort
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      - name: Test on TestPyPI (optional)
        if: inputs.test_pypi
        continue-on-error: true
        env:
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.TEST_PYPI_API_TOKEN }}
        run: |
          if [ -z "$TWINE_PASSWORD" ]; then
            echo "⚠️ TEST_PYPI_API_TOKEN not configured, skipping TestPyPI upload"
            echo "   To enable TestPyPI testing, add TEST_PYPI_API_TOKEN to repository secrets"
            exit 0
          fi

          pip install twine
          echo "📦 Uploading to TestPyPI..."
          twine upload --repository testpypi packages/*/dist/* --verbose || {
            echo "⚠️ TestPyPI upload failed, but continuing with release"
            echo "   This is optional and won't block the release"
            exit 0
          }
          echo "✅ Test upload successful!"
          echo "📋 Check packages at: https://test.pypi.org/user/your-username/"
          echo ""
          echo "To test installation:"
          echo "pip install -i https://test.pypi.org/simple/ leann"

      - name: Publish to PyPI
        env:
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
        run: |
          if [ -z "$TWINE_PASSWORD" ]; then
            echo "❌ PYPI_API_TOKEN not configured!"
            echo "   Please add PYPI_API_TOKEN to repository secrets"
            exit 1
          fi

          pip install twine
          echo "📦 Publishing to PyPI..."

          # Collect all wheels in one place
          mkdir -p all_wheels
          find packages/*/dist -name "*.whl" -exec cp {} all_wheels/ \;
          find packages/*/dist -name "*.tar.gz" -exec cp {} all_wheels/ \;

          echo "📋 Packages to publish:"
          ls -la all_wheels/

          # Upload to PyPI
          twine upload all_wheels/* --skip-existing --verbose

          echo "✅ Published to PyPI!"
          echo "🎉 Check packages at: https://pypi.org/project/leann/"

      - name: Create and push tag
        run: |
          git tag "v${{ inputs.version }}"
          git push origin main
          git push origin "v${{ inputs.version }}"
          echo "✅ Tag v${{ inputs.version }} created and pushed"

      - name: Create GitHub Release
        uses: softprops/action-gh-release@v1
        with:
          tag_name: v${{ inputs.version }}
          name: Release v${{ inputs.version }}
          body: |
            ## 🚀 Release v${{ inputs.version }}

            ### What's Changed
            See the [full changelog](https://github.com/${{ github.repository }}/compare/...v${{ inputs.version }})

            ### Installation
            ```bash
            pip install leann==${{ inputs.version }}
            ```
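Because the release job refuses to proceed without a successful CI run on the commit being released, it can save a failed dispatch to reproduce that check locally first. A rough sketch using the same `gh run list` query as the workflow (assumes `gh` is authenticated against this repository):

```bash
# Pre-flight check before dispatching the Manual Release workflow:
# look for a successful "CI - Build Multi-Platform Packages" run on origin/main.
COMMIT_SHA=$(git rev-parse origin/main)
gh run list \
  --workflow="CI - Build Multi-Platform Packages" \
  --status=success \
  --commit="$COMMIT_SHA" \
  --json databaseId \
  --jq '.[0].databaseId'
```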
.gitignore (vendored, 1 changed line)
@@ -12,7 +12,6 @@ outputs/
*.idx
*.map
.history/
scripts/
lm_eval.egg-info/
demo/experiment_results/**/*.json
*.jsonl
README.md (91 changed lines)
@@ -12,11 +12,11 @@
The smallest vector index in the world. RAG Everything with LEANN!
</h2>

LEANN is a revolutionary vector database that democratizes personal AI. Transform your laptop into a powerful RAG system that can index and search through millions of documents while using **[97% less storage]** than traditional solutions **without accuracy loss**.
LEANN is a revolutionary vector database that democratizes personal AI. Transform your laptop into a powerful RAG system that can index and search through millions of documents while using **97% less storage** than traditional solutions **without accuracy loss**.

LEANN achieves this through *graph-based selective recomputation* with *high-degree preserving pruning*, computing embeddings on-demand instead of storing them all. [Illustration →](#️-architecture--how-it-works) | [Paper →](https://arxiv.org/abs/2506.08276)
LEANN achieves this through *graph-based selective recomputation* with *high-degree preserving pruning*, computing embeddings on-demand instead of storing them all. [Illustration Fig →](#️-architecture--how-it-works) | [Paper →](https://arxiv.org/abs/2506.08276)

**Ready to RAG Everything?** Transform your laptop into a personal AI assistant that can search your **[file system](#process-any-documents-pdf-txt-md)**, **[emails](#search-your-entire-life)**, **[browser history](#time-machine-for-the-web)**, **[chat history](#wechat-detective)**, or external knowledge bases (i.e., 60M documents) - all on your laptop, with zero cloud costs and complete privacy.
**Ready to RAG Everything?** Transform your laptop into a personal AI assistant that can search your **[file system](#-personal-data-manager-process-any-documents-pdf-txt-md)**, **[emails](#-your-personal-email-secretary-rag-on-apple-mail)**, **[browser history](#-time-machine-for-the-web-rag-your-entire-browser-history)**, **[chat history](#-wechat-detective-unlock-your-golden-memories)**, or external knowledge bases (i.e., 60M documents) - all on your laptop, with zero cloud costs and complete privacy.

@@ -37,7 +37,7 @@ LEANN achieves this through *graph-based selective recomputation* with *high-deg

✨ **No Accuracy Loss:** Maintain the same search quality as heavyweight solutions while using 97% less storage.

## Quick Start in 1 minute
## Installation

```bash
git clone git@github.com:yichuan-w/LEANN.git leann
@@ -47,36 +47,30 @@ git submodule update --init --recursive

**macOS:**
```bash
brew install llvm libomp boost protobuf zeromq
export CC=$(brew --prefix llvm)/bin/clang
export CXX=$(brew --prefix llvm)/bin/clang++
brew install llvm libomp boost protobuf zeromq pkgconf

# Install with HNSW backend (default, recommended for most users)
uv sync

# Or add DiskANN backend if you want to test more options
uv sync --extra diskann
# Install uv first if you don't have it:
# curl -LsSf https://astral.sh/uv/install.sh | sh
# See: https://docs.astral.sh/uv/getting-started/installation/#installation-methods
CC=$(brew --prefix llvm)/bin/clang CXX=$(brew --prefix llvm)/bin/clang++ uv sync
```

**Linux (Ubuntu/Debian):**
**Linux:**
```bash
sudo apt-get install libomp-dev libboost-all-dev protobuf-compiler libabsl-dev libmkl-full-dev libaio-dev libzmq3-dev

# Install with HNSW backend (default, recommended for most users)
uv sync

# Or add DiskANN backend if you want to test more options
uv sync --extra diskann
```

**Ollama Setup (Recommended for full privacy):**

> *You can skip this installation if you only want to use OpenAI API for generation.*

*macOS:*
**macOS:**

First, [download Ollama for macOS](https://ollama.com/download/mac).

@@ -85,7 +79,7 @@ First, [download Ollama for macOS](https://ollama.com/download/mac).

ollama pull llama3.2:1b
```

*Linux:*
**Linux:**
```bash
# Install Ollama
curl -fsSL https://ollama.ai/install.sh | sh
@@ -97,9 +91,10 @@ ollama serve &

ollama pull llama3.2:1b
```

## Dead Simple API
## Quick Start in 30s

Just 3 lines of code. Our declarative API makes RAG as easy as writing a config file:
Our declarative API makes RAG as easy as writing a config file.
[Try in this ipynb file →](demo.ipynb)

```python
from leann.api import LeannBuilder, LeannSearcher, LeannChat
@@ -130,24 +125,22 @@ response = chat.ask(
)
```

**That's it.** No cloud setup, no API keys, no "fine-tuning". Just your data, your questions, your laptop.
## RAG on Everything!

[Try the interactive demo →](demo.ipynb)
LEANN supports RAG on various data sources including documents (.pdf, .txt, .md), Apple Mail, Google Search History, WeChat, and more.

## Wild Things You Can Do
### 📄 Personal Data Manager: Process Any Documents (.pdf, .txt, .md)!

LEANN supports RAGing a lot of data sources, like .pdf, .txt, .md, and also supports RAGing your WeChat, Google Search History, and more.
Ask questions directly about your personal PDFs, documents, and any directory containing your files!

### Process Any Documents (.pdf, .txt, .md)

Above we showed the Python API, while this CLI script demonstrates the same concepts while directly processing PDFs and documents, and even any directory that stores your personal files!

The following scripts use Ollama `qwen3:8b` by default, so you need `ollama pull qwen3:8b` first. For other models: `--llm openai --model gpt-4o` (requires `OPENAI_API_KEY` environment variable) or `--llm hf --model Qwen/Qwen3-4B`.
The example below asks a question about summarizing two papers (uses default data in `examples/data`):

```bash
# Drop your PDFs, .txt, .md files into examples/data/
uv run ./examples/main_cli_example.py
```

```
# Or use python directly
source .venv/bin/activate
python ./examples/main_cli_example.py
@@ -155,14 +148,13 @@ python ./examples/main_cli_example.py

**Works with any text format** - research papers, personal notes, presentations. Built with LlamaIndex for document parsing.
### 📧 Your Personal Email Secretary: RAG on Apple Mail!

### Search Your Entire Life
**Note:** You need to grant full disk access to your terminal/VS Code in System Preferences → Privacy & Security → Full Disk Access.
```bash
python examples/mail_reader_leann.py
# "What's the number of class recommend to take per semester for incoming EECS students?"
python examples/mail_reader_leann.py --query "What's the food I ordered by doordash or Uber eat mostly?"
```
**90K emails → 14MB.** Finally, search your email like you search Google.
**780K email chunks → 78MB storage** Finally, search your email like you search Google.

<details>
<summary><strong>📋 Click to expand: Command Examples</strong></summary>
@@ -195,12 +187,11 @@ Once the index is built, you can ask questions like:
- "Show me emails about travel expenses"
</details>

### Time Machine for the Web
### 🔍 Time Machine for the Web: RAG Your Entire Google Browser History!
```bash
python examples/google_history_reader_leann.py
# "Tell me my browser history about machine learning system stuff?"
python examples/google_history_reader_leann.py --query "Tell me my browser history about machine learning?"
```
**38K browser entries → 6MB.** Your browser history becomes your personal search engine.
**38K browser entries → 6MB storage.** Your browser history becomes your personal search engine.

<details>
<summary><strong>📋 Click to expand: Command Examples</strong></summary>
@@ -249,13 +240,13 @@ Once the index is built, you can ask questions like:

</details>

### WeChat Detective
### 💬 WeChat Detective: Unlock Your Golden Memories!

```bash
python examples/wechat_history_reader_leann.py
# "Show me all group chats about weekend plans"
python examples/wechat_history_reader_leann.py --query "Show me all group chats about weekend plans"
```
**400K messages → 64MB.** Search years of chat history in any language.
**400K messages → 64MB storage** Search years of chat history in any language.

<details>
<summary><strong>🔧 Click to expand: Installation Requirements</strong></summary>
@@ -266,7 +257,13 @@ First, you need to install the WeChat exporter:
sudo packages/wechat-exporter/wechattweak-cli install
```

**Troubleshooting**: If you encounter installation issues, check the [WeChatTweak-CLI issues page](https://github.com/sunnyyoung/WeChatTweak-CLI/issues/41).
**Troubleshooting:**
- **Installation issues**: Check the [WeChatTweak-CLI issues page](https://github.com/sunnyyoung/WeChatTweak-CLI/issues/41)
- **Export errors**: If you encounter the error below, try restarting WeChat
```
Failed to export WeChat data. Please ensure WeChat is running and WeChatTweak is installed.
Failed to find or export WeChat data. Exiting.
```
</details>

<details>
@@ -403,11 +400,11 @@ Same dataset, same hardware, same embedding model. LEANN just works better.

### Storage Usage Comparison

| System | DPR (2.1M chunks) | RPJ-wiki (60M chunks) | Chat history (400K messages) | Apple emails (90K messages chunks) |Google Search History (38K entries)
| System | DPR (2.1M chunks) | RPJ-wiki (60M chunks) | Chat history (400K messages) | Apple emails (780K messages chunks) |Google Search History (38K entries)
|-----------------------|------------------|------------------------|-----------------------------|------------------------------|------------------------------|
| Traditional Vector DB(FAISS) | 3.8 GB | 201 GB | 1.8G | 305.8 MB |130.4 MB |
| **LEANN** | **324 MB** | **6 GB** | **64 MB** | **14.8 MB** |**6.4MB** |
| **Reduction** | **91% smaller** | **97% smaller** | **97% smaller** | **95% smaller** |**95% smaller** |
| Traditional Vector DB(FAISS) | 3.8 GB | 201 GB | 1.8G | 2.4G |130.4 MB |
| **LEANN** | **324 MB** | **6 GB** | **64 MB** | **79 MB** |**6.4MB** |
| **Reduction** | **91% smaller** | **97% smaller** | **97% smaller** | **97% smaller** |**95% smaller** |

<!-- ### Memory Usage Comparison
demo.ipynb (306 changed lines)
@@ -1,37 +1,321 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Quick Start in 30s"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from leann.api import LeannBuilder, LeannSearcher, LeannChat\n",
    "# install this if you areusing colab\n",
    "! pip install leann"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Build the index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO: Registering backend 'hnsw'\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/yichuan/Desktop/code/LEANN/leann/.venv/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      " from .autonotebook import tqdm as notebook_tqdm\n",
      "INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: facebook/contriever\n",
      "WARNING:sentence_transformers.SentenceTransformer:No sentence-transformers model found with name facebook/contriever. Creating a new one with mean pooling.\n",
      "Writing passages: 100%|██████████| 5/5 [00:00<00:00, 27887.66chunk/s]\n",
      "Batches: 100%|██████████| 1/1 [00:00<00:00, 13.51it/s]\n",
      "WARNING:leann_backend_hnsw.hnsw_backend:Converting data to float32, shape: (5, 768)\n",
      "INFO:leann_backend_hnsw.hnsw_backend:INFO: Converting HNSW index to CSR-pruned format...\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "M: 64 for level: 0\n",
      "Starting conversion: knowledge.index -> knowledge.csr.tmp\n",
      "[0.00s] Reading Index HNSW header...\n",
      "[0.00s] Header read: d=768, ntotal=5\n",
      "[0.00s] Reading HNSW struct vectors...\n",
      " Reading vector (dtype=<class 'numpy.float64'>, fmt='d')... Count=6, Bytes=48\n",
      "[0.00s] Read assign_probas (6)\n",
      " Reading vector (dtype=<class 'numpy.int32'>, fmt='i')... Count=7, Bytes=28\n",
      "[0.11s] Read cum_nneighbor_per_level (7)\n",
      " Reading vector (dtype=<class 'numpy.int32'>, fmt='i')... Count=5, Bytes=20\n",
      "[0.21s] Read levels (5)\n",
      "[0.30s] Probing for compact storage flag...\n",
      "[0.30s] Found compact flag: False\n",
      "[0.30s] Compact flag is False, reading original format...\n",
      "[0.30s] Probing for potential extra byte before non-compact offsets...\n",
      "[0.30s] Found and consumed an unexpected 0x00 byte.\n",
      " Reading vector (dtype=<class 'numpy.uint64'>, fmt='Q')... Count=6, Bytes=48\n",
      "[0.30s] Read offsets (6)\n",
      "[0.40s] Attempting to read neighbors vector...\n",
      " Reading vector (dtype=<class 'numpy.int32'>, fmt='i')... Count=320, Bytes=1280\n",
      "[0.40s] Read neighbors (320)\n",
      "[0.50s] Read scalar params (ep=4, max_lvl=0)\n",
      "[0.50s] Checking for storage data...\n",
      "[0.50s] Found storage fourcc: 49467849.\n",
      "[0.50s] Converting to CSR format...\n",
      "[0.50s] Conversion loop finished. \n",
      "[0.50s] Running validation checks...\n",
      " Checking total valid neighbor count...\n",
      " OK: Total valid neighbors = 20\n",
      " Checking final pointer indices...\n",
      " OK: Final pointers match data size.\n",
      "[0.50s] Deleting original neighbors and offsets arrays...\n",
      " CSR Stats: |data|=20, |level_ptr|=10\n",
      "[0.59s] Writing CSR HNSW graph data in FAISS-compatible order...\n",
      " Pruning embeddings: Writing NULL storage marker.\n",
      "[0.69s] Conversion complete.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:leann_backend_hnsw.hnsw_backend:✅ CSR conversion successful.\n",
      "INFO:leann_backend_hnsw.hnsw_backend:INFO: Replaced original index with CSR-pruned version at 'knowledge.index'\n"
     ]
    }
   ],
   "source": [
    "from leann.api import LeannBuilder\n",
    "\n",
    "# 1. Build the index (no embeddings stored!)\n",
    "builder = LeannBuilder(backend_name=\"hnsw\")\n",
    "builder.add_text(\"C# is a powerful programming language\")\n",
    "builder.add_text(\"Python is a powerful programming language and it is very popular\")\n",
    "builder.add_text(\"C# is a powerful programming language and it is good at game development\")\n",
    "builder.add_text(\"Python is a powerful programming language and it is good at machine learning tasks\")\n",
    "builder.add_text(\"Machine learning transforms industries\")\n",
    "builder.add_text(\"Neural networks process complex data\")\n",
    "builder.add_text(\"Leann is a great storage saving engine for RAG on your MacBook\")\n",
    "builder.build_index(\"knowledge.leann\")\n",
    "builder.build_index(\"knowledge.leann\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Search with real-time embeddings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:leann.api:🔍 LeannSearcher.search() called:\n",
      "INFO:leann.api: Query: 'programming languages'\n",
      "INFO:leann.api: Top_k: 2\n",
      "INFO:leann.api: Additional kwargs: {}\n",
      "INFO:leann.embedding_server_manager:Port 5557 has incompatible server, trying next port...\n",
      "INFO:leann.embedding_server_manager:Port 5558 has incompatible server, trying next port...\n",
      "INFO:leann.embedding_server_manager:Port 5559 has incompatible server, trying next port...\n",
      "INFO:leann.embedding_server_manager:Using port 5560 instead of 5557\n",
      "INFO:leann.embedding_server_manager:Starting embedding server on port 5560...\n",
      "INFO:leann.embedding_server_manager:Command: /Users/yichuan/Desktop/code/LEANN/leann/.venv/bin/python -m leann_backend_hnsw.hnsw_embedding_server --zmq-port 5560 --model-name facebook/contriever --passages-file knowledge.leann.meta.json\n",
      "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
      "To disable this warning, you can either:\n",
      "\t- Avoid using `tokenizers` before the fork if possible\n",
      "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n",
      "INFO:leann.embedding_server_manager:Server process started with PID: 4574\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[read_HNSW - CSR NL v4] Reading metadata & CSR indices (manual offset)...\n",
      "[read_HNSW NL v4] Read levels vector, size: 5\n",
      "[read_HNSW NL v4] Reading Compact Storage format indices...\n",
      "[read_HNSW NL v4] Read compact_level_ptr, size: 10\n",
      "[read_HNSW NL v4] Read compact_node_offsets, size: 6\n",
      "[read_HNSW NL v4] Read entry_point: 4, max_level: 0\n",
      "[read_HNSW NL v4] Read storage fourcc: 0x6c6c756e\n",
      "[read_HNSW NL v4 FIX] Detected FileIOReader. Neighbors size field offset: 326\n",
      "[read_HNSW NL v4] Reading neighbors data into memory.\n",
      "[read_HNSW NL v4] Read neighbors data, size: 20\n",
      "[read_HNSW NL v4] Finished reading metadata and CSR indices.\n",
      "INFO: Skipping external storage loading, since is_recompute is true.\n",
      "INFO: Registering backend 'hnsw'\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:leann.embedding_server_manager:Embedding server is ready!\n",
      "INFO:leann.api: Launching server time: 1.078078269958496 seconds\n",
      "INFO:leann.embedding_server_manager:Existing server process (PID 4574) is compatible\n",
      "INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: facebook/contriever\n",
      "WARNING:sentence_transformers.SentenceTransformer:No sentence-transformers model found with name facebook/contriever. Creating a new one with mean pooling.\n",
      "INFO:leann.api: Generated embedding shape: (1, 768)\n",
      "INFO:leann.api: Embedding time: 2.9307072162628174 seconds\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "ZmqDistanceComputer initialized: d=768, metric=0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:leann.api: Search time: 0.27327895164489746 seconds\n",
      "INFO:leann.api: Backend returned: labels=2 results\n",
      "INFO:leann.api: Processing 2 passage IDs:\n",
      "INFO:leann.api: 1. passage_id='0' -> SUCCESS: C# is a powerful programming language and it is good at game development...\n",
      "INFO:leann.api: 2. passage_id='1' -> SUCCESS: Python is a powerful programming language and it is good at machine learning tasks...\n",
      "INFO:leann.api: Final enriched results: 2 passages\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[SearchResult(id='0', score=np.float32(0.9874103), text='C# is a powerful programming language and it is good at game development', metadata={}),\n",
       " SearchResult(id='1', score=np.float32(0.8922168), text='Python is a powerful programming language and it is good at machine learning tasks', metadata={})]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from leann.api import LeannSearcher\n",
    "\n",
    "# 2. Search with real-time embeddings\n",
    "searcher = LeannSearcher(\"knowledge.leann\")\n",
    "results = searcher.search(\"programming languages\", top_k=2)\n",
    "results"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Chat with LEANN using retrieved results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:leann.chat:Attempting to create LLM of type='hf' with model='Qwen/Qwen3-0.6B'\n",
      "INFO:leann.chat:Initializing HFChat with model='Qwen/Qwen3-0.6B'\n",
      "INFO:leann.chat:MPS is available. Using Apple Silicon GPU.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[read_HNSW - CSR NL v4] Reading metadata & CSR indices (manual offset)...\n",
      "[read_HNSW NL v4] Read levels vector, size: 5\n",
      "[read_HNSW NL v4] Reading Compact Storage format indices...\n",
      "[read_HNSW NL v4] Read compact_level_ptr, size: 10\n",
      "[read_HNSW NL v4] Read compact_node_offsets, size: 6\n",
      "[read_HNSW NL v4] Read entry_point: 4, max_level: 0\n",
      "[read_HNSW NL v4] Read storage fourcc: 0x6c6c756e\n",
      "[read_HNSW NL v4 FIX] Detected FileIOReader. Neighbors size field offset: 326\n",
      "[read_HNSW NL v4] Reading neighbors data into memory.\n",
      "[read_HNSW NL v4] Read neighbors data, size: 20\n",
      "[read_HNSW NL v4] Finished reading metadata and CSR indices.\n",
      "INFO: Skipping external storage loading, since is_recompute is true.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:leann.api:🔍 LeannSearcher.search() called:\n",
      "INFO:leann.api: Query: 'Compare the two retrieved programming languages and tell me their advantages.'\n",
      "INFO:leann.api: Top_k: 2\n",
      "INFO:leann.api: Additional kwargs: {}\n",
      "INFO:leann.embedding_server_manager:Port 5557 has incompatible server, trying next port...\n",
      "INFO:leann.embedding_server_manager:Port 5558 has incompatible server, trying next port...\n",
      "INFO:leann.embedding_server_manager:Port 5559 has incompatible server, trying next port...\n",
      "INFO:leann.embedding_server_manager:Found compatible server on port 5560\n",
      "INFO:leann.embedding_server_manager:Using existing compatible server on port 5560\n",
      "INFO:leann.api: Launching server time: 0.04932403564453125 seconds\n",
      "INFO:leann.embedding_server_manager:Found compatible server on port 5560\n",
      "INFO:leann.embedding_server_manager:Using existing compatible server on port 5560\n",
      "INFO:leann.api: Generated embedding shape: (1, 768)\n",
      "INFO:leann.api: Embedding time: 0.06902289390563965 seconds\n",
      "INFO:leann.api: Search time: 0.026793241500854492 seconds\n",
      "INFO:leann.api: Backend returned: labels=2 results\n",
      "INFO:leann.api: Processing 2 passage IDs:\n",
      "INFO:leann.api: 1. passage_id='0' -> SUCCESS: C# is a powerful programming language and it is good at game development...\n",
      "INFO:leann.api: 2. passage_id='1' -> SUCCESS: Python is a powerful programming language and it is good at machine learning tasks...\n",
      "INFO:leann.api: Final enriched results: 2 passages\n",
      "INFO:leann.chat:Generating with HuggingFace model, config: {'max_new_tokens': 128, 'temperature': 0.7, 'top_p': 0.9, 'do_sample': True, 'pad_token_id': 151645, 'eos_token_id': 151645}\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "ZmqDistanceComputer initialized: d=768, metric=0\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "\"<think>\\n\\n</think>\\n\\nBased on the context provided, here's a comparison of the two retrieved programming languages:\\n\\n**C#** is known for being a powerful programming language and is well-suited for game development. It is often used in game development and is popular among developers working on Windows applications.\\n\\n**Python**, on the other hand, is also a powerful language and is well-suited for machine learning tasks. It is widely used for data analysis, scientific computing, and other applications that require handling large datasets or performing complex calculations.\\n\\n**Advantages**:\\n- C#: Strong for game development and cross-platform compatibility.\\n- Python: Strong for\""
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from leann.api import LeannChat\n",
    "\n",
    "# 3. Chat with LEANN using retrieved results\n",
    "llm_config = {\n",
    " \"type\": \"ollama\",\n",
    " \"model\": \"llama3.2:1b\"\n",
    " \"type\": \"hf\",\n",
    " \"model\": \"Qwen/Qwen3-0.6B\",\n",
    "}\n",
    "\n",
    "chat = LeannChat(index_path=\"knowledge.leann\", llm_config=llm_config)\n",
    "response = chat.ask(\n",
    " \"Compare the two retrieved programming languages and say which one is more popular today.\",\n",
    " \"Compare the two retrieved programming languages and tell me their advantages.\",\n",
    " top_k=2,\n",
    ")"
    " llm_kwargs={\"max_tokens\": 128}\n",
    ")\n",
    "response"
   ]
  }
 ],
docs/RELEASE.md (new file, 100 lines)
@@ -0,0 +1,100 @@
# Release Guide

## 📋 Prerequisites

Before releasing, ensure:
1. ✅ All code changes are committed and pushed
2. ✅ CI has passed on the latest commit (check [Actions](https://github.com/yichuan-w/LEANN/actions/workflows/ci.yml))
3. ✅ You have determined the new version number

### Required: PyPI Configuration

To enable PyPI publishing:
1. Get a PyPI API token from https://pypi.org/manage/account/token/
2. Add it to repository secrets: Settings → Secrets → Actions → New repository secret
   - Name: `PYPI_API_TOKEN`
   - Value: Your PyPI token (starts with `pypi-`)

### Optional: TestPyPI Configuration

To enable TestPyPI testing (recommended but not required):
1. Get a TestPyPI API token from https://test.pypi.org/manage/account/token/
2. Add it to repository secrets: Settings → Secrets → Actions → New repository secret
   - Name: `TEST_PYPI_API_TOKEN`
   - Value: Your TestPyPI token (starts with `pypi-`)

**Note**: TestPyPI testing is optional. If not configured, the release will skip TestPyPI and proceed.

## 🚀 Recommended: Manual Release Workflow

### Via GitHub UI (Most Reliable)

1. **Verify CI Status**: Check that the latest commit has a green checkmark ✅
2. Go to [Actions → Manual Release](https://github.com/yichuan-w/LEANN/actions/workflows/release-manual.yml)
3. Click "Run workflow"
4. Enter version (e.g., `0.1.1`)
5. Toggle "Test on TestPyPI first" if desired
6. Click "Run workflow"

**What happens:**
- ✅ Downloads pre-built packages from CI (no rebuild needed!)
- ✅ Updates all package versions
- ✅ Optionally tests on TestPyPI
- ✅ **Publishes directly to PyPI**
- ✅ Creates tag and GitHub release

### Via Command Line

```bash
gh workflow run release-manual.yml -f version=0.1.1 -f test_pypi=true
```

## ⚡ Quick Release (One-Line)

For experienced users who want the fastest path:

```bash
./scripts/release.sh 0.1.1
```

This script will:
1. Update all package versions
2. Commit and push changes
3. Create GitHub release
4. **Manual Release workflow will automatically publish to PyPI**

⚠️ **Note**: If CI fails, you'll need to manually fix and re-tag

## Manual Testing Before Release

For testing specific packages locally (especially DiskANN on macOS):

```bash
# Build specific package locally
./scripts/build_and_test.sh diskann  # or hnsw, core, meta, all

# Test installation in a clean environment
python -m venv test_env
source test_env/bin/activate
pip install packages/*/dist/*.whl

# Upload to Test PyPI (optional)
./scripts/upload_to_pypi.sh test

# Upload to Production PyPI (use with caution)
./scripts/upload_to_pypi.sh prod
```

## First-time setup

1. Install GitHub CLI:
```bash
brew install gh
gh auth login
```

2. Set PyPI token in GitHub:
```bash
gh secret set PYPI_API_TOKEN
# Paste your PyPI token when prompted
```
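A small post-publish sanity check can complement the steps above. The sketch below assumes the new version (0.1.1 is used as the example throughout this guide) has already propagated to PyPI and uses the import path shown in the demo notebook:

```bash
# Verify the freshly published release installs and imports in a clean venv.
python -m venv /tmp/leann-release-check
source /tmp/leann-release-check/bin/activate
pip install "leann==0.1.1"
python -c "from leann.api import LeannBuilder, LeannSearcher, LeannChat; print('leann import OK')"
deactivate
```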
@@ -22,7 +22,7 @@ def get_mail_path():
    return os.path.join(home_dir, "Library", "Mail")

# Default mail path for macOS
# DEFAULT_MAIL_PATH = "/Users/yichuan/Library/Mail/V10/0FCA0879-FD8C-4B7E-83BF-FDDA930791C5/[Gmail].mbox/All Mail.mbox/78BA5BE1-8819-4F9A-9613-EB63772F1DD0/Data"
DEFAULT_MAIL_PATH = "/Users/yichuan/Library/Mail/V10/0FCA0879-FD8C-4B7E-83BF-FDDA930791C5/[Gmail].mbox/All Mail.mbox/78BA5BE1-8819-4F9A-9613-EB63772F1DD0/Data"

def create_leann_index_from_multiple_sources(messages_dirs: List[Path], index_path: str = "mail_index.leann", max_count: int = -1, include_html: bool = False, embedding_model: str = "facebook/contriever"):
    """
@@ -77,7 +77,7 @@ def create_leann_index_from_multiple_sources(messages_dirs: List[Path], index_pa
    print(f"\nTotal loaded {len(all_documents)} email documents from {len(messages_dirs)} directories and starting to split them into chunks")

    # Create text splitter with 256 chunk size
    text_splitter = SentenceSplitter(chunk_size=256, chunk_overlap=128)
    text_splitter = SentenceSplitter(chunk_size=256, chunk_overlap=25)

    # Convert Documents to text strings and chunk them
    all_texts = []
@@ -158,7 +158,7 @@ def create_leann_index(mail_path: str, index_path: str = "mail_index.leann", max
    print(f"Loaded {len(documents)} email documents")

    # Create text splitter with 256 chunk size
    text_splitter = SentenceSplitter(chunk_size=256, chunk_overlap=25)
    text_splitter = SentenceSplitter(chunk_size=256, chunk_overlap=128)

    # Convert Documents to text strings and chunk them
    all_texts = []
@@ -218,11 +218,10 @@ async def query_leann_index(index_path: str, query: str):
    start_time = time.time()
    chat_response = chat.ask(
        query,
        top_k=10,
        top_k=20,
        recompute_beighbor_embeddings=True,
        complexity=12,
        complexity=32,
        beam_width=1,
    )
    end_time = time.time()
    print(f"Time taken: {end_time - start_time} seconds")
@@ -233,7 +232,7 @@ async def main():
    parser = argparse.ArgumentParser(description='LEANN Mail Reader - Create and query email index')
    # Remove --mail-path argument and auto-detect all Messages directories
    # Remove DEFAULT_MAIL_PATH
    parser.add_argument('--index-dir', type=str, default="./mail_index_leann_debug",
    parser.add_argument('--index-dir', type=str, default="./mail_index_index_file",
                        help='Directory to store the LEANN index (default: ./mail_index_leann_raw_text_all_dicts)')
    parser.add_argument('--max-emails', type=int, default=1000,
                        help='Maximum number of emails to process (-1 means all)')
@@ -253,6 +252,9 @@ async def main():
    mail_path = get_mail_path()
    print(f"Searching for email data in: {mail_path}")
    messages_dirs = find_all_messages_directories(mail_path)
    # messages_dirs = find_all_messages_directories(DEFAULT_MAIL_PATH)
    # messages_dirs = [DEFAULT_MAIL_PATH]
    # messages_dirs = messages_dirs[:1]

    print('len(messages_dirs): ', len(messages_dirs))
@@ -78,7 +78,7 @@ def create_leann_index_from_multiple_wechat_exports(
    )

    # Create text splitter with 256 chunk size
    text_splitter = SentenceSplitter(chunk_size=256, chunk_overlap=128)
    text_splitter = SentenceSplitter(chunk_size=192, chunk_overlap=64)

    # Convert Documents to text strings and chunk them
    all_texts = []
@@ -4,8 +4,8 @@ build-backend = "scikit_build_core.build"
[project]
name = "leann-backend-diskann"
version = "0.1.0"
dependencies = ["leann-core==0.1.0", "numpy"]
version = "0.1.2"
dependencies = ["leann-core==0.1.2", "numpy"]

[tool.scikit-build]
# Key: simplified CMake path
Submodule packages/leann-backend-diskann/third_party/DiskANN updated: af2a26481e...25339b0341
@@ -6,9 +6,14 @@ build-backend = "scikit_build_core.build"
[project]
name = "leann-backend-hnsw"
version = "0.1.0"
version = "0.1.2"
description = "Custom-built HNSW (Faiss) backend for the Leann toolkit."
dependencies = ["leann-core==0.1.0", "numpy"]
dependencies = [
    "leann-core==0.1.2",
    "numpy",
    "pyzmq>=23.0.0",
    "msgpack>=1.0.0",
]

[tool.scikit-build]
wheel.packages = ["leann_backend_hnsw"]
@@ -4,15 +4,23 @@ build-backend = "setuptools.build_meta"
[project]
name = "leann-core"
version = "0.1.0"
description = "Core API and plugin system for Leann."
version = "0.1.2"
description = "Core API and plugin system for LEANN"
readme = "README.md"
requires-python = ">=3.9"
license = { text = "MIT" }

# All required dependencies included
dependencies = [
    "numpy>=1.20.0",
    "tqdm>=4.60.0"
    "tqdm>=4.60.0",
    "psutil>=5.8.0",
    "pyzmq>=23.0.0",
    "msgpack>=1.0.0",
    "torch>=2.0.0",
    "sentence-transformers>=2.2.0",
    "llama-index-core>=0.12.0",
    "python-dotenv>=1.0.0",
]

[project.scripts]
@@ -142,7 +142,7 @@ class LeannBuilder:
    def __init__(
        self,
        backend_name: str,
        embedding_model: str = "facebook/contriever-msmarco",
        embedding_model: str = "facebook/contriever",
        dimensions: Optional[int] = None,
        embedding_mode: str = "sentence-transformers",
        **backend_kwargs,
@@ -9,6 +9,7 @@ from typing import Dict, Any, Optional, List
|
||||
import logging
|
||||
import os
|
||||
import difflib
|
||||
import torch
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
@@ -28,6 +29,68 @@ def check_ollama_models() -> List[str]:
|
||||
return []
|
||||
|
||||
|
||||
def check_ollama_model_exists_remotely(model_name: str) -> tuple[bool, list[str]]:
|
||||
"""Check if a model exists in Ollama's remote library and return available tags
|
||||
|
||||
Returns:
|
||||
(model_exists, available_tags): bool and list of matching tags
|
||||
"""
|
||||
try:
|
||||
import requests
|
||||
import re
|
||||
|
||||
# Split model name and tag
|
||||
if ':' in model_name:
|
||||
base_model, requested_tag = model_name.split(':', 1)
|
||||
else:
|
||||
base_model, requested_tag = model_name, None
|
||||
|
||||
# First check if base model exists in library
|
||||
library_response = requests.get("https://ollama.com/library", timeout=8)
|
||||
if library_response.status_code != 200:
|
||||
return True, [] # Assume exists if can't check
|
||||
|
||||
# Extract model names from library page
|
||||
models_in_library = re.findall(r'href="/library/([^"]+)"', library_response.text)
|
||||
|
||||
if base_model not in models_in_library:
|
||||
return False, [] # Base model doesn't exist
|
||||
|
||||
# If base model exists, get available tags
|
||||
tags_response = requests.get(f"https://ollama.com/library/{base_model}/tags", timeout=8)
|
||||
if tags_response.status_code != 200:
|
||||
return True, [] # Base model exists but can't get tags
|
||||
|
||||
# Extract tags for this model - be more specific to avoid HTML artifacts
|
||||
tag_pattern = rf'{re.escape(base_model)}:[a-zA-Z0-9\.\-_]+'
|
||||
raw_tags = re.findall(tag_pattern, tags_response.text)
|
||||
|
||||
# Clean up tags - remove HTML artifacts and duplicates
|
||||
available_tags = []
|
||||
seen = set()
|
||||
for tag in raw_tags:
|
||||
# Skip if it looks like HTML (contains < or >)
|
||||
if '<' in tag or '>' in tag:
|
||||
continue
|
||||
if tag not in seen:
|
||||
seen.add(tag)
|
||||
available_tags.append(tag)
|
||||
|
||||
# Check if exact model exists
|
||||
if requested_tag is None:
|
||||
# User just requested base model, suggest tags
|
||||
return True, available_tags[:10] # Return up to 10 tags
|
||||
else:
|
||||
exact_match = model_name in available_tags
|
||||
return exact_match, available_tags[:10]
|
||||
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# If scraping fails, assume model might exist (don't block user)
|
||||
return True, []
|
||||
|
||||
|
||||
def search_ollama_models_fuzzy(query: str, available_models: List[str]) -> List[str]:
|
||||
"""Use intelligent fuzzy search for Ollama models"""
|
||||
if not available_models:
|
@@ -243,24 +306,66 @@ def validate_model_and_suggest(model_name: str, llm_type: str) -> Optional[str]:
    if llm_type == "ollama":
        available_models = check_ollama_models()
        if available_models and model_name not in available_models:
            # Use intelligent fuzzy search based on locally installed models
            suggestions = search_ollama_models_fuzzy(model_name, available_models)

            error_msg = f"Model '{model_name}' not found in your local Ollama installation."
            if suggestions:
                error_msg += "\n\nDid you mean one of these installed models?\n"
                for i, suggestion in enumerate(suggestions, 1):
                    error_msg += f" {i}. {suggestion}\n"
            else:
                error_msg += "\n\nYour installed models:\n"
                for i, model in enumerate(available_models[:8], 1):
                    error_msg += f" {i}. {model}\n"
                if len(available_models) > 8:
                    error_msg += f" ... and {len(available_models) - 8} more\n"

            error_msg += "\nTo list all models: ollama list"
            error_msg += "\nTo download a new model: ollama pull <model_name>"
            error_msg += "\nBrowse models: https://ollama.com/library"
            # Check if the model exists remotely and get available tags
            model_exists_remotely, available_tags = check_ollama_model_exists_remotely(model_name)

            if model_exists_remotely and model_name in available_tags:
                # Exact model exists remotely - suggest pulling it
                error_msg += f"\n\nTo install the requested model:\n"
                error_msg += f" ollama pull {model_name}\n"

                # Show local alternatives
                suggestions = search_ollama_models_fuzzy(model_name, available_models)
                if suggestions:
                    error_msg += "\nOr use one of these similar installed models:\n"
                    for i, suggestion in enumerate(suggestions, 1):
                        error_msg += f" {i}. {suggestion}\n"

            elif model_exists_remotely and available_tags:
                # Base model exists but requested tag doesn't - suggest correct tags
                base_model = model_name.split(':')[0]
                requested_tag = model_name.split(':', 1)[1] if ':' in model_name else None

                error_msg += f"\n\nModel '{base_model}' exists, but tag '{requested_tag}' is not available."
                error_msg += f"\n\nAvailable {base_model} models you can install:\n"
                for i, tag in enumerate(available_tags[:8], 1):
                    error_msg += f" {i}. ollama pull {tag}\n"
                if len(available_tags) > 8:
                    error_msg += f" ... and {len(available_tags) - 8} more variants\n"

                # Also show local alternatives
                suggestions = search_ollama_models_fuzzy(model_name, available_models)
                if suggestions:
                    error_msg += "\nOr use one of these similar installed models:\n"
                    for i, suggestion in enumerate(suggestions, 1):
                        error_msg += f" {i}. {suggestion}\n"

            else:
                # Model doesn't exist remotely - show fuzzy suggestions
                suggestions = search_ollama_models_fuzzy(model_name, available_models)
                error_msg += f"\n\nModel '{model_name}' was not found in Ollama's library."

                if suggestions:
                    error_msg += "\n\nDid you mean one of these installed models?\n"
                    for i, suggestion in enumerate(suggestions, 1):
                        error_msg += f" {i}. {suggestion}\n"
                else:
                    error_msg += "\n\nYour installed models:\n"
                    for i, model in enumerate(available_models[:8], 1):
                        error_msg += f" {i}. {model}\n"
                    if len(available_models) > 8:
                        error_msg += f" ... and {len(available_models) - 8} more\n"

            error_msg += "\n\nCommands:"
            error_msg += "\n ollama list # List installed models"
            if model_exists_remotely and available_tags:
                if model_name in available_tags:
                    error_msg += f"\n ollama pull {model_name} # Install requested model"
                else:
                    error_msg += f"\n ollama pull {available_tags[0]} # Install recommended variant"
            error_msg += "\n https://ollama.com/library # Browse available models"
            return error_msg

    elif llm_type == "hf":
@@ -397,7 +502,7 @@ class OllamaChat(LLMInterface):


class HFChat(LLMInterface):
    """LLM interface for local Hugging Face Transformers models."""
    """LLM interface for local Hugging Face Transformers models with proper chat templates."""

    def __init__(self, model_name: str = "deepseek-ai/deepseek-llm-7b-chat"):
        logger.info(f"Initializing HFChat with model='{model_name}'")
@@ -408,7 +513,7 @@ class HFChat(LLMInterface):
            raise ValueError(model_error)

        try:
            from transformers.pipelines import pipeline
            from transformers import AutoTokenizer, AutoModelForCausalLM
            import torch
        except ImportError:
            raise ImportError(
@@ -417,54 +522,101 @@ class HFChat(LLMInterface):
        # Auto-detect device
        if torch.cuda.is_available():
            device = "cuda"
            self.device = "cuda"
            logger.info("CUDA is available. Using GPU.")
        elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
            device = "mps"
            self.device = "mps"
            logger.info("MPS is available. Using Apple Silicon GPU.")
        else:
            device = "cpu"
            self.device = "cpu"
            logger.info("No GPU detected. Using CPU.")

        self.pipeline = pipeline("text-generation", model=model_name, device=device)
        # Load tokenizer and model
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float16 if self.device != "cpu" else torch.float32,
            device_map="auto" if self.device != "cpu" else None,
            trust_remote_code=True
        )

        # Move model to device if not using device_map
        if self.device != "cpu" and "device_map" not in str(self.model):
            self.model = self.model.to(self.device)

        # Set pad token if not present
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

    def ask(self, prompt: str, **kwargs) -> str:
        # Map OpenAI-style arguments to Hugging Face equivalents
        if "max_tokens" in kwargs:
            # Prefer user-provided max_new_tokens if both are present
            kwargs.setdefault("max_new_tokens", kwargs["max_tokens"])
            # Remove the unsupported key to avoid errors in Transformers
            kwargs.pop("max_tokens")
        print('kwargs in HF: ', kwargs)
        # Check if this is a Qwen model and add /no_think by default
        is_qwen_model = "qwen" in self.model.config._name_or_path.lower()

        # For Qwen models, automatically add /no_think to the prompt
        if is_qwen_model and "/no_think" not in prompt and "/think" not in prompt:
            prompt = prompt + " /no_think"

        # Prepare chat template
        messages = [{"role": "user", "content": prompt}]

        # Apply chat template if available
        if hasattr(self.tokenizer, "apply_chat_template"):
            try:
                formatted_prompt = self.tokenizer.apply_chat_template(
                    messages,
                    tokenize=False,
                    add_generation_prompt=True
                )
            except Exception as e:
                logger.warning(f"Chat template failed, using raw prompt: {e}")
                formatted_prompt = prompt
        else:
            # Fallback for models without chat template
            formatted_prompt = prompt

        # Handle temperature=0 edge-case for greedy decoding
        if "temperature" in kwargs and kwargs["temperature"] == 0.0:
            # Remove unsupported zero temperature and use deterministic generation
            kwargs.pop("temperature")
            kwargs.setdefault("do_sample", False)
        # Tokenize input
        inputs = self.tokenizer(
            formatted_prompt,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=2048
        )

        # Move inputs to device
        if self.device != "cpu":
            inputs = {k: v.to(self.device) for k, v in inputs.items()}

        # Sensible defaults for text generation
        params = {"max_length": 500, "num_return_sequences": 1, **kwargs}
        logger.info(f"Generating text with Hugging Face model with params: {params}")
        results = self.pipeline(prompt, **params)
        # Set generation parameters
        generation_config = {
            "max_new_tokens": kwargs.get("max_tokens", kwargs.get("max_new_tokens", 512)),
            "temperature": kwargs.get("temperature", 0.7),
            "top_p": kwargs.get("top_p", 0.9),
            "do_sample": kwargs.get("temperature", 0.7) > 0,
            "pad_token_id": self.tokenizer.eos_token_id,
            "eos_token_id": self.tokenizer.eos_token_id,
        }

        # Handle temperature=0 for greedy decoding
        if generation_config["temperature"] == 0.0:
            generation_config["do_sample"] = False
            generation_config.pop("temperature")

        # Handle different response formats from transformers
        if isinstance(results, list) and len(results) > 0:
            generated_text = (
                results[0].get("generated_text", "")
                if isinstance(results[0], dict)
                else str(results[0])
        logger.info(f"Generating with HuggingFace model, config: {generation_config}")

        # Generate
        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                **generation_config
            )
        else:
            generated_text = str(results)

        # Extract only the newly generated portion by removing the original prompt
        if isinstance(generated_text, str) and generated_text.startswith(prompt):
            response = generated_text[len(prompt) :].strip()
        else:
            # Fallback: return the full response if prompt removal fails
            response = str(generated_text)

        return response
        # Decode response
        generated_tokens = outputs[0][inputs["input_ids"].shape[1]:]
        response = self.tokenizer.decode(generated_tokens, skip_special_tokens=True)

        return response.strip()
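A minimal usage sketch of the rewritten HFChat interface above; the model id and parameter values are illustrative examples, not part of this diff:

```python
# Hypothetical caller: OpenAI-style kwargs are remapped to HF generation args.
chat = HFChat(model_name="Qwen/Qwen2.5-0.5B-Instruct")
answer = chat.ask(
    "Summarize what LEANN does in one sentence.",
    max_tokens=128,    # mapped to max_new_tokens internally
    temperature=0.0,   # triggers greedy decoding (do_sample=False)
)
print(answer)
```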


class OpenAIChat(LLMInterface):

@@ -101,7 +101,7 @@ def compute_embeddings_sentence_transformers(
    if device == "mps":
        batch_size = 128  # MPS optimal batch size from benchmark
        if model_name == "Qwen/Qwen3-Embedding-0.6B":
            batch_size = 64
            batch_size = 32
    elif device == "cuda":
        batch_size = 256  # CUDA optimal batch size
    # Keep original batch_size for CPU
40
packages/leann/README.md
Normal file
@@ -0,0 +1,40 @@
# LEANN - The smallest vector index in the world

LEANN is a revolutionary vector database that democratizes personal AI. Transform your laptop into a powerful RAG system that can index and search through millions of documents while using **97% less storage** than traditional solutions **without accuracy loss**.

## Installation

```bash
# Default installation (HNSW backend, recommended)
uv pip install leann

# With DiskANN backend (for large-scale deployments)
uv pip install leann[diskann]
```

## Quick Start

```python
from leann import LeannBuilder, LeannSearcher, LeannChat

# Build an index
builder = LeannBuilder(backend_name="hnsw")
builder.add_text("LEANN saves 97% storage compared to traditional vector databases.")
builder.build_index("my_index.leann")

# Search
searcher = LeannSearcher("my_index.leann")
results = searcher.search("storage savings", top_k=3)

# Chat with your data
chat = LeannChat("my_index.leann", llm_config={"type": "ollama", "model": "llama3.2:1b"})
response = chat.ask("How much storage does LEANN save?")
```

## Documentation

For full documentation, visit [https://leann.readthedocs.io](https://leann.readthedocs.io)

## License

MIT License
12
packages/leann/__init__.py
Normal file
@@ -0,0 +1,12 @@
"""
LEANN - Low-storage Embedding Approximation for Neural Networks

A revolutionary vector database that democratizes personal AI.
"""

__version__ = "0.1.0"

# Re-export main API from leann-core
from leann_core import LeannBuilder, LeannSearcher, LeannChat

__all__ = ["LeannBuilder", "LeannSearcher", "LeannChat"]
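Because the leann package is a thin meta-package, the re-export above means `import leann` and `from leann_core import ...` resolve to the same classes; a minimal sketch (the index path is made up):

```python
# Hypothetical check: the meta-package exposes the same API as leann-core.
import leann

builder = leann.LeannBuilder(backend_name="hnsw")  # same class as leann_core.LeannBuilder
builder.add_text("Meta-package simply re-exports the core API.")
builder.build_index("demo_index.leann")            # illustrative output path
```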
42
packages/leann/pyproject.toml
Normal file
@@ -0,0 +1,42 @@
[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"

[project]
name = "leann"
version = "0.1.2"
description = "LEANN - The smallest vector index in the world. RAG Everything with LEANN!"
readme = "README.md"
requires-python = ">=3.9"
license = { text = "MIT" }
authors = [
    { name = "LEANN Team" }
]
keywords = ["vector-database", "rag", "embeddings", "search", "ai"]
classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
    "License :: OSI Approved :: MIT License",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
]

# Default installation: core + hnsw
dependencies = [
    "leann-core>=0.1.0",
    "leann-backend-hnsw>=0.1.0",
]

[project.optional-dependencies]
diskann = [
    "leann-backend-diskann>=0.1.0",
]

[project.urls]
Homepage = "https://github.com/yourusername/leann"
Documentation = "https://leann.readthedocs.io"
Repository = "https://github.com/yourusername/leann"
Issues = "https://github.com/yourusername/leann/issues"
@@ -33,8 +33,8 @@ dependencies = [
|
||||
"msgpack>=1.1.1",
|
||||
"llama-index-vector-stores-faiss>=0.4.0",
|
||||
"llama-index-embeddings-huggingface>=0.5.5",
|
||||
"mlx>=0.26.3",
|
||||
"mlx-lm>=0.26.0",
|
||||
"mlx>=0.26.3; sys_platform == 'darwin'",
|
||||
"mlx-lm>=0.26.0; sys_platform == 'darwin'",
|
||||
"psutil>=5.8.0",
|
||||
]
|
||||
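The change above restricts the MLX dependencies to macOS via PEP 508 environment markers. A minimal sketch of the matching runtime guard that consuming code typically pairs with such markers (the module and flag names here are hypothetical, not part of this diff):

```python
# Hypothetical runtime guard mirroring the sys_platform == 'darwin' marker above.
import sys

if sys.platform == "darwin":
    import mlx.core as mx  # only installed on macOS per the dependency marker
    HAS_MLX = True
else:
    mx = None
    HAS_MLX = False
```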
87
scripts/build_and_test.sh
Executable file
@@ -0,0 +1,87 @@
#!/bin/bash

# Manual build and test script for local testing

PACKAGE=${1:-"all"}  # Default to all packages

echo "Building package: $PACKAGE"

# Ensure we're in a virtual environment
if [ -z "$VIRTUAL_ENV" ]; then
    echo "Error: Please activate a virtual environment first"
    echo "Run: source .venv/bin/activate (or .venv/bin/activate.fish for fish shell)"
    exit 1
fi

# Install build tools
uv pip install build twine delocate auditwheel scikit-build-core cmake pybind11 numpy

build_package() {
    local package_dir=$1
    local package_name=$(basename $package_dir)

    echo "Building $package_name..."
    cd $package_dir

    # Clean previous builds
    rm -rf dist/ build/ _skbuild/

    # Build directly with pip wheel (avoids sdist issues)
    pip wheel . --no-deps -w dist

    # Repair wheel for binary packages
    if [[ "$package_name" != "leann-core" ]] && [[ "$package_name" != "leann" ]]; then
        if [[ "$OSTYPE" == "darwin"* ]]; then
            # For macOS
            for wheel in dist/*.whl; do
                if [[ -f "$wheel" ]]; then
                    delocate-wheel -w dist_repaired -v "$wheel"
                fi
            done
            if [[ -d dist_repaired ]]; then
                rm -rf dist/*.whl
                mv dist_repaired/*.whl dist/
                rmdir dist_repaired
            fi
        else
            # For Linux
            for wheel in dist/*.whl; do
                if [[ -f "$wheel" ]]; then
                    auditwheel repair "$wheel" -w dist_repaired
                fi
            done
            if [[ -d dist_repaired ]]; then
                rm -rf dist/*.whl
                mv dist_repaired/*.whl dist/
                rmdir dist_repaired
            fi
        fi
    fi

    echo "Built wheels in $package_dir/dist/"
    ls -la dist/
    cd - > /dev/null
}

# Build specific package or all
if [ "$PACKAGE" == "diskann" ]; then
    build_package "packages/leann-backend-diskann"
elif [ "$PACKAGE" == "hnsw" ]; then
    build_package "packages/leann-backend-hnsw"
elif [ "$PACKAGE" == "core" ]; then
    build_package "packages/leann-core"
elif [ "$PACKAGE" == "meta" ]; then
    build_package "packages/leann"
elif [ "$PACKAGE" == "all" ]; then
    build_package "packages/leann-core"
    build_package "packages/leann-backend-hnsw"
    build_package "packages/leann-backend-diskann"
    build_package "packages/leann"
else
    echo "Unknown package: $PACKAGE"
    echo "Usage: $0 [diskann|hnsw|core|meta|all]"
    exit 1
fi

echo -e "\nBuild complete! Test with:"
echo "uv pip install packages/*/dist/*.whl"
31
scripts/bump_version.sh
Executable file
@@ -0,0 +1,31 @@
#!/bin/bash

if [ $# -eq 0 ]; then
    echo "Usage: $0 <new_version>"
    exit 1
fi

NEW_VERSION=$1

# Get the directory where the script is located
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
PROJECT_ROOT="$( cd "$SCRIPT_DIR/.." && pwd )"

# Update all pyproject.toml files
echo "Updating versions in $PROJECT_ROOT/packages/"

# Use different sed syntax for macOS vs Linux
if [[ "$OSTYPE" == "darwin"* ]]; then
    # Update version fields
    find "$PROJECT_ROOT/packages" -name "pyproject.toml" -exec sed -i '' "s/version = \".*\"/version = \"$NEW_VERSION\"/" {} \;
    # Update leann-core dependencies
    find "$PROJECT_ROOT/packages" -name "pyproject.toml" -exec sed -i '' "s/leann-core==[0-9.]*/leann-core==$NEW_VERSION/" {} \;
else
    # Update version fields
    find "$PROJECT_ROOT/packages" -name "pyproject.toml" -exec sed -i "s/version = \".*\"/version = \"$NEW_VERSION\"/" {} \;
    # Update leann-core dependencies
    find "$PROJECT_ROOT/packages" -name "pyproject.toml" -exec sed -i "s/leann-core==[0-9.]*/leann-core==$NEW_VERSION/" {} \;
fi

echo "✅ Version updated to $NEW_VERSION"
echo "✅ Dependencies updated to use leann-core==$NEW_VERSION"
18
scripts/release.sh
Executable file
@@ -0,0 +1,18 @@
#!/bin/bash

if [ $# -eq 0 ]; then
    echo "Usage: $0 <version>"
    echo "Example: $0 0.1.1"
    exit 1
fi

VERSION=$1

# Update version
./scripts/bump_version.sh $VERSION

# Commit and push
git add . && git commit -m "chore: bump version to $VERSION" && git push

# Create release (triggers CI)
gh release create v$VERSION --generate-notes
30
scripts/upload_to_pypi.sh
Executable file
@@ -0,0 +1,30 @@
#!/bin/bash

# Manual upload script for testing

TARGET=${1:-"test"}  # Default to test pypi

if [ "$TARGET" != "test" ] && [ "$TARGET" != "prod" ]; then
    echo "Usage: $0 [test|prod]"
    exit 1
fi

# Check for built packages
if ! ls packages/*/dist/*.whl >/dev/null 2>&1; then
    echo "No built packages found. Run ./scripts/build_and_test.sh first"
    exit 1
fi

if [ "$TARGET" == "test" ]; then
    echo "Uploading to Test PyPI..."
    twine upload --repository testpypi packages/*/dist/*
else
    echo "Uploading to PyPI..."
    echo "Are you sure? (y/N)"
    read -r response
    if [ "$response" == "y" ]; then
        twine upload packages/*/dist/*
    else
        echo "Cancelled"
    fi
fi