diff --git a/.github/workflows/build-and-publish.yml b/.github/workflows/build-and-publish.yml index d8689ff..bf076d3 100644 --- a/.github/workflows/build-and-publish.yml +++ b/.github/workflows/build-and-publish.yml @@ -8,4 +8,4 @@ on: jobs: build: - uses: ./.github/workflows/build-reusable.yml \ No newline at end of file + uses: ./.github/workflows/build-reusable.yml diff --git a/.github/workflows/build-reusable.yml b/.github/workflows/build-reusable.yml index 1d762c2..8bb089b 100644 --- a/.github/workflows/build-reusable.yml +++ b/.github/workflows/build-reusable.yml @@ -17,23 +17,23 @@ jobs: - uses: actions/checkout@v4 with: ref: ${{ inputs.ref }} - + - name: Setup Python uses: actions/setup-python@v5 with: python-version: '3.11' - + - name: Install uv uses: astral-sh/setup-uv@v4 - + - name: Install ruff run: | uv tool install ruff - + - name: Run ruff check run: | ruff check . - + - name: Run ruff format check run: | ruff format --check . @@ -65,40 +65,40 @@ jobs: - os: macos-latest python: '3.13' runs-on: ${{ matrix.os }} - + steps: - uses: actions/checkout@v4 with: ref: ${{ inputs.ref }} submodules: recursive - + - name: Setup Python uses: actions/setup-python@v5 with: python-version: ${{ matrix.python }} - + - name: Install uv uses: astral-sh/setup-uv@v4 - + - name: Install system dependencies (Ubuntu) if: runner.os == 'Linux' run: | sudo apt-get update sudo apt-get install -y libomp-dev libboost-all-dev protobuf-compiler libzmq3-dev \ pkg-config libopenblas-dev patchelf libabsl-dev libaio-dev libprotobuf-dev - + # Install Intel MKL for DiskANN wget -q https://registrationcenter-download.intel.com/akdlm/IRC_NAS/79153e0f-74d7-45af-b8c2-258941adf58a/intel-onemkl-2025.0.0.940.sh sudo sh intel-onemkl-2025.0.0.940.sh -a --components intel.oneapi.lin.mkl.devel --action install --eula accept -s source /opt/intel/oneapi/setvars.sh echo "MKLROOT=/opt/intel/oneapi/mkl/latest" >> $GITHUB_ENV echo "LD_LIBRARY_PATH=/opt/intel/oneapi/mkl/latest/lib/intel64:$LD_LIBRARY_PATH" >> $GITHUB_ENV - + - name: Install system dependencies (macOS) if: runner.os == 'macOS' run: | brew install llvm libomp boost protobuf zeromq - + - name: Install build dependencies run: | uv pip install --system scikit-build-core numpy swig Cython pybind11 @@ -107,7 +107,7 @@ jobs: else uv pip install --system delocate fi - + - name: Build packages run: | # Build core (platform independent) @@ -116,7 +116,7 @@ jobs: uv build cd ../.. fi - + # Build HNSW backend cd packages/leann-backend-hnsw if [ "${{ matrix.os }}" == "macos-latest" ]; then @@ -125,7 +125,7 @@ jobs: uv build --wheel --python python fi cd ../.. - + # Build DiskANN backend cd packages/leann-backend-diskann if [ "${{ matrix.os }}" == "macos-latest" ]; then @@ -134,14 +134,14 @@ jobs: uv build --wheel --python python fi cd ../.. - + # Build meta package (platform independent) if [[ "${{ matrix.os }}" == ubuntu-* ]]; then cd packages/leann uv build cd ../.. fi - + - name: Repair wheels (Linux) if: runner.os == 'Linux' run: | @@ -153,7 +153,7 @@ jobs: mv dist_repaired dist fi cd ../.. - + # Repair DiskANN wheel cd packages/leann-backend-diskann if [ -d dist ]; then @@ -162,7 +162,7 @@ jobs: mv dist_repaired dist fi cd ../.. - + - name: Repair wheels (macOS) if: runner.os == 'macOS' run: | @@ -174,7 +174,7 @@ jobs: mv dist_repaired dist fi cd ../.. - + # Repair DiskANN wheel cd packages/leann-backend-diskann if [ -d dist ]; then @@ -183,14 +183,14 @@ jobs: mv dist_repaired dist fi cd ../.. 
- + - name: List built packages run: | echo "📦 Built packages:" find packages/*/dist -name "*.whl" -o -name "*.tar.gz" | sort - + - name: Upload artifacts uses: actions/upload-artifact@v4 with: name: packages-${{ matrix.os }}-py${{ matrix.python }} - path: packages/*/dist/ \ No newline at end of file + path: packages/*/dist/ diff --git a/.github/workflows/release-manual.yml b/.github/workflows/release-manual.yml index f7b7af7..a02be62 100644 --- a/.github/workflows/release-manual.yml +++ b/.github/workflows/release-manual.yml @@ -16,10 +16,10 @@ jobs: contents: write outputs: commit-sha: ${{ steps.push.outputs.commit-sha }} - + steps: - uses: actions/checkout@v4 - + - name: Validate version run: | # Remove 'v' prefix if present for validation @@ -30,7 +30,7 @@ jobs: exit 1 fi echo "✅ Version format valid: ${{ inputs.version }}" - + - name: Update versions and push id: push run: | @@ -38,7 +38,7 @@ jobs: CURRENT_VERSION=$(grep "^version" packages/leann-core/pyproject.toml | cut -d'"' -f2) echo "Current version: $CURRENT_VERSION" echo "Target version: ${{ inputs.version }}" - + if [ "$CURRENT_VERSION" = "${{ inputs.version }}" ]; then echo "⚠️ Version is already ${{ inputs.version }}, skipping update" COMMIT_SHA=$(git rev-parse HEAD) @@ -52,7 +52,7 @@ jobs: COMMIT_SHA=$(git rev-parse HEAD) echo "✅ Pushed version update: $COMMIT_SHA" fi - + echo "commit-sha=$COMMIT_SHA" >> $GITHUB_OUTPUT build-packages: @@ -60,7 +60,7 @@ jobs: needs: update-version uses: ./.github/workflows/build-reusable.yml with: - ref: 'main' + ref: 'main' publish: name: Publish and Release @@ -69,26 +69,26 @@ jobs: runs-on: ubuntu-latest permissions: contents: write - + steps: - uses: actions/checkout@v4 with: - ref: 'main' - + ref: 'main' + - name: Download all artifacts uses: actions/download-artifact@v4 with: path: dist-artifacts - + - name: Collect packages run: | mkdir -p dist find dist-artifacts -name "*.whl" -exec cp {} dist/ \; find dist-artifacts -name "*.tar.gz" -exec cp {} dist/ \; - + echo "📦 Packages to publish:" ls -la dist/ - + - name: Publish to PyPI env: TWINE_USERNAME: __token__ @@ -98,12 +98,12 @@ jobs: echo "❌ PYPI_API_TOKEN not configured!" exit 1 fi - + pip install twine twine upload dist/* --skip-existing --verbose - + echo "✅ Published to PyPI!" - + - name: Create release run: | # Check if tag already exists @@ -114,7 +114,7 @@ jobs: git push origin "v${{ inputs.version }}" echo "✅ Created and pushed tag v${{ inputs.version }}" fi - + # Check if release already exists if gh release view "v${{ inputs.version }}" >/dev/null 2>&1; then echo "⚠️ Release v${{ inputs.version }} already exists, skipping release creation" @@ -126,4 +126,4 @@ jobs: echo "✅ Created GitHub release v${{ inputs.version }}" fi env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.gitignore b/.gitignore index 0acf5ce..6b9890d 100755 --- a/.gitignore +++ b/.gitignore @@ -9,7 +9,7 @@ demo/indices/ outputs/ *.pkl *.pdf -*.idx +*.idx *.map .history/ lm_eval.egg-info/ @@ -85,4 +85,4 @@ packages/leann-backend-diskann/third_party/DiskANN/_deps/ *.meta.json *.passages.json -batchtest.py \ No newline at end of file +batchtest.py diff --git a/docs/RELEASE.md b/docs/RELEASE.md index 40da945..8588f45 100644 --- a/docs/RELEASE.md +++ b/docs/RELEASE.md @@ -19,4 +19,4 @@ That's it! 
The workflow will automatically: - ✅ Publish to PyPI - ✅ Create GitHub tag and release -Check progress: https://github.com/yichuan-w/LEANN/actions \ No newline at end of file +Check progress: https://github.com/yichuan-w/LEANN/actions diff --git a/docs/contributing.md b/docs/contributing.md index e8d262c..1cacc26 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -8,4 +8,4 @@ We welcome contributions! Leann is built by the community, for the community. - 💡 **Feature Requests**: Have an idea? We'd love to hear it! - 🔧 **Code Contributions**: PRs welcome for all skill levels - 📖 **Documentation**: Help make Leann more accessible -- 🧪 **Benchmarks**: Share your performance results \ No newline at end of file +- 🧪 **Benchmarks**: Share your performance results diff --git a/docs/faq.md b/docs/faq.md index ba06e1a..a2fdd52 100644 --- a/docs/faq.md +++ b/docs/faq.md @@ -7,4 +7,4 @@ You can speed up the process by using a lightweight embedding model. Add this to ```bash --embedding-model sentence-transformers/all-MiniLM-L6-v2 ``` -**Model sizes:** `all-MiniLM-L6-v2` (30M parameters), `facebook/contriever` (~100M parameters), `Qwen3-0.6B` (600M parameters) \ No newline at end of file +**Model sizes:** `all-MiniLM-L6-v2` (30M parameters), `facebook/contriever` (~100M parameters), `Qwen3-0.6B` (600M parameters) diff --git a/docs/features.md b/docs/features.md index a0abf85..875f9cf 100644 --- a/docs/features.md +++ b/docs/features.md @@ -19,4 +19,4 @@ - **Simple Python API** - Get started in minutes - **Extensible backend system** - Easy to add new algorithms -- **Comprehensive examples** - From basic usage to production deployment \ No newline at end of file +- **Comprehensive examples** - From basic usage to production deployment diff --git a/docs/roadmap.md b/docs/roadmap.md index ac6a839..c9446df 100644 --- a/docs/roadmap.md +++ b/docs/roadmap.md @@ -18,4 +18,4 @@ - [ ] Integration with LangChain/LlamaIndex - [ ] Visual similarity search -- [ ] Query rewrtiting, rerank and expansion \ No newline at end of file +- [ ] Query rewriting, reranking, and expansion diff --git a/examples/data/PrideandPrejudice.txt b/examples/data/PrideandPrejudice.txt index ef6d9b9..2712426 100644 --- a/examples/data/PrideandPrejudice.txt +++ b/examples/data/PrideandPrejudice.txt @@ -1,5 +1,5 @@ The Project Gutenberg eBook of Pride and Prejudice - + This ebook is for the use of anyone anywhere in the United States and most other parts of the world at no cost and with almost no restrictions whatsoever. You may copy it, give it away or re-use it under the terms @@ -14557,7 +14557,7 @@ her into Derbyshire, had been the means of uniting them. *** END OF THE PROJECT GUTENBERG EBOOK PRIDE AND PREJUDICE *** - + Updated editions will replace the previous one—the old editions will be renamed. @@ -14662,7 +14662,7 @@ performed, viewed, copied or distributed: at www.gutenberg.org. If you are not located in the United States, you will have to check the laws of the country where you are located before using this eBook. - + 1.E.2. If an individual Project Gutenberg™ electronic work is derived from texts not protected by U.S.
copyright law (does not contain a notice indicating that it is posted with permission of the @@ -14724,7 +14724,7 @@ provided that: Gutenberg Literary Archive Foundation at the address specified in Section 4, “Information about donations to the Project Gutenberg Literary Archive Foundation.” - + • You provide a full refund of any money paid by a user who notifies you in writing (or by e-mail) within 30 days of receipt that s/he does not agree to the terms of the full Project Gutenberg™ @@ -14732,15 +14732,15 @@ provided that: copies of the works possessed in a physical medium and discontinue all use of and all access to other copies of Project Gutenberg™ works. - + • You provide, in accordance with paragraph 1.F.3, a full refund of any money paid for a work or a replacement copy, if a defect in the electronic work is discovered and reported to you within 90 days of receipt of the work. - + • You comply with all other terms of this agreement for free distribution of Project Gutenberg™ works. - + 1.E.9. If you wish to charge a fee or distribute a Project Gutenberg™ electronic work or group of works on different terms than @@ -14903,5 +14903,3 @@ This website includes information about Project Gutenberg™, including how to make donations to the Project Gutenberg Literary Archive Foundation, how to help produce our new eBooks, and how to subscribe to our email newsletter to hear about new eBooks. - - diff --git a/examples/document_search.py b/examples/document_search.py index 761c6b3..fdb9167 100644 --- a/examples/document_search.py +++ b/examples/document_search.py @@ -27,7 +27,10 @@ def load_sample_documents(): "title": "Intro to Python", "content": "Python is a high-level, interpreted language known for simplicity.", }, - {"title": "ML Basics", "content": "Machine learning builds systems that learn from data."}, + { + "title": "ML Basics", + "content": "Machine learning builds systems that learn from data.", + }, { "title": "Data Structures", "content": "Data structures like arrays, lists, and graphs organize data.", diff --git a/examples/google_history_reader_leann.py b/examples/google_history_reader_leann.py index db2a4c2..82d78b1 100644 --- a/examples/google_history_reader_leann.py +++ b/examples/google_history_reader_leann.py @@ -21,7 +21,9 @@ DEFAULT_CHROME_PROFILE = os.path.expanduser("~/Library/Application Support/Googl def create_leann_index_from_multiple_chrome_profiles( - profile_dirs: list[Path], index_path: str = "chrome_history_index.leann", max_count: int = -1 + profile_dirs: list[Path], + index_path: str = "chrome_history_index.leann", + max_count: int = -1, ): """ Create LEANN index from multiple Chrome profile data sources. 
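Most of the Python hunks in this patch are mechanical rewraps of the kind enforced by the `ruff format --check` step added in `build-reusable.yml` above: a call that overflows the line limit is split to one argument per line with a trailing comma. A minimal, self-contained sketch of the convention, with a hypothetical `build_index` stub standing in for `create_leann_index_from_multiple_chrome_profiles`:

```python
from pathlib import Path


def build_index(profile_dirs: list[Path], index_path: str, max_count: int = -1) -> str:
    """Hypothetical stand-in for create_leann_index_from_multiple_chrome_profiles."""
    return index_path


# Too long for one line, so ruff format (black-compatible) splits the call:
# one argument per line, a trailing comma after the last argument, and the
# closing parenthesis dedented onto its own line.
index_path = build_index(
    [Path("Default"), Path("Profile 1")],
    index_path="chrome_history_index.leann",
    max_count=100,
)
print(index_path)
```

The trailing comma is deliberate: adding a later argument then touches a single line, which keeps future diffs as small as the ones in this patch.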
diff --git a/examples/history_data/wechat_history.py b/examples/history_data/wechat_history.py index d095cda..4106321 100644 --- a/examples/history_data/wechat_history.py +++ b/examples/history_data/wechat_history.py @@ -474,7 +474,8 @@ Messages ({len(messages)} messages, {message_group["total_length"]} chars): message_group, contact_name ) doc = Document( - text=doc_content, metadata={"contact_name": contact_name} + text=doc_content, + metadata={"contact_name": contact_name}, ) docs.append(doc) count += 1 diff --git a/examples/mail_reader_leann.py b/examples/mail_reader_leann.py index 4412e00..6aa7536 100644 --- a/examples/mail_reader_leann.py +++ b/examples/mail_reader_leann.py @@ -315,7 +315,11 @@ async def main(): # Create or load the LEANN index from all sources index_path = create_leann_index_from_multiple_sources( - messages_dirs, INDEX_PATH, args.max_emails, args.include_html, args.embedding_model + messages_dirs, + INDEX_PATH, + args.max_emails, + args.include_html, + args.embedding_model, ) if index_path: diff --git a/examples/mail_reader_llamaindex.py b/examples/mail_reader_llamaindex.py index b00e19c..cfb6b82 100644 --- a/examples/mail_reader_llamaindex.py +++ b/examples/mail_reader_llamaindex.py @@ -92,7 +92,10 @@ def main(): help="Directory to store the index (default: mail_index_embedded)", ) parser.add_argument( - "--max-emails", type=int, default=10000, help="Maximum number of emails to process" + "--max-emails", + type=int, + default=10000, + help="Maximum number of emails to process", ) parser.add_argument( "--include-html", @@ -112,7 +115,10 @@ def main(): else: print("Creating new index...") index = create_and_save_index( - mail_path, save_dir, max_count=args.max_emails, include_html=args.include_html + mail_path, + save_dir, + max_count=args.max_emails, + include_html=args.include_html, ) if index: queries = [ diff --git a/examples/multi_vector_aggregator.py b/examples/multi_vector_aggregator.py index b5eb4d8..3ec376a 100644 --- a/examples/multi_vector_aggregator.py +++ b/examples/multi_vector_aggregator.py @@ -347,7 +347,9 @@ def demo_aggregation(): print(f"\n{'=' * 20} {method.upper()} AGGREGATION {'=' * 20}") aggregator = MultiVectorAggregator( - aggregation_method=method, spatial_clustering=True, cluster_distance_threshold=100.0 + aggregation_method=method, + spatial_clustering=True, + cluster_distance_threshold=100.0, ) aggregated = aggregator.aggregate_results(mock_results, top_k=5) diff --git a/packages/__init__.py b/packages/__init__.py index 8b13789..e69de29 100644 --- a/packages/__init__.py +++ b/packages/__init__.py @@ -1 +0,0 @@ - diff --git a/packages/leann-backend-diskann/pyproject.toml b/packages/leann-backend-diskann/pyproject.toml index f28e605..f8f38bc 100644 --- a/packages/leann-backend-diskann/pyproject.toml +++ b/packages/leann-backend-diskann/pyproject.toml @@ -16,4 +16,4 @@ wheel.packages = ["leann_backend_diskann"] editable.mode = "redirect" cmake.build-type = "Release" build.verbose = true -build.tool-args = ["-j8"] \ No newline at end of file +build.tool-args = ["-j8"] diff --git a/packages/leann-backend-diskann/third_party/embedding.proto b/packages/leann-backend-diskann/third_party/embedding.proto index 98e7713..8481b39 100644 --- a/packages/leann-backend-diskann/third_party/embedding.proto +++ b/packages/leann-backend-diskann/third_party/embedding.proto @@ -2,12 +2,12 @@ syntax = "proto3"; package protoembedding; -message NodeEmbeddingRequest { - repeated uint32 node_ids = 1; +message NodeEmbeddingRequest { + repeated uint32 node_ids = 1; 
} message NodeEmbeddingResponse { bytes embeddings_data = 1; // All embedded binary data repeated int32 dimensions = 2; // Shape [batch_size, embedding_dim] repeated uint32 missing_ids = 3; // Missing node ids -} \ No newline at end of file +} diff --git a/packages/leann-backend-hnsw/CMakeLists.txt b/packages/leann-backend-hnsw/CMakeLists.txt index 2b86b0a..b9b1cfb 100644 --- a/packages/leann-backend-hnsw/CMakeLists.txt +++ b/packages/leann-backend-hnsw/CMakeLists.txt @@ -52,4 +52,4 @@ set(FAISS_BUILD_AVX512 OFF CACHE BOOL "" FORCE) # IMPORTANT: Disable building AVX versions to speed up compilation set(FAISS_BUILD_AVX_VERSIONS OFF CACHE BOOL "" FORCE) -add_subdirectory(third_party/faiss) \ No newline at end of file +add_subdirectory(third_party/faiss) diff --git a/packages/leann-backend-hnsw/leann_backend_hnsw/convert_to_csr.py b/packages/leann-backend-hnsw/leann_backend_hnsw/convert_to_csr.py index 13504a1..1f9d4f1 100644 --- a/packages/leann-backend-hnsw/leann_backend_hnsw/convert_to_csr.py +++ b/packages/leann-backend-hnsw/leann_backend_hnsw/convert_to_csr.py @@ -72,7 +72,11 @@ def read_vector_raw(f, element_fmt_char): def read_numpy_vector(f, np_dtype, struct_fmt_char): """Reads a vector into a NumPy array.""" count = -1 # Initialize count for robust error handling - print(f" Reading vector (dtype={np_dtype}, fmt='{struct_fmt_char}')... ", end="", flush=True) + print( + f" Reading vector (dtype={np_dtype}, fmt='{struct_fmt_char}')... ", + end="", + flush=True, + ) try: count, data_bytes = read_vector_raw(f, struct_fmt_char) print(f"Count={count}, Bytes={len(data_bytes)}") @@ -647,7 +651,10 @@ def convert_hnsw_graph_to_csr(input_filename, output_filename, prune_embeddings= print(f"Error: Input file not found: {input_filename}", file=sys.stderr) return False except MemoryError as e: - print(f"\nFatal MemoryError during conversion: {e}. Insufficient RAM.", file=sys.stderr) + print( + f"\nFatal MemoryError during conversion: {e}. Insufficient RAM.", + file=sys.stderr, + ) # Clean up potentially partially written output file? try: os.remove(output_filename) diff --git a/packages/leann-backend-hnsw/pyproject.toml b/packages/leann-backend-hnsw/pyproject.toml index ce3a34c..82a46b8 100644 --- a/packages/leann-backend-hnsw/pyproject.toml +++ b/packages/leann-backend-hnsw/pyproject.toml @@ -9,7 +9,7 @@ name = "leann-backend-hnsw" version = "0.1.14" description = "Custom-built HNSW (Faiss) backend for the Leann toolkit." dependencies = [ - "leann-core==0.1.14", + "leann-core==0.1.14", "numpy", "pyzmq>=23.0.0", "msgpack>=1.0.0", @@ -24,4 +24,4 @@ build.tool-args = ["-j8"] # CMake definitions to optimize compilation [tool.scikit-build.cmake.define] -CMAKE_BUILD_PARALLEL_LEVEL = "8" \ No newline at end of file +CMAKE_BUILD_PARALLEL_LEVEL = "8" diff --git a/packages/leann-core/pyproject.toml b/packages/leann-core/pyproject.toml index 8cf027d..a8a9983 100644 --- a/packages/leann-core/pyproject.toml +++ b/packages/leann-core/pyproject.toml @@ -46,4 +46,4 @@ colab = [ leann = "leann.cli:main" [tool.setuptools.packages.find] -where = ["src"] \ No newline at end of file +where = ["src"] diff --git a/packages/leann-core/src/leann/chat.py b/packages/leann-core/src/leann/chat.py index f5097f6..d97cd74 100644 --- a/packages/leann-core/src/leann/chat.py +++ b/packages/leann-core/src/leann/chat.py @@ -245,7 +245,11 @@ def search_hf_models_fuzzy(query: str, limit: int = 10) -> list[str]: # HF Hub's search is already fuzzy!
It handles typos and partial matches models = list_models( - search=query, filter="text-generation", sort="downloads", direction=-1, limit=limit + search=query, + filter="text-generation", + sort="downloads", + direction=-1, + limit=limit, ) model_names = [model.id if hasattr(model, "id") else str(model) for model in models] @@ -582,7 +586,11 @@ class HFChat(LLMInterface): # Tokenize input inputs = self.tokenizer( - formatted_prompt, return_tensors="pt", padding=True, truncation=True, max_length=2048 + formatted_prompt, + return_tensors="pt", + padding=True, + truncation=True, + max_length=2048, ) # Move inputs to device diff --git a/packages/leann/README.md b/packages/leann/README.md index a1e831f..0488c3d 100644 --- a/packages/leann/README.md +++ b/packages/leann/README.md @@ -37,4 +37,4 @@ For full documentation, visit [https://leann.readthedocs.io](https://leann.readt ## License -MIT License \ No newline at end of file +MIT License diff --git a/packages/leann/pyproject.toml b/packages/leann/pyproject.toml index 15e03a3..a6db993 100644 --- a/packages/leann/pyproject.toml +++ b/packages/leann/pyproject.toml @@ -39,4 +39,4 @@ diskann = [ Homepage = "https://github.com/yourusername/leann" Documentation = "https://leann.readthedocs.io" Repository = "https://github.com/yourusername/leann" -Issues = "https://github.com/yourusername/leann/issues" \ No newline at end of file +Issues = "https://github.com/yourusername/leann/issues" diff --git a/packages/wechat-exporter/main.py b/packages/wechat-exporter/main.py index 63acdd6..5b0911e 100644 --- a/packages/wechat-exporter/main.py +++ b/packages/wechat-exporter/main.py @@ -1,6 +1,6 @@ import json import sqlite3 -import xml.etree.ElementTree as ET +import xml.etree.ElementTree as ElementTree from pathlib import Path from typing import Annotated @@ -26,7 +26,7 @@ def get_safe_path(s: str) -> str: def process_history(history: str): if history.startswith(""): try: - root = ET.fromstring(history) + root = ElementTree.fromstring(history) title = root.find(".//title").text if root.find(".//title") is not None else None quoted = ( root.find(".//refermsg/content").text @@ -52,7 +52,8 @@ def get_message(history: dict | str): def export_chathistory(user_id: str): res = requests.get( - "http://localhost:48065/wechat/chatlog", params={"userId": user_id, "count": 100000} + "http://localhost:48065/wechat/chatlog", + params={"userId": user_id, "count": 100000}, ).json() for i in range(len(res["chatLogs"])): res["chatLogs"][i]["content"] = process_history(res["chatLogs"][i]["content"]) @@ -116,7 +117,8 @@ def export_sqlite( all_users = requests.get("http://localhost:48065/wechat/allcontacts").json() for user in tqdm(all_users): cursor.execute( - "INSERT OR IGNORE INTO users (id, name) VALUES (?, ?)", (user["arg"], user["title"]) + "INSERT OR IGNORE INTO users (id, name) VALUES (?, ?)", + (user["arg"], user["title"]), ) usr_chatlog = export_chathistory(user["arg"]) for msg in usr_chatlog: diff --git a/scripts/build_and_test.sh b/scripts/build_and_test.sh index b1c67c0..63c6394 100755 --- a/scripts/build_and_test.sh +++ b/scripts/build_and_test.sh @@ -19,16 +19,16 @@ uv pip install build twine delocate auditwheel scikit-build-core cmake pybind11 build_package() { local package_dir=$1 local package_name=$(basename $package_dir) - + echo "Building $package_name..." cd $package_dir - + # Clean previous builds rm -rf dist/ build/ _skbuild/ - + # Build directly with pip wheel (avoids sdist issues) pip wheel . 
--no-deps -w dist - + # Repair wheel for binary packages if [[ "$package_name" != "leann-core" ]] && [[ "$package_name" != "leann" ]]; then if [[ "$OSTYPE" == "darwin"* ]]; then @@ -57,7 +57,7 @@ build_package() { fi fi fi - + echo "Built wheels in $package_dir/dist/" ls -la dist/ cd - > /dev/null @@ -84,4 +84,4 @@ else fi echo -e "\nBuild complete! Test with:" -echo "uv pip install packages/*/dist/*.whl" \ No newline at end of file +echo "uv pip install packages/*/dist/*.whl" diff --git a/scripts/bump_version.sh b/scripts/bump_version.sh index 71043bb..a88bcc1 100755 --- a/scripts/bump_version.sh +++ b/scripts/bump_version.sh @@ -28,4 +28,4 @@ else fi echo "✅ Version updated to $NEW_VERSION" -echo "✅ Dependencies updated to use leann-core==$NEW_VERSION" \ No newline at end of file +echo "✅ Dependencies updated to use leann-core==$NEW_VERSION" diff --git a/scripts/release.sh b/scripts/release.sh index 9fa2b1e..d6b9a1e 100755 --- a/scripts/release.sh +++ b/scripts/release.sh @@ -15,4 +15,4 @@ VERSION=$1 git add . && git commit -m "chore: bump version to $VERSION" && git push # Create release (triggers CI) -gh release create v$VERSION --generate-notes \ No newline at end of file +gh release create v$VERSION --generate-notes diff --git a/scripts/upload_to_pypi.sh b/scripts/upload_to_pypi.sh index 745cc61..80726bf 100755 --- a/scripts/upload_to_pypi.sh +++ b/scripts/upload_to_pypi.sh @@ -27,4 +27,4 @@ else else echo "Cancelled" fi -fi \ No newline at end of file +fi diff --git a/test/micro_tpt.py b/test/micro_tpt.py index 15c8ee1..37e8b4b 100644 --- a/test/micro_tpt.py +++ b/test/micro_tpt.py @@ -58,7 +58,8 @@ class GraphWrapper: self.graph = torch.cuda.CUDAGraph() with torch.cuda.graph(self.graph): self.static_output = self.model( - input_ids=self.static_input, attention_mask=self.static_attention_mask + input_ids=self.static_input, + attention_mask=self.static_attention_mask, ) self.use_cuda_graph = True else: @@ -82,7 +83,10 @@ class GraphWrapper: def _warmup(self, num_warmup: int = 3): with torch.no_grad(): for _ in range(num_warmup): - self.model(input_ids=self.static_input, attention_mask=self.static_attention_mask) + self.model( + input_ids=self.static_input, + attention_mask=self.static_attention_mask, + ) def __call__(self, input_ids: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor: if self.use_cuda_graph: @@ -261,7 +265,10 @@ class Benchmark: # print size print(f"in_features: {in_features}, out_features: {out_features}") new_module = bnb.nn.Linear8bitLt( - in_features, out_features, bias=bias, has_fp16_weights=False + in_features, + out_features, + bias=bias, + has_fp16_weights=False, ) # Copy weights and bias @@ -350,8 +357,6 @@ class Benchmark: # Try xformers if available (only on CUDA) if torch.cuda.is_available(): try: - from xformers.ops import memory_efficient_attention # noqa: F401 - if hasattr(model, "enable_xformers_memory_efficient_attention"): model.enable_xformers_memory_efficient_attention() print("- Enabled xformers memory efficient attention") @@ -427,7 +432,11 @@ class Benchmark: else "cpu" ) return torch.randint( - 0, 1000, (batch_size, self.config.seq_length), device=device, dtype=torch.long + 0, + 1000, + (batch_size, self.config.seq_length), + device=device, + dtype=torch.long, ) def _run_inference( diff --git a/test/sanity_checks/README.md b/test/sanity_checks/README.md index d55498c..7456ce9 100644 --- a/test/sanity_checks/README.md +++ b/test/sanity_checks/README.md @@ -7,7 +7,7 @@ This directory contains comprehensive sanity checks for the Leann 
system, ensuri ### `test_distance_functions.py` Tests all supported distance functions across DiskANN backend: - ✅ **MIPS** (Maximum Inner Product Search) -- ✅ **L2** (Euclidean Distance) +- ✅ **L2** (Euclidean Distance) - ✅ **Cosine** (Cosine Similarity) ```bash @@ -27,7 +27,7 @@ uv run python tests/sanity_checks/test_l2_verification.py ### `test_sanity_check.py` Comprehensive end-to-end verification including: - Distance function testing -- Embedding model compatibility +- Embedding model compatibility - Search result correctness validation - Backend integration testing @@ -64,7 +64,7 @@ When all tests pass, you should see: ``` 📊 测试结果总结: mips : ✅ 通过 - l2 : ✅ 通过 + l2 : ✅ 通过 cosine : ✅ 通过 🎉 测试完成! @@ -98,7 +98,7 @@ pkill -f "embedding_server" ### Typical Timing (3 documents, consumer hardware): - **Index Building**: 2-5 seconds per distance function -- **Search Query**: 50-200ms +- **Search Query**: 50-200ms - **Recompute Mode**: 5-15 seconds (higher accuracy) ### Memory Usage: @@ -117,4 +117,4 @@ These tests are designed to be run in automated environments: uv run python tests/sanity_checks/test_l2_verification.py ``` -The tests are deterministic and should produce consistent results across different platforms. \ No newline at end of file +The tests are deterministic and should produce consistent results across different platforms. diff --git a/test/sanity_checks/benchmark_embeddings.py b/test/sanity_checks/benchmark_embeddings.py index 7e4b970..c44610d 100644 --- a/test/sanity_checks/benchmark_embeddings.py +++ b/test/sanity_checks/benchmark_embeddings.py @@ -115,7 +115,13 @@ def main(): # --- Plotting --- print("\n--- Generating Plot ---") plt.figure(figsize=(10, 6)) - plt.plot(BATCH_SIZES, results_torch, marker="o", linestyle="-", label=f"PyTorch ({device})") + plt.plot( + BATCH_SIZES, + results_torch, + marker="o", + linestyle="-", + label=f"PyTorch ({device})", + ) plt.plot(BATCH_SIZES, results_mlx, marker="s", linestyle="-", label="MLX") plt.title(f"Embedding Performance: MLX vs PyTorch\nModel: {MODEL_NAME_TORCH}") diff --git a/test/simple_mac_tpt_test.py b/test/simple_mac_tpt_test.py index b1a962c..721de95 100644 --- a/test/simple_mac_tpt_test.py +++ b/test/simple_mac_tpt_test.py @@ -170,7 +170,11 @@ class Benchmark: def _create_random_batch(self, batch_size: int) -> torch.Tensor: return torch.randint( - 0, 1000, (batch_size, self.config.seq_length), device=self.device, dtype=torch.long + 0, + 1000, + (batch_size, self.config.seq_length), + device=self.device, + dtype=torch.long, ) def _run_inference(self, input_ids: torch.Tensor) -> float: @@ -256,7 +260,11 @@ def run_mlx_benchmark(): """Run MLX-specific benchmark""" if not MLX_AVAILABLE: print("MLX not available, skipping MLX benchmark") - return {"max_throughput": 0.0, "avg_throughput": 0.0, "error": "MLX not available"} + return { + "max_throughput": 0.0, + "avg_throughput": 0.0, + "error": "MLX not available", + } config = BenchmarkConfig(model_path="mlx-community/all-MiniLM-L6-v2-4bit", use_mlx=True) @@ -265,7 +273,11 @@ def run_mlx_benchmark(): results = benchmark.run() if not results: - return {"max_throughput": 0.0, "avg_throughput": 0.0, "error": "No valid results"} + return { + "max_throughput": 0.0, + "avg_throughput": 0.0, + "error": "No valid results", + } max_throughput = max(results[batch_size]["throughput"] for batch_size in results) avg_throughput = np.mean([results[batch_size]["throughput"] for batch_size in results])
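Finally, the `simple_mac_tpt_test.py` hunks show the benchmark's contract: `run_mlx_benchmark` returns a structured dict even on failure, and callers reduce the per-batch-size results to max/avg throughput. A minimal sketch of that reduction under the same `{batch_size: {"throughput": float}}` shape, with made-up numbers:

```python
import numpy as np

# Same shape as the benchmark's results dict: {batch_size: {"throughput": float}}.
results = {
    1: {"throughput": 120.0},
    8: {"throughput": 640.0},
    32: {"throughput": 910.0},
}

if not results:
    # Mirrors the structured error return used by run_mlx_benchmark.
    summary = {"max_throughput": 0.0, "avg_throughput": 0.0, "error": "No valid results"}
else:
    summary = {
        "max_throughput": max(results[b]["throughput"] for b in results),
        "avg_throughput": float(np.mean([results[b]["throughput"] for b in results])),
    }
print(summary)
```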