fix ruff errors and formatting

2025-07-27 02:22:54 -07:00
parent 383c6d8d7e
commit af1790395a
35 changed files with 166 additions and 107 deletions
--- a/packages/init.py
+++ b/packages/init.py
@@ -1 +0,0 @@
-
--- a/packages/leann-backend-diskann/pyproject.toml
+++ b/packages/leann-backend-diskann/pyproject.toml
@@ -16,4 +16,4 @@ wheel.packages = ["leann_backend_diskann"]
 editable.mode = "redirect"
 cmake.build-type = "Release"
 build.verbose = true
-build.tool-args = ["-j8"]
+build.tool-args = ["-j8"]
--- a/packages/leann-backend-diskann/third_party/embedding.proto
+++ b/packages/leann-backend-diskann/third_party/embedding.proto
@@ -2,12 +2,12 @@ syntax = "proto3";

 package protoembedding;

-message NodeEmbeddingRequest { 
-  repeated uint32 node_ids = 1; 
+message NodeEmbeddingRequest {
+  repeated uint32 node_ids = 1;
 }

 message NodeEmbeddingResponse {
  bytes embeddings_data = 1;        // All embedded binary datas
  repeated int32 dimensions = 2;    // Shape [batch_size, embedding_dim]
  repeated uint32 missing_ids = 3;  // Missing node ids
-}
+}
--- a/packages/leann-backend-hnsw/CMakeLists.txt
+++ b/packages/leann-backend-hnsw/CMakeLists.txt
@@ -52,4 +52,4 @@ set(FAISS_BUILD_AVX512 OFF CACHE BOOL "" FORCE)
 # IMPORTANT: Disable building AVX versions to speed up compilation
 set(FAISS_BUILD_AVX_VERSIONS OFF CACHE BOOL "" FORCE)

-add_subdirectory(third_party/faiss)
+add_subdirectory(third_party/faiss)
--- a/packages/leann-backend-hnsw/leann_backend_hnsw/convert_to_csr.py
+++ b/packages/leann-backend-hnsw/leann_backend_hnsw/convert_to_csr.py
@@ -72,7 +72,11 @@ def read_vector_raw(f, element_fmt_char):
 def read_numpy_vector(f, np_dtype, struct_fmt_char):
    """Reads a vector into a NumPy array."""
    count = -1  # Initialize count for robust error handling
-    print(f"  Reading vector (dtype={np_dtype}, fmt='{struct_fmt_char}')... ", end="", flush=True)
+    print(
+        f"  Reading vector (dtype={np_dtype}, fmt='{struct_fmt_char}')... ",
+        end="",
+        flush=True,
+    )
    try:
        count, data_bytes = read_vector_raw(f, struct_fmt_char)
        print(f"Count={count}, Bytes={len(data_bytes)}")
@@ -647,7 +651,10 @@ def convert_hnsw_graph_to_csr(input_filename, output_filename, prune_embeddings=
        print(f"Error: Input file not found: {input_filename}", file=sys.stderr)
        return False
    except MemoryError as e:
-        print(f"\nFatal MemoryError during conversion: {e}. Insufficient RAM.", file=sys.stderr)
+        print(
+            f"\nFatal MemoryError during conversion: {e}. Insufficient RAM.",
+            file=sys.stderr,
+        )
        # Clean up potentially partially written output file?
        try:
            os.remove(output_filename)
--- a/packages/leann-backend-hnsw/pyproject.toml
+++ b/packages/leann-backend-hnsw/pyproject.toml
@@ -9,7 +9,7 @@ name = "leann-backend-hnsw"
 version = "0.1.14"
 description = "Custom-built HNSW (Faiss) backend for the Leann toolkit."
 dependencies = [
-    "leann-core==0.1.14", 
+    "leann-core==0.1.14",
    "numpy",
    "pyzmq>=23.0.0",
    "msgpack>=1.0.0",
@@ -24,4 +24,4 @@ build.tool-args = ["-j8"]

 # CMake definitions to optimize compilation
 [tool.scikit-build.cmake.define]
-CMAKE_BUILD_PARALLEL_LEVEL = "8"
+CMAKE_BUILD_PARALLEL_LEVEL = "8"
--- a/packages/leann-core/pyproject.toml
+++ b/packages/leann-core/pyproject.toml
@@ -46,4 +46,4 @@ colab = [
 leann = "leann.cli:main"

 [tool.setuptools.packages.find]
-where = ["src"]
+where = ["src"]
--- a/packages/leann-core/src/leann/chat.py
+++ b/packages/leann-core/src/leann/chat.py
@@ -245,7 +245,11 @@ def search_hf_models_fuzzy(query: str, limit: int = 10) -> list[str]:

        # HF Hub's search is already fuzzy! It handles typos and partial matches
        models = list_models(
-            search=query, filter="text-generation", sort="downloads", direction=-1, limit=limit
+            search=query,
+            filter="text-generation",
+            sort="downloads",
+            direction=-1,
+            limit=limit,
        )

        model_names = [model.id if hasattr(model, "id") else str(model) for model in models]
@@ -582,7 +586,11 @@ class HFChat(LLMInterface):

        # Tokenize input
        inputs = self.tokenizer(
-            formatted_prompt, return_tensors="pt", padding=True, truncation=True, max_length=2048
+            formatted_prompt,
+            return_tensors="pt",
+            padding=True,
+            truncation=True,
+            max_length=2048,
        )

        # Move inputs to device
--- a/packages/leann/README.md
+++ b/packages/leann/README.md
@@ -37,4 +37,4 @@ For full documentation, visit [https://leann.readthedocs.io](https://leann.readt

 ## License

-MIT License 
+MIT License
--- a/packages/leann/pyproject.toml
+++ b/packages/leann/pyproject.toml
@@ -39,4 +39,4 @@ diskann = [
 Homepage = "https://github.com/yourusername/leann"
 Documentation = "https://leann.readthedocs.io"
 Repository = "https://github.com/yourusername/leann"
-Issues = "https://github.com/yourusername/leann/issues" 
+Issues = "https://github.com/yourusername/leann/issues"
--- a/packages/wechat-exporter/main.py
+++ b/packages/wechat-exporter/main.py
@@ -1,6 +1,6 @@
 import json
 import sqlite3
-import xml.etree.ElementTree as ET
+import xml.etree.ElementTree as ElementTree
 from pathlib import Path
 from typing import Annotated

@@ -26,7 +26,7 @@ def get_safe_path(s: str) -> str:
 def process_history(history: str):
    if history.startswith("<?xml") or history.startswith("<msg>"):
        try:
-            root = ET.fromstring(history)
+            root = ElementTree.fromstring(history)
            title = root.find(".//title").text if root.find(".//title") is not None else None
            quoted = (
                root.find(".//refermsg/content").text
@@ -52,7 +52,8 @@ def get_message(history: dict | str):

 def export_chathistory(user_id: str):
    res = requests.get(
-        "http://localhost:48065/wechat/chatlog", params={"userId": user_id, "count": 100000}
+        "http://localhost:48065/wechat/chatlog",
+        params={"userId": user_id, "count": 100000},
    ).json()
    for i in range(len(res["chatLogs"])):
        res["chatLogs"][i]["content"] = process_history(res["chatLogs"][i]["content"])
@@ -116,7 +117,8 @@ def export_sqlite(
    all_users = requests.get("http://localhost:48065/wechat/allcontacts").json()
    for user in tqdm(all_users):
        cursor.execute(
-            "INSERT OR IGNORE INTO users (id, name) VALUES (?, ?)", (user["arg"], user["title"])
+            "INSERT OR IGNORE INTO users (id, name) VALUES (?, ?)",
+            (user["arg"], user["title"]),
        )
        usr_chatlog = export_chathistory(user["arg"])
        for msg in usr_chatlog: