fix ruff errors and formatting

@@ -1 +0,0 @@
-

@@ -16,4 +16,4 @@ wheel.packages = ["leann_backend_diskann"]
 editable.mode = "redirect"
 cmake.build-type = "Release"
 build.verbose = true
-build.tool-args = ["-j8"]
+build.tool-args = ["-j8"]

@@ -2,12 +2,12 @@ syntax = "proto3";
 
 package protoembedding;
 
-message NodeEmbeddingRequest {
-  repeated uint32 node_ids = 1;
+message NodeEmbeddingRequest {
+  repeated uint32 node_ids = 1;
 }
 
 message NodeEmbeddingResponse {
   bytes embeddings_data = 1; // All embedded binary datas
   repeated int32 dimensions = 2; // Shape [batch_size, embedding_dim]
   repeated uint32 missing_ids = 3; // Missing node ids
-}
+}
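
For orientation, a minimal Python sketch of how a client might decode a NodeEmbeddingResponse, assuming embeddings_data holds float32 values laid out row-major according to dimensions; the generated module name protoembedding_pb2 and the float32 dtype are assumptions, not confirmed by this diff.

    import numpy as np
    from protoembedding_pb2 import NodeEmbeddingResponse  # module name assumed from the proto package

    def decode_response(payload: bytes) -> np.ndarray:
        resp = NodeEmbeddingResponse()
        resp.ParseFromString(payload)
        batch_size, embedding_dim = resp.dimensions  # shape [batch_size, embedding_dim]
        if resp.missing_ids:
            print("missing node ids:", list(resp.missing_ids))
        # Assumes float32 embeddings; the on-wire dtype is not stated in the .proto.
        return np.frombuffer(resp.embeddings_data, dtype=np.float32).reshape(batch_size, embedding_dim)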

@@ -52,4 +52,4 @@ set(FAISS_BUILD_AVX512 OFF CACHE BOOL "" FORCE)
 # IMPORTANT: Disable building AVX versions to speed up compilation
 set(FAISS_BUILD_AVX_VERSIONS OFF CACHE BOOL "" FORCE)
 
-add_subdirectory(third_party/faiss)
+add_subdirectory(third_party/faiss)

@@ -72,7 +72,11 @@ def read_vector_raw(f, element_fmt_char):
 def read_numpy_vector(f, np_dtype, struct_fmt_char):
     """Reads a vector into a NumPy array."""
     count = -1  # Initialize count for robust error handling
-    print(f" Reading vector (dtype={np_dtype}, fmt='{struct_fmt_char}')... ", end="", flush=True)
+    print(
+        f" Reading vector (dtype={np_dtype}, fmt='{struct_fmt_char}')... ",
+        end="",
+        flush=True,
+    )
     try:
         count, data_bytes = read_vector_raw(f, struct_fmt_char)
         print(f"Count={count}, Bytes={len(data_bytes)}")
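
As a rough sketch of the helper pair this hunk touches: read_vector_raw is assumed here to read a count-prefixed binary vector (a 4-byte little-endian length followed by the packed elements), which is a guess at the on-disk layout rather than something shown in this diff.

    import struct
    import numpy as np

    def read_vector_raw(f, element_fmt_char):
        # Assumed layout: uint32 element count, then `count` packed elements.
        (count,) = struct.unpack("<I", f.read(4))
        data_bytes = f.read(count * struct.calcsize(element_fmt_char))
        return count, data_bytes

    def read_numpy_vector(f, np_dtype, struct_fmt_char):
        """Reads a vector into a NumPy array."""
        count, data_bytes = read_vector_raw(f, struct_fmt_char)
        return np.frombuffer(data_bytes, dtype=np_dtype, count=count)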

@@ -647,7 +651,10 @@ def convert_hnsw_graph_to_csr(input_filename, output_filename, prune_embeddings=
         print(f"Error: Input file not found: {input_filename}", file=sys.stderr)
         return False
     except MemoryError as e:
-        print(f"\nFatal MemoryError during conversion: {e}. Insufficient RAM.", file=sys.stderr)
+        print(
+            f"\nFatal MemoryError during conversion: {e}. Insufficient RAM.",
+            file=sys.stderr,
+        )
         # Clean up potentially partially written output file?
         try:
             os.remove(output_filename)
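
A small sketch of the cleanup idea the comment above hints at: remove a partially written output file while tolerating the case where it was never created. This is an illustration, not the repo's actual code.

    import contextlib
    import os

    def cleanup_partial_output(output_filename: str) -> None:
        # Best-effort removal of a partially written file after a fatal error.
        with contextlib.suppress(FileNotFoundError):
            os.remove(output_filename)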

@@ -9,7 +9,7 @@ name = "leann-backend-hnsw"
 version = "0.1.14"
 description = "Custom-built HNSW (Faiss) backend for the Leann toolkit."
 dependencies = [
-    "leann-core==0.1.14",
+    "leann-core==0.1.14",
     "numpy",
     "pyzmq>=23.0.0",
     "msgpack>=1.0.0",

@@ -24,4 +24,4 @@ build.tool-args = ["-j8"]
 
 # CMake definitions to optimize compilation
 [tool.scikit-build.cmake.define]
-CMAKE_BUILD_PARALLEL_LEVEL = "8"
+CMAKE_BUILD_PARALLEL_LEVEL = "8"

@@ -46,4 +46,4 @@ colab = [
 leann = "leann.cli:main"
 
 [tool.setuptools.packages.find]
-where = ["src"]
+where = ["src"]

@@ -245,7 +245,11 @@ def search_hf_models_fuzzy(query: str, limit: int = 10) -> list[str]:
 
     # HF Hub's search is already fuzzy! It handles typos and partial matches
     models = list_models(
-        search=query, filter="text-generation", sort="downloads", direction=-1, limit=limit
+        search=query,
+        filter="text-generation",
+        sort="downloads",
+        direction=-1,
+        limit=limit,
     )
 
     model_names = [model.id if hasattr(model, "id") else str(model) for model in models]
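
Assembled from the lines above, a self-contained sketch of how this helper can be called; the example query is made up, and this assumes huggingface_hub is installed and the Hub is reachable.

    from huggingface_hub import list_models

    def search_hf_models_fuzzy(query: str, limit: int = 10) -> list[str]:
        # HF Hub's search is already fuzzy! It handles typos and partial matches
        models = list_models(
            search=query,
            filter="text-generation",
            sort="downloads",
            direction=-1,
            limit=limit,
        )
        return [model.id if hasattr(model, "id") else str(model) for model in models]

    print(search_hf_models_fuzzy("qwen", limit=5))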

@@ -582,7 +586,11 @@ class HFChat(LLMInterface):
 
         # Tokenize input
         inputs = self.tokenizer(
-            formatted_prompt, return_tensors="pt", padding=True, truncation=True, max_length=2048
+            formatted_prompt,
+            return_tensors="pt",
+            padding=True,
+            truncation=True,
+            max_length=2048,
         )
 
         # Move inputs to device
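
For context, a minimal standalone sketch of the tokenize-then-generate flow that HFChat appears to wrap, using the standard transformers API; the model name, prompt, and generation settings here are placeholders, not values from the repo.

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    model_name = "Qwen/Qwen2.5-0.5B-Instruct"  # placeholder model id, not from the repo
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token  # needed when padding=True

    formatted_prompt = "Explain what a vector index is in one sentence."
    inputs = tokenizer(
        formatted_prompt,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=2048,
    )
    inputs = {k: v.to(model.device) for k, v in inputs.items()}  # move inputs to device

    with torch.no_grad():
        output_ids = model.generate(**inputs, max_new_tokens=64)
    print(tokenizer.decode(output_ids[0], skip_special_tokens=True))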

@@ -37,4 +37,4 @@ For full documentation, visit [https://leann.readthedocs.io](https://leann.readthedocs.io)
 
 ## License
 
-MIT License
+MIT License

@@ -39,4 +39,4 @@ diskann = [
 Homepage = "https://github.com/yourusername/leann"
 Documentation = "https://leann.readthedocs.io"
 Repository = "https://github.com/yourusername/leann"
-Issues = "https://github.com/yourusername/leann/issues"
+Issues = "https://github.com/yourusername/leann/issues"

@@ -1,6 +1,6 @@
 import json
 import sqlite3
-import xml.etree.ElementTree as ET
+import xml.etree.ElementTree as ElementTree
 from pathlib import Path
 from typing import Annotated

@@ -26,7 +26,7 @@ def get_safe_path(s: str) -> str:
 def process_history(history: str):
     if history.startswith("<?xml") or history.startswith("<msg>"):
         try:
-            root = ET.fromstring(history)
+            root = ElementTree.fromstring(history)
             title = root.find(".//title").text if root.find(".//title") is not None else None
             quoted = (
                 root.find(".//refermsg/content").text
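
To illustrate what process_history is parsing, a small sketch with a made-up WeChat-style payload shaped to match the XPath queries in the diff (.//title and .//refermsg/content); the real message schema may differ.

    import xml.etree.ElementTree as ElementTree

    # Hypothetical payload; only the element names queried above are assumed.
    history = (
        "<msg><appmsg><title>Dinner tonight?</title>"
        "<refermsg><content>See you at 7</content></refermsg></appmsg></msg>"
    )

    if history.startswith("<?xml") or history.startswith("<msg>"):
        root = ElementTree.fromstring(history)
        title = root.find(".//title").text if root.find(".//title") is not None else None
        refermsg = root.find(".//refermsg/content")
        quoted = refermsg.text if refermsg is not None else None
        print(title, "| quoted:", quoted)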

@@ -52,7 +52,8 @@ def get_message(history: dict | str):
 
 def export_chathistory(user_id: str):
     res = requests.get(
-        "http://localhost:48065/wechat/chatlog", params={"userId": user_id, "count": 100000}
+        "http://localhost:48065/wechat/chatlog",
+        params={"userId": user_id, "count": 100000},
     ).json()
     for i in range(len(res["chatLogs"])):
         res["chatLogs"][i]["content"] = process_history(res["chatLogs"][i]["content"])

@@ -116,7 +117,8 @@ def export_sqlite(
     all_users = requests.get("http://localhost:48065/wechat/allcontacts").json()
     for user in tqdm(all_users):
        cursor.execute(
-            "INSERT OR IGNORE INTO users (id, name) VALUES (?, ?)", (user["arg"], user["title"])
+            "INSERT OR IGNORE INTO users (id, name) VALUES (?, ?)",
+            (user["arg"], user["title"]),
        )
        usr_chatlog = export_chathistory(user["arg"])
        for msg in usr_chatlog:
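
Finally, a minimal sketch of the users insert shown above, runnable against an in-memory database; the table schema here is inferred from the INSERT statement and is an assumption.

    import sqlite3

    conn = sqlite3.connect(":memory:")
    cursor = conn.cursor()
    # Schema inferred from the INSERT above; the real table may have more columns.
    cursor.execute("CREATE TABLE IF NOT EXISTS users (id TEXT PRIMARY KEY, name TEXT)")

    user = {"arg": "wxid_example", "title": "Alice"}  # record shape taken from the diff
    cursor.execute(
        "INSERT OR IGNORE INTO users (id, name) VALUES (?, ?)",
        (user["arg"], user["title"]),
    )
    conn.commit()
    print(cursor.execute("SELECT * FROM users").fetchall())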