From 9b7353f33676869a156a46c303110e5cf47e08d4 Mon Sep 17 00:00:00 2001 From: aakash Date: Tue, 11 Nov 2025 05:12:49 -0800 Subject: [PATCH] Fix linting errors in colqwen_rag.py and test_colqwen_reproduction.py - Add noqa comments for E402 errors (imports after sys.path modifications) - Remove unused variable assignment in colqwen_rag.py - Use importlib.util.find_spec for dependency checks instead of unused imports - Fix import ordering in test_colqwen_reproduction.py --- README.md | 2 +- apps/colqwen_rag.py | 18 +++++++++--------- packages/leann-backend-hnsw/third_party/faiss | 2 +- test_colqwen_reproduction.py | 10 ++++++++-- 4 files changed, 19 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 841485b..0b0e59f 100755 --- a/README.md +++ b/README.md @@ -24,7 +24,7 @@ LEANN is an innovative vector database that democratizes personal AI. Transform LEANN achieves this through *graph-based selective recomputation* with *high-degree preserving pruning*, computing embeddings on-demand instead of storing them all. 
[Illustration Fig →](#ļø-architecture--how-it-works) | [Paper →](https://arxiv.org/abs/2506.08276) -**Ready to RAG Everything?** Transform your laptop into a personal AI assistant that can semantic search your **[file system](#-personal-data-manager-process-any-documents-pdf-txt-md)**, **[emails](#-your-personal-email-secretary-rag-on-apple-mail)**, **[browser history](#-time-machine-for-the-web-rag-your-entire-browser-history)**, **[chat history](#-wechat-detective-unlock-your-golden-memories)** ([WeChat](#-wechat-detective-unlock-your-golden-memories), [iMessage](#-imessage-history-your-personal-conversation-archive)), **[agent memory](#-chatgpt-chat-history-your-personal-ai-conversation-archive)** ([ChatGPT](#-chatgpt-chat-history-your-personal-ai-conversation-archive), [Claude](#-claude-chat-history-your-personal-ai-conversation-archive)), **[live data](#mcp-integration-rag-on-live-data-from-any-platform)** ([Slack](#mcp-integration-rag-on-live-data-from-any-platform), [Twitter](#mcp-integration-rag-on-live-data-from-any-platform)), **[codebase](#-claude-code-integration-transform-your-development-workflow)**\* , or external knowledge bases (i.e., 60M documents) - all on your laptop, with zero cloud costs and complete privacy. 
+**Ready to RAG Everything?** Transform your laptop into a personal AI assistant that can semantic search your **[file system](#-personal-data-manager-process-any-documents-pdf-txt-md)**, **[emails](#-your-personal-email-secretary-rag-on-apple-mail)**, **[browser history](#-time-machine-for-the-web-rag-your-entire-browser-history)**, **[chat history](#-wechat-detective-unlock-your-golden-memories)** ([WeChat](#-wechat-detective-unlock-your-golden-memories), [iMessage](#-imessage-history-your-personal-conversation-archive)), **[agent memory](#-chatgpt-chat-history-your-personal-ai-conversation-archive)** ([ChatGPT](#-chatgpt-chat-history-your-personal-ai-conversation-archive), [Claude](#-claude-chat-history-your-personal-ai-conversation-archive)), **[live data](#mcp-integration-rag-on-live-data-from-any-platform)** ([Slack](#slack-messages-search-your-team-conversations), [Twitter](#-twitter-bookmarks-your-personal-tweet-library)), **[codebase](#-claude-code-integration-transform-your-development-workflow)**\* , or external knowledge bases (i.e., 60M documents) - all on your laptop, with zero cloud costs and complete privacy. \* Claude Code only supports basic `grep`-style keyword search. **LEANN** is a drop-in **semantic search MCP service fully compatible with Claude Code**, unlocking intelligent retrieval without changing your workflow. 
🔥 Check out [the easy setup →](packages/leann-mcp/README.md) diff --git a/apps/colqwen_rag.py b/apps/colqwen_rag.py index 5c61487..a30058f 100644 --- a/apps/colqwen_rag.py +++ b/apps/colqwen_rag.py @@ -23,17 +23,17 @@ if str(_leann_core_src) not in sys.path: if str(_leann_hnsw_pkg) not in sys.path: sys.path.append(str(_leann_hnsw_pkg)) -import torch -from colpali_engine import ColPali, ColPaliProcessor, ColQwen2, ColQwen2Processor -from colpali_engine.utils.torch_utils import ListDataset -from pdf2image import convert_from_path -from PIL import Image -from torch.utils.data import DataLoader -from tqdm import tqdm +import torch # noqa: E402 +from colpali_engine import ColPali, ColPaliProcessor, ColQwen2, ColQwen2Processor # noqa: E402 +from colpali_engine.utils.torch_utils import ListDataset # noqa: E402 +from pdf2image import convert_from_path # noqa: E402 +from PIL import Image # noqa: E402 +from torch.utils.data import DataLoader # noqa: E402 +from tqdm import tqdm # noqa: E402 # Import the existing multi-vector implementation sys.path.append(str(_repo_root / "apps" / "multimodal" / "vision-based-pdf-multi-vector")) -from leann_multi_vector import LeannMultiVector +from leann_multi_vector import LeannMultiVector # noqa: E402 class ColQwenRAG: @@ -259,7 +259,7 @@ class ColQwenRAG: elif not query: continue - results = self.search(index_name, query, top_k=3) + self.search(index_name, query, top_k=3) # TODO: Add answer generation with Qwen-VL print("\n💡 For detailed answers, we can integrate Qwen-VL here!") diff --git a/packages/leann-backend-hnsw/third_party/faiss b/packages/leann-backend-hnsw/third_party/faiss index e2d243c..5952745 160000 --- a/packages/leann-backend-hnsw/third_party/faiss +++ b/packages/leann-backend-hnsw/third_party/faiss @@ -1 +1 @@ -Subproject commit e2d243c40ddc142b8c57c067c0441694f3c22121 +Subproject commit 595274523790e3bb5991437c3fc6032f170ebad9 diff --git a/test_colqwen_reproduction.py b/test_colqwen_reproduction.py index 
2af8d9c..1e38d30 100644 --- a/test_colqwen_reproduction.py +++ b/test_colqwen_reproduction.py @@ -11,6 +11,7 @@ This script demonstrates the ColQwen workflow: 5. Generate similarity maps """ +import importlib.util import os from pathlib import Path @@ -31,9 +32,14 @@ def main(): # Step 1: Check dependencies print("\n📦 Checking dependencies...") try: - import pdf2image import torch - from colpali_engine.models import ColQwen2 + + # Check if pdf2image is available + if importlib.util.find_spec("pdf2image") is None: + raise ImportError("pdf2image not found") + # Check if colpali_engine is available + if importlib.util.find_spec("colpali_engine") is None: + raise ImportError("colpali_engine not found") print("✅ Core dependencies available") print(f" - PyTorch: {torch.__version__}")