Compare commits

8 Commits

feat/multi ... feat/claud

| Author | SHA1 | Date |
|---|---|---|
|  | 38ec6aae11 |  |
|  | 1e5d05e36a |  |
|  | 5d21f5bd9d |  |
|  | 42690cb74e |  |
|  | a2a5b0db1b |  |
|  | 67c5a3e838 |  |
|  | 1071479c05 |  |
|  | 068fcd71cf |  |
@@ -97,6 +97,7 @@ uv sync

 </details>

 ## Quick Start

 Our declarative API makes RAG as easy as writing a config file.
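The hunk above touches the README's Quick Start section. As an illustration only (not the README's actual quick start), the sketch below drives the `leann` CLI with the subcommands and flags that appear in the diffs further down (`build`, `search`, `list`); the exact flags of the released tool may differ.

```python
# Illustrative only: exercises the `leann` subcommands visible in this compare view.
import subprocess

# Build an index from the current directory; the index name becomes optional in this PR.
subprocess.run(["leann", "build", "my-index", "--docs", "."], check=True)

# Search it and list what exists (flag spellings taken from the MCP server diff below).
subprocess.run(
    ["leann", "search", "my-index", "how does indexing work?", "--top-k=5", "--complexity=32"],
    check=True,
)
subprocess.run(["leann", "list"], check=True)
```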
@@ -86,7 +86,9 @@ Examples:

     # Build command
     build_parser = subparsers.add_parser("build", help="Build document index")
-    build_parser.add_argument("index_name", help="Index name")
+    build_parser.add_argument(
+        "index_name", nargs="?", help="Index name (default: current directory name)"
+    )
    build_parser.add_argument(
         "--docs", type=str, default=".", help="Documents directory (default: current directory)"
     )
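This makes the positional index name optional (`nargs="?"`). A self-contained sketch of the same argparse pattern, including the fallback to the current directory name that the `build_index` change later in this compare applies (the `leann-sketch` program name is illustrative):

```python
# Sketch of the nargs="?" pattern introduced above: when the positional index name
# is omitted, fall back to the current directory's name.
import argparse
from pathlib import Path

parser = argparse.ArgumentParser(prog="leann-sketch")
subparsers = parser.add_subparsers(dest="command")
build_parser = subparsers.add_parser("build", help="Build document index")
build_parser.add_argument(
    "index_name", nargs="?", help="Index name (default: current directory name)"
)
build_parser.add_argument(
    "--docs", type=str, default=".", help="Documents directory (default: current directory)"
)

args = parser.parse_args(["build"])              # no index name given
index_name = args.index_name or Path.cwd().name  # e.g. the repo checkout's folder name
print(index_name, args.docs)

args = parser.parse_args(["build", "notes", "--docs", "./notes"])
print(args.index_name, args.docs)                # notes ./notes
```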
@@ -201,6 +203,63 @@ Examples:
         with open(global_registry, "w") as f:
             json.dump(projects, f, indent=2)

+    def _read_gitignore_patterns(self, docs_dir: str) -> list[str]:
+        """Read .gitignore file and return patterns for exclusion."""
+        gitignore_path = Path(docs_dir) / ".gitignore"
+        patterns = []
+
+        # Add some essential patterns that should always be excluded
+        essential_patterns = [
+            ".git",
+            ".DS_Store",
+        ]
+        patterns.extend(essential_patterns)
+
+        if gitignore_path.exists():
+            try:
+                with open(gitignore_path, encoding="utf-8") as f:
+                    for line in f:
+                        line = line.strip()
+                        # Skip empty lines and comments
+                        if line and not line.startswith("#"):
+                            # Remove leading slash if present (make it relative)
+                            if line.startswith("/"):
+                                line = line[1:]
+                            patterns.append(line)
+                print(
+                    f"📋 Loaded {len(patterns) - len(essential_patterns)} patterns from .gitignore"
+                )
+            except Exception as e:
+                print(f"Warning: Could not read .gitignore: {e}")
+        else:
+            print("📋 No .gitignore found, using minimal exclusion patterns")
+
+        return patterns
+
+    def _should_exclude_file(self, relative_path: Path, exclude_patterns: list[str]) -> bool:
+        """Check if a file should be excluded based on gitignore-style patterns."""
+        path_str = str(relative_path)
+
+        for pattern in exclude_patterns:
+            # Simple pattern matching (could be enhanced with full gitignore syntax)
+            if pattern.endswith("*"):
+                # Wildcard pattern
+                prefix = pattern[:-1]
+                if path_str.startswith(prefix):
+                    return True
+            elif "*" in pattern:
+                # Contains wildcard - simple glob-like matching
+                import fnmatch
+
+                if fnmatch.fnmatch(path_str, pattern):
+                    return True
+            else:
+                # Exact match or directory match
+                if path_str == pattern or path_str.startswith(pattern + "/"):
+                    return True
+
+        return False
+
     def list_indexes(self):
         print("Stored LEANN indexes:")
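To see how the matching rules above behave, here is the same logic lifted out of the class into a standalone function, exercised with a few made-up paths and patterns:

```python
# Standalone illustration of the matching rules implemented in _should_exclude_file
# (same logic, module-level for a quick check; the patterns and paths are examples).
import fnmatch
from pathlib import Path

def should_exclude(relative_path: Path, exclude_patterns: list[str]) -> bool:
    path_str = str(relative_path)
    for pattern in exclude_patterns:
        if pattern.endswith("*"):
            # Prefix wildcard: "dist*" excludes dist/, dist.tar.gz, ...
            if path_str.startswith(pattern[:-1]):
                return True
        elif "*" in pattern:
            # General glob, e.g. "*.pyc"
            if fnmatch.fnmatch(path_str, pattern):
                return True
        else:
            # Exact file/dir match, e.g. ".git" or "node_modules"
            if path_str == pattern or path_str.startswith(pattern + "/"):
                return True
    return False

patterns = [".git", ".DS_Store", "*.pyc", "node_modules", "dist*"]
print(should_exclude(Path("node_modules/react/index.js"), patterns))            # True
print(should_exclude(Path("src/main.py"), patterns))                            # False
print(should_exclude(Path("src/__pycache__/main.cpython-311.pyc"), patterns))   # True
```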
@@ -282,34 +341,49 @@ Examples:
         if custom_file_types:
             print(f"Using custom file types: {custom_file_types}")

-        # Try to use better PDF parsers first
+        # Read .gitignore patterns first
+        exclude_patterns = self._read_gitignore_patterns(docs_dir)
+
+        # Try to use better PDF parsers first, but only if PDFs are requested
         documents = []
         docs_path = Path(docs_dir)

-        for file_path in docs_path.rglob("*.pdf"):
-            print(f"Processing PDF: {file_path}")
-            # Try PyMuPDF first (best quality)
-            text = extract_pdf_text_with_pymupdf(str(file_path))
-            if text is None:
-                # Try pdfplumber
-                text = extract_pdf_text_with_pdfplumber(str(file_path))
-
-            if text:
-                # Create a simple document structure
-                from llama_index.core import Document
-
-                doc = Document(text=text, metadata={"source": str(file_path)})
-                documents.append(doc)
-            else:
-                # Fallback to default reader
-                print(f"Using default reader for {file_path}")
-                default_docs = SimpleDirectoryReader(
-                    str(file_path.parent),
-                    filename_as_id=True,
-                    required_exts=[file_path.suffix],
-                ).load_data()
-                documents.extend(default_docs)
+        # Check if we should process PDFs
+        should_process_pdfs = custom_file_types is None or ".pdf" in custom_file_types
+
+        if should_process_pdfs:
+            for file_path in docs_path.rglob("*.pdf"):
+                # Check if file matches any exclude pattern
+                relative_path = file_path.relative_to(docs_path)
+                if self._should_exclude_file(relative_path, exclude_patterns):
+                    continue
+
+                print(f"Processing PDF: {file_path}")
+
+                # Try PyMuPDF first (best quality)
+                text = extract_pdf_text_with_pymupdf(str(file_path))
+                if text is None:
+                    # Try pdfplumber
+                    text = extract_pdf_text_with_pdfplumber(str(file_path))
+
+                if text:
+                    # Create a simple document structure
+                    from llama_index.core import Document
+
+                    doc = Document(text=text, metadata={"source": str(file_path)})
+                    documents.append(doc)
+                else:
+                    # Fallback to default reader
+                    print(f"Using default reader for {file_path}")
+                    try:
+                        default_docs = SimpleDirectoryReader(
+                            str(file_path.parent),
+                            filename_as_id=True,
+                            required_exts=[file_path.suffix],
+                        ).load_data()
+                        documents.extend(default_docs)
+                    except Exception as e:
+                        print(f"Warning: Could not process {file_path}: {e}")

         # Load other file types with default reader
         if custom_file_types:
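The gating expression introduced here decides whether PDFs are crawled at all: they are processed only when no custom file-type filter was given or when `.pdf` is explicitly included. A tiny check with made-up filter values:

```python
# Hypothetical values for a custom file-type filter, showing when PDF crawling runs.
for custom_file_types in (None, [".md", ".py"], [".pdf", ".md"]):
    should_process_pdfs = custom_file_types is None or ".pdf" in custom_file_types
    print(custom_file_types, "->", should_process_pdfs)
# None -> True, ['.md', '.py'] -> False, ['.pdf', '.md'] -> True
```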
@@ -380,6 +454,7 @@ Examples:
                 recursive=True,
                 encoding="utf-8",
                 required_exts=code_extensions,
+                exclude=exclude_patterns,
             ).load_data(show_progress=True)
             documents.extend(other_docs)
         except ValueError as e:
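This one-line addition threads the gitignore-derived patterns into LlamaIndex's directory reader. A minimal sketch of that call in isolation, assuming `llama-index` is installed; the directory, extensions, and patterns below are examples, while the parameters are the ones used in the diff:

```python
# Minimal sketch of the reader call modified above (example paths and patterns).
from llama_index.core import SimpleDirectoryReader

exclude_patterns = [".git", ".DS_Store", "*.pyc", "node_modules"]
other_docs = SimpleDirectoryReader(
    "./docs",
    recursive=True,
    encoding="utf-8",
    required_exts=[".md", ".txt"],
    exclude=exclude_patterns,  # gitignore-derived patterns are skipped during loading
).load_data(show_progress=True)
print(f"Loaded {len(other_docs)} documents")
```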
@@ -454,7 +529,13 @@ Examples:

     async def build_index(self, args):
         docs_dir = args.docs
-        index_name = args.index_name
+        # Use current directory name if index_name not provided
+        if args.index_name:
+            index_name = args.index_name
+        else:
+            index_name = Path.cwd().name
+            print(f"Using current directory name as index: '{index_name}'")
+
         index_dir = self.indexes_dir / index_name
         index_path = self.get_index_path(index_name)
@@ -25,32 +25,61 @@ def handle_request(request):
            "tools": [
                {
                    "name": "leann_search",
-                    "description": "Search LEANN index",
+                    "description": """🔍 Search code using natural language - like having a coding assistant who knows your entire codebase!
+
+🎯 **Perfect for**:
+- "How does authentication work?" → finds auth-related code
+- "Error handling patterns" → locates try-catch blocks and error logic
+- "Database connection setup" → finds DB initialization code
+- "API endpoint definitions" → locates route handlers
+- "Configuration management" → finds config files and usage
+
+💡 **Pro tip**: Use this before making any changes to understand existing patterns and conventions.""",
                    "inputSchema": {
                        "type": "object",
                        "properties": {
-                            "index_name": {"type": "string"},
-                            "query": {"type": "string"},
-                            "top_k": {"type": "integer", "default": 5},
+                            "index_name": {
+                                "type": "string",
+                                "description": "Name of the LEANN index to search. Use 'leann_list' first to see available indexes.",
+                            },
+                            "query": {
+                                "type": "string",
+                                "description": "Search query - can be natural language (e.g., 'how to handle errors') or technical terms (e.g., 'async function definition')",
+                            },
+                            "top_k": {
+                                "type": "integer",
+                                "default": 5,
+                                "minimum": 1,
+                                "maximum": 20,
+                                "description": "Number of search results to return. Use 5-10 for focused results, 15-20 for comprehensive exploration.",
+                            },
+                            "complexity": {
+                                "type": "integer",
+                                "default": 32,
+                                "minimum": 16,
+                                "maximum": 128,
+                                "description": "Search complexity level. Use 16-32 for fast searches (recommended), 64+ for higher precision when needed.",
+                            },
                        },
                        "required": ["index_name", "query"],
                    },
                },
                {
-                    "name": "leann_ask",
-                    "description": "Ask question using LEANN RAG",
+                    "name": "leann_status",
+                    "description": "📊 Check the health and stats of your code indexes - like a medical checkup for your codebase knowledge!",
                    "inputSchema": {
                        "type": "object",
                        "properties": {
-                            "index_name": {"type": "string"},
-                            "question": {"type": "string"},
+                            "index_name": {
+                                "type": "string",
+                                "description": "Optional: Name of specific index to check. If not provided, shows status of all indexes.",
+                            }
                        },
-                        "required": ["index_name", "question"],
                    },
                },
                {
                    "name": "leann_list",
-                    "description": "List all LEANN indexes",
+                    "description": "📋 Show all your indexed codebases - your personal code library! Use this to see what's available for search.",
                    "inputSchema": {"type": "object", "properties": {}},
                },
            ]
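With the richer `inputSchema`, a client can send bounded, self-describing arguments. A sample request for the `leann_search` tool might look like the dict below; the argument names and bounds come from the schema above, while the JSON-RPC envelope and the `tools/call` method name are assumptions about the surrounding MCP plumbing, which is not shown in this hunk:

```python
# Hypothetical client request for the leann_search tool; only the argument names
# and bounds are taken from the schema above, the envelope is assumed.
sample_request = {
    "jsonrpc": "2.0",
    "id": 1,
    "method": "tools/call",
    "params": {
        "name": "leann_search",
        "arguments": {
            "index_name": "my-project",   # must exist; check with leann_list first
            "query": "how do we retry failed requests?",
            "top_k": 10,                  # 1-20 allowed, 5 is the default
            "complexity": 32,             # 16-128 allowed, 32 is the default
        },
    },
}
print(sample_request["params"]["arguments"])
```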
@@ -63,19 +92,41 @@ def handle_request(request):

        try:
            if tool_name == "leann_search":
+                # Validate required parameters
+                if not args.get("index_name") or not args.get("query"):
+                    return {
+                        "jsonrpc": "2.0",
+                        "id": request.get("id"),
+                        "result": {
+                            "content": [
+                                {
+                                    "type": "text",
+                                    "text": "Error: Both index_name and query are required",
+                                }
+                            ]
+                        },
+                    }
+
+                # Build simplified command
                cmd = [
                    "leann",
                    "search",
                    args["index_name"],
                    args["query"],
-                    "--recompute-embeddings",
                    f"--top-k={args.get('top_k', 5)}",
+                    f"--complexity={args.get('complexity', 32)}",
                ]

                result = subprocess.run(cmd, capture_output=True, text=True)

-            elif tool_name == "leann_ask":
-                cmd = f'echo "{args["question"]}" | leann ask {args["index_name"]} --recompute-embeddings --llm ollama --model qwen3:8b'
-                result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
+            elif tool_name == "leann_status":
+                if args.get("index_name"):
+                    # Check specific index status - for now, we'll use leann list and filter
+                    result = subprocess.run(["leann", "list"], capture_output=True, text=True)
+                    # We could enhance this to show more detailed status per index
+                else:
+                    # Show all indexes status
+                    result = subprocess.run(["leann", "list"], capture_output=True, text=True)

            elif tool_name == "leann_list":
                result = subprocess.run(["leann", "list"], capture_output=True, text=True)
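For reference, this is the command list the new handler assembles for a minimal set of arguments (the argument values here are made up); note that `--recompute-embeddings` is dropped and `--complexity` is filled from its default when the client omits it:

```python
# Command construction as in the new handler, with hypothetical argument values.
args = {"index_name": "my-project", "query": "how do we retry failed requests?"}
cmd = [
    "leann",
    "search",
    args["index_name"],
    args["query"],
    f"--top-k={args.get('top_k', 5)}",
    f"--complexity={args.get('complexity', 32)}",
]
print(" ".join(cmd))
# leann search my-project how do we retry failed requests? --top-k=5 --complexity=32
```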