Compare commits

...

7 Commits

Author SHA1 Message Date
yichuan520030910320
6c8801480d fall back to original faiss as i merge the PR 2025-10-30 16:36:14 -07:00
ww2283
d226f72bc0 feat: implement true batch processing for Ollama embeddings
Migrate from deprecated /api/embeddings to modern /api/embed endpoint
which supports batch inputs. This reduces HTTP overhead by sending
32 texts per request instead of making individual API calls.

Changes:
- Update endpoint from /api/embeddings to /api/embed
- Change parameter from 'prompt' (single) to 'input' (array)
- Update response parsing for batch embeddings array
- Increase timeout to 60s for batch processing
- Improve error handling for batch requests

Performance:
- Reduces API calls by 32x (batch size)
- Eliminates HTTP connection overhead per text
- Note: Ollama still processes batch items sequentially internally

Related: #151
2025-10-25 10:58:15 -04:00
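
For context, the batched call this commit describes amounts to a single POST to /api/embed with a list under "input"; a minimal sketch (host, model, and texts below are placeholders, not values from this repository):

import requests

texts = ["first chunk", "second chunk", "third chunk"]

# One request now covers the whole batch instead of one call per text.
response = requests.post(
    "http://localhost:11434/api/embed",  # placeholder host
    json={"model": "nomic-embed-text", "input": texts},  # placeholder model
    timeout=60,
)
response.raise_for_status()
embeddings = response.json()["embeddings"]  # one vector per input text, in order
assert len(embeddings) == len(texts)
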
ww2283
45b87ce128 Merge upstream/main into feature/add-metadata-output
Resolved conflicts in cli.py by keeping structured metadata approach over
inline text concatenation from PR #149.

Our approach uses separate metadata dictionary which is cleaner and more
maintainable than parsing embedded strings.
2025-10-25 10:53:19 -04:00
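
To make the trade-off concrete, a toy sketch of the two chunk representations being compared (values are illustrative only): the inline form, as in the old cli.py code below, embeds the source inside the text and has to be parsed back out, while the structured form keeps the fields machine-readable.

# Inline text concatenation: source must be re-parsed from the string.
chunk_inline = "Chunk source:docs/readme.md\nSome chunk content..."

# Structured metadata (this branch): fields stay separate alongside the text.
chunk_structured = {
    "text": "Some chunk content...",
    "metadata": {"file_path": "docs/readme.md", "file_name": "readme.md"},
}
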
ww2283
585ef7785d chore: update faiss submodule to use ww2283 fork
Use ww2283/faiss fork with fix/zmq-linking branch to resolve CI checkout
failures. The ZMQ linking fixes are not yet merged upstream.
2025-10-25 10:44:48 -04:00
ww2283
5073f312b6 style: apply ruff formatting 2025-10-22 20:13:25 -04:00
ww2283
76e16338ca fix: resolve ZMQ linking issues in Python extension
- Use pkg_check_modules IMPORTED_TARGET to create PkgConfig::ZMQ
- Set PKG_CONFIG_PATH to prioritize ARM64 Homebrew on Apple Silicon
- Override macOS -undefined dynamic_lookup to force proper symbol resolution
- Use PUBLIC linkage for ZMQ in faiss library for transitive linking
- Mark cppzmq includes as SYSTEM to suppress warnings

Fixes editable install ZMQ symbol errors while maintaining compatibility
across Linux, macOS Intel, and macOS ARM64 platforms.
2025-10-22 18:53:13 -04:00
ww2283
d6a3c2821c feat: add metadata output to search results
- Add --show-metadata flag to display file paths in search results
- Preserve document metadata (file_path, file_name, timestamps) during chunking
- Update MCP tool schema to support show_metadata parameter
- Enhance CLI search output to display metadata when requested
- Fix pre-existing bug: args.backend -> args.backend_name

Resolves yichuan-w/LEANN#144
2025-10-22 14:10:47 -04:00
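
The diffs below carry the full implementation; as a quick orientation, a minimal sketch of the metadata flow this commit adds (the import path, backend, model, and file names are assumed for illustration and are not part of this diff):

from leann import LeannBuilder  # assumed public import path

builder = LeannBuilder(backend_name="hnsw", embedding_model="facebook/contriever")
# Each chunk now carries a metadata dict instead of a "Chunk source:" text prefix.
builder.add_text(
    "def connect(): ...",
    metadata={"file_path": "src/db.py", "file_name": "db.py"},
)
builder.build_index("indexes/demo.leann")

# At search time, --show-metadata (CLI) or show_metadata (MCP) reads these fields
# back from result.metadata, e.g. result.metadata.get("file_path").
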
4 changed files with 127 additions and 65 deletions

View File

@@ -29,12 +29,25 @@ if(APPLE)
     set(CMAKE_OSX_DEPLOYMENT_TARGET "11.0" CACHE STRING "Minimum macOS version")
 endif()
 
-# Use system ZeroMQ instead of building from source
+# Find ZMQ using pkg-config with IMPORTED_TARGET for automatic target creation
 find_package(PkgConfig REQUIRED)
-pkg_check_modules(ZMQ REQUIRED libzmq)
+
+# On ARM64 macOS, ensure pkg-config finds ARM64 Homebrew packages first
+if(APPLE AND CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm64")
+    set(ENV{PKG_CONFIG_PATH} "/opt/homebrew/lib/pkgconfig:/opt/homebrew/share/pkgconfig:$ENV{PKG_CONFIG_PATH}")
+endif()
+
+pkg_check_modules(ZMQ REQUIRED IMPORTED_TARGET libzmq)
+
+# This creates PkgConfig::ZMQ target automatically with correct properties
+if(TARGET PkgConfig::ZMQ)
+    message(STATUS "Found and configured ZMQ target: PkgConfig::ZMQ")
+else()
+    message(FATAL_ERROR "pkg_check_modules did not create IMPORTED target for ZMQ.")
+endif()
 
 # Add cppzmq headers
-include_directories(third_party/cppzmq)
+include_directories(SYSTEM third_party/cppzmq)
 
 # Configure msgpack-c - disable boost dependency
 set(MSGPACK_USE_BOOST OFF CACHE BOOL "" FORCE)

View File

@@ -255,6 +255,11 @@ Examples:
         action="store_true",
         help="Non-interactive mode: automatically select index without prompting",
     )
+    search_parser.add_argument(
+        "--show-metadata",
+        action="store_true",
+        help="Display file paths and metadata in search results",
+    )
 
     # Ask command
     ask_parser = subparsers.add_parser("ask", help="Ask questions")
@@ -1263,7 +1268,7 @@ Examples:
                 from .chunking_utils import create_text_chunks
 
                 # Use enhanced chunking with AST support
-                all_texts = create_text_chunks(
+                chunk_texts = create_text_chunks(
                     documents,
                     chunk_size=self.node_parser.chunk_size,
                     chunk_overlap=self.node_parser.chunk_overlap,
@@ -1274,6 +1279,14 @@ Examples:
                     ast_fallback_traditional=getattr(args, "ast_fallback_traditional", True),
                 )
+
+                # Note: AST chunking currently returns plain text chunks without metadata
+                # We preserve basic file info by associating chunks with their source documents
+                # For better metadata preservation, documents list order should be maintained
+                for chunk_text in chunk_texts:
+                    # TODO: Enhance create_text_chunks to return metadata alongside text
+                    # For now, we store chunks with empty metadata
+                    all_texts.append({"text": chunk_text, "metadata": {}})
             except ImportError as e:
                 print(
                     f"⚠️ AST chunking utilities not available in package ({e}), falling back to traditional chunking"
@@ -1285,17 +1298,27 @@ Examples:
             for doc in tqdm(documents, desc="Chunking documents", unit="doc"):
                 # Check if this is a code file based on source path
                 source_path = doc.metadata.get("source", "")
+                file_path = doc.metadata.get("file_path", "")
                 is_code_file = any(source_path.endswith(ext) for ext in code_file_exts)
 
+                # Extract metadata to preserve with chunks
+                chunk_metadata = {
+                    "file_path": file_path or source_path,
+                    "file_name": doc.metadata.get("file_name", ""),
+                }
+                # Add optional metadata if available
+                if "creation_date" in doc.metadata:
+                    chunk_metadata["creation_date"] = doc.metadata["creation_date"]
+                if "last_modified_date" in doc.metadata:
+                    chunk_metadata["last_modified_date"] = doc.metadata["last_modified_date"]
+
                 # Use appropriate parser based on file type
                 parser = self.code_parser if is_code_file else self.node_parser
                 nodes = parser.get_nodes_from_documents([doc])
                 for node in nodes:
-                    text_with_source = (
-                        "Chunk source:" + source_path + "\n" + node.get_content().replace("\n", " ")
-                    )
-                    all_texts.append(text_with_source)
+                    all_texts.append({"text": node.get_content(), "metadata": chunk_metadata})
 
         print(f"Loaded {len(documents)} documents, {len(all_texts)} chunks")
         return all_texts
@@ -1370,7 +1393,7 @@ Examples:
         index_dir.mkdir(parents=True, exist_ok=True)
 
-        print(f"Building index '{index_name}' with {args.backend} backend...")
+        print(f"Building index '{index_name}' with {args.backend_name} backend...")
 
         embedding_options: dict[str, Any] = {}
         if args.embedding_mode == "ollama":
@@ -1382,7 +1405,7 @@ Examples:
             embedding_options["api_key"] = resolved_embedding_key
 
         builder = LeannBuilder(
-            backend_name=args.backend,
+            backend_name=args.backend_name,
            embedding_model=args.embedding_model,
            embedding_mode=args.embedding_mode,
            embedding_options=embedding_options or None,
@@ -1393,10 +1416,8 @@ Examples:
             num_threads=args.num_threads,
         )
 
-        for chunk_text_with_source in all_texts:
-            chunk_source = chunk_text_with_source.split("\n")[0].split(":")[1]
-            chunk_text = chunk_text_with_source.split("\n")[1]
-            builder.add_text(chunk_text, {"source": chunk_source})
+        for chunk in all_texts:
+            builder.add_text(chunk["text"], metadata=chunk["metadata"])
 
         builder.build_index(index_path)
         print(f"Index built at {index_path}")
@@ -1517,6 +1538,23 @@ Examples:
         print(f"Search results for '{query}' (top {len(results)}):")
         for i, result in enumerate(results, 1):
             print(f"{i}. Score: {result.score:.3f}")
+
+            # Display metadata if flag is set
+            if args.show_metadata and result.metadata:
+                file_path = result.metadata.get("file_path", "")
+                if file_path:
+                    print(f"   📄 File: {file_path}")
+                file_name = result.metadata.get("file_name", "")
+                if file_name and file_name != file_path:
+                    print(f"   📝 Name: {file_name}")
+                # Show timestamps if available
+                if "creation_date" in result.metadata:
+                    print(f"   🕐 Created: {result.metadata['creation_date']}")
+                if "last_modified_date" in result.metadata:
+                    print(f"   🕑 Modified: {result.metadata['last_modified_date']}")
+
             print(f"   {result.text[:200]}...")
             print(f"   Source: {result.metadata.get('source', '')}")
             print()

View File

@@ -574,9 +574,10 @@ def compute_embeddings_ollama(
     host: Optional[str] = None,
 ) -> np.ndarray:
     """
-    Compute embeddings using Ollama API with simplified batch processing.
-    Uses batch size of 32 for MPS/CPU and 128 for CUDA to optimize performance.
+    Compute embeddings using Ollama API with true batch processing.
+    Uses the /api/embed endpoint which supports batch inputs.
+    Batch size: 32 for MPS/CPU, 128 for CUDA to optimize performance.
 
     Args:
         texts: List of texts to compute embeddings for
@@ -681,11 +682,11 @@ def compute_embeddings_ollama(
         logger.info(f"Resolved model name '{model_name}' to '{resolved_model_name}'")
         model_name = resolved_model_name
 
-    # Verify the model supports embeddings by testing it
+    # Verify the model supports embeddings by testing it with /api/embed
     try:
         test_response = requests.post(
-            f"{resolved_host}/api/embeddings",
-            json={"model": model_name, "prompt": "test"},
+            f"{resolved_host}/api/embed",
+            json={"model": model_name, "input": "test"},
             timeout=10,
         )
         if test_response.status_code != 200:
@@ -717,56 +718,55 @@ def compute_embeddings_ollama(
             # If torch is not available, use conservative batch size
             batch_size = 32
 
-    logger.info(f"Using batch size: {batch_size}")
+    logger.info(f"Using batch size: {batch_size} for true batch processing")
 
     def get_batch_embeddings(batch_texts):
-        """Get embeddings for a batch of texts."""
-        all_embeddings = []
-        failed_indices = []
-
-        for i, text in enumerate(batch_texts):
-            max_retries = 3
-            retry_count = 0
-
-            # Truncate very long texts to avoid API issues
-            truncated_text = text[:8000] if len(text) > 8000 else text
-
-            while retry_count < max_retries:
-                try:
-                    response = requests.post(
-                        f"{resolved_host}/api/embeddings",
-                        json={"model": model_name, "prompt": truncated_text},
-                        timeout=30,
-                    )
-                    response.raise_for_status()
-                    result = response.json()
-
-                    embedding = result.get("embedding")
-                    if embedding is None:
-                        raise ValueError(f"No embedding returned for text {i}")
-
-                    if not isinstance(embedding, list) or len(embedding) == 0:
-                        raise ValueError(f"Invalid embedding format for text {i}")
-
-                    all_embeddings.append(embedding)
-                    break
-
-                except requests.exceptions.Timeout:
-                    retry_count += 1
-                    if retry_count >= max_retries:
-                        logger.warning(f"Timeout for text {i} after {max_retries} retries")
-                        failed_indices.append(i)
-                        all_embeddings.append(None)
-                        break
-                except Exception as e:
-                    retry_count += 1
-                    if retry_count >= max_retries:
-                        logger.error(f"Failed to get embedding for text {i}: {e}")
-                        failed_indices.append(i)
-                        all_embeddings.append(None)
-                        break
-
-        return all_embeddings, failed_indices
+        """Get embeddings for a batch of texts using /api/embed endpoint."""
+        max_retries = 3
+        retry_count = 0
+
+        # Truncate very long texts to avoid API issues
+        truncated_texts = [text[:8000] if len(text) > 8000 else text for text in batch_texts]
+
+        while retry_count < max_retries:
+            try:
+                # Use /api/embed endpoint with "input" parameter for batch processing
+                response = requests.post(
+                    f"{resolved_host}/api/embed",
+                    json={"model": model_name, "input": truncated_texts},
+                    timeout=60,  # Increased timeout for batch processing
+                )
+                response.raise_for_status()
+                result = response.json()
+
+                batch_embeddings = result.get("embeddings")
+                if batch_embeddings is None:
+                    raise ValueError("No embeddings returned from API")
+
+                if not isinstance(batch_embeddings, list):
+                    raise ValueError(f"Invalid embeddings format: {type(batch_embeddings)}")
+
+                if len(batch_embeddings) != len(batch_texts):
+                    raise ValueError(
+                        f"Mismatch: requested {len(batch_texts)} embeddings, got {len(batch_embeddings)}"
+                    )
+
+                return batch_embeddings, []
+
+            except requests.exceptions.Timeout:
+                retry_count += 1
+                if retry_count >= max_retries:
+                    logger.warning(f"Timeout for batch after {max_retries} retries")
+                    return None, list(range(len(batch_texts)))
+            except Exception as e:
+                retry_count += 1
+                if retry_count >= max_retries:
+                    logger.error(f"Failed to get embeddings for batch: {e}")
+                    return None, list(range(len(batch_texts)))
+
+        return None, list(range(len(batch_texts)))
 
     # Process texts in batches
     all_embeddings = []
@@ -784,7 +784,7 @@ def compute_embeddings_ollama(
     num_batches = (len(texts) + batch_size - 1) // batch_size
 
     if show_progress:
-        batch_iterator = tqdm(range(num_batches), desc="Computing Ollama embeddings")
+        batch_iterator = tqdm(range(num_batches), desc="Computing Ollama embeddings (batched)")
     else:
         batch_iterator = range(num_batches)
@@ -795,10 +795,14 @@ def compute_embeddings_ollama(
         batch_embeddings, batch_failed = get_batch_embeddings(batch_texts)
 
-        # Adjust failed indices to global indices
-        global_failed = [start_idx + idx for idx in batch_failed]
-        all_failed_indices.extend(global_failed)
-        all_embeddings.extend(batch_embeddings)
+        if batch_embeddings is not None:
+            all_embeddings.extend(batch_embeddings)
+        else:
+            # Entire batch failed, add None placeholders
+            all_embeddings.extend([None] * len(batch_texts))
+
+        # Adjust failed indices to global indices
+        global_failed = [start_idx + idx for idx in batch_failed]
+        all_failed_indices.extend(global_failed)
 
     # Handle failed embeddings
     if all_failed_indices:

View File

@@ -60,6 +60,11 @@ def handle_request(request):
                         "maximum": 128,
                         "description": "Search complexity level. Use 16-32 for fast searches (recommended), 64+ for higher precision when needed.",
                     },
+                    "show_metadata": {
+                        "type": "boolean",
+                        "default": False,
+                        "description": "Include file paths and metadata in search results. Useful for understanding which files contain the results.",
+                    },
                 },
                 "required": ["index_name", "query"],
             },
@@ -104,6 +109,8 @@ def handle_request(request):
                 f"--complexity={args.get('complexity', 32)}",
                 "--non-interactive",
             ]
+            if args.get("show_metadata", False):
+                cmd.append("--show-metadata")
             result = subprocess.run(cmd, capture_output=True, text=True)
         elif tool_name == "leann_list":