Remove Key Features section from Slack RAG examples

- Simplified documentation by removing the bullet point list
- Keeps the focus on the actual examples and screenshots
Update Slack RAG documentation with Ollama integration and new screenshots
2025-10-18 23:55:36 -07:00 · 2025-10-18 23:46:57 -07:00 · 2025-10-18 22:25:16 -07:00 · 2025-10-18 16:28:09 -07:00 · 2025-10-18 01:38:16 -07:00 · 2025-10-18 01:25:08 -07:00
11 changed files with 81 additions and 420 deletions
@@ -105,6 +105,3 @@ apps/multimodal/vision-based-pdf-multi-vector/multi-vector-colpali-native-weavia
 # The following line used to force-add a large demo PDF; remove it to satisfy pre-commit:
 # !apps/multimodal/vision-based-pdf-multi-vector/pdfs/2004.12832v2.pdf
 !apps/multimodal/vision-based-pdf-multi-vector/fig/*
-
-# AUR build directory (Arch Linux)
-paru-bin/
@@ -1213,7 +1213,3 @@ This work is done at [**Berkeley Sky Computing Lab**](https://sky.cs.berkeley.ed
 <p align="center">
  Made with ❤️ by the Leann team
 </p>
-
-## 🤖 Explore LEANN with AI
-
-LEANN is indexed on [DeepWiki](https://deepwiki.com/yichuan-w/LEANN), so you can ask questions to LLMs using Deep Research to explore the codebase and get help to add new features.
@@ -180,14 +180,14 @@ class BaseRAGExample(ABC):
        ast_group.add_argument(
            "--ast-chunk-size",
            type=int,
-            default=300,
-            help="Maximum CHARACTERS per AST chunk (default: 300). Final chunks may be larger due to overlap. For 512 token models: recommended 300 chars",
+            default=512,
+            help="Maximum characters per AST chunk (default: 512)",
        )
        ast_group.add_argument(
            "--ast-chunk-overlap",
            type=int,
            default=64,
-            help="Overlap between AST chunks in CHARACTERS (default: 64). Added to chunk size, not included in it",
+            help="Overlap between AST chunks (default: 64)",
        )
        ast_group.add_argument(
            "--code-file-extensions",
@@ -29,25 +29,12 @@ if(APPLE)
    set(CMAKE_OSX_DEPLOYMENT_TARGET "11.0" CACHE STRING "Minimum macOS version")
 endif()

-# Find ZMQ using pkg-config with IMPORTED_TARGET for automatic target creation
+# Use system ZeroMQ instead of building from source
 find_package(PkgConfig REQUIRED)
-
-# On ARM64 macOS, ensure pkg-config finds ARM64 Homebrew packages first
-if(APPLE AND CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm64")
-    set(ENV{PKG_CONFIG_PATH} "/opt/homebrew/lib/pkgconfig:/opt/homebrew/share/pkgconfig:$ENV{PKG_CONFIG_PATH}")
-endif()
-
-pkg_check_modules(ZMQ REQUIRED IMPORTED_TARGET libzmq)
-
-# This creates PkgConfig::ZMQ target automatically with correct properties
-if(TARGET PkgConfig::ZMQ)
-    message(STATUS "Found and configured ZMQ target: PkgConfig::ZMQ")
-else()
-    message(FATAL_ERROR "pkg_check_modules did not create IMPORTED target for ZMQ.")
-endif()
+pkg_check_modules(ZMQ REQUIRED libzmq)

 # Add cppzmq headers
-include_directories(SYSTEM third_party/cppzmq)
+include_directories(third_party/cppzmq)

 # Configure msgpack-c - disable boost dependency
 set(MSGPACK_USE_BOOST OFF CACHE BOOL "" FORCE)
@@ -1236,17 +1236,6 @@ class LeannChat:
            "Please provide the best answer you can based on this context and your knowledge."
        )

-        print("The context provided to the LLM is:")
-        print(f"{'Relevance':<10} | {'Chunk id':<10} | {'Content':<60} | {'Source':<80}")
-        print("-" * 150)
-        for r in results:
-            chunk_relevance = f"{r.score:.3f}"
-            chunk_id = r.id
-            chunk_content = r.text[:60]
-            chunk_source = r.metadata.get("source", "")[:80]
-            print(
-                f"{chunk_relevance:<10} | {chunk_id:<10} | {chunk_content:<60} | {chunk_source:<80}"
-            )
        ask_time = time.time()
        ans = self.llm.ask(prompt, **llm_kwargs)
        ask_time = time.time() - ask_time
@@ -834,11 +834,6 @@ class OpenAIChat(LLMInterface):

        try:
            response = self.client.chat.completions.create(**params)
-            print(
-                f"Total tokens = {response.usage.total_tokens}, prompt tokens = {response.usage.prompt_tokens}, completion tokens = {response.usage.completion_tokens}"
-            )
-            if response.choices[0].finish_reason == "length":
-                print("The query is exceeding the maximum allowed number of tokens")
            return response.choices[0].message.content.strip()
        except Exception as e:
            logger.error(f"Error communicating with OpenAI: {e}")
@@ -11,119 +11,6 @@ from llama_index.core.node_parser import SentenceSplitter

 logger = logging.getLogger(__name__)

-
-def estimate_token_count(text: str) -> int:
-    """
-    Estimate token count for a text string.
-    Uses conservative estimation: ~4 characters per token for natural text,
-    ~1.2 tokens per character for code (worse tokenization).
-
-    Args:
-        text: Input text to estimate tokens for
-
-    Returns:
-        Estimated token count
-    """
-    try:
-        import tiktoken
-
-        encoder = tiktoken.get_encoding("cl100k_base")
-        return len(encoder.encode(text))
-    except ImportError:
-        # Fallback: Conservative character-based estimation
-        # Assume worst case for code: 1.2 tokens per character
-        return int(len(text) * 1.2)
-
-
-def calculate_safe_chunk_size(
-    model_token_limit: int,
-    overlap_tokens: int,
-    chunking_mode: str = "traditional",
-    safety_factor: float = 0.9,
-) -> int:
-    """
-    Calculate safe chunk size accounting for overlap and safety margin.
-
-    Args:
-        model_token_limit: Maximum tokens supported by embedding model
-        overlap_tokens: Overlap size (tokens for traditional, chars for AST)
-        chunking_mode: "traditional" (tokens) or "ast" (characters)
-        safety_factor: Safety margin (0.9 = 10% safety margin)
-
-    Returns:
-        Safe chunk size: tokens for traditional, characters for AST
-    """
-    safe_limit = int(model_token_limit * safety_factor)
-
-    if chunking_mode == "traditional":
-        # Traditional chunking uses tokens
-        # Max chunk = chunk_size + overlap, so chunk_size = limit - overlap
-        return max(1, safe_limit - overlap_tokens)
-    else:  # AST chunking
-        # AST uses characters, need to convert
-        # Conservative estimate: 1.2 tokens per char for code
-        overlap_chars = int(overlap_tokens * 3)  # ~3 chars per token for code
-        safe_chars = int(safe_limit / 1.2)
-        return max(1, safe_chars - overlap_chars)
-
-
-def validate_chunk_token_limits(chunks: list[str], max_tokens: int = 512) -> tuple[list[str], int]:
-    """
-    Validate that chunks don't exceed token limits and truncate if necessary.
-
-    Args:
-        chunks: List of text chunks to validate
-        max_tokens: Maximum tokens allowed per chunk
-
-    Returns:
-        Tuple of (validated_chunks, num_truncated)
-    """
-    validated_chunks = []
-    num_truncated = 0
-
-    for i, chunk in enumerate(chunks):
-        estimated_tokens = estimate_token_count(chunk)
-
-        if estimated_tokens > max_tokens:
-            # Truncate chunk to fit token limit
-            try:
-                import tiktoken
-
-                encoder = tiktoken.get_encoding("cl100k_base")
-                tokens = encoder.encode(chunk)
-                if len(tokens) > max_tokens:
-                    truncated_tokens = tokens[:max_tokens]
-                    truncated_chunk = encoder.decode(truncated_tokens)
-                    validated_chunks.append(truncated_chunk)
-                    num_truncated += 1
-                    logger.warning(
-                        f"Truncated chunk {i} from {len(tokens)} to {max_tokens} tokens "
-                        f"(from {len(chunk)} to {len(truncated_chunk)} characters)"
-                    )
-                else:
-                    validated_chunks.append(chunk)
-            except ImportError:
-                # Fallback: Conservative character truncation
-                char_limit = int(max_tokens / 1.2)  # Conservative for code
-                if len(chunk) > char_limit:
-                    truncated_chunk = chunk[:char_limit]
-                    validated_chunks.append(truncated_chunk)
-                    num_truncated += 1
-                    logger.warning(
-                        f"Truncated chunk {i} from {len(chunk)} to {char_limit} characters "
-                        f"(conservative estimate for {max_tokens} tokens)"
-                    )
-                else:
-                    validated_chunks.append(chunk)
-        else:
-            validated_chunks.append(chunk)
-
-    if num_truncated > 0:
-        logger.warning(f"Truncated {num_truncated}/{len(chunks)} chunks to fit token limits")
-
-    return validated_chunks, num_truncated
-
-
 # Code file extensions supported by astchunk
 CODE_EXTENSIONS = {
    ".py": "python",
@@ -195,17 +82,6 @@ def create_ast_chunks(
            continue

        try:
-            # Warn if AST chunk size + overlap might exceed common token limits
-            estimated_max_tokens = int(
-                (max_chunk_size + chunk_overlap) * 1.2
-            )  # Conservative estimate
-            if estimated_max_tokens > 512:
-                logger.warning(
-                    f"AST chunk size ({max_chunk_size}) + overlap ({chunk_overlap}) = {max_chunk_size + chunk_overlap} chars "
-                    f"may exceed 512 token limit (~{estimated_max_tokens} tokens estimated). "
-                    f"Consider reducing --ast-chunk-size to {int(400 / 1.2)} or --ast-chunk-overlap to {int(50 / 1.2)}"
-                )
-
            configs = {
                "max_chunk_size": max_chunk_size,
                "language": language,
@@ -341,14 +217,4 @@ def create_text_chunks(
        all_chunks = create_traditional_chunks(documents, chunk_size, chunk_overlap)

    logger.info(f"Total chunks created: {len(all_chunks)}")
-
-    # Validate chunk token limits (default to 512 for safety)
-    # This provides a safety net for embedding models with token limits
-    validated_chunks, num_truncated = validate_chunk_token_limits(all_chunks, max_tokens=512)
-
-    if num_truncated > 0:
-        logger.info(
-            f"Post-chunking validation: {num_truncated} chunks were truncated to fit 512 token limit"
-        )
-
-    return validated_chunks
+    return all_chunks
@@ -1,6 +1,5 @@
 import argparse
 import asyncio
-import time
 from pathlib import Path
 from typing import Any, Optional, Union

@@ -107,7 +106,7 @@ Examples:
            help="Documents directories and/or files (default: current directory)",
        )
        build_parser.add_argument(
-            "--backend-name",
+            "--backend",
            type=str,
            default="hnsw",
            choices=["hnsw", "diskann"],
@@ -181,25 +180,25 @@ Examples:
            "--doc-chunk-size",
            type=int,
            default=256,
-            help="Document chunk size in TOKENS (default: 256). Final chunks may be larger due to overlap. For 512 token models: recommended 350 tokens (350 + 128 overlap = 478 max)",
+            help="Document chunk size in tokens/characters (default: 256)",
        )
        build_parser.add_argument(
            "--doc-chunk-overlap",
            type=int,
            default=128,
-            help="Document chunk overlap in TOKENS (default: 128). Added to chunk size, not included in it",
+            help="Document chunk overlap (default: 128)",
        )
        build_parser.add_argument(
            "--code-chunk-size",
            type=int,
            default=512,
-            help="Code chunk size in TOKENS (default: 512). Final chunks may be larger due to overlap. For 512 token models: recommended 400 tokens (400 + 50 overlap = 450 max)",
+            help="Code chunk size in tokens/lines (default: 512)",
        )
        build_parser.add_argument(
            "--code-chunk-overlap",
            type=int,
            default=50,
-            help="Code chunk overlap in TOKENS (default: 50). Added to chunk size, not included in it",
+            help="Code chunk overlap (default: 50)",
        )
        build_parser.add_argument(
            "--use-ast-chunking",
@@ -209,14 +208,14 @@ Examples:
        build_parser.add_argument(
            "--ast-chunk-size",
            type=int,
-            default=300,
-            help="AST chunk size in CHARACTERS (non-whitespace) (default: 300). Final chunks may be larger due to overlap and expansion. For 512 token models: recommended 300 chars (300 + 64 overlap ~= 480 tokens)",
+            default=768,
+            help="AST chunk size in characters (default: 768)",
        )
        build_parser.add_argument(
            "--ast-chunk-overlap",
            type=int,
-            default=64,
-            help="AST chunk overlap in CHARACTERS (default: 64). Added to chunk size, not included in it. ~1.2 tokens per character for code",
+            default=96,
+            help="AST chunk overlap in characters (default: 96)",
        )
        build_parser.add_argument(
            "--ast-fallback-traditional",
@@ -255,11 +254,6 @@ Examples:
            action="store_true",
            help="Non-interactive mode: automatically select index without prompting",
        )
-        search_parser.add_argument(
-            "--show-metadata",
-            action="store_true",
-            help="Display file paths and metadata in search results",
-        )

        # Ask command
        ask_parser = subparsers.add_parser("ask", help="Ask questions")
@@ -1192,7 +1186,6 @@ Examples:
                for doc in other_docs:
                    file_path = doc.metadata.get("file_path", "")
                    if file_filter(file_path):
-                        doc.metadata["source"] = file_path
                        filtered_docs.append(doc)

                documents.extend(filtered_docs)
@@ -1268,7 +1261,7 @@ Examples:
                from .chunking_utils import create_text_chunks

                # Use enhanced chunking with AST support
-                chunk_texts = create_text_chunks(
+                all_texts = create_text_chunks(
                    documents,
                    chunk_size=self.node_parser.chunk_size,
                    chunk_overlap=self.node_parser.chunk_overlap,
@@ -1279,14 +1272,6 @@ Examples:
                    ast_fallback_traditional=getattr(args, "ast_fallback_traditional", True),
                )

-                # Note: AST chunking currently returns plain text chunks without metadata
-                # We preserve basic file info by associating chunks with their source documents
-                # For better metadata preservation, documents list order should be maintained
-                for chunk_text in chunk_texts:
-                    # TODO: Enhance create_text_chunks to return metadata alongside text
-                    # For now, we store chunks with empty metadata
-                    all_texts.append({"text": chunk_text, "metadata": {}})
-
            except ImportError as e:
                print(
                    f"⚠️  AST chunking utilities not available in package ({e}), falling back to traditional chunking"
@@ -1298,27 +1283,14 @@ Examples:
            for doc in tqdm(documents, desc="Chunking documents", unit="doc"):
                # Check if this is a code file based on source path
                source_path = doc.metadata.get("source", "")
-                file_path = doc.metadata.get("file_path", "")
                is_code_file = any(source_path.endswith(ext) for ext in code_file_exts)

-                # Extract metadata to preserve with chunks
-                chunk_metadata = {
-                    "file_path": file_path or source_path,
-                    "file_name": doc.metadata.get("file_name", ""),
-                }
-
-                # Add optional metadata if available
-                if "creation_date" in doc.metadata:
-                    chunk_metadata["creation_date"] = doc.metadata["creation_date"]
-                if "last_modified_date" in doc.metadata:
-                    chunk_metadata["last_modified_date"] = doc.metadata["last_modified_date"]
-
                # Use appropriate parser based on file type
                parser = self.code_parser if is_code_file else self.node_parser
                nodes = parser.get_nodes_from_documents([doc])

                for node in nodes:
-                    all_texts.append({"text": node.get_content(), "metadata": chunk_metadata})
+                    all_texts.append(node.get_content())

        print(f"Loaded {len(documents)} documents, {len(all_texts)} chunks")
        return all_texts
@@ -1393,7 +1365,7 @@ Examples:

        index_dir.mkdir(parents=True, exist_ok=True)

-        print(f"Building index '{index_name}' with {args.backend_name} backend...")
+        print(f"Building index '{index_name}' with {args.backend} backend...")

        embedding_options: dict[str, Any] = {}
        if args.embedding_mode == "ollama":
@@ -1405,7 +1377,7 @@ Examples:
                embedding_options["api_key"] = resolved_embedding_key

        builder = LeannBuilder(
-            backend_name=args.backend_name,
+            backend_name=args.backend,
            embedding_model=args.embedding_model,
            embedding_mode=args.embedding_mode,
            embedding_options=embedding_options or None,
@@ -1416,8 +1388,8 @@ Examples:
            num_threads=args.num_threads,
        )

-        for chunk in all_texts:
-            builder.add_text(chunk["text"], metadata=chunk["metadata"])
+        for chunk_text in all_texts:
+            builder.add_text(chunk_text)

        builder.build_index(index_path)
        print(f"Index built at {index_path}")
@@ -1538,25 +1510,7 @@ Examples:
        print(f"Search results for '{query}' (top {len(results)}):")
        for i, result in enumerate(results, 1):
            print(f"{i}. Score: {result.score:.3f}")
-
-            # Display metadata if flag is set
-            if args.show_metadata and result.metadata:
-                file_path = result.metadata.get("file_path", "")
-                if file_path:
-                    print(f"   📄 File: {file_path}")
-
-                file_name = result.metadata.get("file_name", "")
-                if file_name and file_name != file_path:
-                    print(f"   📝 Name: {file_name}")
-
-                # Show timestamps if available
-                if "creation_date" in result.metadata:
-                    print(f"   🕐 Created: {result.metadata['creation_date']}")
-                if "last_modified_date" in result.metadata:
-                    print(f"   🕑 Modified: {result.metadata['last_modified_date']}")
-
            print(f"   {result.text[:200]}...")
-            print(f"   Source: {result.metadata.get('source', '')}")
            print()

    async def ask_questions(self, args):
@@ -1588,7 +1542,6 @@ Examples:
            llm_kwargs["thinking_budget"] = args.thinking_budget

        def _ask_once(prompt: str) -> None:
-            query_start_time = time.time()
            response = chat.ask(
                prompt,
                top_k=args.top_k,
@@ -1599,9 +1552,7 @@ Examples:
                pruning_strategy=args.pruning_strategy,
                llm_kwargs=llm_kwargs,
            )
-            query_completion_time = time.time() - query_start_time
            print(f"LEANN: {response}")
-            print(f"The query took {query_completion_time:.3f} seconds to finish")

        initial_query = (args.query or "").strip()

@@ -14,88 +14,6 @@ import torch

 from .settings import resolve_ollama_host, resolve_openai_api_key, resolve_openai_base_url

-
-def truncate_to_token_limit(texts: list[str], max_tokens: int = 512) -> list[str]:
-    """
-    Truncate texts to token limit using tiktoken or conservative character truncation.
-
-    Args:
-        texts: List of texts to truncate
-        max_tokens: Maximum tokens allowed per text
-
-    Returns:
-        List of truncated texts that should fit within token limit
-    """
-    try:
-        import tiktoken
-
-        encoder = tiktoken.get_encoding("cl100k_base")
-        truncated = []
-
-        for text in texts:
-            tokens = encoder.encode(text)
-            if len(tokens) > max_tokens:
-                # Truncate to max_tokens and decode back to text
-                truncated_tokens = tokens[:max_tokens]
-                truncated_text = encoder.decode(truncated_tokens)
-                truncated.append(truncated_text)
-                logger.warning(
-                    f"Truncated text from {len(tokens)} to {max_tokens} tokens "
-                    f"(from {len(text)} to {len(truncated_text)} characters)"
-                )
-            else:
-                truncated.append(text)
-        return truncated
-
-    except ImportError:
-        # Fallback: Conservative character truncation
-        # Assume worst case: 1.5 tokens per character for code content
-        char_limit = int(max_tokens / 1.5)
-        truncated = []
-
-        for text in texts:
-            if len(text) > char_limit:
-                truncated_text = text[:char_limit]
-                truncated.append(truncated_text)
-                logger.warning(
-                    f"Truncated text from {len(text)} to {char_limit} characters "
-                    f"(conservative estimate for {max_tokens} tokens)"
-                )
-            else:
-                truncated.append(text)
-        return truncated
-
-
-def get_model_token_limit(model_name: str) -> int:
-    """
-    Get token limit for a given embedding model.
-
-    Args:
-        model_name: Name of the embedding model
-
-    Returns:
-        Token limit for the model, defaults to 512 if unknown
-    """
-    # Handle versioned model names (e.g., "nomic-embed-text:latest" -> "nomic-embed-text")
-    base_model_name = model_name.split(":")[0]
-
-    # Check exact match first
-    if model_name in EMBEDDING_MODEL_LIMITS:
-        return EMBEDDING_MODEL_LIMITS[model_name]
-
-    # Check base name match
-    if base_model_name in EMBEDDING_MODEL_LIMITS:
-        return EMBEDDING_MODEL_LIMITS[base_model_name]
-
-    # Check partial matches for common patterns
-    for known_model, limit in EMBEDDING_MODEL_LIMITS.items():
-        if known_model in base_model_name or base_model_name in known_model:
-            return limit
-
-    # Default to conservative 512 token limit
-    logger.warning(f"Unknown model '{model_name}', using default 512 token limit")
-    return 512
-
 # Set up logger with proper level
 logger = logging.getLogger(__name__)
 LOG_LEVEL = os.getenv("LEANN_LOG_LEVEL", "WARNING").upper()
@@ -105,17 +23,6 @@ logger.setLevel(log_level)
 # Global model cache to avoid repeated loading
 _model_cache: dict[str, Any] = {}

-# Known embedding model token limits
-EMBEDDING_MODEL_LIMITS = {
-    "nomic-embed-text": 512,
-    "nomic-embed-text-v2": 512,
-    "mxbai-embed-large": 512,
-    "all-minilm": 512,
-    "bge-m3": 8192,
-    "snowflake-arctic-embed": 512,
-    # Add more models as needed
-}
-

 def compute_embeddings(
    texts: list[str],
@@ -667,10 +574,9 @@ def compute_embeddings_ollama(
    host: Optional[str] = None,
 ) -> np.ndarray:
    """
-    Compute embeddings using Ollama API with true batch processing.
+    Compute embeddings using Ollama API with simplified batch processing.

-    Uses the /api/embed endpoint which supports batch inputs.
-    Batch size: 32 for MPS/CPU, 128 for CUDA to optimize performance.
+    Uses batch size of 32 for MPS/CPU and 128 for CUDA to optimize performance.

    Args:
        texts: List of texts to compute embeddings for
@@ -775,11 +681,11 @@ def compute_embeddings_ollama(
            logger.info(f"Resolved model name '{model_name}' to '{resolved_model_name}'")
        model_name = resolved_model_name

-        # Verify the model supports embeddings by testing it with /api/embed
+        # Verify the model supports embeddings by testing it
        try:
            test_response = requests.post(
-                f"{resolved_host}/api/embed",
-                json={"model": model_name, "input": "test"},
+                f"{resolved_host}/api/embeddings",
+                json={"model": model_name, "prompt": "test"},
                timeout=10,
            )
            if test_response.status_code != 200:
@@ -811,78 +717,63 @@ def compute_embeddings_ollama(
        # If torch is not available, use conservative batch size
        batch_size = 32

-    logger.info(f"Using batch size: {batch_size} for true batch processing")
-
-    # Get model token limit and apply truncation
-    token_limit = get_model_token_limit(model_name)
-    logger.info(f"Model '{model_name}' token limit: {token_limit}")
-
-    # Apply token-aware truncation to all texts
-    truncated_texts = truncate_to_token_limit(texts, token_limit)
-    if len(truncated_texts) != len(texts):
-        logger.error("Truncation failed - text count mismatch")
-        truncated_texts = texts  # Fallback to original texts
+    logger.info(f"Using batch size: {batch_size}")

    def get_batch_embeddings(batch_texts):
-        """Get embeddings for a batch of texts using /api/embed endpoint."""
-        max_retries = 3
-        retry_count = 0
+        """Get embeddings for a batch of texts."""
+        all_embeddings = []
+        failed_indices = []

-        # Texts are already truncated to token limit by the outer function
-        while retry_count < max_retries:
-            try:
-                # Use /api/embed endpoint with "input" parameter for batch processing
-                response = requests.post(
-                    f"{resolved_host}/api/embed",
-                    json={"model": model_name, "input": batch_texts},
-                    timeout=60,  # Increased timeout for batch processing
-                )
-                response.raise_for_status()
+        for i, text in enumerate(batch_texts):
+            max_retries = 3
+            retry_count = 0

-                result = response.json()
-                batch_embeddings = result.get("embeddings")
-
-                if batch_embeddings is None:
-                    raise ValueError("No embeddings returned from API")
-
-                if not isinstance(batch_embeddings, list):
-                    raise ValueError(f"Invalid embeddings format: {type(batch_embeddings)}")
-
-                if len(batch_embeddings) != len(batch_texts):
-                    raise ValueError(
-                        f"Mismatch: requested {len(batch_texts)} embeddings, got {len(batch_embeddings)}"
+            # Truncate very long texts to avoid API issues
+            truncated_text = text[:8000] if len(text) > 8000 else text
+            while retry_count < max_retries:
+                try:
+                    response = requests.post(
+                        f"{resolved_host}/api/embeddings",
+                        json={"model": model_name, "prompt": truncated_text},
+                        timeout=30,
                    )
+                    response.raise_for_status()

-                return batch_embeddings, []
+                    result = response.json()
+                    embedding = result.get("embedding")

-            except requests.exceptions.Timeout:
-                retry_count += 1
-                if retry_count >= max_retries:
-                    logger.warning(f"Timeout for batch after {max_retries} retries")
-                    return None, list(range(len(batch_texts)))
+                    if embedding is None:
+                        raise ValueError(f"No embedding returned for text {i}")

-            except Exception as e:
-                retry_count += 1
-                if retry_count >= max_retries:
-                    # Enhanced error detection for token limit violations
-                    error_msg = str(e).lower()
-                    if "token" in error_msg and ("limit" in error_msg or "exceed" in error_msg or "length" in error_msg):
-                        logger.error(
-                            f"Token limit exceeded for batch. Error: {e}. "
-                            f"Consider reducing chunk sizes or check token truncation."
-                        )
-                    else:
-                        logger.error(f"Failed to get embeddings for batch: {e}")
-                    return None, list(range(len(batch_texts)))
+                    if not isinstance(embedding, list) or len(embedding) == 0:
+                        raise ValueError(f"Invalid embedding format for text {i}")

-        return None, list(range(len(batch_texts)))
+                    all_embeddings.append(embedding)
+                    break

-    # Process truncated texts in batches
+                except requests.exceptions.Timeout:
+                    retry_count += 1
+                    if retry_count >= max_retries:
+                        logger.warning(f"Timeout for text {i} after {max_retries} retries")
+                        failed_indices.append(i)
+                        all_embeddings.append(None)
+                        break
+
+                except Exception as e:
+                    retry_count += 1
+                    if retry_count >= max_retries:
+                        logger.error(f"Failed to get embedding for text {i}: {e}")
+                        failed_indices.append(i)
+                        all_embeddings.append(None)
+                        break
+        return all_embeddings, failed_indices
+
+    # Process texts in batches
    all_embeddings = []
    all_failed_indices = []

    # Setup progress bar if needed
-    show_progress = is_build or len(truncated_texts) > 10
+    show_progress = is_build or len(texts) > 10
    try:
        if show_progress:
            from tqdm import tqdm
@@ -890,36 +781,32 @@ def compute_embeddings_ollama(
        show_progress = False

    # Process batches
-    num_batches = (len(truncated_texts) + batch_size - 1) // batch_size
+    num_batches = (len(texts) + batch_size - 1) // batch_size

    if show_progress:
-        batch_iterator = tqdm(range(num_batches), desc="Computing Ollama embeddings (batched)")
+        batch_iterator = tqdm(range(num_batches), desc="Computing Ollama embeddings")
    else:
        batch_iterator = range(num_batches)

    for batch_idx in batch_iterator:
        start_idx = batch_idx * batch_size
-        end_idx = min(start_idx + batch_size, len(truncated_texts))
-        batch_texts = truncated_texts[start_idx:end_idx]
+        end_idx = min(start_idx + batch_size, len(texts))
+        batch_texts = texts[start_idx:end_idx]

        batch_embeddings, batch_failed = get_batch_embeddings(batch_texts)

-        if batch_embeddings is not None:
-            all_embeddings.extend(batch_embeddings)
-        else:
-            # Entire batch failed, add None placeholders
-            all_embeddings.extend([None] * len(batch_texts))
-            # Adjust failed indices to global indices
-            global_failed = [start_idx + idx for idx in batch_failed]
-            all_failed_indices.extend(global_failed)
+        # Adjust failed indices to global indices
+        global_failed = [start_idx + idx for idx in batch_failed]
+        all_failed_indices.extend(global_failed)
+        all_embeddings.extend(batch_embeddings)

    # Handle failed embeddings
    if all_failed_indices:
-        if len(all_failed_indices) == len(truncated_texts):
+        if len(all_failed_indices) == len(texts):
            raise RuntimeError("Failed to compute any embeddings")

        logger.warning(
-            f"Failed to compute embeddings for {len(all_failed_indices)}/{len(truncated_texts)} texts"
+            f"Failed to compute embeddings for {len(all_failed_indices)}/{len(texts)} texts"
        )

        # Use zero embeddings as fallback for failed ones
@@ -60,11 +60,6 @@ def handle_request(request):
                                    "maximum": 128,
                                    "description": "Search complexity level. Use 16-32 for fast searches (recommended), 64+ for higher precision when needed.",
                                },
-                                "show_metadata": {
-                                    "type": "boolean",
-                                    "default": False,
-                                    "description": "Include file paths and metadata in search results. Useful for understanding which files contain the results.",
-                                },
                            },
                            "required": ["index_name", "query"],
                        },
@@ -109,8 +104,6 @@ def handle_request(request):
                    f"--complexity={args.get('complexity', 32)}",
                    "--non-interactive",
                ]
-                if args.get("show_metadata", False):
-                    cmd.append("--show-metadata")
                result = subprocess.run(cmd, capture_output=True, text=True)

            elif tool_name == "leann_list":
Author	SHA1	Message	Date
aakash	bc00b29d7e	Remove Key Features section from Slack RAG examples - Simplified documentation by removing the bullet point list - Keeps the focus on the actual examples and screenshots	2025-10-18 23:55:36 -07:00
aakash	22664dace8	Update Slack RAG documentation with Ollama integration and new screenshots - Updated slack-setup-guide.md with comprehensive Ollama setup instructions - Added 6 new screenshots showing complete RAG workflow: - Command setup, search results, and LLM responses for both queries - Removed simulated LLM references, now uses real Ollama with llama3.2:1b - Enhanced documentation with step-by-step Ollama installation - Updated troubleshooting checklist to include Ollama-specific checks - Fixed command syntax and added proper Ollama configuration - Demonstrates working Slack RAG with real AI-generated responses	2025-10-18 23:46:57 -07:00
aakash	fb9405e99a	Update Slack RAG integration with improved CSV parsing and new screenshots - Fixed CSV message parsing in slack_mcp_reader.py to properly handle individual messages - Updated slack_rag.py to filter empty channel strings - Enhanced slack-setup-guide.md with two new query examples: - Advisor Models query: 'train black-box models to adopt to your personal data' - Barbarians at the Gate query: 'AI-driven research systems ADRS' - Replaced old screenshots with four new ones showing both query examples - Updated documentation to use User OAuth Token (xoxp-) instead of Bot Token (xoxb-) - Added proper command examples with --no-concatenate-conversations and --force-rebuild flags	2025-10-18 22:25:16 -07:00
aakash	73ffc3cc37	Update Slack RAG example to show LEANN announcement retrieval - Change query from 'PUBPOL 290' to 'What is LEANN about?' for more challenging retrieval - Update command to use python -m apps.slack_rag instead of test script - Add expected response showing Yichuan Wang's LEANN announcement message - Emphasize this demonstrates ability to find specific announcements in conversation history - Update description to highlight challenging query capabilities	2025-10-18 16:28:09 -07:00
aakash	d411c94f21	Remove test_channel_by_id_or_name.py - Clean up temporary test file that was used for debugging - Keep only the main slack_rag.py application for production use	2025-10-18 01:38:16 -07:00
aakash	3937e1b143	Update Slack integration screenshot with latest changes	2025-10-18 01:25:08 -07:00
aakash	8221b37156	Add Slack integration screenshots to docs/videos - Add slack_integration.png showing RAG query results - Add slack_integration_2.png showing additional demo functionality - Fixes lychee link checker errors for missing image files	2025-10-18 01:14:03 -07:00
aakash	c05650103b	Fix Slack MCP integration and update documentation - Fix SlackMCPReader to use conversations_history instead of channels_list - Add fallback imports for leann.interactive_utils and leann.settings - Update slack-setup-guide.md with real screenshots and improved text - Remove old screenshot files	2025-10-18 01:08:34 -07:00
aakash	8bdd5a17ba	Docs: finalize Slack setup guide with Sky random RAG example and image path fixes - Redact example tokens from docs	2025-10-15 12:53:46 -07:00
aakash	8b537f6246	Docs: fix image path for lychee (use videos/ relative under docs/)	2025-10-15 04:18:45 -07:00
aakash	0a7a283dda	Docs/CI: fix broken image paths and ruff lint - Move screenshot to docs/videos and update references - Remove obsolete rag-query-results image - Rename variable to satisfy ruff	2025-10-15 04:15:44 -07:00
aakash	151b24a456	Docs: add real RAG example for Sky Lab #random - Embed screenshot videos/rag-sky-random.png - Add step-by-step commands and notes - Include helper test script tests/test_channel_by_id_or_name.py - Redact example tokens from docs	2025-10-15 04:09:26 -07:00
aakash	06505c069e	Update Slack setup guide with bot invitation requirements - Add important section about inviting bot to channels before RAG queries - Explain the 'not_in_channel' errors and their meaning - Provide clear steps for bot invitation process - Document realistic scenario where bot needs explicit channel access - Update documentation to be more professional and less cursor-style	2025-10-12 16:17:47 -07:00
aakash	c76a1e2c71	Add real RAG example showing intelligent Slack query functionality - Add detailed example of asking 'What is LEANN about?' - Show retrieved messages from Slack channels - Demonstrate intelligent answer generation based on context - Add command example for running real RAG queries - Explain the 4-step process: retrieve, index, generate, cite	2025-10-12 15:49:42 -07:00
aakash	1b80bcf1a0	Fix formatting issues in Slack setup guide - Remove trailing whitespace - Fix end of file formatting - Pre-commit hooks formatting fixes	2025-10-12 14:17:56 -07:00
aakash	f28c3ecc7a	Add comprehensive Slack setup guide with success screenshot - Create detailed setup guide with step-by-step instructions - Add troubleshooting section for common issues like cache sync errors - Include real terminal output example from successful integration - Add screenshot showing VS Code interface with Slack channel data - Remove excessive emojis for more professional documentation - Document retry logic improvements and CLI arguments	2025-10-12 14:09:52 -07:00
aakash	9e067e7fb3	Fix trailing whitespace in slack setup guide Pre-commit hooks formatting fixes	2025-10-09 20:04:32 -07:00
Aakash Suresh	1f98681d68	Merge branch 'main' into fix/twitter-bookmarks-anchor-link	2025-10-09 20:02:41 -07:00
aakash	3b94b7b8af	Improve Slack MCP integration with retry logic and comprehensive setup guide - Add retry mechanism with exponential backoff for cache sync issues - Handle 'users cache is not ready yet' errors gracefully - Add max-retries and retry-delay CLI arguments for better control - Create comprehensive Slack setup guide with troubleshooting - Update README with link to detailed setup guide - Improve error messages and user experience	2025-10-09 19:54:38 -07:00
aakash	df168634c8	fix: Point Slack and Twitter links to main MCP section - Both Slack and Twitter are subsections under MCP Integration - Links should point to #mcp-integration-rag-on-live-data-from-any-platform - Users will land on the MCP section and can find both Slack and Twitter subsections there This matches the actual document structure where Slack and Twitter are under the MCP Integration section.	2025-10-08 02:24:04 -07:00
aakash	9798e3cbe6	fix: Fix Slack messages anchor link as well - Convert Slack Messages from collapsible details to proper header - Update internal link to match new anchor format - Ensures external links to #slack-messages-search-your-team-conversations work correctly Both Twitter and Slack MCP sections now have reliable anchor links.	2025-10-08 02:16:59 -07:00
aakash	ab8bcba2c6	fix: Fix Twitter bookmarks anchor link - Convert Twitter Bookmarks from collapsible details to proper header - Update internal link to match new anchor format - Ensures external links to #twitter-bookmarks-your-personal-tweet-library work correctly Fixes broken link: https://github.com/yichuan-w/LEANN?tab=readme-ov-file#twitter-bookmarks-your-personal-tweet-library	2025-10-08 01:52:14 -07:00