diff --git a/README.md b/README.md index 9685480..1f43f39 100755 --- a/README.md +++ b/README.md @@ -445,27 +445,8 @@ We welcome contributions! Leann is built by the community, for the community. - 📖 **Documentation**: Help make Leann more accessible - 🧪 **Benchmarks**: Share your performance results -### Development Setup -```bash -git clone git@github.com:yichuan520030910320/LEANN-RAG.git leann -cd leann -git submodule update --init --recursive -uv sync --dev -uv run pytest tests/ -``` - -### Quick Tests - -```bash -# Sanity check all distance functions -uv run python tests/sanity_checks/test_distance_functions.py - -# Verify L2 implementation -uv run python tests/sanity_checks/test_l2_verification.py -``` - -## ❓ FAQ + ## 📈 Roadmap @@ -501,7 +482,7 @@ export NCCL_SOCKET_IFNAME=ens5 - [ ] Advanced caching strategies -- [ ] GPU-accelerated embedding computation +- [ ] Add contextual-retrieval https://www.anthropic.com/news/contextual-retrieval - [ ] Add sleep-time-compute and a summarize agent to summarize the files on the computer! 
- [ ] Add OpenAI recompute API diff --git a/examples/main_cli_example.py b/examples/main_cli_example.py index 77b0bec..787baa1 100644 --- a/examples/main_cli_example.py +++ b/examples/main_cli_example.py @@ -14,7 +14,7 @@ dotenv.load_dotenv() node_parser = SentenceSplitter( chunk_size=256, - chunk_overlap=20, + chunk_overlap=64, separator=" ", paragraph_separator="\n\n" ) @@ -32,41 +32,40 @@ for doc in documents: for node in nodes: all_texts.append(node.get_content()) -INDEX_DIR = Path("./test_pdf_index_pangu_test") -INDEX_PATH = str(INDEX_DIR / "pdf_documents.leann") - -if not INDEX_DIR.exists(): - print(f"--- Index directory not found, building new index ---") - - print(f"\n[PHASE 1] Building Leann index...") - - # Use HNSW backend for better macOS compatibility - builder = LeannBuilder( - backend_name="hnsw", - embedding_model="facebook/contriever", - graph_degree=32, - complexity=64, - is_compact=True, - is_recompute=True, - num_threads=1 # Force single-threaded mode - ) - - print(f"Loaded {len(all_texts)} text chunks from documents.") - for chunk_text in all_texts: - builder.add_text(chunk_text) - - builder.build_index(INDEX_PATH) - print(f"\nLeann index built at {INDEX_PATH}!") -else: - print(f"--- Using existing index at {INDEX_DIR} ---") - async def main(args): + INDEX_DIR = Path(args.index_dir) + INDEX_PATH = str(INDEX_DIR / "pdf_documents.leann") + + if not INDEX_DIR.exists(): + print(f"--- Index directory not found, building new index ---") + + print(f"\n[PHASE 1] Building Leann index...") + + # Use HNSW backend for better macOS compatibility + builder = LeannBuilder( + backend_name="hnsw", + embedding_model="facebook/contriever", + graph_degree=32, + complexity=64, + is_compact=True, + is_recompute=True, + num_threads=1 # Force single-threaded mode + ) + + print(f"Loaded {len(all_texts)} text chunks from documents.") + for chunk_text in all_texts: + builder.add_text(chunk_text) + + builder.build_index(INDEX_PATH) + print(f"\nLeann index built at {INDEX_PATH}!") + else: + print(f"--- Using existing index at {INDEX_DIR} ---") + print(f"\n[PHASE 2] Starting Leann chat session...") + llm_config = { - "type": args.llm, - "model": args.model, - "host": args.host + "type": "ollama", "model": "Qwen/Qwen3-8B" } chat = LeannChat(index_path=INDEX_PATH, llm_config=llm_config) @@ -82,8 +81,9 @@ async def main(args): if __name__ == "__main__": parser = argparse.ArgumentParser(description="Run Leann Chat with various LLM backends.") parser.add_argument("--llm", type=str, default="hf", choices=["simulated", "ollama", "hf", "openai"], help="The LLM backend to use.") - parser.add_argument("--model", type=str, default='meta-llama/Llama-3.2-3B-Instruct', help="The model name to use (e.g., 'llama3:8b' for ollama, 'deepseek-ai/deepseek-llm-7b-chat' for hf, 'gpt-4o' for openai).") + parser.add_argument("--model", type=str, default='Qwen/Qwen3-0.6B', help="The model name to use (e.g., 'llama3:8b' for ollama, 'deepseek-ai/deepseek-llm-7b-chat' for hf, 'gpt-4o' for openai).") + parser.add_argument("--host", type=str, default="http://localhost:11434", help="The host for the Ollama API.") + parser.add_argument("--index-dir", type=str, default="./test_pdf_index_pangu_test", help="Directory where the Leann index will be stored.") args = parser.parse_args() asyncio.run(main(args)) \ No newline at end of file