add gif
This commit is contained in:
17
README.md
17
README.md
@@ -133,6 +133,10 @@ LEANN supports RAG on various data sources including documents (.pdf, .txt, .md)
|
|||||||
|
|
||||||
Ask questions directly about your personal PDFs, documents, and any directory containing your files!
|
Ask questions directly about your personal PDFs, documents, and any directory containing your files!
|
||||||
|
|
||||||
|
<p align="center">
|
||||||
|
<img src="videos/paper_clear.gif" alt="LEANN Document Search Demo" width="600">
|
||||||
|
</p>
|
||||||
|
|
||||||
The example below asks a question about summarizing two papers (uses default data in `examples/data`):
|
The example below asks a question about summarizing two papers (uses default data in `examples/data`):
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
@@ -150,6 +154,10 @@ python ./examples/main_cli_example.py
|
|||||||
|
|
||||||
### 📧 Your Personal Email Secretary: RAG on Apple Mail!
|
### 📧 Your Personal Email Secretary: RAG on Apple Mail!
|
||||||
|
|
||||||
|
<p align="center">
|
||||||
|
<img src="videos/mail_clear.gif" alt="LEANN Email Search Demo" width="600">
|
||||||
|
</p>
|
||||||
|
|
||||||
**Note:** You need to grant full disk access to your terminal/VS Code in System Preferences → Privacy & Security → Full Disk Access.
|
**Note:** You need to grant full disk access to your terminal/VS Code in System Preferences → Privacy & Security → Full Disk Access.
|
||||||
```bash
|
```bash
|
||||||
python examples/mail_reader_leann.py --query "What's the food I ordered by doordash or Uber eat mostly?"
|
python examples/mail_reader_leann.py --query "What's the food I ordered by doordash or Uber eat mostly?"
|
||||||
@@ -188,6 +196,11 @@ Once the index is built, you can ask questions like:
|
|||||||
</details>
|
</details>
|
||||||
|
|
||||||
### 🔍 Time Machine for the Web: RAG Your Entire Google Browser History!
|
### 🔍 Time Machine for the Web: RAG Your Entire Google Browser History!
|
||||||
|
|
||||||
|
<p align="center">
|
||||||
|
<img src="videos/google_clear.gif" alt="LEANN Browser History Search Demo" width="600">
|
||||||
|
</p>
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
python examples/google_history_reader_leann.py --query "Tell me my browser history about machine learning?"
|
python examples/google_history_reader_leann.py --query "Tell me my browser history about machine learning?"
|
||||||
```
|
```
|
||||||
@@ -242,6 +255,10 @@ Once the index is built, you can ask questions like:
|
|||||||
|
|
||||||
### 💬 WeChat Detective: Unlock Your Golden Memories!
|
### 💬 WeChat Detective: Unlock Your Golden Memories!
|
||||||
|
|
||||||
|
<p align="center">
|
||||||
|
<img src="videos/wechat_clear.gif" alt="LEANN WeChat Search Demo" width="600">
|
||||||
|
</p>
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
python examples/wechat_history_reader_leann.py --query "Show me all group chats about weekend plans"
|
python examples/wechat_history_reader_leann.py --query "Show me all group chats about weekend plans"
|
||||||
```
|
```
|
||||||
|
|||||||
@@ -222,14 +222,15 @@ async def query_leann_index(index_path: str, query: str):
|
|||||||
"max_tokens": 1000
|
"max_tokens": 1000
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
print(f"Leann: {chat_response}")
|
|
||||||
|
print(f"Leann chat response: \033[36m{chat_response}\033[0m")
|
||||||
|
|
||||||
async def main():
|
async def main():
|
||||||
# Parse command line arguments
|
# Parse command line arguments
|
||||||
parser = argparse.ArgumentParser(description='LEANN Chrome History Reader - Create and query browser history index')
|
parser = argparse.ArgumentParser(description='LEANN Chrome History Reader - Create and query browser history index')
|
||||||
parser.add_argument('--chrome-profile', type=str, default=DEFAULT_CHROME_PROFILE,
|
parser.add_argument('--chrome-profile', type=str, default=DEFAULT_CHROME_PROFILE,
|
||||||
help=f'Path to Chrome profile directory (default: {DEFAULT_CHROME_PROFILE}), usually you dont need to change this')
|
help=f'Path to Chrome profile directory (default: {DEFAULT_CHROME_PROFILE}), usually you dont need to change this')
|
||||||
parser.add_argument('--index-dir', type=str, default="./all_google_new",
|
parser.add_argument('--index-dir', type=str, default="./google_history_index",
|
||||||
help='Directory to store the LEANN index (default: ./chrome_history_index_leann_test)')
|
help='Directory to store the LEANN index (default: ./chrome_history_index_leann_test)')
|
||||||
parser.add_argument('--max-entries', type=int, default=1000,
|
parser.add_argument('--max-entries', type=int, default=1000,
|
||||||
help='Maximum number of history entries to process (default: 1000)')
|
help='Maximum number of history entries to process (default: 1000)')
|
||||||
|
|||||||
@@ -224,15 +224,16 @@ async def query_leann_index(index_path: str, query: str):
|
|||||||
beam_width=1,
|
beam_width=1,
|
||||||
)
|
)
|
||||||
end_time = time.time()
|
end_time = time.time()
|
||||||
print(f"Time taken: {end_time - start_time} seconds")
|
# print(f"Time taken: {end_time - start_time} seconds")
|
||||||
print(f"Leann: {chat_response}")
|
# highlight the answer
|
||||||
|
print(f"Leann chat response: \033[36m{chat_response}\033[0m")
|
||||||
|
|
||||||
async def main():
|
async def main():
|
||||||
# Parse command line arguments
|
# Parse command line arguments
|
||||||
parser = argparse.ArgumentParser(description='LEANN Mail Reader - Create and query email index')
|
parser = argparse.ArgumentParser(description='LEANN Mail Reader - Create and query email index')
|
||||||
# Remove --mail-path argument and auto-detect all Messages directories
|
# Remove --mail-path argument and auto-detect all Messages directories
|
||||||
# Remove DEFAULT_MAIL_PATH
|
# Remove DEFAULT_MAIL_PATH
|
||||||
parser.add_argument('--index-dir', type=str, default="./mail_index_index_file",
|
parser.add_argument('--index-dir', type=str, default="./mail_index",
|
||||||
help='Directory to store the LEANN index (default: ./mail_index_leann_raw_text_all_dicts)')
|
help='Directory to store the LEANN index (default: ./mail_index_leann_raw_text_all_dicts)')
|
||||||
parser.add_argument('--max-emails', type=int, default=1000,
|
parser.add_argument('--max-emails', type=int, default=1000,
|
||||||
help='Maximum number of emails to process (-1 means all)')
|
help='Maximum number of emails to process (-1 means all)')
|
||||||
|
|||||||
@@ -63,16 +63,14 @@ async def main(args):
|
|||||||
llm_config = {"type": "openai", "model": "gpt-4o"}
|
llm_config = {"type": "openai", "model": "gpt-4o"}
|
||||||
|
|
||||||
chat = LeannChat(index_path=INDEX_PATH, llm_config=llm_config)
|
chat = LeannChat(index_path=INDEX_PATH, llm_config=llm_config)
|
||||||
|
|
||||||
query = "Based on the paper, what are the main techniques LEANN explores to reduce the storage overhead and DLPM explore to achieve Fairness and Efiiciency trade-off?"
|
|
||||||
|
|
||||||
# query = (
|
# query = (
|
||||||
# "什么是盘古大模型以及盘古开发过程中遇到了什么阴暗面,任务令一般在什么城市颁发"
|
# "什么是盘古大模型以及盘古开发过程中遇到了什么阴暗面,任务令一般在什么城市颁发"
|
||||||
# )
|
# )
|
||||||
|
query = args.query
|
||||||
|
|
||||||
print(f"You: {query}")
|
print(f"You: {query}")
|
||||||
chat_response = chat.ask(query, top_k=20, recompute_embeddings=True, complexity=32)
|
chat_response = chat.ask(query, top_k=20, recompute_embeddings=True, complexity=32)
|
||||||
print(f"Leann: {chat_response}")
|
print(f"Leann chat response: \033[36m{chat_response}\033[0m")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
@@ -110,6 +108,12 @@ if __name__ == "__main__":
|
|||||||
default="examples/data",
|
default="examples/data",
|
||||||
help="Directory containing documents to index (PDF, TXT, MD files).",
|
help="Directory containing documents to index (PDF, TXT, MD files).",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--query",
|
||||||
|
type=str,
|
||||||
|
default="Based on the paper, what are the main techniques LEANN explores to reduce the storage overhead and DLPM explore to achieve Fairness and Efiiciency trade-off?",
|
||||||
|
help="The query to ask the Leann chat system.",
|
||||||
|
)
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
asyncio.run(main(args))
|
asyncio.run(main(args))
|
||||||
|
|||||||
@@ -234,7 +234,7 @@ async def query_leann_index(index_path: str, query: str):
|
|||||||
},
|
},
|
||||||
llm_kwargs={"temperature": 0.0, "max_tokens": 1000},
|
llm_kwargs={"temperature": 0.0, "max_tokens": 1000},
|
||||||
)
|
)
|
||||||
print(f"Leann: {chat_response}")
|
print(f"Leann chat response: \033[36m{chat_response}\033[0m")
|
||||||
|
|
||||||
|
|
||||||
async def main():
|
async def main():
|
||||||
|
|||||||
Submodule packages/leann-backend-diskann/third_party/DiskANN updated: 25339b0341...af2a26481e
@@ -441,9 +441,9 @@ class LeannSearcher:
|
|||||||
use_server_if_available=recompute_embeddings,
|
use_server_if_available=recompute_embeddings,
|
||||||
zmq_port=zmq_port,
|
zmq_port=zmq_port,
|
||||||
)
|
)
|
||||||
logger.info(f" Generated embedding shape: {query_embedding.shape}")
|
# logger.info(f" Generated embedding shape: {query_embedding.shape}")
|
||||||
embedding_time = time.time() - start_time
|
embedding_time = time.time() - start_time
|
||||||
logger.info(f" Embedding time: {embedding_time} seconds")
|
# logger.info(f" Embedding time: {embedding_time} seconds")
|
||||||
|
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
results = self.backend_impl.search(
|
results = self.backend_impl.search(
|
||||||
@@ -458,7 +458,7 @@ class LeannSearcher:
|
|||||||
**kwargs,
|
**kwargs,
|
||||||
)
|
)
|
||||||
search_time = time.time() - start_time
|
search_time = time.time() - start_time
|
||||||
logger.info(f" Search time: {search_time} seconds")
|
# logger.info(f" Search time: {search_time} seconds")
|
||||||
logger.info(
|
logger.info(
|
||||||
f" Backend returned: labels={len(results.get('labels', [[]])[0])} results"
|
f" Backend returned: labels={len(results.get('labels', [[]])[0])} results"
|
||||||
)
|
)
|
||||||
@@ -479,15 +479,25 @@ class LeannSearcher:
|
|||||||
metadata=passage_data.get("metadata", {}),
|
metadata=passage_data.get("metadata", {}),
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Color codes for better logging
|
||||||
|
GREEN = "\033[92m"
|
||||||
|
BLUE = "\033[94m"
|
||||||
|
YELLOW = "\033[93m"
|
||||||
|
RESET = "\033[0m"
|
||||||
|
|
||||||
|
# Full passage text for display (no truncation is applied here)
|
||||||
|
display_text = passage_data['text']
|
||||||
logger.info(
|
logger.info(
|
||||||
f" {i + 1}. passage_id='{string_id}' -> SUCCESS: {passage_data['text']}..."
|
f" {GREEN}✓{RESET} {BLUE}[{i + 1:2d}]{RESET} {YELLOW}ID:{RESET} '{string_id}' {YELLOW}Score:{RESET} {dist:.4f} {YELLOW}Text:{RESET} {display_text}"
|
||||||
)
|
)
|
||||||
except KeyError:
|
except KeyError:
|
||||||
|
RED = "\033[91m"
|
||||||
logger.error(
|
logger.error(
|
||||||
f" {i + 1}. passage_id='{string_id}' -> ERROR: Passage not found in PassageManager!"
|
f" {RED}✗{RESET} [{i + 1:2d}] ID: '{string_id}' -> {RED}ERROR: Passage not found!{RESET}"
|
||||||
)
|
)
|
||||||
|
|
||||||
logger.info(f" Final enriched results: {len(enriched_results)} passages")
|
logger.info(f" {GREEN}✓ Final enriched results: {len(enriched_results)} passages{RESET}")
|
||||||
return enriched_results
|
return enriched_results
|
||||||
|
|
||||||
|
|
||||||
@@ -517,7 +527,7 @@ class LeannChat:
|
|||||||
):
|
):
|
||||||
if llm_kwargs is None:
|
if llm_kwargs is None:
|
||||||
llm_kwargs = {}
|
llm_kwargs = {}
|
||||||
|
search_time = time.time()
|
||||||
results = self.searcher.search(
|
results = self.searcher.search(
|
||||||
question,
|
question,
|
||||||
top_k=top_k,
|
top_k=top_k,
|
||||||
@@ -529,6 +539,8 @@ class LeannChat:
|
|||||||
expected_zmq_port=expected_zmq_port,
|
expected_zmq_port=expected_zmq_port,
|
||||||
**search_kwargs,
|
**search_kwargs,
|
||||||
)
|
)
|
||||||
|
search_time = time.time() - search_time
|
||||||
|
# logger.info(f" Search time: {search_time} seconds")
|
||||||
context = "\n\n".join([r.text for r in results])
|
context = "\n\n".join([r.text for r in results])
|
||||||
prompt = (
|
prompt = (
|
||||||
"Here is some retrieved context that might help answer your question:\n\n"
|
"Here is some retrieved context that might help answer your question:\n\n"
|
||||||
|
|||||||
BIN
videos/google_clear.gif
Normal file
BIN
videos/google_clear.gif
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 4.2 MiB |
BIN
videos/mail_clear.gif
Normal file
BIN
videos/mail_clear.gif
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 1.9 MiB |
BIN
videos/paper_clear.gif
Normal file
BIN
videos/paper_clear.gif
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 1.4 MiB |
BIN
videos/wechat_clear.gif
Normal file
BIN
videos/wechat_clear.gif
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 1.5 MiB |
Reference in New Issue
Block a user