Update WeChat history reader and example settings; we should fix the bug introduced in 1c5fec5

2025-07-21 16:22:16 -07:00
parent e728449b8f
commit 32364320f8
4 changed files with 15 additions and 15 deletions
@@ -197,8 +197,8 @@ class WeChatHistoryReader(BaseReader):
        
        Args:
            messages: List of message dictionaries
-            max_length: Maximum length for concatenated message groups
-            time_window_minutes: Time window in minutes to group messages together
+            max_length: Maximum length for concatenated message groups. Use -1 to disable length constraint.
+            time_window_minutes: Time window in minutes to group messages together. Use -1 to disable time constraint.
            overlap_messages: Number of messages to overlap between consecutive groups
            
        Returns:
@@ -230,8 +230,8 @@ class WeChatHistoryReader(BaseReader):
            if not readable_text.strip():
                continue
            
-            # Check time window constraint
-            if last_timestamp is not None and create_time > 0:
+            # Check time window constraint (only if time_window_minutes != -1)
+            if time_window_minutes != -1 and last_timestamp is not None and create_time > 0:
                time_diff_minutes = (create_time - last_timestamp) / 60
                if time_diff_minutes > time_window_minutes:
                    # Time gap too large, start new group
@@ -250,9 +250,9 @@ class WeChatHistoryReader(BaseReader):
                            current_group = []
                            current_length = 0
            
-            # Check length constraint
+            # Check length constraint (only if max_length != -1)
            message_length = len(readable_text)
-            if current_length + message_length > max_length and current_group:
+            if max_length != -1 and current_length + message_length > max_length and current_group:
                # Current group would exceed max length, save it and start new
                concatenated_groups.append({
                    'messages': current_group,
@@ -431,9 +431,9 @@ Contact: {contact_name}
                        # Concatenate messages based on rules
                        message_groups = self._concatenate_messages(
                            readable_messages, 
-                            max_length=max_length, 
-                            time_window_minutes=time_window_minutes,
-                            overlap_messages=2  # Keep 2 messages overlap between groups
+                            max_length=-1, 
+                            time_window_minutes=-1,
+                            overlap_messages=0  # No message overlap between groups
                        )
                        
                        # Create documents from concatenated groups
@@ -52,7 +52,7 @@ def create_leann_index_from_multiple_wechat_exports(
                documents = reader.load_data(
                    wechat_export_dir=str(export_dir),
                    max_count=max_count,
-                    concatenate_messages=True,  # Disable concatenation - one message per document
+                    concatenate_messages=False,  # Disable concatenation - one message per document
                )
                if documents:
                    print(f"Loaded {len(documents)} chat documents from {export_dir}")
@@ -78,7 +78,7 @@ def create_leann_index_from_multiple_wechat_exports(
        )

        # Create text splitter with 256 chunk size
-        text_splitter = SentenceSplitter(chunk_size=256, chunk_overlap=25)
+        text_splitter = SentenceSplitter(chunk_size=128, chunk_overlap=64)

        # Convert Documents to text strings and chunk them
        all_texts = []
@@ -224,7 +224,7 @@ async def query_leann_index(index_path: str, query: str):
        query,
        top_k=20,
        recompute_beighbor_embeddings=True,
-        complexity=64,
+        complexity=128,
        beam_width=1,
        llm_config={
            "type": "openai",