Compare commits
1 Commit
fix/empty-
...
docs/updat
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
444ea9fbea |
@@ -306,23 +306,6 @@ class LeannBuilder:
|
|||||||
def build_index(self, index_path: str):
|
def build_index(self, index_path: str):
|
||||||
if not self.chunks:
|
if not self.chunks:
|
||||||
raise ValueError("No chunks added.")
|
raise ValueError("No chunks added.")
|
||||||
|
|
||||||
# Filter out invalid/empty text chunks early to keep passage and embedding counts aligned
|
|
||||||
valid_chunks: list[dict[str, Any]] = []
|
|
||||||
skipped = 0
|
|
||||||
for chunk in self.chunks:
|
|
||||||
text = chunk.get("text", "")
|
|
||||||
if isinstance(text, str) and text.strip():
|
|
||||||
valid_chunks.append(chunk)
|
|
||||||
else:
|
|
||||||
skipped += 1
|
|
||||||
if skipped > 0:
|
|
||||||
print(
|
|
||||||
f"Warning: Skipping {skipped} empty/invalid text chunk(s). Processing {len(valid_chunks)} valid chunks"
|
|
||||||
)
|
|
||||||
self.chunks = valid_chunks
|
|
||||||
if not self.chunks:
|
|
||||||
raise ValueError("All provided chunks are empty or invalid. Nothing to index.")
|
|
||||||
if self.dimensions is None:
|
if self.dimensions is None:
|
||||||
self.dimensions = len(
|
self.dimensions = len(
|
||||||
compute_embeddings(
|
compute_embeddings(
|
||||||
|
|||||||
@@ -244,16 +244,6 @@ def compute_embeddings_openai(texts: list[str], model_name: str) -> np.ndarray:
|
|||||||
except ImportError as e:
|
except ImportError as e:
|
||||||
raise ImportError(f"OpenAI package not installed: {e}")
|
raise ImportError(f"OpenAI package not installed: {e}")
|
||||||
|
|
||||||
# Validate input list
|
|
||||||
if not texts:
|
|
||||||
raise ValueError("Cannot compute embeddings for empty text list")
|
|
||||||
# Extra validation: abort early if any item is empty/whitespace
|
|
||||||
invalid_count = sum(1 for t in texts if not isinstance(t, str) or not t.strip())
|
|
||||||
if invalid_count > 0:
|
|
||||||
raise ValueError(
|
|
||||||
f"Found {invalid_count} empty/invalid text(s) in input. Upstream should filter before calling OpenAI."
|
|
||||||
)
|
|
||||||
|
|
||||||
api_key = os.getenv("OPENAI_API_KEY")
|
api_key = os.getenv("OPENAI_API_KEY")
|
||||||
if not api_key:
|
if not api_key:
|
||||||
raise RuntimeError("OPENAI_API_KEY environment variable not set")
|
raise RuntimeError("OPENAI_API_KEY environment variable not set")
|
||||||
|
|||||||
Reference in New Issue
Block a user