From f42e086383529d349db57b0017d5de693db3f43f Mon Sep 17 00:00:00 2001 From: Andy Lee Date: Mon, 29 Sep 2025 19:10:09 -0700 Subject: [PATCH] fix: set ntotal for storage as well --- packages/leann-core/src/leann/api.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/packages/leann-core/src/leann/api.py b/packages/leann-core/src/leann/api.py index 1c8ab55..07d8373 100644 --- a/packages/leann-core/src/leann/api.py +++ b/packages/leann-core/src/leann/api.py @@ -735,6 +735,20 @@ class LeannBuilder: storage_index = faiss.IndexFlatL2(index.d) index.storage = storage_index index.own_fields = True + # Faiss expects storage.ntotal to reflect the existing graph's + # population (even if the vectors themselves were pruned from disk + # for recompute mode). When we attach a fresh IndexFlat here its + # ntotal starts at zero, which later causes IndexHNSW::add to + # believe new "preset" levels were provided and trips the + # `n0 + n == levels.size()` assertion. Seed the temporary storage + # with the current ntotal so Faiss maintains the proper offset for + # incoming vectors. + try: + storage_index.ntotal = index.ntotal + except AttributeError: + # Older Faiss builds may not expose ntotal as a writable + # attribute; in that case we fall back to the default behaviour. + pass if index.d != embedding_dim: raise ValueError( f"Existing index dimension ({index.d}) does not match new embeddings ({embedding_dim})."