fix: diskann build and prevent termination from hanging

- Fix OpenMP library linking in DiskANN CMake configuration - Add timeout protection for HuggingFace model loading to prevent hangs - Improve embedding server process termination with better timeouts - Make DiskANN backend default enabled alongside HNSW - Update documentation to reflect both backends included by default
2025-08-03 21:16:52 -07:00
parent 19bcc07814
commit 54df6310c5
5 changed files with 52 additions and 21 deletions
@@ -542,14 +542,41 @@ class HFChat(LLMInterface):
            self.device = "cpu"
            logger.info("No GPU detected. Using CPU.")

-        # Load tokenizer and model
-        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
-        self.model = AutoModelForCausalLM.from_pretrained(
-            model_name,
-            torch_dtype=torch.float16 if self.device != "cpu" else torch.float32,
-            device_map="auto" if self.device != "cpu" else None,
-            trust_remote_code=True,
-        )
+        # Load tokenizer and model with timeout protection
+        try:
+            import signal
+
+            def timeout_handler(signum, frame):
+                raise TimeoutError("Model download/loading timed out")
+
+            # Set timeout for model loading (60 seconds)
+            old_handler = signal.signal(signal.SIGALRM, timeout_handler)
+            signal.alarm(60)
+
+            try:
+                logger.info(f"Loading tokenizer for {model_name}...")
+                self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+                logger.info(f"Loading model {model_name}...")
+                self.model = AutoModelForCausalLM.from_pretrained(
+                    model_name,
+                    torch_dtype=torch.float16 if self.device != "cpu" else torch.float32,
+                    device_map="auto" if self.device != "cpu" else None,
+                    trust_remote_code=True,
+                )
+                logger.info(f"Successfully loaded {model_name}")
+            finally:
+                signal.alarm(0)  # Cancel the alarm
+                signal.signal(signal.SIGALRM, old_handler)  # Restore old handler
+
+        except TimeoutError:
+            logger.error(f"Model loading timed out for {model_name}")
+            raise RuntimeError(
+                f"Model loading timed out for {model_name}. Please check your internet connection or try a smaller model."
+            )
+        except Exception as e:
+            logger.error(f"Failed to load model {model_name}: {e}")
+            raise

        # Move model to device if not using device_map
        if self.device != "cpu" and "device_map" not in str(self.model):
@@ -354,13 +354,21 @@ class EmbeddingServerManager:
        self.server_process.terminate()

        try:
-            self.server_process.wait(timeout=5)
+            self.server_process.wait(timeout=3)
            logger.info(f"Server process {self.server_process.pid} terminated.")
        except subprocess.TimeoutExpired:
            logger.warning(
-                f"Server process {self.server_process.pid} did not terminate gracefully, killing it."
+                f"Server process {self.server_process.pid} did not terminate gracefully within 3 seconds, killing it."
            )
            self.server_process.kill()
+            try:
+                self.server_process.wait(timeout=2)
+                logger.info(f"Server process {self.server_process.pid} killed successfully.")
+            except subprocess.TimeoutExpired:
+                logger.error(
+                    f"Failed to kill server process {self.server_process.pid} - it may be hung"
+                )
+                # Don't hang indefinitely

        # Clean up process resources to prevent resource tracker warnings
        try: