From 32967daf81f110dea16a7d2897eb79f707130714 Mon Sep 17 00:00:00 2001
From: Aakash Suresh
Date: Tue, 7 Oct 2025 13:13:44 -0700
Subject: [PATCH] security: Enhance Hugging Face model loading security - resolves #136 (#138)

BREAKING CHANGE: trust_remote_code now defaults to False for security

- Set trust_remote_code=False by default in HFChat class
- Add explicit trust_remote_code parameter to HFChat.__init__()
- Add security warning when trust_remote_code=True is used
- Update get_llm() function to support trust_remote_code parameter
- Update benchmark utilities (load_hf_model, load_vllm_model, load_qwen_vl_model)
- Add comprehensive documentation for security implications

Security Benefits:
- Prevents arbitrary code execution from compromised model repositories
- Requires explicit opt-in for models that need remote code execution
- Shows clear warnings when security is reduced
- Follows security-by-default principle

Migration Guide:
- Most users: No changes needed (more secure by default)
- Users with models requiring remote code: Add trust_remote_code=True explicitly (see the sketch below)
- Config users: Add 'trust_remote_code': true to LLM config if needed
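Opt-in sketch (illustrative only; the HFChat and get_llm signatures match
this patch, but the "type" and "model" config keys are assumptions based on
the surrounding get_llm() code and may differ in your setup):

    from leann.chat import HFChat, get_llm

    # Default stays safe: no remote code execution.
    chat = HFChat(model_name="deepseek-ai/deepseek-llm-7b-chat")

    # Explicit opt-in for a trusted repository that ships custom code.
    chat = HFChat(
        model_name="deepseek-ai/deepseek-llm-7b-chat",
        trust_remote_code=True,
    )

    # Config-driven opt-in via get_llm().
    llm = get_llm({
        "type": "hf",
        "model": "deepseek-ai/deepseek-llm-7b-chat",
        "trust_remote_code": True,
    })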

Fixes #136
---
 benchmarks/llm_utils.py               | 67 +++++++++++++++++++++------
 packages/leann-core/src/leann/chat.py | 34 ++++++++++++--
 2 files changed, 83 insertions(+), 18 deletions(-)

diff --git a/benchmarks/llm_utils.py b/benchmarks/llm_utils.py
index 9a8217c..f64c55b 100644
--- a/benchmarks/llm_utils.py
+++ b/benchmarks/llm_utils.py
@@ -54,29 +54,51 @@ def extract_thinking_answer(response):
     return response.strip()


-def load_hf_model(model_name="Qwen/Qwen3-8B"):
-    """Load HuggingFace model"""
+def load_hf_model(model_name="Qwen/Qwen3-8B", trust_remote_code=False):
+    """Load HuggingFace model
+
+    Args:
+        model_name (str): Name of the model to load
+        trust_remote_code (bool): Whether to allow execution of code from the model repository.
+            Defaults to False for security. Only enable for trusted models.
+    """
     if not HF_AVAILABLE:
         raise ImportError("transformers not available")

+    if trust_remote_code:
+        print(
+            "⚠️ WARNING: Loading model with trust_remote_code=True. This can execute arbitrary code."
+        )
+
     print(f"Loading HF: {model_name}")
-    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=trust_remote_code)
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
         torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
         device_map="auto",
-        trust_remote_code=True,
+        trust_remote_code=trust_remote_code,
     )
     return tokenizer, model


-def load_vllm_model(model_name="Qwen/Qwen3-8B"):
-    """Load vLLM model"""
+def load_vllm_model(model_name="Qwen/Qwen3-8B", trust_remote_code=False):
+    """Load vLLM model
+
+    Args:
+        model_name (str): Name of the model to load
+        trust_remote_code (bool): Whether to allow execution of code from the model repository.
+            Defaults to False for security. Only enable for trusted models.
+    """
     if not VLLM_AVAILABLE:
         raise ImportError("vllm not available")

+    if trust_remote_code:
+        print(
+            "⚠️ WARNING: Loading model with trust_remote_code=True. This can execute arbitrary code."
+        )
+
     print(f"Loading vLLM: {model_name}")
-    llm = LLM(model=model_name, trust_remote_code=True)
+    llm = LLM(model=model_name, trust_remote_code=trust_remote_code)

     # Qwen3 specific config
     if is_qwen3_model(model_name):
@@ -178,19 +200,33 @@ def evaluate_rag(searcher, llm_func, queries, domain="default", top_k=3, complex
     }


-def load_qwen_vl_model(model_name="Qwen/Qwen2.5-VL-7B-Instruct"):
-    """Load Qwen2.5-VL multimodal model"""
+def load_qwen_vl_model(model_name="Qwen/Qwen2.5-VL-7B-Instruct", trust_remote_code=False):
+    """Load Qwen2.5-VL multimodal model
+
+    Args:
+        model_name (str): Name of the model to load
+        trust_remote_code (bool): Whether to allow execution of code from the model repository.
+            Defaults to False for security. Only enable for trusted models.
+    """
     if not HF_AVAILABLE:
         raise ImportError("transformers not available")

+    if trust_remote_code:
+        print(
+            "⚠️ WARNING: Loading model with trust_remote_code=True. This can execute arbitrary code."
+        )
+
     print(f"Loading Qwen2.5-VL: {model_name}")

     try:
         from transformers import AutoModelForVision2Seq, AutoProcessor

-        processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
+        processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=trust_remote_code)
         model = AutoModelForVision2Seq.from_pretrained(
-            model_name, torch_dtype=torch.bfloat16, device_map="auto", trust_remote_code=True
+            model_name,
+            torch_dtype=torch.bfloat16,
+            device_map="auto",
+            trust_remote_code=trust_remote_code,
         )
         return processor, model

@@ -202,9 +238,14 @@ def load_qwen_vl_model(model_name="Qwen/Qwen2.5-VL-7B-Instruct"):
     try:
         from transformers import AutoProcessor, Qwen2VLForConditionalGeneration

-        processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
+        processor = AutoProcessor.from_pretrained(
+            model_name, trust_remote_code=trust_remote_code
+        )
         model = Qwen2VLForConditionalGeneration.from_pretrained(
-            model_name, torch_dtype=torch.bfloat16, device_map="auto", trust_remote_code=True
+            model_name,
+            torch_dtype=torch.bfloat16,
+            device_map="auto",
+            trust_remote_code=trust_remote_code,
         )
         return processor, model

diff --git a/packages/leann-core/src/leann/chat.py b/packages/leann-core/src/leann/chat.py
index 8135daf..1f870bf 100644
--- a/packages/leann-core/src/leann/chat.py
+++ b/packages/leann-core/src/leann/chat.py
@@ -546,11 +546,30 @@ class OllamaChat(LLMInterface):


 class HFChat(LLMInterface):
-    """LLM interface for local Hugging Face Transformers models with proper chat templates."""
+    """LLM interface for local Hugging Face Transformers models with proper chat templates.

-    def __init__(self, model_name: str = "deepseek-ai/deepseek-llm-7b-chat"):
+    Args:
+        model_name (str): Name of the Hugging Face model to load.
+        trust_remote_code (bool): Whether to allow execution of code from the model repository.
+            Defaults to False for security. Only enable for trusted models as this can pose
+            a security risk if the model repository is compromised.
+    """
+
+    def __init__(
+        self, model_name: str = "deepseek-ai/deepseek-llm-7b-chat", trust_remote_code: bool = False
+    ):
         logger.info(f"Initializing HFChat with model='{model_name}'")

+        # Security warning when trust_remote_code is enabled
+        if trust_remote_code:
+            logger.warning(
+                "SECURITY WARNING: trust_remote_code=True allows execution of arbitrary code from the model repository. "
+                "Only enable this for models from trusted sources. This creates a potential security risk if the model "
+                "repository is compromised."
+            )
+
+        self.trust_remote_code = trust_remote_code
+
         # Pre-check model availability with helpful suggestions
         model_error = validate_model_and_suggest(model_name, "hf")
         if model_error:
@@ -588,14 +607,16 @@ class HFChat(LLMInterface):

         try:
             logger.info(f"Loading tokenizer for {model_name}...")
-            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+            self.tokenizer = AutoTokenizer.from_pretrained(
+                model_name, trust_remote_code=self.trust_remote_code
+            )

             logger.info(f"Loading model {model_name}...")
             self.model = AutoModelForCausalLM.from_pretrained(
                 model_name,
                 torch_dtype=torch.float16 if self.device != "cpu" else torch.float32,
                 device_map="auto" if self.device != "cpu" else None,
-                trust_remote_code=True,
+                trust_remote_code=self.trust_remote_code,
             )
             logger.info(f"Successfully loaded {model_name}")
         finally:
@@ -859,7 +880,10 @@ def get_llm(llm_config: Optional[dict[str, Any]] = None) -> LLMInterface:
             host=llm_config.get("host"),
         )
     elif llm_type == "hf":
-        return HFChat(model_name=model or "deepseek-ai/deepseek-llm-7b-chat")
+        return HFChat(
+            model_name=model or "deepseek-ai/deepseek-llm-7b-chat",
+            trust_remote_code=llm_config.get("trust_remote_code", False),
+        )
     elif llm_type == "openai":
         return OpenAIChat(
             model=model or "gpt-4o",