first commit
This commit is contained in:
160
examples/api_client.py
Normal file
160
examples/api_client.py
Normal file
@@ -0,0 +1,160 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
vLLM OpenAI-Compatible API Client Example
|
||||
Demonstrates using vLLM's OpenAI-compatible API endpoints
|
||||
"""
|
||||
|
||||
import json
from typing import Dict, List, Optional

import requests
|
||||
|
||||
class VLLMClient:
    """Simple client for a vLLM server's OpenAI-compatible HTTP API.

    Wraps the /v1/models, /v1/completions and /v1/chat/completions
    endpoints of a running vLLM server.
    """

    def __init__(self, base_url: str = "http://localhost:8000",
                 timeout: float = 60.0):
        """
        Args:
            base_url: Root URL of the vLLM server.
            timeout: Per-request timeout in seconds. (Without one,
                ``requests`` calls can block forever on a hung server.)
        """
        # Normalize so endpoint paths can be appended with a single '/'.
        self.base_url = base_url.rstrip('/')
        self.timeout = timeout

    def list_models(self) -> Dict:
        """Return the server's model listing.

        Returns:
            The OpenAI-style listing dict, e.g. ``{"data": [{"id": ...}, ...]}``.
            (The original annotation said ``List[Dict]``, but the endpoint
            returns a single JSON object.)

        Raises:
            requests.HTTPError: On a non-2xx response.
        """
        response = requests.get(f"{self.base_url}/v1/models",
                                timeout=self.timeout)
        response.raise_for_status()
        return response.json()

    def _default_model(self) -> str:
        """Return the id of the first model the server reports."""
        return self.list_models()['data'][0]['id']

    def _post(self, endpoint: str, payload: Dict, stream: bool):
        """POST *payload* to *endpoint* and return the decoded result.

        Returns the parsed JSON dict, or an iterator over raw response
        lines when *stream* is true.
        """
        response = requests.post(
            f"{self.base_url}{endpoint}",
            json=payload,
            headers={"Content-Type": "application/json"},
            stream=stream,
            timeout=self.timeout,
        )
        response.raise_for_status()
        return response.iter_lines() if stream else response.json()

    def complete(
        self,
        prompt: str,
        model: Optional[str] = None,
        max_tokens: int = 100,
        temperature: float = 0.7,
        stream: bool = False
    ) -> Dict:
        """Generate a text completion via /v1/completions.

        Args:
            prompt: Text to continue.
            model: Model id; defaults to the first model the server serves.
            max_tokens: Maximum number of tokens to generate.
            temperature: Sampling temperature.
            stream: If True, return an iterator over raw SSE lines
                instead of a parsed dict.
        """
        if model is None:
            model = self._default_model()
        payload = {
            "model": model,
            "prompt": prompt,
            "max_tokens": max_tokens,
            "temperature": temperature,
            "stream": stream,
        }
        return self._post("/v1/completions", payload, stream)

    def chat(
        self,
        messages: List[Dict[str, str]],
        model: Optional[str] = None,
        max_tokens: int = 100,
        temperature: float = 0.7,
        stream: bool = False
    ) -> Dict:
        """Generate a chat completion via /v1/chat/completions.

        Args:
            messages: OpenAI-style message dicts with "role" and "content".
            model: Model id; defaults to the first model the server serves.
            max_tokens: Maximum number of tokens to generate.
            temperature: Sampling temperature.
            stream: If True, return an iterator over raw SSE lines
                instead of a parsed dict.
        """
        if model is None:
            model = self._default_model()
        payload = {
            "model": model,
            "messages": messages,
            "max_tokens": max_tokens,
            "temperature": temperature,
            "stream": stream,
        }
        return self._post("/v1/chat/completions", payload, stream)
|
||||
|
||||
|
||||
def main():
    """Demonstrate the client: list models, completion, chat, streaming.

    Requires a vLLM server listening on http://localhost:8000.
    """
    client = VLLMClient("http://localhost:8000")

    print("=" * 60)
    print("vLLM API Client Examples")
    print("=" * 60)

    # Example 1: List models
    print("\n1. Listing available models...")
    models = client.list_models()
    for model in models['data']:
        print(f" - {model['id']}")

    # Example 2: Simple completion (temperature 0 for a deterministic answer)
    print("\n2. Simple completion...")
    result = client.complete(
        prompt="The capital of France is",
        max_tokens=10,
        temperature=0.0
    )
    # Plain strings here: the originals were f-strings with no placeholders.
    print(" Prompt: The capital of France is")
    print(f" Response: {result['choices'][0]['text']}")

    # Example 3: Chat completion
    print("\n3. Chat completion...")
    messages = [
        {"role": "system", "content": "You are a helpful AI assistant."},
        {"role": "user", "content": "What is the Blackwell GPU architecture?"}
    ]
    result = client.chat(
        messages=messages,
        max_tokens=100,
        temperature=0.7
    )
    print(f" User: {messages[1]['content']}")
    print(f" Assistant: {result['choices'][0]['message']['content']}")

    # Example 4: Streaming completion
    print("\n4. Streaming completion...")
    print(" Prompt: Write a short poem about AI")
    print(" Response: ", end="", flush=True)

    stream = client.complete(
        prompt="Write a short poem about AI",
        max_tokens=50,
        temperature=0.8,
        stream=True
    )

    for line in stream:
        if not line:
            continue  # keep-alive blank lines in the SSE stream
        text = line.decode('utf-8').removeprefix('data: ')
        # The OpenAI SSE protocol terminates with a literal "[DONE]"
        # sentinel, which is not JSON — stop instead of silently
        # swallowing it in an except clause.
        if text.strip() == "[DONE]":
            break
        try:
            data = json.loads(text)
        except json.JSONDecodeError:
            continue  # skip any non-JSON noise on the stream
        if data.get('choices'):
            print(data['choices'][0].get('text', ''), end="", flush=True)

    print("\n")
    print("=" * 60)
|
||||
|
||||
# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user