From 0b8562868a77cc6f8ea3de154d5736905db115b3 Mon Sep 17 00:00:00 2001
From: "praisonai-triage-agent[bot]"
 <272766704+praisonai-triage-agent[bot]@users.noreply.github.com>
Date: Thu, 9 Apr 2026 11:15:12 +0000
Subject: [PATCH 1/4] feat: Add OpenAI-compatible HTTP API layer (fixes #1327)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Implement OpenAICompatProvider with standard endpoints:
  - POST /v1/chat/completions (with streaming)
  - POST /v1/completions
  - GET /v1/models
  - POST /v1/tools/invoke (PraisonAI extension)
- Add 'praisonai serve openai' command for compatibility layer
- Add test_openai_compatibility.py, a smoke-test script that exercises a
  running server through the official OpenAI Python client
- Enable drop-in replacement for OpenAI client libraries

Adds the OpenAI-compatible HTTP surface that PraisonAI was missing
compared with OpenClaw Gateway. Requests are served by the existing
praisonai.capabilities.completions functions, with conversion between
the OpenAI request/response format and PraisonAI's result objects.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-authored-by: praisonai-triage-agent[bot] <praisonai-triage-agent[bot]@users.noreply.github.com>
---
 src/praisonai/praisonai/cli/commands/serve.py | 126 +++++
 .../praisonai/endpoints/providers/__init__.py |   1 +
 .../endpoints/providers/openai_compat.py      | 494 ++++++++++++++++++
 test_openai_compatibility.py                  | 207 ++++++++
 4 files changed, 828 insertions(+)
 create mode 100644 src/praisonai/praisonai/endpoints/providers/openai_compat.py
 create mode 100644 test_openai_compatibility.py

diff --git a/src/praisonai/praisonai/cli/commands/serve.py b/src/praisonai/praisonai/cli/commands/serve.py
index 538ec9c07..31fbbadb2 100644
--- a/src/praisonai/praisonai/cli/commands/serve.py
+++ b/src/praisonai/praisonai/cli/commands/serve.py
@@ -19,6 +19,7 @@
 - a2a: Agent-to-Agent protocol
 - a2u: Agent-to-User event stream
 - unified: All providers combined
+- openai: OpenAI API compatibility layer
 """
 
 from typing import Optional
@@ -125,6 +126,7 @@ def serve_callback(ctx: typer.Context):
   [green]a2a[/green]         Agent-to-Agent protocol (port 8001)
   [green]a2u[/green]         Agent-to-User events (port 8002)
   [green]unified[/green]     All providers combined (port 8765)
+  [green]openai[/green]      OpenAI API compatibility layer (port 8765)
 
 [bold]Management:[/bold]
   [yellow]start[/yellow]       Start legacy API server
@@ -577,3 +579,127 @@ def serve_unified(
         output = get_output_controller()
         output.print_error(f"Unified serve module not available: {e}")
         raise typer.Exit(4)
+
+
+@app.command("openai")
+def serve_openai(
+    host: str = typer.Option("127.0.0.1", "--host", "-h", help="Host to bind to"),
+    port: int = typer.Option(8765, "--port", "-p", help="Port to bind to"),
+    api_key: Optional[str] = typer.Option(None, "--api-key", help="API key for authentication"),
+    reload: bool = typer.Option(False, "--reload", help="Enable auto-reload"),
+):
+    """Start OpenAI API compatibility server.
+    
+    Provides OpenAI-compatible endpoints like /v1/chat/completions for
+    drop-in compatibility with OpenAI client libraries.
+    
+    Examples:
+        praisonai serve openai
+        praisonai serve openai --port 8765 --api-key mykey
+    """
+    output = get_output_controller()
+    
+    try:
+        import uvicorn
+        from fastapi import FastAPI
+        from praisonai.endpoints.providers import OpenAICompatProvider, AgentsAPIProvider
+        from praisonai.endpoints.server import create_server, register_provider
+        
+        output.print_info(f"Starting OpenAI-compatible server on {host}:{port}")
+        
+        # Create providers
+        agents_provider = AgentsAPIProvider(base_url=f"http://{host}:{port}")
+        openai_provider = OpenAICompatProvider(
+            base_url=f"http://{host}:{port}",
+            api_key=api_key,
+            agent_provider=agents_provider
+        )
+        
+        # Create server and register OpenAI provider
+        app = create_server(title="PraisonAI OpenAI API", version="1.0.0")
+        register_provider(app, openai_provider, prefix="/v1")
+        
+        # Add OpenAI-style route mappings
+        from fastapi import Request, Response
+        from fastapi.responses import StreamingResponse
+        import json
+        
+        @app.post("/v1/chat/completions")
+        async def chat_completions(request: Request):
+            body = await request.json()
+            result = openai_provider.invoke("chat_completions", body, stream=body.get("stream", False))
+            
+            if not result.ok:
+                return Response(
+                    content=json.dumps({"error": {"message": result.error, "type": "api_error"}}),
+                    status_code=400,
+                    media_type="application/json"
+                )
+            
+            if body.get("stream", False):
+                def generate():
+                    for chunk in openai_provider.invoke_stream("chat_completions", body):
+                        if chunk["event"] == "data":
+                            yield f"data: {json.dumps(chunk['data'])}\n\n"
+                        elif chunk["event"] == "done":
+                            yield "data: [DONE]\n\n"
+                
+                return StreamingResponse(generate(), media_type="text/plain")
+            
+            return result.data
+        
+        @app.post("/v1/completions")
+        async def completions(request: Request):
+            body = await request.json()
+            result = openai_provider.invoke("completions", body)
+            
+            if not result.ok:
+                return Response(
+                    content=json.dumps({"error": {"message": result.error, "type": "api_error"}}),
+                    status_code=400,
+                    media_type="application/json"
+                )
+            
+            return result.data
+        
+        @app.get("/v1/models")
+        async def models():
+            result = openai_provider.invoke("models")
+            return result.data if result.ok else {"error": result.error}
+        
+        @app.post("/v1/tools/invoke")
+        async def tools_invoke(request: Request):
+            body = await request.json()
+            result = openai_provider.invoke("tools_invoke", body)
+            
+            if not result.ok:
+                return Response(
+                    content=json.dumps({"error": {"message": result.error, "type": "api_error"}}),
+                    status_code=400,
+                    media_type="application/json"
+                )
+            
+            return result.data
+        
+        output.print("OpenAI-compatible endpoints available:")
+        output.print("  POST /v1/chat/completions")
+        output.print("  POST /v1/completions") 
+        output.print("  GET /v1/models")
+        output.print("  POST /v1/tools/invoke")
+        
+        # Start server
+        uvicorn.run(
+            app,
+            host=host,
+            port=port,
+            reload=reload,
+            access_log=False,
+        )
+        
+    except ImportError as e:
+        output.print_error(f"OpenAI compatibility module not available: {e}")
+        output.print("Install with: pip install praisonai[api]")
+        raise typer.Exit(4)
+    except Exception as e:
+        output.print_error(f"Failed to start OpenAI server: {e}")
+        raise typer.Exit(1)
diff --git a/src/praisonai/praisonai/endpoints/providers/__init__.py b/src/praisonai/praisonai/endpoints/providers/__init__.py
index 51d53d26b..022679800 100644
--- a/src/praisonai/praisonai/endpoints/providers/__init__.py
+++ b/src/praisonai/praisonai/endpoints/providers/__init__.py
@@ -14,6 +14,7 @@
     "ToolsMCPProvider": ".tools_mcp",
     "A2AProvider": ".a2a",
     "A2UProvider": ".a2u",
+    "OpenAICompatProvider": ".openai_compat",
 }
 
 def __getattr__(name: str):
diff --git a/src/praisonai/praisonai/endpoints/providers/openai_compat.py b/src/praisonai/praisonai/endpoints/providers/openai_compat.py
new file mode 100644
index 000000000..2d0c2ebf4
--- /dev/null
+++ b/src/praisonai/praisonai/endpoints/providers/openai_compat.py
@@ -0,0 +1,494 @@
+"""
+OpenAI-Compatible HTTP Provider
+
+Provides OpenAI API compatibility layer for PraisonAI agents and completions.
+Implements standard OpenAI endpoints like /v1/chat/completions, /v1/models, etc.
+"""
+
+import json
+import time
+import uuid
+from typing import Any, Dict, List, Optional, Iterator
+
+from .base import BaseProvider, InvokeResult, HealthResult
+from ..discovery import EndpointInfo, ProviderInfo
+
+
+class OpenAICompatProvider(BaseProvider):
+    """
+    OpenAI API-compatible provider for PraisonAI.
+    
+    Implements standard OpenAI endpoints:
+    - POST /v1/chat/completions
+    - POST /v1/completions  
+    - GET /v1/models
+    - POST /v1/embeddings (if available)
+    - POST /v1/tools/invoke (custom)
+    """
+    
+    provider_type = "openai-compat"
+    
+    def __init__(
+        self,
+        base_url: str = "http://localhost:8765",
+        api_key: Optional[str] = None,
+        timeout: float = 30.0,
+        agent_provider: Optional[BaseProvider] = None,
+    ):
+        """
+        Initialize OpenAI-compatible provider.
+        
+        Args:
+            base_url: Base URL of the server
+            api_key: Optional API key for authentication
+            timeout: Request timeout in seconds
+            agent_provider: Optional AgentsAPIProvider for agent routing
+        """
+        super().__init__(base_url, api_key, timeout)
+        self.agent_provider = agent_provider
+    
+    def get_provider_info(self) -> ProviderInfo:
+        """Get provider information."""
+        return ProviderInfo(
+            type=self.provider_type,
+            name="OpenAI API Compatibility Layer",
+            description="OpenAI API-compatible endpoints for PraisonAI",
+            capabilities=["chat", "completions", "models", "embeddings", "tools"],
+        )
+    
+    def list_endpoints(self, tags: Optional[List[str]] = None) -> List[EndpointInfo]:
+        """List available OpenAI-compatible endpoints."""
+        endpoints = [
+            EndpointInfo(
+                name="chat_completions",
+                description="OpenAI-compatible chat completions",
+                provider_type=self.provider_type,
+                tags=["chat", "openai"],
+                version="1.0.0",
+                streaming=["none", "sse"],
+                auth_modes=["none", "api_key"],
+                input_schema={
+                    "type": "object",
+                    "properties": {
+                        "messages": {
+                            "type": "array",
+                            "items": {
+                                "type": "object",
+                                "properties": {
+                                    "role": {"type": "string", "enum": ["user", "assistant", "system"]},
+                                    "content": {"type": "string"}
+                                }
+                            }
+                        },
+                        "model": {"type": "string", "default": "gpt-4o-mini"},
+                        "temperature": {"type": "number", "default": 1.0},
+                        "max_tokens": {"type": "integer"},
+                        "stream": {"type": "boolean", "default": False},
+                        "tools": {"type": "array"},
+                        "tool_choice": {"type": "string"}
+                    },
+                    "required": ["messages"]
+                },
+            ),
+            EndpointInfo(
+                name="completions",
+                description="OpenAI-compatible text completions",
+                provider_type=self.provider_type,
+                tags=["completions", "openai"],
+                version="1.0.0",
+                streaming=["none"],
+                auth_modes=["none", "api_key"],
+                input_schema={
+                    "type": "object",
+                    "properties": {
+                        "prompt": {"type": "string"},
+                        "model": {"type": "string", "default": "gpt-3.5-turbo-instruct"},
+                        "temperature": {"type": "number", "default": 1.0},
+                        "max_tokens": {"type": "integer"}
+                    },
+                    "required": ["prompt"]
+                },
+            ),
+            EndpointInfo(
+                name="models",
+                description="List available models",
+                provider_type=self.provider_type,
+                tags=["models", "openai"],
+                version="1.0.0",
+                streaming=["none"],
+                auth_modes=["none"],
+            ),
+            EndpointInfo(
+                name="tools_invoke",
+                description="Invoke agent tools (PraisonAI extension)",
+                provider_type=self.provider_type,
+                tags=["tools", "praisonai"],
+                version="1.0.0",
+                streaming=["none"],
+                auth_modes=["none", "api_key"],
+                input_schema={
+                    "type": "object",
+                    "properties": {
+                        "agent": {"type": "string"},
+                        "tool_name": {"type": "string"},
+                        "parameters": {"type": "object"}
+                    },
+                    "required": ["tool_name"]
+                },
+            ),
+        ]
+        
+        if tags:
+            endpoints = [ep for ep in endpoints if any(tag in ep.tags for tag in tags)]
+        
+        return endpoints
+    
+    def describe_endpoint(self, name: str) -> Optional[EndpointInfo]:
+        """Get detailed information about an endpoint."""
+        endpoints = self.list_endpoints()
+        for ep in endpoints:
+            if ep.name == name:
+                return ep
+        return None
+    
+    def invoke(
+        self,
+        name: str,
+        input_data: Optional[Dict[str, Any]] = None,
+        config: Optional[Dict[str, Any]] = None,
+        stream: bool = False,
+    ) -> InvokeResult:
+        """Invoke OpenAI-compatible endpoint."""
+        try:
+            if name == "chat_completions":
+                return self._handle_chat_completions(input_data, stream)
+            elif name == "completions":
+                return self._handle_completions(input_data)
+            elif name == "models":
+                return self._handle_models()
+            elif name == "tools_invoke":
+                return self._handle_tools_invoke(input_data)
+            else:
+                return InvokeResult(
+                    ok=False,
+                    status="not_found",
+                    error=f"Unknown endpoint: {name}",
+                )
+        except Exception as e:
+            return InvokeResult(
+                ok=False,
+                status="error",
+                error=f"Internal error: {str(e)}",
+            )
+    
+    def invoke_stream(
+        self,
+        name: str,
+        input_data: Optional[Dict[str, Any]] = None,
+        config: Optional[Dict[str, Any]] = None,
+    ) -> Iterator[Dict[str, Any]]:
+        """Stream OpenAI-compatible responses."""
+        if name == "chat_completions":
+            yield from self._stream_chat_completions(input_data)
+        else:
+            # Fall back to non-streaming
+            result = self.invoke(name, input_data, config, stream=False)
+            if result.ok:
+                yield {"event": "complete", "data": result.data}
+            else:
+                yield {"event": "error", "data": {"error": result.error}}
+    
+    def _handle_chat_completions(self, input_data: Dict[str, Any], stream: bool = False) -> InvokeResult:
+        """Handle /v1/chat/completions endpoint."""
+        from praisonai.capabilities.completions import chat_completion
+        
+        try:
+            # Extract OpenAI-format request
+            messages = input_data.get("messages", [])
+            model = input_data.get("model", "gpt-4o-mini")
+            temperature = input_data.get("temperature", 1.0)
+            max_tokens = input_data.get("max_tokens")
+            tools = input_data.get("tools")
+            tool_choice = input_data.get("tool_choice")
+            
+            # Call PraisonAI completion capability
+            result = chat_completion(
+                messages=messages,
+                model=model,
+                temperature=temperature,
+                max_tokens=max_tokens,
+                tools=tools,
+                tool_choice=tool_choice,
+                stream=stream,
+                api_key=self.api_key,
+            )
+            
+            # Convert to OpenAI format
+            response = self._format_chat_completion_response(result)
+            
+            return InvokeResult(
+                ok=True,
+                status="success",
+                data=response,
+            )
+            
+        except Exception as e:
+            return InvokeResult(
+                ok=False,
+                status="error",
+                error=f"Chat completion error: {str(e)}",
+            )
+    
+    def _handle_completions(self, input_data: Dict[str, Any]) -> InvokeResult:
+        """Handle /v1/completions endpoint."""
+        from praisonai.capabilities.completions import text_completion
+        
+        try:
+            prompt = input_data.get("prompt", "")
+            model = input_data.get("model", "gpt-3.5-turbo-instruct")
+            temperature = input_data.get("temperature", 1.0)
+            max_tokens = input_data.get("max_tokens")
+            
+            result = text_completion(
+                prompt=prompt,
+                model=model,
+                temperature=temperature,
+                max_tokens=max_tokens,
+                api_key=self.api_key,
+            )
+            
+            response = self._format_completion_response(result)
+            
+            return InvokeResult(
+                ok=True,
+                status="success",
+                data=response,
+            )
+            
+        except Exception as e:
+            return InvokeResult(
+                ok=False,
+                status="error", 
+                error=f"Text completion error: {str(e)}",
+            )
+    
+    def _handle_models(self) -> InvokeResult:
+        """Handle /v1/models endpoint."""
+        # Return common models supported by LiteLLM
+        models = [
+            {
+                "id": "gpt-4o-mini",
+                "object": "model",
+                "created": int(time.time()),
+                "owned_by": "openai",
+            },
+            {
+                "id": "gpt-4o",
+                "object": "model", 
+                "created": int(time.time()),
+                "owned_by": "openai",
+            },
+            {
+                "id": "gpt-3.5-turbo",
+                "object": "model",
+                "created": int(time.time()),
+                "owned_by": "openai",
+            },
+            {
+                "id": "claude-3-5-sonnet-20241022",
+                "object": "model",
+                "created": int(time.time()),
+                "owned_by": "anthropic",
+            },
+        ]
+        
+        response = {
+            "object": "list",
+            "data": models
+        }
+        
+        return InvokeResult(
+            ok=True,
+            status="success",
+            data=response,
+        )
+    
+    def _handle_tools_invoke(self, input_data: Dict[str, Any]) -> InvokeResult:
+        """Handle /v1/tools/invoke endpoint (PraisonAI extension)."""
+        if not self.agent_provider:
+            return InvokeResult(
+                ok=False,
+                status="not_available",
+                error="Agent provider not configured for tool invocation",
+            )
+        
+        try:
+            agent_name = input_data.get("agent", "default")
+            tool_name = input_data.get("tool_name")
+            parameters = input_data.get("parameters", {})
+            
+            # Route to agent provider for tool execution
+            agent_input = {
+                "query": f"Use tool {tool_name} with parameters: {json.dumps(parameters)}",
+                "tool_name": tool_name,
+                "parameters": parameters,
+            }
+            
+            result = self.agent_provider.invoke(agent_name, agent_input)
+            
+            return InvokeResult(
+                ok=result.ok,
+                status=result.status,
+                data={
+                    "tool_name": tool_name,
+                    "result": result.data,
+                    "success": result.ok,
+                },
+                error=result.error,
+            )
+            
+        except Exception as e:
+            return InvokeResult(
+                ok=False,
+                status="error",
+                error=f"Tool invocation error: {str(e)}",
+            )
+    
+    def _stream_chat_completions(self, input_data: Dict[str, Any]) -> Iterator[Dict[str, Any]]:
+        """Stream chat completions in OpenAI SSE format."""
+        try:
+            # For now, use non-streaming and convert to SSE format
+            result = self._handle_chat_completions(input_data, stream=False)
+            
+            if not result.ok:
+                yield {
+                    "event": "error",
+                    "data": {"error": result.error}
+                }
+                return
+            
+            response = result.data
+            choice = response["choices"][0] if response["choices"] else {}
+            content = choice.get("message", {}).get("content", "")
+            
+            # Simulate streaming by chunking the response
+            chunk_id = str(uuid.uuid4())
+            
+            # Send chunks
+            for i, char in enumerate(content):
+                chunk = {
+                    "id": chunk_id,
+                    "object": "chat.completion.chunk",
+                    "created": int(time.time()),
+                    "model": response.get("model", "gpt-4o-mini"),
+                    "choices": [
+                        {
+                            "index": 0,
+                            "delta": {"content": char},
+                            "finish_reason": None
+                        }
+                    ]
+                }
+                yield {"event": "data", "data": chunk}
+            
+            # Send final chunk
+            final_chunk = {
+                "id": chunk_id,
+                "object": "chat.completion.chunk",
+                "created": int(time.time()),
+                "model": response.get("model", "gpt-4o-mini"),
+                "choices": [
+                    {
+                        "index": 0,
+                        "delta": {},
+                        "finish_reason": "stop"
+                    }
+                ]
+            }
+            yield {"event": "data", "data": final_chunk}
+            yield {"event": "done", "data": "[DONE]"}
+            
+        except Exception as e:
+            yield {
+                "event": "error", 
+                "data": {"error": f"Streaming error: {str(e)}"}
+            }
+    
+    def _format_chat_completion_response(self, result) -> Dict[str, Any]:
+        """Convert PraisonAI CompletionResult to OpenAI chat completion format."""
+        return {
+            "id": result.id or f"chatcmpl-{uuid.uuid4().hex[:8]}",
+            "object": "chat.completion",
+            "created": int(time.time()),
+            "model": result.model or "gpt-4o-mini",
+            "choices": [
+                {
+                    "index": 0,
+                    "message": {
+                        "role": result.role,
+                        "content": result.content,
+                        "tool_calls": result.tool_calls,
+                    },
+                    "finish_reason": result.finish_reason or "stop",
+                }
+            ],
+            "usage": result.usage or {
+                "prompt_tokens": 0,
+                "completion_tokens": 0,
+                "total_tokens": 0,
+            },
+        }
+    
+    def _format_completion_response(self, result) -> Dict[str, Any]:
+        """Convert PraisonAI CompletionResult to OpenAI completion format."""
+        return {
+            "id": result.id or f"cmpl-{uuid.uuid4().hex[:8]}",
+            "object": "text_completion",
+            "created": int(time.time()),
+            "model": result.model or "gpt-3.5-turbo-instruct",
+            "choices": [
+                {
+                    "index": 0,
+                    "text": result.content or "",
+                    "finish_reason": result.finish_reason or "stop",
+                }
+            ],
+            "usage": result.usage or {
+                "prompt_tokens": 0,
+                "completion_tokens": 0,
+                "total_tokens": 0,
+            },
+        }
+    
+    def health(self) -> HealthResult:
+        """Check OpenAI compatibility layer health."""
+        try:
+            # Test basic completion capability
+            from praisonai.capabilities.completions import chat_completion
+            
+            test_result = chat_completion(
+                messages=[{"role": "user", "content": "test"}],
+                model="gpt-4o-mini",
+                max_tokens=1,
+            )
+            
+            return HealthResult(
+                healthy=True,
+                status="healthy",
+                server_name="PraisonAI OpenAI Compatibility Layer",
+                server_version="1.0.0",
+                provider_type=self.provider_type,
+                metadata={
+                    "endpoints": ["chat/completions", "completions", "models", "tools/invoke"],
+                    "test_completion_id": test_result.id,
+                },
+            )
+            
+        except Exception as e:
+            return HealthResult(
+                healthy=False,
+                status="unhealthy",
+                server_name="PraisonAI OpenAI Compatibility Layer",
+                provider_type=self.provider_type,
+                metadata={"error": str(e)},
+            )
\ No newline at end of file
diff --git a/test_openai_compatibility.py b/test_openai_compatibility.py
new file mode 100644
index 000000000..c05074ec7
--- /dev/null
+++ b/test_openai_compatibility.py
@@ -0,0 +1,207 @@
+#!/usr/bin/env python3
+"""
+Test OpenAI Compatibility Layer
+
+Tests the OpenAI-compatible HTTP endpoints using the official OpenAI Python client.
+This verifies drop-in compatibility with existing OpenAI client code.
+"""
+
+import os
+import time
+import asyncio
+from openai import OpenAI
+
+def test_basic_chat_completion():
+    """Test basic chat completion with OpenAI client."""
+    print("Testing OpenAI Chat Completion compatibility...")
+    
+    # Use local PraisonAI server as OpenAI endpoint
+    client = OpenAI(
+        base_url="http://localhost:8765/v1",
+        api_key="test-key",  # Not required but good to test auth handling
+    )
+    
+    try:
+        response = client.chat.completions.create(
+            model="gpt-4o-mini",
+            messages=[
+                {"role": "user", "content": "Say 'Hello PraisonAI OpenAI compatibility!' and nothing else"}
+            ],
+            max_tokens=50
+        )
+        
+        print(f"✅ Chat completion response ID: {response.id}")
+        print(f"✅ Model: {response.model}")
+        print(f"✅ Content: {response.choices[0].message.content}")
+        print(f"✅ Usage: {response.usage}")
+        
+        return True
+        
+    except Exception as e:
+        print(f"❌ Chat completion failed: {e}")
+        return False
+
+def test_streaming_chat_completion():
+    """Test streaming chat completion."""
+    print("\nTesting OpenAI Streaming Chat Completion compatibility...")
+    
+    client = OpenAI(
+        base_url="http://localhost:8765/v1",
+        api_key="test-key",
+    )
+    
+    try:
+        stream = client.chat.completions.create(
+            model="gpt-4o-mini",
+            messages=[
+                {"role": "user", "content": "Count from 1 to 5, each number on a new line"}
+            ],
+            stream=True
+        )
+        
+        content_chunks = []
+        for chunk in stream:
+            if chunk.choices[0].delta.content is not None:
+                content_chunks.append(chunk.choices[0].delta.content)
+                print(f"Chunk: {chunk.choices[0].delta.content}", end="")
+        
+        print(f"\n✅ Streaming chat completion successful")
+        print(f"✅ Total chunks received: {len(content_chunks)}")
+        
+        return True
+        
+    except Exception as e:
+        print(f"❌ Streaming chat completion failed: {e}")
+        return False
+
+def test_models_list():
+    """Test models list endpoint."""
+    print("\nTesting OpenAI Models API compatibility...")
+    
+    client = OpenAI(
+        base_url="http://localhost:8765/v1",
+        api_key="test-key",
+    )
+    
+    try:
+        models = client.models.list()
+        
+        print(f"✅ Models list response: {models.object}")
+        print(f"✅ Number of models: {len(models.data)}")
+        
+        for model in models.data[:3]:  # Show first 3 models
+            print(f"✅ Model: {model.id} (owned by {model.owned_by})")
+        
+        return True
+        
+    except Exception as e:
+        print(f"❌ Models list failed: {e}")
+        return False
+
+def test_text_completion():
+    """Test legacy text completion."""
+    print("\nTesting OpenAI Text Completion compatibility...")
+    
+    client = OpenAI(
+        base_url="http://localhost:8765/v1", 
+        api_key="test-key",
+    )
+    
+    try:
+        response = client.completions.create(
+            model="gpt-3.5-turbo-instruct",
+            prompt="Complete this sentence: The weather today is",
+            max_tokens=20
+        )
+        
+        print(f"✅ Text completion response ID: {response.id}")
+        print(f"✅ Model: {response.model}")
+        print(f"✅ Text: {response.choices[0].text}")
+        print(f"✅ Usage: {response.usage}")
+        
+        return True
+        
+    except Exception as e:
+        print(f"❌ Text completion failed: {e}")
+        return False
+
+def test_tools_integration():
+    """Test tools integration (PraisonAI extension)."""
+    print("\nTesting PraisonAI Tools Integration...")
+    
+    import requests
+    
+    try:
+        response = requests.post(
+            "http://localhost:8765/v1/tools/invoke",
+            json={
+                "tool_name": "test_tool", 
+                "parameters": {"query": "test"},
+                "agent": "default"
+            }
+        )
+        
+        if response.status_code == 200:
+            data = response.json()
+            print(f"✅ Tools invoke successful: {data}")
+        else:
+            print(f"⚠️ Tools invoke returned {response.status_code}: {response.text}")
+        
+        return True
+        
+    except Exception as e:
+        print(f"❌ Tools integration failed: {e}")
+        return False
+
+def run_compatibility_tests():
+    """Run all compatibility tests."""
+    print("="*60)
+    print("🚀 PraisonAI OpenAI Compatibility Test Suite")
+    print("="*60)
+    
+    print("\n🔍 Make sure to start the OpenAI compatibility server first:")
+    print("   python -m praisonai serve openai --port 8765")
+    print("\n" + "="*60 + "\n")
+    
+    tests = [
+        test_basic_chat_completion,
+        test_streaming_chat_completion,
+        test_models_list,
+        test_text_completion,
+        test_tools_integration,
+    ]
+    
+    results = []
+    for test in tests:
+        try:
+            result = test()
+            results.append(result)
+        except Exception as e:
+            print(f"❌ Test {test.__name__} failed with error: {e}")
+            results.append(False)
+        
+        time.sleep(1)  # Brief pause between tests
+    
+    print("\n" + "="*60)
+    print("📊 Test Results Summary")
+    print("="*60)
+    
+    passed = sum(results)
+    total = len(results)
+    
+    for i, (test, result) in enumerate(zip(tests, results)):
+        status = "✅ PASS" if result else "❌ FAIL"
+        print(f"{i+1}. {test.__name__}: {status}")
+    
+    print(f"\nOverall: {passed}/{total} tests passed")
+    
+    if passed == total:
+        print("🎉 All tests passed! OpenAI compatibility is working.")
+    else:
+        print(f"⚠️ {total - passed} tests failed. Check server logs for details.")
+    
+    return passed == total
+
+if __name__ == "__main__":
+    success = run_compatibility_tests()
+    exit(0 if success else 1)
\ No newline at end of file

From ac6a8fa83fd4df282b77c326cc61a98c71e50c18 Mon Sep 17 00:00:00 2001
From: "praisonai-triage-agent[bot]"
 <272766704+praisonai-triage-agent[bot]@users.noreply.github.com>
Date: Fri, 19 Jun 2026 22:54:22 +0000
Subject: [PATCH 2/4] fix: Address critical issues in OpenAI compatibility
 layer

- Fix import errors: use create_unified_app instead of non-existent create_server
- Fix streaming: use text/event-stream media type and avoid double execution
- Fix health check: remove live LLM calls that consume tokens
- Add input validation: guard against None input data
- Add --agents-url parameter for proper agent routing

Addresses review feedback from Gemini, Greptile, and CodeRabbit
---
 src/praisonai/praisonai/cli/commands/serve.py | 30 ++++++++++---------
 .../endpoints/providers/openai_compat.py      | 25 +++++++---------
 2 files changed, 27 insertions(+), 28 deletions(-)

diff --git a/src/praisonai/praisonai/cli/commands/serve.py b/src/praisonai/praisonai/cli/commands/serve.py
index 31fbbadb2..9e0687fc0 100644
--- a/src/praisonai/praisonai/cli/commands/serve.py
+++ b/src/praisonai/praisonai/cli/commands/serve.py
@@ -585,6 +585,7 @@ def serve_unified(
 def serve_openai(
     host: str = typer.Option("127.0.0.1", "--host", "-h", help="Host to bind to"),
     port: int = typer.Option(8765, "--port", "-p", help="Port to bind to"),
+    agents_url: str = typer.Option("http://127.0.0.1:8000", "--agents-url", help="URL of the running Agents API server"),
     api_key: Optional[str] = typer.Option(None, "--api-key", help="API key for authentication"),
     reload: bool = typer.Option(False, "--reload", help="Enable auto-reload"),
 ):
@@ -603,12 +604,12 @@ def serve_openai(
         import uvicorn
         from fastapi import FastAPI
         from praisonai.endpoints.providers import OpenAICompatProvider, AgentsAPIProvider
-        from praisonai.endpoints.server import create_server, register_provider
+        from praisonai.endpoints.server import create_unified_app, register_provider_to_discovery, register_endpoint_to_discovery
         
         output.print_info(f"Starting OpenAI-compatible server on {host}:{port}")
         
         # Create providers
-        agents_provider = AgentsAPIProvider(base_url=f"http://{host}:{port}")
+        agents_provider = AgentsAPIProvider(base_url=agents_url)
         openai_provider = OpenAICompatProvider(
             base_url=f"http://{host}:{port}",
             api_key=api_key,
@@ -616,8 +617,10 @@ def serve_openai(
         )
         
         # Create server and register OpenAI provider
-        app = create_server(title="PraisonAI OpenAI API", version="1.0.0")
-        register_provider(app, openai_provider, prefix="/v1")
+        app = create_unified_app(server_name="PraisonAI OpenAI API")
+        register_provider_to_discovery(app, openai_provider.get_provider_info())
+        for endpoint in openai_provider.list_endpoints():
+            register_endpoint_to_discovery(app, endpoint)
         
         # Add OpenAI-style route mappings
         from fastapi import Request, Response
@@ -627,14 +630,6 @@ def serve_openai(
         @app.post("/v1/chat/completions")
         async def chat_completions(request: Request):
             body = await request.json()
-            result = openai_provider.invoke("chat_completions", body, stream=body.get("stream", False))
-            
-            if not result.ok:
-                return Response(
-                    content=json.dumps({"error": {"message": result.error, "type": "api_error"}}),
-                    status_code=400,
-                    media_type="application/json"
-                )
             
             if body.get("stream", False):
                 def generate():
@@ -643,8 +638,15 @@ def generate():
                             yield f"data: {json.dumps(chunk['data'])}\n\n"
                         elif chunk["event"] == "done":
                             yield "data: [DONE]\n\n"
-                
-                return StreamingResponse(generate(), media_type="text/plain")
+                return StreamingResponse(generate(), media_type="text/event-stream")
+            
+            result = openai_provider.invoke("chat_completions", body, stream=False)
+            if not result.ok:
+                return Response(
+                    content=json.dumps({"error": {"message": result.error, "type": "api_error"}}),
+                    status_code=400,
+                    media_type="application/json"
+                )
             
             return result.data
         
diff --git a/src/praisonai/praisonai/endpoints/providers/openai_compat.py b/src/praisonai/praisonai/endpoints/providers/openai_compat.py
index 2d0c2ebf4..8b9669178 100644
--- a/src/praisonai/praisonai/endpoints/providers/openai_compat.py
+++ b/src/praisonai/praisonai/endpoints/providers/openai_compat.py
@@ -198,11 +198,12 @@ def invoke_stream(
             else:
                 yield {"event": "error", "data": {"error": result.error}}
     
-    def _handle_chat_completions(self, input_data: Dict[str, Any], stream: bool = False) -> InvokeResult:
+    def _handle_chat_completions(self, input_data: Optional[Dict[str, Any]], stream: bool = False) -> InvokeResult:
         """Handle /v1/chat/completions endpoint."""
         from praisonai.capabilities.completions import chat_completion
         
         try:
+            input_data = input_data or {}
             # Extract OpenAI-format request
             messages = input_data.get("messages", [])
             model = input_data.get("model", "gpt-4o-mini")
@@ -239,11 +240,12 @@ def _handle_chat_completions(self, input_data: Dict[str, Any], stream: bool = Fa
                 error=f"Chat completion error: {str(e)}",
             )
     
-    def _handle_completions(self, input_data: Dict[str, Any]) -> InvokeResult:
+    def _handle_completions(self, input_data: Optional[Dict[str, Any]]) -> InvokeResult:
         """Handle /v1/completions endpoint."""
         from praisonai.capabilities.completions import text_completion
         
         try:
+            input_data = input_data or {}
             prompt = input_data.get("prompt", "")
             model = input_data.get("model", "gpt-3.5-turbo-instruct")
             temperature = input_data.get("temperature", 1.0)
@@ -313,7 +315,7 @@ def _handle_models(self) -> InvokeResult:
             data=response,
         )
     
-    def _handle_tools_invoke(self, input_data: Dict[str, Any]) -> InvokeResult:
+    def _handle_tools_invoke(self, input_data: Optional[Dict[str, Any]]) -> InvokeResult:
         """Handle /v1/tools/invoke endpoint (PraisonAI extension)."""
         if not self.agent_provider:
             return InvokeResult(
@@ -323,6 +325,7 @@ def _handle_tools_invoke(self, input_data: Dict[str, Any]) -> InvokeResult:
             )
         
         try:
+            input_data = input_data or {}
             agent_name = input_data.get("agent", "default")
             tool_name = input_data.get("tool_name")
             parameters = input_data.get("parameters", {})
@@ -354,7 +357,7 @@ def _handle_tools_invoke(self, input_data: Dict[str, Any]) -> InvokeResult:
                 error=f"Tool invocation error: {str(e)}",
             )
     
-    def _stream_chat_completions(self, input_data: Dict[str, Any]) -> Iterator[Dict[str, Any]]:
+    def _stream_chat_completions(self, input_data: Optional[Dict[str, Any]]) -> Iterator[Dict[str, Any]]:
         """Stream chat completions in OpenAI SSE format."""
         try:
             # For now, use non-streaming and convert to SSE format
@@ -367,8 +370,9 @@ def _stream_chat_completions(self, input_data: Dict[str, Any]) -> Iterator[Dict[
                 }
                 return
             
-            response = result.data
-            choice = response["choices"][0] if response["choices"] else {}
+            response = result.data or {}
+            choices = response.get("choices", [])
+            choice = choices[0] if choices else {}
             content = choice.get("message", {}).get("content", "")
             
             # Simulate streaming by chunking the response
@@ -463,15 +467,9 @@ def _format_completion_response(self, result) -> Dict[str, Any]:
     def health(self) -> HealthResult:
         """Check OpenAI compatibility layer health."""
         try:
-            # Test basic completion capability
+            # Verify that the required capabilities can be imported
             from praisonai.capabilities.completions import chat_completion
             
-            test_result = chat_completion(
-                messages=[{"role": "user", "content": "test"}],
-                model="gpt-4o-mini",
-                max_tokens=1,
-            )
-            
             return HealthResult(
                 healthy=True,
                 status="healthy",
@@ -480,7 +478,6 @@ def health(self) -> HealthResult:
                 provider_type=self.provider_type,
                 metadata={
                     "endpoints": ["chat/completions", "completions", "models", "tools/invoke"],
-                    "test_completion_id": test_result.id,
                 },
             )
             

From 113357c67c7a8f994221a48eb9302df8fe21f186 Mon Sep 17 00:00:00 2001
From: "praisonai-triage-agent[bot]"
 <272766704+praisonai-triage-agent[bot]@users.noreply.github.com>
Date: Mon, 22 Jun 2026 19:06:49 +0000
Subject: [PATCH 3/4] fix: Handle streaming errors and tool_calls in OpenAI
 compatibility layer

- Fix streaming error handling: error events are now properly sent to clients instead of being silently dropped
- Fix tool_calls in streaming: tool calls are now included in streamed responses instead of being lost
- Addresses critical issues identified by Greptile reviewer

Co-authored-by: Mervin Praison <MervinPraison@users.noreply.github.com>
---
 src/praisonai/praisonai/cli/commands/serve.py | 11 ++++
 .../endpoints/providers/openai_compat.py      | 66 ++++++++++++++-----
 2 files changed, 59 insertions(+), 18 deletions(-)

diff --git a/src/praisonai/praisonai/cli/commands/serve.py b/src/praisonai/praisonai/cli/commands/serve.py
index 9e0687fc0..c48afaf1a 100644
--- a/src/praisonai/praisonai/cli/commands/serve.py
+++ b/src/praisonai/praisonai/cli/commands/serve.py
@@ -638,6 +638,17 @@ def generate():
                             yield f"data: {json.dumps(chunk['data'])}\n\n"
                         elif chunk["event"] == "done":
                             yield "data: [DONE]\n\n"
+                        elif chunk["event"] == "error":
+                            # Send error as OpenAI-formatted SSE error chunk
+                            error_chunk = {
+                                "error": {
+                                    "message": chunk.get("data", {}).get("error", "Stream error occurred"),
+                                    "type": "stream_error"
+                                }
+                            }
+                            yield f"data: {json.dumps(error_chunk)}\n\n"
+                            yield "data: [DONE]\n\n"
+                            break
                 return StreamingResponse(generate(), media_type="text/event-stream")
             
             result = openai_provider.invoke("chat_completions", body, stream=False)
diff --git a/src/praisonai/praisonai/endpoints/providers/openai_compat.py b/src/praisonai/praisonai/endpoints/providers/openai_compat.py
index 8b9669178..fec89c8a4 100644
--- a/src/praisonai/praisonai/endpoints/providers/openai_compat.py
+++ b/src/praisonai/praisonai/endpoints/providers/openai_compat.py
@@ -373,27 +373,57 @@ def _stream_chat_completions(self, input_data: Optional[Dict[str, Any]]) -> Iter
             response = result.data or {}
             choices = response.get("choices", [])
             choice = choices[0] if choices else {}
-            content = choice.get("message", {}).get("content", "")
+            message = choice.get("message", {})
+            content = message.get("content", "")
+            tool_calls = message.get("tool_calls", [])
             
             # Simulate streaming by chunking the response
             chunk_id = str(uuid.uuid4())
             
-            # Send chunks
-            for i, char in enumerate(content):
-                chunk = {
-                    "id": chunk_id,
-                    "object": "chat.completion.chunk",
-                    "created": int(time.time()),
-                    "model": response.get("model", "gpt-4o-mini"),
-                    "choices": [
-                        {
-                            "index": 0,
-                            "delta": {"content": char},
-                            "finish_reason": None
-                        }
-                    ]
-                }
-                yield {"event": "data", "data": chunk}
+            # If there are tool calls, send them first
+            if tool_calls:
+                for idx, tool_call in enumerate(tool_calls):
+                    tool_chunk = {
+                        "id": chunk_id,
+                        "object": "chat.completion.chunk",
+                        "created": int(time.time()),
+                        "model": response.get("model", "gpt-4o-mini"),
+                        "choices": [
+                            {
+                                "index": 0,
+                                "delta": {
+                                    "tool_calls": [
+                                        {
+                                            "index": idx,
+                                            "id": tool_call.get("id"),
+                                            "type": tool_call.get("type", "function"),
+                                            "function": tool_call.get("function", {})
+                                        }
+                                    ]
+                                },
+                                "finish_reason": None
+                            }
+                        ]
+                    }
+                    yield {"event": "data", "data": tool_chunk}
+            
+            # Send content chunks
+            if content:
+                for i, char in enumerate(content):
+                    chunk = {
+                        "id": chunk_id,
+                        "object": "chat.completion.chunk",
+                        "created": int(time.time()),
+                        "model": response.get("model", "gpt-4o-mini"),
+                        "choices": [
+                            {
+                                "index": 0,
+                                "delta": {"content": char},
+                                "finish_reason": None
+                            }
+                        ]
+                    }
+                    yield {"event": "data", "data": chunk}
             
             # Send final chunk
             final_chunk = {
@@ -405,7 +435,7 @@ def _stream_chat_completions(self, input_data: Optional[Dict[str, Any]]) -> Iter
                     {
                         "index": 0,
                         "delta": {},
-                        "finish_reason": "stop"
+                        "finish_reason": "stop" if not tool_calls else "tool_calls"
                     }
                 ]
             }

From afa00d0265b6e18a6e9a908825a3852b9b18165f Mon Sep 17 00:00:00 2001
From: "praisonai-triage-agent[bot]"
 <272766704+praisonai-triage-agent[bot]@users.noreply.github.com>
Date: Mon, 22 Jun 2026 21:43:05 +0000
Subject: [PATCH 4/4] fix: Address OpenAI compatibility layer protocol issues

- Remove advertised but unimplemented embeddings capability
- Add required initial role chunk in streaming (OpenAI SDK requirement)
- Remove tool_calls: null from non-tool responses (violates OpenAI schema)

These fixes ensure strict OpenAI API compliance for better client compatibility.

Co-authored-by: Mervin Praison <MervinPraison@users.noreply.github.com>
---
 .../endpoints/providers/openai_compat.py      | 37 +++++++++++++++----
 1 file changed, 29 insertions(+), 8 deletions(-)

diff --git a/src/praisonai/praisonai/endpoints/providers/openai_compat.py b/src/praisonai/praisonai/endpoints/providers/openai_compat.py
index fec89c8a4..8745033ff 100644
--- a/src/praisonai/praisonai/endpoints/providers/openai_compat.py
+++ b/src/praisonai/praisonai/endpoints/providers/openai_compat.py
@@ -22,7 +22,6 @@ class OpenAICompatProvider(BaseProvider):
     - POST /v1/chat/completions
     - POST /v1/completions  
     - GET /v1/models
-    - POST /v1/embeddings (if available)
     - POST /v1/tools/invoke (custom)
     """
     
@@ -53,7 +52,7 @@ def get_provider_info(self) -> ProviderInfo:
             type=self.provider_type,
             name="OpenAI API Compatibility Layer",
             description="OpenAI API-compatible endpoints for PraisonAI",
-            capabilities=["chat", "completions", "models", "embeddings", "tools"],
+            capabilities=["chat", "completions", "models", "tools"],
         )
     
     def list_endpoints(self, tags: Optional[List[str]] = None) -> List[EndpointInfo]:
@@ -380,7 +379,23 @@ def _stream_chat_completions(self, input_data: Optional[Dict[str, Any]]) -> Iter
             # Simulate streaming by chunking the response
             chunk_id = str(uuid.uuid4())
             
-            # If there are tool calls, send them first
+            # Send initial role chunk (required by OpenAI SDK)
+            initial_chunk = {
+                "id": chunk_id,
+                "object": "chat.completion.chunk",
+                "created": int(time.time()),
+                "model": response.get("model", "gpt-4o-mini"),
+                "choices": [
+                    {
+                        "index": 0,
+                        "delta": {"role": "assistant"},
+                        "finish_reason": None
+                    }
+                ]
+            }
+            yield {"event": "data", "data": initial_chunk}
+            
+            # If there are tool calls, send them next
             if tool_calls:
                 for idx, tool_call in enumerate(tool_calls):
                     tool_chunk = {
@@ -450,6 +465,16 @@ def _stream_chat_completions(self, input_data: Optional[Dict[str, Any]]) -> Iter
     
     def _format_chat_completion_response(self, result) -> Dict[str, Any]:
         """Convert PraisonAI CompletionResult to OpenAI chat completion format."""
+        # Build message dict, only including tool_calls if they exist
+        message = {
+            "role": result.role,
+            "content": result.content,
+        }
+        
+        # Only add tool_calls if they exist (OpenAI schema doesn't allow null)
+        if result.tool_calls:
+            message["tool_calls"] = result.tool_calls
+        
         return {
             "id": result.id or f"chatcmpl-{uuid.uuid4().hex[:8]}",
             "object": "chat.completion",
@@ -458,11 +483,7 @@ def _format_chat_completion_response(self, result) -> Dict[str, Any]:
             "choices": [
                 {
                     "index": 0,
-                    "message": {
-                        "role": result.role,
-                        "content": result.content,
-                        "tool_calls": result.tool_calls,
-                    },
+                    "message": message,
                     "finish_reason": result.finish_reason or "stop",
                 }
             ],