{"openapi":"3.1.0","info":{"title":"LLM Gateway","description":"A standalone LLM gateway with intelligent model routing, adaptive signal tracking, degradation detection, prompt sanitization, cost tracking, and usage analytics. Forwards requests to Anthropic, OpenAI, Azure OpenAI, and local models.","license":{"name":"MIT"},"version":"0.3.1"},"paths":{"/health":{"get":{"tags":["Operations"],"summary":"Health check","description":"Returns service health, uptime, and configured providers.","operationId":"health","responses":{"200":{"description":"Service is healthy","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HealthResponse"}}}}}}},"/v1/alerts":{"get":{"tags":["Routing"],"summary":"Check for model degradation alerts","description":"Runs degradation detection against the provided signal aggregations. Returns alerts for models whose error rate, acceptance, edit distance, parse success, governance violation rate, or latency crosses its threshold.","operationId":"check_alerts","requestBody":{"content":{"application/json":{"schema":{"type":"array","items":{"$ref":"#/components/schemas/SignalAggregation"}}}},"required":true},"responses":{"200":{"description":"Degradation alerts","content":{"application/json":{"schema":{"$ref":"#/components/schemas/AlertsResponse"}}}}}}},"/v1/catalog":{"get":{"tags":["Models"],"summary":"Full model card catalog","description":"Returns the complete model card catalog with capabilities, pricing, latency profiles, quality signals, and lifecycle metadata for every model known to the gateway. Reflects the live runtime catalog (B-042): startup seed plus any sync results.","operationId":"list_catalog","responses":{"200":{"description":"Model card catalog","content":{"application/json":{"schema":{"type":"array","items":{"$ref":"#/components/schemas/ModelCard"}}}}}}}},"/v1/chat/completions":{"post":{"tags":["Proxy"],"summary":"OpenAI-compatible proxy","description":"Forwards requests to the OpenAI Chat Completions API (or Azure OpenAI). 
Supports streaming via SSE.","operationId":"handle_openai","requestBody":{"content":{"application/json":{"schema":{}}},"required":true},"responses":{"200":{"description":"Proxied response from OpenAI"},"403":{"description":"Blocked by compliance gate","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"502":{"description":"Upstream error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}}}}},"/v1/messages":{"post":{"tags":["Proxy"],"summary":"Anthropic-compatible proxy","description":"Forwards requests to the Anthropic Messages API. Supports streaming via SSE.","operationId":"handle_anthropic","requestBody":{"content":{"application/json":{"schema":{}}},"required":true},"responses":{"200":{"description":"Proxied response from Anthropic"},"403":{"description":"Blocked by compliance gate","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"502":{"description":"Upstream error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}}}}},"/v1/models":{"get":{"tags":["Models"],"summary":"List available models","description":"Returns models available from all configured and enabled providers. For cloud providers, reads from the live runtime catalog (B-042/B-053 dynamic discovery + seed) so freshly-synced models appear without a redeploy. For local providers, queries the provider's /v1/models endpoint directly. Azure providers report their configured deployment.","operationId":"list_models","responses":{"200":{"description":"List of available models","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ModelsResponse"}}}}}}},"/v1/projects":{"get":{"tags":["Projects"],"summary":"List projects with usage summaries","description":"Returns all projects that have sent requests through the gateway, with aggregate usage statistics for each. 
Optionally filter to projects active within the last N minutes.","operationId":"list_projects","parameters":[{"name":"active_since_mins","in":"query","description":"Only return projects with activity in the last N minutes","required":false,"schema":{"type":"integer","format":"int64","minimum":0}}],"responses":{"200":{"description":"List of project summaries","content":{"application/json":{"schema":{"type":"array","items":{"$ref":"#/components/schemas/ProjectSummary"}}}}}}}},"/v1/projects/{project}/records":{"get":{"tags":["Projects"],"summary":"Get records for a specific project","description":"Returns all proxy records for the specified project, ordered by timestamp (most recent first).","operationId":"project_records","parameters":[{"name":"project","in":"path","description":"Project identifier","required":true,"schema":{"type":"string"}}],"responses":{"200":{"description":"Project request records","content":{"application/json":{"schema":{"type":"array","items":{"$ref":"#/components/schemas/ProxyRecord"}}}}}}}},"/v1/providers":{"get":{"tags":["Providers"],"summary":"List providers","description":"Returns all configured providers. API keys are never included in the response.","operationId":"list_providers","responses":{"200":{"description":"List of configured providers","content":{"application/json":{"schema":{"type":"array","items":{"$ref":"#/components/schemas/ProviderInfo"}}}}}}},"post":{"tags":["Providers"],"summary":"Add a provider","description":"Registers a new LLM provider at runtime. 
The provider is stored in memory and will be lost on restart unless persisted to config.","operationId":"add_provider","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/AddProviderRequest"}}},"required":true},"responses":{"201":{"description":"Provider added","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ProviderInfo"}}}},"400":{"description":"Invalid provider type"},"409":{"description":"Provider ID already exists"}}}},"/v1/providers/{id}":{"delete":{"tags":["Providers"],"summary":"Remove a provider","description":"Removes a provider by ID. This is an in-memory operation.","operationId":"remove_provider","parameters":[{"name":"id","in":"path","description":"Provider ID to remove","required":true,"schema":{"type":"string"}}],"responses":{"204":{"description":"Provider removed"},"404":{"description":"Provider not found"}}}},"/v1/records":{"get":{"tags":["Operations"],"summary":"Request history","description":"Returns proxy request records, most recent first. Defaults to 1000 records. 
Does not include request/response bodies — use /v1/requests for full payloads.","operationId":"records","parameters":[{"name":"limit","in":"query","description":"Maximum number of records to return (default 1000)","required":false,"schema":{"type":"integer","format":"int64"}}],"responses":{"200":{"description":"List of proxy records","content":{"application/json":{"schema":{"type":"array","items":{"$ref":"#/components/schemas/ProxyRecord"}}}}}}}},"/v1/route":{"post":{"tags":["Routing"],"summary":"Route a request to the optimal model","description":"Runs the 5-layer routing engine (compliance, capability, quality, cost, adaptive) to select the best model for the given task and constraints.","operationId":"route_request","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/RoutingInput"}}},"required":true},"responses":{"200":{"description":"Routing decision with selection reasoning","content":{"application/json":{"schema":{"$ref":"#/components/schemas/RoutingOutput"}}}}}}},"/v1/routing-profiles":{"get":{"tags":["Routing"],"summary":"List bundled routing profiles","description":"Returns the 9 pre-configured routing profiles that optimize model selection for different task types: coding, code review, architecture, migration planning, document generation, compliance analysis, coaching, classification, and batch analysis.","operationId":"list_routing_profiles","responses":{"200":{"description":"Routing profiles","content":{"application/json":{"schema":{"type":"array","items":{"$ref":"#/components/schemas/RoutingProfile"}}}}}}}},"/v1/signals":{"get":{"tags":["Routing"],"summary":"Drain pending signals","description":"Returns and clears all pending routing signals collected since the last drain.","operationId":"drain_signals","responses":{"200":{"description":"Pending signals","content":{"application/json":{"schema":{"$ref":"#/components/schemas/SignalsResponse"}}}}}},"post":{"tags":["Routing"],"summary":"Record a quality 
signal","description":"Records an adaptive routing signal for a model. Signal types: acceptance, edit_distance, regeneration, latency, parse_success, error, governance_violation.","operationId":"record_signal","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/RecordSignalRequest"}}},"required":true},"responses":{"200":{"description":"Signal recorded"}}}},"/v1/stats":{"get":{"tags":["Operations"],"summary":"Usage statistics","description":"Returns aggregate usage statistics across all proxied requests.","operationId":"stats","responses":{"200":{"description":"Aggregate stats","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ProxyStats"}}}}}}}},"components":{"schemas":{"AddProviderRequest":{"type":"object","required":["id","provider_type"],"properties":{"api_key":{"type":["string","null"]},"api_version":{"type":["string","null"]},"deployment":{"type":["string","null"]},"endpoint":{"type":["string","null"]},"id":{"type":"string"},"provider_type":{"type":"string"}}},"AlertSeverity":{"type":"string","enum":["warning","critical"]},"AlertsResponse":{"type":"object","required":["alerts","signals_checked"],"properties":{"alerts":{"type":"array","items":{"$ref":"#/components/schemas/DegradationAlert"}},"signals_checked":{"type":"integer","minimum":0}}},"CacheType":{"type":"string","enum":["none","explicit","automatic"]},"CapabilityFallbackConfig":{"type":"object","properties":{"citations":{"$ref":"#/components/schemas/FallbackPolicy"},"extended_thinking":{"$ref":"#/components/schemas/FallbackPolicy"},"json_mode":{"$ref":"#/components/schemas/FallbackPolicy"},"prompt_caching":{"$ref":"#/components/schemas/FallbackPolicy"},"streaming":{"$ref":"#/components/schemas/FallbackPolicy"},"tool_use":{"$ref":"#/components/schemas/FallbackPolicy"},"vision":{"$ref":"#/components/schemas/FallbackPolicy"}}},"CapabilitySet":{"type":"object","properties":{"batch_api":{"type":"boolean"},"citations":{"type":"boolean"},"embeddings":{"type":"boolean"},"ex
tended_thinking":{"type":"boolean"},"json_mode":{"type":"boolean"},"prompt_caching":{"$ref":"#/components/schemas/CacheType"},"reasoning_modes":{"type":"array","items":{"$ref":"#/components/schemas/ReasoningMode"}},"streaming":{"type":"boolean"},"tool_use":{"type":"boolean"},"vision":{"type":"boolean"}}},"CostWeight":{"type":"string","enum":["low","medium","high","very_high"]},"DataClassification":{"type":"string","enum":["public","internal","cui","itar","classified"]},"DegradationAlert":{"type":"object","required":["model_card_id","alert_type","severity","message","current_value","threshold","timestamp"],"properties":{"alert_type":{"$ref":"#/components/schemas/DegradationType"},"current_value":{"type":"number","format":"double"},"message":{"type":"string"},"model_card_id":{"type":"string"},"severity":{"$ref":"#/components/schemas/AlertSeverity"},"threshold":{"type":"number","format":"double"},"timestamp":{"type":"string"}}},"DegradationType":{"type":"string","enum":["high_error_rate","low_acceptance","high_edit_distance","low_parse_success","high_governance_violation_rate","high_latency"]},"ErrorDetail":{"type":"object","required":["type","message"],"properties":{"message":{"type":"string"},"type":{"type":"string"}}},"ErrorResponse":{"type":"object","required":["error"],"properties":{"error":{"$ref":"#/components/schemas/ErrorDetail"}}},"FallbackPolicy":{"type":"string","enum":["fail_closed","fail_open"]},"HealthResponse":{"type":"object","required":["status","version","uptime_secs","port","providers"],"properties":{"port":{"type":"integer","format":"int32","minimum":0},"providers":{"type":"array","items":{"type":"string"}},"status":{"type":"string"},"uptime_secs":{"type":"integer","format":"int64","minimum":0},"version":{"type":"string"}}},"LatencyProfile":{"type":"object","properties":{"median_tps":{"type":"integer","format":"int32","minimum":0},"median_ttft_ms":{"type":"integer","format":"int32","minimum":0},"p95_ttft_ms":{"type":"integer","format":"int32","mini
mum":0}}},"LatencyTolerance":{"type":"string","enum":["realtime","interactive","background","flexible"]},"LlmModelStatus":{"type":"string","enum":["active","deprecated","preview","retired"]},"ModelCard":{"type":"object","required":["id","provider_id","model_id","display_name"],"properties":{"authorized_classifications":{"type":"array","items":{"$ref":"#/components/schemas/DataClassification"}},"capabilities":{"$ref":"#/components/schemas/CapabilitySet"},"context_window":{"type":"integer","format":"int32","minimum":0},"deprecation_date":{"type":["string","null"]},"description":{"type":["string","null"]},"display_name":{"type":"string"},"family":{"type":"string"},"id":{"type":"string"},"latency":{"$ref":"#/components/schemas/LatencyProfile"},"max_output_tokens":{"type":"integer","format":"int32","minimum":0},"max_thinking_tokens":{"type":["integer","null"],"format":"int32","minimum":0},"model_id":{"type":"string"},"pricing":{"$ref":"#/components/schemas/ModelPricing"},"provider_id":{"type":"string"},"quality_signals":{"$ref":"#/components/schemas/QualitySignals"},"release_date":{"type":["string","null"]},"retirement_date":{"type":["string","null"]},"status":{"$ref":"#/components/schemas/LlmModelStatus"},"successor":{"type":["string","null"]},"tags":{"type":"array","items":{"type":"string"}},"version":{"type":"string"}}},"ModelEntry":{"type":"object","required":["id","provider","owned_by"],"properties":{"id":{"type":"string"},"owned_by":{"type":"string"},"provider":{"type":"string"}}},"ModelPricing":{"type":"object","properties":{"batch_input_per_m":{"type":["number","null"],"format":"double"},"batch_output_per_m":{"type":["number","null"],"format":"double"},"cached_input_per_m":{"type":["number","null"],"format":"double"},"embedding_per_m":{"type":["number","null"],"format":"double"},"input_per_m":{"type":"number","format":"double"},"output_per_m":{"type":"number","format":"double"},"thinking_per_m":{"type":["number","null"],"format":"double"}}},"ModelsResponse":{"typ
e":"object","required":["object","data"],"properties":{"data":{"type":"array","items":{"$ref":"#/components/schemas/ModelEntry"}},"object":{"type":"string"}}},"ProjectSummary":{"type":"object","required":["project","total_requests","total_input_tokens","total_output_tokens","total_cost","errors","models_used","sources","last_activity"],"properties":{"errors":{"type":"integer","format":"int64","minimum":0},"last_activity":{"type":"string"},"models_used":{"type":"array","items":{"type":"string"}},"project":{"type":"string"},"sources":{"type":"array","items":{"type":"string"}},"total_cost":{"type":"number","format":"double"},"total_input_tokens":{"type":"integer","format":"int64","minimum":0},"total_output_tokens":{"type":"integer","format":"int64","minimum":0},"total_requests":{"type":"integer","format":"int64","minimum":0}}},"ProviderInfo":{"type":"object","required":["id","provider_type","enabled","has_key"],"properties":{"enabled":{"type":"boolean"},"endpoint":{"type":["string","null"]},"has_key":{"type":"boolean"},"id":{"type":"string"},"provider_type":{"type":"string"}}},"ProxyRecord":{"type":"object","required":["id","timestamp","source","project","provider","model","input_tokens","output_tokens","thinking_tokens","cached_tokens","total_cost","latency_ms","status_code"],"properties":{"attempts":{"type":"integer","format":"int32","minimum":0},"cached_tokens":{"type":"integer","format":"int32","minimum":0},"error":{"type":["string","null"]},"failover_occurred":{"type":"boolean"},"id":{"type":"string"},"input_tokens":{"type":"integer","format":"int32","minimum":0},"latency_ms":{"type":"integer","format":"int64","minimum":0},"model":{"type":"string"},"output_tokens":{"type":"integer","format":"int32","minimum":0},"project":{"type":"string"},"provider":{"type":"string"},"request_body":{},"request_id":{"type":["string","null"]},"response_body":{},"source":{"type":"string"},"status_code":{"type":"integer","format":"int32","minimum":0},"thinking_tokens":{"type":"integer
","format":"int32","minimum":0},"timestamp":{"type":"string"},"total_cost":{"type":"number","format":"double"}}},"ProxyStats":{"type":"object","required":["total_requests","total_input_tokens","total_output_tokens","total_thinking_tokens","total_cached_tokens","total_estimated_cost","requests_by_source","requests_by_model","requests_by_project","errors","avg_latency_ms"],"properties":{"avg_latency_ms":{"type":"number","format":"double"},"errors":{"type":"integer","format":"int64","minimum":0},"requests_by_model":{"type":"object","additionalProperties":{"type":"integer","format":"int64","minimum":0},"propertyNames":{"type":"string"}},"requests_by_project":{"type":"object","additionalProperties":{"type":"integer","format":"int64","minimum":0},"propertyNames":{"type":"string"}},"requests_by_source":{"type":"object","additionalProperties":{"type":"integer","format":"int64","minimum":0},"propertyNames":{"type":"string"}},"total_cached_tokens":{"type":"integer","format":"int64","minimum":0},"total_estimated_cost":{"type":"number","format":"double"},"total_input_tokens":{"type":"integer","format":"int64","minimum":0},"total_output_tokens":{"type":"integer","format":"int64","minimum":0},"total_requests":{"type":"integer","format":"int64","minimum":0},"total_thinking_tokens":{"type":"integer","format":"int64","minimum":0}}},"QualitySignals":{"type":"object","properties":{"arena_elo":{"type":["integer","null"],"format":"int32","minimum":0},"classification_score":{"type":["number","null"],"format":"double"},"coding_score":{"type":["number","null"],"format":"double"},"document_score":{"type":["number","null"],"format":"double"},"instruction_following":{"type":["number","null"],"format":"double"},"reasoning_score":{"type":["number","null"],"format":"double"}}},"QualityWeight":{"type":"string","enum":["low","medium","high","very_high"]},"ReasoningMode":{"type":"string","enum":["standard","extended_thinking","adaptive"]},"RecordSignalRequest":{"type":"object","required":["model_ca
rd_id","task_type","signal_type"],"properties":{"model_card_id":{"type":"string"},"signal_type":{"type":"string"},"task_type":{"type":"string"},"value":{"type":["number","null"],"format":"double"}}},"RoutingInput":{"type":"object","required":["task_type","profile","available_models"],"properties":{"adaptive_signals":{"type":"array","items":{"$ref":"#/components/schemas/SignalAggregation"}},"available_models":{"type":"array","items":{"$ref":"#/components/schemas/ModelCard"}},"capability_fallback":{"$ref":"#/components/schemas/CapabilityFallbackConfig"},"data_classification":{"$ref":"#/components/schemas/DataClassification"},"max_cost_per_request":{"type":["number","null"],"format":"double"},"profile":{"$ref":"#/components/schemas/RoutingProfile"},"provider_authorized_classifications":{"type":"array","items":{"type":"array","items":false,"prefixItems":[{"type":"string"},{"type":"array","items":{"type":"string","enum":["public","internal","cui","itar","classified"]}}]}},"task_type":{"type":"string"}}},"RoutingOutput":{"type":"object","required":["compliance_eligible","capability_matched","quality_ranked","cost_ranked","final_ranked","selection_reason"],"properties":{"capability_matched":{"type":"array","items":{"type":"string"}},"compliance_eligible":{"type":"array","items":{"type":"string"}},"cost_ranked":{"type":"array","items":{"type":"string"}},"final_ranked":{"type":"array","items":{"type":"string"}},"missing_capabilities":{"type":"array","items":{"type":"string"}},"quality_ranked":{"type":"array","items":{"type":"string"}},"selected_model":{"type":["string","null"]},"selection_reason":{"type":"string"}}},"RoutingProfile":{"type":"object","required":["id","name"],"properties":{"cost_weight":{"$ref":"#/components/schemas/CostWeight"},"custom":{"type":"boolean"},"description":{"type":["string","null"]},"id":{"type":"string"},"latency_tolerance":{"$ref":"#/components/schemas/LatencyTolerance"},"name":{"type":"string"},"preferred_capabilities":{"type":"array","items":
{"type":"string"}},"quality_weight":{"$ref":"#/components/schemas/QualityWeight"},"reasoning_mode":{"oneOf":[{"type":"null"},{"$ref":"#/components/schemas/ReasoningMode"}]},"required_capabilities":{"type":"array","items":{"type":"string"}},"timeout_ms":{"type":["integer","null"],"format":"int32","minimum":0}}},"RoutingSignal":{"type":"object","required":["id","model_card_id","task_type","signal_type","value","timestamp"],"properties":{"id":{"type":"string"},"model_card_id":{"type":"string"},"signal_type":{"$ref":"#/components/schemas/RoutingSignalType"},"task_type":{"type":"string"},"timestamp":{"type":"string"},"value":{"type":"number","format":"double"}}},"RoutingSignalType":{"type":"string","enum":["acceptance","edit_distance","regeneration","latency","parse_success","error","governance_violation"]},"SanitizationFinding":{"type":"object","required":["stage","severity","message","redacted"],"properties":{"message":{"type":"string"},"redacted":{"type":"boolean"},"severity":{"type":"string"},"stage":{"type":"string"}}},"SanitizationResult":{"type":"object","required":["sanitized_text","passed","findings"],"properties":{"findings":{"type":"array","items":{"$ref":"#/components/schemas/SanitizationFinding"}},"passed":{"type":"boolean"},"sanitized_text":{"type":"string"}}},"SignalAggregation":{"type":"object","required":["model_card_id"],"properties":{"avg_acceptance":{"type":"number","format":"double"},"avg_edit_distance":{"type":"number","format":"double"},"avg_latency_ms":{"type":"number","format":"double"},"error_rate":{"type":"number","format":"double"},"governance_violation_rate":{"type":"number","format":"double"},"model_card_id":{"type":"string"},"parse_success_rate":{"type":"number","format":"double"},"sample_count":{"type":"integer","format":"int32","minimum":0}}},"SignalsResponse":{"type":"object","required":["pending_count","signals"],"properties":{"pending_count":{"type":"integer","minimum":0},"signals":{"type":"array","items":{"$ref":"#/components/schemas/
RoutingSignal"}}}}}},"tags":[{"name":"Proxy","description":"LLM request proxy endpoints — forward requests to Anthropic or OpenAI-compatible APIs"},{"name":"Routing","description":"Intelligent model routing, adaptive signals, and degradation detection"},{"name":"Models","description":"Discover available models across all configured providers"},{"name":"Providers","description":"Manage LLM provider configuration at runtime"},{"name":"Projects","description":"Per-project usage visibility, cost tracking, and request history"},{"name":"Operations","description":"Health checks, usage statistics, and request history"}]}