The following example demonstrates how you can log LLM request-related information in the gateway's access log to improve analytics and auditing. The following variables are available:

* `request_llm_model`: LLM model name specified in the request.
* `request_type`: Type of request, where the value could be `traditional_http`, `ai_chat`, or `ai_stream`.
* `llm_time_to_first_token`: Duration from request sending to the first token received from the LLM service, in milliseconds.
* `llm_model`: LLM model name reported by the LLM service in its response.
* `llm_prompt_tokens`: Number of tokens in the prompt.
* `llm_completion_tokens`: Number of tokens in the chat completion returned by the LLM service.

In addition, the following standard NGINX upstream variables are automatically populated when `ai-proxy` sends requests via cosocket transport:

* `upstream_addr`: Address of the upstream LLM service (e.g., `api.openai.com:443`).
* `upstream_status`: HTTP status code returned by the upstream LLM service.
* `upstream_response_time`: Total time spent receiving the response from the upstream LLM service, in seconds (e.g., `2.858`).
* `upstream_connect_time`: Time spent establishing the connection to the upstream LLM service, in seconds.
* `upstream_header_time`: Time spent receiving the response headers from the upstream LLM service, in seconds.
* `upstream_host`: Hostname of the upstream LLM service as configured in the endpoint (e.g., `api.openai.com`).
* `upstream_scheme`: Scheme used to connect to the upstream LLM service (e.g., `https`).
* `upstream_uri`: Request URI path sent to the upstream LLM service (e.g., `/v1/chat/completions`).

Update the access log format in your configuration file to include the additional LLM-related variables:
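
As a sketch, assuming the standard `nginx_config.http.access_log_format` option in `config.yaml` (the field order and the non-LLM fields here are illustrative, not prescribed by the plugin):

```yaml
nginx_config:
  http:
    enable_access_log: true
    access_log: logs/access.log
    # Illustrative format: standard request fields followed by the upstream
    # and LLM variables described above.
    access_log_format: '$remote_addr [$time_local] "$request" $status
      $upstream_addr $upstream_status $upstream_response_time
      $request_type $request_llm_model $llm_model
      $llm_time_to_first_token $llm_prompt_tokens $llm_completion_tokens'
```

Reload APISIX (for example, with `apisix reload`) so the regenerated NGINX configuration picks up the new log format.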
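With a format along the lines of the sketch above, a chat completion request through the gateway would produce an access log entry similar to the following. The entry is a single line in practice, and the values are purely illustrative, mirroring the ones discussed below:

```text
192.168.1.10 [10/Oct/2024:12:00:00 +0000] "POST /anything HTTP/1.1" 200 api.openai.com:443 200 2.858 ai_chat gpt-4 gpt-4 2858 23 8
```
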
The access log entry shows that the upstream address is `api.openai.com:443` with status `200`, the request type is `ai_chat`, the upstream response time is `2.858` seconds, the time to first token is `2858` milliseconds, the requested LLM model is `gpt-4`, the responding LLM model is `gpt-4`, prompt token usage is `23`, and completion token usage is `8`.