Commit bda084d

feat: populate upstream nginx variables when ai-proxy uses cosocket transport (#13317)
1 parent a4a7f8a commit bda084d

6 files changed

Lines changed: 245 additions & 13 deletions

.requirements

Lines changed: 1 addition & 1 deletion

@@ -17,5 +17,5 @@

 APISIX_PACKAGE_NAME=apisix

-APISIX_RUNTIME=1.3.4
+APISIX_RUNTIME=1.3.5
 APISIX_DASHBOARD_COMMIT=bbe05ad95396f90db03d831660f454c2844d906e

apisix/plugins/ai-proxy/base.lua

Lines changed: 64 additions & 1 deletion

@@ -16,6 +16,7 @@
 --

 local ngx = ngx
+local ngx_now = ngx.now
 local core = require("apisix.core")
 local require = require
 local pcall = pcall
@@ -26,6 +27,7 @@ local exporter = require("apisix.plugins.prometheus.exporter")
 local protocols = require("apisix.plugins.ai-protocols")
 local transport_http = require("apisix.plugins.ai-transport.http")
 local log_sanitize = require("apisix.utils.log-sanitize")
+local apisix_upstream = require("resty.apisix.upstream")

 local _M = {}

@@ -202,22 +204,76 @@ function _M.before_proxy(conf, ctx, on_error)
                  core.json.delay_encode(log_sanitize.redact_params(params), true))

     -- Step 4: Send via transport
-    local res, transport_err = transport_http.request(params, conf.timeout)
+    local res, transport_err, err_meta = transport_http.request(params, conf.timeout)
     if not res then
         core.log.warn("failed to send request to LLM server: ", transport_err)
+        if err_meta then
+            apisix_upstream.push_upstream_state({
+                addr = err_meta.upstream_addr,
+                status = transport_http.handle_error(transport_err),
+                connect_time = err_meta.connect_time,
+            })
+            if err_meta.upstream_uri then
+                ctx.var.upstream_uri = err_meta.upstream_uri
+            end
+            if err_meta.upstream_host then
+                ctx.var.upstream_host = err_meta.upstream_host
+            end
+            if err_meta.upstream_scheme then
+                ctx.var.upstream_scheme = err_meta.upstream_scheme
+            end
+            if err_meta.t0 then
+                apisix_upstream.update_upstream_state({
+                    response_time = (ngx_now() - err_meta.t0) * 1000,
+                })
+            end
+        end
         return transport_http.handle_error(transport_err)
     end

+    -- Upstream responded — populate upstream state for access log
+    apisix_upstream.push_upstream_state({
+        addr = res._upstream_addr,
+        status = res.status,
+        connect_time = res._connect_time,
+        header_time = res._header_time,
+    })
+    if res._upstream_uri then
+        ctx.var.upstream_uri = res._upstream_uri
+    end
+    if res._upstream_host then
+        ctx.var.upstream_host = res._upstream_host
+    end
+    if res._upstream_scheme then
+        ctx.var.upstream_scheme = res._upstream_scheme
+    end
+
     -- Upstream responded — mark source before any early returns
     core.response.set_response_source(ctx, "upstream")

     if res.status == 429 or (res.status >= 500 and res.status < 600) then
+        if res._t0 then
+            apisix_upstream.update_upstream_state({
+                response_time = (ngx_now() - res._t0) * 1000,
+            })
+        end
+        if res._httpc then
+            res._httpc:close()
+        end
         return res.status
     end

     local body_reader = res.body_reader
     if not body_reader then
         core.log.warn("AI service sent no response body")
+        if res._t0 then
+            apisix_upstream.update_upstream_state({
+                response_time = (ngx_now() - res._t0) * 1000,
+            })
+        end
+        if res._httpc then
+            res._httpc:close()
+        end
         return 500
     end

@@ -243,6 +299,13 @@ function _M.before_proxy(conf, ctx, on_error)
         end
     end

+    -- Finalize upstream state with response_time after body is consumed
+    if res._t0 then
+        apisix_upstream.update_upstream_state({
+            response_time = (ngx_now() - res._t0) * 1000,
+        })
+    end
+
     if conf.keepalive then
         transport_http.set_keepalive(res, conf.keepalive_timeout, conf.keepalive_pool)
     end
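
In short, the plugin now registers one upstream attempt as soon as the cosocket transport returns (whether it succeeded or failed) and patches in the total response time once the body has been consumed. Below is a condensed sketch of that lifecycle, using only the `resty.apisix.upstream` calls visible in the diff above; the literal values are illustrative, not taken from the commit.

```lua
-- Condensed sketch of the upstream-state lifecycle; values are illustrative.
local apisix_upstream = require("resty.apisix.upstream")
local ngx_now = ngx.now

local t0 = ngx_now()               -- taken just before the cosocket connect (res._t0)

-- ... transport_http.request() connects, sends the request and reads headers ...

-- 1) headers (or an error) arrived: register the attempt so $upstream_addr,
--    $upstream_status, $upstream_connect_time and $upstream_header_time resolve
apisix_upstream.push_upstream_state({
    addr = "api.openai.com:443",   -- res._upstream_addr, or err_meta.upstream_addr on failure
    status = 200,                  -- res.status, or the mapped error status on failure
    connect_time = 32.5,           -- milliseconds, res._connect_time
    header_time = 180.2,           -- milliseconds, res._header_time
})

-- ... the response body is streamed to the client ...

-- 2) body fully consumed (or the request aborted early): patch in the total
--    duration so $upstream_response_time covers the whole exchange
apisix_upstream.update_upstream_state({
    response_time = (ngx_now() - t0) * 1000,   -- milliseconds since t0
})
```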

apisix/plugins/ai-transport/http.lua

Lines changed: 46 additions & 5 deletions

@@ -20,6 +20,7 @@

 local core = require("apisix.core")
 local http = require("resty.http")
+local ngx_now = ngx.now
 local pairs = pairs
 local ipairs = ipairs
 local type = type
@@ -71,20 +72,35 @@ end
 -- {method, scheme, host, port, path, headers, query, body (table),
 -- ssl_verify, ssl_server_name}
 -- @param timeout number Request timeout in milliseconds
--- @return table|nil Response object (with body_reader, headers, status)
+-- @return table|nil Response object (with body_reader, headers, status,
+-- _upstream_addr, _upstream_uri, _connect_time, _header_time, _t0)
 -- @return string|nil Error message
+-- @return table|nil Upstream metadata on failure (for recording failed attempts)
 function _M.request(params, timeout)
     local httpc, err = http.new()
     if not httpc then
         return nil, "failed to create http client: " .. (err or "unknown")
     end
     httpc:set_timeout(timeout)

+    local upstream_addr = (params.host or "") .. ":" .. (params.port or "")
+    local upstream_host = params.host or ""
+    local upstream_scheme = params.scheme or "http"
+    local t0 = ngx_now()
+
     local ok, err = httpc:connect(params)
     if not ok then
-        return nil, "connect: " .. (err or "unknown")
+        return nil, "connect: " .. (err or "unknown"), {
+            upstream_addr = upstream_addr,
+            upstream_host = upstream_host,
+            upstream_scheme = upstream_scheme,
+            upstream_uri = params.path,
+            t0 = t0,
+        }
     end

+    local connect_time = (ngx_now() - t0) * 1000
+
     local req_json
     if type(params.body) == "string" then
         -- Body already serialized (e.g., by SigV4 signing)
@@ -93,18 +109,43 @@ function _M.request(params, timeout)
         local err
         req_json, err = core.json.encode(params.body)
         if not req_json then
-            return nil, "encode body: " .. (err or "unknown")
+            httpc:close()
+            return nil, "encode body: " .. (err or "unknown"), {
+                upstream_addr = upstream_addr,
+                upstream_host = upstream_host,
+                upstream_scheme = upstream_scheme,
+                upstream_uri = params.path,
+                connect_time = connect_time,
+                t0 = t0,
+            }
         end
     end
     params.body = req_json

     local res, err = httpc:request(params)
     if not res then
-        return nil, "request: " .. (err or "unknown")
+        httpc:close()
+        return nil, "request: " .. (err or "unknown"), {
+            upstream_addr = upstream_addr,
+            upstream_host = upstream_host,
+            upstream_scheme = upstream_scheme,
+            upstream_uri = params.path,
+            connect_time = connect_time,
+            t0 = t0,
+        }
     end

-    -- Attach httpc to res so caller can manage keepalive
+    local header_time = (ngx_now() - t0) * 1000
+
+    -- Attach httpc and upstream metadata to res
     res._httpc = httpc
+    res._upstream_addr = upstream_addr
+    res._upstream_host = upstream_host
+    res._upstream_scheme = upstream_scheme
+    res._upstream_uri = params.path
+    res._connect_time = connect_time
+    res._header_time = header_time
+    res._t0 = t0

     return res
 end
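
For orientation, here is a minimal sketch of the new three-value return contract of `transport_http.request()` from the caller's side. The caller below is hypothetical (the real consumer is `before_proxy()` in `ai-proxy/base.lua` above, which also uses `transport_http.set_keepalive` rather than the raw client call shown here).

```lua
-- Hypothetical caller of the ai-transport http module, illustrating the new
-- (res, err, err_meta) return contract; not part of this commit.
local transport_http = require("apisix.plugins.ai-transport.http")

local function call_llm(params, timeout_ms)
    local res, err, err_meta = transport_http.request(params, timeout_ms)
    if not res then
        -- err_meta (when present) still identifies the failed attempt:
        -- upstream_addr/host/scheme/uri, connect_time (if the connect succeeded) and t0
        return nil, err, err_meta
    end

    -- the successful response carries the same metadata as underscore-prefixed fields
    ngx.log(ngx.INFO, "LLM upstream ", res._upstream_addr,
            " -> ", res.status,
            ", connect ", res._connect_time, "ms",
            ", headers ", res._header_time, "ms")

    -- stream the body through the reader returned by lua-resty-http, if any
    local chunk, read_err
    if res.body_reader then
        chunk, read_err = res.body_reader()
    end

    -- the caller owns the connection: reuse it or close it when done
    res._httpc:set_keepalive(60000, 16)

    return chunk, read_err
end
```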

docs/en/latest/plugins/ai-proxy.md

Lines changed: 14 additions & 4 deletions

@@ -2049,19 +2049,29 @@ In the Kafka topic, you should also see a log entry corresponding to the request
 The following example demonstrates how you can log LLM request related information in the gateway's access log to improve analytics and audit. The following variables are available:

 * `request_llm_model`: LLM model name specified in the request.
-* `apisix_upstream_response_time`: Time taken for APISIX to send the request to the upstream service and receive the full response.
 * `request_type`: Type of request, where the value could be `traditional_http`, `ai_chat`, or `ai_stream`.
 * `llm_time_to_first_token`: Duration from request sending to the first token received from the LLM service, in milliseconds.
 * `llm_model`: LLM model.
 * `llm_prompt_tokens`: Number of tokens in the prompt.
 * `llm_completion_tokens`: Number of chat completion tokens in the prompt.

+In addition, the following standard nginx upstream variables are automatically populated when `ai-proxy` sends requests via cosocket transport:
+
+* `upstream_addr`: Address of the upstream LLM service (e.g., `api.openai.com:443`).
+* `upstream_status`: HTTP status code returned by the upstream LLM service.
+* `upstream_response_time`: Total time spent receiving the response from the upstream LLM service, in seconds (e.g., `2.858`).
+* `upstream_connect_time`: Time spent establishing the connection to the upstream LLM service, in seconds.
+* `upstream_header_time`: Time spent receiving the response headers from the upstream LLM service, in seconds.
+* `upstream_host`: Hostname of the upstream LLM service as configured in the endpoint (e.g., `api.openai.com`).
+* `upstream_scheme`: Scheme used to connect to the upstream LLM service (e.g., `https`).
+* `upstream_uri`: Request URI path sent to the upstream LLM service (e.g., `/v1/chat/completions`).
+
 Update the access log format in your configuration file to include additional LLM related variables:

 ```yaml title="conf/config.yaml"
 nginx_config:
   http:
-    access_log_format: "$remote_addr - $remote_user [$time_local] $http_host \"$request_line\" $status $body_bytes_sent $request_time \"$http_referer\" \"$http_user_agent\" $upstream_addr $upstream_status $apisix_upstream_response_time \"$upstream_scheme://$upstream_host$upstream_uri\" \"$apisix_request_id\" \"$request_type\" \"$llm_time_to_first_token\" \"$llm_model\" \"$request_llm_model\" \"$llm_prompt_tokens\" \"$llm_completion_tokens\""
+    access_log_format: "$remote_addr - $remote_user [$time_local] $http_host \"$request_line\" $status $body_bytes_sent $request_time \"$http_referer\" \"$http_user_agent\" $upstream_addr $upstream_status $upstream_response_time \"$upstream_scheme://$upstream_host$upstream_uri\" \"$apisix_request_id\" \"$request_type\" \"$llm_time_to_first_token\" \"$llm_model\" \"$request_llm_model\" \"$llm_prompt_tokens\" \"$llm_completion_tokens\""
 ```

 Reload APISIX for configuration changes to take effect.
@@ -2103,7 +2113,7 @@ Now if you create a Route and send a request following the [Proxy to OpenAI exam
 In the gateway's access log, you should see a log entry similar to the following:

 ```text
-192.168.215.1 - - [21/Mar/2025:04:28:03 +0000] api.openai.com "POST /anything HTTP/1.1" 200 804 2.858 "-" "curl/8.6.0" - - - 5765 "http://api.openai.com" "5c5e0b95f8d303cb81e4dc456a4b12d9" "ai_chat" "2858" "gpt-4" "gpt-4" "23" "8"
+192.168.215.1 - - [21/Mar/2025:04:28:03 +0000] api.openai.com "POST /anything HTTP/1.1" 200 804 2.858 "-" "curl/8.6.0" api.openai.com:443 200 2.858 "https://api.openai.com/v1/chat/completions" "5c5e0b95f8d303cb81e4dc456a4b12d9" "ai_chat" "2858" "gpt-4" "gpt-4" "23" "8"
 ```

-The access log entry shows the request type is `ai_chat`, Apisix upstream response time is `5765` milliseconds, time to first token is `2858` milliseconds, Requested LLM model is `gpt-4`. LLM model is `gpt-4`, prompt token usage is `23`, and completion token usage is `8`.
+The access log entry shows the upstream address is `api.openai.com:443` with status `200`, the request type is `ai_chat`, APISIX upstream response time is `2.858` seconds, time to first token is `2858` milliseconds, requested LLM model is `gpt-4`, LLM model is `gpt-4`, prompt token usage is `23`, and completion token usage is `8`.
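
Because the values land in the standard upstream state, they are not tied to the access log format shown above; any log-phase consumer that reads nginx variables should see them as well. Here is a minimal sketch of reading them from Lua (a hypothetical snippet, not part of this commit or the documented examples):

```lua
-- Hypothetical log-phase snippet: the upstream variables populated by ai-proxy's
-- cosocket transport are readable through ngx.var like any proxied request.
local cjson = require("cjson.safe")

local entry = {
    upstream_addr          = ngx.var.upstream_addr,           -- e.g. "api.openai.com:443"
    upstream_status        = ngx.var.upstream_status,         -- e.g. "200"
    upstream_response_time = ngx.var.upstream_response_time,  -- seconds, e.g. "2.858"
    upstream_connect_time  = ngx.var.upstream_connect_time,   -- seconds
    upstream_header_time   = ngx.var.upstream_header_time,    -- seconds
    upstream_scheme        = ngx.var.upstream_scheme,         -- e.g. "https"
    upstream_host          = ngx.var.upstream_host,           -- e.g. "api.openai.com"
    upstream_uri           = ngx.var.upstream_uri,            -- e.g. "/v1/chat/completions"
}

ngx.log(ngx.INFO, "llm upstream: ", cjson.encode(entry))
```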

t/plugin/ai-proxy-upstream-vars.t

Lines changed: 118 additions & 0 deletions

@@ -0,0 +1,118 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+=encoding utf-8
+
+Validates that upstream nginx variables ($upstream_status, $upstream_addr,
+$upstream_response_time, $upstream_uri, etc.) are populated when ai-proxy
+sends requests via cosocket transport.
+
+=cut
+
+use t::APISIX 'no_plan';
+
+log_level("info");
+repeat_each(1);
+no_long_string();
+no_root_location();
+
+
+add_block_preprocessor(sub {
+    my ($block) = @_;
+
+    if (!defined $block->request) {
+        $block->set_value("request", "GET /t");
+    }
+});
+
+run_tests();
+
+__DATA__
+
+=== TEST 1: set route with ai-proxy pointing to mock server
+--- config
+    location /t {
+        content_by_lua_block {
+            local t = require("lib.test_admin").test
+            local code, body = t('/apisix/admin/routes/1',
+                ngx.HTTP_PUT,
+                [[{
+                    "uri": "/anything",
+                    "plugins": {
+                        "ai-proxy": {
+                            "provider": "openai",
+                            "auth": {
+                                "header": {
+                                    "Authorization": "Bearer test-key"
+                                }
+                            },
+                            "options": {
+                                "model": "gpt-4"
+                            },
+                            "override": {
+                                "endpoint": "http://127.0.0.1:1980"
+                            },
+                            "ssl_verify": false
+                        }
+                    }
+                }]]
+            )
+
+            if code >= 300 then
+                ngx.status = code
+            end
+            ngx.say(body)
+        }
+    }
+--- response_body
+passed
+
+
+
+=== TEST 2: non-streaming request populates upstream variables in access log
+--- request
+POST /anything
+{"model":"gpt-4","messages":[{"role":"user","content":"hello"}]}
+--- more_headers
+X-AI-Fixture: openai/chat-basic.json
+--- error_code: 200
+--- access_log eval
+qr/127\.0\.0\.1:\d+ 200 [\d.]+/
+
+
+
+=== TEST 3: streaming request populates upstream variables in access log
+--- request
+POST /anything
+{"model":"gpt-4","messages":[{"role":"user","content":"hello"}],"stream":true}
+--- more_headers
+X-AI-Fixture: openai/chat-streaming.sse
+--- error_code: 200
+--- access_log eval
+qr/127\.0\.0\.1:\d+ 200 [\d.]+/
+
+
+
+=== TEST 4: upstream_uri and upstream_host are populated with the target path and host
+--- request
+POST /anything
+{"model":"gpt-4","messages":[{"role":"user","content":"hello"}]}
+--- more_headers
+X-AI-Fixture: openai/chat-basic.json
+--- error_code: 200
+--- access_log eval
+qr{http://127\.0\.0\.1/v1/chat/completions}

t/plugin/ai-proxy3.t

Lines changed: 2 additions & 2 deletions

@@ -96,7 +96,7 @@ X-AI-Fixture: openai/chat-basic.json
 --- response_body eval
 qr/.*completion_tokens.*/
 --- access_log eval
-qr/.*[\d.]+ \"http:\/\/localhost[^"]*\" gpt-4 gpt-3.5-turbo [\d.]+ 23 8.*/
+qr/127\.0\.0\.1:1980 200 [\d.]+ \"http:\/\/127\.0\.0\.1\/v1\/chat\/completions\" gpt-4 gpt-3.5-turbo [\d.]+ 23 8.*/



@@ -256,4 +256,4 @@ passed
 --- response_body_like eval
 qr/6data: \[DONE\]\n\n/
 --- access_log eval
-qr/.*[\d.]+ \"http:\/\/localhost:1984\" gpt-4 gpt-3.5-turbo 2\d\d 15 20.*/
+qr/localhost:7737 200 [\d.]+ \"http:\/\/localhost\/v1\/chat\/completions\" gpt-4 gpt-3.5-turbo 2\d\d 15 20.*/
