Skip to content

Commit eda3381

Browse files
google-labs-jules[bot]lws-team
authored andcommitted
feat: Add resource measurement to lws_spawn
Adds the ability to measure the resource consumption of spawned processes. - A new struct `lws_spawn_resource_us_t` is introduced to store CPU time, peak memory usage, and I/O statistics. - The `lws_spawn_piped_info` struct is updated to include a pointer to the new resource struct, allowing the user to receive the collected metrics. - The reap callback `lsp_cb_t` is updated to pass the resource struct to the user. - On Unix-like systems, `getrusage()` is used to collect the resource usage information. - On Windows, `GetProcessTimes()`, `GetProcessMemoryInfo()`, and `GetProcessIoCounters()` are used. - The test `api-test-lws_spawn` is updated to verify the new functionality.
1 parent d04fd6d commit eda3381

7 files changed

Lines changed: 177 additions & 87 deletions

File tree

include/libwebsockets/lws-misc.h

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1033,8 +1033,22 @@ struct _lws_siginfo_t {
10331033
typedef struct _lws_siginfo_t siginfo_t;
10341034
#endif
10351035

1036-
typedef void (*lsp_cb_t)(void *opaque, lws_usec_t *accounting, siginfo_t *si,
1037-
int we_killed_him);
1036+
/**
1037+
* lws_spawn_resource_us_t - resource usage results from spawned process
1038+
*
1039+
* All time values are in uS.
1040+
* All size values are in bytes.
1041+
*/
1042+
typedef struct lws_spawn_resource_us {
1043+
uint64_t us_cpu_user; /**< user space cpu time */
1044+
uint64_t us_cpu_sys; /**< kernel space cpu time */
1045+
1046+
uint64_t peak_mem_rss; /**< peak resident memory */
1047+
uint64_t peak_mem_virt; /**< peak virtual memory */
1048+
} lws_spawn_resource_us_t;
1049+
1050+
typedef void (*lsp_cb_t)(void *opaque, const lws_spawn_resource_us_t *res,
1051+
siginfo_t *si, int we_killed_him);
10381052

10391053

10401054
/**
@@ -1050,11 +1064,11 @@ typedef void (*lsp_cb_t)(void *opaque, lws_usec_t *accounting, siginfo_t *si,
10501064
* \p wd: working directory to cd to after fork, NULL defaults to /tmp
10511065
* \p plsp: NULL, or pointer to the outer lsp pointer so it can be set NULL when destroyed
10521066
* \p opaque: pointer passed to the reap callback, if any
1053-
* \p timeout: optional us-resolution timeout, or zero
10541067
* \p reap_cb: callback when child process has been reaped and the lsp destroyed
10551068
* \p tsi: tsi to bind stdwsi to... from opt_parent if given
10561069
* \p cgroup_name_suffix: for Linux, encapsulate spawn into this new cgroup
10571070
* \p p_cgroup_ret: NULL, or pointer to int to show if cgroups applied OK (0 = OK)
1071+
* \p pres: NULL, or pointer to a lws_spawn_resource_us_t to take the results
10581072
*/
10591073
struct lws_spawn_piped_info {
10601074
struct lws_dll2_owner *owner;
@@ -1073,6 +1087,8 @@ struct lws_spawn_piped_info {
10731087

10741088
lsp_cb_t reap_cb;
10751089

1090+
lws_spawn_resource_us_t *res;
1091+
10761092
lws_usec_t timeout_us;
10771093
int max_log_lines;
10781094
int tsi;

lib/core-net/private-lib-core-net.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -941,7 +941,7 @@ struct lws_spawn_piped {
941941
lws_usec_t created; /* set by lws_spawn_piped() */
942942
lws_usec_t reaped;
943943

944-
lws_usec_t accounting[4];
944+
lws_spawn_resource_us_t res;
945945

946946
#if defined(WIN32)
947947
HANDLE child_pid;

lib/plat/unix/private-lib-plat-unix.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242

4343
#include <sys/socket.h>
4444
#include <sys/types.h>
45+
#include <sys/resource.h>
4546
#include <sys/stat.h>
4647
#include <sys/time.h>
4748
#include <sys/mman.h>

lib/plat/unix/unix-spawn.c

Lines changed: 84 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -153,39 +153,32 @@ lws_spawn_piped_destroy(struct lws_spawn_piped **_lsp)
153153
int
154154
lws_spawn_reap(struct lws_spawn_piped *lsp)
155155
{
156-
long hz = sysconf(_SC_CLK_TCK); /* accounting Hz */
157156
void *opaque = lsp->info.opaque;
158157
lsp_cb_t cb = lsp->info.reap_cb;
159-
struct lws_spawn_piped temp;
160-
struct tms tms;
161-
#if defined(__OpenBSD__) || defined(__NetBSD__)
162-
struct rusage rusa;
163-
int status;
164-
#endif
165-
int n;
158+
lws_spawn_resource_us_t res;
159+
struct rusage ru;
160+
siginfo_t si;
161+
int n, status;
166162

167163
if (lsp->child_pid < 1)
168164
return 0;
169165

170166
/* check if exited, do not reap yet */
171167

172168
memset(&lsp->si, 0, sizeof(lsp->si));
173-
#if defined(__OpenBSD__) || defined(__NetBSD__)
174-
n = wait4(lsp->child_pid, &status, WNOHANG, &rusa);
175-
if (!n)
176-
return 0;
177-
lsp->si.si_code = WIFEXITED(status);
178-
#else
179-
n = waitid(P_PID, (id_t)lsp->child_pid, &lsp->si, WEXITED | WNOHANG | WNOWAIT);
180-
#endif
169+
n = wait4(lsp->child_pid, &status, WNOHANG, &ru);
181170
if (n < 0) {
182-
lwsl_info("%s: child %d still running\n", __func__, lsp->child_pid);
171+
lwsl_info("%s: child %d still running (errno %d)\n", __func__,
172+
lsp->child_pid, errno);
183173
return 0;
184174
}
185175

186-
if (!lsp->si.si_code)
176+
if (!n)
187177
return 0;
188178

179+
lsp->si.si_code = WIFEXITED(status);
180+
lsp->si.si_status = WEXITSTATUS(status);
181+
189182
/* his process has exited... */
190183

191184
if (!lsp->reaped) {
@@ -239,33 +232,37 @@ lws_spawn_reap(struct lws_spawn_piped *lsp)
239232
* Collect the final information and then reap the dead process
240233
*/
241234

242-
if (times(&tms) != (clock_t) -1) {
243-
/*
244-
* Cpu accounting in us
245-
*/
246-
lsp->accounting[0] = (lws_usec_t)((uint64_t)tms.tms_cstime * 1000000) / hz;
247-
lsp->accounting[1] = (lws_usec_t)((uint64_t)tms.tms_cutime * 1000000) / hz;
248-
lsp->accounting[2] = (lws_usec_t)((uint64_t)tms.tms_stime * 1000000) / hz;
249-
lsp->accounting[3] = (lws_usec_t)((uint64_t)tms.tms_utime * 1000000) / hz;
235+
lsp->res.us_cpu_user =
236+
((uint64_t)ru.ru_utime.tv_sec * 1000000) + (uint64_t)ru.ru_utime.tv_usec;
237+
lsp->res.us_cpu_sys =
238+
((uint64_t)ru.ru_stime.tv_sec * 1000000) + (uint64_t)ru.ru_stime.tv_usec;
239+
240+
/* ru_maxrss is in KB */
241+
lsp->res.peak_mem_rss = (uint64_t)ru.ru_maxrss * 1024;
242+
243+
if (getrusage(RUSAGE_CHILDREN, &ru) == 0) {
244+
lsp->res.us_cpu_user +=
245+
((uint64_t)ru.ru_utime.tv_sec * 1000000) + (uint64_t)ru.ru_utime.tv_usec;
246+
lsp->res.us_cpu_sys +=
247+
((uint64_t)ru.ru_stime.tv_sec * 1000000) + (uint64_t)ru.ru_stime.tv_usec;
248+
/* ru_maxrss is in KB */
249+
lsp->res.peak_mem_rss += (uint64_t)ru.ru_maxrss * 1024;
250+
} else
251+
lwsl_err("%s: getrusage failed\n", __func__);
252+
253+
n = waitpid(lsp->child_pid, &status, WNOHANG);
254+
if (n < 0) {
255+
lwsl_info("%s: child %d vanished\n", __func__, lsp->child_pid);
250256
}
251257

252-
temp = *lsp;
253-
#if defined(__OpenBSD__) || defined(__NetBSD__)
254-
n = wait4(lsp->child_pid, &status, WNOHANG, &rusa);
255-
if (!n)
256-
return 0;
257-
lsp->si.si_code = WIFEXITED(status);
258-
if (lsp->si.si_code == CLD_EXITED)
259-
temp.si.si_code = CLD_EXITED;
260-
temp.si.si_status = WEXITSTATUS(status);
261-
#else
262-
n = waitid(P_PID, (id_t)lsp->child_pid, &temp.si, WEXITED | WNOHANG);
263-
#endif
264-
temp.si.si_status &= 0xff; /* we use b8 + for flags */
265-
lwsl_warn("%s: waitd says %d, process exit %d\n",
266-
__func__, n, temp.si.si_status);
258+
lwsl_info("%s: waitd says %d, process exit %d\n",
259+
__func__, n, lsp->si.si_status);
267260

268-
lsp->child_pid = -1;
261+
lsp->child_pid = -1;
262+
si = lsp->si;
263+
res = lsp->res;
264+
n = lsp->we_killed_him_timeout |
265+
(lsp->we_killed_him_spew << 1);
269266

270267
/* destroy the lsp itself first (it's freed and plsp set NULL */
271268

@@ -275,9 +272,7 @@ lws_spawn_reap(struct lws_spawn_piped *lsp)
275272
/* then do the parent callback informing it's destroyed */
276273

277274
if (cb)
278-
cb(opaque, temp.accounting, &temp.si,
279-
temp.we_killed_him_timeout |
280-
(temp.we_killed_him_spew << 1));
275+
cb(opaque, &res, &si, n);
281276

282277
return 1; /* was reaped */
283278
}
@@ -559,7 +554,6 @@ lws_spawn_piped(const struct lws_spawn_piped_info *i)
559554
close(cfd);
560555
}
561556

562-
563557
lwsl_info("%s: created cgroup %s\n", __func__, lsp->cgroup_path);
564558
lws_snprintf(pth, sizeof(pth), "%s/pids.max", lsp->cgroup_path);
565559
cfd = lws_open(pth, LWS_O_WRONLY);
@@ -821,7 +815,7 @@ lws_spawn_prepare_self_cgroup(const char *user, const char *group)
821815
fd = lws_open(path, LWS_O_WRONLY);
822816
if (fd < 0) {
823817
/* May fail if user doesn't own the file, that's okay */
824-
lwsl_info("%s: cannot open subtree_control: %s\n",
818+
lwsl_notice("%s: cannot open subtree_control: %s\n",
825819
__func__, strerror(errno));
826820
return 0; /* Still a success if dir exists */
827821
}
@@ -852,24 +846,49 @@ lws_spawn_prepare_self_cgroup(const char *user, const char *group)
852846
lwsl_warn("%s: group '%s' not found\n", __func__, group);
853847
}
854848

855-
lwsl_notice("%s: switching %s to %d:%d\n",
856-
__func__, path, uid, gid);
857-
858-
if (chown(path, uid, gid) < 0)
859-
lwsl_warn("%s: failed to chown %s: %s\n",
860-
__func__, path, strerror(errno));
861-
/* 2. ALSO change ownership of the critical control files inside it */
862-
lws_snprintf(path, sizeof(path), "/sys/fs/cgroup%s/cgroup.procs", self_cgroup);
863-
if (chown(path, uid, gid) < 0)
864-
lwsl_warn("%s: failed to chown %s: %s\n",
865-
__func__, path, strerror(errno));
866-
867-
lws_snprintf(path, sizeof(path), "/sys/fs/cgroup%s/cgroup.subtree_control", self_cgroup);
868-
if (chown(path, uid, gid) < 0)
869-
lwsl_warn("%s: failed to chown %s: %s\n",
870-
__func__, path, strerror(errno));
871-
872-
lwsl_notice("%s: lws cgroup parent configured\n", __func__);
849+
if (uid != (uid_t)-1 || gid != (gid_t)-1) {
850+
851+
lwsl_notice("%s: switching %s to %d:%d\n",
852+
__func__, path, uid, gid);
853+
854+
if (chown(path, uid, gid) < 0)
855+
lwsl_warn("%s: failed to chown %s: %s\n",
856+
__func__, path, strerror(errno));
857+
/* 2. ALSO change ownership of the critical control files inside it */
858+
lws_snprintf(path, sizeof(path), "/sys/fs/cgroup%s/cgroup.procs", self_cgroup);
859+
if (chown(path, uid, gid) < 0)
860+
lwsl_warn("%s: failed to chown %s: %s\n",
861+
__func__, path, strerror(errno));
862+
863+
lws_snprintf(path, sizeof(path), "/sys/fs/cgroup%s/cgroup.subtree_control", self_cgroup);
864+
if (chown(path, uid, gid) < 0)
865+
lwsl_warn("%s: failed to chown %s: %s\n",
866+
__func__, path, strerror(errno));
867+
}
868+
lws_snprintf(path, sizeof(path), "/sys/fs/cgroup%s/lws", self_cgroup);
869+
mkdir(path, 0775);
870+
if (uid != (uid_t)-1 || gid != (gid_t)-1) {
871+
872+
lwsl_notice("%s: switching %s to %d:%d\n",
873+
__func__, path, uid, gid);
874+
875+
if (chown(path, uid, gid) < 0)
876+
lwsl_warn("%s: failed to chown %s: %s\n",
877+
__func__, path, strerror(errno));
878+
/* 2. ALSO change ownership of the critical control files inside it */
879+
lws_snprintf(path, sizeof(path), "/sys/fs/cgroup%s/cgroup.procs", self_cgroup);
880+
if (chown(path, uid, gid) < 0)
881+
lwsl_warn("%s: failed to chown %s: %s\n",
882+
__func__, path, strerror(errno));
883+
884+
lws_snprintf(path, sizeof(path), "/sys/fs/cgroup%s/cgroup.subtree_control", self_cgroup);
885+
if (chown(path, uid, gid) < 0)
886+
lwsl_warn("%s: failed to chown %s: %s\n",
887+
__func__, path, strerror(errno));
888+
}
889+
890+
891+
lwsl_notice("%s: lws cgroup parent configured: %s\n", __func__, path);
873892

874893
return 0;
875894
#endif

lib/plat/windows/windows-spawn.c

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,10 @@ lws_spawn_reap(struct lws_spawn_piped *lsp)
152152
void *opaque = lsp->info.opaque;
153153
lsp_cb_t cb = lsp->info.reap_cb;
154154
struct _lws_siginfo_t lsi;
155-
lws_usec_t acct[4];
155+
PROCESS_MEMORY_COUNTERS pmc;
156+
IO_COUNTERS ic;
157+
ULARGE_INTEGER uli;
158+
FILETIME ftk, ftu;
156159
DWORD ex;
157160

158161
if (!lsp->child_pid)
@@ -205,6 +208,27 @@ lws_spawn_reap(struct lws_spawn_piped *lsp)
205208
* Collect the final information and then reap the dead process
206209
*/
207210

211+
if (GetProcessTimes(lsp->child_pid, &lsp->ft_create, &lsp->ft_exit,
212+
&ftk, &ftu)) {
213+
uli.LowPart = ftu.dwLowDateTime;
214+
uli.HighPart = ftu.dwHighDateTime;
215+
lsp->res.us_cpu_user = uli.QuadPart / 10;
216+
217+
uli.LowPart = ftk.dwLowDateTime;
218+
uli.HighPart = ftk.dwHighDateTime;
219+
lsp->res.us_cpu_sys = uli.QuadPart / 10;
220+
}
221+
222+
if (GetProcessMemoryInfo(lsp->child_pid, &pmc, sizeof(pmc)))
223+
lsp->res.peak_mem_rss = pmc.PeakWorkingSetSize;
224+
225+
/*
226+
if (GetProcessIoCounters(lsp->child_pid, &ic)) {
227+
lsp->res.io_r_bytes = ic.ReadTransferCount;
228+
lsp->res.io_w_bytes = ic.WriteTransferCount;
229+
}
230+
*/
231+
208232
lsi.retcode = 0x10000 | (int)ex;
209233
lwsl_notice("%s: process exit 0x%x\n", __func__, lsi.retcode);
210234
lsp->child_pid = NULL;
@@ -216,9 +240,8 @@ lws_spawn_reap(struct lws_spawn_piped *lsp)
216240

217241
/* then do the parent callback informing it's destroyed */
218242

219-
memset(acct, 0, sizeof(acct));
220243
if (cb)
221-
cb(opaque, acct, &lsi, 0);
244+
cb(opaque, lsp->info.res ? lsp->info.res : &lsp->res, &lsi, 0);
222245

223246
lwsl_notice("%s: completed reap\n", __func__);
224247

lib/roles/cgi/cgi-server.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ lws_cgi_grace(lws_sorted_usec_list_t *sul)
8686

8787

8888
static void
89-
lws_cgi_reap_cb(void *opaque, lws_usec_t *accounting, siginfo_t *si,
89+
lws_cgi_reap_cb(void *opaque, const struct lws_spawn_resource_us *res, siginfo_t *si,
9090
int we_killed_him)
9191
{
9292
struct lws *wsi = (struct lws *)opaque;

0 commit comments

Comments
 (0)