From d1b7a7e06389aa4e7b75865acfc2ea4b9bd895af Mon Sep 17 00:00:00 2001 From: Ruairi Conlon Date: Tue, 2 Jun 2026 16:17:24 +0100 Subject: [PATCH 1/7] feature/dbwritemodule-Initial-commit --- di/dbwrite/dbwrite.md | 248 ++++++++++++++++++++++++++++++++++++++++++ di/dbwrite/dbwrite.q | 100 +++++++++++++++++ di/dbwrite/init.q | 9 ++ di/dbwrite/test.csv | 101 +++++++++++++++++ 4 files changed, 458 insertions(+) create mode 100644 di/dbwrite/dbwrite.md create mode 100644 di/dbwrite/dbwrite.q create mode 100644 di/dbwrite/init.q create mode 100644 di/dbwrite/test.csv diff --git a/di/dbwrite/dbwrite.md b/di/dbwrite/dbwrite.md new file mode 100644 index 00000000..6d0b450b --- /dev/null +++ b/di/dbwrite/dbwrite.md @@ -0,0 +1,248 @@ +# di.dbwrite + +Sort, attribute application, save-down manipulation, and garbage-collection utilities for kdb+ processes that persist data to disk (RDB, WDB, TickerLogReplay). + +--- + +## Features + +- Sort on-disk table partitions by configured columns using `xasc` +- Apply kdb+ attributes (`p`, `s`, `g`, `u`) to on-disk columns after sort +- Register per-table pre-write manipulation functions applied before save-down +- Run `.Q.gc[]` with before/after memory logging +- Sort and attribute behaviour driven by a CSV config file; a `default` row acts as a fallback +- Built-in `defaultparams` provides an out-of-the-box fallback (sort by `time` ascending) when no config file is loaded +- All errors from sort, attribute application, and manipulation are caught and logged — they do not propagate + +--- + +## Dependencies + +| Dependency | Key | Required | Description | +|---|---|---|---| +| `di.log` | `` `log `` | yes | Logging functions `info`, `warn`, `error` — each `{[ctx;msg] ...}` | + +The `log` dependency must be passed to `init`. The module throws if it is absent or `(::)`. 
+ +--- + +## Sort config CSV + +`loadconfig` reads a CSV with four columns: + +| Column | Type | Description | +|---|---|---| +| `tabname` | symbol | Table name, or `` `default `` as a catch-all fallback | +| `att` | symbol | kdb+ attribute to apply: `p`, `s`, `g`, `u`, or empty for none | +| `column` | symbol | Column to sort by and/or apply `att` to; a row with an empty column is left out of the sort key | +| `sort` | boolean | `1b` — include in `xasc` sort key; `0b` — attribute only | + +Example `sort.csv`: + +``` +tabname,att,column,sort +trade,p,sym,1 +trade,,price,0 +quote,p,sym,1 +default,,time,1 +``` + +Sorts `trade` by `sym`, applies `p` to `sym`. Tables not listed fall back to `default` and sort by `time`. + +--- + +## Functions + +### Summary + +| Function | Description | +|---|---| +| `init[config;deps]` | Wire injected dependencies; must be called first | +| `loadconfig[file]` | Load and validate the sort config CSV into module state | +| `sort[d]` | Sort an on-disk partition and apply attributes per config | +| `applyattr[dloc;colname;att]` | Apply a single kdb+ attribute to an on-disk column | +| `manipulate[t;x]` | Apply a registered pre-write manipulation to a table | +| `postreplay[d;p]` | Post-EOD stub; override to add custom logic | +| `gc[]` | Run `.Q.gc[]` and log before/after memory stats | + +--- + +### `init[config;deps]` + +Wires injected dependencies into the module. Must be called before any other function. + +**Parameters** + +| Parameter | Type | Description | +|---|---|---| +| `config` | any | Accepted but unused; pass `(::)` | +| `deps` | dict | Must contain `` `log `` → `` `info`warn`error!(infofunc;warnfunc;errfunc) `` | + +**Returns** — generic null. + +Throws with a descriptive message if the `log` dependency is missing or set to `(::)`. 
+ +```q +log:use`di.log +log.init[logconfig] +logdep:`info`warn`error!(log.info;log.warn;log.error) + +dbwrite:use`di.dbwrite +dbwrite.init[(::);(enlist`log)!enlist logdep] +``` + +--- + +### `loadconfig[file]` + +Loads and validates the sort configuration CSV, storing the result in module state for use by `sort`. + +**Parameters** + +| Parameter | Type | Description | +|---|---|---| +| `file` | hsym | Path to the sort config CSV. Passing a null symbol (`` ` ``) uses the module-level `defaultfile` value | + +**Returns** — generic null on success; throws on failure. + +Validation checks that all four required columns (`tabname`, `att`, `column`, `sort`) are present and that all `att` values are within `` ``p`s`g`u ``. Throws a descriptive error for invalid files or unreadable paths. + +```q +dbwrite.loadconfig[`:config/sort.csv] +``` + +> **Note:** `defaultfile` is a module-level variable (default: null symbol). Set it in `init.q` to configure a default sort config path, or always pass the path explicitly. When `params` is empty and `defaultfile` is null, `sort` falls back to the built-in `defaultparams` rather than erroring. + +--- + +### `sort[d]` + +Sorts an on-disk table partition and applies configured attributes. + +**Parameters** + +| Parameter | Type | Description | +|---|---|---| +| `d` | symbol or list | Table name alone, or `(tabname;dir)`, or `(tabname;list of dirs)` — see below | + +`d` forms: + +| Form | Example | +|---|---| +| Symbol | `` `trade `` | +| Tabname + single dir | `` (`trade;`:hdb/2024.01.02/trade/) `` | +| Tabname + dir list | `` (`trade;`:hdb/2024.01.02/trade/ `:hdb/2024.01.03/trade/) `` | + +**Returns** — generic null on success; `()` if no sort config is found for the table. + +If `params` is empty when `sort` is called: +- `defaultfile` is set → `loadconfig[defaultfile]` is called to populate `params`. +- `defaultfile` is null → `params` is populated from the built-in `defaultparams` (sort by `time` ascending, no attribute). 
+ +Config lookup order within `params`: +1. Rows where `tabname` matches — used directly. +2. Rows where `tabname = \`default` — used with a `warn` log. +3. No match — warns and returns `()`. + +Sort and attribute errors are caught, logged, and swallowed. + +```q +dbwrite.sort[(`trade;`:hdb/2024.01.02/trade/)] +``` + +--- + +### `applyattr[dloc;colname;att]` + +Applies a single kdb+ attribute to an on-disk column. + +**Parameters** + +| Parameter | Type | Description | +|---|---|---| +| `dloc` | hsym | On-disk partition directory (e.g. `` `:hdb/2024.01.02/trade/ ``) | +| `colname` | symbol | Column name | +| `att` | symbol | Attribute to apply: `` `p ``, `` `s ``, `` `g ``, or `` `u `` | + +**Returns** — generic null on success. + +Logs the attempt before applying. On failure, logs the error and continues — does not throw. + +```q +dbwrite.applyattr[`:hdb/2024.01.02/trade/;`sym;`p] +``` + +--- + +### `manipulate[t;x]` + +Applies a registered pre-write manipulation to table `x` of type `t`. + +**Parameters** + +| Parameter | Type | Description | +|---|---|---| +| `t` | symbol | Table name used to look up the registered manipulation function | +| `x` | table | Table data to transform | + +**Returns** — modified table, or original table unmodified if no manipulation is registered or the function throws. + +Manipulations are registered in the module-internal `savedownmanipulation` dictionary (`` tabname → unary function ``). This dictionary is module-bound and populated by process initialisation code before EOD. + +```q +data:dbwrite.manipulate[`trade;data] +``` + +--- + +### `postreplay[d;p]` + +Post-EOD stub called after all tables have been written and sorted. + +**Parameters** + +| Parameter | Type | Description | +|---|---|---| +| `d` | hsym | HDB directory | +| `p` | date | Partition value | + +**Returns** — generic null. + +This is a no-op by default. Override at the call site to add custom post-replay logic. 
+ +```q +dbwrite.postreplay[`:hdb;2024.01.02] +``` + +--- + +### `gc[]` + +Runs `.Q.gc[]` and logs before/after memory statistics. + +**Returns** — generic null. + +Emits two `info`-level log lines: memory stats before collection, and bytes recovered plus memory stats after. + +```q +dbwrite.gc[] +``` + +--- + +## Running tests + +```q +k4unit:use`di.k4unit +k4unit.moduletest`di.dbwrite +``` + +Tests cover: dependency injection, `loadconfig` validation, `applyattr` on valid and missing paths, `sort` with explicit config / default fallback / no-config skip, `manipulate` pass-through, and `postreplay` stub. + +--- + +## Exported symbols + +```q +export:([init;sort;applyattr;loadconfig;manipulate;postreplay;gc]) +``` diff --git a/di/dbwrite/dbwrite.q b/di/dbwrite/dbwrite.q new file mode 100644 index 00000000..5915529c --- /dev/null +++ b/di/dbwrite/dbwrite.q @@ -0,0 +1,100 @@ +/ sort params table - populated by loadconfig +params:([] tabname:`symbol$(); att:`symbol$(); column:`symbol$(); sort:`boolean$()); + +/ save-down manipulation registry: tabname -> unary function +savedownmanipulation:()!(); + +/ default sort params - used when params is empty and no config file is set +defaultparams:([] tabname:enlist`default; att:enlist`; column:enlist`time; sort:enlist 1b); + +/ load and validate sort.csv into .z.M.params +/ file: hsym path; null falls back to .z.M.defaultfile +loadconfig:{[file] + file:hsym file; + if[null file;file:.z.M.defaultfile]; + if[null file; + dp:.z.M.defaultparams; + .z.m.loginfo[`dbwrite;"no sort config file set; using defaultparams"]; + @[.z.M;`params;:;dp]; + :[]]; + p:@[ + {.z.m.loginfo[`dbwrite;"retrieving sort settings from ",string x];("SSSB";enlist",")0:x}; + file; + {[f;e]'"failed to open ",string[f],": ",e}[file] + ]; + if[not all spcb:(spc:cols p) in `tabname`att`column`sort; + '"unrecognised columns (",(", " sv string spc where not spcb),") in ",string file]; + if[not all atb:(at:distinct p`att) in ``p`s`g`u; + '"unrecognised 
attribute(s): ",", " sv string at where not atb]; + @[.z.M;`params;:;p]; + }; + +/ apply a single kdb+ attribute to an on-disk column; logs and swallows errors +applyattr:{[dloc;colname;att] + .z.m.loginfo[`dbwrite;"applying ",string[att]," attr to ",string[colname]," in ",string dloc]; + .[{@[x;y;z#]};(dloc;colname;att); + {[dloc;colname;att;e] + .z.m.logerr[`dbwrite;"unable to apply ",string[att]," attr to ",string[colname]," in ",string[dloc],": ",e] + }[dloc;colname;att] + ]; + }; + +/ sort an on-disk table partition and apply attributes per sort.csv config +/ d: tabname | (tabname;dir) | (tabname;list of dirs) +sort:{[d] + if[0=count .z.M.params;.z.M.loadconfig .z.M.defaultfile]; + .z.m.loginfo[`dbwrite;"sorting ",(st:string t:first d)," table"]; + sp:$[count tabsp:select from .z.M.params where tabname=t; + [.z.m.loginfo[`dbwrite;"sort params found for: ",st];tabsp]; + count defsp:select from .z.M.params where tabname=`default; + [.z.m.logwarn[`dbwrite;"no sort params for: ",st,"; using defaults"];defsp]; + [.z.m.logwarn[`dbwrite;"no sort params for: ",st,"; skipping sort"];:()]]; + {[sp;dloc] + if[count sortcols:exec column from sp where sort,not null column; + .z.m.loginfo[`dbwrite;"sorting ",string[dloc]," by: ",", " sv string sortcols]; + .[xasc;(sortcols;dloc); + {[sc;dl;e] + .z.m.logerr[`dbwrite;"failed to sort ",string[dl]," by ",(", " sv string sc),": ",e] + }[sortcols;dloc]]]; + if[count attrcols:select column,att from sp where not null att; + .z.M.applyattr[dloc;;]'[attrcols`column;attrcols`att]]; + }[sp] each distinct (),last d; + .z.m.loginfo[`dbwrite;"finished sorting ",st," table"]; + }; + +/ apply registered pre-write manipulation to table x of type t +/ returns modified table; on error logs and returns original unmodified table +manipulate:{[t;x] + $[t in key .z.M.savedownmanipulation; + @[.z.M.savedownmanipulation[t];x; + {[x;e].z.m.logerr[`dbwrite;"save-down manipulation failed: ",e];x}[x]]; + x] + }; + +/ post-EOD hook - called after all 
tables written and sorted +/ d: hdb directory (hsym), p: partition value (date) +/ stub: override at the call site to add custom post-replay logic +postreplay:{[d;p]}; + +/ format current process memory stats as a loggable string +memstats:{"mem stats: ",{"; "sv "=" sv'flip(string key x;(string value x),\:" MB")}`long$.Q.w[]%1048576}; + +/ run .Q.gc[] and log before/after memory stats +gc:{ + .z.m.loginfo[`dbwrite;"starting garbage collect. ",.z.M.memstats[]]; + r:.Q.gc[]; + .z.m.loginfo[`dbwrite;"garbage collection returned ",(string `long$r%1048576),"MB. ",.z.M.memstats[]] + }; + +init:{[config;deps] + / deps: `log!(logdict) + / `log: `info`warn`error!(infofunc;warnfunc;errfunc) - required + / example: dbwrite.init[enlist[`log]!enlist logdep] + logdict:$[99h=type deps;$[(`log in key deps) and not (::)~deps`log;deps`log;()!()];()!()]; + if[not count logdict; + '"di.dbwrite: log dependency is required; pass `info`warn`error functions - see di.log or refer to confluence documentation"; + ]; + .z.m.loginfo:logdict`info; + .z.m.logwarn:logdict`warn; + .z.m.logerr:logdict`error; + }; \ No newline at end of file diff --git a/di/dbwrite/init.q b/di/dbwrite/init.q new file mode 100644 index 00000000..dcca8127 --- /dev/null +++ b/di/dbwrite/init.q @@ -0,0 +1,9 @@ +/ dbwrite module - sort, attribute application, save-down manipulation, and GC utilities +/ used by processes that persist data to disk (rdb, wdb, tickerlogreplay) + +\l ::dbwrite.q + +/ default sort config file - set directly or extend init with a config dep +defaultfile:`; + +export:([init;sort;applyattr;loadconfig;manipulate;postreplay;gc]) \ No newline at end of file diff --git a/di/dbwrite/test.csv b/di/dbwrite/test.csv new file mode 100644 index 00000000..53265fc1 --- /dev/null +++ b/di/dbwrite/test.csv @@ -0,0 +1,101 @@ +action,ms,bytes,lang,code,repeat,minver,comment +/ Pre-test set-up: load module and mock loggers, and initialise module with mocks +before,0,0,q,dbwrite:use`di.dbwrite,1,,load di.dbwrite 
module +before,0,0,q,logcount:0,1,,initialise log call counter +before,0,0,q,loginfo:{[c;m] logcount::logcount+1},1,,mock info logger +before,0,0,q,logwarn:{[c;m] logcount::logcount+1},1,,mock warn logger +before,0,0,q,logerr:{[c;m] logcount::logcount+1},1,,mock error logger +before,0,0,q,"logdep:`info`warn`error!(loginfo;logwarn;logerr)",1,,build log dep dict +before,0,0,q,"deps:(enlist`log)!enlist logdep",1,,wrap in deps dict +before,0,0,q,dbwrite.init[(::);deps],1,,initialise module with mock loggers + +/ Test 1: init wires injected logger into individual loginfo/logwarn/logerr slots +true,0,0,q,logdep[`info]~.m.di.0dbwrite.loginfo,1,1,injected info function stored +true,0,0,q,logdep[`warn]~.m.di.0dbwrite.logwarn,1,1,injected warn function stored +true,0,0,q,logdep[`error]~.m.di.0dbwrite.logerr,1,1,injected error function stored + +/ Test 2: init errors when log dep not provided +fail,0,0,q,dbwrite.init[(::);(::)],1,1,init without log dep throws an error +fail,0,0,q,dbwrite.init[(::);`log!(::)],1,1,init with log set to (::) throws an error + +/ Test 3: manipulate: error cases +run,0,0,q,"tbl:([]sym:`AAPL`IBM;price:100 200f)",1,,sample table +true,0,0,q,"tbl~dbwrite.manipulate[`trade;tbl]",1,,unregistered table name +true,0,0,q,"tbl~dbwrite.manipulate[`;tbl]",1,,null table name +true,0,0,q,"(0#tbl)~dbwrite.manipulate[`other;0#tbl]",1,,empty table + +/ Test 4 postreplay: returns generic null +true,0,0,q,(::)~dbwrite.postreplay[`:hdb;2024.01.01],1,,stub returns generic null + +/ Test 5: sort uses defaultparams (sort by time) when params is empty and defaultfile is null +run,0,0,q,`:dbwrite_defp_tp/.d set `time`sym,1,,write column order file +run,0,0,q,`:dbwrite_defp_tp/time set 2024.01.01D09:00:00.000000000 2024.01.01D08:00:00.000000000,1,,write unsorted time column +run,0,0,q,`:dbwrite_defp_tp/sym set `IBM`AAPL,1,,write sym column +run,0,0,q,"dbwrite.sort[(`anytable;`:dbwrite_defp_tp/)]",1,,sort using defaultparams (empty params + null defaultfile) 
+true,0,0,q,(asc exec time from get `:dbwrite_defp_tp/)~exec time from get `:dbwrite_defp_tp/,1,,time column sorted ascending by defaultparams +run,0,0,q,@[hdel;`:dbwrite_defp_tp/;{}],1,,cleanup test partition + +/ Test 6: loadconfig: valid config file is loaded and parsed correctly +run,0,0,q,cfgfile:`:dbwrite_test.csv,1,,temp sort config path +run,0,0,q,"cfgfile 0:(""tabname,att,column,sort"";""trade,p,sym,1"";""trade,,price,0"")",1,,write test sort config +run,0,0,q,dbwrite.loadconfig[cfgfile],1,,loadconfig with valid csv succeeds + +/ Test 6 loadconfig - invalid config files +true,0,0,q,@[dbwrite.loadconfig;`:nonexistent_file_xyz;{1b}],1,,nonexistent file throws +run,0,0,q,badcols:`:dbwrite_badcols.csv,1,, +run,0,0,q,"badcols 0:(""wrongcol,att,column,sort"";""trade,p,sym,1"")",1,,csv with unrecognised column name +true,0,0,q,@[dbwrite.loadconfig;badcols;{1b}],1,,unrecognised column throws +run,0,0,q,badatt:`:dbwrite_badatt.csv,1,, +run,0,0,q,"badatt 0:(""tabname,att,column,sort"";""trade,z,sym,1"")",1,,csv with unrecognised attribute +true,0,0,q,@[dbwrite.loadconfig;badatt;{1b}],1,,unrecognised attribute throws + +/ Clean-up config files after loadconfig tests +run,0,0,q,@[hdel;cfgfile;{}],1,,remove temp sort config +run,0,0,q,@[hdel;badcols;{}],1,,remove temp bad columns file +run,0,0,q,@[hdel;badatt;{}],1,,remove temp bad attribute file +comment,,,,,,,--- manipulate: registered function path not testable (savedownmanipulation in .z.M is module-bound and not settable from test scope) --- + +/ Test 7: Applyattr - error cases +run,0,0,q,logcount:0,1,,reset counter +run,0,0,q,"dbwrite.applyattr[`:nonexist_attr_dir/;`sym;`p]",1,,apply to non-existent path +true,0,0,q,2=logcount,1,,loginfo (before attempt) and logerr (on failure) each called once + +/ Test 8 - Applyattr - attribute application and logging when config entry exists for table +run,0,0,q,`:dbwrite_attr_tp/.d set `sym`price,1,,write column order file +run,0,0,q,`:dbwrite_attr_tp/sym set 
`IBM`AAPL`MSFT,1,,write sym column +run,0,0,q,`:dbwrite_attr_tp/price set 200 100 300f,1,,write price column +run,0,0,q,logcount:0,1,,reset counter +run,0,0,q,"dbwrite.applyattr[`:dbwrite_attr_tp/;`sym;`p]",1,,apply p attr to sym column +true,0,0,q,1=logcount,1,,loginfo called once (no error) +true,0,0,q,`p=attr get `:dbwrite_attr_tp/sym,1,,p attribute applied on disk +run,0,0,q,@[hdel;`:dbwrite_attr_tp/;{}],1,,cleanup test partition + +/ Test 9 - sort: error cases +run,0,0,q,logcount:0,1,,reset counter +true,0,0,q,()~dbwrite.sort[`no_params_table_xyz],1,,returns () when table not in config +true,0,0,q,2=logcount,1,,loginfo (sorting start) and logwarn (skip) both called + +/ Test 10 - sort: sorting and logging when config entry exists for table +run,0,0,q,`:dbwrite_sort_tp/.d set `sym`price,1,,write column order file +run,0,0,q,`:dbwrite_sort_tp/sym set `IBM`AAPL`MSFT,1,,write unsorted sym column +run,0,0,q,`:dbwrite_sort_tp/price set 200 100 300f,1,,write price column +run,0,0,q,"dbwrite.sort[(`trade;`:dbwrite_sort_tp/)]",1,,sort trade table in test partition +true,0,0,q,`AAPL`IBM`MSFT~exec sym from get `:dbwrite_sort_tp/,1,,sorted ascending by sym +true,0,0,q,`p=attr get `:dbwrite_sort_tp/sym,1,,p attribute applied to sym on disk +run,0,0,q,@[hdel;`:dbwrite_sort_tp/;{}],1,,cleanup sort test partition + +/ Test 11 - sort: sorting with default config entry when no specific entry for table +run,0,0,q,defcfg:`:dbwrite_defcfg.csv,1,, +run,0,0,q,"defcfg 0:(""tabname,att,column,sort"";""default,,sym,1"")",1,,config with default entry only +run,0,0,q,dbwrite.loadconfig[defcfg],1,,load default-only config +run,0,0,q,`:dbwrite_def_tp/.d set `sym`price,1,,write column order file +run,0,0,q,`:dbwrite_def_tp/sym set `IBM`AAPL,1,,write unsorted sym column +run,0,0,q,`:dbwrite_def_tp/price set 200 100f,1,,write price column +run,0,0,q,logcount:0,1,,reset counter +run,0,0,q,"dbwrite.sort[(`othertable;`:dbwrite_def_tp/)]",1,,sort using default fallback +true,0,0,q,`AAPL`IBM~exec 
sym from get `:dbwrite_def_tp/,1,,sorted ascending by sym via default params +true,0,0,q,4=logcount,1,,loginfo x2 (sorting start + sort-by) + logwarn x1 (using defaults) + loginfo x1 (finished) + +/ Clean-up default config and test partition +after,0,0,q,@[hdel;`:dbwrite_def_tp/;{}],1,,cleanup test partition +after,0,0,q,@[hdel;defcfg;{}],1,,remove default config csv \ No newline at end of file From dac127aa78b00895d4d4ae37fa5640eacc7d9670 Mon Sep 17 00:00:00 2001 From: Ruairi-wq2 Date: Wed, 3 Jun 2026 10:40:36 +0100 Subject: [PATCH 2/7] Update dbwrite.md and dbwrite.q --- di/dbwrite/dbwrite.md | 10 +++++----- di/dbwrite/dbwrite.q | 11 +++++------ 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/di/dbwrite/dbwrite.md b/di/dbwrite/dbwrite.md index 6d0b450b..4a5d249a 100644 --- a/di/dbwrite/dbwrite.md +++ b/di/dbwrite/dbwrite.md @@ -111,7 +111,7 @@ Validation checks that all four required columns (`tabname`, `att`, `column`, `s dbwrite.loadconfig[`:config/sort.csv] ``` -> **Note:** `defaultfile` is a module-level variable (default: null symbol). Set it in `init.q` to configure a default sort config path, or always pass the path explicitly. When `params` is empty and `defaultfile` is null, `sort` falls back to the built-in `defaultparams` rather than erroring. +> **Note:** `defaultfile` is a module-level variable (default: null symbol). Set it in `init.q` to configure a path that `sort` will auto-load on first use. If no explicit `loadconfig` call is made and `defaultfile` is not set, `sort` falls back to `defaultparams` automatically. --- @@ -135,9 +135,9 @@ Sorts an on-disk table partition and applies configured attributes. **Returns** — generic null on success; `()` if no sort config is found for the table. -If `params` is empty when `sort` is called: -- `defaultfile` is set → `loadconfig[defaultfile]` is called to populate `params`. 
-- `defaultfile` is null → `params` is populated from the built-in `defaultparams` (sort by `time` ascending, no attribute). +If `params` is empty when `sort` is called, it is auto-populated before the lookup: +1. If `defaultfile` is set, `loadconfig[defaultfile]` is attempted (errors are swallowed). +2. If `params` is still empty after that, the built-in `defaultparams` is used — a single `default` row that sorts by `time` ascending with no attribute. Config lookup order within `params`: 1. Rows where `tabname` matches — used directly. @@ -237,7 +237,7 @@ k4unit:use`di.k4unit k4unit.moduletest`di.dbwrite ``` -Tests cover: dependency injection, `loadconfig` validation, `applyattr` on valid and missing paths, `sort` with explicit config / default fallback / no-config skip, `manipulate` pass-through, and `postreplay` stub. +Tests cover: dependency injection, `loadconfig` validation, `applyattr` on valid and missing paths, `sort` with explicit config / `defaultparams` fallback when no config is loaded / `default` row fallback / no-config skip, `manipulate` pass-through, and `postreplay` stub. 
--- diff --git a/di/dbwrite/dbwrite.q b/di/dbwrite/dbwrite.q index 5915529c..113bf4b6 100644 --- a/di/dbwrite/dbwrite.q +++ b/di/dbwrite/dbwrite.q @@ -12,11 +12,6 @@ defaultparams:([] tabname:enlist`default; att:enlist`; column:enlist`time; sort: loadconfig:{[file] file:hsym file; if[null file;file:.z.M.defaultfile]; - if[null file; - dp:.z.M.defaultparams; - .z.m.loginfo[`dbwrite;"no sort config file set; using defaultparams"]; - @[.z.M;`params;:;dp]; - :[]]; p:@[ {.z.m.loginfo[`dbwrite;"retrieving sort settings from ",string x];("SSSB";enlist",")0:x}; file; @@ -42,7 +37,11 @@ applyattr:{[dloc;colname;att] / sort an on-disk table partition and apply attributes per sort.csv config / d: tabname | (tabname;dir) | (tabname;list of dirs) sort:{[d] - if[0=count .z.M.params;.z.M.loadconfig .z.M.defaultfile]; + if[0=count select from .z.M.params; + @[{.z.M.loadconfig .z.M.defaultfile};`;{[e]:}]]; + if[0=count select from .z.M.params; + dp:select from .z.M.defaultparams; + @[.z.M;`params;:;dp]]; .z.m.loginfo[`dbwrite;"sorting ",(st:string t:first d)," table"]; sp:$[count tabsp:select from .z.M.params where tabname=t; [.z.m.loginfo[`dbwrite;"sort params found for: ",st];tabsp]; From b8b3914e6f155fe1137e7c688728312300c6bc33 Mon Sep 17 00:00:00 2001 From: Ruairi-wq2 Date: Wed, 3 Jun 2026 14:09:56 +0100 Subject: [PATCH 3/7] Export savedownmanipulation, simplify sort fallback, add loadconfig null guard - Export savedownmanipulation so consumers can register per-table pre-write functions - loadconfig warns and loads defaultparams when called with a null file rather than silently failing - sort falls back to defaultparams directly; removes defaultfile indirection from init.q - Tests: add savedownmanipulation registered function and error recovery cases, loadconfig null case, move all on-disk cleanup to after blocks - Docs updated throughout to match Co-Authored-By: Claude Sonnet 4.6 --- di/dbwrite/dbwrite.md | 37 ++++++++++++++++++--------- di/dbwrite/dbwrite.q | 12 ++++----- 
di/dbwrite/init.q | 5 +--- di/dbwrite/test.csv | 59 +++++++++++++++++++++++++------------------ 4 files changed, 66 insertions(+), 47 deletions(-) diff --git a/di/dbwrite/dbwrite.md b/di/dbwrite/dbwrite.md index 4a5d249a..ebb61cd1 100644 --- a/di/dbwrite/dbwrite.md +++ b/di/dbwrite/dbwrite.md @@ -61,6 +61,7 @@ Sorts `trade` by `sym`, applies `p` to `sym`. Tables not listed fall back to `de | `loadconfig[file]` | Load and validate the sort config CSV into module state | | `sort[d]` | Sort an on-disk partition and apply attributes per config | | `applyattr[dloc;colname;att]` | Apply a single kdb+ attribute to an on-disk column | +| `savedownmanipulation` | Dict mapping table name → unary function; populate before EOD to register pre-write transformations | | `manipulate[t;x]` | Apply a registered pre-write manipulation to a table | | `postreplay[d;p]` | Post-EOD stub; override to add custom logic | | `gc[]` | Run `.Q.gc[]` and log before/after memory stats | @@ -101,18 +102,18 @@ Loads and validates the sort configuration CSV, storing the result in module sta | Parameter | Type | Description | |---|---|---| -| `file` | hsym | Path to the sort config CSV. Passing a null symbol (`` ` ``) uses the module-level `defaultfile` value | +| `file` | hsym | Path to the sort config CSV; pass null (`` ` ``) to warn and load `defaultparams` instead | -**Returns** — generic null on success; throws on failure. +**Returns** — generic null on success; throws on file/validation failure. Validation checks that all four required columns (`tabname`, `att`, `column`, `sort`) are present and that all `att` values are within `` ``p`s`g`u ``. Throws a descriptive error for invalid files or unreadable paths. +Passing null warns at `warn` level and loads `defaultparams` — it does not throw. + ```q dbwrite.loadconfig[`:config/sort.csv] ``` -> **Note:** `defaultfile` is a module-level variable (default: null symbol). Set it in `init.q` to configure a path that `sort` will auto-load on first use. 
If no explicit `loadconfig` call is made and `defaultfile` is not set, `sort` falls back to `defaultparams` automatically. - --- ### `sort[d]` @@ -135,11 +136,9 @@ Sorts an on-disk table partition and applies configured attributes. **Returns** — generic null on success; `()` if no sort config is found for the table. -If `params` is empty when `sort` is called, it is auto-populated before the lookup: -1. If `defaultfile` is set, `loadconfig[defaultfile]` is attempted (errors are swallowed). -2. If `params` is still empty after that, the built-in `defaultparams` is used — a single `default` row that sorts by `time` ascending with no attribute. +If `loadconfig` has not been called before `sort` is first invoked, `sort` automatically uses `defaultparams` — a single `default` row that sorts by `time` ascending with no attribute. -Config lookup order within `params`: +Config lookup order within the loaded params: 1. Rows where `tabname` matches — used directly. 2. Rows where `tabname = \`default` — used with a `warn` log. 3. No match — warns and returns `()`. @@ -174,6 +173,19 @@ dbwrite.applyattr[`:hdb/2024.01.02/trade/;`sym;`p] --- +### `savedownmanipulation` + +A dictionary mapping table name (symbol) to a unary manipulation function. Populate this before EOD to register per-table pre-write transformations. + +```q +/ register a manipulation for the trade table +dbwrite.savedownmanipulation[`trade]:{[x] update sym:`p#sym from x} +``` + +Manipulations are called by `manipulate[t;x]`. An empty dict (the default) means no manipulation is applied to any table. + +--- + ### `manipulate[t;x]` Applies a registered pre-write manipulation to table `x` of type `t`. @@ -185,11 +197,12 @@ Applies a registered pre-write manipulation to table `x` of type `t`. 
| `t` | symbol | Table name used to look up the registered manipulation function | | `x` | table | Table data to transform | -**Returns** — modified table, or original table unmodified if no manipulation is registered or the function throws. +**Returns** — modified table, or original table unmodified if no manipulation is registered for `t` or the registered function throws. -Manipulations are registered in the module-internal `savedownmanipulation` dictionary (`` tabname → unary function ``). This dictionary is module-bound and populated by process initialisation code before EOD. +On error the original table is returned unchanged and the error is logged at `error` level. Register functions in `savedownmanipulation` before calling. ```q +dbwrite.savedownmanipulation[`trade]:{[x] update sym:`p#sym from x} data:dbwrite.manipulate[`trade;data] ``` @@ -237,12 +250,12 @@ k4unit:use`di.k4unit k4unit.moduletest`di.dbwrite ``` -Tests cover: dependency injection, `loadconfig` validation, `applyattr` on valid and missing paths, `sort` with explicit config / `defaultparams` fallback when no config is loaded / `default` row fallback / no-config skip, `manipulate` pass-through, and `postreplay` stub. +Tests cover: dependency injection, `init` error on missing log dep, `manipulate` pass-through and registered function application and error recovery via `savedownmanipulation`, `postreplay` stub, `sort` with `defaultparams` fallback / explicit config / `default` row fallback / no-match skip, `loadconfig` with null file (warns and loads `defaultparams`) / valid file / unrecognised columns / unrecognised attributes / missing file, `applyattr` on missing and valid paths. 
--- ## Exported symbols ```q -export:([init;sort;applyattr;loadconfig;manipulate;postreplay;gc]) +export:([init;sort;applyattr;loadconfig;manipulate;savedownmanipulation;postreplay;gc]) ``` diff --git a/di/dbwrite/dbwrite.q b/di/dbwrite/dbwrite.q index 113bf4b6..0f0f9d5b 100644 --- a/di/dbwrite/dbwrite.q +++ b/di/dbwrite/dbwrite.q @@ -8,10 +8,13 @@ savedownmanipulation:()!(); defaultparams:([] tabname:enlist`default; att:enlist`; column:enlist`time; sort:enlist 1b); / load and validate sort.csv into .z.M.params -/ file: hsym path; null falls back to .z.M.defaultfile +/ file: path symbol; null warns and loads defaultparams instead - null is tested before hsym is applied, because hsym of the null symbol is `: which is not null loadconfig:{[file] - file:hsym file; - if[null file;file:.z.M.defaultfile]; + if[null file; + .z.m.logwarn[`dbwrite;"loadconfig called with no file; using defaultparams"]; + @[.z.M;`params;:;.z.M.defaultparams]; + :]; + file:hsym file; p:@[ {.z.m.loginfo[`dbwrite;"retrieving sort settings from ",string x];("SSSB";enlist",")0:x}; file; @@ -38,10 +41,7 @@ applyattr:{[dloc;colname;att] / d: tabname | (tabname;dir) | (tabname;list of dirs) sort:{[d] if[0=count select from .z.M.params; - @[{.z.M.loadconfig .z.M.defaultfile};`;{[e]:}]]; - if[0=count select from .z.M.params; - dp:select from .z.M.defaultparams; - @[.z.M;`params;:;dp]]; + @[.z.M;`params;:;.z.M.defaultparams]]; .z.m.loginfo[`dbwrite;"sorting ",(st:string t:first d)," table"]; sp:$[count tabsp:select from .z.M.params where tabname=t; [.z.m.loginfo[`dbwrite;"sort params found for: ",st];tabsp]; diff --git a/di/dbwrite/init.q b/di/dbwrite/init.q index dcca8127..bd2cd900 100644 --- a/di/dbwrite/init.q +++ b/di/dbwrite/init.q @@ -3,7 +3,4 @@ \l ::dbwrite.q -/ default sort config file - set directly or extend init with a config dep -defaultfile:`; - -export:([init;sort;applyattr;loadconfig;manipulate;postreplay;gc]) \ No newline at end of file +export:([init;sort;applyattr;loadconfig;manipulate;savedownmanipulation;postreplay;gc]) \ No newline at end of file diff --git a/di/dbwrite/test.csv 
b/di/dbwrite/test.csv index 53265fc1..fc1dfee5 100644 --- a/di/dbwrite/test.csv +++ b/di/dbwrite/test.csv @@ -18,29 +18,40 @@ true,0,0,q,logdep[`error]~.m.di.0dbwrite.logerr,1,1,injected error function stor fail,0,0,q,dbwrite.init[(::);(::)],1,1,init without log dep throws an error fail,0,0,q,dbwrite.init[(::);`log!(::)],1,1,init with log set to (::) throws an error -/ Test 3: manipulate: error cases +/ Test 3: manipulate - pass-through when no function registered run,0,0,q,"tbl:([]sym:`AAPL`IBM;price:100 200f)",1,,sample table -true,0,0,q,"tbl~dbwrite.manipulate[`trade;tbl]",1,,unregistered table name -true,0,0,q,"tbl~dbwrite.manipulate[`;tbl]",1,,null table name -true,0,0,q,"(0#tbl)~dbwrite.manipulate[`other;0#tbl]",1,,empty table +true,0,0,q,"tbl~dbwrite.manipulate[`trade;tbl]",1,,unregistered table name returns original +true,0,0,q,"tbl~dbwrite.manipulate[`;tbl]",1,,null table name returns original +true,0,0,q,"(0#tbl)~dbwrite.manipulate[`other;0#tbl]",1,,empty table returned unchanged -/ Test 4 postreplay: returns generic null +/ Test 4: manipulate - registered function is applied via savedownmanipulation +run,0,0,q,"dbwrite.savedownmanipulation[`trade]:{[x] update price:price*2 from x}",1,,register manipulation function for trade +true,0,0,q,"(update price:price*2 from tbl)~dbwrite.manipulate[`trade;tbl]",1,,registered function applied to table +run,0,0,q,"dbwrite.savedownmanipulation[`errortable]:{[x] '`testfail}",1,,register function that throws +true,0,0,q,"tbl~dbwrite.manipulate[`errortable;tbl]",1,,returns original table when registered function throws +run,0,0,q,"dbwrite.savedownmanipulation:()!()",1,,clear registered manipulation functions + +/ Test 5: postreplay returns generic null true,0,0,q,(::)~dbwrite.postreplay[`:hdb;2024.01.01],1,,stub returns generic null -/ Test 5: sort uses defaultparams (sort by time) when params is empty and defaultfile is null +/ Test 6: sort uses defaultparams (sort by time) when no loadconfig has been called 
run,0,0,q,`:dbwrite_defp_tp/.d set `time`sym,1,,write column order file run,0,0,q,`:dbwrite_defp_tp/time set 2024.01.01D09:00:00.000000000 2024.01.01D08:00:00.000000000,1,,write unsorted time column run,0,0,q,`:dbwrite_defp_tp/sym set `IBM`AAPL,1,,write sym column run,0,0,q,"dbwrite.sort[(`anytable;`:dbwrite_defp_tp/)]",1,,sort using defaultparams (empty params + null defaultfile) true,0,0,q,(asc exec time from get `:dbwrite_defp_tp/)~exec time from get `:dbwrite_defp_tp/,1,,time column sorted ascending by defaultparams -run,0,0,q,@[hdel;`:dbwrite_defp_tp/;{}],1,,cleanup test partition -/ Test 6: loadconfig: valid config file is loaded and parsed correctly +/ Test 7: loadconfig - null file warns and loads defaultparams +run,0,0,q,logcount:0,1,,reset counter +run,0,0,q,dbwrite.loadconfig[`],1,,loadconfig with null file +true,0,0,q,1=logcount,1,,logwarn called once + +/ Test 8: loadconfig - valid config file is loaded and parsed correctly run,0,0,q,cfgfile:`:dbwrite_test.csv,1,,temp sort config path run,0,0,q,"cfgfile 0:(""tabname,att,column,sort"";""trade,p,sym,1"";""trade,,price,0"")",1,,write test sort config run,0,0,q,dbwrite.loadconfig[cfgfile],1,,loadconfig with valid csv succeeds -/ Test 6 loadconfig - invalid config files +/ Test 7 cont: loadconfig - invalid config files true,0,0,q,@[dbwrite.loadconfig;`:nonexistent_file_xyz;{1b}],1,,nonexistent file throws run,0,0,q,badcols:`:dbwrite_badcols.csv,1,, run,0,0,q,"badcols 0:(""wrongcol,att,column,sort"";""trade,p,sym,1"")",1,,csv with unrecognised column name @@ -49,18 +60,12 @@ run,0,0,q,badatt:`:dbwrite_badatt.csv,1,, run,0,0,q,"badatt 0:(""tabname,att,column,sort"";""trade,z,sym,1"")",1,,csv with unrecognised attribute true,0,0,q,@[dbwrite.loadconfig;badatt;{1b}],1,,unrecognised attribute throws -/ Clean-up config files after loadconfig tests -run,0,0,q,@[hdel;cfgfile;{}],1,,remove temp sort config -run,0,0,q,@[hdel;badcols;{}],1,,remove temp bad columns file -run,0,0,q,@[hdel;badatt;{}],1,,remove temp bad 
attribute file -comment,,,,,,,--- manipulate: registered function path not testable (savedownmanipulation in .z.M is module-bound and not settable from test scope) --- - -/ Test 7: Applyattr - error cases +/ Test 9: applyattr - error cases run,0,0,q,logcount:0,1,,reset counter run,0,0,q,"dbwrite.applyattr[`:nonexist_attr_dir/;`sym;`p]",1,,apply to non-existent path true,0,0,q,2=logcount,1,,loginfo (before attempt) and logerr (on failure) each called once -/ Test 8 - Applyattr - attribute application and logging when config entry exists for table +/ Test 10: applyattr - attribute application and logging when path exists run,0,0,q,`:dbwrite_attr_tp/.d set `sym`price,1,,write column order file run,0,0,q,`:dbwrite_attr_tp/sym set `IBM`AAPL`MSFT,1,,write sym column run,0,0,q,`:dbwrite_attr_tp/price set 200 100 300f,1,,write price column @@ -68,23 +73,21 @@ run,0,0,q,logcount:0,1,,reset counter run,0,0,q,"dbwrite.applyattr[`:dbwrite_attr_tp/;`sym;`p]",1,,apply p attr to sym column true,0,0,q,1=logcount,1,,loginfo called once (no error) true,0,0,q,`p=attr get `:dbwrite_attr_tp/sym,1,,p attribute applied on disk -run,0,0,q,@[hdel;`:dbwrite_attr_tp/;{}],1,,cleanup test partition -/ Test 9 - sort: error cases +/ Test 11: sort - skip when table not in config run,0,0,q,logcount:0,1,,reset counter true,0,0,q,()~dbwrite.sort[`no_params_table_xyz],1,,returns () when table not in config true,0,0,q,2=logcount,1,,loginfo (sorting start) and logwarn (skip) both called -/ Test 10 - sort: sorting and logging when config entry exists for table +/ Test 12: sort - sorting and attribute application when config entry exists for table run,0,0,q,`:dbwrite_sort_tp/.d set `sym`price,1,,write column order file run,0,0,q,`:dbwrite_sort_tp/sym set `IBM`AAPL`MSFT,1,,write unsorted sym column run,0,0,q,`:dbwrite_sort_tp/price set 200 100 300f,1,,write price column run,0,0,q,"dbwrite.sort[(`trade;`:dbwrite_sort_tp/)]",1,,sort trade table in test partition true,0,0,q,`AAPL`IBM`MSFT~exec sym from get 
`:dbwrite_sort_tp/,1,,sorted ascending by sym true,0,0,q,`p=attr get `:dbwrite_sort_tp/sym,1,,p attribute applied to sym on disk -run,0,0,q,@[hdel;`:dbwrite_sort_tp/;{}],1,,cleanup sort test partition -/ Test 11 - sort: sorting with default config entry when no specific entry for table +/ Test 13: sort - default config row used as fallback when no specific entry for table run,0,0,q,defcfg:`:dbwrite_defcfg.csv,1,, run,0,0,q,"defcfg 0:(""tabname,att,column,sort"";""default,,sym,1"")",1,,config with default entry only run,0,0,q,dbwrite.loadconfig[defcfg],1,,load default-only config @@ -96,6 +99,12 @@ run,0,0,q,"dbwrite.sort[(`othertable;`:dbwrite_def_tp/)]",1,,sort using default true,0,0,q,`AAPL`IBM~exec sym from get `:dbwrite_def_tp/,1,,sorted ascending by sym via default params true,0,0,q,4=logcount,1,,loginfo x2 (sorting start + sort-by) + logwarn x1 (using defaults) + loginfo x1 (finished) -/ Clean-up default config and test partition -after,0,0,q,@[hdel;`:dbwrite_def_tp/;{}],1,,cleanup test partition -after,0,0,q,@[hdel;defcfg;{}],1,,remove default config csv \ No newline at end of file +/ Clean up all created on-disk files and directories +after,0,0,q,@[hdel;`:dbwrite_defp_tp/;{}],1,,cleanup defaultparams test partition +after,0,0,q,@[hdel;cfgfile;{}],1,,remove temp sort config +after,0,0,q,@[hdel;badcols;{}],1,,remove temp bad columns file +after,0,0,q,@[hdel;badatt;{}],1,,remove temp bad attribute file +after,0,0,q,@[hdel;`:dbwrite_attr_tp/;{}],1,,cleanup attr test partition +after,0,0,q,@[hdel;`:dbwrite_sort_tp/;{}],1,,cleanup sort test partition +after,0,0,q,@[hdel;`:dbwrite_def_tp/;{}],1,,cleanup default config test partition +after,0,0,q,@[hdel;defcfg;{}],1,,remove default config csv From 4c0099e16943188a3e0cbaca6b9e3cc58d734bad Mon Sep 17 00:00:00 2001 From: Ruairi-wq2 Date: Wed, 3 Jun 2026 15:28:36 +0100 Subject: [PATCH 4/7] Add null/type guards, comprehensive edge-case tests, fix sort empty-input crash - loadconfig: explicit symbol type check with clear 
error before null check - applyattr: guard against invalid att (must be in `p`s`g`u), null colname guard - sort: type check for d (must be symbol or list); empty list returns () safely - test.csv: 78 tests covering wrong types, nulls, empty inputs across all functions Co-Authored-By: Claude Sonnet 4.6 --- di/dbwrite/dbwrite.md | 9 ++-- di/dbwrite/dbwrite.q | 103 +++++++++++++++++++++++------------------- di/dbwrite/test.csv | 88 ++++++++++++++++++++++-------------- 3 files changed, 115 insertions(+), 85 deletions(-) diff --git a/di/dbwrite/dbwrite.md b/di/dbwrite/dbwrite.md index ebb61cd1..651ee3b2 100644 --- a/di/dbwrite/dbwrite.md +++ b/di/dbwrite/dbwrite.md @@ -61,7 +61,7 @@ Sorts `trade` by `sym`, applies `p` to `sym`. Tables not listed fall back to `de | `loadconfig[file]` | Load and validate the sort config CSV into module state | | `sort[d]` | Sort an on-disk partition and apply attributes per config | | `applyattr[dloc;colname;att]` | Apply a single kdb+ attribute to an on-disk column | -| `savedownmanipulation` | Dict mapping table name → unary function; populate before EOD to register pre-write transformations | +| `savedownmanipulation` | Dict mapping table name → unary function; amend to register pre-write transformations | | `manipulate[t;x]` | Apply a registered pre-write manipulation to a table | | `postreplay[d;p]` | Post-EOD stub; override to add custom logic | | `gc[]` | Run `.Q.gc[]` and log before/after memory stats | @@ -175,14 +175,13 @@ dbwrite.applyattr[`:hdb/2024.01.02/trade/;`sym;`p] ### `savedownmanipulation` -A dictionary mapping table name (symbol) to a unary manipulation function. Populate this before EOD to register per-table pre-write transformations. +A dictionary mapping table name (symbol) to a unary manipulation function. Amend this before EOD to register per-table pre-write transformations. 
```q -/ register a manipulation for the trade table dbwrite.savedownmanipulation[`trade]:{[x] update sym:`p#sym from x} ``` -Manipulations are called by `manipulate[t;x]`. An empty dict (the default) means no manipulation is applied to any table. +Manipulations are applied by `manipulate[t;x]`. An empty dict (the default) means no manipulation is applied to any table. --- @@ -250,7 +249,7 @@ k4unit:use`di.k4unit k4unit.moduletest`di.dbwrite ``` -Tests cover: dependency injection, `init` error on missing log dep, `manipulate` pass-through and registered function application and error recovery via `savedownmanipulation`, `postreplay` stub, `sort` with `defaultparams` fallback / explicit config / `default` row fallback / no-match skip, `loadconfig` with null file (warns and loads `defaultparams`) / valid file / unrecognised columns / unrecognised attributes / missing file, `applyattr` on missing and valid paths. +Tests cover: dependency injection, `init` error on missing log dep, `manipulate` pass-through and registered function application and error recovery via `savedownmanipulation`, visibility of registrations, `postreplay` stub, `sort` with `defaultparams` fallback / explicit config / `default` row fallback / no-match skip, `loadconfig` with null file (warns and loads `defaultparams`) / valid file / unrecognised columns / unrecognised attributes / missing file, `applyattr` on missing and valid paths. 
--- diff --git a/di/dbwrite/dbwrite.q b/di/dbwrite/dbwrite.q index 0f0f9d5b..fbb1df78 100644 --- a/di/dbwrite/dbwrite.q +++ b/di/dbwrite/dbwrite.q @@ -1,64 +1,72 @@ -/ sort params table - populated by loadconfig -params:([] tabname:`symbol$(); att:`symbol$(); column:`symbol$(); sort:`boolean$()); +/ sort params table - default row sorts all tables by time ascending +params:([] tabname:enlist`default; att:enlist`; column:enlist`time; sort:enlist 1b); / save-down manipulation registry: tabname -> unary function savedownmanipulation:()!(); -/ default sort params - used when params is empty and no config file is set -defaultparams:([] tabname:enlist`default; att:enlist`; column:enlist`time; sort:enlist 1b); - / load and validate sort.csv into .z.M.params -/ file: hsym path; null warns and loads defaultparams instead +/ file: hsym path; null warns and resets params to default row loadconfig:{[file] - file:hsym file; + if[not -11h=type file; + '"loadconfig: file must be a symbol, got type ",(string type file)]; if[null file; - .z.m.logwarn[`dbwrite;"loadconfig called with no file; using defaultparams"]; - @[.z.M;`params;:;.z.M.defaultparams]; - :]; - p:@[ - {.z.m.loginfo[`dbwrite;"retrieving sort settings from ",string x];("SSSB";enlist",")0:x}; - file; - {[f;e]'"failed to open ",string[f],": ",e}[file] - ]; - if[not all spcb:(spc:cols p) in `tabname`att`column`sort; - '"unrecognised columns (",(", " sv string spc where not spcb),") in ",string file]; - if[not all atb:(at:distinct p`att) in ``p`s`g`u; - '"unrecognised attribute(s): ",", " sv string at where not atb]; - @[.z.M;`params;:;p]; + .z.m.logwarn[`dbwrite;"loadconfig called with no file; resetting params to default"]; + @[.z.M;`params;:;([] tabname:enlist`default; att:enlist`; column:enlist`time; sort:enlist 1b)]]; + if[not null file; + file:hsym file; + p:@[ + {.z.m.loginfo[`dbwrite;"retrieving sort settings from ",string x];("SSSB";enlist",")0:x}; + file; + {[f;e]'"failed to open ",string[f],": ",e}[file] + ]; 
+ if[not all spcb:(spc:cols p) in `tabname`att`column`sort; + '"unrecognised columns (",(", " sv string spc where not spcb),") in ",string file]; + if[not all atb:(at:distinct p`att) in ``p`s`g`u; + '"unrecognised attribute(s): ",", " sv string at where not atb]; + @[.z.M;`params;:;p]]; }; / apply a single kdb+ attribute to an on-disk column; logs and swallows errors applyattr:{[dloc;colname;att] .z.m.loginfo[`dbwrite;"applying ",string[att]," attr to ",string[colname]," in ",string dloc]; - .[{@[x;y;z#]};(dloc;colname;att); - {[dloc;colname;att;e] - .z.m.logerr[`dbwrite;"unable to apply ",string[att]," attr to ",string[colname]," in ",string[dloc],": ",e] - }[dloc;colname;att] - ]; + if[null colname; + .z.m.logerr[`dbwrite;"applyattr called with null column name in ",string dloc]]; + if[not null colname; + $[not att in `p`s`g`u; + .z.m.logerr[`dbwrite;"applyattr: invalid attribute ",string[att]," for ",string[colname]," in ",string dloc]; + .[{@[x;y;z#]};(dloc;colname;att); + {[dloc;colname;att;e] + .z.m.logerr[`dbwrite;"unable to apply ",string[att]," attr to ",string[colname]," in ",string[dloc],": ",e] + }[dloc;colname;att] + ]]]; }; / sort an on-disk table partition and apply attributes per sort.csv config / d: tabname | (tabname;dir) | (tabname;list of dirs) sort:{[d] - if[0=count select from .z.M.params; - @[.z.M;`params;:;.z.M.defaultparams]]; - .z.m.loginfo[`dbwrite;"sorting ",(st:string t:first d)," table"]; - sp:$[count tabsp:select from .z.M.params where tabname=t; - [.z.m.loginfo[`dbwrite;"sort params found for: ",st];tabsp]; - count defsp:select from .z.M.params where tabname=`default; - [.z.m.logwarn[`dbwrite;"no sort params for: ",st,"; using defaults"];defsp]; - [.z.m.logwarn[`dbwrite;"no sort params for: ",st,"; skipping sort"];:()]]; - {[sp;dloc] - if[count sortcols:exec column from sp where sort,not null column; - .z.m.loginfo[`dbwrite;"sorting ",string[dloc]," by: ",", " sv string sortcols]; - .[xasc;(sortcols;dloc); - {[sc;dl;e] - 
.z.m.logerr[`dbwrite;"failed to sort ",string[dl]," by ",(", " sv string sc),": ",e] - }[sortcols;dloc]]]; - if[count attrcols:select column,att from sp where not null att; - .z.M.applyattr[dloc;;]'[attrcols`column;attrcols`att]]; - }[sp] each distinct (),last d; - .z.m.loginfo[`dbwrite;"finished sorting ",st," table"]; + $[not count d; + (); + not (type d) in -11 0 11h; + [.z.m.logerr[`dbwrite;"sort: d must be a symbol or list, got type ",(string type d)];()]; + [ + .z.m.loginfo[`dbwrite;"sorting ",(st:string t:first d)," table"]; + sp:$[count tabsp:select from .z.M.params where tabname=t; + [.z.m.loginfo[`dbwrite;"sort params found for: ",st];tabsp]; + count defsp:select from .z.M.params where tabname=`default; + [.z.m.logwarn[`dbwrite;"no sort params for: ",st,"; using defaults"];defsp]; + [.z.m.logwarn[`dbwrite;"no sort params for: ",st,"; skipping sort"];:()]]; + {[sp;dloc] + if[count sortcols:exec column from sp where sort,not null column; + .z.m.loginfo[`dbwrite;"sorting ",string[dloc]," by: ",", " sv string sortcols]; + .[xasc;(sortcols;dloc); + {[sc;dl;e] + .z.m.logerr[`dbwrite;"failed to sort ",string[dl]," by ",(", " sv string sc),": ",e] + }[sortcols;dloc]]]; + if[count attrcols:select column,att from sp where not null att; + .z.M.applyattr[dloc;;]'[attrcols`column;attrcols`att]]; + }[sp] each distinct (),last d; + .z.m.loginfo[`dbwrite;"finished sorting ",st," table"] + ]] }; / apply registered pre-write manipulation to table x of type t @@ -76,7 +84,7 @@ manipulate:{[t;x] postreplay:{[d;p]}; / format current process memory stats as a loggable string -memstats:{"mem stats: ",{"; "sv "=" sv'flip(string key x;(string value x),\:" MB")}`long$.Q.w[]%1048576}; +memstats:{[]"mem stats: ",{"; "sv "=" sv'flip(string key x;(string value x),\:" MB")}`long$.Q.w[]%1048576}; / run .Q.gc[] and log before/after memory stats gc:{ @@ -86,9 +94,10 @@ gc:{ }; init:{[config;deps] + / config: dict with optional keys + / `savedownmanipulation: tabname!function dict of 
pre-write manipulation functions / deps: `log!(logdict) / `log: `info`warn`error!(infofunc;warnfunc;errfunc) - required - / example: dbwrite.init[enlist[`log]!enlist logdep] logdict:$[99h=type deps;$[(`log in key deps) and not (::)~deps`log;deps`log;()!()];()!()]; if[not count logdict; '"di.dbwrite: log dependency is required; pass `info`warn`error functions - see di.log or refer to confluence documentation"; @@ -96,4 +105,4 @@ init:{[config;deps] .z.m.loginfo:logdict`info; .z.m.logwarn:logdict`warn; .z.m.logerr:logdict`error; - }; \ No newline at end of file + }; diff --git a/di/dbwrite/test.csv b/di/dbwrite/test.csv index fc1dfee5..6eec3123 100644 --- a/di/dbwrite/test.csv +++ b/di/dbwrite/test.csv @@ -18,68 +18,86 @@ true,0,0,q,logdep[`error]~.m.di.0dbwrite.logerr,1,1,injected error function stor fail,0,0,q,dbwrite.init[(::);(::)],1,1,init without log dep throws an error fail,0,0,q,dbwrite.init[(::);`log!(::)],1,1,init with log set to (::) throws an error -/ Test 3: manipulate - pass-through when no function registered +/ Test 3: manipulate - pass-through cases (no functions registered) run,0,0,q,"tbl:([]sym:`AAPL`IBM;price:100 200f)",1,,sample table true,0,0,q,"tbl~dbwrite.manipulate[`trade;tbl]",1,,unregistered table name returns original true,0,0,q,"tbl~dbwrite.manipulate[`;tbl]",1,,null table name returns original true,0,0,q,"(0#tbl)~dbwrite.manipulate[`other;0#tbl]",1,,empty table returned unchanged +true,0,0,q,42~dbwrite.manipulate[`notregistered;42],1,,non-table input returns unchanged when unregistered -/ Test 4: manipulate - registered function is applied via savedownmanipulation -run,0,0,q,"dbwrite.savedownmanipulation[`trade]:{[x] update price:price*2 from x}",1,,register manipulation function for trade -true,0,0,q,"(update price:price*2 from tbl)~dbwrite.manipulate[`trade;tbl]",1,,registered function applied to table -run,0,0,q,"dbwrite.savedownmanipulation[`errortable]:{[x] '`testfail}",1,,register function that throws 
-true,0,0,q,"tbl~dbwrite.manipulate[`errortable;tbl]",1,,returns original table when registered function throws -run,0,0,q,"dbwrite.savedownmanipulation:()!()",1,,clear registered manipulation functions - -/ Test 5: postreplay returns generic null +/ Test 4: postreplay returns generic null true,0,0,q,(::)~dbwrite.postreplay[`:hdb;2024.01.01],1,,stub returns generic null +true,0,0,q,(::)~dbwrite.postreplay[`;0Nd],1,,null args return generic null -/ Test 6: sort uses defaultparams (sort by time) when no loadconfig has been called +/ Test 5: sort uses default row when table name not in config run,0,0,q,`:dbwrite_defp_tp/.d set `time`sym,1,,write column order file run,0,0,q,`:dbwrite_defp_tp/time set 2024.01.01D09:00:00.000000000 2024.01.01D08:00:00.000000000,1,,write unsorted time column run,0,0,q,`:dbwrite_defp_tp/sym set `IBM`AAPL,1,,write sym column -run,0,0,q,"dbwrite.sort[(`anytable;`:dbwrite_defp_tp/)]",1,,sort using defaultparams (empty params + null defaultfile) -true,0,0,q,(asc exec time from get `:dbwrite_defp_tp/)~exec time from get `:dbwrite_defp_tp/,1,,time column sorted ascending by defaultparams +run,0,0,q,"dbwrite.sort[(`anytable;`:dbwrite_defp_tp/)]",1,,sort using default row +true,0,0,q,(asc exec time from get `:dbwrite_defp_tp/)~exec time from get `:dbwrite_defp_tp/,1,,time column sorted ascending by default row -/ Test 7: loadconfig - null file warns and loads defaultparams -run,0,0,q,logcount:0,1,,reset counter -run,0,0,q,dbwrite.loadconfig[`],1,,loadconfig with null file -true,0,0,q,1=logcount,1,,logwarn called once +/ Test 6: loadconfig null resets params to default row +run,0,0,q,dbwrite.loadconfig[`],1,,call loadconfig with null file +true,0,0,q,.m.di.0dbwrite.params~([] tabname:enlist`default; att:enlist`; column:enlist`time; sort:enlist 1b),1,,params reset to default row -/ Test 8: loadconfig - valid config file is loaded and parsed correctly +/ Test 7: loadconfig - valid config file loaded and parsed correctly 
run,0,0,q,cfgfile:`:dbwrite_test.csv,1,,temp sort config path run,0,0,q,"cfgfile 0:(""tabname,att,column,sort"";""trade,p,sym,1"";""trade,,price,0"")",1,,write test sort config run,0,0,q,dbwrite.loadconfig[cfgfile],1,,loadconfig with valid csv succeeds +true,0,0,q,2=count .m.di.0dbwrite.params,1,,two rows loaded into params +true,0,0,q,all `trade=exec tabname from .m.di.0dbwrite.params,1,,both rows are for trade table -/ Test 7 cont: loadconfig - invalid config files +/ Test 7 cont: loadconfig - invalid and edge-case inputs true,0,0,q,@[dbwrite.loadconfig;`:nonexistent_file_xyz;{1b}],1,,nonexistent file throws +true,0,0,q,@[dbwrite.loadconfig;42;{1b}],1,,non-symbol arg throws run,0,0,q,badcols:`:dbwrite_badcols.csv,1,, run,0,0,q,"badcols 0:(""wrongcol,att,column,sort"";""trade,p,sym,1"")",1,,csv with unrecognised column name true,0,0,q,@[dbwrite.loadconfig;badcols;{1b}],1,,unrecognised column throws run,0,0,q,badatt:`:dbwrite_badatt.csv,1,, run,0,0,q,"badatt 0:(""tabname,att,column,sort"";""trade,z,sym,1"")",1,,csv with unrecognised attribute true,0,0,q,@[dbwrite.loadconfig;badatt;{1b}],1,,unrecognised attribute throws +run,0,0,q,emptycfg:`:dbwrite_empty.csv,1,, +run,0,0,q,"emptycfg 0:enlist""tabname,att,column,sort""",1,,csv with header only +run,0,0,q,dbwrite.loadconfig[emptycfg],1,,loadconfig with header-only csv +true,0,0,q,0=count .m.di.0dbwrite.params,1,,params is empty after header-only csv -/ Test 9: applyattr - error cases +/ Test 8: applyattr - error cases run,0,0,q,logcount:0,1,,reset counter run,0,0,q,"dbwrite.applyattr[`:nonexist_attr_dir/;`sym;`p]",1,,apply to non-existent path true,0,0,q,2=logcount,1,,loginfo (before attempt) and logerr (on failure) each called once -/ Test 10: applyattr - attribute application and logging when path exists +/ Test 8 cont: applyattr - null column name logs error and does not throw run,0,0,q,`:dbwrite_attr_tp/.d set `sym`price,1,,write column order file run,0,0,q,`:dbwrite_attr_tp/sym set `IBM`AAPL`MSFT,1,,write sym 
column run,0,0,q,`:dbwrite_attr_tp/price set 200 100 300f,1,,write price column run,0,0,q,logcount:0,1,,reset counter +run,0,0,q,"dbwrite.applyattr[`:dbwrite_attr_tp/;`;`p]",1,,apply with null column name +true,0,0,q,2=logcount,1,,loginfo and logerr both called for null column + +/ Test 8 cont: applyattr - invalid att logs error +run,0,0,q,logcount:0,1,,reset counter +run,0,0,q,"dbwrite.applyattr[`:dbwrite_attr_tp/;`sym;`z]",1,,apply with invalid attribute +true,0,0,q,2=logcount,1,,loginfo and logerr both called for invalid att + +/ Test 9: applyattr - attribute applied and logged when path exists +run,0,0,q,logcount:0,1,,reset counter run,0,0,q,"dbwrite.applyattr[`:dbwrite_attr_tp/;`sym;`p]",1,,apply p attr to sym column true,0,0,q,1=logcount,1,,loginfo called once (no error) true,0,0,q,`p=attr get `:dbwrite_attr_tp/sym,1,,p attribute applied on disk -/ Test 11: sort - skip when table not in config +/ Test 10: sort - skip when table not in config and no default row +run,0,0,q,dbwrite.loadconfig[cfgfile],1,,reload trade-only config (no default row) run,0,0,q,logcount:0,1,,reset counter true,0,0,q,()~dbwrite.sort[`no_params_table_xyz],1,,returns () when table not in config true,0,0,q,2=logcount,1,,loginfo (sorting start) and logwarn (skip) both called +true,0,0,q,()~dbwrite.sort[()],1,,empty list returns () + +/ Test 10 cont: sort - wrong type logs error and returns () +run,0,0,q,logcount:0,1,,reset counter +true,0,0,q,()~dbwrite.sort[42],1,,wrong type (long) returns () +true,0,0,q,1=logcount,1,,logerr called once for wrong type -/ Test 12: sort - sorting and attribute application when config entry exists for table +/ Test 11: sort - sorting and attribute application when config entry exists for table run,0,0,q,`:dbwrite_sort_tp/.d set `sym`price,1,,write column order file run,0,0,q,`:dbwrite_sort_tp/sym set `IBM`AAPL`MSFT,1,,write unsorted sym column run,0,0,q,`:dbwrite_sort_tp/price set 200 100 300f,1,,write price column @@ -87,24 +105,28 @@ 
run,0,0,q,"dbwrite.sort[(`trade;`:dbwrite_sort_tp/)]",1,,sort trade table in tes true,0,0,q,`AAPL`IBM`MSFT~exec sym from get `:dbwrite_sort_tp/,1,,sorted ascending by sym true,0,0,q,`p=attr get `:dbwrite_sort_tp/sym,1,,p attribute applied to sym on disk -/ Test 13: sort - default config row used as fallback when no specific entry for table -run,0,0,q,defcfg:`:dbwrite_defcfg.csv,1,, -run,0,0,q,"defcfg 0:(""tabname,att,column,sort"";""default,,sym,1"")",1,,config with default entry only -run,0,0,q,dbwrite.loadconfig[defcfg],1,,load default-only config -run,0,0,q,`:dbwrite_def_tp/.d set `sym`price,1,,write column order file -run,0,0,q,`:dbwrite_def_tp/sym set `IBM`AAPL,1,,write unsorted sym column -run,0,0,q,`:dbwrite_def_tp/price set 200 100f,1,,write price column +/ Test 12: sort - default row used after resetting params with loadconfig null +run,0,0,q,dbwrite.loadconfig[`],1,,reset params to default row via null loadconfig +run,0,0,q,`:dbwrite_reset_tp/.d set `time`sym,1,,write column order file +run,0,0,q,`:dbwrite_reset_tp/time set 2024.01.01D09:00:00.000000000 2024.01.01D08:00:00.000000000,1,,write unsorted time column +run,0,0,q,`:dbwrite_reset_tp/sym set `IBM`AAPL,1,,write sym column +run,0,0,q,`:dbwrite_reset_tp/price set 200 100f,1,,write price column run,0,0,q,logcount:0,1,,reset counter -run,0,0,q,"dbwrite.sort[(`othertable;`:dbwrite_def_tp/)]",1,,sort using default fallback -true,0,0,q,`AAPL`IBM~exec sym from get `:dbwrite_def_tp/,1,,sorted ascending by sym via default params +run,0,0,q,"dbwrite.sort[(`othertable;`:dbwrite_reset_tp/)]",1,,sort using default row +true,0,0,q,(asc exec time from get `:dbwrite_reset_tp/)~exec time from get `:dbwrite_reset_tp/,1,,sorted ascending by time via default row true,0,0,q,4=logcount,1,,loginfo x2 (sorting start + sort-by) + logwarn x1 (using defaults) + loginfo x1 (finished) +/ Test 13: gc calls loginfo twice and does not throw +run,0,0,q,logcount:0,1,,reset counter +run,0,0,q,dbwrite.gc[],1,,run garbage collect 
+true,0,0,q,2=logcount,1,,loginfo called twice (start and end) + / Clean up all created on-disk files and directories after,0,0,q,@[hdel;`:dbwrite_defp_tp/;{}],1,,cleanup defaultparams test partition after,0,0,q,@[hdel;cfgfile;{}],1,,remove temp sort config after,0,0,q,@[hdel;badcols;{}],1,,remove temp bad columns file after,0,0,q,@[hdel;badatt;{}],1,,remove temp bad attribute file +after,0,0,q,@[hdel;emptycfg;{}],1,,remove empty config file after,0,0,q,@[hdel;`:dbwrite_attr_tp/;{}],1,,cleanup attr test partition after,0,0,q,@[hdel;`:dbwrite_sort_tp/;{}],1,,cleanup sort test partition -after,0,0,q,@[hdel;`:dbwrite_def_tp/;{}],1,,cleanup default config test partition -after,0,0,q,@[hdel;defcfg;{}],1,,remove default config csv +after,0,0,q,@[hdel;`:dbwrite_reset_tp/;{}],1,,cleanup reset test partition From c2ac97181ff43a0e6532dbc99d71acea8dab5718 Mon Sep 17 00:00:00 2001 From: Ruairi-wq2 Date: Wed, 3 Jun 2026 15:40:32 +0100 Subject: [PATCH 5/7] Update dbwrite.md: fix stale defaultparams references, add mock-logging test setup Co-Authored-By: Claude Sonnet 4.6 --- di/dbwrite/dbwrite.md | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/di/dbwrite/dbwrite.md b/di/dbwrite/dbwrite.md index 651ee3b2..1b84c9c0 100644 --- a/di/dbwrite/dbwrite.md +++ b/di/dbwrite/dbwrite.md @@ -11,7 +11,7 @@ Sort, attribute application, save-down manipulation, and garbage-collection util - Register per-table pre-write manipulation functions applied before save-down - Run `.Q.gc[]` with before/after memory logging - Sort and attribute behaviour driven by a CSV config file; a `default` row acts as a fallback -- Built-in `defaultparams` provides an out-of-the-box fallback (sort by `time` ascending) when no config file is loaded +- A built-in `default` row in `params` provides an out-of-the-box fallback (sort by `time` ascending) when no config file is loaded - All errors from sort, attribute application, and manipulation are caught and logged — they do 
not propagate --- @@ -102,13 +102,13 @@ Loads and validates the sort configuration CSV, storing the result in module sta | Parameter | Type | Description | |---|---|---| -| `file` | hsym | Path to the sort config CSV; pass null (`` ` ``) to warn and load `defaultparams` instead | +| `file` | hsym | Path to the sort config CSV; pass null (`` ` ``) to warn and reset `params` to the default row | **Returns** — generic null on success; throws on file/validation failure. Validation checks that all four required columns (`tabname`, `att`, `column`, `sort`) are present and that all `att` values are within `` ``p`s`g`u ``. Throws a descriptive error for invalid files or unreadable paths. -Passing null warns at `warn` level and loads `defaultparams` — it does not throw. +Passing null warns at `warn` level and resets `params` to the default row — it does not throw. ```q dbwrite.loadconfig[`:config/sort.csv] @@ -136,7 +136,7 @@ Sorts an on-disk table partition and applies configured attributes. **Returns** — generic null on success; `()` if no sort config is found for the table. -If `loadconfig` has not been called before `sort` is first invoked, `sort` automatically uses `defaultparams` — a single `default` row that sorts by `time` ascending with no attribute. +If `loadconfig` has not been called before `sort` is first invoked, `sort` automatically uses the built-in `default` row in `params` — sorts by `time` ascending with no attribute. Config lookup order within the loaded params: 1. Rows where `tabname` matches — used directly. 
@@ -249,7 +249,20 @@ k4unit:use`di.k4unit k4unit.moduletest`di.dbwrite ``` -Tests cover: dependency injection, `init` error on missing log dep, `manipulate` pass-through and registered function application and error recovery via `savedownmanipulation`, visibility of registrations, `postreplay` stub, `sort` with `defaultparams` fallback / explicit config / `default` row fallback / no-match skip, `loadconfig` with null file (warns and loads `defaultparams`) / valid file / unrecognised columns / unrecognised attributes / missing file, `applyattr` on missing and valid paths. +The test suite uses mock logging (no `di.log` dependency required). The mock wires up three no-op counters so log call counts can be asserted: + +```q +dbwrite:use`di.dbwrite +logcount:0 +loginfo:{[c;m] logcount::logcount+1} +logwarn:{[c;m] logcount::logcount+1} +logerr:{[c;m] logcount::logcount+1} +logdep:`info`warn`error!(loginfo;logwarn;logerr) +deps:(enlist`log)!enlist logdep +dbwrite.init[(::);deps] +``` + +Tests cover: dependency injection, `init` error on missing log dep, `manipulate` pass-through and registered function application and error recovery via `savedownmanipulation`, `postreplay` stub, `sort` with default row fallback / explicit config / `default` row fallback / no-match skip / empty input / wrong type, `loadconfig` with null file (warns and resets to default row) / valid file / unrecognised columns / unrecognised attributes / missing file / header-only file, `applyattr` on missing path / null column / invalid attribute / valid path, `gc` log count. 
--- From 931db34d75eb6f0e1070af301140d857e184e740 Mon Sep 17 00:00:00 2001 From: Ruairi-wq2 Date: Fri, 19 Jun 2026 11:51:18 +0100 Subject: [PATCH 6/7] Add savedown and upsert, remove TorQ-specific features - Add savedown[dir;part;tabname;data]: enumerates syms, applies p#sym, writes partition via .Q.par + set, then calls sort - Add upsert[dir;part;tabname;data]: appends to existing partition via functional amend, then re-sorts - Remove manipulate, savedownmanipulation, postreplay (TorQ-influenced) - Update export dict, init comment, tests, and docs accordingly Co-Authored-By: Claude Sonnet 4.6 --- di/dbwrite/dbwrite.md | 121 +++++++++++++++++++----------------------- di/dbwrite/dbwrite.q | 38 +++++++------ di/dbwrite/init.q | 2 +- di/dbwrite/test.csv | 61 ++++++++++++--------- 4 files changed, 113 insertions(+), 109 deletions(-) diff --git a/di/dbwrite/dbwrite.md b/di/dbwrite/dbwrite.md index 1b84c9c0..eaea7ed1 100644 --- a/di/dbwrite/dbwrite.md +++ b/di/dbwrite/dbwrite.md @@ -1,18 +1,19 @@ # di.dbwrite -Sort, attribute application, save-down manipulation, and garbage-collection utilities for kdb+ processes that persist data to disk (RDB, WDB, TickerLogReplay). +Write, sort, and attribute utilities for kdb+ processes that persist data to disk. 
--- ## Features +- Write in-memory tables to a date-partitioned HDB with `savedown` — enumerates syms, applies `p#` to `sym`, writes, then sorts +- Append rows to an existing partition with `upsert` — enumerates syms, appends, then re-sorts - Sort on-disk table partitions by configured columns using `xasc` - Apply kdb+ attributes (`p`, `s`, `g`, `u`) to on-disk columns after sort -- Register per-table pre-write manipulation functions applied before save-down -- Run `.Q.gc[]` with before/after memory logging - Sort and attribute behaviour driven by a CSV config file; a `default` row acts as a fallback - A built-in `default` row in `params` provides an out-of-the-box fallback (sort by `time` ascending) when no config file is loaded -- All errors from sort, attribute application, and manipulation are caught and logged — they do not propagate +- Run `.Q.gc[]` with before/after memory logging +- Errors from `sort` and `applyattr` are caught and logged; errors from `savedown` and `upsert` propagate to the caller --- @@ -34,7 +35,7 @@ The `log` dependency must be passed to `init`. The module throws if it is absent |---|---|---| | `tabname` | symbol | Table name, or `` `default `` as a catch-all fallback | | `att` | symbol | kdb+ attribute to apply: `p`, `s`, `g`, `u`, or empty for none | -| `column` | symbol | Column to sort or attribute; empty means attribute-only (no sort contribution) | +| `column` | symbol | Column to sort or attribute | | `sort` | boolean | `1b` — include in `xasc` sort key; `0b` — attribute only | Example `sort.csv`: @@ -58,12 +59,11 @@ Sorts `trade` by `sym`, applies `p` to `sym`.
Tables not listed fall back to `de | Function | Description | |---|---| | `init[config;deps]` | Wire injected dependencies; must be called first | +| `savedown[dir;part;tabname;data]` | Write in-memory table to HDB partition, enumerate syms, apply `p#sym`, then sort | +| `upsert[dir;part;tabname;data]` | Append rows to existing partition, enumerate syms, then re-sort | | `loadconfig[file]` | Load and validate the sort config CSV into module state | | `sort[d]` | Sort an on-disk partition and apply attributes per config | | `applyattr[dloc;colname;att]` | Apply a single kdb+ attribute to an on-disk column | -| `savedownmanipulation` | Dict mapping table name → unary function; amend to register pre-write transformations | -| `manipulate[t;x]` | Apply a registered pre-write manipulation to a table | -| `postreplay[d;p]` | Post-EOD stub; override to add custom logic | | `gc[]` | Run `.Q.gc[]` and log before/after memory stats | --- @@ -94,6 +94,50 @@ dbwrite.init[(::);(enlist`log)!enlist logdep] --- +### `savedown[dir;part;tabname;data]` + +Writes an in-memory table to a date-partitioned HDB. Enumerates symbol columns against the HDB sym file, applies `p#` to `sym` if present, writes the partition, then calls `sort` to sort and apply attributes per the loaded config. + +**Parameters** + +| Parameter | Type | Description | +|---|---|---| +| `dir` | hsym | HDB root directory (e.g. `` `:hdb ``) | +| `part` | date/month/int | Partition value | +| `tabname` | symbol | Table name — determines the partition subdirectory | +| `data` | table | In-memory table to write | + +**Returns** — generic null on success; throws on write failure. + +If `loadconfig` has not been called, the built-in default row applies (sort by `time` ascending). If the table has no `sym` column, the `p#` step is skipped; symbol columns are still enumerated with `.Q.en`. Because `p#` is applied before the sort, rows for each `sym` must already be contiguous in `data`, otherwise `p#` signals `u-fail` and nothing is written. + +```q +dbwrite.savedown[`:hdb;2024.01.02;`trade;data] +``` + +--- + +### `upsert[dir;part;tabname;data]` + +Appends rows to an existing on-disk partition then re-sorts.
Enumerates symbol columns before appending. The partition must already exist — use `savedown` for the initial write. + +**Parameters** + +| Parameter | Type | Description | +|---|---|---| +| `dir` | hsym | HDB root directory | +| `part` | date/month/int | Partition value | +| `tabname` | symbol | Table name | +| `data` | table | Rows to append | + +**Returns** — generic null on success; throws if the partition does not exist or on write failure. + +```q +dbwrite.upsert[`:hdb;2024.01.02;`trade;latedata] +``` + +--- + ### `loadconfig[file]` Loads and validates the sort configuration CSV, storing the result in module state for use by `sort`. @@ -136,8 +180,6 @@ Sorts an on-disk table partition and applies configured attributes. **Returns** — generic null on success; `()` if no sort config is found for the table. -If `loadconfig` has not been called before `sort` is first invoked, `sort` automatically uses the built-in `default` row in `params` — sorts by `time` ascending with no attribute. - Config lookup order within the loaded params: 1. Rows where `tabname` matches — used directly. 2. Rows where `tabname = \`default` — used with a `warn` log. @@ -173,61 +215,6 @@ dbwrite.applyattr[`:hdb/2024.01.02/trade/;`sym;`p] --- -### `savedownmanipulation` - -A dictionary mapping table name (symbol) to a unary manipulation function. Amend this before EOD to register per-table pre-write transformations. - -```q -dbwrite.savedownmanipulation[`trade]:{[x] update sym:`p#sym from x} -``` - -Manipulations are applied by `manipulate[t;x]`. An empty dict (the default) means no manipulation is applied to any table. - ---- - -### `manipulate[t;x]` - -Applies a registered pre-write manipulation to table `x` of type `t`. 
- -**Parameters** - -| Parameter | Type | Description | -|---|---|---| -| `t` | symbol | Table name used to look up the registered manipulation function | -| `x` | table | Table data to transform | - -**Returns** — modified table, or original table unmodified if no manipulation is registered for `t` or the registered function throws. - -On error the original table is returned unchanged and the error is logged at `error` level. Register functions in `savedownmanipulation` before calling. - -```q -dbwrite.savedownmanipulation[`trade]:{[x] update sym:`p#sym from x} -data:dbwrite.manipulate[`trade;data] -``` - ---- - -### `postreplay[d;p]` - -Post-EOD stub called after all tables have been written and sorted. - -**Parameters** - -| Parameter | Type | Description | -|---|---|---| -| `d` | hsym | HDB directory | -| `p` | date | Partition value | - -**Returns** — generic null. - -This is a no-op by default. Override at the call site to add custom post-replay logic. - -```q -dbwrite.postreplay[`:hdb;2024.01.02] -``` - ---- - ### `gc[]` Runs `.Q.gc[]` and logs before/after memory statistics. @@ -262,12 +249,12 @@ deps:(enlist`log)!enlist logdep dbwrite.init[(::);deps] ``` -Tests cover: dependency injection, `init` error on missing log dep, `manipulate` pass-through and registered function application and error recovery via `savedownmanipulation`, `postreplay` stub, `sort` with default row fallback / explicit config / `default` row fallback / no-match skip / empty input / wrong type, `loadconfig` with null file (warns and resets to default row) / valid file / unrecognised columns / unrecognised attributes / missing file / header-only file, `applyattr` on missing path / null column / invalid attribute / valid path, `gc` log count. 
+Tests cover: dependency injection, `init` error on missing log dep, `savedown` write and sort, `savedown` without sym column, `upsert` append and re-sort, `upsert` error on non-existent partition, `sort` with default row fallback / explicit config / `default` row fallback / no-match skip / empty input / wrong type, `loadconfig` with null file / valid file / unrecognised columns / unrecognised attributes / missing file / header-only file, `applyattr` on missing path / null column / invalid attribute / valid path, `gc` log count. --- ## Exported symbols ```q -export:([init;sort;applyattr;loadconfig;manipulate;savedownmanipulation;postreplay;gc]) +export:([init;savedown;upsert;sort;applyattr;loadconfig;gc]) ``` diff --git a/di/dbwrite/dbwrite.q b/di/dbwrite/dbwrite.q index fbb1df78..95d05d3b 100644 --- a/di/dbwrite/dbwrite.q +++ b/di/dbwrite/dbwrite.q @@ -1,8 +1,6 @@ / sort params table - default row sorts all tables by time ascending params:([] tabname:enlist`default; att:enlist`; column:enlist`time; sort:enlist 1b); -/ save-down manipulation registry: tabname -> unary function -savedownmanipulation:()!(); / load and validate sort.csv into .z.M.params / file: hsym path; null warns and resets params to default row @@ -69,19 +67,27 @@ sort:{[d] ]] }; -/ apply registered pre-write manipulation to table x of type t -/ returns modified table; on error logs and returns original unmodified table -manipulate:{[t;x] - $[t in key .z.M.savedownmanipulation; - @[.z.M.savedownmanipulation[t];x; - {[x;e].z.m.logerr[`dbwrite;"save-down manipulation failed: ",e];x}[x]]; - x] + +/ write table to a date-partitioned hdb: enumerate syms, apply p# to sym if present, write, then sort +/ dir: hdb root (hsym); part: partition value (date/month/int); tabname: symbol; data: in-memory table +savedown:{[dir;part;tabname;data] + .z.m.loginfo[`dbwrite;"saving ",string[tabname]," partition ",string[part]," to ",string dir]; + path:.Q.par[dir;part;tabname]; + data:.Q.en[dir;data]; + path set 
$[`sym in cols data;@[data;`sym;{`p#x}];data]; + sort[(tabname;path)]; + .z.m.loginfo[`dbwrite;"finished saving ",string tabname]; }; -/ post-EOD hook - called after all tables written and sorted -/ d: hdb directory (hsym), p: partition value (date) -/ stub: override at the call site to add custom post-replay logic -postreplay:{[d;p]}; +/ upsert data into an existing on-disk partition and re-sort +/ dir: hdb root (hsym); part: partition value; tabname: symbol; data: in-memory table +upsert:{[dir;part;tabname;data] + .z.m.loginfo[`dbwrite;"upserting ",string[tabname]," partition ",string[part]," in ",string dir]; + path:.Q.par[dir;part;tabname]; + .[path;();,;.Q.en[dir;data]]; + sort[(tabname;path)]; + .z.m.loginfo[`dbwrite;"finished upserting ",string tabname]; + }; / format current process memory stats as a loggable string memstats:{[]"mem stats: ",{"; "sv "=" sv'flip(string key x;(string value x),\:" MB")}`long$.Q.w[]%1048576}; @@ -94,10 +100,8 @@ gc:{ }; init:{[config;deps] - / config: dict with optional keys - / `savedownmanipulation: tabname!function dict of pre-write manipulation functions - / deps: `log!(logdict) - / `log: `info`warn`error!(infofunc;warnfunc;errfunc) - required + / config: unused, pass (::) + / deps: `log!(logdict) - `info`warn`error!(infofunc;warnfunc;errfunc) - required logdict:$[99h=type deps;$[(`log in key deps) and not (::)~deps`log;deps`log;()!()];()!()]; if[not count logdict; '"di.dbwrite: log dependency is required; pass `info`warn`error functions - see di.log or refer to confluence documentation"; diff --git a/di/dbwrite/init.q b/di/dbwrite/init.q index bd2cd900..d69bd034 100644 --- a/di/dbwrite/init.q +++ b/di/dbwrite/init.q @@ -3,4 +3,4 @@ \l ::dbwrite.q -export:([init;sort;applyattr;loadconfig;manipulate;savedownmanipulation;postreplay;gc]) \ No newline at end of file +export:([init;savedown;upsert;sort;applyattr;loadconfig;gc]) \ No newline at end of file diff --git a/di/dbwrite/test.csv b/di/dbwrite/test.csv index 
6eec3123..047a3ca8 100644 --- a/di/dbwrite/test.csv +++ b/di/dbwrite/test.csv @@ -18,36 +18,25 @@ true,0,0,q,logdep[`error]~.m.di.0dbwrite.logerr,1,1,injected error function stor fail,0,0,q,dbwrite.init[(::);(::)],1,1,init without log dep throws an error fail,0,0,q,dbwrite.init[(::);`log!(::)],1,1,init with log set to (::) throws an error -/ Test 3: manipulate - pass-through cases (no functions registered) -run,0,0,q,"tbl:([]sym:`AAPL`IBM;price:100 200f)",1,,sample table -true,0,0,q,"tbl~dbwrite.manipulate[`trade;tbl]",1,,unregistered table name returns original -true,0,0,q,"tbl~dbwrite.manipulate[`;tbl]",1,,null table name returns original -true,0,0,q,"(0#tbl)~dbwrite.manipulate[`other;0#tbl]",1,,empty table returned unchanged -true,0,0,q,42~dbwrite.manipulate[`notregistered;42],1,,non-table input returns unchanged when unregistered - -/ Test 4: postreplay returns generic null -true,0,0,q,(::)~dbwrite.postreplay[`:hdb;2024.01.01],1,,stub returns generic null -true,0,0,q,(::)~dbwrite.postreplay[`;0Nd],1,,null args return generic null - -/ Test 5: sort uses default row when table name not in config +/ Test 3: sort uses default row when table name not in config run,0,0,q,`:dbwrite_defp_tp/.d set `time`sym,1,,write column order file run,0,0,q,`:dbwrite_defp_tp/time set 2024.01.01D09:00:00.000000000 2024.01.01D08:00:00.000000000,1,,write unsorted time column run,0,0,q,`:dbwrite_defp_tp/sym set `IBM`AAPL,1,,write sym column run,0,0,q,"dbwrite.sort[(`anytable;`:dbwrite_defp_tp/)]",1,,sort using default row true,0,0,q,(asc exec time from get `:dbwrite_defp_tp/)~exec time from get `:dbwrite_defp_tp/,1,,time column sorted ascending by default row -/ Test 6: loadconfig null resets params to default row +/ Test 4: loadconfig null resets params to default row run,0,0,q,dbwrite.loadconfig[`],1,,call loadconfig with null file true,0,0,q,.m.di.0dbwrite.params~([] tabname:enlist`default; att:enlist`; column:enlist`time; sort:enlist 1b),1,,params reset to default row -/ Test 7: 
loadconfig - valid config file loaded and parsed correctly +/ Test 5: loadconfig - valid config file loaded and parsed correctly run,0,0,q,cfgfile:`:dbwrite_test.csv,1,,temp sort config path run,0,0,q,"cfgfile 0:(""tabname,att,column,sort"";""trade,p,sym,1"";""trade,,price,0"")",1,,write test sort config run,0,0,q,dbwrite.loadconfig[cfgfile],1,,loadconfig with valid csv succeeds true,0,0,q,2=count .m.di.0dbwrite.params,1,,two rows loaded into params true,0,0,q,all `trade=exec tabname from .m.di.0dbwrite.params,1,,both rows are for trade table -/ Test 7 cont: loadconfig - invalid and edge-case inputs +/ Test 5 cont: loadconfig - invalid and edge-case inputs true,0,0,q,@[dbwrite.loadconfig;`:nonexistent_file_xyz;{1b}],1,,nonexistent file throws true,0,0,q,@[dbwrite.loadconfig;42;{1b}],1,,non-symbol arg throws run,0,0,q,badcols:`:dbwrite_badcols.csv,1,, @@ -61,12 +50,12 @@ run,0,0,q,"emptycfg 0:enlist""tabname,att,column,sort""",1,,csv with header only run,0,0,q,dbwrite.loadconfig[emptycfg],1,,loadconfig with header-only csv true,0,0,q,0=count .m.di.0dbwrite.params,1,,params is empty after header-only csv -/ Test 8: applyattr - error cases +/ Test 6: applyattr - error cases run,0,0,q,logcount:0,1,,reset counter run,0,0,q,"dbwrite.applyattr[`:nonexist_attr_dir/;`sym;`p]",1,,apply to non-existent path true,0,0,q,2=logcount,1,,loginfo (before attempt) and logerr (on failure) each called once -/ Test 8 cont: applyattr - null column name logs error and does not throw +/ Test 6 cont: applyattr - null column name logs error and does not throw run,0,0,q,`:dbwrite_attr_tp/.d set `sym`price,1,,write column order file run,0,0,q,`:dbwrite_attr_tp/sym set `IBM`AAPL`MSFT,1,,write sym column run,0,0,q,`:dbwrite_attr_tp/price set 200 100 300f,1,,write price column @@ -74,30 +63,30 @@ run,0,0,q,logcount:0,1,,reset counter run,0,0,q,"dbwrite.applyattr[`:dbwrite_attr_tp/;`;`p]",1,,apply with null column name true,0,0,q,2=logcount,1,,loginfo and logerr both called for null column -/ Test 
8 cont: applyattr - invalid att logs error +/ Test 6 cont: applyattr - invalid att logs error run,0,0,q,logcount:0,1,,reset counter run,0,0,q,"dbwrite.applyattr[`:dbwrite_attr_tp/;`sym;`z]",1,,apply with invalid attribute true,0,0,q,2=logcount,1,,loginfo and logerr both called for invalid att -/ Test 9: applyattr - attribute applied and logged when path exists +/ Test 7: applyattr - attribute applied and logged when path exists run,0,0,q,logcount:0,1,,reset counter run,0,0,q,"dbwrite.applyattr[`:dbwrite_attr_tp/;`sym;`p]",1,,apply p attr to sym column true,0,0,q,1=logcount,1,,loginfo called once (no error) true,0,0,q,`p=attr get `:dbwrite_attr_tp/sym,1,,p attribute applied on disk -/ Test 10: sort - skip when table not in config and no default row +/ Test 8: sort - skip when table not in config and no default row run,0,0,q,dbwrite.loadconfig[cfgfile],1,,reload trade-only config (no default row) run,0,0,q,logcount:0,1,,reset counter true,0,0,q,()~dbwrite.sort[`no_params_table_xyz],1,,returns () when table not in config true,0,0,q,2=logcount,1,,loginfo (sorting start) and logwarn (skip) both called true,0,0,q,()~dbwrite.sort[()],1,,empty list returns () -/ Test 10 cont: sort - wrong type logs error and returns () +/ Test 8 cont: sort - wrong type logs error and returns () run,0,0,q,logcount:0,1,,reset counter true,0,0,q,()~dbwrite.sort[42],1,,wrong type (long) returns () true,0,0,q,1=logcount,1,,logerr called once for wrong type -/ Test 11: sort - sorting and attribute application when config entry exists for table +/ Test 9: sort - sorting and attribute application when config entry exists for table run,0,0,q,`:dbwrite_sort_tp/.d set `sym`price,1,,write column order file run,0,0,q,`:dbwrite_sort_tp/sym set `IBM`AAPL`MSFT,1,,write unsorted sym column run,0,0,q,`:dbwrite_sort_tp/price set 200 100 300f,1,,write price column @@ -105,7 +94,7 @@ run,0,0,q,"dbwrite.sort[(`trade;`:dbwrite_sort_tp/)]",1,,sort trade table in tes true,0,0,q,`AAPL`IBM`MSFT~exec sym from get 
`:dbwrite_sort_tp/,1,,sorted ascending by sym true,0,0,q,`p=attr get `:dbwrite_sort_tp/sym,1,,p attribute applied to sym on disk -/ Test 12: sort - default row used after resetting params with loadconfig null +/ Test 10: sort - default row used after resetting params with loadconfig null run,0,0,q,dbwrite.loadconfig[`],1,,reset params to default row via null loadconfig run,0,0,q,`:dbwrite_reset_tp/.d set `time`sym,1,,write column order file run,0,0,q,`:dbwrite_reset_tp/time set 2024.01.01D09:00:00.000000000 2024.01.01D08:00:00.000000000,1,,write unsorted time column @@ -116,11 +105,34 @@ run,0,0,q,"dbwrite.sort[(`othertable;`:dbwrite_reset_tp/)]",1,,sort using defaul true,0,0,q,(asc exec time from get `:dbwrite_reset_tp/)~exec time from get `:dbwrite_reset_tp/,1,,sorted ascending by time via default row true,0,0,q,4=logcount,1,,loginfo x2 (sorting start + sort-by) + logwarn x1 (using defaults) + loginfo x1 (finished) -/ Test 13: gc calls loginfo twice and does not throw +/ Test 11: gc calls loginfo twice and does not throw run,0,0,q,logcount:0,1,,reset counter run,0,0,q,dbwrite.gc[],1,,run garbage collect true,0,0,q,2=logcount,1,,loginfo called twice (start and end) +/ Test 12: savedown - write table to hdb partition +run,0,0,q,dbwrite.loadconfig[`],1,,ensure default sort params (by time) +run,0,0,q,"sdtbl:([]time:2024.01.01D09:00:00.000000000 2024.01.01D08:00:00.000000000;sym:`IBM`AAPL;price:100 200f)",1,,unsorted test table +run,0,0,q,"dbwrite.savedown[`:dbwrite_sd_hdb;2024.01.01;`trade;sdtbl]",1,,write to hdb partition +run,0,0,q,"sdpath:.Q.par[`:dbwrite_sd_hdb;2024.01.01;`trade]",1,,partition path for assertions +true,0,0,q,2=count get sdpath,1,,two rows written to partition +true,0,0,q,(asc exec time from get sdpath)~exec time from get sdpath,1,,rows sorted by time ascending after sort +true,0,0,q,0 Date: Fri, 19 Jun 2026 14:40:17 +0100 Subject: [PATCH 7/7] Fix upsert naming, path construction, and add appenddown existence guard - Rename upsert to appenddown 
(upsert is a reserved word in kdb+) - Remove auto-sort from appenddown; caller sorts explicitly when done - Fix savedown/appenddown to use trailing-slash path via ` sv (.Q.par[...];`) so xasc treats it as a splayed table directory (without slash, set writes a binary file and xasc fails with "Not a directory") - Add existence guard to appenddown: throws if partition path does not exist - Update init.q export dict, tests, and docs accordingly Co-Authored-By: Claude Sonnet 4.6 --- di/dbwrite/dbwrite.md | 20 +++++++++++++------- di/dbwrite/dbwrite.q | 16 +++++++++------- di/dbwrite/init.q | 2 +- di/dbwrite/test.csv | 23 ++++++++++++++--------- 4 files changed, 37 insertions(+), 24 deletions(-) diff --git a/di/dbwrite/dbwrite.md b/di/dbwrite/dbwrite.md index eaea7ed1..c9986da3 100644 --- a/di/dbwrite/dbwrite.md +++ b/di/dbwrite/dbwrite.md @@ -7,7 +7,7 @@ Write, sort, and attribute utilities for kdb+ processes that persist data to dis ## Features - Write in-memory tables to a date-partitioned HDB with `savedown` — enumerates syms, applies `p#` to `sym`, writes, then sorts -- Append rows to an existing partition with `upsert` — enumerates syms, appends, then re-sorts +- Append rows to an existing partition with `appenddown` — enumerates syms and appends; sort separately when the partition is complete - Sort on-disk table partitions by configured columns using `xasc` - Apply kdb+ attributes (`p`, `s`, `g`, `u`) to on-disk columns after sort - Sort and attribute behaviour driven by a CSV config file; a `default` row acts as a fallback @@ -60,7 +60,7 @@ Sorts `trade` by `sym`, applies `p` to `sym`. 
Tables not listed fall back to `de |---|---| | `init[config;deps]` | Wire injected dependencies; must be called first | | `savedown[dir;part;tabname;data]` | Write in-memory table to HDB partition, enumerate syms, apply `p#sym`, then sort | -| `upsert[dir;part;tabname;data]` | Append rows to existing partition, enumerate syms, then re-sort | +| `appenddown[dir;part;tabname;data]` | Append rows to existing partition and enumerate syms; does not sort | | `loadconfig[file]` | Load and validate the sort config CSV into module state | | `sort[d]` | Sort an on-disk partition and apply attributes per config | | `applyattr[dloc;colname;att]` | Apply a single kdb+ attribute to an on-disk column | @@ -117,9 +117,11 @@ dbwrite.savedown[`:hdb;2024.01.02;`trade;data] --- -### `upsert[dir;part;tabname;data]` +### `appenddown[dir;part;tabname;data]` -Appends rows to an existing on-disk partition then re-sorts. Enumerates symbol columns before appending. The partition must already exist — use `savedown` for the initial write. +Appends rows to an existing on-disk partition. Enumerates symbol columns then appends; does not sort. Call `sort` explicitly when the partition is complete. + +Keeping sort separate allows multiple intraday appends without the cost of re-sorting a growing partition on each call. **Parameters** @@ -133,7 +135,11 @@ Appends rows to an existing on-disk partition then re-sorts. Enumerates symbol c **Returns** — generic null on success; throws if the partition does not exist or on write failure. 
```q -dbwrite.upsert[`:hdb;2024.01.02;`trade;latedata] +/ intraday: append each batch as it arrives +dbwrite.appenddown[`:hdb;2024.01.02;`trade;batch] + +/ end-of-day: sort once when done +dbwrite.sort[(`trade;.Q.par[`:hdb;2024.01.02;`trade])] ``` --- @@ -249,12 +255,12 @@ deps:(enlist`log)!enlist logdep dbwrite.init[(::);deps] ``` -Tests cover: dependency injection, `init` error on missing log dep, `savedown` write and sort, `savedown` without sym column, `upsert` append and re-sort, `upsert` error on non-existent partition, `sort` with default row fallback / explicit config / `default` row fallback / no-match skip / empty input / wrong type, `loadconfig` with null file / valid file / unrecognised columns / unrecognised attributes / missing file / header-only file, `applyattr` on missing path / null column / invalid attribute / valid path, `gc` log count. +Tests cover: dependency injection, `init` error on missing log dep, `savedown` write and sort, `savedown` without sym column, `appenddown` append without sort, explicit `sort` after `appenddown`, `appenddown` error on non-existent partition, `sort` with default row fallback / explicit config / `default` row fallback / no-match skip / empty input / wrong type, `loadconfig` with null file / valid file / unrecognised columns / unrecognised attributes / missing file / header-only file, `applyattr` on missing path / null column / invalid attribute / valid path, `gc` log count. 
--- ## Exported symbols ```q -export:([init;savedown;upsert;sort;applyattr;loadconfig;gc]) +export:([init;savedown;appenddown;sort;applyattr;loadconfig;gc]) ``` diff --git a/di/dbwrite/dbwrite.q b/di/dbwrite/dbwrite.q index 95d05d3b..1e3fdc9c 100644 --- a/di/dbwrite/dbwrite.q +++ b/di/dbwrite/dbwrite.q @@ -72,21 +72,23 @@ sort:{[d] / dir: hdb root (hsym); part: partition value (date/month/int); tabname: symbol; data: in-memory table savedown:{[dir;part;tabname;data] .z.m.loginfo[`dbwrite;"saving ",string[tabname]," partition ",string[part]," to ",string dir]; - path:.Q.par[dir;part;tabname]; + path:` sv (.Q.par[dir;part;tabname];`); data:.Q.en[dir;data]; path set $[`sym in cols data;@[data;`sym;{`p#x}];data]; sort[(tabname;path)]; .z.m.loginfo[`dbwrite;"finished saving ",string tabname]; }; -/ upsert data into an existing on-disk partition and re-sort +/ append data to an existing on-disk partition; enumerate syms but do not sort +/ call sort separately when the partition is complete / dir: hdb root (hsym); part: partition value; tabname: symbol; data: in-memory table -upsert:{[dir;part;tabname;data] - .z.m.loginfo[`dbwrite;"upserting ",string[tabname]," partition ",string[part]," in ",string dir]; - path:.Q.par[dir;part;tabname]; +appenddown:{[dir;part;tabname;data] + .z.m.loginfo[`dbwrite;"appending ",string[tabname]," partition ",string[part]," in ",string dir]; + path:` sv (.Q.par[dir;part;tabname];`); + if[not count @[key;path;{`$()}]; + '"appenddown: partition does not exist at ",string path]; .[path;();,;.Q.en[dir;data]]; - sort[(tabname;path)]; - .z.m.loginfo[`dbwrite;"finished upserting ",string tabname]; + .z.m.loginfo[`dbwrite;"finished appending ",string tabname]; }; / format current process memory stats as a loggable string diff --git a/di/dbwrite/init.q b/di/dbwrite/init.q index d69bd034..917d5dd6 100644 --- a/di/dbwrite/init.q +++ b/di/dbwrite/init.q @@ -3,4 +3,4 @@ \l ::dbwrite.q -export:([init;savedown;upsert;sort;applyattr;loadconfig;gc]) \ No 
newline at end of file +export:([init;savedown;appenddown;sort;applyattr;loadconfig;gc]) \ No newline at end of file diff --git a/di/dbwrite/test.csv b/di/dbwrite/test.csv index 047a3ca8..c742d5ff 100644 --- a/di/dbwrite/test.csv +++ b/di/dbwrite/test.csv @@ -114,7 +114,7 @@ true,0,0,q,2=logcount,1,,loginfo called twice (start and end) run,0,0,q,dbwrite.loadconfig[`],1,,ensure default sort params (by time) run,0,0,q,"sdtbl:([]time:2024.01.01D09:00:00.000000000 2024.01.01D08:00:00.000000000;sym:`IBM`AAPL;price:100 200f)",1,,unsorted test table run,0,0,q,"dbwrite.savedown[`:dbwrite_sd_hdb;2024.01.01;`trade;sdtbl]",1,,write to hdb partition -run,0,0,q,"sdpath:.Q.par[`:dbwrite_sd_hdb;2024.01.01;`trade]",1,,partition path for assertions +run,0,0,q,"sdpath:` sv (.Q.par[`:dbwrite_sd_hdb;2024.01.01;`trade];`)",1,,partition path with trailing slash for xasc compatibility true,0,0,q,2=count get sdpath,1,,two rows written to partition true,0,0,q,(asc exec time from get sdpath)~exec time from get sdpath,1,,rows sorted by time ascending after sort true,0,0,q,0