From c7f8ed0bee748eb2ad005657410ef5277d81623a Mon Sep 17 00:00:00 2001 From: ascottDI Date: Thu, 11 Jun 2026 16:54:00 +0100 Subject: [PATCH 1/6] created framework for di.sort module, adding required functions and test.csv. working on ensuring code is robust --- di/sort/init.q | 5 ++ di/sort/sort.md | 104 +++++++++++++++++++++++++++++++++++++++++ di/sort/sort.q | 119 +++++++++++++++++++++++++++++++++++++++++++++++ di/sort/test.csv | 117 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 345 insertions(+) create mode 100644 di/sort/init.q create mode 100644 di/sort/sort.md create mode 100644 di/sort/sort.q create mode 100644 di/sort/test.csv diff --git a/di/sort/init.q b/di/sort/init.q new file mode 100644 index 00000000..00bcdb84 --- /dev/null +++ b/di/sort/init.q @@ -0,0 +1,5 @@ +/ library for sorting and applying attributes to on-disk kdb+ tables + +\l ::sort.q + +export:([init;getsortcsv;sorttab;getparams]) diff --git a/di/sort/sort.md b/di/sort/sort.md new file mode 100644 index 00000000..b916d394 --- /dev/null +++ b/di/sort/sort.md @@ -0,0 +1,104 @@ +# di.sort + +Library for sorting and applying attributes to on-disk kdb+ tables. Driven by a configuration CSV that specifies which columns to sort and which attributes to apply per table. 
+ +## Dependencies + +**Injectable (required):** +- `log` — `{[c;m]}` functions for `info`, `warn`, `error` + +**Hard:** none + +## Init + +```q +srt:use`di.sort + +/ log only - savedir defaults to `:sort.csv +log:use`di.log +logdep:`info`warn`error!(log.info;log.warn;log.error) +srt.init[enlist[`log]!enlist logdep] + +/ log + custom sort.csv path +srt.init[`log`savedir!(logdep; `:config/sort.csv)] + +/ using a custom logger +mylog:`info`warn`error!( + {[c;m] -1 "INFO [",string[c],"] ",m;}; + {[c;m] -1 "WARN [",string[c],"] ",m;}; + {[c;m] -2 "ERROR [",string[c],"] ",m;}); +srt.init[`log`savedir!(mylog; `:config/sort.csv)] +``` + +## Exported Functions + +### `init[deps]` + +Wire injectable dependencies and optional config. Must be called before any other function. + +| Key | Required | Type | Description | +|---|---|---|---| +| `` `log `` | yes | dict | `info`, `warn`, `error` functions | +| `` `savedir `` | no | hsym | Path to sort.csv; defaults to `` `:sort.csv `` | + +If `params` is empty when `sorttab` is called, it auto-loads from `savedir`. + +### `getsortcsv[file]` + +Load and validate sort configuration from a CSV file. + +| Parameter | Type | Description | +|---|---|---| +| `file` | hsym | Path to sort.csv | + +**sort.csv format:** +``` +tabname,att,column,sort +trade,p,sym,1 +trade,s,time,1 +quote,p,sym,1 +quote,s,time,1 +default,p,sym,1 +``` + +| Column | Description | +|---|---| +| `tabname` | Table name, or `` `default `` to apply to all unlisted tables | +| `att` | Attribute to apply: `` `p`s`g`u `` or `` ` `` for none | +| `column` | Column to sort/attribute, or `` ` `` for no sort | +| `sort` | `1b` to sort by this column, `0b` to only apply attribute | + +### `sorttab[d]` + +Sort and apply attributes to on-disk partitions for a single table. 
+ +| Parameter | Type | Description | +|---|---|---| +| `d` | list | 2-element: `` (`tablename; `:/hdb/2000.01.01/trade`:/hdb/2000.01.02/trade) `` | + +Looks up `params` by `tablename`, falls back to `default` row, skips if neither found. + +```q +srt.getsortcsv[`:config/sort.csv] +srt.sorttab[(`trade; `:/hdb/2000.01.01/trade`:/hdb/2000.01.02/trade)] +``` + +### `getparams[]` + +Return the current sort configuration table loaded by `getsortcsv`. + +```q +srt.getparams[] +``` + +## Example + +```q +srt:use`di.sort +log:use`di.log +logdep:`info`warn`error!(log.info;log.warn;log.error) + +srt.init[enlist[`log]!enlist logdep] +srt.getsortcsv[`:config/sort.csv] +srt.sorttab[(`trade; `:/hdb/2000.01.01/trade`:/hdb/2000.01.02/trade)] +``` diff --git a/di/sort/sort.q b/di/sort/sort.q new file mode 100644 index 00000000..2baf68ee --- /dev/null +++ b/di/sort/sort.q @@ -0,0 +1,119 @@ +/ library for sorting and applying attributes to on-disk kdb+ tables + +/ sort configuration table - populated by getsortcsv, read via getparams +params:([] tabname:`symbol$(); att:`symbol$(); column:`symbol$(); sort:`boolean$()); + +/ valid attribute symbols accepted in sort.csv att column +validatts:``p`s`g`u; + +init:{[deps] + / wire injectable log dependency (required) and optional config from deps dict + / optional keys: `savedir - hsym path to sort.csv, defaults to `:sort.csv + logdict:$[99h=type deps; + $[`log in key deps; + $[99h=type deps`log; deps`log; ()!()]; + ()!()]; + ()!()]; + if[not count logdict; + '"di.sort: log dependency is required; pass `info`warn`error functions - ", + "see di.log for a default implementation or refer to confluence documentation"; + ]; + .z.m.loginfo:logdict`info; + .z.m.logwarn:logdict`warn; + .z.m.logerr:logdict`error; + .z.m.defaultfile:$[`savedir in key deps; deps`savedir; `:sort.csv]; + }; + +/ internal - log and rethrow a csv load error +csvloaderr:{[f;e] + / logs the failure then rethrows so getsortcsv surfaces the error to the caller + 
.z.m.logerr[`getsortcsv;"failed to open ",string[f],". the error was: ",e]; + '"failed to open ",string[f],". the error was: ",e; + }; + +getsortcsv:{[file] + / load and validate sort configuration from a CSV file; populates the params table + file:hsym file; + rawparams:@[ + {.z.m.loginfo[`getsortcsv;"retrieving sort settings from ",string x]; ("SSSB";enlist",")0:x}; + file; + csvloaderr[file] + ]; + spc:cols rawparams; + badcols:spc where not spc in `tabname`att`column`sort; + if[count badcols; + '"unrecognised columns (",(", " sv string badcols),") in ",string file]; + missingcols:(`tabname`att`column`sort) where not (`tabname`att`column`sort) in spc; + if[count missingcols; + '"missing required columns (",(", " sv string missingcols),") in ",string file]; + at:distinct rawparams`att; + badatts:at where not at in validatts; + if[count badatts; + '"unrecognised type of attribute - ",(", " sv string badatts)]; + .z.m.params:rawparams; + .z.m.loginfo[`getsortcsv;"loaded ",(string count rawparams)," sort config rows from ",string file]; + }; + +sorttab:{[d] + / sort and apply attributes to on-disk partitions for one table + st:string t:first d; + if[0=count params; getsortcsv[.z.m.defaultfile]]; + .z.m.loginfo[`sorttab;"sorting the ",st," table"]; + sp:getsortparams[t;st]; + if[not count sp; :()]; + sortdir[sp] each distinct (),last d; + .z.m.loginfo[`sorttab;"finished sorting the ",st," table"]; + }; + +getparams:{[] + / return the current sort configuration table + :params; + }; + +/ internal - look up sort params for a table, falling back to the default row +getsortparams:{[t;st] + / return table-specific params, then default row, then empty table if neither found + if[count tabsp:select from params where tabname=t; + .z.m.loginfo[`sorttab;"sort parameters have been retrieved for: ",st]; + :tabsp]; + if[count defsp:select from params where tabname=`default; + .z.m.loginfo[`sorttab;"no sort parameters have been specified for: ",st,". 
using default parameters"]; + :defsp]; + .z.m.logwarn[`sorttab;"no sort parameters have been found for: ",st,". the table will not be sorted"]; + :0#params; + }; + +/ internal - log a sort failure without rethrowing so remaining partitions still run +sorterr:{[sc;dl;e] + / called as error handler in sortdir; best-effort — a single partition failure should not halt the run + .z.m.logerr[`sorttab;"failed to sort ",string[dl]," by these columns: ",(", " sv string sc),". the error was: ",e]; + :(); + }; + +/ internal - sort columns and apply attributes for a single on-disk partition directory +sortdir:{[sp;dloc] + / sort by columns flagged sort=1b, then apply attributes to columns with a non-null att + if[count sortcols:exec column from sp where sort, not null column; + .z.m.loginfo[`sorttab;"sorting ",string[dloc]," by these columns: ",", " sv string sortcols]; + .[xasc;(sortcols;dloc); + sorterr[sortcols;dloc]]]; + if[count attrcols:select column,att from sp where not null att; + applyattr[dloc;;]'[attrcols`column;attrcols`att]]; + }; + +/ internal - log an attribute application failure without rethrowing +attrerr:{[dl;cn;at;e] + / called as error handler in applyattr; logs failure and continues + .z.m.logerr[`applyattr;"unable to apply ",string[at]," attr to the ",string[cn]," column in ",string[dl],". 
the error was: ",e]; + :(); + }; + +/ internal - apply a single attribute to a specific column in an on-disk partition +applyattr:{[dloc;colname;att] + / null attributes are filtered upstream by sorttab; guard here for safety + if[null att; :()]; + .z.m.loginfo[`applyattr;"applying ",string[att]," attr to the ",string[colname]," column in ",string dloc]; + .[{@[x;y;z#]}; + (dloc;colname;att); + attrerr[dloc;colname;att]]; + }; diff --git a/di/sort/test.csv b/di/sort/test.csv new file mode 100644 index 00000000..f0ba2b14 --- /dev/null +++ b/di/sort/test.csv @@ -0,0 +1,117 @@ +action,ms,bytes,lang,code,repeat,minver,comment +before,0,0,q,srt:use`di.sort,1,1,load module +before,0,0,q,mylog:`info`warn`error!({[c;m]};{[c;m]};{[c;m]}),1,1,define no-op mock logger +before,0,0,q,"caplog:([] fn:`symbol$();ctx:`symbol$();msg:())",1,1,initialise log capture table +before,0,0,q,logcap:`info`warn`error!({[c;m] `caplog upsert(`info;c;m)};{[c;m] `caplog upsert(`warn;c;m)};{[c;m] `caplog upsert(`error;c;m)}),1,1,define capturing mock logger +before,0,0,q,"`:tmp_sort_autoload.csv 0: (""tabname,att,column,sort"";""default,p,sym,1"")",1,1,write autoload fixture csv +before,0,0,q,"`:tmp_sort_valid.csv 0: (""tabname,att,column,sort"";""trade,p,sym,1"";""trade,,time,0"";""default,p,sym,1"")",1,1,write valid sort csv +before,0,0,q,"`:tmp_sort_badcols.csv 0: (""tabname,attr,col,sort"";""trade,p,sym,1"")",1,1,write csv with bad column names +before,0,0,q,"`:tmp_sort_badatt.csv 0: (""tabname,att,column,sort"";""trade,z,sym,1"")",1,1,write csv with invalid attribute value +before,0,0,q,"`:tmp_sort_3col.csv 0: (""tabname,att,sort"";""trade,p,1"")",1,1,write csv with only 3 columns - missing column +before,0,0,q,"`:tmp_sort_empty.csv 0: enlist ""tabname,att,column,sort""",1,1,write header-only sort csv +before,0,0,q,"`:tmp_sort_nodfl.csv 0: (""tabname,att,column,sort"";""trade,p,sym,1"")",1,1,write sort csv with no default row 
+before,0,0,q,srt.init[`log`savedir!(mylog;`:tmp_sort_autoload.csv)],1,1,init with autoload savedir so params=0 before tests + +comment,,,,,,,init - dependency injection validation +fail,0,0,q,srt.init[(::)],1,1,init rejects :: as deps +fail,0,0,q,srt.init[enlist[`log]!enlist(::)],1,1,init rejects null log dep +fail,0,0,q,srt.init[()!()],1,1,init rejects empty dict +fail,0,0,q,srt.init[enlist[`other]!enlist mylog],1,1,init rejects dict missing log key +fail,0,0,q,srt.init[enlist[`log]!enlist 42],1,1,init rejects non-dict log value +true,0,0,q,"(7#@[srt.init;(::);{x}])~""di.sort""",1,1,error message is prefixed di.sort + +comment,,,,,,,getparams - initial state before getsortcsv +true,0,0,q,0=count srt.getparams[],1,1,params empty before getsortcsv +true,0,0,q,"([] tabname:`symbol$();att:`symbol$();column:`symbol$();sort:`boolean$())~srt.getparams[]",1,1,params has correct empty schema + +comment,,,,,,,sorttab - auto-load from savedir when params is empty +run,0,0,q,srt.sorttab[(`other;enlist`:/)] ,1,1,sorttab triggers getsortcsv auto-load when params is empty +true,0,0,q,1=count srt.getparams[],1,1,params populated via auto-load in sorttab + +comment,,,,,,,init - accept valid configs +run,0,0,q,srt.init[enlist[`log]!enlist mylog],1,1,init accepts valid log dep - savedir defaults to :sort.csv +run,0,0,q,srt.init[`log`savedir!(mylog;`:config/sort.csv)],1,1,init accepts savedir config key + +comment,,,,,,,getsortcsv - validation failures +fail,0,0,q,srt.getsortcsv[`:tmp_sort_badcols.csv],1,1,getsortcsv rejects unknown column names +fail,0,0,q,srt.getsortcsv[`:tmp_sort_badatt.csv],1,1,getsortcsv rejects unknown attribute values +fail,0,0,q,srt.getsortcsv[`:nonexistent_file.csv],1,1,getsortcsv errors on missing file +fail,0,0,q,srt.getsortcsv[`:tmp_sort_3col.csv],1,1,getsortcsv rejects csv with a missing required column + +comment,,,,,,,getsortcsv - empty csv (header only) +run,0,0,q,srt.getsortcsv[`:tmp_sort_empty.csv],1,1,getsortcsv loads header-only csv without error 
+true,0,0,q,0=count srt.getparams[],1,1,params is empty after loading header-only csv + +comment,,,,,,,getsortcsv - successful load +run,0,0,q,srt.getsortcsv[`:tmp_sort_valid.csv],1,1,getsortcsv loads valid csv without error + +comment,,,,,,,getparams - state after getsortcsv +true,0,0,q,3=count srt.getparams[],1,1,params has 3 rows after getsortcsv +true,0,0,q,`tabname`att`column`sort~cols srt.getparams[],1,1,params has correct column schema +true,0,0,q,`trade`trade`default~(srt.getparams[])`tabname,1,1,params tabnames match csv content +true,0,0,q,`p``p~(srt.getparams[])`att,1,1,params attributes match csv content +true,0,0,q,101b~(srt.getparams[])`sort,1,1,params sort flags match csv content + +comment,,,,,,,sorttab - table-specific params and edge cases +run,0,0,q,srt.sorttab[(`trade;enlist`:/)] ,1,1,sorttab uses table-specific params row not default fallback +run,0,0,q,srt.sorttab[(`trade;())],1,1,sorttab handles empty partition list without error + +comment,,,,,,,sorttab - no matching params and no default row +run,0,0,q,srt.getsortcsv[`:tmp_sort_nodfl.csv],1,1,load params with trade row only and no default +true,0,0,q,()~srt.sorttab[(`other;enlist`:/)] ,1,1,sorttab returns () when no params found and no default row + +comment,,,,,,,log call verification - init with capturing logger +run,0,0,q,srt.init[`log`savedir!(logcap;`:sort.csv)],1,1,switch to capturing logger +run,0,0,q,srt.getsortcsv[`:tmp_sort_valid.csv],1,1,reload valid params for log tests + +comment,,,,,,,log call verification - getsortcsv info logging on successful load +run,0,0,q,caplog:0#caplog,1,1,reset log capture +run,0,0,q,srt.getsortcsv[`:tmp_sort_valid.csv],1,1,call getsortcsv with capturing logger +true,0,0,q,2=count select from caplog where fn=`info,1,1,getsortcsv emits exactly two info entries +true,0,0,q,"any caplog[`msg] like ""*retrieving sort settings*""",1,1,getsortcsv logs retrieval start message +true,0,0,q,"any caplog[`msg] like ""*loaded 3 sort config rows*""",1,1,getsortcsv 
logs loaded row count +true,0,0,q,all (select from caplog where fn=`info)[`ctx]=`getsortcsv,1,1,getsortcsv info entries use getsortcsv context symbol +true,0,0,q,not any caplog[`fn]=`error,1,1,getsortcsv logs no errors on success + +comment,,,,,,,log call verification - getsortcsv error logging on file load failure +run,0,0,q,caplog:0#caplog,1,1,reset log capture +run,0,0,q,@[srt.getsortcsv;`:nonexistent_file.csv;{x}],1,1,trigger file load error +true,0,0,q,1=count select from caplog where fn=`error,1,1,getsortcsv logs exactly one error on file failure +true,0,0,q,"any caplog[`msg] like ""*failed to open*""",1,1,error log message mentions failed to open +true,0,0,q,`getsortcsv~first (select from caplog where fn=`error)[`ctx],1,1,file error is logged under getsortcsv context symbol + +comment,,,,,,,log call verification - sorttab info logging for table-specific params and partition operations +run,0,0,q,caplog:0#caplog,1,1,reset log capture +run,0,0,q,srt.sorttab[(`trade;enlist`:/)] ,1,1,sorttab with table-specific params +true,0,0,q,"any caplog[`msg] like ""*sorting the trade table*""",1,1,sorttab logs sort start +true,0,0,q,"any caplog[`msg] like ""*sort parameters have been retrieved for: trade*""",1,1,sorttab logs table-specific params lookup +true,0,0,q,"any caplog[`msg] like ""*finished sorting the trade table*""",1,1,sorttab logs sort completion +true,0,0,q,"any caplog[`msg] like ""*by these columns*""",1,1,sortdir logs sort column list +true,0,0,q,"any caplog[`msg] like ""*applying*""",1,1,applyattr logs attribute application +true,0,0,q,any (select from caplog where fn=`info)[`ctx]=`applyattr,1,1,applyattr info entries use applyattr context symbol + +comment,,,,,,,log call verification - sorttab error logging when partition operations fail +true,0,0,q,"any caplog[`msg] like ""*failed to sort*""",1,1,sorterr logs partition sort failure +true,0,0,q,any (select from caplog where fn=`error)[`ctx]=`sorttab,1,1,partition sort errors are logged under sorttab 
context symbol +true,0,0,q,"any caplog[`msg] like ""*unable to apply*""",1,1,attrerr logs attribute application failure + +comment,,,,,,,log call verification - sorttab info logging for default params fallback path +run,0,0,q,caplog:0#caplog,1,1,reset log capture +run,0,0,q,srt.sorttab[(`other;enlist`:/)] ,1,1,sorttab falls back to default params row +true,0,0,q,"any caplog[`msg] like ""*using default parameters*""",1,1,sorttab logs default params fallback + +comment,,,,,,,log call verification - sorttab warn logging when no params and no default row +run,0,0,q,srt.getsortcsv[`:tmp_sort_nodfl.csv],1,1,load params with no default row +run,0,0,q,caplog:0#caplog,1,1,reset log capture +run,0,0,q,srt.sorttab[(`other;enlist`:/)] ,1,1,sorttab with no matching params +true,0,0,q,any caplog[`fn]=`warn,1,1,sorttab emits a warn log when table has no sort config +true,0,0,q,"any caplog[`msg] like ""*will not be sorted*""",1,1,warn message indicates table will not be sorted +true,0,0,q,`sorttab~first (select from caplog where fn=`warn)[`ctx],1,1,warn is logged under sorttab context symbol + +after,0,0,q,hdel`:tmp_sort_valid.csv,1,1,remove valid sort csv +after,0,0,q,hdel`:tmp_sort_badcols.csv,1,1,remove bad cols csv +after,0,0,q,hdel`:tmp_sort_badatt.csv,1,1,remove bad att csv +after,0,0,q,hdel`:tmp_sort_autoload.csv,1,1,remove autoload fixture csv +after,0,0,q,hdel`:tmp_sort_3col.csv,1,1,remove 3-col fixture csv +after,0,0,q,hdel`:tmp_sort_empty.csv,1,1,remove empty fixture csv +after,0,0,q,hdel`:tmp_sort_nodfl.csv,1,1,remove no-default fixture csv From c5183f4656a4c5f941fced6095eb5b6b71ba3ed0 Mon Sep 17 00:00:00 2001 From: ascottDI Date: Mon, 15 Jun 2026 14:33:23 +0100 Subject: [PATCH 2/6] fixed small bug around log dependency, added more tests to ensure correct sorting of on disk data --- di/sort/sort.q | 3 +++ di/sort/test.csv | 1 + 2 files changed, 4 insertions(+) diff --git a/di/sort/sort.q b/di/sort/sort.q index 2baf68ee..21e776ee 100644 --- a/di/sort/sort.q +++ 
b/di/sort/sort.q @@ -18,6 +18,9 @@ init:{[deps] '"di.sort: log dependency is required; pass `info`warn`error functions - ", "see di.log for a default implementation or refer to confluence documentation"; ]; + if[not all (`info`warn`error) in key logdict; + '"di.sort: log dict must have `info`warn`error keys; got: ",(", " sv string key logdict); + ]; .z.m.loginfo:logdict`info; .z.m.logwarn:logdict`warn; .z.m.logerr:logdict`error; diff --git a/di/sort/test.csv b/di/sort/test.csv index f0ba2b14..f532f1d2 100644 --- a/di/sort/test.csv +++ b/di/sort/test.csv @@ -18,6 +18,7 @@ fail,0,0,q,srt.init[enlist[`log]!enlist(::)],1,1,init rejects null log dep fail,0,0,q,srt.init[()!()],1,1,init rejects empty dict fail,0,0,q,srt.init[enlist[`other]!enlist mylog],1,1,init rejects dict missing log key fail,0,0,q,srt.init[enlist[`log]!enlist 42],1,1,init rejects non-dict log value +fail,0,0,q,srt.init[enlist[`log]!enlist(`info`warn!(mylog`info;mylog`warn))],1,1,init rejects log dict missing required key true,0,0,q,"(7#@[srt.init;(::);{x}])~""di.sort""",1,1,error message is prefixed di.sort comment,,,,,,,getparams - initial state before getsortcsv From 44d3e7e09468d5c10a790e15ef8640867120ed39 Mon Sep 17 00:00:00 2001 From: ascottDI Date: Mon, 15 Jun 2026 15:22:03 +0100 Subject: [PATCH 3/6] updating .md file --- di/sort/sort.md | 164 +++++++++++++++++++++++++++++++++++------------- 1 file changed, 119 insertions(+), 45 deletions(-) diff --git a/di/sort/sort.md b/di/sort/sort.md index b916d394..6bffd69f 100644 --- a/di/sort/sort.md +++ b/di/sort/sort.md @@ -1,72 +1,114 @@ # di.sort -Library for sorting and applying attributes to on-disk kdb+ tables. Driven by a configuration CSV that specifies which columns to sort and which attributes to apply per table. +Module for sorting and applying attributes to on-disk kdb+ tables. Driven by a configuration CSV (`sort.csv`) that specifies which columns to sort by and which attributes to apply per table. 
If a table has no explicit entry, `di.sort` falls back to a `default` row. Extracted from the `.sort` namespace in TorQ's `dbwriteutils.q`. -## Dependencies +## Usage -**Injectable (required):** -- `log` — `{[c;m]}` functions for `info`, `warn`, `error` +```q +srt:use`di.sort + +/ inject dependencies — log is required, savedir defaults to `:sort.csv +log:use`di.log +logdep:`info`warn`error!(log.info;log.warn;log.error) +srt.init[`log`savedir!(logdep;`:config/sort.csv)] + +/ load sort configuration +srt.getsortcsv[`:config/sort.csv] + +/ sort all partitions for a table +srt.sorttab[(`trade; `:/hdb/2000.01.01/trade`:/hdb/2000.01.02/trade)] -**Hard:** none +/ inspect loaded config +srt.getparams[] +``` -## Init +### Typical HDB sort loop ```q srt:use`di.sort - -/ log only - savedir defaults to `:sort.csv log:use`di.log logdep:`info`warn`error!(log.info;log.warn;log.error) -srt.init[enlist[`log]!enlist logdep] +srt.init[`log`savedir!(logdep;`:config/sort.csv)] + +/ collect partitions per table then sort each +tables:`trade`quote +dirs:raze {[t;hdb] ([] t:t; d:hdb,/:t) } [;`:/hdb] each tables +srt.sorttab each {(first x;exec d from x)} each value tables!dirs +``` -/ log + custom sort.csv path -srt.init[`log`savedir!(logdep; `:config/sort.csv)] +## sort.csv format -/ using a custom logger -mylog:`info`warn`error!( - {[c;m] -1 "INFO [",string[c],"] ",m;}; - {[c;m] -1 "WARN [",string[c],"] ",m;}; - {[c;m] -2 "ERROR [",string[c],"] ",m;}); -srt.init[`log`savedir!(mylog; `:config/sort.csv)] ``` +tabname,att,column,sort +trade,p,sym,1 +trade,s,time,1 +trade,,price,0 +quote,p,sym,1 +default,p,sym,1 +``` + +| Column | Type | Description | +|---|---|---| +| `tabname` | symbol | Table name, or `default` to apply to all unlisted tables | +| `att` | symbol | Attribute to apply after sort: `p` `s` `g` `u` or empty for none | +| `column` | symbol | Column to sort/attribute | +| `sort` | boolean | `1` to use this column as a sort key; `0` to apply attribute only | + +**Attributes:** 
-## Exported Functions +| Value | Description | +|---|---| +| `p` | Parted — all rows with the same value are contiguous. Requires the column to be sorted first (`sort=1`). | +| `s` | Sorted — values are in ascending order. Applied automatically by `xasc`; set explicitly here if wanted after the sort step. | +| `g` | Grouped — inverse index stored on disk. Suitable for low-to-medium cardinality unsorted columns. | +| `u` | Unique — all values are distinct. | +| ` ` (empty) | No attribute applied. Column may still participate in the sort if `sort=1`. | + +Multiple rows for the same table are supported. All rows with `sort=1` for a table are used as the compound sort key in the order they appear. Attribute rows with `sort=0` are applied independently after sorting. + +## API ### `init[deps]` -Wire injectable dependencies and optional config. Must be called before any other function. +Wire injectable dependencies and optional configuration. Must be called before any other function. | Key | Required | Type | Description | |---|---|---|---| -| `` `log `` | yes | dict | `info`, `warn`, `error` functions | -| `` `savedir `` | no | hsym | Path to sort.csv; defaults to `` `:sort.csv `` | +| `` `log `` | yes | dict | Functions keyed `` `info`warn`error ``, each with signature `{[ctx;msg]}` | +| `` `savedir `` | no | hsym | Path to sort.csv used as fallback when `sorttab` is called with an empty `params` table. Defaults to `` `:sort.csv ``. | + +Errors with prefix `di.sort:` if `log` is missing or does not contain all three required keys. -If `params` is empty when `sorttab` is called, it auto-loads from `savedir`. +```q +/ minimal — savedir defaults to `:sort.csv +srt.init[enlist[`log]!enlist logdep] + +/ with explicit sort.csv path +srt.init[`log`savedir!(logdep;`:config/sort.csv)] +``` + +--- ### `getsortcsv[file]` -Load and validate sort configuration from a CSV file. +Load and validate sort configuration from a CSV file. 
Populates the internal `params` table used by `sorttab`. | Parameter | Type | Description | |---|---|---| | `file` | hsym | Path to sort.csv | -**sort.csv format:** -``` -tabname,att,column,sort -trade,p,sym,1 -trade,s,time,1 -quote,p,sym,1 -quote,s,time,1 -default,p,sym,1 +Validates that: +- All four required columns (`tabname`, `att`, `column`, `sort`) are present +- No unrecognised column names exist +- All `att` values are one of `` ` `p`s`g`u `` + +Logs an info message on successful load and an error message on file-read failure (then rethrows). + +```q +srt.getsortcsv[`:config/sort.csv] ``` -| Column | Description | -|---|---| -| `tabname` | Table name, or `` `default `` to apply to all unlisted tables | -| `att` | Attribute to apply: `` `p`s`g`u `` or `` ` `` for none | -| `column` | Column to sort/attribute, or `` ` `` for no sort | -| `sort` | `1b` to sort by this column, `0b` to only apply attribute | +--- ### `sorttab[d]` @@ -74,15 +116,27 @@ Sort and apply attributes to on-disk partitions for a single table. | Parameter | Type | Description | |---|---|---| -| `d` | list | 2-element: `` (`tablename; `:/hdb/2000.01.01/trade`:/hdb/2000.01.02/trade) `` | +| `d` | 2-element list | `(tablename; partition_dirs)` where `partition_dirs` is an hsym or list of hsyms | -Looks up `params` by `tablename`, falls back to `default` row, skips if neither found. +Lookup order for sort parameters: +1. Rows where `tabname` matches the supplied table name +2. Rows where `tabname = \`default` +3. If neither found — logs a warn and returns `()` without error + +Each partition directory is processed independently. A failure on one partition is logged (as an error) and does not halt remaining partitions. + +If `params` is empty when `sorttab` is called, it auto-loads from the `savedir` set during `init`. 
```q -srt.getsortcsv[`:config/sort.csv] +/ single partition +srt.sorttab[(`trade; enlist `:/hdb/2000.01.01/trade)] + +/ multiple partitions srt.sorttab[(`trade; `:/hdb/2000.01.01/trade`:/hdb/2000.01.02/trade)] ``` +--- + ### `getparams[]` Return the current sort configuration table loaded by `getsortcsv`. @@ -91,14 +145,34 @@ Return the current sort configuration table loaded by `getsortcsv`. srt.getparams[] ``` -## Example +Returns a table with schema `([] tabname:\`symbol$(); att:\`symbol$(); column:\`symbol$(); sort:\`boolean$())`. + +## Log dependency contract + +`di.sort` requires a log dependency dictionary with keys `` `info`warn`error ``, each a function with signature `{[ctx;msg]}`: + +```q +`info`warn`error!({[ctx;msg] ...};{[ctx;msg] ...};{[ctx;msg] ...}) +``` + +`di.log` satisfies this contract out of the box: ```q -srt:use`di.sort log:use`di.log logdep:`info`warn`error!(log.info;log.warn;log.error) - srt.init[enlist[`log]!enlist logdep] -srt.getsortcsv[`:config/sort.csv] -srt.sorttab[(`trade; `:/hdb/2000.01.01/trade`:/hdb/2000.01.02/trade)] ``` + +You can supply any custom implementation with the same signatures. 
+ +Context symbols used by `di.sort` in log calls: + +| Context | Level | When | +|---|---|---| +| `` `getsortcsv `` | info | CSV retrieval start and row count on successful load | +| `` `getsortcsv `` | error | File read failure (rethrown after logging) | +| `` `sorttab `` | info | Sort start, params lookup result, column list, sort completion | +| `` `sorttab `` | warn | Table has no matching params and no default row | +| `` `sorttab `` | error | `xasc` failure on a partition (non-fatal — remaining partitions continue) | +| `` `applyattr `` | info | Attribute applied to a column | +| `` `applyattr `` | error | Attribute application failure (non-fatal) | From ff644017222d49ef3f03d60cb3badcbb010751ac Mon Sep 17 00:00:00 2001 From: ascottDI Date: Fri, 19 Jun 2026 10:19:52 +0100 Subject: [PATCH 4/6] Removed dependence on loading csv from disk, added in memory table functionality. Reduced number of public functions and ordering issues to reduce complexity --- di/sort/init.q | 2 +- di/sort/sort.md | 138 ++++++++++++++++++------------------ di/sort/sort.q | 178 +++++++++++++++++++++++++---------------------- di/sort/test.csv | 130 +++++++++++++++++----------------- 4 files changed, 228 insertions(+), 220 deletions(-) diff --git a/di/sort/init.q b/di/sort/init.q index 00bcdb84..e670c07f 100644 --- a/di/sort/init.q +++ b/di/sort/init.q @@ -2,4 +2,4 @@ \l ::sort.q -export:([init;getsortcsv;sorttab;getparams]) +export:([init;readcsv;sorttab]) diff --git a/di/sort/sort.md b/di/sort/sort.md index 6bffd69f..392ddf4d 100644 --- a/di/sort/sort.md +++ b/di/sort/sort.md @@ -1,152 +1,152 @@ # di.sort -Module for sorting and applying attributes to on-disk kdb+ tables. Driven by a configuration CSV (`sort.csv`) that specifies which columns to sort by and which attributes to apply per table. If a table has no explicit entry, `di.sort` falls back to a `default` row. Extracted from the `.sort` namespace in TorQ's `dbwriteutils.q`. 
+Module for sorting and applying attributes to on-disk kdb+ tables. Driven by a **config table** that specifies which columns to sort by and which attributes to apply per table. You pass that table straight to `sorttab`; if you keep your config in a CSV file, `readcsv` reads one into the right shape for you. If a table has no explicit entry, `di.sort` falls back to a `default` row. Extracted from the `.sort` namespace in TorQ's `dbwriteutils.q`. ## Usage ```q srt:use`di.sort -/ inject dependencies — log is required, savedir defaults to `:sort.csv +/ inject dependencies — log is required log:use`di.log logdep:`info`warn`error!(log.info;log.warn;log.error) -srt.init[`log`savedir!(logdep;`:config/sort.csv)] - -/ load sort configuration -srt.getsortcsv[`:config/sort.csv] +srt.init[enlist[`log]!enlist logdep] -/ sort all partitions for a table -srt.sorttab[(`trade; `:/hdb/2000.01.01/trade`:/hdb/2000.01.02/trade)] +/ build a config table directly ... +config:([] tabname:`trade`trade`default; att:`p``p; column:`sym`time`sym; sort:101b) +srt.sorttab[config; `trade; `:/hdb/2000.01.01/trade`:/hdb/2000.01.02/trade] -/ inspect loaded config -srt.getparams[] +/ ... or read the config from a csv and pass it straight in +srt.sorttab[srt.readcsv `:config/sort.csv; `trade; `:/hdb/2000.01.01/trade] ``` +`di.sort` holds no state of its own: every `sorttab` call takes the config it should use, so you can build that config however you like (by hand, from a query, or from a CSV) and reuse or vary it freely. 
+ ### Typical HDB sort loop ```q srt:use`di.sort log:use`di.log logdep:`info`warn`error!(log.info;log.warn;log.error) -srt.init[`log`savedir!(logdep;`:config/sort.csv)] +srt.init[enlist[`log]!enlist logdep] + +config:srt.readcsv `:config/sort.csv -/ collect partitions per table then sort each -tables:`trade`quote -dirs:raze {[t;hdb] ([] t:t; d:hdb,/:t) } [;`:/hdb] each tables -srt.sorttab each {(first x;exec d from x)} each value tables!dirs +/ .Q.par[root;date;table] builds the on-disk partition path +hdb:`:/hdb +dates:2000.01.01 2000.01.02 +tabs:`trade`quote +/ for each table, build its partition paths, then sort every table with the same config +pdirs:{[hdb;dates;t] .Q.par[hdb;;t] each dates}[hdb;dates] each tabs +srt.sorttab[config]'[tabs; pdirs] ``` -## sort.csv format +## The config table -``` -tabname,att,column,sort -trade,p,sym,1 -trade,s,time,1 -trade,,price,0 -quote,p,sym,1 -default,p,sym,1 -``` +`di.sort` is configured with a table of this shape: | Column | Type | Description | |---|---|---| | `tabname` | symbol | Table name, or `default` to apply to all unlisted tables | -| `att` | symbol | Attribute to apply after sort: `p` `s` `g` `u` or empty for none | +| `att` | symbol | Attribute to apply after sort: `p` `s` `g` `u` or empty (`` ` ``) for none | | `column` | symbol | Column to sort/attribute | -| `sort` | boolean | `1` to use this column as a sort key; `0` to apply attribute only | +| `sort` | boolean | `1b` to use this column as a sort key; `0b` to apply an attribute only | + +Build it any way you like — in code, from a query, or from a CSV via `readcsv`: + +```q +([] tabname:`trade`trade`default; att:`p``p; column:`sym`time`sym; sort:101b) +``` + +Multiple rows for the same table are supported. All rows with `sort=1b` for a table form the compound sort key, in the order they appear. Attribute rows with `sort=0b` are applied independently after sorting. 
**Attributes:** | Value | Description | |---|---| -| `p` | Parted — all rows with the same value are contiguous. Requires the column to be sorted first (`sort=1`). | +| `p` | Parted — all rows with the same value are contiguous. Requires the column to be sorted first (`sort=1b`). | | `s` | Sorted — values are in ascending order. Applied automatically by `xasc`; set explicitly here if wanted after the sort step. | | `g` | Grouped — inverse index stored on disk. Suitable for low-to-medium cardinality unsorted columns. | | `u` | Unique — all values are distinct. | -| ` ` (empty) | No attribute applied. Column may still participate in the sort if `sort=1`. | +| ` ` (empty) | No attribute applied. Column may still participate in the sort if `sort=1b`. | + +### sort.csv format (for `readcsv`) -Multiple rows for the same table are supported. All rows with `sort=1` for a table are used as the compound sort key in the order they appear. Attribute rows with `sort=0` are applied independently after sorting. +A CSV consumed by `readcsv` must have these four columns, in this order: + +``` +tabname,att,column,sort +trade,p,sym,1 +trade,,time,0 +quote,p,sym,1 +default,p,sym,1 +``` ## API ### `init[deps]` -Wire injectable dependencies and optional configuration. Must be called before any other function. +Wire injectable dependencies. Must be called before any other function. | Key | Required | Type | Description | |---|---|---|---| | `` `log `` | yes | dict | Functions keyed `` `info`warn`error ``, each with signature `{[ctx;msg]}` | -| `` `savedir `` | no | hsym | Path to sort.csv used as fallback when `sorttab` is called with an empty `params` table. Defaults to `` `:sort.csv ``. | -Errors with prefix `di.sort:` if `log` is missing or does not contain all three required keys. +Errors with prefix `di.sort:` if `deps` is not a dict, if `log` is missing, or if the log dict does not contain all three required keys. 
```q -/ minimal — savedir defaults to `:sort.csv srt.init[enlist[`log]!enlist logdep] - -/ with explicit sort.csv path -srt.init[`log`savedir!(logdep;`:config/sort.csv)] ``` --- -### `getsortcsv[file]` +### `readcsv[file]` -Load and validate sort configuration from a CSV file. Populates the internal `params` table used by `sorttab`. +Read a config CSV and **return** it as a table. Does not store it — pass the result to `sorttab`. Use this only when your config lives in a CSV; a hand-built table goes straight to `sorttab`. | Parameter | Type | Description | |---|---|---| -| `file` | hsym | Path to sort.csv | - -Validates that: -- All four required columns (`tabname`, `att`, `column`, `sort`) are present -- No unrecognised column names exist -- All `att` values are one of `` ` `p`s`g`u `` +| `file` | hsym (or symbol) | Path to the CSV. Coerced with `hsym`, so `` `:config/sort.csv `` and `` `config/sort.csv `` both work. | -Logs an info message on successful load and an error message on file-read failure (then rethrows). +The CSV must have the four columns `tabname,att,column,sort` in that order. Logs an info message on read and an error message on file-read failure (then rethrows). Content validation (column names, attribute values) happens in `sorttab`. ```q -srt.getsortcsv[`:config/sort.csv] +config:srt.readcsv `:config/sort.csv +srt.sorttab[config; `trade; dirs] + +/ or in one line +srt.sorttab[srt.readcsv `:config/sort.csv; `trade; dirs] ``` --- -### `sorttab[d]` +### `sorttab[config;tabname;dirs]` -Sort and apply attributes to on-disk partitions for a single table. +Sort and apply attributes to on-disk partitions for a single table, using the supplied config table. 
| Parameter | Type | Description | |---|---|---| -| `d` | 2-element list | `(tablename; partition_dirs)` where `partition_dirs` is an hsym or list of hsyms | +| `config` | table | A config table with columns `` `tabname`att`column`sort `` (see [The config table](#the-config-table)) | +| `tabname` | symbol | Table name | +| `dirs` | hsym, or list of hsyms | Partition directory (or directories) for that table | + +`config` is validated first; `sorttab` errors (prefixed `di.sort:`) if it is not a table, has unknown or missing columns, has a non-boolean `sort` column, or has an `att` value outside `` ` `p`s`g`u ``. -Lookup order for sort parameters: +Lookup order for sort parameters within `config`: 1. Rows where `tabname` matches the supplied table name 2. Rows where `tabname = \`default` 3. If neither found — logs a warn and returns `()` without error -Each partition directory is processed independently. A failure on one partition is logged (as an error) and does not halt remaining partitions. - -If `params` is empty when `sorttab` is called, it auto-loads from the `savedir` set during `init`. +Each partition directory is processed independently: a failure on one partition is logged (as an error) and does not halt remaining partitions. ```q / single partition -srt.sorttab[(`trade; enlist `:/hdb/2000.01.01/trade)] +srt.sorttab[config; `trade; enlist `:/hdb/2000.01.01/trade] / multiple partitions -srt.sorttab[(`trade; `:/hdb/2000.01.01/trade`:/hdb/2000.01.02/trade)] +srt.sorttab[config; `trade; `:/hdb/2000.01.01/trade`:/hdb/2000.01.02/trade] ``` ---- - -### `getparams[]` - -Return the current sort configuration table loaded by `getsortcsv`. - -```q -srt.getparams[] -``` - -Returns a table with schema `([] tabname:\`symbol$(); att:\`symbol$(); column:\`symbol$(); sort:\`boolean$())`. 
- ## Log dependency contract `di.sort` requires a log dependency dictionary with keys `` `info`warn`error ``, each a function with signature `{[ctx;msg]}`: @@ -169,8 +169,8 @@ Context symbols used by `di.sort` in log calls: | Context | Level | When | |---|---|---| -| `` `getsortcsv `` | info | CSV retrieval start and row count on successful load | -| `` `getsortcsv `` | error | File read failure (rethrown after logging) | +| `` `readcsv `` | info | CSV read start and row count on successful read | +| `` `readcsv `` | error | File read failure (rethrown after logging) | | `` `sorttab `` | info | Sort start, params lookup result, column list, sort completion | | `` `sorttab `` | warn | Table has no matching params and no default row | | `` `sorttab `` | error | `xasc` failure on a partition (non-fatal — remaining partitions continue) | diff --git a/di/sort/sort.q b/di/sort/sort.q index 21e776ee..175fefcc 100644 --- a/di/sort/sort.q +++ b/di/sort/sort.q @@ -1,121 +1,133 @@ / library for sorting and applying attributes to on-disk kdb+ tables -/ sort configuration table - populated by getsortcsv, read via getparams -params:([] tabname:`symbol$(); att:`symbol$(); column:`symbol$(); sort:`boolean$()); - -/ valid attribute symbols accepted in sort.csv att column +/ attributes that may legitimately be applied on disk (empty leaves a column unattributed) validatts:``p`s`g`u; init:{[deps] - / wire injectable log dependency (required) and optional config from deps dict - / optional keys: `savedir - hsym path to sort.csv, defaults to `:sort.csv - logdict:$[99h=type deps; - $[`log in key deps; - $[99h=type deps`log; deps`log; ()!()]; - ()!()]; - ()!()]; - if[not count logdict; - '"di.sort: log dependency is required; pass `info`warn`error functions - ", - "see di.log for a default implementation or refer to confluence documentation"; - ]; - if[not all (`info`warn`error) in key logdict; - '"di.sort: log dict must have `info`warn`error keys; got: ",(", " sv string key logdict); - 
]; - .z.m.loginfo:logdict`info; - .z.m.logwarn:logdict`warn; - .z.m.logerr:logdict`error; - .z.m.defaultfile:$[`savedir in key deps; deps`savedir; `:sort.csv]; + / wire the injectable log dependency so the module reports through the host's logger + / deps: a dict with a `log key -> `info`warn`error!({[c;m]};{[c;m]};{[c;m]}) + / see di.log for a default implementation, or pass any matching dict + / example: srt.init[enlist[`log]!enlist logdep] + if[99h<>type deps; + '"di.sort: deps must be a dict with a `log key; see di.log for a default logger"]; + if[not `log in key deps; + '"di.sort: log dependency is required; pass `info`warn`error functions keyed on `log"]; + if[99h<>type deps`log; + '"di.sort: log value must be a dict of `info`warn`error functions"]; + if[not all (`info`warn`error) in key deps`log; + '"di.sort: log dict must have `info`warn`error keys; got: ",(", " sv string key deps`log)]; + .z.m.log:deps`log; }; -/ internal - log and rethrow a csv load error -csvloaderr:{[f;e] - / logs the failure then rethrows so getsortcsv surfaces the error to the caller - .z.m.logerr[`getsortcsv;"failed to open ",string[f],". the error was: ",e]; - '"failed to open ",string[f],". the error was: ",e; +readcsv:{[file] + / convenience for the common case where config lives in a csv - returns it, does not store + / pass the result to sorttab, e.g. 
srt.sorttab[srt.readcsv `:sort.csv;`trade;dirs] + / the csv must have columns tabname,att,column,sort in that order + file:hsym file; + t:@[readfile; file; readerr[file]]; + .z.m.log[`info][`readcsv;"read ",(string count t)," sort param row(s) from ",string file]; + :t; }; -getsortcsv:{[file] - / load and validate sort configuration from a CSV file; populates the params table - file:hsym file; - rawparams:@[ - {.z.m.loginfo[`getsortcsv;"retrieving sort settings from ",string x]; ("SSSB";enlist",")0:x}; - file; - csvloaderr[file] - ]; - spc:cols rawparams; - badcols:spc where not spc in `tabname`att`column`sort; - if[count badcols; - '"unrecognised columns (",(", " sv string badcols),") in ",string file]; - missingcols:(`tabname`att`column`sort) where not (`tabname`att`column`sort) in spc; - if[count missingcols; - '"missing required columns (",(", " sv string missingcols),") in ",string file]; - at:distinct rawparams`att; - badatts:at where not at in validatts; - if[count badatts; - '"unrecognised type of attribute - ",(", " sv string badatts)]; - .z.m.params:rawparams; - .z.m.loginfo[`getsortcsv;"loaded ",(string count rawparams)," sort config rows from ",string file]; +/ internal - read and parse a sort-config csv (the protected action in readcsv) +readfile:{[file] + / kept named rather than inline so readcsv reads cleanly and matches the style guide + .z.m.log[`info][`readcsv;"reading sort params from ",string file]; + :("SSSB";enlist",") 0: file; + }; + +/ internal - log and rethrow a csv read failure +readerr:{[file;e] + / surfaces the failure to the caller after logging it under the readcsv context + .z.m.log[`error][`readcsv;"failed to read ",string[file],": ",e]; + '"failed to read ",string[file],": ",e; }; -sorttab:{[d] - / sort and apply attributes to on-disk partitions for one table - st:string t:first d; - if[0=count params; getsortcsv[.z.m.defaultfile]]; - .z.m.loginfo[`sorttab;"sorting the ",st," table"]; - sp:getsortparams[t;st]; 
+sorttab:{[config;tabname;dirs] + / sort and apply attributes to the on-disk partition directories for one table + / config: a sort-config table (build it directly or via readcsv); tabname: symbol; dirs: hsym or list of hsyms + / example: srt.sorttab[srt.readcsv `:sort.csv;`trade;`:/hdb/2024.01.01/trade] + checkconfig config; + st:string tabname; + .z.m.log[`info][`sorttab;"sorting the ",st," table"]; + sp:getsortparams[config;tabname;st]; if[not count sp; :()]; - sortdir[sp] each distinct (),last d; - .z.m.loginfo[`sorttab;"finished sorting the ",st," table"]; + sortdir[sp] each distinct (),dirs; + .z.m.log[`info][`sorttab;"finished sorting the ",st," table"]; }; -getparams:{[] - / return the current sort configuration table - :params; +/ internal - validate a sort-config table, signalling a clear error if it is malformed +checkconfig:{[t] + / guards every sorttab call so a hand-built or csv-derived table is rejected early if wrong + if[98h<>type t; + '"di.sort: config must be a table with columns `tabname`att`column`sort"]; + c:cols t; + badcols:c where not c in `tabname`att`column`sort; + if[count badcols; + '"di.sort: unrecognised config column(s): ",", " sv string badcols]; + missingcols:(`tabname`att`column`sort) where not (`tabname`att`column`sort) in c; + if[count missingcols; + '"di.sort: missing required config column(s): ",", " sv string missingcols]; + if[not 1h=type t`sort; + '"di.sort: the sort column must be boolean"]; + badatts:at where not (at:distinct t`att) in validatts; + if[count badatts; + '"di.sort: unrecognised attribute(s) in att column: ",", " sv string badatts]; }; -/ internal - look up sort params for a table, falling back to the default row -getsortparams:{[t;st] - / return table-specific params, then default row, then empty table if neither found - if[count tabsp:select from params where tabname=t; - .z.m.loginfo[`sorttab;"sort parameters have been retrieved for: ",st]; - :tabsp]; - if[count defsp:select from params where 
tabname=`default; - .z.m.loginfo[`sorttab;"no sort parameters have been specified for: ",st,". using default parameters"]; - :defsp]; - .z.m.logwarn[`sorttab;"no sort parameters have been found for: ",st,". the table will not be sorted"]; - :0#params; +/ internal - log a sorttab message then return the resolved rows +logreturn:{[lvl;msg;rows] + / keeps each branch body in getsortparams to a single statement + .z.m.log[lvl][`sorttab;msg]; + :rows; + }; + +/ internal - resolve which config rows apply to a table +getsortparams:{[config;t;st] + / a table uses its own rows; unlisted tables fall back to the default row, else are skipped + if[count tabsp:select from config where tabname=t; + :logreturn[`info;"sort parameters have been retrieved for: ",st;tabsp]]; + if[count defsp:select from config where tabname=`default; + :logreturn[`info;"no sort parameters have been specified for: ",st,". using default parameters";defsp]]; + :logreturn[`warn;"no sort parameters have been found for: ",st,". the table will not be sorted";0#config]; }; / internal - log a sort failure without rethrowing so remaining partitions still run sorterr:{[sc;dl;e] - / called as error handler in sortdir; best-effort — a single partition failure should not halt the run - .z.m.logerr[`sorttab;"failed to sort ",string[dl]," by these columns: ",(", " sv string sc),". the error was: ",e]; + / a single partition failure should not halt the whole run + .z.m.log[`error][`sorttab;"failed to sort ",string[dl]," by these columns: ",(", " sv string sc),". 
the error was: ",e]; :(); }; +/ internal - sort one partition directory by the given columns +sortcolumns:{[dloc;sortcols] + / split out of sortdir so the conditional body there stays a single statement + .z.m.log[`info][`sorttab;"sorting ",string[dloc]," by these columns: ",", " sv string sortcols]; + .[xasc;(sortcols;dloc); + sorterr[sortcols;dloc]]; + }; + / internal - sort columns and apply attributes for a single on-disk partition directory sortdir:{[sp;dloc] - / sort by columns flagged sort=1b, then apply attributes to columns with a non-null att - if[count sortcols:exec column from sp where sort, not null column; - .z.m.loginfo[`sorttab;"sorting ",string[dloc]," by these columns: ",", " sv string sortcols]; - .[xasc;(sortcols;dloc); - sorterr[sortcols;dloc]]]; - if[count attrcols:select column,att from sp where not null att; - applyattr[dloc;;]'[attrcols`column;attrcols`att]]; + / sort by the columns flagged sort=1b, then attribute the columns that request one + sortcols:exec column from sp where sort, not null column; + if[count sortcols; sortcolumns[dloc;sortcols]]; + attrcols:select column,att from sp where not null att; + if[count attrcols; applyattr[dloc;;]'[attrcols`column;attrcols`att]]; }; / internal - log an attribute application failure without rethrowing attrerr:{[dl;cn;at;e] - / called as error handler in applyattr; logs failure and continues - .z.m.logerr[`applyattr;"unable to apply ",string[at]," attr to the ",string[cn]," column in ",string[dl],". the error was: ",e]; + / logs failure and continues so other columns and partitions still get processed + .z.m.log[`error][`applyattr;"unable to apply ",string[at]," attr to the ",string[cn]," column in ",string[dl],". 
the error was: ",e]; :(); }; / internal - apply a single attribute to a specific column in an on-disk partition applyattr:{[dloc;colname;att] - / null attributes are filtered upstream by sorttab; guard here for safety + / sortdir only passes non-null atts; guard here in case applyattr is ever called directly if[null att; :()]; - .z.m.loginfo[`applyattr;"applying ",string[att]," attr to the ",string[colname]," column in ",string dloc]; + .z.m.log[`info][`applyattr;"applying ",string[att]," attr to the ",string[colname]," column in ",string dloc]; .[{@[x;y;z#]}; (dloc;colname;att); attrerr[dloc;colname;att]]; diff --git a/di/sort/test.csv b/di/sort/test.csv index f532f1d2..ddb924c8 100644 --- a/di/sort/test.csv +++ b/di/sort/test.csv @@ -3,87 +3,85 @@ before,0,0,q,srt:use`di.sort,1,1,load module before,0,0,q,mylog:`info`warn`error!({[c;m]};{[c;m]};{[c;m]}),1,1,define no-op mock logger before,0,0,q,"caplog:([] fn:`symbol$();ctx:`symbol$();msg:())",1,1,initialise log capture table before,0,0,q,logcap:`info`warn`error!({[c;m] `caplog upsert(`info;c;m)};{[c;m] `caplog upsert(`warn;c;m)};{[c;m] `caplog upsert(`error;c;m)}),1,1,define capturing mock logger -before,0,0,q,"`:tmp_sort_autoload.csv 0: (""tabname,att,column,sort"";""default,p,sym,1"")",1,1,write autoload fixture csv before,0,0,q,"`:tmp_sort_valid.csv 0: (""tabname,att,column,sort"";""trade,p,sym,1"";""trade,,time,0"";""default,p,sym,1"")",1,1,write valid sort csv before,0,0,q,"`:tmp_sort_badcols.csv 0: (""tabname,attr,col,sort"";""trade,p,sym,1"")",1,1,write csv with bad column names before,0,0,q,"`:tmp_sort_badatt.csv 0: (""tabname,att,column,sort"";""trade,z,sym,1"")",1,1,write csv with invalid attribute value before,0,0,q,"`:tmp_sort_3col.csv 0: (""tabname,att,sort"";""trade,p,1"")",1,1,write csv with only 3 columns - missing column before,0,0,q,"`:tmp_sort_empty.csv 0: enlist ""tabname,att,column,sort""",1,1,write header-only sort csv before,0,0,q,"`:tmp_sort_nodfl.csv 0: 
(""tabname,att,column,sort"";""trade,p,sym,1"")",1,1,write sort csv with no default row -before,0,0,q,srt.init[`log`savedir!(mylog;`:tmp_sort_autoload.csv)],1,1,init with autoload savedir so params=0 before tests +before,0,0,q,srt.init[enlist[`log]!enlist mylog],1,1,init with no-op logger +before,0,0,q,cfg:([] tabname:`trade`trade`default; att:`p``p; column:`sym`time`sym; sort:101b),1,1,reusable valid config table (hand-built) +before,0,0,q,nodflcfg:srt.readcsv `:tmp_sort_nodfl.csv,1,1,reusable config with a trade row but no default -comment,,,,,,,init - dependency injection validation +comment,,,,,,,init - dependency injection validation (type errors / empty / null) fail,0,0,q,srt.init[(::)],1,1,init rejects :: as deps -fail,0,0,q,srt.init[enlist[`log]!enlist(::)],1,1,init rejects null log dep +fail,0,0,q,srt.init[42],1,1,init rejects a non-dict deps value fail,0,0,q,srt.init[()!()],1,1,init rejects empty dict fail,0,0,q,srt.init[enlist[`other]!enlist mylog],1,1,init rejects dict missing log key +fail,0,0,q,srt.init[enlist[`log]!enlist(::)],1,1,init rejects null log dep fail,0,0,q,srt.init[enlist[`log]!enlist 42],1,1,init rejects non-dict log value fail,0,0,q,srt.init[enlist[`log]!enlist(`info`warn!(mylog`info;mylog`warn))],1,1,init rejects log dict missing required key true,0,0,q,"(7#@[srt.init;(::);{x}])~""di.sort""",1,1,error message is prefixed di.sort - -comment,,,,,,,getparams - initial state before getsortcsv -true,0,0,q,0=count srt.getparams[],1,1,params empty before getsortcsv -true,0,0,q,"([] tabname:`symbol$();att:`symbol$();column:`symbol$();sort:`boolean$())~srt.getparams[]",1,1,params has correct empty schema - -comment,,,,,,,sorttab - auto-load from savedir when params is empty -run,0,0,q,srt.sorttab[(`other;enlist`:/)] ,1,1,sorttab triggers getsortcsv auto-load when params is empty -true,0,0,q,1=count srt.getparams[],1,1,params populated via auto-load in sorttab - -comment,,,,,,,init - accept valid configs -run,0,0,q,srt.init[enlist[`log]!enlist 
mylog],1,1,init accepts valid log dep - savedir defaults to :sort.csv -run,0,0,q,srt.init[`log`savedir!(mylog;`:config/sort.csv)],1,1,init accepts savedir config key - -comment,,,,,,,getsortcsv - validation failures -fail,0,0,q,srt.getsortcsv[`:tmp_sort_badcols.csv],1,1,getsortcsv rejects unknown column names -fail,0,0,q,srt.getsortcsv[`:tmp_sort_badatt.csv],1,1,getsortcsv rejects unknown attribute values -fail,0,0,q,srt.getsortcsv[`:nonexistent_file.csv],1,1,getsortcsv errors on missing file -fail,0,0,q,srt.getsortcsv[`:tmp_sort_3col.csv],1,1,getsortcsv rejects csv with a missing required column - -comment,,,,,,,getsortcsv - empty csv (header only) -run,0,0,q,srt.getsortcsv[`:tmp_sort_empty.csv],1,1,getsortcsv loads header-only csv without error -true,0,0,q,0=count srt.getparams[],1,1,params is empty after loading header-only csv - -comment,,,,,,,getsortcsv - successful load -run,0,0,q,srt.getsortcsv[`:tmp_sort_valid.csv],1,1,getsortcsv loads valid csv without error - -comment,,,,,,,getparams - state after getsortcsv -true,0,0,q,3=count srt.getparams[],1,1,params has 3 rows after getsortcsv -true,0,0,q,`tabname`att`column`sort~cols srt.getparams[],1,1,params has correct column schema -true,0,0,q,`trade`trade`default~(srt.getparams[])`tabname,1,1,params tabnames match csv content -true,0,0,q,`p``p~(srt.getparams[])`att,1,1,params attributes match csv content -true,0,0,q,101b~(srt.getparams[])`sort,1,1,params sort flags match csv content - -comment,,,,,,,sorttab - table-specific params and edge cases -run,0,0,q,srt.sorttab[(`trade;enlist`:/)] ,1,1,sorttab uses table-specific params row not default fallback -run,0,0,q,srt.sorttab[(`trade;())],1,1,sorttab handles empty partition list without error - -comment,,,,,,,sorttab - no matching params and no default row -run,0,0,q,srt.getsortcsv[`:tmp_sort_nodfl.csv],1,1,load params with trade row only and no default -true,0,0,q,()~srt.sorttab[(`other;enlist`:/)] ,1,1,sorttab returns () when no params found and no default row 
- -comment,,,,,,,log call verification - init with capturing logger -run,0,0,q,srt.init[`log`savedir!(logcap;`:sort.csv)],1,1,switch to capturing logger -run,0,0,q,srt.getsortcsv[`:tmp_sort_valid.csv],1,1,reload valid params for log tests - -comment,,,,,,,log call verification - getsortcsv info logging on successful load +run,0,0,q,srt.init[enlist[`log]!enlist mylog],1,1,re-init with valid log dep + +comment,,,,,,,readcsv - returns a config table from a csv +true,0,0,q,3=count srt.readcsv `:tmp_sort_valid.csv,1,1,readcsv returns a 3-row config table +true,0,0,q,`tabname`att`column`sort~cols srt.readcsv `:tmp_sort_valid.csv,1,1,readcsv table has the correct column schema +true,0,0,q,(srt.readcsv `:tmp_sort_valid.csv)~([] tabname:`trade`trade`default; att:`p``p; column:`sym`time`sym; sort:101b),1,1,readcsv round-trips the csv content +true,0,0,q,0=count srt.readcsv `:tmp_sort_empty.csv,1,1,readcsv returns an empty table for a header-only csv + +comment,,,,,,,readcsv - input failures (missing file / type error) +fail,0,0,q,srt.readcsv `:nonexistent_file.csv,1,1,readcsv errors on a missing file +fail,0,0,q,srt.readcsv 42,1,1,readcsv errors on a non-symbol file argument + +comment,,,,,,,sorttab - happy path via both a hand-built table and a csv +run,0,0,q,srt.sorttab[cfg;`trade;()],1,1,sorttab accepts a hand-built config table +run,0,0,q,srt.sorttab[srt.readcsv `:tmp_sort_valid.csv;`trade;()],1,1,sorttab accepts a config table read from a csv + +comment,,,,,,,sorttab - config validation (type errors / bad content) +fail,0,0,q,srt.sorttab[42;`trade;enlist`:/],1,1,sorttab rejects a non-table config +fail,0,0,q,srt.sorttab[([] tabname:enlist`trade; bad:enlist`p; column:enlist`sym; sort:enlist 1b);`trade;enlist`:/],1,1,sorttab rejects unrecognised config columns +fail,0,0,q,srt.sorttab[([] tabname:enlist`trade; att:enlist`p; column:enlist`sym);`trade;enlist`:/],1,1,sorttab rejects a missing required config column +fail,0,0,q,srt.sorttab[([] tabname:enlist`trade; 
att:enlist`z; column:enlist`sym; sort:enlist 1b);`trade;enlist`:/],1,1,sorttab rejects unknown attribute values +fail,0,0,q,srt.sorttab[([] tabname:enlist`trade; att:enlist`p; column:enlist`sym; sort:enlist 1);`trade;enlist`:/],1,1,sorttab rejects a non-boolean sort column +fail,0,0,q,srt.sorttab[srt.readcsv `:tmp_sort_badcols.csv;`trade;enlist`:/],1,1,sorttab rejects csv config with unknown columns +fail,0,0,q,srt.sorttab[srt.readcsv `:tmp_sort_badatt.csv;`trade;enlist`:/],1,1,sorttab rejects csv config with invalid attributes +fail,0,0,q,srt.sorttab[srt.readcsv `:tmp_sort_3col.csv;`trade;enlist`:/],1,1,sorttab rejects csv config missing a required column + +comment,,,,,,,sorttab - edge cases (empty config / null att / empty dirs / atom dir / type error) +true,0,0,q,()~srt.sorttab[([] tabname:`symbol$();att:`symbol$();column:`symbol$();sort:`boolean$());`trade;enlist`:/],1,1,empty config yields no work and returns () +run,0,0,q,srt.sorttab[([] tabname:enlist`trade; att:enlist`; column:enlist`sym; sort:enlist 0b);`trade;enlist`:/],1,1,sorttab accepts a null (no) attribute row +run,0,0,q,srt.sorttab[cfg;`trade;()],1,1,sorttab handles an empty partition list without error +run,0,0,q,srt.sorttab[cfg;`trade;`:/],1,1,sorttab accepts a single atom hsym dir not just a list +fail,0,0,q,srt.sorttab[cfg;42;enlist`:/],1,1,sorttab errors on a non-symbol table name + +comment,,,,,,,sorttab - params resolution (table-specific / default / none) +run,0,0,q,srt.sorttab[cfg;`trade;enlist`:/],1,1,sorttab uses the table-specific params row +run,0,0,q,srt.sorttab[cfg;`other;enlist`:/],1,1,sorttab falls back to the default params row +true,0,0,q,()~srt.sorttab[nodflcfg;`other;enlist`:/],1,1,sorttab returns () when no params found and no default row + +comment,,,,,,,log call verification - switch to capturing logger +run,0,0,q,srt.init[enlist[`log]!enlist logcap],1,1,re-init with capturing logger + +comment,,,,,,,log call verification - readcsv info logging on successful read 
run,0,0,q,caplog:0#caplog,1,1,reset log capture -run,0,0,q,srt.getsortcsv[`:tmp_sort_valid.csv],1,1,call getsortcsv with capturing logger -true,0,0,q,2=count select from caplog where fn=`info,1,1,getsortcsv emits exactly two info entries -true,0,0,q,"any caplog[`msg] like ""*retrieving sort settings*""",1,1,getsortcsv logs retrieval start message -true,0,0,q,"any caplog[`msg] like ""*loaded 3 sort config rows*""",1,1,getsortcsv logs loaded row count -true,0,0,q,all (select from caplog where fn=`info)[`ctx]=`getsortcsv,1,1,getsortcsv info entries use getsortcsv context symbol -true,0,0,q,not any caplog[`fn]=`error,1,1,getsortcsv logs no errors on success - -comment,,,,,,,log call verification - getsortcsv error logging on file load failure +run,0,0,q,srt.readcsv `:tmp_sort_valid.csv,1,1,call readcsv with capturing logger +true,0,0,q,2=count select from caplog where fn=`info,1,1,readcsv emits exactly two info entries +true,0,0,q,"any caplog[`msg] like ""*reading sort params*""",1,1,readcsv logs read start message +true,0,0,q,"any caplog[`msg] like ""*read 3 sort param*""",1,1,readcsv logs the row count read +true,0,0,q,all (select from caplog where fn=`info)[`ctx]=`readcsv,1,1,readcsv info entries use readcsv context symbol +true,0,0,q,not any caplog[`fn]=`error,1,1,readcsv logs no errors on success + +comment,,,,,,,log call verification - readcsv error logging on file read failure run,0,0,q,caplog:0#caplog,1,1,reset log capture -run,0,0,q,@[srt.getsortcsv;`:nonexistent_file.csv;{x}],1,1,trigger file load error -true,0,0,q,1=count select from caplog where fn=`error,1,1,getsortcsv logs exactly one error on file failure -true,0,0,q,"any caplog[`msg] like ""*failed to open*""",1,1,error log message mentions failed to open -true,0,0,q,`getsortcsv~first (select from caplog where fn=`error)[`ctx],1,1,file error is logged under getsortcsv context symbol +run,0,0,q,@[srt.readcsv;`:nonexistent_file.csv;{x}],1,1,trigger file read error +true,0,0,q,1=count select from caplog 
where fn=`error,1,1,readcsv logs exactly one error on file failure +true,0,0,q,"any caplog[`msg] like ""*failed to read*""",1,1,error log message mentions failed to read +true,0,0,q,`readcsv~first (select from caplog where fn=`error)[`ctx],1,1,file error is logged under readcsv context symbol comment,,,,,,,log call verification - sorttab info logging for table-specific params and partition operations run,0,0,q,caplog:0#caplog,1,1,reset log capture -run,0,0,q,srt.sorttab[(`trade;enlist`:/)] ,1,1,sorttab with table-specific params +run,0,0,q,srt.sorttab[cfg;`trade;enlist`:/],1,1,sorttab with table-specific params true,0,0,q,"any caplog[`msg] like ""*sorting the trade table*""",1,1,sorttab logs sort start true,0,0,q,"any caplog[`msg] like ""*sort parameters have been retrieved for: trade*""",1,1,sorttab logs table-specific params lookup true,0,0,q,"any caplog[`msg] like ""*finished sorting the trade table*""",1,1,sorttab logs sort completion @@ -98,13 +96,12 @@ true,0,0,q,"any caplog[`msg] like ""*unable to apply*""",1,1,attrerr logs attrib comment,,,,,,,log call verification - sorttab info logging for default params fallback path run,0,0,q,caplog:0#caplog,1,1,reset log capture -run,0,0,q,srt.sorttab[(`other;enlist`:/)] ,1,1,sorttab falls back to default params row +run,0,0,q,srt.sorttab[cfg;`other;enlist`:/],1,1,sorttab falls back to default params row true,0,0,q,"any caplog[`msg] like ""*using default parameters*""",1,1,sorttab logs default params fallback comment,,,,,,,log call verification - sorttab warn logging when no params and no default row -run,0,0,q,srt.getsortcsv[`:tmp_sort_nodfl.csv],1,1,load params with no default row run,0,0,q,caplog:0#caplog,1,1,reset log capture -run,0,0,q,srt.sorttab[(`other;enlist`:/)] ,1,1,sorttab with no matching params +run,0,0,q,srt.sorttab[nodflcfg;`other;enlist`:/],1,1,sorttab with no matching params true,0,0,q,any caplog[`fn]=`warn,1,1,sorttab emits a warn log when table has no sort config true,0,0,q,"any caplog[`msg] like 
""*will not be sorted*""",1,1,warn message indicates table will not be sorted true,0,0,q,`sorttab~first (select from caplog where fn=`warn)[`ctx],1,1,warn is logged under sorttab context symbol @@ -112,7 +109,6 @@ true,0,0,q,`sorttab~first (select from caplog where fn=`warn)[`ctx],1,1,warn is after,0,0,q,hdel`:tmp_sort_valid.csv,1,1,remove valid sort csv after,0,0,q,hdel`:tmp_sort_badcols.csv,1,1,remove bad cols csv after,0,0,q,hdel`:tmp_sort_badatt.csv,1,1,remove bad att csv -after,0,0,q,hdel`:tmp_sort_autoload.csv,1,1,remove autoload fixture csv after,0,0,q,hdel`:tmp_sort_3col.csv,1,1,remove 3-col fixture csv after,0,0,q,hdel`:tmp_sort_empty.csv,1,1,remove empty fixture csv after,0,0,q,hdel`:tmp_sort_nodfl.csv,1,1,remove no-default fixture csv From 094d2910579191bb285bd7a68d67f1368d66f736 Mon Sep 17 00:00:00 2001 From: ascottDI Date: Fri, 19 Jun 2026 14:02:35 +0100 Subject: [PATCH 5/6] updating .md --- di/sort/sort.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/di/sort/sort.md b/di/sort/sort.md index 392ddf4d..7e15efbe 100644 --- a/di/sort/sort.md +++ b/di/sort/sort.md @@ -108,7 +108,7 @@ Read a config CSV and **return** it as a table. Does not store it — pass the r |---|---|---| | `file` | hsym (or symbol) | Path to the CSV. Coerced with `hsym`, so `` `:config/sort.csv `` and `` `config/sort.csv `` both work. | -The CSV must have the four columns `tabname,att,column,sort` in that order. Logs an info message on read and an error message on file-read failure (then rethrows). Content validation (column names, attribute values) happens in `sorttab`. +The CSV must have the four columns `tabname,att,column,sort` in that order. Logs info messages while reading (the read start and the row count) and an error message on file-read failure (then rethrows). Content validation (column names, attribute values) happens in `sorttab`. 
```q config:srt.readcsv `:config/sort.csv From 4c4ece898118293cb0ecccbc1fda118df434783a Mon Sep 17 00:00:00 2001 From: ascottDI Date: Fri, 19 Jun 2026 15:35:31 +0100 Subject: [PATCH 6/6] fixes and changes following automated code review --- di/sort/sort.md | 4 ++-- di/sort/sort.q | 44 +++++++++++++++++++++++++++++++------------- di/sort/test.csv | 14 +++++++++++--- 3 files changed, 44 insertions(+), 18 deletions(-) diff --git a/di/sort/sort.md b/di/sort/sort.md index 7e15efbe..678942b8 100644 --- a/di/sort/sort.md +++ b/di/sort/sort.md @@ -108,7 +108,7 @@ Read a config CSV and **return** it as a table. Does not store it — pass the r |---|---|---| | `file` | hsym (or symbol) | Path to the CSV. Coerced with `hsym`, so `` `:config/sort.csv `` and `` `config/sort.csv `` both work. | -The CSV must have the four columns `tabname,att,column,sort` in that order. Logs info messages while reading (the read start and the row count) and an error message on file-read failure (then rethrows). Content validation (column names, attribute values) happens in `sorttab`. +The CSV must have exactly the four columns `tabname`, `att`, `column`, `sort` — in **any** order (the result is normalised to canonical column order). The header is validated as it is read: a missing, extra, or misnamed column raises a clear `di.sort:` error rather than silently mis-parsing or dropping data. Logs info messages while reading (the read start and the row count) and an error message on file-read failure (then rethrows). Attribute-value validation (e.g. an unknown `att`) happens later in `sorttab`. 
```q config:srt.readcsv `:config/sort.csv @@ -130,7 +130,7 @@ Sort and apply attributes to on-disk partitions for a single table, using the su | `tabname` | symbol | Table name | | `dirs` | hsym, or list of hsyms | Partition directory (or directories) for that table | -`config` is validated first; `sorttab` errors (prefixed `di.sort:`) if it is not a table, has unknown or missing columns, has a non-boolean `sort` column, or has an `att` value outside `` ` `p`s`g`u ``. +`config` is validated first; `sorttab` errors (prefixed `di.sort:`) if it is not a table, has unknown or missing columns, has a non-boolean `sort` column, or has an `att` value outside `` ` `p`s`g`u ``. It also errors (prefixed `di.sort:`) if `tabname` is not a symbol. Lookup order for sort parameters within `config`: 1. Rows where `tabname` matches the supplied table name diff --git a/di/sort/sort.q b/di/sort/sort.q index 175fefcc..ab2d6ba1 100644 --- a/di/sort/sort.q +++ b/di/sort/sort.q @@ -22,25 +22,39 @@ init:{[deps] readcsv:{[file] / convenience for the common case where config lives in a csv - returns it, does not store / pass the result to sorttab, e.g. 
srt.sorttab[srt.readcsv `:sort.csv;`trade;dirs] - / the csv must have columns tabname,att,column,sort in that order + / the csv must have the columns tabname,att,column,sort (in any order) file:hsym file; - t:@[readfile; file; readerr[file]]; + t:parsecsv @[readfile; file; readerr[file]]; .z.m.log[`info][`readcsv;"read ",(string count t)," sort param row(s) from ",string file]; :t; }; -/ internal - read and parse a sort-config csv (the protected action in readcsv) +/ internal - protected file read; only the i/o so a genuine read failure gets the readerr message readfile:{[file] - / kept named rather than inline so readcsv reads cleanly and matches the style guide + / returns the raw csv lines; header validation and parsing happen in parsecsv .z.m.log[`info][`readcsv;"reading sort params from ",string file]; - :("SSSB";enlist",") 0: file; + :read0 file; }; / internal - log and rethrow a csv read failure readerr:{[file;e] - / surfaces the failure to the caller after logging it under the readcsv context - .z.m.log[`error][`readcsv;"failed to read ",string[file],": ",e]; - '"failed to read ",string[file],": ",e; + / build the message once, surface it under the readcsv context, then rethrow it to the caller + m:"failed to read ",string[file],": ",e; + .z.m.log[`error][`readcsv;m]; + 'm; + }; + +/ internal - validate the header and parse csv lines into a config table +parsecsv:{[lines] + / map types by column name so the csv column order does not matter; reject any other shape + / outside the readfile i/o trap so a bad header surfaces as a clear di.sort: error + if[0=count lines; + '"di.sort: csv has no header row"]; + hdr:`$"," vs first lines; + if[not (asc distinct hdr)~`att`column`sort`tabname; + '"di.sort: csv header must be exactly tabname,att,column,sort; got: ",", " sv string hdr]; + types:{$[x=`sort;"B";"S"]} each hdr; + :`tabname`att`column`sort#(types;enlist",") 0: lines; }; sorttab:{[config;tabname;dirs] @@ -48,6 +62,8 @@ sorttab:{[config;tabname;dirs] / 
config: a sort-config table (build it directly or via readcsv); tabname: symbol; dirs: hsym or list of hsyms / example: srt.sorttab[srt.readcsv `:sort.csv;`trade;`:/hdb/2024.01.01/trade] checkconfig config; + if[not -11h=type tabname; + '"di.sort: tabname must be a symbol, got type ",string type tabname]; st:string tabname; .z.m.log[`info][`sorttab;"sorting the ",st," table"]; sp:getsortparams[config;tabname;st]; @@ -83,9 +99,10 @@ logreturn:{[lvl;msg;rows] }; / internal - resolve which config rows apply to a table -getsortparams:{[config;t;st] +getsortparams:{[config;tab;st] + / tab is the table-name symbol (NOT a table); named to avoid clashing with the tabname column / a table uses its own rows; unlisted tables fall back to the default row, else are skipped - if[count tabsp:select from config where tabname=t; + if[count tabsp:select from config where tabname=tab; :logreturn[`info;"sort parameters have been retrieved for: ",st;tabsp]]; if[count defsp:select from config where tabname=`default; :logreturn[`info;"no sort parameters have been specified for: ",st,". 
using default parameters";defsp]]; @@ -112,7 +129,7 @@ sortdir:{[sp;dloc] / sort by the columns flagged sort=1b, then attribute the columns that request one sortcols:exec column from sp where sort, not null column; if[count sortcols; sortcolumns[dloc;sortcols]]; - attrcols:select column,att from sp where not null att; + attrcols:select column,att from sp where att in `p`s`g`u; if[count attrcols; applyattr[dloc;;]'[attrcols`column;attrcols`att]]; }; @@ -125,8 +142,9 @@ attrerr:{[dl;cn;at;e] / internal - apply a single attribute to a specific column in an on-disk partition applyattr:{[dloc;colname;att] - / sortdir only passes non-null atts; guard here in case applyattr is ever called directly - if[null att; :()]; + / skip anything that is not a real attribute - covers the empty none-sentinel and any bad value + / sortdir already filters to valid atts; this guards a direct call to applyattr + if[not att in `p`s`g`u; :()]; .z.m.log[`info][`applyattr;"applying ",string[att]," attr to the ",string[colname]," column in ",string dloc]; .[{@[x;y;z#]}; (dloc;colname;att); diff --git a/di/sort/test.csv b/di/sort/test.csv index ddb924c8..c33d97c0 100644 --- a/di/sort/test.csv +++ b/di/sort/test.csv @@ -9,6 +9,8 @@ before,0,0,q,"`:tmp_sort_badatt.csv 0: (""tabname,att,column,sort"";""trade,z,sy before,0,0,q,"`:tmp_sort_3col.csv 0: (""tabname,att,sort"";""trade,p,1"")",1,1,write csv with only 3 columns - missing column before,0,0,q,"`:tmp_sort_empty.csv 0: enlist ""tabname,att,column,sort""",1,1,write header-only sort csv before,0,0,q,"`:tmp_sort_nodfl.csv 0: (""tabname,att,column,sort"";""trade,p,sym,1"")",1,1,write sort csv with no default row +before,0,0,q,"`:tmp_sort_5col.csv 0: (""tabname,att,column,sort,extra"";""trade,p,sym,1,foo"")",1,1,write csv with an unexpected extra column +before,0,0,q,"`:tmp_sort_reorder.csv 0: (""tabname,att,sort,column"";""trade,p,1,sym"";""trade,,0,time"";""default,p,1,sym"")",1,1,write valid csv with columns in a different order 
before,0,0,q,srt.init[enlist[`log]!enlist mylog],1,1,init with no-op logger before,0,0,q,cfg:([] tabname:`trade`trade`default; att:`p``p; column:`sym`time`sym; sort:101b),1,1,reusable valid config table (hand-built) before,0,0,q,nodflcfg:srt.readcsv `:tmp_sort_nodfl.csv,1,1,reusable config with a trade row but no default @@ -29,10 +31,15 @@ true,0,0,q,3=count srt.readcsv `:tmp_sort_valid.csv,1,1,readcsv returns a 3-row true,0,0,q,`tabname`att`column`sort~cols srt.readcsv `:tmp_sort_valid.csv,1,1,readcsv table has the correct column schema true,0,0,q,(srt.readcsv `:tmp_sort_valid.csv)~([] tabname:`trade`trade`default; att:`p``p; column:`sym`time`sym; sort:101b),1,1,readcsv round-trips the csv content true,0,0,q,0=count srt.readcsv `:tmp_sort_empty.csv,1,1,readcsv returns an empty table for a header-only csv +true,0,0,q,(srt.readcsv `:tmp_sort_reorder.csv)~srt.readcsv `:tmp_sort_valid.csv,1,1,readcsv is column-order independent +true,0,0,q,`tabname`att`column`sort~cols srt.readcsv `:tmp_sort_reorder.csv,1,1,readcsv normalises reordered columns to canonical order -comment,,,,,,,readcsv - input failures (missing file / type error) +comment,,,,,,,readcsv - input and header validation failures fail,0,0,q,srt.readcsv `:nonexistent_file.csv,1,1,readcsv errors on a missing file fail,0,0,q,srt.readcsv 42,1,1,readcsv errors on a non-symbol file argument +fail,0,0,q,srt.readcsv `:tmp_sort_badcols.csv,1,1,readcsv rejects a csv with wrong column names +fail,0,0,q,srt.readcsv `:tmp_sort_3col.csv,1,1,readcsv rejects a csv missing a required column +fail,0,0,q,srt.readcsv `:tmp_sort_5col.csv,1,1,readcsv rejects a csv with an extra column comment,,,,,,,sorttab - happy path via both a hand-built table and a csv run,0,0,q,srt.sorttab[cfg;`trade;()],1,1,sorttab accepts a hand-built config table @@ -44,9 +51,7 @@ fail,0,0,q,srt.sorttab[([] tabname:enlist`trade; bad:enlist`p; column:enlist`sym fail,0,0,q,srt.sorttab[([] tabname:enlist`trade; att:enlist`p; 
column:enlist`sym);`trade;enlist`:/],1,1,sorttab rejects a missing required config column fail,0,0,q,srt.sorttab[([] tabname:enlist`trade; att:enlist`z; column:enlist`sym; sort:enlist 1b);`trade;enlist`:/],1,1,sorttab rejects unknown attribute values fail,0,0,q,srt.sorttab[([] tabname:enlist`trade; att:enlist`p; column:enlist`sym; sort:enlist 1);`trade;enlist`:/],1,1,sorttab rejects a non-boolean sort column -fail,0,0,q,srt.sorttab[srt.readcsv `:tmp_sort_badcols.csv;`trade;enlist`:/],1,1,sorttab rejects csv config with unknown columns fail,0,0,q,srt.sorttab[srt.readcsv `:tmp_sort_badatt.csv;`trade;enlist`:/],1,1,sorttab rejects csv config with invalid attributes -fail,0,0,q,srt.sorttab[srt.readcsv `:tmp_sort_3col.csv;`trade;enlist`:/],1,1,sorttab rejects csv config missing a required column comment,,,,,,,sorttab - edge cases (empty config / null att / empty dirs / atom dir / type error) true,0,0,q,()~srt.sorttab[([] tabname:`symbol$();att:`symbol$();column:`symbol$();sort:`boolean$());`trade;enlist`:/],1,1,empty config yields no work and returns () @@ -54,6 +59,7 @@ run,0,0,q,srt.sorttab[([] tabname:enlist`trade; att:enlist`; column:enlist`sym; run,0,0,q,srt.sorttab[cfg;`trade;()],1,1,sorttab handles an empty partition list without error run,0,0,q,srt.sorttab[cfg;`trade;`:/],1,1,sorttab accepts a single atom hsym dir not just a list fail,0,0,q,srt.sorttab[cfg;42;enlist`:/],1,1,sorttab errors on a non-symbol table name +true,0,0,q,"""di.sort:""~8#@[{srt.sorttab[cfg;42;enlist`:/]};(::);{x}]",1,1,non-symbol table name gives a di.sort:-prefixed error comment,,,,,,,sorttab - params resolution (table-specific / default / none) run,0,0,q,srt.sorttab[cfg;`trade;enlist`:/],1,1,sorttab uses the table-specific params row @@ -112,3 +118,5 @@ after,0,0,q,hdel`:tmp_sort_badatt.csv,1,1,remove bad att csv after,0,0,q,hdel`:tmp_sort_3col.csv,1,1,remove 3-col fixture csv after,0,0,q,hdel`:tmp_sort_empty.csv,1,1,remove empty fixture csv after,0,0,q,hdel`:tmp_sort_nodfl.csv,1,1,remove 
no-default fixture csv +after,0,0,q,hdel`:tmp_sort_5col.csv,1,1,remove 5-col fixture csv +after,0,0,q,hdel`:tmp_sort_reorder.csv,1,1,remove reordered fixture csv