From c4f4556b332f636c644e5b0425ce10a714ac91a4 Mon Sep 17 00:00:00 2001
From: ketaki-deodhar
Date: Thu, 11 Jun 2026 16:14:03 -0700
Subject: [PATCH 01/10] 31328 - initial commit 1

---
 data-tool/scripts/_generated/subset_load.sql  |   48 +
 .../subset_load_chunks/transfer_all.sql       | 1481 +++++++++++++++++
 data-tool/scripts/corp_ids_ctst.txt           |    2 +
 .../scripts/generate_cprd_subset_extract.py   |  141 +-
 .../scripts/subset/subset_delete_cars.sql     |    8 +-
 .../scripts/subset/subset_delete_chunk.sql    |   98 +-
 .../subset/subset_disable_triggers.sql        |   70 +-
 .../scripts/subset/subset_enable_triggers.sql |   70 +-
 .../subset/subset_pg_boolean_casts.sql        |    8 +-
 .../subset_pg_cleanup_address_stage.sql       |    2 +
 .../subset_pg_cleanup_orphan_children.sql     |   64 +-
 .../subset_pg_prepare_address_stage.sql       |    2 +-
 .../subset_pg_purge_bcomps_excluded.sql       |  156 +-
 .../scripts/subset/subset_transfer_cars.sql   |   12 +-
 .../scripts/subset/subset_transfer_chunk.sql  |   76 +-
 15 files changed, 1955 insertions(+), 283 deletions(-)
 create mode 100644 data-tool/scripts/_generated/subset_load.sql
 create mode 100644 data-tool/scripts/_generated/subset_load_chunks/transfer_all.sql
 create mode 100644 data-tool/scripts/corp_ids_ctst.txt

diff --git a/data-tool/scripts/_generated/subset_load.sql b/data-tool/scripts/_generated/subset_load.sql
new file mode 100644
index 0000000000..b090edbac0
--- /dev/null
+++ b/data-tool/scripts/_generated/subset_load.sql
@@ -0,0 +1,48 @@
+vset cli.settings.ignore_errors=false
+vset cli.settings.replace_variables=false
+vset cli.settings.transfer_threads=4
+vset format.date=YYYY-MM-dd'T'hh:mm:ss'Z'
+vset format.timestamp=YYYY-MM-dd'T'hh:mm:ss'Z'
+
+connect cprd_pg_subset;
+-- Serialize subset runs on this target DB.
+execute /home/kdeodhar/repos/lear/data-tool/scripts/subset/subset_pg_acquire_advisory_lock.sql
+
+-- Prepare shared address staging table before learning schema
+execute /home/kdeodhar/repos/lear/data-tool/scripts/subset/subset_pg_prepare_address_stage.sql
+learn schema public;
+
+truncate table public.colin_extract_version; insert into public.colin_extract_version (extracted_at) values (current_timestamp);
+
+-- Postgres fast-load mode (session-level settings)
+execute /home/kdeodhar/repos/lear/data-tool/scripts/subset/subset_pg_fastload_begin.sql
+
+-- Postgres helper: allow VARCHAR/BPCHAR -> BOOLEAN assignment (DbSchemaCLI boolean inserts)
+execute /home/kdeodhar/repos/lear/data-tool/scripts/subset/subset_pg_boolean_casts.sql
+-- Fail-fast: verify varchar/bpchar -> boolean casts exist
+select 't'::varchar::boolean;
+select 'f'::bpchar::boolean;
+
+execute /home/kdeodhar/repos/lear/data-tool/scripts/subset/subset_disable_triggers.sql
+
+-- global cars* refresh (not corp-scoped; full dataset truncate + reload)
+execute /home/kdeodhar/repos/lear/data-tool/scripts/subset/subset_delete_cars.sql
+execute /home/kdeodhar/repos/lear/data-tool/scripts/subset/subset_transfer_cars.sql
+
+-- transfer corp-scoped subset from Oracle to Postgres
+-- transfer chunk 001/001
+execute /home/kdeodhar/repos/lear/data-tool/scripts/_generated/subset_load_chunks/transfer_all.sql
+
+-- purge BCOMPS-excluded corps (computed in Postgres after load)
+execute /home/kdeodhar/repos/lear/data-tool/scripts/subset/subset_pg_purge_bcomps_excluded.sql
+
+execute /home/kdeodhar/repos/lear/data-tool/scripts/subset/subset_enable_triggers.sql
+
+-- Cleanup shared address staging table
+execute /home/kdeodhar/repos/lear/data-tool/scripts/subset/subset_pg_cleanup_address_stage.sql
+
+-- Release subset-run advisory lock
+execute /home/kdeodhar/repos/lear/data-tool/scripts/subset/subset_pg_release_advisory_lock.sql
+
+-- Reset Postgres fast-load session settings
+execute /home/kdeodhar/repos/lear/data-tool/scripts/subset/subset_pg_fastload_end.sql
diff --git a/data-tool/scripts/_generated/subset_load_chunks/transfer_all.sql b/data-tool/scripts/_generated/subset_load_chunks/transfer_all.sql
new file mode 100644
index 0000000000..89f17c9102
--- /dev/null
+++ b/data-tool/scripts/_generated/subset_load_chunks/transfer_all.sql
@@ -0,0 +1,1481 @@
+-- generated chunk script: transfer_all.sql
+-- mode: load
+-- chunk: 001/001
+-- target corps: 2
+-- oracle corp_num: 2
+
+-- Transfer a chunk (or a whole subset) of corps from the SOURCE Oracle DB (cprd) into the TARGET Postgres extract DB (cprd_pg).
+--
+-- REQUIRED DbSchemaCLI variables (replace_variables=true):
+--   target_corp_num_predicate : SQL predicate restricting the computed target_corp_num (NO trailing semicolon).
+--     Examples:
+--       target_corp_num in ('BC0460007','A1234567')
+--       (target_corp_num in (...) OR target_corp_num in (...))
+--   oracle_corp_num_predicate : SQL predicate restricting Oracle corporation.corp_num (NO trailing semicolon).
+--     Examples:
+--       c.CORP_NUM in ('0460007','A1234567')
+--       (c.CORP_NUM in (...) OR c.CORP_NUM in (...))
+--   oracle_corp_type_predicate : SQL predicate restricting Oracle corporation.corp_typ_cd (NO trailing semicolon).
+--     Examples:
+--       c.CORP_TYP_CD in ('BC','C','ULC','CUL','CC','CCC','QA','QB','QC','QD','QE')
+--       c.CORP_TYP_CD in ('BC','C','ULC','CUL','CC','CCC','QA','QB','QC','QD','QE','CP')
+--
+-- Intended to be executed from a master DbSchemaCLI script connected to the target Postgres DB (cprd_pg).
+--
+-- IMPORTANT:
+-- - This template intentionally avoids the boolean<->integer ALTER COLUMN hacks used in the full refresh script.
+--   Instead, Oracle SELECTs emit boolean-friendly 't'/'f' strings for Postgres boolean columns.
+-- - This template transfers corp-scoped tables only (no cars* tables).
+--
+-- Performance notes:
+-- - BCOMPS exclusion is NOT computed in Oracle in this template (to avoid repeating expensive Oracle-side joins per table).
+--   Instead, load the requested corp set and purge BCOMPS-excluded corps ONCE in Postgres after the transfer suite completes
+--   (see: subset_pg_purge_bcomps_excluded.sql).
+-- - Joins are written to start from the subset (corporation_cte) to avoid "0 rows but slow" plans.
+-- - ORDER BY clauses are removed (sorting is unnecessary overhead for transfers).
+--
+-- Example (legacy vset mode):
+--   vset target_corp_num_predicate=target_corp_num in ('BC1111585','BC1226175');
+--   vset oracle_corp_num_predicate=c.CORP_NUM in ('1111585','1226175');
+
+-- corporation
+transfer public.corporation from cprd using
+with corp_list as (
+    select /*+ materialize */ c.corp_num
+    from corporation c
+    where c.CORP_NUM in (
+        '0008367',
+        '0008368'
+)
+      and c.CORP_TYP_CD in ('BC','C','ULC','CUL','CC','CCC','QA','QB','QC','QD','QE')
+      -- altered from BC to BEN then BEN to BC before directed launch
+      and c.CORP_NUM not in ('0460007', '1255957', '1186381')
+),
+corporation_cte as (
+    select *
+    from (
+        select c.*,
+               case
+                   when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM
+                   else c.CORP_NUM
+               end as target_corp_num
+        from corporation c
+        join corp_list cl on cl.corp_num = c.corp_num
+    )
+    where target_corp_num in (
+        'BC0008367',
+        'BC0008368'
+)
+),
+last_ar as (
+    select e.corp_num,
+           to_number(to_char(max(date_1), 'YYYY')) as last_ar_reminder_year
+    from eml_log e
+    join rep_data r on r.param_id = e.param_id
+                   and r.t20_1 = e.corp_num
+    join corp_list cl on cl.corp_num = e.corp_num
+    group by e.corp_num
+)
+select c.target_corp_num as CORP_NUM,
+       c.CORP_FROZEN_TYP_CD as corp_frozen_type_cd,
+       case
+           when c.CORP_TYP_CD in ('QA', 'QB', 'QC', 'QD', 'QE') then 'BC'
+           else c.CORP_TYP_CD
+       end as CORP_TYPE_CD,
+       c.CORP_PASSWORD,
+       c.RECOGNITION_DTS,
+       c.BN_9,
+       c.BN_15,
+       c.ADMIN_EMAIL,
+       c.ACCESSION_NUM,
+       c.LAST_AR_FILED_DT,
+       case c.SEND_AR_IND
+           when 'N' then 'f'
+           when 'Y' then 't'
+           else 't'
+       end as SEND_AR_IND,
+       la.last_ar_reminder_year as LAST_AR_REMINDER_YEAR
+from corporation_cte c
+left join last_ar la on la.corp_num = c.corp_num;
+
+
+-- event
+transfer public.event from cprd using
+with corp_list as (
+    select /*+ materialize */ c.corp_num
+    from corporation c
+    where c.CORP_NUM in (
+        '0008367',
+        '0008368'
+)
+      and c.CORP_TYP_CD in ('BC','C','ULC','CUL','CC','CCC','QA','QB','QC','QD','QE')
+      and c.CORP_NUM not in ('0460007', '1255957', '1186381')
+),
+corporation_cte as (
+    select *
+    from (
+        select c.*,
+               case
+                   when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM
+                   else c.CORP_NUM
+               end as target_corp_num
+        from corporation c
+        join corp_list cl on cl.corp_num = c.corp_num
+    )
+    where target_corp_num in (
+        'BC0008367',
+        'BC0008368'
+)
+)
+select e.event_id,
+       c.target_corp_num as CORP_NUM,
+       e.event_typ_cd as event_type_cd,
+       e.event_timestmp as event_timerstamp,
+       e.trigger_dts
+from corporation_cte c
+join event e on e.corp_num = c.corp_num
+-- not transferring BNUPD, ADDLEDGR events
+where e.event_typ_cd not in ('BNUPD', 'ADDLEDGR');
+
+
+-- corp_name
+transfer public.corp_name from cprd using
+with corp_list as (
+    select /*+ materialize */ c.corp_num
+    from corporation c
+    where c.CORP_NUM in (
+        '0008367',
+        '0008368'
+)
+      and c.CORP_TYP_CD in ('BC','C','ULC','CUL','CC','CCC','QA','QB','QC','QD','QE')
+      and c.CORP_NUM not in ('0460007', '1255957', '1186381')
+),
+corporation_cte as (
+    select *
+    from (
+        select c.*,
+               case
+                   when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM
+                   else c.CORP_NUM
+               end as target_corp_num
+        from corporation c
+        join corp_list cl on cl.corp_num = c.corp_num
+    )
+    where target_corp_num in (
+        'BC0008367',
+        'BC0008368'
+)
+)
+select c.target_corp_num as CORP_NUM,
+       cn.CORP_NAME_TYP_CD,
+       cn.start_event_id,
+       cn.end_event_id,
+       cn.CORP_NME as corp_name
+from corporation_cte c
+join CORP_NAME cn on cn.corp_num = c.corp_num;
+
+
+-- corp_state
+transfer public.corp_state from cprd using
+with corp_list as (
+    select /*+ materialize */ c.corp_num
+    from corporation c
+    where c.CORP_NUM in (
+        '0008367',
+        '0008368'
+)
+      and c.CORP_TYP_CD in ('BC','C','ULC','CUL','CC','CCC','QA','QB','QC','QD','QE')
+      and c.CORP_NUM not in ('0460007', '1255957', '1186381')
+),
+corporation_cte as (
+    select *
+    from (
+        select c.*,
+               case
+                   when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM
+                   else c.CORP_NUM
+               end as target_corp_num
+        from corporation c
+        join corp_list cl on cl.corp_num = c.corp_num
+    )
+    where target_corp_num in (
+        'BC0008367',
+        'BC0008368'
+)
+)
+select c.target_corp_num as CORP_NUM,
+       cs.STATE_TYP_CD as state_type_cd,
+       cos.op_state_typ_cd as op_state_type_cd,
+       cs.start_event_id,
+       cs.end_event_id
+from corporation_cte c
+join CORP_STATE cs on cs.corp_num = c.corp_num
+join corp_op_state cos on cos.state_typ_cd = cs.state_typ_cd;
+
+
+-- filing
+transfer public.filing from cprd using
+with corp_list as (
+    select /*+ materialize */ c.corp_num
+    from corporation c
+    where c.CORP_NUM in (
+        '0008367',
+        '0008368'
+)
+      and c.CORP_TYP_CD in ('BC','C','ULC','CUL','CC','CCC','QA','QB','QC','QD','QE')
+      and c.CORP_NUM not in ('0460007', '1255957', '1186381')
+),
+corporation_cte as (
+    select *
+    from (
+        select c.*,
+               case
+                   when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM
+                   else c.CORP_NUM
+               end as target_corp_num
+        from corporation c
+        join corp_list cl on cl.corp_num = c.corp_num
+    )
+    where target_corp_num in (
+        'BC0008367',
+        'BC0008368'
+)
+)
+select e.event_id,
+       f.filing_typ_cd as filing_type_cd,
+       f.effective_dt,
+       f.withdrawn_event_id,
+       trim(f.ods_typ_cd) as ods_type_cd,
+       f.nr_num,
+       f.COURT_ORDER_NUM,
+       f.CHANGE_DT,
+       f.PERIOD_END_DT,
+       case f.ARRANGEMENT_IND
+           when 'N' then 'f'
+           when 'Y' then 't'
+           else 't'
+       end as ARRANGEMENT_IND,
+       f.AUTH_SIGN_DT,
+       case f.COURT_APPR_IND
+           when 'N' then 'f'
+           when 'Y' then 't'
+           else 't'
+       end as COURT_APPR_IND
+from corporation_cte c
+join event e on e.corp_num = c.corp_num
+join filing f on f.event_id = e.event_id;
+
+
+-- filing_user
+transfer public.filing_user from cprd using
+with corp_list as (
+    select /*+ materialize */ c.corp_num
+    from corporation c
+    where c.CORP_NUM in (
+        '0008367',
+        '0008368'
+)
+      and c.CORP_TYP_CD in ('BC','C','ULC','CUL','CC','CCC','QA','QB','QC','QD','QE')
+      and c.CORP_NUM not in ('0460007', '1255957', '1186381')
+),
+corporation_cte as (
+    select *
+    from (
+        select c.*,
+               case
+                   when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM
+                   else c.CORP_NUM
+               end as target_corp_num
+        from corporation c
+        join corp_list cl on cl.corp_num = c.corp_num
+    )
+    where target_corp_num in (
+        'BC0008367',
+        'BC0008368'
+)
+)
+select e.event_id,
+       u.user_id,
+       u.last_nme as last_name,
+       u.first_nme as first_name,
+       u.middle_nme as middle_name,
+       u.email_addr,
+       u.BCOL_ACCT_NUM,
+       u.ROLE_TYP_CD
+from corporation_cte c
+join event e on e.corp_num = c.corp_num
+join filing_user u on u.event_id = e.event_id;
+
+
+-- address (shared/global table; stage then merge before loading dependents)
+TRUNCATE TABLE public.subset_address_stage;
+
+transfer public.subset_address_stage from cprd using
+with corp_list as (
+    select /*+ materialize */ c.corp_num
+    from corporation c
+    where c.CORP_NUM in (
+        '0008367',
+        '0008368'
+)
+      and c.CORP_TYP_CD in ('BC','C','ULC','CUL','CC','CCC','QA','QB','QC','QD','QE')
+      and c.CORP_NUM not in ('0460007', '1255957', '1186381')
+),
+corporation_cte as (
+    select *
+    from (
+        select c.*,
+               case
+                   when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM
+                   else c.CORP_NUM
+               end as target_corp_num
+        from corporation c
+        join corp_list cl on cl.corp_num = c.corp_num
+    )
+    where target_corp_num in (
+        'BC0008367',
+        'BC0008368'
+)
+)
+select distinct
+       addr_id,
+       trim(province) as province,
+       trim(country_typ_cd) as country_typ_cd,
+       trim(replace(postal_cd, CHR(0), '')) as POSTAL_CD,
+       trim(addr_line_1) as addr_line_1,
+       trim(replace(addr_line_2, CHR(0), '')) as ADDR_LINE_2,
+       trim(addr_line_3) as addr_line_3,
+       trim(city) as city
+from (
+    select a.*
+    from corporation_cte c
+    join corp_party x on x.corp_num = c.corp_num
+    join address a on (x.delivery_addr_id = a.addr_id or x.mailing_addr_id = a.addr_id)
+
+    UNION ALL
+    select a.*
+    from corporation_cte c
+    join office x on x.corp_num = c.corp_num
+    join address a on (x.delivery_addr_id = a.addr_id or x.mailing_addr_id = a.addr_id)
+
+    UNION ALL
+    select a.*
+    from corporation_cte c
+    join event e on e.corp_num = c.corp_num
+    join completing_party x on x.event_id = e.event_id
+    join address a on x.mailing_addr_id = a.addr_id
+
+    UNION ALL
+    select a.*
+    from corporation_cte c
+    join event e on e.corp_num = c.corp_num
+    join notification x on x.event_id = e.event_id
+    join address a on x.mailing_addr_id = a.addr_id
+
+    UNION ALL
+    select a.*
+    from corporation_cte c
+    join event e on e.corp_num = c.corp_num
+    join notification_resend x on x.event_id = e.event_id
+    join address a on x.mailing_addr_id = a.addr_id
+
+    UNION ALL
+    select a.*
+    from corporation_cte c
+    join event e on e.corp_num = c.corp_num
+    join submitting_party x on x.event_id = e.event_id
+    join address a on (x.notify_addr_id = a.addr_id or x.mailing_addr_id = a.addr_id)
+
+    UNION ALL
+    select a.*
+    from corporation_cte c
+    join corp_party p on p.corp_num = c.corp_num
+    join party_notification x on x.party_id = p.corp_party_id
+    join address a on x.mailing_addr_id = a.addr_id
+);
+
+INSERT INTO public.address (
+    addr_id,
+    province,
+    country_typ_cd,
+    postal_cd,
+    addr_line_1,
+    addr_line_2,
+    addr_line_3,
+    city
+)
+SELECT s.addr_id,
+       s.province,
+       s.country_typ_cd,
+       s.postal_cd,
+       s.addr_line_1,
+       s.addr_line_2,
+       s.addr_line_3,
+       s.city
+FROM (
+    SELECT DISTINCT ON (addr_id)
+           addr_id,
+           province,
+           country_typ_cd,
+           postal_cd,
+           addr_line_1,
+           addr_line_2,
+           addr_line_3,
+           city
+    FROM public.subset_address_stage
+    WHERE addr_id IS NOT NULL
+    ORDER BY addr_id
+) s
+ON CONFLICT (addr_id) DO UPDATE
+SET province = EXCLUDED.province,
+    country_typ_cd = EXCLUDED.country_typ_cd,
+    postal_cd = EXCLUDED.postal_cd,
+    addr_line_1 = EXCLUDED.addr_line_1,
+    addr_line_2 = EXCLUDED.addr_line_2,
+    addr_line_3 = EXCLUDED.addr_line_3,
+    city = EXCLUDED.city;
+
+TRUNCATE TABLE public.subset_address_stage;
+
+
+-- office
+transfer public.office from cprd using
+with corp_list as (
+    select /*+ materialize */ c.corp_num
+    from corporation c
+    where c.CORP_NUM in (
+        '0008367',
+        '0008368'
+)
+      and c.CORP_TYP_CD in ('BC','C','ULC','CUL','CC','CCC','QA','QB','QC','QD','QE')
+      and c.CORP_NUM not in ('0460007', '1255957', '1186381')
+),
+corporation_cte as (
+    select *
+    from (
+        select c.*,
+               case
+                   when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM
+                   else c.CORP_NUM
+               end as target_corp_num
+        from corporation c
+        join corp_list cl on cl.corp_num = c.corp_num
+    )
+    where target_corp_num in (
+        'BC0008367',
+        'BC0008368'
+)
+)
+select c.target_corp_num as CORP_NUM,
+       o.office_typ_cd,
+       o.start_event_id,
+       o.end_event_id,
+       o.mailing_addr_id,
+       o.delivery_addr_id
+from corporation_cte c
+join office o on o.corp_num = c.corp_num;
+
+
+-- corp_comments
+transfer public.corp_comments from cprd using
+with corp_list as (
+    select /*+ materialize */ c.corp_num
+    from corporation c
+    where c.CORP_NUM in (
+        '0008367',
+        '0008368'
+)
+      and c.CORP_TYP_CD in ('BC','C','ULC','CUL','CC','CCC','QA','QB','QC','QD','QE')
+      and c.CORP_NUM not in ('0460007', '1255957', '1186381')
+),
+corporation_cte as (
+    select *
+    from (
+        select c.*,
+               case
+                   when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM
+                   else c.CORP_NUM
+               end as target_corp_num
+        from corporation c
+        join corp_list cl on cl.corp_num = c.corp_num
+    )
+    where target_corp_num in (
+        'BC0008367',
+        'BC0008368'
+)
+)
+select cc.comment_dts,
+       c.target_corp_num as CORP_NUM,
+       cc.comments,
+       cc.USER_ID,
+       cc.FIRST_NME,
+       cc.LAST_NME,
+       cc.MIDDLE_NME,
+       cc.ACCESSION_COMMENTS
+from corporation_cte c
+join corp_comments cc on cc.corp_num = c.corp_num;
+
+
+-- ledger_text
+transfer public.ledger_text from cprd using
+with corp_list as (
+    select /*+ materialize */ c.corp_num
+    from corporation c
+    where c.CORP_NUM in (
+        '0008367',
+        '0008368'
+)
+      and c.CORP_TYP_CD in ('BC','C','ULC','CUL','CC','CCC','QA','QB','QC','QD','QE')
+      and c.CORP_NUM not in ('0460007', '1255957', '1186381')
+),
+corporation_cte as (
+    select *
+    from (
+        select c.*,
+               case
+                   when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM
+                   else c.CORP_NUM
+               end as target_corp_num
+        from corporation c
+        join corp_list cl on cl.corp_num = c.corp_num
+    )
+    where target_corp_num in (
+        'BC0008367',
+        'BC0008368'
+)
+)
+select e.event_id,
+       l.notation,
+       l.USER_ID,
+       l.LEDGER_TEXT_DTS
+from corporation_cte c
+join event e on e.corp_num = c.corp_num
+join ledger_text l on l.event_id = e.event_id;
+
+
+-- corp_party
+transfer public.corp_party from cprd using
+with corp_list as (
+    select /*+ materialize */ c.corp_num
+    from corporation c
+    where c.CORP_NUM in (
+        '0008367',
+        '0008368'
+)
+      and c.CORP_TYP_CD in ('BC','C','ULC','CUL','CC','CCC','QA','QB','QC','QD','QE')
+      and c.CORP_NUM not in ('0460007', '1255957', '1186381')
+),
+corporation_cte as (
+    select *
+    from (
+        select c.*,
+               case
+                   when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM
+                   else c.CORP_NUM
+               end as target_corp_num
+        from corporation c
+        join corp_list cl on cl.corp_num = c.corp_num
+    )
+    where target_corp_num in (
+        'BC0008367',
+        'BC0008368'
+)
+)
+select p.corp_party_id,
+       p.mailing_addr_id,
+       p.delivery_addr_id,
+       c.target_corp_num as CORP_NUM,
+       nvl(p.party_typ_cd, ' ') as party_typ_cd,
+       p.start_event_id,
+       p.end_event_id,
+       p.prev_party_id,
+       p.appointment_dt,
+       p.cessation_dt,
+       nvl(p.LAST_NME, ' ') as last_name,
+       nvl(p.MIDDLE_NME, ' ') as middle_name,
+       nvl(p.FIRST_NME, ' ') as first_name,
+       nvl(p.BUSINESS_NME, ' ') as business_name,
+       p.BUS_COMPANY_NUM,
+       p.CORR_TYP_CD,
+       p.OFFICE_NOTIFICATION_DT
+from corporation_cte c
+join corp_party p on p.corp_num = c.corp_num;
+
+
+-- corp_party_relationship
+transfer public.corp_party_relationship from cprd using
+with corp_list as (
+    select /*+ materialize */ c.corp_num
+    from corporation c
+    where c.CORP_NUM in (
+        '0008367',
+        '0008368'
+)
+      and c.CORP_TYP_CD in ('BC','C','ULC','CUL','CC','CCC','QA','QB','QC','QD','QE')
+      and c.CORP_NUM not in ('0460007', '1255957', '1186381')
+),
+corporation_cte as (
+    select *
+    from (
+        select c.*,
+               case
+                   when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM
+                   else c.CORP_NUM
+               end as target_corp_num
+        from corporation c
+        join corp_list cl on cl.corp_num = c.corp_num
+    )
+    where target_corp_num in (
+        'BC0008367',
+        'BC0008368'
+)
+)
+select cpr.CORP_PARTY_ID as corp_party_id,
+       cpr.RELATIONSHIP_TYP_CD as relationship_typ_cd
+from corporation_cte c
+join corp_party p on p.corp_num = c.corp_num
+join CORP_PARTY_RELATIONSHIP cpr on cpr.corp_party_id = p.corp_party_id;
+
+
+-- offices_held
+transfer public.offices_held from cprd using
+with corp_list as (
+    select /*+ materialize */ c.corp_num
+    from corporation c
+    where c.CORP_NUM in (
+        '0008367',
+        '0008368'
+)
+      and c.CORP_TYP_CD in ('BC','C','ULC','CUL','CC','CCC','QA','QB','QC','QD','QE')
+      and c.CORP_NUM not in ('0460007', '1255957', '1186381')
+),
+corporation_cte as (
+    select *
+    from (
+        select c.*,
+               case
+                   when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM
+                   else c.CORP_NUM
+               end as target_corp_num
+        from corporation c
+        join corp_list cl on cl.corp_num = c.corp_num
+    )
+    where target_corp_num in (
+        'BC0008367',
+        'BC0008368'
+)
+)
+select oh.CORP_PARTY_ID as corp_party_id,
+       oh.OFFICER_TYP_CD as officer_typ_cd
+from corporation_cte c
+join corp_party p on p.corp_num = c.corp_num
+join OFFICES_HELD oh on oh.corp_party_id = p.corp_party_id;
+
+
+-- completing_party
+transfer public.completing_party from cprd using
+with corp_list as (
+    select /*+ materialize */ c.corp_num
+    from corporation c
+    where c.CORP_NUM in (
+        '0008367',
+        '0008368'
+)
+      and c.CORP_TYP_CD in ('BC','C','ULC','CUL','CC','CCC','QA','QB','QC','QD','QE')
+      and c.CORP_NUM not in ('0460007', '1255957', '1186381')
+),
+corporation_cte as (
+    select *
+    from (
+        select c.*,
+               case
+                   when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM
+                   else c.CORP_NUM
+               end as target_corp_num
+        from corporation c
+        join corp_list cl on cl.corp_num = c.corp_num
+    )
+    where target_corp_num in (
+        'BC0008367',
+        'BC0008368'
+)
+)
+select e.event_id,
+       cp.MAILING_ADDR_ID,
+       cp.FIRST_NME,
+       cp.LAST_NME,
+       cp.MIDDLE_NME,
+       cp.EMAIL_REQ_ADDRESS
+from corporation_cte c
+join event e on e.corp_num = c.corp_num
+join completing_party cp on cp.event_id = e.event_id;
+
+
+-- submitting_party
+transfer public.submitting_party from cprd using
+with corp_list as (
+    select /*+ materialize */ c.corp_num
+    from corporation c
+    where c.CORP_NUM in (
+        '0008367',
+        '0008368'
+)
+      and c.CORP_TYP_CD in ('BC','C','ULC','CUL','CC','CCC','QA','QB','QC','QD','QE')
+      and c.CORP_NUM not in ('0460007', '1255957', '1186381')
+),
+corporation_cte as (
+    select *
+    from (
+        select c.*,
+               case
+                   when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM
+                   else c.CORP_NUM
+               end as target_corp_num
+        from corporation c
+        join corp_list cl on cl.corp_num = c.corp_num
+    )
+    where target_corp_num in (
+        'BC0008367',
+        'BC0008368'
+)
+)
+select e.event_id,
+       sp.MAILING_ADDR_ID,
+       sp.NOTIFY_ADDR_ID,
+       sp.METHOD_TYP_CD,
+       sp.FIRST_NME,
+       sp.LAST_NME,
+       sp.MIDDLE_NME,
+       sp.EMAIL_REQ_ADDRESS,
+       sp.PICKUP_BY,
+       sp.BUSINESS_NME,
+       sp.NOTIFY_FIRST_NME,
+       sp.NOTIFY_LAST_NME,
+       sp.NOTIFY_MIDDLE_NME,
+       sp.PHONE_NUMBER
+from corporation_cte c
+join event e on e.corp_num = c.corp_num
+join SUBMITTING_PARTY sp on sp.event_id = e.event_id;
+
+
+-- corp_flag
+transfer public.corp_flag from cprd using
+with corp_list as (
+    select /*+ materialize */ c.corp_num
+    from corporation c
+    where c.CORP_NUM in (
+        '0008367',
+        '0008368'
+)
+      and c.CORP_TYP_CD in ('BC','C','ULC','CUL','CC','CCC','QA','QB','QC','QD','QE')
+      and c.CORP_NUM not in ('0460007', '1255957', '1186381')
+),
+corporation_cte as (
+    select *
+    from (
+        select c.*,
+               case
+                   when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM
+                   else c.CORP_NUM
+               end as target_corp_num
+        from corporation c
+        join corp_list cl on cl.corp_num = c.corp_num
+    )
+    where target_corp_num in (
+        'BC0008367',
+        'BC0008368'
+)
+)
+select c.target_corp_num as CORP_NUM,
+       cf.CORP_FLAG_TYPE_CD,
+       cf.start_event_id,
+       cf.end_event_id
+from corporation_cte c
+join corp_flag cf on cf.corp_num = c.corp_num;
+
+
+-- cont_out
+transfer public.cont_out from cprd using
+with corp_list as (
+    select /*+ materialize */ c.corp_num
+    from corporation c
+    where c.CORP_NUM in (
+        '0008367',
+        '0008368'
+)
+      and c.CORP_TYP_CD in ('BC','C','ULC','CUL','CC','CCC','QA','QB','QC','QD','QE')
+      and c.CORP_NUM not in ('0460007', '1255957', '1186381')
+),
+corporation_cte as (
+    select *
+    from (
+        select c.*,
+               case
+                   when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM
+                   else c.CORP_NUM
+               end as target_corp_num
+        from corporation c
+        join corp_list cl on cl.corp_num = c.corp_num
+    )
+    where target_corp_num in (
+        'BC0008367',
+        'BC0008368'
+)
+)
+select c.target_corp_num as CORP_NUM,
+       co.CAN_JUR_TYP_CD,
+       co.CONT_OUT_DT,
+       co.OTHR_JURI_DESC,
+       co.HOME_COMPANY_NME,
+       co.start_event_id,
+       co.end_event_id
+from corporation_cte c
+join CONT_OUT co on co.corp_num = c.corp_num;
+
+
+-- conv_event
+transfer public.conv_event from cprd using
+with corp_list as (
+    select /*+ materialize */ c.corp_num
+    from corporation c
+    where c.CORP_NUM in (
+        '0008367',
+        '0008368'
+)
+      and c.CORP_TYP_CD in ('BC','C','ULC','CUL','CC','CCC','QA','QB','QC','QD','QE')
+      and c.CORP_NUM not in ('0460007', '1255957', '1186381')
+),
+corporation_cte as (
+    select *
+    from (
+        select c.*,
+               case
+                   when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM
+                   else c.CORP_NUM
+               end as target_corp_num
+        from corporation c
+        join corp_list cl on cl.corp_num = c.corp_num
+    )
+    where target_corp_num in (
+        'BC0008367',
+        'BC0008368'
+)
+)
+select e.event_id,
+       ce.effective_dt,
+       case ce.REPORT_CORP_IND
+           when 'N' then 'f'
+           when 'Y' then 't'
+           else 't'
+       end as REPORT_CORP_IND,
+       ce.ACTIVITY_USER_ID,
+       ce.ACTIVITY_DT,
+       ce.ANNUAL_FILE_DT,
+       ce.ACCESSION_NUM,
+       ce.REMARKS
+from corporation_cte c
+join event e on e.corp_num = c.corp_num
+join CONV_EVENT ce on ce.event_id = e.event_id;
+
+
+-- conv_ledger
+transfer public.conv_ledger from cprd using
+with corp_list as (
+    select /*+ materialize */ c.corp_num
+    from corporation c
+    where c.CORP_NUM in (
+        '0008367',
+        '0008368'
+)
+      and c.CORP_TYP_CD in ('BC','C','ULC','CUL','CC','CCC','QA','QB','QC','QD','QE')
+      and c.CORP_NUM not in ('0460007', '1255957', '1186381')
+),
+corporation_cte as (
+    select *
+    from (
+        select c.*,
+               case
+                   when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM
+                   else c.CORP_NUM
+               end as target_corp_num
+        from corporation c
+        join corp_list cl on cl.corp_num = c.corp_num
+    )
+    where target_corp_num in (
+        'BC0008367',
+        'BC0008368'
+)
+)
+select e.event_id,
+       cl.LEDGER_TITLE_TXT,
+       cl.LEDGER_DESC,
+       cl.cars_docmnt_id
+from corporation_cte c
+join event e on e.corp_num = c.corp_num
+join CONV_LEDGER cl on cl.event_id = e.event_id;
+
+
+-- corp_involved - amalgamaTING_businesses
+transfer public.corp_involved_amalgamating from cprd using
+with corp_list as (
+    select /*+ materialize */ c.corp_num
+    from corporation c
+    where c.CORP_NUM in (
+        '0008367',
+        '0008368'
+)
+      and c.CORP_TYP_CD in ('BC','C','ULC','CUL','CC','CCC','QA','QB','QC','QD','QE')
+      and c.CORP_NUM not in ('0460007', '1255957', '1186381')
+),
+corporation_cte as (
+    select *
+    from (
+        select c.*,
+               case
+                   when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM
+                   else c.CORP_NUM
+               end as target_corp_num
+        from corporation c
+        join corp_list cl on cl.corp_num = c.corp_num
+    )
+    where target_corp_num in (
+        'BC0008367',
+        'BC0008368'
+)
+)
+select e.event_id as event_id,
+       c.target_corp_num as ted_corp_num,
+       case
+           when c2.corp_typ_cd in ('BC', 'ULC', 'CC') then 'BC' || c2.corp_num
+           else c2.corp_num
+       end as ting_corp_num,
+       ci.CORP_INVOLVE_ID as corp_involve_id,
+       ci.CAN_JUR_TYP_CD as can_jur_typ_cd,
+       case ci.ADOPTED_CORP_IND
+           when 'N' then 'f'
+           when 'Y' then 't'
+           else 'f'
+       end as adopted_corp_ind,
+       ci.HOME_JURI_NUM as home_juri_num,
+       ci.OTHR_JURI_DESC as othr_juri_desc,
+       ci.FOREIGN_NME as foreign_nme
+from corporation_cte c
+join event e on e.corp_num = c.corp_num
+join CORP_INVOLVED ci on ci.event_id = e.event_id
+join corporation c2 on c2.corp_num = ci.corp_num
+where e.event_typ_cd = 'CONVAMAL'
+UNION ALL
+select e.event_id as event_id,
+       c.target_corp_num as ted_corp_num,
+       case
+           when c2.corp_typ_cd in ('BC', 'ULC', 'CC') then 'BC' || c2.corp_num
+           else c2.corp_num
+       end as ting_corp_num,
+       ci.CORP_INVOLVE_ID as corp_involve_id,
+       ci.CAN_JUR_TYP_CD as can_jur_typ_cd,
+       case ci.ADOPTED_CORP_IND
+           when 'N' then 'f'
+           when 'Y' then 't'
+           else 'f'
+       end as adopted_corp_ind,
+       ci.HOME_JURI_NUM as home_juri_num,
+       ci.OTHR_JURI_DESC as othr_juri_desc,
+       ci.FOREIGN_NME as foreign_nme
+from corporation_cte c
+join event e on e.corp_num = c.corp_num
+join filing f on f.event_id = e.event_id
+join CORP_INVOLVED ci on ci.event_id = e.event_id
+join corporation c2 on c2.corp_num = ci.corp_num
+where f.filing_typ_cd in ('AMALH', 'AMALV', 'AMALR', 'AMLHU', 'AMLVU', 'AMLRU', 'AMLHC', 'AMLVC', 'AMLRC');
+
+
+-- corp_involved - continue_in_historical_xpro
+transfer public.corp_involved_cont_in from cprd using
+with corp_list as (
+    select /*+ materialize */ c.corp_num
+    from corporation c
+    where c.CORP_NUM in (
+        '0008367',
+        '0008368'
+)
+      and c.CORP_TYP_CD in ('BC','C','ULC','CUL','CC','CCC','QA','QB','QC','QD','QE')
+      and c.CORP_NUM not in ('0460007', '1255957', '1186381')
+),
+corporation_cte as (
+    select *
+    from (
+        select c.*,
+               case
+                   when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM
+                   else c.CORP_NUM
+               end as target_corp_num
+        from corporation c
+        join corp_list cl on cl.corp_num = c.corp_num
+    )
+    where target_corp_num in (
+        'BC0008367',
+        'BC0008368'
+)
+)
+select e.event_id,
+       c.target_corp_num as CORP_NUM
+from corporation_cte c
+join event e on e.corp_num = c.corp_num
+join filing f on f.event_id = e.event_id
+where f.filing_typ_cd in ('CONTI', 'CONTU', 'CONTC')
+  and exists (select 1 from CORP_INVOLVED ci where ci.event_id = e.event_id);
+
+
+-- corp_restriction
+transfer public.corp_restriction from cprd using
+with corp_list as (
+    select /*+ materialize */ c.corp_num
+    from corporation c
+    where c.CORP_NUM in (
+        '0008367',
+        '0008368'
+)
+      and c.CORP_TYP_CD in ('BC','C','ULC','CUL','CC','CCC','QA','QB','QC','QD','QE')
+      and c.CORP_NUM not in ('0460007', '1255957', '1186381')
+),
+corporation_cte as (
+    select *
+    from (
+        select c.*,
+               case
+                   when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM
+                   else c.CORP_NUM
+               end as target_corp_num
+        from corporation c
+        join corp_list cl on cl.corp_num = c.corp_num
+    )
+    where target_corp_num in (
+        'BC0008367',
+        'BC0008368'
+)
+)
+select c.target_corp_num as CORP_NUM,
+       case cr.RESTRICTION_IND
+           when 'N' then 'f'
+           when 'Y' then 't'
+           else 'f'
+       end as RESTRICTION_IND,
+       cr.start_event_id,
+       cr.end_event_id
+from corporation_cte c
+join CORP_RESTRICTION cr on cr.corp_num = c.corp_num;
+
+
+-- correction
+transfer public.correction from cprd using
+with corp_list as (
+    select /*+ materialize */ c.corp_num
+    from corporation c
+    where c.CORP_NUM in (
+        '0008367',
+        '0008368'
+)
+      and c.CORP_TYP_CD in ('BC','C','ULC','CUL','CC','CCC','QA','QB','QC','QD','QE')
+      and c.CORP_NUM not in ('0460007', '1255957', '1186381')
+),
+corporation_cte as (
+    select *
+    from (
+        select c.*,
+               case
+                   when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM
+                   else c.CORP_NUM
+               end as target_corp_num
+        from corporation c
+        join corp_list cl on cl.corp_num = c.corp_num
+    )
+    where target_corp_num in (
+        'BC0008367',
+        'BC0008368'
+)
+)
+select e.event_id,
+       c.target_corp_num as CORP_NUM,
+       corr.ASSOCIATED_DOC_DESC
+from corporation_cte c
+join event e on e.corp_num = c.corp_num
+join CORRECTION corr on corr.event_id = e.event_id;
+
+
+-- continued_in_from_jurisdiction
+transfer public.jurisdiction from cprd using
+with corp_list as (
+    select /*+ materialize */ c.corp_num
+    from corporation c
+    where c.CORP_NUM in (
+        '0008367',
+        '0008368'
+)
+      and c.CORP_TYP_CD in ('BC','C','ULC','CUL','CC','CCC','QA','QB','QC','QD','QE')
+      and c.CORP_NUM not in ('0460007', '1255957', '1186381')
+),
+corporation_cte as (
+    select *
+    from (
+        select c.*,
+               case
+                   when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM
+                   else c.CORP_NUM
+               end as target_corp_num
+        from corporation c
+        join corp_list cl on cl.corp_num = c.corp_num
+    )
+    where target_corp_num in (
+        'BC0008367',
+        'BC0008368'
+)
+)
+select c.target_corp_num as CORP_NUM,
+       j.CAN_JUR_TYP_CD,
+       j.XPRO_TYP_CD,
+       j.HOME_RECOGN_DT,
+       j.OTHR_JURIS_DESC,
+       j.HOME_JURIS_NUM,
+       j.BC_XPRO_NUM,
+       j.HOME_COMPANY_NME,
+       j.start_event_id
+from corporation_cte c
+join JURISDICTION j on j.corp_num = c.corp_num;
+
+
+-- resolution
+transfer public.resolution from cprd using
+with corp_list as (
+    select /*+ materialize */ c.corp_num
+    from corporation c
+    where c.CORP_NUM in (
+        '0008367',
+        '0008368'
+)
+      and c.CORP_TYP_CD in ('BC','C','ULC','CUL','CC','CCC','QA','QB','QC','QD','QE')
+      and c.CORP_NUM not in ('0460007', '1255957', '1186381')
+),
+corporation_cte as (
+    select *
+    from (
+        select c.*,
+               case
+                   when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM
+                   else c.CORP_NUM
+               end as target_corp_num
+        from corporation c
+        join corp_list cl on cl.corp_num = c.corp_num
+    )
+    where target_corp_num in (
+        'BC0008367',
+        'BC0008368'
+)
+)
+select c.target_corp_num as CORP_NUM,
+       r.RESOLUTION_DT,
+       r.RESOLUTION_TYPE_CODE,
+       r.start_event_id,
+       r.end_event_id
+from corporation_cte c
+join RESOLUTION r on r.corp_num = c.corp_num;
+
+
+-- share_struct
+transfer public.share_struct from cprd using
+with corp_list as (
+    select /*+ materialize */ c.corp_num
+    from corporation c
+    where c.CORP_NUM in (
+        '0008367',
+        '0008368'
+)
+      and c.CORP_TYP_CD in ('BC','C','ULC','CUL','CC','CCC','QA','QB','QC','QD','QE')
+      and c.CORP_NUM not in ('0460007', '1255957', '1186381')
+),
+corporation_cte as (
+    select *
+    from (
+        select c.*,
+               case
+                   when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM
+                   else c.CORP_NUM
+               end as target_corp_num
+        from corporation c
+        join corp_list cl on cl.corp_num = c.corp_num
+    )
+    where target_corp_num in (
+        'BC0008367',
+        'BC0008368'
+)
+)
+select c.target_corp_num as CORP_NUM,
+       ss.start_event_id,
+       ss.end_event_id
+from corporation_cte c
+join SHARE_STRUCT ss on ss.corp_num = c.corp_num;
+
+
+-- share_struct_cls
+transfer public.share_struct_cls from cprd using
+with corp_list as (
+    select /*+ materialize */ c.corp_num
+    from corporation c
+    where c.CORP_NUM in (
+        '0008367',
+        '0008368'
+)
+      and c.CORP_TYP_CD in ('BC','C','ULC','CUL','CC','CCC','QA','QB','QC','QD','QE')
+      and c.CORP_NUM not in ('0460007', '1255957', '1186381')
+),
+corporation_cte as (
+    select *
+    from (
+        select c.*,
+               case
+                   when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM
+                   else c.CORP_NUM
+               end as target_corp_num
+        from corporation c
+        join corp_list cl on cl.corp_num = c.corp_num
+    )
+    where target_corp_num in (
+        'BC0008367',
+        'BC0008368'
+)
+)
+select c.target_corp_num as CORP_NUM,
+       ssc.SHARE_CLASS_ID,
+       replace(ssc.CLASS_NME, CHR(0), '') as CLASS_NME,
+       ssc.CURRENCY_TYP_CD,
+       case ssc.MAX_SHARE_IND
+           when 'N' then 'f'
+           when 'Y' then 't'
+           else 'f'
+       end as MAX_SHARE_IND,
+       ssc.SHARE_QUANTITY,
+       case ssc.SPEC_RIGHTS_IND
+           when 'N' then 'f'
+           when 'Y' then 't'
+           else 'f'
+       end as SPEC_RIGHTS_IND,
+       case ssc.PAR_VALUE_IND
+           when 'N' then 'f'
+           when 'Y' then 't'
+           else 'f'
+       end as PAR_VALUE_IND,
+       ssc.PAR_VALUE_AMT + 0 as PAR_VALUE_AMT,
+       ssc.OTHER_CURRENCY,
+       ssc.start_event_id
+from corporation_cte c
+join SHARE_STRUCT_CLS ssc on ssc.corp_num = c.corp_num;
+
+
+-- share_series
+transfer public.share_series from cprd using
+with corp_list as (
+    select /*+ materialize */ c.corp_num
+    from corporation c
+    where c.CORP_NUM in (
+        '0008367',
+        '0008368'
+)
+      and c.CORP_TYP_CD in ('BC','C','ULC','CUL','CC','CCC','QA','QB','QC','QD','QE')
+      and c.CORP_NUM not in ('0460007', '1255957', '1186381')
+),
+corporation_cte as (
+    select *
+    from (
+        select c.*,
+               case
+                   when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM
+                   else c.CORP_NUM
+               end as target_corp_num
+        from corporation c
+        join corp_list cl on cl.corp_num = c.corp_num
+    )
+    where target_corp_num in (
+        'BC0008367',
+        'BC0008368'
+)
+)
+select c.target_corp_num as CORP_NUM,
+       ss.SHARE_CLASS_ID,
+       ss.SERIES_ID,
+       case ss.MAX_SHARE_IND
+           when 'N' then 'f'
+           when 'Y' then 't'
+           else 'f'
+       end as MAX_SHARE_IND,
+       ss.SHARE_QUANTITY,
+       case ss.SPEC_RIGHT_IND
+           when 'N' then 'f'
+           when 'Y' then 't'
+           else 'f'
+       end as SPEC_RIGHT_IND,
+       ss.SERIES_NME,
+       ss.start_event_id
+from corporation_cte c
+join SHARE_SERIES ss on ss.corp_num = c.corp_num;
+
+
+-- notification
+transfer public.notification from cprd using
+with corp_list as (
+    select /*+ materialize */ c.corp_num
+    from corporation c
+    where c.CORP_NUM in (
+        '0008367',
+        '0008368'
+)
+      and c.CORP_TYP_CD in ('BC','C','ULC','CUL','CC','CCC','QA','QB','QC','QD','QE')
+      and c.CORP_NUM not in ('0460007', '1255957', '1186381')
+),
+corporation_cte as (
+    select *
+    from (
+        select c.*,
+               case
+                   when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM
+                   else c.CORP_NUM
+               end as target_corp_num
+        from 
corporation c + join corp_list cl on cl.corp_num = c.corp_num + ) + where target_corp_num in ( + 'BC0008367', + 'BC0008368' +) +) +select e.event_id, + n.METHOD_TYP_CD, + n.mailing_addr_id, + n.FIRST_NME, + n.LAST_NME, + n.MIDDLE_NME, + n.PICKUP_BY, + n.EMAIL_ADDRESS, + n.PHONE_NUMBER +from corporation_cte c +join event e on e.corp_num = c.corp_num +join NOTIFICATION n on n.event_id = e.event_id; + + +-- notification_resend +transfer public.notification_resend from cprd using +with corp_list as ( + select /*+ materialize */ c.corp_num + from corporation c + where c.CORP_NUM in ( + '0008367', + '0008368' +) + and c.CORP_TYP_CD in ('BC','C','ULC','CUL','CC','CCC','QA','QB','QC','QD','QE') + and c.CORP_NUM not in ('0460007', '1255957', '1186381') +), +corporation_cte as ( + select * + from ( + select c.*, + case + when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM + else c.CORP_NUM + end as target_corp_num + from corporation c + join corp_list cl on cl.corp_num = c.corp_num + ) + where target_corp_num in ( + 'BC0008367', + 'BC0008368' +) +) +select e.event_id, + nr.METHOD_TYP_CD, + nr.mailing_addr_id, + nr.FIRST_NME, + nr.LAST_NME, + nr.MIDDLE_NME, + nr.PICKUP_BY, + nr.EMAIL_ADDRESS, + nr.PHONE_NUMBER +from corporation_cte c +join event e on e.corp_num = c.corp_num +join NOTIFICATION_RESEND nr on nr.event_id = e.event_id; + + +-- party_notification +transfer public.party_notification from cprd using +with corp_list as ( + select /*+ materialize */ c.corp_num + from corporation c + where c.CORP_NUM in ( + '0008367', + '0008368' +) + and c.CORP_TYP_CD in ('BC','C','ULC','CUL','CC','CCC','QA','QB','QC','QD','QE') + and c.CORP_NUM not in ('0460007', '1255957', '1186381') +), +corporation_cte as ( + select * + from ( + select c.*, + case + when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM + else c.CORP_NUM + end as target_corp_num + from corporation c + join corp_list cl on cl.corp_num = c.corp_num + ) + where target_corp_num in ( + 
'BC0008367', + 'BC0008368' +) +) +select pn.PARTY_ID, + pn.METHOD_TYP_CD, + pn.mailing_addr_id, + pn.FIRST_NME, + pn.LAST_NME, + pn.MIDDLE_NME, + pn.BUSINESS_NME, + pn.PICKUP_BY, + pn.EMAIL_ADDRESS, + pn.PHONE_NUMBER +from corporation_cte c +join corp_party cp on cp.corp_num = c.corp_num +join PARTY_NOTIFICATION pn on pn.party_id = cp.corp_party_id; + + +-- payment +transfer public.payment from cprd using +with corp_list as ( + select /*+ materialize */ c.corp_num + from corporation c + where c.CORP_NUM in ( + '0008367', + '0008368' +) + and c.CORP_TYP_CD in ('BC','C','ULC','CUL','CC','CCC','QA','QB','QC','QD','QE') + and c.CORP_NUM not in ('0460007', '1255957', '1186381') +), +corporation_cte as ( + select * + from ( + select c.*, + case + when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM + else c.CORP_NUM + end as target_corp_num + from corporation c + join corp_list cl on cl.corp_num = c.corp_num + ) + where target_corp_num in ( + 'BC0008367', + 'BC0008368' +) +) +select p.event_id, + p.payment_typ_cd, + p.cc_holder_nme +from corporation_cte c +join event e on e.corp_num = c.corp_num +join payment p on p.event_id = e.event_id; diff --git a/data-tool/scripts/corp_ids_ctst.txt b/data-tool/scripts/corp_ids_ctst.txt new file mode 100644 index 0000000000..be5a749550 --- /dev/null +++ b/data-tool/scripts/corp_ids_ctst.txt @@ -0,0 +1,2 @@ +BC0008367 +BC0008368 \ No newline at end of file diff --git a/data-tool/scripts/generate_cprd_subset_extract.py b/data-tool/scripts/generate_cprd_subset_extract.py index 479aba2f58..d74dcd9e76 100644 --- a/data-tool/scripts/generate_cprd_subset_extract.py +++ b/data-tool/scripts/generate_cprd_subset_extract.py @@ -30,8 +30,9 @@ from __future__ import annotations import argparse +import os import re -from dataclasses import dataclass +from dataclasses import dataclass,replace from enum import Enum from pathlib import Path from typing import Dict, Iterable, List, Sequence @@ -86,6 +87,7 @@ class cfg_GenerationConfig: 
out_master: Path out_chunks_dir: Path + source_connection: str target_connection: str target_schema: str @@ -98,6 +100,10 @@ class cfg_GenerationConfig: TMPL_TOKEN_TARGET_PRED = "&target_corp_num_predicate" # used by transfer template (Oracle-side) TMPL_TOKEN_ORACLE_PRED = "&oracle_corp_num_predicate" # used by transfer template (Oracle-side) TMPL_TOKEN_ORACLE_CORP_TYPE_PRED = "&oracle_corp_type_predicate" # used by transfer template (Oracle-side) +TMPL_TOKEN_SOURCE_CONNECTION = "__DBSCHEMA_SOURCE_CONNECTION__" # rendered generator-time source DbSchema alias +TMPL_TOKEN_TARGET_SCHEMA = "__DBSCHEMA_TARGET_SCHEMA__" # rendered generator-time target Postgres schema +TMPL_CONNECTION_TOKENS = (TMPL_TOKEN_SOURCE_CONNECTION, TMPL_TOKEN_TARGET_SCHEMA) +DBSCHEMA_IDENTIFIER_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$") @dataclass(frozen=True) @@ -404,6 +410,44 @@ def tmpl_render(template_text: str, *, replacements: Dict[str, str]) -> str: out = out.replace(token, value) return out +def cfg_validate_dbschema_identifier(name: str, value: str) -> str: + if not value: + raise SystemExit(f"{name} must not be empty") + if not DBSCHEMA_IDENTIFIER_RE.match(value): + raise SystemExit( + f"{name} must be a conservative DbSchema/Postgres identifier using letters, digits, " + "and underscore, and must not start with a digit" + ) + return value + + +def cfg_validate_pg_schema_identifier(name: str, value: str) -> str: + value = cfg_validate_dbschema_identifier(name, value) + if value != value.lower(): + raise SystemExit( + f"{name} must be lowercase. PostgreSQL folds unquoted uppercase identifiers to lowercase, " + "so uppercase schema values are rejected to avoid mismatches." 
+ ) + return value + + +def tmpl_connection_replacements(cfg: cfg_GenerationConfig) -> Dict[str, str]: + return { + TMPL_TOKEN_SOURCE_CONNECTION: cfg.source_connection, + TMPL_TOKEN_TARGET_SCHEMA: cfg.target_schema, + } + + +def tmpl_assert_no_connection_tokens(spec: tmpl_TemplateSpec, rendered_text: str) -> None: + remaining = [token for token in TMPL_CONNECTION_TOKENS if token in rendered_text] + if remaining: + raise SystemExit( + "Template source/schema token rendering failed.\n" + f"Template: {spec.name}\n" + f"Path: {spec.path}\n" + f"Remaining token(s): {', '.join(remaining)}\n" + ) + # ========================= # chunk_* (chunk planning) @@ -449,6 +493,90 @@ def gen_write_text(path: Path, text: str) -> None: path.write_text(text, encoding="utf-8") +def gen_write_rendered_connection_template( + *, + cfg: cfg_GenerationConfig, + spec: tmpl_TemplateSpec, + output_dir: Path, +) -> tmpl_TemplateSpec: + template_text = tmpl_load_text(spec) + rendered_text = tmpl_render(template_text, replacements=tmpl_connection_replacements(cfg)) + tmpl_assert_no_connection_tokens(spec, rendered_text) + rendered_path = output_dir / spec.path.name + gen_write_text(rendered_path, rendered_text) + return replace( + spec, + path=rendered_path, + required_tokens=tuple(token for token in spec.required_tokens if token not in TMPL_CONNECTION_TOKENS), + ) + + +def gen_write_rendered_connection_templates( + *, + cfg: cfg_GenerationConfig, + templates: tmpl_TemplateBundle, +) -> tmpl_TemplateBundle: + support_dir = cfg.out_chunks_dir / "support" + return replace( + templates, + pg_prepare_address_stage=gen_write_rendered_connection_template( + cfg=cfg, + spec=templates.pg_prepare_address_stage, + output_dir=support_dir, + ), + pg_cleanup_address_stage=gen_write_rendered_connection_template( + cfg=cfg, + spec=templates.pg_cleanup_address_stage, + output_dir=support_dir, + ), + pg_cleanup_orphan_children=gen_write_rendered_connection_template( + cfg=cfg, + 
spec=templates.pg_cleanup_orphan_children, + output_dir=support_dir, + ), + disable_triggers=gen_write_rendered_connection_template( + cfg=cfg, + spec=templates.disable_triggers, + output_dir=support_dir, + ), + enable_triggers=gen_write_rendered_connection_template( + cfg=cfg, + spec=templates.enable_triggers, + output_dir=support_dir, + ), + pg_boolean_casts=gen_write_rendered_connection_template( + cfg=cfg, + spec=templates.pg_boolean_casts, + output_dir=support_dir, + ), + pg_purge_bcomps_excluded=gen_write_rendered_connection_template( + cfg=cfg, + spec=templates.pg_purge_bcomps_excluded, + output_dir=support_dir, + ), + delete_chunk=gen_write_rendered_connection_template( + cfg=cfg, + spec=templates.delete_chunk, + output_dir=support_dir, + ), + transfer_chunk=gen_write_rendered_connection_template( + cfg=cfg, + spec=templates.transfer_chunk, + output_dir=support_dir, + ), + delete_cars=gen_write_rendered_connection_template( + cfg=cfg, + spec=templates.delete_cars, + output_dir=support_dir, + ), + transfer_cars=gen_write_rendered_connection_template( + cfg=cfg, + spec=templates.transfer_cars, + output_dir=support_dir, + ), + ) + + def gen_build_chunk_sql( *, chunk: chunk_ChunkSpec, @@ -951,6 +1079,12 @@ def cli_parse_args(argv: List[str] | None = None) -> argparse.Namespace: "session_replication_role in the master script and nested execute files.", ) + parser.add_argument( + "--source-connection", + default="cprd", + help="DbSchemaCLI connection name for the SOURCE Oracle DB (default: cprd).", + ) + parser.add_argument( "--target-connection", default="cprd_pg_subset", @@ -1021,6 +1155,7 @@ def cfg_build_config(args: argparse.Namespace) -> cfg_GenerationConfig: or_of_in_max_ids=or_of_in_max_ids, out_master=out_master, out_chunks_dir=out_chunks_dir, + source_connection=str(args.source_connection), target_connection=str(args.target_connection), target_schema=str(args.target_schema), ) @@ -1034,6 +1169,10 @@ def _effective_oracle_strategy(cfg: 
cfg_GenerationConfig, total_ids: int) -> cfg def run(cfg: cfg_GenerationConfig) -> int: templates = tmpl_default_bundle(cfg.repo_root) + templates = gen_write_rendered_connection_templates(cfg=cfg, templates=templates) + + DBSCHEMA_SOURCE_CONNECTION = os.getenv('DBSCHEMA_SOURCE_CONNECTION', '') + DBSCHEMA_TARGET_SCHEMA = os.getenv('DBSCHEMA_TARGET_SCHEMA', '') if cfg.pg_debug_session_probes and cfg.render_mode != cfg_RenderMode.INLINE: raise SystemExit("--pg-debug-session-probes currently supports only --render-mode inline.") diff --git a/data-tool/scripts/subset/subset_delete_cars.sql b/data-tool/scripts/subset/subset_delete_cars.sql index ebcbec0ac5..c7b41a6c87 100644 --- a/data-tool/scripts/subset/subset_delete_cars.sql +++ b/data-tool/scripts/subset/subset_delete_cars.sql @@ -4,7 +4,7 @@ -- These tables are NOT corp-scoped, so we truncate the entire dataset and reload from Oracle. -- Volume is low enough that a full refresh is appropriate. -TRUNCATE TABLE carindiv; -TRUNCATE TABLE carsrept; -TRUNCATE TABLE carsbox; -TRUNCATE TABLE carsfile; +TRUNCATE TABLE __DBSCHEMA_TARGET_SCHEMA__.carindiv; +TRUNCATE TABLE __DBSCHEMA_TARGET_SCHEMA__.carsrept; +TRUNCATE TABLE __DBSCHEMA_TARGET_SCHEMA__.carsbox; +TRUNCATE TABLE __DBSCHEMA_TARGET_SCHEMA__.carsfile; diff --git a/data-tool/scripts/subset/subset_delete_chunk.sql b/data-tool/scripts/subset/subset_delete_chunk.sql index 45ed54a76d..89dbb71ddf 100644 --- a/data-tool/scripts/subset/subset_delete_chunk.sql +++ b/data-tool/scripts/subset/subset_delete_chunk.sql @@ -4,7 +4,7 @@ -- corp_ids_in : comma-separated SQL string literals for target corp_num values (NO parentheses), -- e.g. 'BC0460007','A1234567' -- --- Intended to be executed from a master DbSchemaCLI script connected to the target Postgres DB (cprd_pg). +-- Intended to be executed from a master DbSchemaCLI script connected to the target Postgres DB (__DBSCHEMA_TARGET_SCHEMA__ schema). 
-- -- Note: This script intentionally does NOT delete internal migration/processing tables (mig_*, corp_processing, -- colin_tracking, affiliation_processing, etc). It only deletes the corp-scoped COLIN extract tables that are @@ -16,104 +16,104 @@ -- Address rows are treated as shared/global during subset refresh. -- Do not delete them here: subset_transfer_chunk.sql stages incoming Oracle address rows and --- merges them into public.address by addr_id. +-- merges them into the configured target address table by addr_id. -- Delete child tables first (event-scoped). -DELETE FROM notification_resend -WHERE event_id IN (SELECT event_id FROM event WHERE corp_num IN (&corp_ids_in)); +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.notification_resend +WHERE event_id IN (SELECT event_id FROM __DBSCHEMA_TARGET_SCHEMA__.event WHERE corp_num IN (&corp_ids_in)); -DELETE FROM notification -WHERE event_id IN (SELECT event_id FROM event WHERE corp_num IN (&corp_ids_in)); +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.notification +WHERE event_id IN (SELECT event_id FROM __DBSCHEMA_TARGET_SCHEMA__.event WHERE corp_num IN (&corp_ids_in)); -DELETE FROM filing_user -WHERE event_id IN (SELECT event_id FROM event WHERE corp_num IN (&corp_ids_in)); +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.filing_user +WHERE event_id IN (SELECT event_id FROM __DBSCHEMA_TARGET_SCHEMA__.event WHERE corp_num IN (&corp_ids_in)); -DELETE FROM payment -WHERE event_id IN (SELECT event_id FROM event WHERE corp_num IN (&corp_ids_in)); +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.payment +WHERE event_id IN (SELECT event_id FROM __DBSCHEMA_TARGET_SCHEMA__.event WHERE corp_num IN (&corp_ids_in)); -DELETE FROM ledger_text -WHERE event_id IN (SELECT event_id FROM event WHERE corp_num IN (&corp_ids_in)); +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.ledger_text +WHERE event_id IN (SELECT event_id FROM __DBSCHEMA_TARGET_SCHEMA__.event WHERE corp_num IN (&corp_ids_in)); -DELETE FROM conv_ledger -WHERE event_id IN (SELECT event_id FROM event 
WHERE corp_num IN (&corp_ids_in)); +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.conv_ledger +WHERE event_id IN (SELECT event_id FROM __DBSCHEMA_TARGET_SCHEMA__.event WHERE corp_num IN (&corp_ids_in)); -DELETE FROM conv_event -WHERE event_id IN (SELECT event_id FROM event WHERE corp_num IN (&corp_ids_in)); +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.conv_event +WHERE event_id IN (SELECT event_id FROM __DBSCHEMA_TARGET_SCHEMA__.event WHERE corp_num IN (&corp_ids_in)); -DELETE FROM completing_party -WHERE event_id IN (SELECT event_id FROM event WHERE corp_num IN (&corp_ids_in)); +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.completing_party +WHERE event_id IN (SELECT event_id FROM __DBSCHEMA_TARGET_SCHEMA__.event WHERE corp_num IN (&corp_ids_in)); -DELETE FROM submitting_party -WHERE event_id IN (SELECT event_id FROM event WHERE corp_num IN (&corp_ids_in)); +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.submitting_party +WHERE event_id IN (SELECT event_id FROM __DBSCHEMA_TARGET_SCHEMA__.event WHERE corp_num IN (&corp_ids_in)); -DELETE FROM corp_involved_amalgamating -WHERE event_id IN (SELECT event_id FROM event WHERE corp_num IN (&corp_ids_in)); +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.corp_involved_amalgamating +WHERE event_id IN (SELECT event_id FROM __DBSCHEMA_TARGET_SCHEMA__.event WHERE corp_num IN (&corp_ids_in)); -DELETE FROM corp_involved_cont_in -WHERE event_id IN (SELECT event_id FROM event WHERE corp_num IN (&corp_ids_in)); +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.corp_involved_cont_in +WHERE event_id IN (SELECT event_id FROM __DBSCHEMA_TARGET_SCHEMA__.event WHERE corp_num IN (&corp_ids_in)); -DELETE FROM correction -WHERE event_id IN (SELECT event_id FROM event WHERE corp_num IN (&corp_ids_in)); +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.correction +WHERE event_id IN (SELECT event_id FROM __DBSCHEMA_TARGET_SCHEMA__.event WHERE corp_num IN (&corp_ids_in)); -DELETE FROM filing -WHERE event_id IN (SELECT event_id FROM event WHERE corp_num IN (&corp_ids_in)); +DELETE FROM 
__DBSCHEMA_TARGET_SCHEMA__.filing +WHERE event_id IN (SELECT event_id FROM __DBSCHEMA_TARGET_SCHEMA__.event WHERE corp_num IN (&corp_ids_in)); -- Delete corp-party related tables. -DELETE FROM party_notification -WHERE party_id IN (SELECT corp_party_id FROM corp_party WHERE corp_num IN (&corp_ids_in)); +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.party_notification +WHERE party_id IN (SELECT corp_party_id FROM __DBSCHEMA_TARGET_SCHEMA__.corp_party WHERE corp_num IN (&corp_ids_in)); -DELETE FROM offices_held -WHERE corp_party_id IN (SELECT corp_party_id FROM corp_party WHERE corp_num IN (&corp_ids_in)); +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.offices_held +WHERE corp_party_id IN (SELECT corp_party_id FROM __DBSCHEMA_TARGET_SCHEMA__.corp_party WHERE corp_num IN (&corp_ids_in)); -DELETE FROM corp_party_relationship -WHERE corp_party_id IN (SELECT corp_party_id FROM corp_party WHERE corp_num IN (&corp_ids_in)); +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.corp_party_relationship +WHERE corp_party_id IN (SELECT corp_party_id FROM __DBSCHEMA_TARGET_SCHEMA__.corp_party WHERE corp_num IN (&corp_ids_in)); -DELETE FROM corp_party +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.corp_party WHERE corp_num IN (&corp_ids_in); -- Delete corp-scoped tables. 
-DELETE FROM office +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.office WHERE corp_num IN (&corp_ids_in); -DELETE FROM corp_name +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.corp_name WHERE corp_num IN (&corp_ids_in); -DELETE FROM corp_state +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.corp_state WHERE corp_num IN (&corp_ids_in); -DELETE FROM corp_comments +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.corp_comments WHERE corp_num IN (&corp_ids_in); -DELETE FROM corp_flag +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.corp_flag WHERE corp_num IN (&corp_ids_in); -DELETE FROM cont_out +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.cont_out WHERE corp_num IN (&corp_ids_in); -DELETE FROM corp_restriction +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.corp_restriction WHERE corp_num IN (&corp_ids_in); -DELETE FROM jurisdiction +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.jurisdiction WHERE corp_num IN (&corp_ids_in); -DELETE FROM resolution +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.resolution WHERE corp_num IN (&corp_ids_in); -DELETE FROM share_series +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.share_series WHERE corp_num IN (&corp_ids_in); -DELETE FROM share_struct_cls +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.share_struct_cls WHERE corp_num IN (&corp_ids_in); -DELETE FROM share_struct +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.share_struct WHERE corp_num IN (&corp_ids_in); -- Delete events last (many tables reference event_id). -DELETE FROM event +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.event WHERE corp_num IN (&corp_ids_in); -- Delete the corp rows last. -DELETE FROM corporation +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.corporation WHERE corp_num IN (&corp_ids_in); -- Address rows are refreshed via stage+merge in subset_transfer_chunk.sql. 
diff --git a/data-tool/scripts/subset/subset_disable_triggers.sql b/data-tool/scripts/subset/subset_disable_triggers.sql index 6089f524c8..fd2c945fb3 100644 --- a/data-tool/scripts/subset/subset_disable_triggers.sql +++ b/data-tool/scripts/subset/subset_disable_triggers.sql @@ -1,38 +1,38 @@ -- Disable triggers for corp-scoped tables (subset refresh/load). -- Intended to be executed from a master DbSchemaCLI script while connected to the target Postgres DB. -ALTER TABLE corporation DISABLE TRIGGER ALL; -ALTER TABLE corp_name DISABLE TRIGGER ALL; -ALTER TABLE corp_state DISABLE TRIGGER ALL; -ALTER TABLE event DISABLE TRIGGER ALL; -ALTER TABLE filing DISABLE TRIGGER ALL; -ALTER TABLE filing_user DISABLE TRIGGER ALL; -ALTER TABLE office DISABLE TRIGGER ALL; -ALTER TABLE corp_comments DISABLE TRIGGER ALL; -ALTER TABLE ledger_text DISABLE TRIGGER ALL; -ALTER TABLE corp_party DISABLE TRIGGER ALL; -ALTER TABLE corp_party_relationship DISABLE TRIGGER ALL; -ALTER TABLE offices_held DISABLE TRIGGER ALL; -ALTER TABLE completing_party DISABLE TRIGGER ALL; -ALTER TABLE submitting_party DISABLE TRIGGER ALL; -ALTER TABLE corp_flag DISABLE TRIGGER ALL; -ALTER TABLE cont_out DISABLE TRIGGER ALL; -ALTER TABLE conv_event DISABLE TRIGGER ALL; -ALTER TABLE conv_ledger DISABLE TRIGGER ALL; -ALTER TABLE corp_involved_amalgamating DISABLE TRIGGER ALL; -ALTER TABLE corp_involved_cont_in DISABLE TRIGGER ALL; -ALTER TABLE corp_restriction DISABLE TRIGGER ALL; -ALTER TABLE correction DISABLE TRIGGER ALL; -ALTER TABLE jurisdiction DISABLE TRIGGER ALL; -ALTER TABLE resolution DISABLE TRIGGER ALL; -ALTER TABLE share_series DISABLE TRIGGER ALL; -ALTER TABLE share_struct DISABLE TRIGGER ALL; -ALTER TABLE share_struct_cls DISABLE TRIGGER ALL; -ALTER TABLE notification DISABLE TRIGGER ALL; -ALTER TABLE notification_resend DISABLE TRIGGER ALL; -ALTER TABLE party_notification DISABLE TRIGGER ALL; -ALTER TABLE payment DISABLE TRIGGER ALL; -ALTER TABLE carsfile DISABLE TRIGGER ALL; -ALTER TABLE carsbox 
DISABLE TRIGGER ALL; -ALTER TABLE carsrept DISABLE TRIGGER ALL; -ALTER TABLE carindiv DISABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.corporation DISABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.corp_name DISABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.corp_state DISABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.event DISABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.filing DISABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.filing_user DISABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.office DISABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.corp_comments DISABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.ledger_text DISABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.corp_party DISABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.corp_party_relationship DISABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.offices_held DISABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.completing_party DISABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.submitting_party DISABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.corp_flag DISABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.cont_out DISABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.conv_event DISABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.conv_ledger DISABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.corp_involved_amalgamating DISABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.corp_involved_cont_in DISABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.corp_restriction DISABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.correction DISABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.jurisdiction DISABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.resolution DISABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.share_series DISABLE TRIGGER ALL; +ALTER TABLE 
__DBSCHEMA_TARGET_SCHEMA__.share_struct DISABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.share_struct_cls DISABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.notification DISABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.notification_resend DISABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.party_notification DISABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.payment DISABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.carsfile DISABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.carsbox DISABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.carsrept DISABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.carindiv DISABLE TRIGGER ALL; diff --git a/data-tool/scripts/subset/subset_enable_triggers.sql b/data-tool/scripts/subset/subset_enable_triggers.sql index 071691976d..b7ef1327ac 100644 --- a/data-tool/scripts/subset/subset_enable_triggers.sql +++ b/data-tool/scripts/subset/subset_enable_triggers.sql @@ -1,38 +1,38 @@ -- Enable triggers for corp-scoped tables (subset refresh/load). -- Intended to be executed from a master DbSchemaCLI script while connected to the target Postgres DB. 
-ALTER TABLE corporation ENABLE TRIGGER ALL; -ALTER TABLE corp_name ENABLE TRIGGER ALL; -ALTER TABLE corp_state ENABLE TRIGGER ALL; -ALTER TABLE event ENABLE TRIGGER ALL; -ALTER TABLE filing ENABLE TRIGGER ALL; -ALTER TABLE filing_user ENABLE TRIGGER ALL; -ALTER TABLE office ENABLE TRIGGER ALL; -ALTER TABLE corp_comments ENABLE TRIGGER ALL; -ALTER TABLE ledger_text ENABLE TRIGGER ALL; -ALTER TABLE corp_party ENABLE TRIGGER ALL; -ALTER TABLE corp_party_relationship ENABLE TRIGGER ALL; -ALTER TABLE offices_held ENABLE TRIGGER ALL; -ALTER TABLE completing_party ENABLE TRIGGER ALL; -ALTER TABLE submitting_party ENABLE TRIGGER ALL; -ALTER TABLE corp_flag ENABLE TRIGGER ALL; -ALTER TABLE cont_out ENABLE TRIGGER ALL; -ALTER TABLE conv_event ENABLE TRIGGER ALL; -ALTER TABLE conv_ledger ENABLE TRIGGER ALL; -ALTER TABLE corp_involved_amalgamating ENABLE TRIGGER ALL; -ALTER TABLE corp_involved_cont_in ENABLE TRIGGER ALL; -ALTER TABLE corp_restriction ENABLE TRIGGER ALL; -ALTER TABLE correction ENABLE TRIGGER ALL; -ALTER TABLE jurisdiction ENABLE TRIGGER ALL; -ALTER TABLE resolution ENABLE TRIGGER ALL; -ALTER TABLE share_series ENABLE TRIGGER ALL; -ALTER TABLE share_struct ENABLE TRIGGER ALL; -ALTER TABLE share_struct_cls ENABLE TRIGGER ALL; -ALTER TABLE notification ENABLE TRIGGER ALL; -ALTER TABLE notification_resend ENABLE TRIGGER ALL; -ALTER TABLE party_notification ENABLE TRIGGER ALL; -ALTER TABLE payment ENABLE TRIGGER ALL; -ALTER TABLE carsfile ENABLE TRIGGER ALL; -ALTER TABLE carsbox ENABLE TRIGGER ALL; -ALTER TABLE carsrept ENABLE TRIGGER ALL; -ALTER TABLE carindiv ENABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.corporation ENABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.corp_name ENABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.corp_state ENABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.event ENABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.filing ENABLE TRIGGER ALL; +ALTER TABLE 
__DBSCHEMA_TARGET_SCHEMA__.filing_user ENABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.office ENABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.corp_comments ENABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.ledger_text ENABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.corp_party ENABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.corp_party_relationship ENABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.offices_held ENABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.completing_party ENABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.submitting_party ENABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.corp_flag ENABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.cont_out ENABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.conv_event ENABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.conv_ledger ENABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.corp_involved_amalgamating ENABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.corp_involved_cont_in ENABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.corp_restriction ENABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.correction ENABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.jurisdiction ENABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.resolution ENABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.share_series ENABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.share_struct ENABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.share_struct_cls ENABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.notification ENABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.notification_resend ENABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.party_notification ENABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.payment ENABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.carsfile ENABLE TRIGGER ALL; +ALTER 
TABLE __DBSCHEMA_TARGET_SCHEMA__.carsbox ENABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.carsrept ENABLE TRIGGER ALL; +ALTER TABLE __DBSCHEMA_TARGET_SCHEMA__.carindiv ENABLE TRIGGER ALL; diff --git a/data-tool/scripts/subset/subset_pg_boolean_casts.sql b/data-tool/scripts/subset/subset_pg_boolean_casts.sql index b97a72568d..b141cd5e00 100644 --- a/data-tool/scripts/subset/subset_pg_boolean_casts.sql +++ b/data-tool/scripts/subset/subset_pg_boolean_casts.sql @@ -23,7 +23,7 @@ -- DbSchemaCLI splits statements on semicolons and does not reliably handle semicolons inside dollar-quoted -- bodies. Keep dollar-quoted bodies free of internal semicolons and avoid DO $$ blocks. -CREATE OR REPLACE FUNCTION public.dbcli_varchar_to_boolean(val varchar) +CREATE OR REPLACE FUNCTION __DBSCHEMA_TARGET_SCHEMA__.dbcli_varchar_to_boolean(val varchar) RETURNS boolean LANGUAGE sql IMMUTABLE @@ -32,7 +32,7 @@ AS $$ SELECT (val::text)::boolean $$; -CREATE OR REPLACE FUNCTION public.dbcli_bpchar_to_boolean(val bpchar) +CREATE OR REPLACE FUNCTION __DBSCHEMA_TARGET_SCHEMA__.dbcli_bpchar_to_boolean(val bpchar) RETURNS boolean LANGUAGE sql IMMUTABLE @@ -44,12 +44,12 @@ $$; -- Recreate casts in an idempotent way (Postgres has no CREATE CAST IF NOT EXISTS). 
DROP CAST IF EXISTS (varchar AS boolean); CREATE CAST (varchar AS boolean) - WITH FUNCTION public.dbcli_varchar_to_boolean(varchar) + WITH FUNCTION __DBSCHEMA_TARGET_SCHEMA__.dbcli_varchar_to_boolean(varchar) AS IMPLICIT -- DbSchemaCLI workaround: avoid keyword being last token ; DROP CAST IF EXISTS (bpchar AS boolean); CREATE CAST (bpchar AS boolean) - WITH FUNCTION public.dbcli_bpchar_to_boolean(bpchar) + WITH FUNCTION __DBSCHEMA_TARGET_SCHEMA__.dbcli_bpchar_to_boolean(bpchar) AS IMPLICIT -- DbSchemaCLI workaround: avoid keyword being last token ; diff --git a/data-tool/scripts/subset/subset_pg_cleanup_address_stage.sql b/data-tool/scripts/subset/subset_pg_cleanup_address_stage.sql index a702de8e9c..407d969791 100644 --- a/data-tool/scripts/subset/subset_pg_cleanup_address_stage.sql +++ b/data-tool/scripts/subset/subset_pg_cleanup_address_stage.sql @@ -1,2 +1,4 @@ -- Cleanup the shared address staging table used by subset_transfer_chunk.sql. -- No-op: the helper table is predeclared in the COLIN extract DDL and is truncated during prepare/chunk execution. + +TRUNCATE TABLE __DBSCHEMA_TARGET_SCHEMA__.subset_address_stage; diff --git a/data-tool/scripts/subset/subset_pg_cleanup_orphan_children.sql b/data-tool/scripts/subset/subset_pg_cleanup_orphan_children.sql index 345a6e55e1..33199b4f71 100644 --- a/data-tool/scripts/subset/subset_pg_cleanup_orphan_children.sql +++ b/data-tool/scripts/subset/subset_pg_cleanup_orphan_children.sql @@ -11,52 +11,52 @@ -- - corp-scoped rows deleted directly by corp_num are left to the regular chunk deletes -- Event-scoped children whose parent event row is missing. 
-DELETE FROM notification_resend t -WHERE NOT EXISTS (SELECT 1 FROM event e WHERE e.event_id = t.event_id); +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.notification_resend t +WHERE NOT EXISTS (SELECT 1 FROM __DBSCHEMA_TARGET_SCHEMA__.event e WHERE e.event_id = t.event_id); -DELETE FROM notification t -WHERE NOT EXISTS (SELECT 1 FROM event e WHERE e.event_id = t.event_id); +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.notification t +WHERE NOT EXISTS (SELECT 1 FROM __DBSCHEMA_TARGET_SCHEMA__.event e WHERE e.event_id = t.event_id); -DELETE FROM filing_user t -WHERE NOT EXISTS (SELECT 1 FROM event e WHERE e.event_id = t.event_id); +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.filing_user t +WHERE NOT EXISTS (SELECT 1 FROM __DBSCHEMA_TARGET_SCHEMA__.event e WHERE e.event_id = t.event_id); -DELETE FROM payment t -WHERE NOT EXISTS (SELECT 1 FROM event e WHERE e.event_id = t.event_id); +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.payment t +WHERE NOT EXISTS (SELECT 1 FROM __DBSCHEMA_TARGET_SCHEMA__.event e WHERE e.event_id = t.event_id); -DELETE FROM ledger_text t -WHERE NOT EXISTS (SELECT 1 FROM event e WHERE e.event_id = t.event_id); +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.ledger_text t +WHERE NOT EXISTS (SELECT 1 FROM __DBSCHEMA_TARGET_SCHEMA__.event e WHERE e.event_id = t.event_id); -DELETE FROM conv_ledger t -WHERE NOT EXISTS (SELECT 1 FROM event e WHERE e.event_id = t.event_id); +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.conv_ledger t +WHERE NOT EXISTS (SELECT 1 FROM __DBSCHEMA_TARGET_SCHEMA__.event e WHERE e.event_id = t.event_id); -DELETE FROM conv_event t -WHERE NOT EXISTS (SELECT 1 FROM event e WHERE e.event_id = t.event_id); +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.conv_event t +WHERE NOT EXISTS (SELECT 1 FROM __DBSCHEMA_TARGET_SCHEMA__.event e WHERE e.event_id = t.event_id); -DELETE FROM completing_party t -WHERE NOT EXISTS (SELECT 1 FROM event e WHERE e.event_id = t.event_id); +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.completing_party t +WHERE NOT EXISTS (SELECT 1 FROM 
__DBSCHEMA_TARGET_SCHEMA__.event e WHERE e.event_id = t.event_id); -DELETE FROM submitting_party t -WHERE NOT EXISTS (SELECT 1 FROM event e WHERE e.event_id = t.event_id); +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.submitting_party t +WHERE NOT EXISTS (SELECT 1 FROM __DBSCHEMA_TARGET_SCHEMA__.event e WHERE e.event_id = t.event_id); -DELETE FROM corp_involved_amalgamating t +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.corp_involved_amalgamating t WHERE t.event_id IS NOT NULL - AND NOT EXISTS (SELECT 1 FROM event e WHERE e.event_id = t.event_id); + AND NOT EXISTS (SELECT 1 FROM __DBSCHEMA_TARGET_SCHEMA__.event e WHERE e.event_id = t.event_id); -DELETE FROM corp_involved_cont_in t -WHERE NOT EXISTS (SELECT 1 FROM event e WHERE e.event_id = t.event_id); +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.corp_involved_cont_in t +WHERE NOT EXISTS (SELECT 1 FROM __DBSCHEMA_TARGET_SCHEMA__.event e WHERE e.event_id = t.event_id); -DELETE FROM correction t -WHERE NOT EXISTS (SELECT 1 FROM event e WHERE e.event_id = t.event_id); +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.correction t +WHERE NOT EXISTS (SELECT 1 FROM __DBSCHEMA_TARGET_SCHEMA__.event e WHERE e.event_id = t.event_id); -DELETE FROM filing t -WHERE NOT EXISTS (SELECT 1 FROM event e WHERE e.event_id = t.event_id); +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.filing t +WHERE NOT EXISTS (SELECT 1 FROM __DBSCHEMA_TARGET_SCHEMA__.event e WHERE e.event_id = t.event_id); -- Corp-party children whose parent corp_party row is missing. 
-DELETE FROM party_notification t -WHERE NOT EXISTS (SELECT 1 FROM corp_party cp WHERE cp.corp_party_id = t.party_id); +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.party_notification t +WHERE NOT EXISTS (SELECT 1 FROM __DBSCHEMA_TARGET_SCHEMA__.corp_party cp WHERE cp.corp_party_id = t.party_id); -DELETE FROM offices_held t -WHERE NOT EXISTS (SELECT 1 FROM corp_party cp WHERE cp.corp_party_id = t.corp_party_id); +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.offices_held t +WHERE NOT EXISTS (SELECT 1 FROM __DBSCHEMA_TARGET_SCHEMA__.corp_party cp WHERE cp.corp_party_id = t.corp_party_id); -DELETE FROM corp_party_relationship t -WHERE NOT EXISTS (SELECT 1 FROM corp_party cp WHERE cp.corp_party_id = t.corp_party_id); +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.corp_party_relationship t +WHERE NOT EXISTS (SELECT 1 FROM __DBSCHEMA_TARGET_SCHEMA__.corp_party cp WHERE cp.corp_party_id = t.corp_party_id); diff --git a/data-tool/scripts/subset/subset_pg_prepare_address_stage.sql b/data-tool/scripts/subset/subset_pg_prepare_address_stage.sql index 12c00c30d3..0305556ec2 100644 --- a/data-tool/scripts/subset/subset_pg_prepare_address_stage.sql +++ b/data-tool/scripts/subset/subset_pg_prepare_address_stage.sql @@ -1,4 +1,4 @@ -- Prepare the shared address staging table used by subset_transfer_chunk.sql. -- This is a predeclared regular table (not TEMP) because DbSchemaCLI transfer work may use separate sessions. 
-TRUNCATE TABLE public.subset_address_stage; +TRUNCATE TABLE __DBSCHEMA_TARGET_SCHEMA__.subset_address_stage; diff --git a/data-tool/scripts/subset/subset_pg_purge_bcomps_excluded.sql b/data-tool/scripts/subset/subset_pg_purge_bcomps_excluded.sql index 7c48f0e562..e7313ed0dd 100644 --- a/data-tool/scripts/subset/subset_pg_purge_bcomps_excluded.sql +++ b/data-tool/scripts/subset/subset_pg_purge_bcomps_excluded.sql @@ -15,167 +15,167 @@ -- 1) Build keysets -TRUNCATE TABLE public.subset_excluded_corp_parties; -TRUNCATE TABLE public.subset_excluded_events; -TRUNCATE TABLE public.subset_excluded_corps; +TRUNCATE TABLE __DBSCHEMA_TARGET_SCHEMA__.subset_excluded_corp_parties; +TRUNCATE TABLE __DBSCHEMA_TARGET_SCHEMA__.subset_excluded_events; +TRUNCATE TABLE __DBSCHEMA_TARGET_SCHEMA__.subset_excluded_corps; -INSERT INTO public.subset_excluded_corps (corp_num) +INSERT INTO __DBSCHEMA_TARGET_SCHEMA__.subset_excluded_corps (corp_num) SELECT DISTINCT e.corp_num -FROM event e -JOIN filing f ON f.event_id = e.event_id -JOIN filing_user u ON u.event_id = e.event_id +FROM __DBSCHEMA_TARGET_SCHEMA__.event e +JOIN __DBSCHEMA_TARGET_SCHEMA__.filing f ON f.event_id = e.event_id +JOIN __DBSCHEMA_TARGET_SCHEMA__.filing_user u ON u.event_id = e.event_id WHERE e.corp_num IS NOT NULL AND u.user_id = 'BCOMPS' AND f.filing_type_cd IN ('BEINC', 'ICORP', 'ICORU', 'ICORC', 'CONTB', 'CONTI', 'CONTU', 'CONTC'); -INSERT INTO public.subset_excluded_events (event_id) +INSERT INTO __DBSCHEMA_TARGET_SCHEMA__.subset_excluded_events (event_id) SELECT DISTINCT e.event_id -FROM event e -JOIN public.subset_excluded_corps x ON x.corp_num = e.corp_num +FROM __DBSCHEMA_TARGET_SCHEMA__.event e +JOIN __DBSCHEMA_TARGET_SCHEMA__.subset_excluded_corps x ON x.corp_num = e.corp_num WHERE e.event_id IS NOT NULL; -INSERT INTO public.subset_excluded_corp_parties (corp_party_id) +INSERT INTO __DBSCHEMA_TARGET_SCHEMA__.subset_excluded_corp_parties (corp_party_id) SELECT DISTINCT cp.corp_party_id -FROM corp_party cp -JOIN 
public.subset_excluded_corps x ON x.corp_num = cp.corp_num +FROM __DBSCHEMA_TARGET_SCHEMA__.corp_party cp +JOIN __DBSCHEMA_TARGET_SCHEMA__.subset_excluded_corps x ON x.corp_num = cp.corp_num WHERE cp.corp_party_id IS NOT NULL; -- 2) Purge (delete child tables first) -- Event-scoped children -DELETE FROM notification_resend t -USING public.subset_excluded_events x +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.notification_resend t +USING __DBSCHEMA_TARGET_SCHEMA__.subset_excluded_events x WHERE t.event_id = x.event_id; -DELETE FROM notification t -USING public.subset_excluded_events x +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.notification t +USING __DBSCHEMA_TARGET_SCHEMA__.subset_excluded_events x WHERE t.event_id = x.event_id; -DELETE FROM filing_user t -USING public.subset_excluded_events x +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.filing_user t +USING __DBSCHEMA_TARGET_SCHEMA__.subset_excluded_events x WHERE t.event_id = x.event_id; -DELETE FROM payment t -USING public.subset_excluded_events x +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.payment t +USING __DBSCHEMA_TARGET_SCHEMA__.subset_excluded_events x WHERE t.event_id = x.event_id; -DELETE FROM ledger_text t -USING public.subset_excluded_events x +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.ledger_text t +USING __DBSCHEMA_TARGET_SCHEMA__.subset_excluded_events x WHERE t.event_id = x.event_id; -DELETE FROM conv_ledger t -USING public.subset_excluded_events x +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.conv_ledger t +USING __DBSCHEMA_TARGET_SCHEMA__.subset_excluded_events x WHERE t.event_id = x.event_id; -DELETE FROM conv_event t -USING public.subset_excluded_events x +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.conv_event t +USING __DBSCHEMA_TARGET_SCHEMA__.subset_excluded_events x WHERE t.event_id = x.event_id; -DELETE FROM completing_party t -USING public.subset_excluded_events x +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.completing_party t +USING __DBSCHEMA_TARGET_SCHEMA__.subset_excluded_events x WHERE t.event_id = x.event_id; -DELETE 
FROM submitting_party t -USING public.subset_excluded_events x +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.submitting_party t +USING __DBSCHEMA_TARGET_SCHEMA__.subset_excluded_events x WHERE t.event_id = x.event_id; -DELETE FROM corp_involved_cont_in t -USING public.subset_excluded_events x +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.corp_involved_cont_in t +USING __DBSCHEMA_TARGET_SCHEMA__.subset_excluded_events x WHERE t.event_id = x.event_id; -DELETE FROM correction t -USING public.subset_excluded_events x +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.correction t +USING __DBSCHEMA_TARGET_SCHEMA__.subset_excluded_events x WHERE t.event_id = x.event_id; -DELETE FROM filing t -USING public.subset_excluded_events x +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.filing t +USING __DBSCHEMA_TARGET_SCHEMA__.subset_excluded_events x WHERE t.event_id = x.event_id; -- corp_involved_amalgamating can reference corp_num via ted_corp_num/ting_corp_num as well as event_id. -- Delete any rows where either side is excluded (covers non-event-owned references too). 
-DELETE FROM corp_involved_amalgamating t -USING public.subset_excluded_corps x +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.corp_involved_amalgamating t +USING __DBSCHEMA_TARGET_SCHEMA__.subset_excluded_corps x WHERE t.ted_corp_num = x.corp_num OR t.ting_corp_num = x.corp_num; -- Corp-party related -DELETE FROM party_notification t -USING public.subset_excluded_corp_parties x +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.party_notification t +USING __DBSCHEMA_TARGET_SCHEMA__.subset_excluded_corp_parties x WHERE t.party_id = x.corp_party_id; -DELETE FROM offices_held t -USING public.subset_excluded_corp_parties x +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.offices_held t +USING __DBSCHEMA_TARGET_SCHEMA__.subset_excluded_corp_parties x WHERE t.corp_party_id = x.corp_party_id; -DELETE FROM corp_party_relationship t -USING public.subset_excluded_corp_parties x +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.corp_party_relationship t +USING __DBSCHEMA_TARGET_SCHEMA__.subset_excluded_corp_parties x WHERE t.corp_party_id = x.corp_party_id; -DELETE FROM corp_party t -USING public.subset_excluded_corp_parties x +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.corp_party t +USING __DBSCHEMA_TARGET_SCHEMA__.subset_excluded_corp_parties x WHERE t.corp_party_id = x.corp_party_id; -- Corp-scoped tables -DELETE FROM office t -USING public.subset_excluded_corps x +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.office t +USING __DBSCHEMA_TARGET_SCHEMA__.subset_excluded_corps x WHERE t.corp_num = x.corp_num; -DELETE FROM corp_name t -USING public.subset_excluded_corps x +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.corp_name t +USING __DBSCHEMA_TARGET_SCHEMA__.subset_excluded_corps x WHERE t.corp_num = x.corp_num; -DELETE FROM corp_state t -USING public.subset_excluded_corps x +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.corp_state t +USING __DBSCHEMA_TARGET_SCHEMA__.subset_excluded_corps x WHERE t.corp_num = x.corp_num; -DELETE FROM corp_comments t -USING public.subset_excluded_corps x +DELETE FROM 
__DBSCHEMA_TARGET_SCHEMA__.corp_comments t +USING __DBSCHEMA_TARGET_SCHEMA__.subset_excluded_corps x WHERE t.corp_num = x.corp_num; -DELETE FROM corp_flag t -USING public.subset_excluded_corps x +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.corp_flag t +USING __DBSCHEMA_TARGET_SCHEMA__.subset_excluded_corps x WHERE t.corp_num = x.corp_num; -DELETE FROM cont_out t -USING public.subset_excluded_corps x +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.cont_out t +USING __DBSCHEMA_TARGET_SCHEMA__.subset_excluded_corps x WHERE t.corp_num = x.corp_num; -DELETE FROM corp_restriction t -USING public.subset_excluded_corps x +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.corp_restriction t +USING __DBSCHEMA_TARGET_SCHEMA__.subset_excluded_corps x WHERE t.corp_num = x.corp_num; -DELETE FROM jurisdiction t -USING public.subset_excluded_corps x +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.jurisdiction t +USING __DBSCHEMA_TARGET_SCHEMA__.subset_excluded_corps x WHERE t.corp_num = x.corp_num; -DELETE FROM resolution t -USING public.subset_excluded_corps x +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.resolution t +USING __DBSCHEMA_TARGET_SCHEMA__.subset_excluded_corps x WHERE t.corp_num = x.corp_num; -- Share tables (delete deepest first) -DELETE FROM share_series t -USING public.subset_excluded_corps x +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.share_series t +USING __DBSCHEMA_TARGET_SCHEMA__.subset_excluded_corps x WHERE t.corp_num = x.corp_num; -DELETE FROM share_struct_cls t -USING public.subset_excluded_corps x +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.share_struct_cls t +USING __DBSCHEMA_TARGET_SCHEMA__.subset_excluded_corps x WHERE t.corp_num = x.corp_num; -DELETE FROM share_struct t -USING public.subset_excluded_corps x +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.share_struct t +USING __DBSCHEMA_TARGET_SCHEMA__.subset_excluded_corps x WHERE t.corp_num = x.corp_num; -- Events last (many tables reference event_id) -DELETE FROM event t -USING public.subset_excluded_events x +DELETE FROM 
__DBSCHEMA_TARGET_SCHEMA__.event t +USING __DBSCHEMA_TARGET_SCHEMA__.subset_excluded_events x WHERE t.event_id = x.event_id; -- Corporation last -DELETE FROM corporation t -USING public.subset_excluded_corps x +DELETE FROM __DBSCHEMA_TARGET_SCHEMA__.corporation t +USING __DBSCHEMA_TARGET_SCHEMA__.subset_excluded_corps x WHERE t.corp_num = x.corp_num; -- 3) Cleanup helper tables -TRUNCATE TABLE public.subset_excluded_corp_parties; -TRUNCATE TABLE public.subset_excluded_events; -TRUNCATE TABLE public.subset_excluded_corps; +TRUNCATE TABLE __DBSCHEMA_TARGET_SCHEMA__.subset_excluded_corp_parties; +TRUNCATE TABLE __DBSCHEMA_TARGET_SCHEMA__.subset_excluded_events; +TRUNCATE TABLE __DBSCHEMA_TARGET_SCHEMA__.subset_excluded_corps; diff --git a/data-tool/scripts/subset/subset_transfer_cars.sql b/data-tool/scripts/subset/subset_transfer_cars.sql index f41876127a..665db97810 100644 --- a/data-tool/scripts/subset/subset_transfer_cars.sql +++ b/data-tool/scripts/subset/subset_transfer_cars.sql @@ -1,17 +1,17 @@ --- Global transfer of cars* tables from SOURCE Oracle DB (cprd) into TARGET Postgres extract DB (cprd_pg). --- Intended to be executed from a master DbSchemaCLI script connected to the target Postgres DB (cprd_pg). +-- Global transfer of cars* tables from SOURCE Oracle DB (__DBSCHEMA_SOURCE_CONNECTION__) into TARGET Postgres extract DB (__DBSCHEMA_TARGET_SCHEMA__ schema). +-- Intended to be executed from a master DbSchemaCLI script connected to the target Postgres DB (__DBSCHEMA_TARGET_SCHEMA__ schema). -- -- These tables are NOT corp-scoped. The full dataset is transferred without filtering. -- Volume is low enough that a full refresh is appropriate. 
-transfer public.carsfile from cprd using +transfer __DBSCHEMA_TARGET_SCHEMA__.carsfile from __DBSCHEMA_SOURCE_CONNECTION__ using select documtid, filedate, regiracf from carsfile; -transfer public.carsbox from cprd using +transfer __DBSCHEMA_TARGET_SCHEMA__.carsbox from __DBSCHEMA_SOURCE_CONNECTION__ using select documtid, accesnum, @@ -19,14 +19,14 @@ select boxrracf from carsbox; -transfer public.carsrept from cprd using +transfer __DBSCHEMA_TARGET_SCHEMA__.carsrept from __DBSCHEMA_SOURCE_CONNECTION__ using select documtid, docutype, compnumb from carsrept; -transfer public.carindiv from cprd using +transfer __DBSCHEMA_TARGET_SCHEMA__.carindiv from __DBSCHEMA_SOURCE_CONNECTION__ using select documtid, replace(surname, CHR(0), '') as surname, diff --git a/data-tool/scripts/subset/subset_transfer_chunk.sql b/data-tool/scripts/subset/subset_transfer_chunk.sql index 50b4e91757..f76703338c 100644 --- a/data-tool/scripts/subset/subset_transfer_chunk.sql +++ b/data-tool/scripts/subset/subset_transfer_chunk.sql @@ -1,4 +1,4 @@ --- Transfer a chunk (or a whole subset) of corps from the SOURCE Oracle DB (cprd) into the TARGET Postgres extract DB (cprd_pg). +-- Transfer a chunk (or a whole subset) of corps from the SOURCE Oracle DB (__DBSCHEMA_SOURCE_CONNECTION__) into the TARGET Postgres extract DB (__DBSCHEMA_TARGET_SCHEMA__ schema). -- -- REQUIRED DbSchemaCLI variables (replace_variables=true): -- target_corp_num_predicate : SQL predicate restricting the computed target_corp_num (NO trailing semicolon). @@ -14,7 +14,7 @@ -- c.CORP_TYP_CD in ('BC','C','ULC','CUL','CC','CCC','QA','QB','QC','QD','QE') -- c.CORP_TYP_CD in ('BC','C','ULC','CUL','CC','CCC','QA','QB','QC','QD','QE','CP') -- --- Intended to be executed from a master DbSchemaCLI script connected to the target Postgres DB (cprd_pg). +-- Intended to be executed from a master DbSchemaCLI script connected to the target Postgres DB (__DBSCHEMA_TARGET_SCHEMA__ schema). 
-- -- IMPORTANT: -- - This template intentionally avoids the boolean<->integer ALTER COLUMN hacks used in the full refresh script. @@ -33,7 +33,7 @@ -- vset oracle_corp_num_predicate=c.CORP_NUM in ('1111585','1226175'); -- corporation -transfer public.corporation from cprd using +transfer __DBSCHEMA_TARGET_SCHEMA__.corporation from __DBSCHEMA_SOURCE_CONNECTION__ using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -88,7 +88,7 @@ left join last_ar la on la.corp_num = c.corp_num; -- event -transfer public.event from cprd using +transfer __DBSCHEMA_TARGET_SCHEMA__.event from __DBSCHEMA_SOURCE_CONNECTION__ using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -121,7 +121,7 @@ where e.event_typ_cd not in ('BNUPD', 'ADDLEDGR'); -- corp_name -transfer public.corp_name from cprd using +transfer __DBSCHEMA_TARGET_SCHEMA__.corp_name from __DBSCHEMA_SOURCE_CONNECTION__ using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -152,7 +152,7 @@ join CORP_NAME cn on cn.corp_num = c.corp_num; -- corp_state -transfer public.corp_state from cprd using +transfer __DBSCHEMA_TARGET_SCHEMA__.corp_state from __DBSCHEMA_SOURCE_CONNECTION__ using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -184,7 +184,7 @@ join corp_op_state cos on cos.state_typ_cd = cs.state_typ_cd; -- filing -transfer public.filing from cprd using +transfer __DBSCHEMA_TARGET_SCHEMA__.filing from __DBSCHEMA_SOURCE_CONNECTION__ using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -231,7 +231,7 @@ join filing f on f.event_id = e.event_id; -- filing_user -transfer public.filing_user from cprd using +transfer __DBSCHEMA_TARGET_SCHEMA__.filing_user from __DBSCHEMA_SOURCE_CONNECTION__ using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -266,9 +266,9 @@ join filing_user u on u.event_id = e.event_id; -- address (shared/global table; stage 
then merge before loading dependents) -TRUNCATE TABLE public.subset_address_stage; +TRUNCATE TABLE __DBSCHEMA_TARGET_SCHEMA__.subset_address_stage; -transfer public.subset_address_stage from cprd using +transfer __DBSCHEMA_TARGET_SCHEMA__.subset_address_stage from __DBSCHEMA_SOURCE_CONNECTION__ using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -346,7 +346,7 @@ from ( join address a on x.mailing_addr_id = a.addr_id ); -INSERT INTO public.address ( +INSERT INTO __DBSCHEMA_TARGET_SCHEMA__.address ( addr_id, province, country_typ_cd, @@ -374,7 +374,7 @@ FROM ( addr_line_2, addr_line_3, city - FROM public.subset_address_stage + FROM __DBSCHEMA_TARGET_SCHEMA__.subset_address_stage WHERE addr_id IS NOT NULL ORDER BY addr_id ) s @@ -387,11 +387,11 @@ SET province = EXCLUDED.province, addr_line_3 = EXCLUDED.addr_line_3, city = EXCLUDED.city; -TRUNCATE TABLE public.subset_address_stage; +TRUNCATE TABLE __DBSCHEMA_TARGET_SCHEMA__.subset_address_stage; -- office -transfer public.office from cprd using +transfer __DBSCHEMA_TARGET_SCHEMA__.office from __DBSCHEMA_SOURCE_CONNECTION__ using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -423,7 +423,7 @@ join office o on o.corp_num = c.corp_num; -- corp_comments -transfer public.corp_comments from cprd using +transfer __DBSCHEMA_TARGET_SCHEMA__.corp_comments from __DBSCHEMA_SOURCE_CONNECTION__ using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -457,7 +457,7 @@ join corp_comments cc on cc.corp_num = c.corp_num; -- ledger_text -transfer public.ledger_text from cprd using +transfer __DBSCHEMA_TARGET_SCHEMA__.ledger_text from __DBSCHEMA_SOURCE_CONNECTION__ using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -488,7 +488,7 @@ join ledger_text l on l.event_id = e.event_id; -- corp_party -transfer public.corp_party from cprd using +transfer __DBSCHEMA_TARGET_SCHEMA__.corp_party from 
__DBSCHEMA_SOURCE_CONNECTION__ using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -531,7 +531,7 @@ join corp_party p on p.corp_num = c.corp_num; -- corp_party_relationship -transfer public.corp_party_relationship from cprd using +transfer __DBSCHEMA_TARGET_SCHEMA__.corp_party_relationship from __DBSCHEMA_SOURCE_CONNECTION__ using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -560,7 +560,7 @@ join CORP_PARTY_RELATIONSHIP cpr on cpr.corp_party_id = p.corp_party_id; -- offices_held -transfer public.offices_held from cprd using +transfer __DBSCHEMA_TARGET_SCHEMA__.offices_held from __DBSCHEMA_SOURCE_CONNECTION__ using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -589,7 +589,7 @@ join OFFICES_HELD oh on oh.corp_party_id = p.corp_party_id; -- completing_party -transfer public.completing_party from cprd using +transfer __DBSCHEMA_TARGET_SCHEMA__.completing_party from __DBSCHEMA_SOURCE_CONNECTION__ using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -622,7 +622,7 @@ join completing_party cp on cp.event_id = e.event_id; -- submitting_party -transfer public.submitting_party from cprd using +transfer __DBSCHEMA_TARGET_SCHEMA__.submitting_party from __DBSCHEMA_SOURCE_CONNECTION__ using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -663,7 +663,7 @@ join SUBMITTING_PARTY sp on sp.event_id = e.event_id; -- corp_flag -transfer public.corp_flag from cprd using +transfer __DBSCHEMA_TARGET_SCHEMA__.corp_flag from __DBSCHEMA_SOURCE_CONNECTION__ using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -693,7 +693,7 @@ join corp_flag cf on cf.corp_num = c.corp_num; -- cont_out -transfer public.cont_out from cprd using +transfer __DBSCHEMA_TARGET_SCHEMA__.cont_out from __DBSCHEMA_SOURCE_CONNECTION__ using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -726,7 +726,7 
@@ join CONT_OUT co on co.corp_num = c.corp_num; -- conv_event -transfer public.conv_event from cprd using +transfer __DBSCHEMA_TARGET_SCHEMA__.conv_event from __DBSCHEMA_SOURCE_CONNECTION__ using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -765,7 +765,7 @@ join CONV_EVENT ce on ce.event_id = e.event_id; -- conv_ledger -transfer public.conv_ledger from cprd using +transfer __DBSCHEMA_TARGET_SCHEMA__.conv_ledger from __DBSCHEMA_SOURCE_CONNECTION__ using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -796,7 +796,7 @@ join CONV_LEDGER cl on cl.event_id = e.event_id; -- corp_involved - amalgamaTING_businesses -transfer public.corp_involved_amalgamating from cprd using +transfer __DBSCHEMA_TARGET_SCHEMA__.corp_involved_amalgamating from __DBSCHEMA_SOURCE_CONNECTION__ using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -864,7 +864,7 @@ where f.filing_typ_cd in ('AMALH', 'AMALV', 'AMALR', 'AMLHU', 'AMLVU', 'AMLRU', -- corp_involved - continue_in_historical_xpro -transfer public.corp_involved_cont_in from cprd using +transfer __DBSCHEMA_TARGET_SCHEMA__.corp_involved_cont_in from __DBSCHEMA_SOURCE_CONNECTION__ using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -895,7 +895,7 @@ where f.filing_typ_cd in ('CONTI', 'CONTU', 'CONTC') -- corp_restriction -transfer public.corp_restriction from cprd using +transfer __DBSCHEMA_TARGET_SCHEMA__.corp_restriction from __DBSCHEMA_SOURCE_CONNECTION__ using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -929,7 +929,7 @@ join CORP_RESTRICTION cr on cr.corp_num = c.corp_num; -- correction -transfer public.correction from cprd using +transfer __DBSCHEMA_TARGET_SCHEMA__.correction from __DBSCHEMA_SOURCE_CONNECTION__ using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -959,7 +959,7 @@ join CORRECTION corr on corr.event_id = e.event_id; -- 
continued_in_from_jurisdiction -transfer public.jurisdiction from cprd using +transfer __DBSCHEMA_TARGET_SCHEMA__.jurisdiction from __DBSCHEMA_SOURCE_CONNECTION__ using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -994,7 +994,7 @@ join JURISDICTION j on j.corp_num = c.corp_num; -- resolution -transfer public.resolution from cprd using +transfer __DBSCHEMA_TARGET_SCHEMA__.resolution from __DBSCHEMA_SOURCE_CONNECTION__ using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -1025,7 +1025,7 @@ join RESOLUTION r on r.corp_num = c.corp_num; -- share_struct -transfer public.share_struct from cprd using +transfer __DBSCHEMA_TARGET_SCHEMA__.share_struct from __DBSCHEMA_SOURCE_CONNECTION__ using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -1054,7 +1054,7 @@ join SHARE_STRUCT ss on ss.corp_num = c.corp_num; -- share_struct_cls -transfer public.share_struct_cls from cprd using +transfer __DBSCHEMA_TARGET_SCHEMA__.share_struct_cls from __DBSCHEMA_SOURCE_CONNECTION__ using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -1103,7 +1103,7 @@ join SHARE_STRUCT_CLS ssc on ssc.corp_num = c.corp_num; -- share_series -transfer public.share_series from cprd using +transfer __DBSCHEMA_TARGET_SCHEMA__.share_series from __DBSCHEMA_SOURCE_CONNECTION__ using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -1145,7 +1145,7 @@ join SHARE_SERIES ss on ss.corp_num = c.corp_num; -- notification -transfer public.notification from cprd using +transfer __DBSCHEMA_TARGET_SCHEMA__.notification from __DBSCHEMA_SOURCE_CONNECTION__ using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -1181,7 +1181,7 @@ join NOTIFICATION n on n.event_id = e.event_id; -- notification_resend -transfer public.notification_resend from cprd using +transfer __DBSCHEMA_TARGET_SCHEMA__.notification_resend from __DBSCHEMA_SOURCE_CONNECTION__ 
using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -1217,7 +1217,7 @@ join NOTIFICATION_RESEND nr on nr.event_id = e.event_id; -- party_notification -transfer public.party_notification from cprd using +transfer __DBSCHEMA_TARGET_SCHEMA__.party_notification from __DBSCHEMA_SOURCE_CONNECTION__ using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -1254,7 +1254,7 @@ join PARTY_NOTIFICATION pn on pn.party_id = cp.corp_party_id; -- payment -transfer public.payment from cprd using +transfer __DBSCHEMA_TARGET_SCHEMA__.payment from __DBSCHEMA_SOURCE_CONNECTION__ using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c From 657c16c5cb5cb1de5688d079ea31f95e262513bb Mon Sep 17 00:00:00 2001 From: ketaki-deodhar Date: Fri, 12 Jun 2026 07:49:21 -0700 Subject: [PATCH 02/10] 31328 - initial commit 2 --- data-tool/scripts/_generated/subset_load.sql | 20 +- .../support/subset_delete_cars.sql | 10 + .../support/subset_delete_chunk.sql | 119 ++ .../support/subset_disable_triggers.sql | 38 + .../support/subset_enable_triggers.sql | 38 + .../support/subset_pg_boolean_casts.sql | 55 + .../subset_pg_cleanup_address_stage.sql | 4 + .../subset_pg_cleanup_orphan_children.sql | 62 + .../subset_pg_prepare_address_stage.sql | 4 + .../subset_pg_purge_bcomps_excluded.sql | 181 +++ .../support/subset_transfer_cars.sql | 45 + .../support/subset_transfer_chunk.sql | 1283 +++++++++++++++++ .../subset_load_chunks/transfer_all.sql | 76 +- .../scripts/generate_cprd_subset_extract.py | 1 + 14 files changed, 1888 insertions(+), 48 deletions(-) create mode 100644 data-tool/scripts/_generated/subset_load_chunks/support/subset_delete_cars.sql create mode 100644 data-tool/scripts/_generated/subset_load_chunks/support/subset_delete_chunk.sql create mode 100644 data-tool/scripts/_generated/subset_load_chunks/support/subset_disable_triggers.sql create mode 100644 
data-tool/scripts/_generated/subset_load_chunks/support/subset_enable_triggers.sql create mode 100644 data-tool/scripts/_generated/subset_load_chunks/support/subset_pg_boolean_casts.sql create mode 100644 data-tool/scripts/_generated/subset_load_chunks/support/subset_pg_cleanup_address_stage.sql create mode 100644 data-tool/scripts/_generated/subset_load_chunks/support/subset_pg_cleanup_orphan_children.sql create mode 100644 data-tool/scripts/_generated/subset_load_chunks/support/subset_pg_prepare_address_stage.sql create mode 100644 data-tool/scripts/_generated/subset_load_chunks/support/subset_pg_purge_bcomps_excluded.sql create mode 100644 data-tool/scripts/_generated/subset_load_chunks/support/subset_transfer_cars.sql create mode 100644 data-tool/scripts/_generated/subset_load_chunks/support/subset_transfer_chunk.sql diff --git a/data-tool/scripts/_generated/subset_load.sql b/data-tool/scripts/_generated/subset_load.sql index b090edbac0..f8ba3c891e 100644 --- a/data-tool/scripts/_generated/subset_load.sql +++ b/data-tool/scripts/_generated/subset_load.sql @@ -4,13 +4,13 @@ vset cli.settings.transfer_threads=4 vset format.date=YYYY-MM-dd'T'hh:mm:ss'Z' vset format.timestamp=YYYY-MM-dd'T'hh:mm:ss'Z' -connect cprd_pg_subset; +connect cprd_pg; -- Serialize subset runs on this target DB. 
execute /home/kdeodhar/repos/lear/data-tool/scripts/subset/subset_pg_acquire_advisory_lock.sql -- Prepare shared address staging table before learning schema -execute /home/kdeodhar/repos/lear/data-tool/scripts/subset/subset_pg_prepare_address_stage.sql -learn schema public; +execute /home/kdeodhar/repos/lear/data-tool/scripts/_generated/subset_load_chunks/support/subset_pg_prepare_address_stage.sql +learn schema colin_extract; truncate table public.colin_extract_version; insert into public.colin_extract_version (extracted_at) values (current_timestamp); @@ -18,28 +18,28 @@ truncate table public.colin_extract_version; insert into public.colin_extract_ve execute /home/kdeodhar/repos/lear/data-tool/scripts/subset/subset_pg_fastload_begin.sql -- Postgres helper: allow VARCHAR/BPCHAR -> BOOLEAN assignment (DbSchemaCLI boolean inserts) -execute /home/kdeodhar/repos/lear/data-tool/scripts/subset/subset_pg_boolean_casts.sql +execute /home/kdeodhar/repos/lear/data-tool/scripts/_generated/subset_load_chunks/support/subset_pg_boolean_casts.sql -- Fail-fast: verify varchar/bpchar -> boolean casts exist select 't'::varchar::boolean; select 'f'::bpchar::boolean; -execute /home/kdeodhar/repos/lear/data-tool/scripts/subset/subset_disable_triggers.sql +execute /home/kdeodhar/repos/lear/data-tool/scripts/_generated/subset_load_chunks/support/subset_disable_triggers.sql -- global cars* refresh (not corp-scoped; full dataset truncate + reload) -execute /home/kdeodhar/repos/lear/data-tool/scripts/subset/subset_delete_cars.sql -execute /home/kdeodhar/repos/lear/data-tool/scripts/subset/subset_transfer_cars.sql +execute /home/kdeodhar/repos/lear/data-tool/scripts/_generated/subset_load_chunks/support/subset_delete_cars.sql +execute /home/kdeodhar/repos/lear/data-tool/scripts/_generated/subset_load_chunks/support/subset_transfer_cars.sql -- transfer corp-scoped subset from Oracle to Postgres -- transfer chunk 001/001 execute 
/home/kdeodhar/repos/lear/data-tool/scripts/_generated/subset_load_chunks/transfer_all.sql -- purge BCOMPS-excluded corps (computed in Postgres after load) -execute /home/kdeodhar/repos/lear/data-tool/scripts/subset/subset_pg_purge_bcomps_excluded.sql +execute /home/kdeodhar/repos/lear/data-tool/scripts/_generated/subset_load_chunks/support/subset_pg_purge_bcomps_excluded.sql -execute /home/kdeodhar/repos/lear/data-tool/scripts/subset/subset_enable_triggers.sql +execute /home/kdeodhar/repos/lear/data-tool/scripts/_generated/subset_load_chunks/support/subset_enable_triggers.sql -- Cleanup shared address staging table -execute /home/kdeodhar/repos/lear/data-tool/scripts/subset/subset_pg_cleanup_address_stage.sql +execute /home/kdeodhar/repos/lear/data-tool/scripts/_generated/subset_load_chunks/support/subset_pg_cleanup_address_stage.sql -- Release subset-run advisory lock execute /home/kdeodhar/repos/lear/data-tool/scripts/subset/subset_pg_release_advisory_lock.sql diff --git a/data-tool/scripts/_generated/subset_load_chunks/support/subset_delete_cars.sql b/data-tool/scripts/_generated/subset_load_chunks/support/subset_delete_cars.sql new file mode 100644 index 0000000000..f13b53082d --- /dev/null +++ b/data-tool/scripts/_generated/subset_load_chunks/support/subset_delete_cars.sql @@ -0,0 +1,10 @@ +-- Global delete/clear for cars* tables (subset refresh/load). +-- Intended to be executed while connected to target Postgres extract DB (cprd_pg). +-- +-- These tables are NOT corp-scoped, so we truncate the entire dataset and reload from Oracle. +-- Volume is low enough that a full refresh is appropriate. 
+ +TRUNCATE TABLE colin_extract.carindiv; +TRUNCATE TABLE colin_extract.carsrept; +TRUNCATE TABLE colin_extract.carsbox; +TRUNCATE TABLE colin_extract.carsfile; diff --git a/data-tool/scripts/_generated/subset_load_chunks/support/subset_delete_chunk.sql b/data-tool/scripts/_generated/subset_load_chunks/support/subset_delete_chunk.sql new file mode 100644 index 0000000000..68728542a1 --- /dev/null +++ b/data-tool/scripts/_generated/subset_load_chunks/support/subset_delete_chunk.sql @@ -0,0 +1,119 @@ +-- Delete a chunk of corps from the TARGET Postgres extract DB. +-- +-- REQUIRED DbSchemaCLI variables (replace_variables=true): +-- corp_ids_in : comma-separated SQL string literals for target corp_num values (NO parentheses), +-- e.g. 'BC0460007','A1234567' +-- +-- Intended to be executed from a master DbSchemaCLI script connected to the target Postgres DB (colin_extract schema). +-- +-- Note: This script intentionally does NOT delete internal migration/processing tables (mig_*, corp_processing, +-- colin_tracking, affiliation_processing, etc). It only deletes the corp-scoped COLIN extract tables that are +-- reloaded from Oracle. +-- IMPORTANT: +-- - Because preserved processing/tracking tables still reference corporation/event rows, refresh mode must keep +-- FK enforcement suppressed across this delete/reload window (for example via replica_role, or by disabling +-- triggers on the preserved referencing tables too). + +-- Address rows are treated as shared/global during subset refresh. +-- Do not delete them here: subset_transfer_chunk.sql stages incoming Oracle address rows and +-- merges them into the configured target address table by addr_id. + +-- Delete child tables first (event-scoped). 
+DELETE FROM colin_extract.notification_resend +WHERE event_id IN (SELECT event_id FROM colin_extract.event WHERE corp_num IN (&corp_ids_in)); + +DELETE FROM colin_extract.notification +WHERE event_id IN (SELECT event_id FROM colin_extract.event WHERE corp_num IN (&corp_ids_in)); + +DELETE FROM colin_extract.filing_user +WHERE event_id IN (SELECT event_id FROM colin_extract.event WHERE corp_num IN (&corp_ids_in)); + +DELETE FROM colin_extract.payment +WHERE event_id IN (SELECT event_id FROM colin_extract.event WHERE corp_num IN (&corp_ids_in)); + +DELETE FROM colin_extract.ledger_text +WHERE event_id IN (SELECT event_id FROM colin_extract.event WHERE corp_num IN (&corp_ids_in)); + +DELETE FROM colin_extract.conv_ledger +WHERE event_id IN (SELECT event_id FROM colin_extract.event WHERE corp_num IN (&corp_ids_in)); + +DELETE FROM colin_extract.conv_event +WHERE event_id IN (SELECT event_id FROM colin_extract.event WHERE corp_num IN (&corp_ids_in)); + +DELETE FROM colin_extract.completing_party +WHERE event_id IN (SELECT event_id FROM colin_extract.event WHERE corp_num IN (&corp_ids_in)); + +DELETE FROM colin_extract.submitting_party +WHERE event_id IN (SELECT event_id FROM colin_extract.event WHERE corp_num IN (&corp_ids_in)); + +DELETE FROM colin_extract.corp_involved_amalgamating +WHERE event_id IN (SELECT event_id FROM colin_extract.event WHERE corp_num IN (&corp_ids_in)); + +DELETE FROM colin_extract.corp_involved_cont_in +WHERE event_id IN (SELECT event_id FROM colin_extract.event WHERE corp_num IN (&corp_ids_in)); + +DELETE FROM colin_extract.correction +WHERE event_id IN (SELECT event_id FROM colin_extract.event WHERE corp_num IN (&corp_ids_in)); + +DELETE FROM colin_extract.filing +WHERE event_id IN (SELECT event_id FROM colin_extract.event WHERE corp_num IN (&corp_ids_in)); + +-- Delete corp-party related tables. 
+DELETE FROM colin_extract.party_notification +WHERE party_id IN (SELECT corp_party_id FROM colin_extract.corp_party WHERE corp_num IN (&corp_ids_in)); + +DELETE FROM colin_extract.offices_held +WHERE corp_party_id IN (SELECT corp_party_id FROM colin_extract.corp_party WHERE corp_num IN (&corp_ids_in)); + +DELETE FROM colin_extract.corp_party_relationship +WHERE corp_party_id IN (SELECT corp_party_id FROM colin_extract.corp_party WHERE corp_num IN (&corp_ids_in)); + +DELETE FROM colin_extract.corp_party +WHERE corp_num IN (&corp_ids_in); + +-- Delete corp-scoped tables. +DELETE FROM colin_extract.office +WHERE corp_num IN (&corp_ids_in); + +DELETE FROM colin_extract.corp_name +WHERE corp_num IN (&corp_ids_in); + +DELETE FROM colin_extract.corp_state +WHERE corp_num IN (&corp_ids_in); + +DELETE FROM colin_extract.corp_comments +WHERE corp_num IN (&corp_ids_in); + +DELETE FROM colin_extract.corp_flag +WHERE corp_num IN (&corp_ids_in); + +DELETE FROM colin_extract.cont_out +WHERE corp_num IN (&corp_ids_in); + +DELETE FROM colin_extract.corp_restriction +WHERE corp_num IN (&corp_ids_in); + +DELETE FROM colin_extract.jurisdiction +WHERE corp_num IN (&corp_ids_in); + +DELETE FROM colin_extract.resolution +WHERE corp_num IN (&corp_ids_in); + +DELETE FROM colin_extract.share_series +WHERE corp_num IN (&corp_ids_in); + +DELETE FROM colin_extract.share_struct_cls +WHERE corp_num IN (&corp_ids_in); + +DELETE FROM colin_extract.share_struct +WHERE corp_num IN (&corp_ids_in); + +-- Delete events last (many tables reference event_id). +DELETE FROM colin_extract.event +WHERE corp_num IN (&corp_ids_in); + +-- Delete the corp rows last. +DELETE FROM colin_extract.corporation +WHERE corp_num IN (&corp_ids_in); + +-- Address rows are refreshed via stage+merge in subset_transfer_chunk.sql. 
diff --git a/data-tool/scripts/_generated/subset_load_chunks/support/subset_disable_triggers.sql b/data-tool/scripts/_generated/subset_load_chunks/support/subset_disable_triggers.sql new file mode 100644 index 0000000000..02de0f1b14 --- /dev/null +++ b/data-tool/scripts/_generated/subset_load_chunks/support/subset_disable_triggers.sql @@ -0,0 +1,38 @@ +-- Disable triggers for corp-scoped tables (subset refresh/load). +-- Intended to be executed from a master DbSchemaCLI script while connected to the target Postgres DB. + +ALTER TABLE colin_extract.corporation DISABLE TRIGGER ALL; +ALTER TABLE colin_extract.corp_name DISABLE TRIGGER ALL; +ALTER TABLE colin_extract.corp_state DISABLE TRIGGER ALL; +ALTER TABLE colin_extract.event DISABLE TRIGGER ALL; +ALTER TABLE colin_extract.filing DISABLE TRIGGER ALL; +ALTER TABLE colin_extract.filing_user DISABLE TRIGGER ALL; +ALTER TABLE colin_extract.office DISABLE TRIGGER ALL; +ALTER TABLE colin_extract.corp_comments DISABLE TRIGGER ALL; +ALTER TABLE colin_extract.ledger_text DISABLE TRIGGER ALL; +ALTER TABLE colin_extract.corp_party DISABLE TRIGGER ALL; +ALTER TABLE colin_extract.corp_party_relationship DISABLE TRIGGER ALL; +ALTER TABLE colin_extract.offices_held DISABLE TRIGGER ALL; +ALTER TABLE colin_extract.completing_party DISABLE TRIGGER ALL; +ALTER TABLE colin_extract.submitting_party DISABLE TRIGGER ALL; +ALTER TABLE colin_extract.corp_flag DISABLE TRIGGER ALL; +ALTER TABLE colin_extract.cont_out DISABLE TRIGGER ALL; +ALTER TABLE colin_extract.conv_event DISABLE TRIGGER ALL; +ALTER TABLE colin_extract.conv_ledger DISABLE TRIGGER ALL; +ALTER TABLE colin_extract.corp_involved_amalgamating DISABLE TRIGGER ALL; +ALTER TABLE colin_extract.corp_involved_cont_in DISABLE TRIGGER ALL; +ALTER TABLE colin_extract.corp_restriction DISABLE TRIGGER ALL; +ALTER TABLE colin_extract.correction DISABLE TRIGGER ALL; +ALTER TABLE colin_extract.jurisdiction DISABLE TRIGGER ALL; +ALTER TABLE colin_extract.resolution DISABLE TRIGGER ALL; 
+ALTER TABLE colin_extract.share_series DISABLE TRIGGER ALL; +ALTER TABLE colin_extract.share_struct DISABLE TRIGGER ALL; +ALTER TABLE colin_extract.share_struct_cls DISABLE TRIGGER ALL; +ALTER TABLE colin_extract.notification DISABLE TRIGGER ALL; +ALTER TABLE colin_extract.notification_resend DISABLE TRIGGER ALL; +ALTER TABLE colin_extract.party_notification DISABLE TRIGGER ALL; +ALTER TABLE colin_extract.payment DISABLE TRIGGER ALL; +ALTER TABLE colin_extract.carsfile DISABLE TRIGGER ALL; +ALTER TABLE colin_extract.carsbox DISABLE TRIGGER ALL; +ALTER TABLE colin_extract.carsrept DISABLE TRIGGER ALL; +ALTER TABLE colin_extract.carindiv DISABLE TRIGGER ALL; diff --git a/data-tool/scripts/_generated/subset_load_chunks/support/subset_enable_triggers.sql b/data-tool/scripts/_generated/subset_load_chunks/support/subset_enable_triggers.sql new file mode 100644 index 0000000000..86d272effe --- /dev/null +++ b/data-tool/scripts/_generated/subset_load_chunks/support/subset_enable_triggers.sql @@ -0,0 +1,38 @@ +-- Enable triggers for corp-scoped tables (subset refresh/load). +-- Intended to be executed from a master DbSchemaCLI script while connected to the target Postgres DB. 
+ +ALTER TABLE colin_extract.corporation ENABLE TRIGGER ALL; +ALTER TABLE colin_extract.corp_name ENABLE TRIGGER ALL; +ALTER TABLE colin_extract.corp_state ENABLE TRIGGER ALL; +ALTER TABLE colin_extract.event ENABLE TRIGGER ALL; +ALTER TABLE colin_extract.filing ENABLE TRIGGER ALL; +ALTER TABLE colin_extract.filing_user ENABLE TRIGGER ALL; +ALTER TABLE colin_extract.office ENABLE TRIGGER ALL; +ALTER TABLE colin_extract.corp_comments ENABLE TRIGGER ALL; +ALTER TABLE colin_extract.ledger_text ENABLE TRIGGER ALL; +ALTER TABLE colin_extract.corp_party ENABLE TRIGGER ALL; +ALTER TABLE colin_extract.corp_party_relationship ENABLE TRIGGER ALL; +ALTER TABLE colin_extract.offices_held ENABLE TRIGGER ALL; +ALTER TABLE colin_extract.completing_party ENABLE TRIGGER ALL; +ALTER TABLE colin_extract.submitting_party ENABLE TRIGGER ALL; +ALTER TABLE colin_extract.corp_flag ENABLE TRIGGER ALL; +ALTER TABLE colin_extract.cont_out ENABLE TRIGGER ALL; +ALTER TABLE colin_extract.conv_event ENABLE TRIGGER ALL; +ALTER TABLE colin_extract.conv_ledger ENABLE TRIGGER ALL; +ALTER TABLE colin_extract.corp_involved_amalgamating ENABLE TRIGGER ALL; +ALTER TABLE colin_extract.corp_involved_cont_in ENABLE TRIGGER ALL; +ALTER TABLE colin_extract.corp_restriction ENABLE TRIGGER ALL; +ALTER TABLE colin_extract.correction ENABLE TRIGGER ALL; +ALTER TABLE colin_extract.jurisdiction ENABLE TRIGGER ALL; +ALTER TABLE colin_extract.resolution ENABLE TRIGGER ALL; +ALTER TABLE colin_extract.share_series ENABLE TRIGGER ALL; +ALTER TABLE colin_extract.share_struct ENABLE TRIGGER ALL; +ALTER TABLE colin_extract.share_struct_cls ENABLE TRIGGER ALL; +ALTER TABLE colin_extract.notification ENABLE TRIGGER ALL; +ALTER TABLE colin_extract.notification_resend ENABLE TRIGGER ALL; +ALTER TABLE colin_extract.party_notification ENABLE TRIGGER ALL; +ALTER TABLE colin_extract.payment ENABLE TRIGGER ALL; +ALTER TABLE colin_extract.carsfile ENABLE TRIGGER ALL; +ALTER TABLE colin_extract.carsbox ENABLE TRIGGER ALL; +ALTER 
TABLE colin_extract.carsrept ENABLE TRIGGER ALL; +ALTER TABLE colin_extract.carindiv ENABLE TRIGGER ALL; diff --git a/data-tool/scripts/_generated/subset_load_chunks/support/subset_pg_boolean_casts.sql b/data-tool/scripts/_generated/subset_load_chunks/support/subset_pg_boolean_casts.sql new file mode 100644 index 0000000000..272e80b4c5 --- /dev/null +++ b/data-tool/scripts/_generated/subset_load_chunks/support/subset_pg_boolean_casts.sql @@ -0,0 +1,55 @@ +-- Postgres helper for DbSchemaCLI subset extracts. +-- +-- Problem: +-- DbSchemaCLI transfers from Oracle to Postgres commonly bind Oracle CHAR/VARCHAR values as JDBC VARCHAR. +-- When the TARGET column is BOOLEAN, Postgres does NOT have an assignment cast from VARCHAR/BPCHAR -> BOOLEAN, +-- so inserts like: +-- ... SEND_AR_IND = 't' +-- can fail with: +-- ERROR: column "send_ar_ind" is of type boolean but expression is of type character varying +-- +-- Solution: +-- Install IMPLICIT casts for varchar/bpchar -> boolean that use Postgres' standard boolean input rules +-- ('t'/'f', 'true'/'false', '1'/'0', 'yes'/'no', etc). +-- +-- NOTE: +-- Some DbSchemaCLI builds appear to mangle the keyword "ASSIGNMENT" when executing scripts, producing +-- Postgres errors like: +-- ERROR: syntax error at or near "ASSIGNMEN" +-- Using IMPLICIT still enables automatic casting for INSERT/UPDATE (it is stronger than ASSIGNMENT), +-- while avoiding that parsing issue. +-- +-- IMPORTANT (DbSchemaCLI compatibility): +-- DbSchemaCLI splits statements on semicolons and does not reliably handle semicolons inside dollar-quoted +-- bodies. Keep dollar-quoted bodies free of internal semicolons and avoid DO $$ blocks. 
+ +CREATE OR REPLACE FUNCTION colin_extract.dbcli_varchar_to_boolean(val varchar) +RETURNS boolean +LANGUAGE sql +IMMUTABLE +STRICT +AS $$ + SELECT (val::text)::boolean +$$; + +CREATE OR REPLACE FUNCTION colin_extract.dbcli_bpchar_to_boolean(val bpchar) +RETURNS boolean +LANGUAGE sql +IMMUTABLE +STRICT +AS $$ + SELECT (val::text)::boolean +$$; + +-- Recreate casts in an idempotent way (Postgres has no CREATE CAST IF NOT EXISTS). +DROP CAST IF EXISTS (varchar AS boolean); +CREATE CAST (varchar AS boolean) + WITH FUNCTION colin_extract.dbcli_varchar_to_boolean(varchar) + AS IMPLICIT -- DbSchemaCLI workaround: avoid keyword being last token +; + +DROP CAST IF EXISTS (bpchar AS boolean); +CREATE CAST (bpchar AS boolean) + WITH FUNCTION colin_extract.dbcli_bpchar_to_boolean(bpchar) + AS IMPLICIT -- DbSchemaCLI workaround: avoid keyword being last token +; diff --git a/data-tool/scripts/_generated/subset_load_chunks/support/subset_pg_cleanup_address_stage.sql b/data-tool/scripts/_generated/subset_load_chunks/support/subset_pg_cleanup_address_stage.sql new file mode 100644 index 0000000000..eb4fa28551 --- /dev/null +++ b/data-tool/scripts/_generated/subset_load_chunks/support/subset_pg_cleanup_address_stage.sql @@ -0,0 +1,4 @@ +-- Cleanup the shared address staging table used by subset_transfer_chunk.sql. +-- No-op: the helper table is predeclared in the COLIN extract DDL and is truncated during prepare/chunk execution. + +TRUNCATE TABLE colin_extract.subset_address_stage; diff --git a/data-tool/scripts/_generated/subset_load_chunks/support/subset_pg_cleanup_orphan_children.sql b/data-tool/scripts/_generated/subset_load_chunks/support/subset_pg_cleanup_orphan_children.sql new file mode 100644 index 0000000000..09cb9a3a5d --- /dev/null +++ b/data-tool/scripts/_generated/subset_load_chunks/support/subset_pg_cleanup_orphan_children.sql @@ -0,0 +1,62 @@ +-- Cleanup stale child rows that no longer have the parent rows used by refresh-mode deletes. 
+-- +-- Why this exists: +-- - refresh-mode chunk deletes remove event-scoped rows by first looking up event_id in target `event` +-- - and remove corp-party child rows by first looking up corp_party_id in target `corp_party` +-- - so a prior failed/interleaved run can leave stale child rows behind when the parent row is missing +-- - those orphans can then collide with the next reload (for example, unique `filing.event_id`) +-- +-- This cleanup is intentionally narrow: +-- - only rows whose normal refresh delete path traverses a parent lookup are removed here +-- - corp-scoped rows deleted directly by corp_num are left to the regular chunk deletes + +-- Event-scoped children whose parent event row is missing. +DELETE FROM colin_extract.notification_resend t +WHERE NOT EXISTS (SELECT 1 FROM colin_extract.event e WHERE e.event_id = t.event_id); + +DELETE FROM colin_extract.notification t +WHERE NOT EXISTS (SELECT 1 FROM colin_extract.event e WHERE e.event_id = t.event_id); + +DELETE FROM colin_extract.filing_user t +WHERE NOT EXISTS (SELECT 1 FROM colin_extract.event e WHERE e.event_id = t.event_id); + +DELETE FROM colin_extract.payment t +WHERE NOT EXISTS (SELECT 1 FROM colin_extract.event e WHERE e.event_id = t.event_id); + +DELETE FROM colin_extract.ledger_text t +WHERE NOT EXISTS (SELECT 1 FROM colin_extract.event e WHERE e.event_id = t.event_id); + +DELETE FROM colin_extract.conv_ledger t +WHERE NOT EXISTS (SELECT 1 FROM colin_extract.event e WHERE e.event_id = t.event_id); + +DELETE FROM colin_extract.conv_event t +WHERE NOT EXISTS (SELECT 1 FROM colin_extract.event e WHERE e.event_id = t.event_id); + +DELETE FROM colin_extract.completing_party t +WHERE NOT EXISTS (SELECT 1 FROM colin_extract.event e WHERE e.event_id = t.event_id); + +DELETE FROM colin_extract.submitting_party t +WHERE NOT EXISTS (SELECT 1 FROM colin_extract.event e WHERE e.event_id = t.event_id); + +DELETE FROM colin_extract.corp_involved_amalgamating t +WHERE t.event_id IS NOT NULL + AND NOT 
EXISTS (SELECT 1 FROM colin_extract.event e WHERE e.event_id = t.event_id); + +DELETE FROM colin_extract.corp_involved_cont_in t +WHERE NOT EXISTS (SELECT 1 FROM colin_extract.event e WHERE e.event_id = t.event_id); + +DELETE FROM colin_extract.correction t +WHERE NOT EXISTS (SELECT 1 FROM colin_extract.event e WHERE e.event_id = t.event_id); + +DELETE FROM colin_extract.filing t +WHERE NOT EXISTS (SELECT 1 FROM colin_extract.event e WHERE e.event_id = t.event_id); + +-- Corp-party children whose parent corp_party row is missing. +DELETE FROM colin_extract.party_notification t +WHERE NOT EXISTS (SELECT 1 FROM colin_extract.corp_party cp WHERE cp.corp_party_id = t.party_id); + +DELETE FROM colin_extract.offices_held t +WHERE NOT EXISTS (SELECT 1 FROM colin_extract.corp_party cp WHERE cp.corp_party_id = t.corp_party_id); + +DELETE FROM colin_extract.corp_party_relationship t +WHERE NOT EXISTS (SELECT 1 FROM colin_extract.corp_party cp WHERE cp.corp_party_id = t.corp_party_id); diff --git a/data-tool/scripts/_generated/subset_load_chunks/support/subset_pg_prepare_address_stage.sql b/data-tool/scripts/_generated/subset_load_chunks/support/subset_pg_prepare_address_stage.sql new file mode 100644 index 0000000000..38be25e10f --- /dev/null +++ b/data-tool/scripts/_generated/subset_load_chunks/support/subset_pg_prepare_address_stage.sql @@ -0,0 +1,4 @@ +-- Prepare the shared address staging table used by subset_transfer_chunk.sql. +-- This is a predeclared regular table (not TEMP) because DbSchemaCLI transfer work may use separate sessions. 
+ +TRUNCATE TABLE colin_extract.subset_address_stage; diff --git a/data-tool/scripts/_generated/subset_load_chunks/support/subset_pg_purge_bcomps_excluded.sql b/data-tool/scripts/_generated/subset_load_chunks/support/subset_pg_purge_bcomps_excluded.sql new file mode 100644 index 0000000000..b883a03f4f --- /dev/null +++ b/data-tool/scripts/_generated/subset_load_chunks/support/subset_pg_purge_bcomps_excluded.sql @@ -0,0 +1,181 @@ +-- Compute BCOMPS-excluded corps ONCE in Postgres, then purge those corps from the corp-scoped COLIN extract tables. +-- +-- This is intended to replace the repeated Oracle-side "excluded_corps" computation previously embedded in every +-- transfer query (subset_transfer_chunk.sql). +-- +-- IMPORTANT: +-- - This script intentionally does NOT touch internal migration/processing tables (mig_*, corp_processing, +-- colin_tracking, affiliation_processing, etc). It only purges the corp-scoped COLIN extract tables +-- that are reloaded from Oracle. +-- - Because preserved processing/tracking tables still reference corporation/event rows, refresh mode must keep +-- FK enforcement suppressed across this purge window too (for example via replica_role, or by disabling +-- triggers on the preserved referencing tables too). +-- - This script avoids DO $$ blocks for DbSchemaCLI compatibility. +-- - The helper keyset tables are predeclared in the COLIN extract DDL and reused via TRUNCATE/INSERT. 
+ +-- 1) Build keysets + +TRUNCATE TABLE colin_extract.subset_excluded_corp_parties; +TRUNCATE TABLE colin_extract.subset_excluded_events; +TRUNCATE TABLE colin_extract.subset_excluded_corps; + +INSERT INTO colin_extract.subset_excluded_corps (corp_num) +SELECT DISTINCT e.corp_num +FROM colin_extract.event e +JOIN colin_extract.filing f ON f.event_id = e.event_id +JOIN colin_extract.filing_user u ON u.event_id = e.event_id +WHERE e.corp_num IS NOT NULL + AND u.user_id = 'BCOMPS' + AND f.filing_type_cd IN ('BEINC', 'ICORP', 'ICORU', 'ICORC', 'CONTB', 'CONTI', 'CONTU', 'CONTC'); + +INSERT INTO colin_extract.subset_excluded_events (event_id) +SELECT DISTINCT e.event_id +FROM colin_extract.event e +JOIN colin_extract.subset_excluded_corps x ON x.corp_num = e.corp_num +WHERE e.event_id IS NOT NULL; + +INSERT INTO colin_extract.subset_excluded_corp_parties (corp_party_id) +SELECT DISTINCT cp.corp_party_id +FROM colin_extract.corp_party cp +JOIN colin_extract.subset_excluded_corps x ON x.corp_num = cp.corp_num +WHERE cp.corp_party_id IS NOT NULL; + +-- 2) Purge (delete child tables first) + +-- Event-scoped children +DELETE FROM colin_extract.notification_resend t +USING colin_extract.subset_excluded_events x +WHERE t.event_id = x.event_id; + +DELETE FROM colin_extract.notification t +USING colin_extract.subset_excluded_events x +WHERE t.event_id = x.event_id; + +DELETE FROM colin_extract.filing_user t +USING colin_extract.subset_excluded_events x +WHERE t.event_id = x.event_id; + +DELETE FROM colin_extract.payment t +USING colin_extract.subset_excluded_events x +WHERE t.event_id = x.event_id; + +DELETE FROM colin_extract.ledger_text t +USING colin_extract.subset_excluded_events x +WHERE t.event_id = x.event_id; + +DELETE FROM colin_extract.conv_ledger t +USING colin_extract.subset_excluded_events x +WHERE t.event_id = x.event_id; + +DELETE FROM colin_extract.conv_event t +USING colin_extract.subset_excluded_events x +WHERE t.event_id = x.event_id; + +DELETE FROM 
colin_extract.completing_party t +USING colin_extract.subset_excluded_events x +WHERE t.event_id = x.event_id; + +DELETE FROM colin_extract.submitting_party t +USING colin_extract.subset_excluded_events x +WHERE t.event_id = x.event_id; + +DELETE FROM colin_extract.corp_involved_cont_in t +USING colin_extract.subset_excluded_events x +WHERE t.event_id = x.event_id; + +DELETE FROM colin_extract.correction t +USING colin_extract.subset_excluded_events x +WHERE t.event_id = x.event_id; + +DELETE FROM colin_extract.filing t +USING colin_extract.subset_excluded_events x +WHERE t.event_id = x.event_id; + +-- corp_involved_amalgamating can reference corp_num via ted_corp_num/ting_corp_num as well as event_id. +-- Delete any rows where either side is excluded (covers non-event-owned references too). +DELETE FROM colin_extract.corp_involved_amalgamating t +USING colin_extract.subset_excluded_corps x +WHERE t.ted_corp_num = x.corp_num + OR t.ting_corp_num = x.corp_num; + +-- Corp-party related +DELETE FROM colin_extract.party_notification t +USING colin_extract.subset_excluded_corp_parties x +WHERE t.party_id = x.corp_party_id; + +DELETE FROM colin_extract.offices_held t +USING colin_extract.subset_excluded_corp_parties x +WHERE t.corp_party_id = x.corp_party_id; + +DELETE FROM colin_extract.corp_party_relationship t +USING colin_extract.subset_excluded_corp_parties x +WHERE t.corp_party_id = x.corp_party_id; + +DELETE FROM colin_extract.corp_party t +USING colin_extract.subset_excluded_corp_parties x +WHERE t.corp_party_id = x.corp_party_id; + +-- Corp-scoped tables +DELETE FROM colin_extract.office t +USING colin_extract.subset_excluded_corps x +WHERE t.corp_num = x.corp_num; + +DELETE FROM colin_extract.corp_name t +USING colin_extract.subset_excluded_corps x +WHERE t.corp_num = x.corp_num; + +DELETE FROM colin_extract.corp_state t +USING colin_extract.subset_excluded_corps x +WHERE t.corp_num = x.corp_num; + +DELETE FROM colin_extract.corp_comments t +USING 
colin_extract.subset_excluded_corps x +WHERE t.corp_num = x.corp_num; + +DELETE FROM colin_extract.corp_flag t +USING colin_extract.subset_excluded_corps x +WHERE t.corp_num = x.corp_num; + +DELETE FROM colin_extract.cont_out t +USING colin_extract.subset_excluded_corps x +WHERE t.corp_num = x.corp_num; + +DELETE FROM colin_extract.corp_restriction t +USING colin_extract.subset_excluded_corps x +WHERE t.corp_num = x.corp_num; + +DELETE FROM colin_extract.jurisdiction t +USING colin_extract.subset_excluded_corps x +WHERE t.corp_num = x.corp_num; + +DELETE FROM colin_extract.resolution t +USING colin_extract.subset_excluded_corps x +WHERE t.corp_num = x.corp_num; + +-- Share tables (delete deepest first) +DELETE FROM colin_extract.share_series t +USING colin_extract.subset_excluded_corps x +WHERE t.corp_num = x.corp_num; + +DELETE FROM colin_extract.share_struct_cls t +USING colin_extract.subset_excluded_corps x +WHERE t.corp_num = x.corp_num; + +DELETE FROM colin_extract.share_struct t +USING colin_extract.subset_excluded_corps x +WHERE t.corp_num = x.corp_num; + +-- Events last (many tables reference event_id) +DELETE FROM colin_extract.event t +USING colin_extract.subset_excluded_events x +WHERE t.event_id = x.event_id; + +-- Corporation last +DELETE FROM colin_extract.corporation t +USING colin_extract.subset_excluded_corps x +WHERE t.corp_num = x.corp_num; + +-- 3) Cleanup helper tables +TRUNCATE TABLE colin_extract.subset_excluded_corp_parties; +TRUNCATE TABLE colin_extract.subset_excluded_events; +TRUNCATE TABLE colin_extract.subset_excluded_corps; diff --git a/data-tool/scripts/_generated/subset_load_chunks/support/subset_transfer_cars.sql b/data-tool/scripts/_generated/subset_load_chunks/support/subset_transfer_cars.sql new file mode 100644 index 0000000000..93d8a87103 --- /dev/null +++ b/data-tool/scripts/_generated/subset_load_chunks/support/subset_transfer_cars.sql @@ -0,0 +1,45 @@ +-- Global transfer of cars* tables from SOURCE Oracle DB (ctst) into 
TARGET Postgres extract DB (colin_extract schema). +-- Intended to be executed from a master DbSchemaCLI script connected to the target Postgres DB (colin_extract schema). +-- +-- These tables are NOT corp-scoped. The full dataset is transferred without filtering. +-- Volume is low enough that a full refresh is appropriate. + +transfer colin_extract.carsfile from ctst using +select + documtid, + filedate, + regiracf +from carsfile; + +transfer colin_extract.carsbox from ctst using +select + documtid, + accesnum, + batchnum, + boxrracf +from carsbox; + +transfer colin_extract.carsrept from ctst using +select + documtid, + docutype, + compnumb +from carsrept; + +transfer colin_extract.carindiv from ctst using +select + documtid, + replace(surname, CHR(0), '') as surname, + replace(firname, CHR(0), '') as firname, + replace(dircpoco, CHR(0), '') as dircpoco, + replace(dircflag, CHR(0), '') as dircflag, + replace(offiflag, CHR(0), '') as offiflag, + replace(chgreasn, CHR(0), '') as chgreasn, + replace(pfirname, CHR(0), '') as pfirname, + replace(psurname, CHR(0), '') as psurname, + replace(offtitle, CHR(0), '') as offtitle, + replace(dircaddr01, CHR(0), '') as dircaddr01, + replace(dircaddr02, CHR(0), '') as dircaddr02, + replace(dircaddr03, CHR(0), '') as dircaddr03, + replace(dircaddr04, CHR(0), '') as dircaddr04 +from carindiv; diff --git a/data-tool/scripts/_generated/subset_load_chunks/support/subset_transfer_chunk.sql b/data-tool/scripts/_generated/subset_load_chunks/support/subset_transfer_chunk.sql new file mode 100644 index 0000000000..2f27a78383 --- /dev/null +++ b/data-tool/scripts/_generated/subset_load_chunks/support/subset_transfer_chunk.sql @@ -0,0 +1,1283 @@ +-- Transfer a chunk (or a whole subset) of corps from the SOURCE Oracle DB (ctst) into the TARGET Postgres extract DB (colin_extract schema). 
+-- +-- REQUIRED DbSchemaCLI variables (replace_variables=true): +-- target_corp_num_predicate : SQL predicate restricting the computed target_corp_num (NO trailing semicolon). +-- Examples: +-- target_corp_num in ('BC1111585','A1234567') +-- (target_corp_num in (...) OR target_corp_num in (...)) +-- oracle_corp_num_predicate : SQL predicate restricting Oracle corporation.corp_num (NO trailing semicolon). +-- Examples: +-- c.CORP_NUM in ('1111585','A1234567') +-- (c.CORP_NUM in (...) OR c.CORP_NUM in (...)) +-- oracle_corp_type_predicate : SQL predicate restricting Oracle corporation.corp_typ_cd (NO trailing semicolon). +-- Examples: +-- c.CORP_TYP_CD in ('BC','C','ULC','CUL','CC','CCC','QA','QB','QC','QD','QE') +-- c.CORP_TYP_CD in ('BC','C','ULC','CUL','CC','CCC','QA','QB','QC','QD','QE','CP') +-- +-- Intended to be executed from a master DbSchemaCLI script connected to the target Postgres DB (colin_extract schema). +-- +-- IMPORTANT: +-- - This template intentionally avoids the boolean<->integer ALTER COLUMN hacks used in the full refresh script. +-- Instead, Oracle SELECTs emit boolean-friendly 't'/'f' strings for Postgres boolean columns. +-- - This template transfers corp-scoped tables only (no cars* tables). +-- +-- Performance notes: +-- - BCOMPS exclusion is NOT computed in Oracle in this template (to avoid repeating expensive Oracle-side joins per table). +-- Instead, load the requested corp set and purge BCOMPS-excluded corps ONCE in Postgres after the transfer suite completes +-- (see: subset_pg_purge_bcomps_excluded.sql). +-- - Joins are written to start from the subset (corporation_cte) to avoid "0 rows but slow" plans. +-- - ORDER BY clauses are removed (sorting is unnecessary overhead for transfers). 
+-- +-- Example (legacy vset mode): +-- vset target_corp_num_predicate=target_corp_num in ('BC1111585','BC1226175'); +-- vset oracle_corp_num_predicate=c.CORP_NUM in ('1111585','1226175'); + +-- corporation +transfer colin_extract.corporation from ctst using +with corp_list as ( + select /*+ materialize */ c.corp_num + from corporation c + where &oracle_corp_num_predicate + and &oracle_corp_type_predicate + -- altered from BC to BEN then BEN to BC before directed launch + and c.CORP_NUM not in ('0460007', '1255957', '1186381') +), +corporation_cte as ( + select * + from ( + select c.*, + case + when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM + else c.CORP_NUM + end as target_corp_num + from corporation c + join corp_list cl on cl.corp_num = c.corp_num + ) + where &target_corp_num_predicate +), +last_ar as ( + select e.corp_num, + to_number(to_char(max(date_1), 'YYYY')) as last_ar_reminder_year + from eml_log e + join rep_data r on r.param_id = e.param_id + and r.t20_1 = e.corp_num + join corp_list cl on cl.corp_num = e.corp_num + group by e.corp_num +) +select c.target_corp_num as CORP_NUM, + c.CORP_FROZEN_TYP_CD as corp_frozen_type_cd, + case + when c.CORP_TYP_CD in ('QA', 'QB', 'QC', 'QD', 'QE') then 'BC' + else c.CORP_TYP_CD + end as CORP_TYPE_CD, + c.CORP_PASSWORD, + c.RECOGNITION_DTS, + c.BN_9, + c.BN_15, + c.ADMIN_EMAIL, + c.ACCESSION_NUM, + c.LAST_AR_FILED_DT, + case c.SEND_AR_IND + when 'N' then 'f' + when 'Y' then 't' + else 't' + end as SEND_AR_IND, + la.last_ar_reminder_year as LAST_AR_REMINDER_YEAR +from corporation_cte c +left join last_ar la on la.corp_num = c.corp_num; + + +-- event +transfer colin_extract.event from ctst using +with corp_list as ( + select /*+ materialize */ c.corp_num + from corporation c + where &oracle_corp_num_predicate + and &oracle_corp_type_predicate + and c.CORP_NUM not in ('0460007', '1255957', '1186381') +), +corporation_cte as ( + select * + from ( + select c.*, + case + when c.CORP_TYP_CD in ('BC', 
'ULC', 'CC') then 'BC' || c.CORP_NUM + else c.CORP_NUM + end as target_corp_num + from corporation c + join corp_list cl on cl.corp_num = c.corp_num + ) + where &target_corp_num_predicate +) +select e.event_id, + c.target_corp_num as CORP_NUM, + e.event_typ_cd as event_type_cd, + e.event_timestmp as event_timerstamp, + e.trigger_dts +from corporation_cte c +join event e on e.corp_num = c.corp_num +-- not transferring BNUPD, ADDLEDGR events +where e.event_typ_cd not in ('BNUPD', 'ADDLEDGR'); + + +-- corp_name +transfer colin_extract.corp_name from ctst using +with corp_list as ( + select /*+ materialize */ c.corp_num + from corporation c + where &oracle_corp_num_predicate + and &oracle_corp_type_predicate + and c.CORP_NUM not in ('0460007', '1255957', '1186381') +), +corporation_cte as ( + select * + from ( + select c.*, + case + when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM + else c.CORP_NUM + end as target_corp_num + from corporation c + join corp_list cl on cl.corp_num = c.corp_num + ) + where &target_corp_num_predicate +) +select c.target_corp_num as CORP_NUM, + cn.CORP_NAME_TYP_CD, + cn.start_event_id, + cn.end_event_id, + cn.CORP_NME as corp_name +from corporation_cte c +join CORP_NAME cn on cn.corp_num = c.corp_num; + + +-- corp_state +transfer colin_extract.corp_state from ctst using +with corp_list as ( + select /*+ materialize */ c.corp_num + from corporation c + where &oracle_corp_num_predicate + and &oracle_corp_type_predicate + and c.CORP_NUM not in ('0460007', '1255957', '1186381') +), +corporation_cte as ( + select * + from ( + select c.*, + case + when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM + else c.CORP_NUM + end as target_corp_num + from corporation c + join corp_list cl on cl.corp_num = c.corp_num + ) + where &target_corp_num_predicate +) +select c.target_corp_num as CORP_NUM, + cs.STATE_TYP_CD as state_type_cd, + cos.op_state_typ_cd as op_state_type_cd, + cs.start_event_id, + cs.end_event_id +from 
corporation_cte c +join CORP_STATE cs on cs.corp_num = c.corp_num +join corp_op_state cos on cos.state_typ_cd = cs.state_typ_cd; + + +-- filing +transfer colin_extract.filing from ctst using +with corp_list as ( + select /*+ materialize */ c.corp_num + from corporation c + where &oracle_corp_num_predicate + and &oracle_corp_type_predicate + and c.CORP_NUM not in ('0460007', '1255957', '1186381') +), +corporation_cte as ( + select * + from ( + select c.*, + case + when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM + else c.CORP_NUM + end as target_corp_num + from corporation c + join corp_list cl on cl.corp_num = c.corp_num + ) + where &target_corp_num_predicate +) +select e.event_id, + f.filing_typ_cd as filing_type_cd, + f.effective_dt, + f.withdrawn_event_id, + trim(f.ods_typ_cd) as ods_type_cd, + f.nr_num, + f.COURT_ORDER_NUM, + f.CHANGE_DT, + f.PERIOD_END_DT, + case f.ARRANGEMENT_IND + when 'N' then 'f' + when 'Y' then 't' + else 't' + end as ARRANGEMENT_IND, + f.AUTH_SIGN_DT, + case f.COURT_APPR_IND + when 'N' then 'f' + when 'Y' then 't' + else 't' + end as COURT_APPR_IND +from corporation_cte c +join event e on e.corp_num = c.corp_num +join filing f on f.event_id = e.event_id; + + +-- filing_user +transfer colin_extract.filing_user from ctst using +with corp_list as ( + select /*+ materialize */ c.corp_num + from corporation c + where &oracle_corp_num_predicate + and &oracle_corp_type_predicate + and c.CORP_NUM not in ('0460007', '1255957', '1186381') +), +corporation_cte as ( + select * + from ( + select c.*, + case + when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM + else c.CORP_NUM + end as target_corp_num + from corporation c + join corp_list cl on cl.corp_num = c.corp_num + ) + where &target_corp_num_predicate +) +select e.event_id, + u.user_id, + u.last_nme as last_name, + u.first_nme as first_name, + u.middle_nme as middle_name, + u.email_addr, + u.BCOL_ACCT_NUM, + u.ROLE_TYP_CD +from corporation_cte c +join event e on 
e.corp_num = c.corp_num +join filing_user u on u.event_id = e.event_id; + + +-- address (shared/global table; stage then merge before loading dependents) +TRUNCATE TABLE colin_extract.subset_address_stage; + +transfer colin_extract.subset_address_stage from ctst using +with corp_list as ( + select /*+ materialize */ c.corp_num + from corporation c + where &oracle_corp_num_predicate + and &oracle_corp_type_predicate + and c.CORP_NUM not in ('0460007', '1255957', '1186381') +), +corporation_cte as ( + select * + from ( + select c.*, + case + when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM + else c.CORP_NUM + end as target_corp_num + from corporation c + join corp_list cl on cl.corp_num = c.corp_num + ) + where &target_corp_num_predicate +) +select distinct + addr_id, + trim(province) as province, + trim(country_typ_cd) as country_typ_cd, + trim(replace(postal_cd, CHR(0), '')) as POSTAL_CD, + trim(addr_line_1) as addr_line_1, + trim(replace(addr_line_2, CHR(0), '')) as ADDR_LINE_2, + trim(addr_line_3) as addr_line_3, + trim(city) as city +from ( + select a.* + from corporation_cte c + join corp_party x on x.corp_num = c.corp_num + join address a on (x.delivery_addr_id = a.addr_id or x.mailing_addr_id = a.addr_id) + + UNION ALL + select a.* + from corporation_cte c + join office x on x.corp_num = c.corp_num + join address a on (x.delivery_addr_id = a.addr_id or x.mailing_addr_id = a.addr_id) + + UNION ALL + select a.* + from corporation_cte c + join event e on e.corp_num = c.corp_num + join completing_party x on x.event_id = e.event_id + join address a on x.mailing_addr_id = a.addr_id + + UNION ALL + select a.* + from corporation_cte c + join event e on e.corp_num = c.corp_num + join notification x on x.event_id = e.event_id + join address a on x.mailing_addr_id = a.addr_id + + UNION ALL + select a.* + from corporation_cte c + join event e on e.corp_num = c.corp_num + join notification_resend x on x.event_id = e.event_id + join address a on 
x.mailing_addr_id = a.addr_id + + UNION ALL + select a.* + from corporation_cte c + join event e on e.corp_num = c.corp_num + join submitting_party x on x.event_id = e.event_id + join address a on (x.notify_addr_id = a.addr_id or x.mailing_addr_id = a.addr_id) + + UNION ALL + select a.* + from corporation_cte c + join corp_party p on p.corp_num = c.corp_num + join party_notification x on x.party_id = p.corp_party_id + join address a on x.mailing_addr_id = a.addr_id +); + +INSERT INTO colin_extract.address ( + addr_id, + province, + country_typ_cd, + postal_cd, + addr_line_1, + addr_line_2, + addr_line_3, + city +) +SELECT s.addr_id, + s.province, + s.country_typ_cd, + s.postal_cd, + s.addr_line_1, + s.addr_line_2, + s.addr_line_3, + s.city +FROM ( + SELECT DISTINCT ON (addr_id) + addr_id, + province, + country_typ_cd, + postal_cd, + addr_line_1, + addr_line_2, + addr_line_3, + city + FROM colin_extract.subset_address_stage + WHERE addr_id IS NOT NULL + ORDER BY addr_id +) s +ON CONFLICT (addr_id) DO UPDATE +SET province = EXCLUDED.province, + country_typ_cd = EXCLUDED.country_typ_cd, + postal_cd = EXCLUDED.postal_cd, + addr_line_1 = EXCLUDED.addr_line_1, + addr_line_2 = EXCLUDED.addr_line_2, + addr_line_3 = EXCLUDED.addr_line_3, + city = EXCLUDED.city; + +TRUNCATE TABLE colin_extract.subset_address_stage; + + +-- office +transfer colin_extract.office from ctst using +with corp_list as ( + select /*+ materialize */ c.corp_num + from corporation c + where &oracle_corp_num_predicate + and &oracle_corp_type_predicate + and c.CORP_NUM not in ('0460007', '1255957', '1186381') +), +corporation_cte as ( + select * + from ( + select c.*, + case + when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM + else c.CORP_NUM + end as target_corp_num + from corporation c + join corp_list cl on cl.corp_num = c.corp_num + ) + where &target_corp_num_predicate +) +select c.target_corp_num as CORP_NUM, + o.office_typ_cd, + o.start_event_id, + o.end_event_id, + 
o.mailing_addr_id, + o.delivery_addr_id +from corporation_cte c +join office o on o.corp_num = c.corp_num; + + +-- corp_comments +transfer colin_extract.corp_comments from ctst using +with corp_list as ( + select /*+ materialize */ c.corp_num + from corporation c + where &oracle_corp_num_predicate + and &oracle_corp_type_predicate + and c.CORP_NUM not in ('0460007', '1255957', '1186381') +), +corporation_cte as ( + select * + from ( + select c.*, + case + when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM + else c.CORP_NUM + end as target_corp_num + from corporation c + join corp_list cl on cl.corp_num = c.corp_num + ) + where &target_corp_num_predicate +) +select cc.comment_dts, + c.target_corp_num as CORP_NUM, + cc.comments, + cc.USER_ID, + cc.FIRST_NME, + cc.LAST_NME, + cc.MIDDLE_NME, + cc.ACCESSION_COMMENTS +from corporation_cte c +join corp_comments cc on cc.corp_num = c.corp_num; + + +-- ledger_text +transfer colin_extract.ledger_text from ctst using +with corp_list as ( + select /*+ materialize */ c.corp_num + from corporation c + where &oracle_corp_num_predicate + and &oracle_corp_type_predicate + and c.CORP_NUM not in ('0460007', '1255957', '1186381') +), +corporation_cte as ( + select * + from ( + select c.*, + case + when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM + else c.CORP_NUM + end as target_corp_num + from corporation c + join corp_list cl on cl.corp_num = c.corp_num + ) + where &target_corp_num_predicate +) +select e.event_id, + l.notation, + l.USER_ID, + l.LEDGER_TEXT_DTS +from corporation_cte c +join event e on e.corp_num = c.corp_num +join ledger_text l on l.event_id = e.event_id; + + +-- corp_party +transfer colin_extract.corp_party from ctst using +with corp_list as ( + select /*+ materialize */ c.corp_num + from corporation c + where &oracle_corp_num_predicate + and &oracle_corp_type_predicate + and c.CORP_NUM not in ('0460007', '1255957', '1186381') +), +corporation_cte as ( + select * + from ( + select 
c.*, + case + when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM + else c.CORP_NUM + end as target_corp_num + from corporation c + join corp_list cl on cl.corp_num = c.corp_num + ) + where &target_corp_num_predicate +) +select p.corp_party_id, + p.mailing_addr_id, + p.delivery_addr_id, + c.target_corp_num as CORP_NUM, + nvl(p.party_typ_cd, ' ') as party_typ_cd, + p.start_event_id, + p.end_event_id, + p.prev_party_id, + p.appointment_dt, + p.cessation_dt, + nvl(p.LAST_NME, ' ') as last_name, + nvl(p.MIDDLE_NME, ' ') as middle_name, + nvl(p.FIRST_NME, ' ') as first_name, + nvl(p.BUSINESS_NME, ' ') as business_name, + p.BUS_COMPANY_NUM, + p.CORR_TYP_CD, + p.OFFICE_NOTIFICATION_DT +from corporation_cte c +join corp_party p on p.corp_num = c.corp_num; + + +-- corp_party_relationship +transfer colin_extract.corp_party_relationship from ctst using +with corp_list as ( + select /*+ materialize */ c.corp_num + from corporation c + where &oracle_corp_num_predicate + and &oracle_corp_type_predicate + and c.CORP_NUM not in ('0460007', '1255957', '1186381') +), +corporation_cte as ( + select * + from ( + select c.*, + case + when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM + else c.CORP_NUM + end as target_corp_num + from corporation c + join corp_list cl on cl.corp_num = c.corp_num + ) + where &target_corp_num_predicate +) +select cpr.CORP_PARTY_ID as corp_party_id, + cpr.RELATIONSHIP_TYP_CD as relationship_typ_cd +from corporation_cte c +join corp_party p on p.corp_num = c.corp_num +join CORP_PARTY_RELATIONSHIP cpr on cpr.corp_party_id = p.corp_party_id; + + +-- offices_held +transfer colin_extract.offices_held from ctst using +with corp_list as ( + select /*+ materialize */ c.corp_num + from corporation c + where &oracle_corp_num_predicate + and &oracle_corp_type_predicate + and c.CORP_NUM not in ('0460007', '1255957', '1186381') +), +corporation_cte as ( + select * + from ( + select c.*, + case + when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 
'BC' || c.CORP_NUM + else c.CORP_NUM + end as target_corp_num + from corporation c + join corp_list cl on cl.corp_num = c.corp_num + ) + where &target_corp_num_predicate +) +select oh.CORP_PARTY_ID as corp_party_id, + oh.OFFICER_TYP_CD as officer_typ_cd +from corporation_cte c +join corp_party p on p.corp_num = c.corp_num +join OFFICES_HELD oh on oh.corp_party_id = p.corp_party_id; + + +-- completing_party +transfer colin_extract.completing_party from ctst using +with corp_list as ( + select /*+ materialize */ c.corp_num + from corporation c + where &oracle_corp_num_predicate + and &oracle_corp_type_predicate + and c.CORP_NUM not in ('0460007', '1255957', '1186381') +), +corporation_cte as ( + select * + from ( + select c.*, + case + when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM + else c.CORP_NUM + end as target_corp_num + from corporation c + join corp_list cl on cl.corp_num = c.corp_num + ) + where &target_corp_num_predicate +) +select e.event_id, + cp.MAILING_ADDR_ID, + cp.FIRST_NME, + cp.LAST_NME, + cp.MIDDLE_NME, + cp.EMAIL_REQ_ADDRESS +from corporation_cte c +join event e on e.corp_num = c.corp_num +join completing_party cp on cp.event_id = e.event_id; + + +-- submitting_party +transfer colin_extract.submitting_party from ctst using +with corp_list as ( + select /*+ materialize */ c.corp_num + from corporation c + where &oracle_corp_num_predicate + and &oracle_corp_type_predicate + and c.CORP_NUM not in ('0460007', '1255957', '1186381') +), +corporation_cte as ( + select * + from ( + select c.*, + case + when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM + else c.CORP_NUM + end as target_corp_num + from corporation c + join corp_list cl on cl.corp_num = c.corp_num + ) + where &target_corp_num_predicate +) +select e.event_id, + sp.MAILING_ADDR_ID, + sp.NOTIFY_ADDR_ID, + sp.METHOD_TYP_CD, + sp.FIRST_NME, + sp.LAST_NME, + sp.MIDDLE_NME, + sp.EMAIL_REQ_ADDRESS, + sp.PICKUP_BY, + sp.BUSINESS_NME, + sp.NOTIFY_FIRST_NME, + 
sp.NOTIFY_LAST_NME, + sp.NOTIFY_MIDDLE_NME, + sp.PHONE_NUMBER +from corporation_cte c +join event e on e.corp_num = c.corp_num +join SUBMITTING_PARTY sp on sp.event_id = e.event_id; + + +-- corp_flag +transfer colin_extract.corp_flag from ctst using +with corp_list as ( + select /*+ materialize */ c.corp_num + from corporation c + where &oracle_corp_num_predicate + and &oracle_corp_type_predicate + and c.CORP_NUM not in ('0460007', '1255957', '1186381') +), +corporation_cte as ( + select * + from ( + select c.*, + case + when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM + else c.CORP_NUM + end as target_corp_num + from corporation c + join corp_list cl on cl.corp_num = c.corp_num + ) + where &target_corp_num_predicate +) +select c.target_corp_num as CORP_NUM, + cf.CORP_FLAG_TYPE_CD, + cf.start_event_id, + cf.end_event_id +from corporation_cte c +join corp_flag cf on cf.corp_num = c.corp_num; + + +-- cont_out +transfer colin_extract.cont_out from ctst using +with corp_list as ( + select /*+ materialize */ c.corp_num + from corporation c + where &oracle_corp_num_predicate + and &oracle_corp_type_predicate + and c.CORP_NUM not in ('0460007', '1255957', '1186381') +), +corporation_cte as ( + select * + from ( + select c.*, + case + when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM + else c.CORP_NUM + end as target_corp_num + from corporation c + join corp_list cl on cl.corp_num = c.corp_num + ) + where &target_corp_num_predicate +) +select c.target_corp_num as CORP_NUM, + co.CAN_JUR_TYP_CD, + co.CONT_OUT_DT, + co.OTHR_JURI_DESC, + co.HOME_COMPANY_NME, + co.start_event_id, + co.end_event_id +from corporation_cte c +join CONT_OUT co on co.corp_num = c.corp_num; + + +-- conv_event +transfer colin_extract.conv_event from ctst using +with corp_list as ( + select /*+ materialize */ c.corp_num + from corporation c + where &oracle_corp_num_predicate + and &oracle_corp_type_predicate + and c.CORP_NUM not in ('0460007', '1255957', '1186381') +), 
+corporation_cte as ( + select * + from ( + select c.*, + case + when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM + else c.CORP_NUM + end as target_corp_num + from corporation c + join corp_list cl on cl.corp_num = c.corp_num + ) + where &target_corp_num_predicate +) +select e.event_id, + ce.effective_dt, + case ce.REPORT_CORP_IND + when 'N' then 'f' + when 'Y' then 't' + else 't' + end as REPORT_CORP_IND, + ce.ACTIVITY_USER_ID, + ce.ACTIVITY_DT, + ce.ANNUAL_FILE_DT, + ce.ACCESSION_NUM, + ce.REMARKS +from corporation_cte c +join event e on e.corp_num = c.corp_num +join CONV_EVENT ce on ce.event_id = e.event_id; + + +-- conv_ledger +transfer colin_extract.conv_ledger from ctst using +with corp_list as ( + select /*+ materialize */ c.corp_num + from corporation c + where &oracle_corp_num_predicate + and &oracle_corp_type_predicate + and c.CORP_NUM not in ('0460007', '1255957', '1186381') +), +corporation_cte as ( + select * + from ( + select c.*, + case + when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM + else c.CORP_NUM + end as target_corp_num + from corporation c + join corp_list cl on cl.corp_num = c.corp_num + ) + where &target_corp_num_predicate +) +select e.event_id, + cl.LEDGER_TITLE_TXT, + cl.LEDGER_DESC, + cl.cars_docmnt_id +from corporation_cte c +join event e on e.corp_num = c.corp_num +join CONV_LEDGER cl on cl.event_id = e.event_id; + + +-- corp_involved - amalgamaTING_businesses +transfer colin_extract.corp_involved_amalgamating from ctst using +with corp_list as ( + select /*+ materialize */ c.corp_num + from corporation c + where &oracle_corp_num_predicate + and &oracle_corp_type_predicate + and c.CORP_NUM not in ('0460007', '1255957', '1186381') +), +corporation_cte as ( + select * + from ( + select c.*, + case + when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM + else c.CORP_NUM + end as target_corp_num + from corporation c + join corp_list cl on cl.corp_num = c.corp_num + ) + where 
&target_corp_num_predicate +) +select e.event_id as event_id, + c.target_corp_num as ted_corp_num, + case + when c2.corp_typ_cd in ('BC', 'ULC', 'CC') then 'BC' || c2.corp_num + else c2.corp_num + end as ting_corp_num, + ci.CORP_INVOLVE_ID as corp_involve_id, + ci.CAN_JUR_TYP_CD as can_jur_typ_cd, + case ci.ADOPTED_CORP_IND + when 'N' then 'f' + when 'Y' then 't' + else 'f' + end as adopted_corp_ind, + ci.HOME_JURI_NUM as home_juri_num, + ci.OTHR_JURI_DESC as othr_juri_desc, + ci.FOREIGN_NME as foreign_nme +from corporation_cte c +join event e on e.corp_num = c.corp_num +join CORP_INVOLVED ci on ci.event_id = e.event_id +join corporation c2 on c2.corp_num = ci.corp_num +where e.event_typ_cd = 'CONVAMAL' +UNION ALL +select e.event_id as event_id, + c.target_corp_num as ted_corp_num, + case + when c2.corp_typ_cd in ('BC', 'ULC', 'CC') then 'BC' || c2.corp_num + else c2.corp_num + end as ting_corp_num, + ci.CORP_INVOLVE_ID as corp_involve_id, + ci.CAN_JUR_TYP_CD as can_jur_typ_cd, + case ci.ADOPTED_CORP_IND + when 'N' then 'f' + when 'Y' then 't' + else 'f' + end as adopted_corp_ind, + ci.HOME_JURI_NUM as home_juri_num, + ci.OTHR_JURI_DESC as othr_juri_desc, + ci.FOREIGN_NME as foreign_nme +from corporation_cte c +join event e on e.corp_num = c.corp_num +join filing f on f.event_id = e.event_id +join CORP_INVOLVED ci on ci.event_id = e.event_id +join corporation c2 on c2.corp_num = ci.corp_num +where f.filing_typ_cd in ('AMALH', 'AMALV', 'AMALR', 'AMLHU', 'AMLVU', 'AMLRU', 'AMLHC', 'AMLVC', 'AMLRC'); + + +-- corp_involved - continue_in_historical_xpro +transfer colin_extract.corp_involved_cont_in from ctst using +with corp_list as ( + select /*+ materialize */ c.corp_num + from corporation c + where &oracle_corp_num_predicate + and &oracle_corp_type_predicate + and c.CORP_NUM not in ('0460007', '1255957', '1186381') +), +corporation_cte as ( + select * + from ( + select c.*, + case + when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM + else c.CORP_NUM + 
end as target_corp_num + from corporation c + join corp_list cl on cl.corp_num = c.corp_num + ) + where &target_corp_num_predicate +) +select e.event_id, + c.target_corp_num as CORP_NUM +from corporation_cte c +join event e on e.corp_num = c.corp_num +join filing f on f.event_id = e.event_id +where f.filing_typ_cd in ('CONTI', 'CONTU', 'CONTC') + and exists (select 1 from CORP_INVOLVED ci where ci.event_id = e.event_id); + + +-- corp_restriction +transfer colin_extract.corp_restriction from ctst using +with corp_list as ( + select /*+ materialize */ c.corp_num + from corporation c + where &oracle_corp_num_predicate + and &oracle_corp_type_predicate + and c.CORP_NUM not in ('0460007', '1255957', '1186381') +), +corporation_cte as ( + select * + from ( + select c.*, + case + when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM + else c.CORP_NUM + end as target_corp_num + from corporation c + join corp_list cl on cl.corp_num = c.corp_num + ) + where &target_corp_num_predicate +) +select c.target_corp_num as CORP_NUM, + case cr.RESTRICTION_IND + when 'N' then 'f' + when 'Y' then 't' + else 'f' + end as RESTRICTION_IND, + cr.start_event_id, + cr.end_event_id +from corporation_cte c +join CORP_RESTRICTION cr on cr.corp_num = c.corp_num; + + +-- correction +transfer colin_extract.correction from ctst using +with corp_list as ( + select /*+ materialize */ c.corp_num + from corporation c + where &oracle_corp_num_predicate + and &oracle_corp_type_predicate + and c.CORP_NUM not in ('0460007', '1255957', '1186381') +), +corporation_cte as ( + select * + from ( + select c.*, + case + when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM + else c.CORP_NUM + end as target_corp_num + from corporation c + join corp_list cl on cl.corp_num = c.corp_num + ) + where &target_corp_num_predicate +) +select e.event_id, + c.target_corp_num as CORP_NUM, + corr.ASSOCIATED_DOC_DESC +from corporation_cte c +join event e on e.corp_num = c.corp_num +join CORRECTION corr 
on corr.event_id = e.event_id; + + +-- continued_in_from_jurisdiction +transfer colin_extract.jurisdiction from ctst using +with corp_list as ( + select /*+ materialize */ c.corp_num + from corporation c + where &oracle_corp_num_predicate + and &oracle_corp_type_predicate + and c.CORP_NUM not in ('0460007', '1255957', '1186381') +), +corporation_cte as ( + select * + from ( + select c.*, + case + when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM + else c.CORP_NUM + end as target_corp_num + from corporation c + join corp_list cl on cl.corp_num = c.corp_num + ) + where &target_corp_num_predicate +) +select c.target_corp_num as CORP_NUM, + j.CAN_JUR_TYP_CD, + j.XPRO_TYP_CD, + j.HOME_RECOGN_DT, + j.OTHR_JURIS_DESC, + j.HOME_JURIS_NUM, + j.BC_XPRO_NUM, + j.HOME_COMPANY_NME, + j.start_event_id +from corporation_cte c +join JURISDICTION j on j.corp_num = c.corp_num; + + +-- resolution +transfer colin_extract.resolution from ctst using +with corp_list as ( + select /*+ materialize */ c.corp_num + from corporation c + where &oracle_corp_num_predicate + and &oracle_corp_type_predicate + and c.CORP_NUM not in ('0460007', '1255957', '1186381') +), +corporation_cte as ( + select * + from ( + select c.*, + case + when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM + else c.CORP_NUM + end as target_corp_num + from corporation c + join corp_list cl on cl.corp_num = c.corp_num + ) + where &target_corp_num_predicate +) +select c.target_corp_num as CORP_NUM, + r.RESOLUTION_DT, + r.RESOLUTION_TYPE_CODE, + r.start_event_id, + r.end_event_id +from corporation_cte c +join RESOLUTION r on r.corp_num = c.corp_num; + + +-- share_struct +transfer colin_extract.share_struct from ctst using +with corp_list as ( + select /*+ materialize */ c.corp_num + from corporation c + where &oracle_corp_num_predicate + and &oracle_corp_type_predicate + and c.CORP_NUM not in ('0460007', '1255957', '1186381') +), +corporation_cte as ( + select * + from ( + select c.*, + case + 
when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM + else c.CORP_NUM + end as target_corp_num + from corporation c + join corp_list cl on cl.corp_num = c.corp_num + ) + where &target_corp_num_predicate +) +select c.target_corp_num as CORP_NUM, + ss.start_event_id, + ss.end_event_id +from corporation_cte c +join SHARE_STRUCT ss on ss.corp_num = c.corp_num; + + +-- share_struct_cls +transfer colin_extract.share_struct_cls from ctst using +with corp_list as ( + select /*+ materialize */ c.corp_num + from corporation c + where &oracle_corp_num_predicate + and &oracle_corp_type_predicate + and c.CORP_NUM not in ('0460007', '1255957', '1186381') +), +corporation_cte as ( + select * + from ( + select c.*, + case + when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM + else c.CORP_NUM + end as target_corp_num + from corporation c + join corp_list cl on cl.corp_num = c.corp_num + ) + where &target_corp_num_predicate +) +select c.target_corp_num as CORP_NUM, + ssc.SHARE_CLASS_ID, + replace(ssc.CLASS_NME, CHR(0), '') as CLASS_NME, + ssc.CURRENCY_TYP_CD, + case ssc.MAX_SHARE_IND + when 'N' then 'f' + when 'Y' then 't' + else 'f' + end as MAX_SHARE_IND, + ssc.SHARE_QUANTITY, + case ssc.SPEC_RIGHTS_IND + when 'N' then 'f' + when 'Y' then 't' + else 'f' + end as SPEC_RIGHTS_IND, + case ssc.PAR_VALUE_IND + when 'N' then 'f' + when 'Y' then 't' + else 'f' + end as PAR_VALUE_IND, + ssc.PAR_VALUE_AMT + 0 as PAR_VALUE_AMT, + ssc.OTHER_CURRENCY, + ssc.start_event_id +from corporation_cte c +join SHARE_STRUCT_CLS ssc on ssc.corp_num = c.corp_num; + + +-- share_series +transfer colin_extract.share_series from ctst using +with corp_list as ( + select /*+ materialize */ c.corp_num + from corporation c + where &oracle_corp_num_predicate + and &oracle_corp_type_predicate + and c.CORP_NUM not in ('0460007', '1255957', '1186381') +), +corporation_cte as ( + select * + from ( + select c.*, + case + when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM + 
else c.CORP_NUM + end as target_corp_num + from corporation c + join corp_list cl on cl.corp_num = c.corp_num + ) + where &target_corp_num_predicate +) +select c.target_corp_num as CORP_NUM, + ss.SHARE_CLASS_ID, + ss.SERIES_ID, + case ss.MAX_SHARE_IND + when 'N' then 'f' + when 'Y' then 't' + else 'f' + end as MAX_SHARE_IND, + ss.SHARE_QUANTITY, + case ss.SPEC_RIGHT_IND + when 'N' then 'f' + when 'Y' then 't' + else 'f' + end as SPEC_RIGHT_IND, + ss.SERIES_NME, + ss.start_event_id +from corporation_cte c +join SHARE_SERIES ss on ss.corp_num = c.corp_num; + + +-- notification +transfer colin_extract.notification from ctst using +with corp_list as ( + select /*+ materialize */ c.corp_num + from corporation c + where &oracle_corp_num_predicate + and &oracle_corp_type_predicate + and c.CORP_NUM not in ('0460007', '1255957', '1186381') +), +corporation_cte as ( + select * + from ( + select c.*, + case + when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM + else c.CORP_NUM + end as target_corp_num + from corporation c + join corp_list cl on cl.corp_num = c.corp_num + ) + where &target_corp_num_predicate +) +select e.event_id, + n.METHOD_TYP_CD, + n.mailing_addr_id, + n.FIRST_NME, + n.LAST_NME, + n.MIDDLE_NME, + n.PICKUP_BY, + n.EMAIL_ADDRESS, + n.PHONE_NUMBER +from corporation_cte c +join event e on e.corp_num = c.corp_num +join NOTIFICATION n on n.event_id = e.event_id; + + +-- notification_resend +transfer colin_extract.notification_resend from ctst using +with corp_list as ( + select /*+ materialize */ c.corp_num + from corporation c + where &oracle_corp_num_predicate + and &oracle_corp_type_predicate + and c.CORP_NUM not in ('0460007', '1255957', '1186381') +), +corporation_cte as ( + select * + from ( + select c.*, + case + when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM + else c.CORP_NUM + end as target_corp_num + from corporation c + join corp_list cl on cl.corp_num = c.corp_num + ) + where &target_corp_num_predicate +) +select 
e.event_id, + nr.METHOD_TYP_CD, + nr.mailing_addr_id, + nr.FIRST_NME, + nr.LAST_NME, + nr.MIDDLE_NME, + nr.PICKUP_BY, + nr.EMAIL_ADDRESS, + nr.PHONE_NUMBER +from corporation_cte c +join event e on e.corp_num = c.corp_num +join NOTIFICATION_RESEND nr on nr.event_id = e.event_id; + + +-- party_notification +transfer colin_extract.party_notification from ctst using +with corp_list as ( + select /*+ materialize */ c.corp_num + from corporation c + where &oracle_corp_num_predicate + and &oracle_corp_type_predicate + and c.CORP_NUM not in ('0460007', '1255957', '1186381') +), +corporation_cte as ( + select * + from ( + select c.*, + case + when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM + else c.CORP_NUM + end as target_corp_num + from corporation c + join corp_list cl on cl.corp_num = c.corp_num + ) + where &target_corp_num_predicate +) +select pn.PARTY_ID, + pn.METHOD_TYP_CD, + pn.mailing_addr_id, + pn.FIRST_NME, + pn.LAST_NME, + pn.MIDDLE_NME, + pn.BUSINESS_NME, + pn.PICKUP_BY, + pn.EMAIL_ADDRESS, + pn.PHONE_NUMBER +from corporation_cte c +join corp_party cp on cp.corp_num = c.corp_num +join PARTY_NOTIFICATION pn on pn.party_id = cp.corp_party_id; + + +-- payment +transfer colin_extract.payment from ctst using +with corp_list as ( + select /*+ materialize */ c.corp_num + from corporation c + where &oracle_corp_num_predicate + and &oracle_corp_type_predicate + and c.CORP_NUM not in ('0460007', '1255957', '1186381') +), +corporation_cte as ( + select * + from ( + select c.*, + case + when c.CORP_TYP_CD in ('BC', 'ULC', 'CC') then 'BC' || c.CORP_NUM + else c.CORP_NUM + end as target_corp_num + from corporation c + join corp_list cl on cl.corp_num = c.corp_num + ) + where &target_corp_num_predicate +) +select p.event_id, + p.payment_typ_cd, + p.cc_holder_nme +from corporation_cte c +join event e on e.corp_num = c.corp_num +join payment p on p.event_id = e.event_id; diff --git a/data-tool/scripts/_generated/subset_load_chunks/transfer_all.sql 
b/data-tool/scripts/_generated/subset_load_chunks/transfer_all.sql index 89f17c9102..332964eba0 100644 --- a/data-tool/scripts/_generated/subset_load_chunks/transfer_all.sql +++ b/data-tool/scripts/_generated/subset_load_chunks/transfer_all.sql @@ -4,7 +4,7 @@ -- target corps: 2 -- oracle corp_num: 2 --- Transfer a chunk (or a whole subset) of corps from the SOURCE Oracle DB (cprd) into the TARGET Postgres extract DB (cprd_pg). +-- Transfer a chunk (or a whole subset) of corps from the SOURCE Oracle DB (ctst) into the TARGET Postgres extract DB (colin_extract schema). -- -- REQUIRED DbSchemaCLI variables (replace_variables=true): -- target_corp_num_predicate : SQL predicate restricting the computed target_corp_num (NO trailing semicolon). @@ -20,7 +20,7 @@ -- c.CORP_TYP_CD in ('BC','C','ULC','CUL','CC','CCC','QA','QB','QC','QD','QE') -- c.CORP_TYP_CD in ('BC','C','ULC','CUL','CC','CCC','QA','QB','QC','QD','QE','CP') -- --- Intended to be executed from a master DbSchemaCLI script connected to the target Postgres DB (cprd_pg). +-- Intended to be executed from a master DbSchemaCLI script connected to the target Postgres DB (colin_extract schema). -- -- IMPORTANT: -- - This template intentionally avoids the boolean<->integer ALTER COLUMN hacks used in the full refresh script. 
@@ -39,7 +39,7 @@ -- vset oracle_corp_num_predicate=c.CORP_NUM in ('1111585','1226175'); -- corporation -transfer public.corporation from cprd using +transfer colin_extract.corporation from ctst using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -100,7 +100,7 @@ left join last_ar la on la.corp_num = c.corp_num; -- event -transfer public.event from cprd using +transfer colin_extract.event from ctst using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -139,7 +139,7 @@ where e.event_typ_cd not in ('BNUPD', 'ADDLEDGR'); -- corp_name -transfer public.corp_name from cprd using +transfer colin_extract.corp_name from ctst using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -176,7 +176,7 @@ join CORP_NAME cn on cn.corp_num = c.corp_num; -- corp_state -transfer public.corp_state from cprd using +transfer colin_extract.corp_state from ctst using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -214,7 +214,7 @@ join corp_op_state cos on cos.state_typ_cd = cs.state_typ_cd; -- filing -transfer public.filing from cprd using +transfer colin_extract.filing from ctst using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -267,7 +267,7 @@ join filing f on f.event_id = e.event_id; -- filing_user -transfer public.filing_user from cprd using +transfer colin_extract.filing_user from ctst using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -308,9 +308,9 @@ join filing_user u on u.event_id = e.event_id; -- address (shared/global table; stage then merge before loading dependents) -TRUNCATE TABLE public.subset_address_stage; +TRUNCATE TABLE colin_extract.subset_address_stage; -transfer public.subset_address_stage from cprd using +transfer colin_extract.subset_address_stage from ctst using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -394,7 +394,7 @@ from ( join address 
a on x.mailing_addr_id = a.addr_id ); -INSERT INTO public.address ( +INSERT INTO colin_extract.address ( addr_id, province, country_typ_cd, @@ -422,7 +422,7 @@ FROM ( addr_line_2, addr_line_3, city - FROM public.subset_address_stage + FROM colin_extract.subset_address_stage WHERE addr_id IS NOT NULL ORDER BY addr_id ) s @@ -435,11 +435,11 @@ SET province = EXCLUDED.province, addr_line_3 = EXCLUDED.addr_line_3, city = EXCLUDED.city; -TRUNCATE TABLE public.subset_address_stage; +TRUNCATE TABLE colin_extract.subset_address_stage; -- office -transfer public.office from cprd using +transfer colin_extract.office from ctst using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -477,7 +477,7 @@ join office o on o.corp_num = c.corp_num; -- corp_comments -transfer public.corp_comments from cprd using +transfer colin_extract.corp_comments from ctst using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -517,7 +517,7 @@ join corp_comments cc on cc.corp_num = c.corp_num; -- ledger_text -transfer public.ledger_text from cprd using +transfer colin_extract.ledger_text from ctst using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -554,7 +554,7 @@ join ledger_text l on l.event_id = e.event_id; -- corp_party -transfer public.corp_party from cprd using +transfer colin_extract.corp_party from ctst using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -603,7 +603,7 @@ join corp_party p on p.corp_num = c.corp_num; -- corp_party_relationship -transfer public.corp_party_relationship from cprd using +transfer colin_extract.corp_party_relationship from ctst using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -638,7 +638,7 @@ join CORP_PARTY_RELATIONSHIP cpr on cpr.corp_party_id = p.corp_party_id; -- offices_held -transfer public.offices_held from cprd using +transfer colin_extract.offices_held from ctst using with corp_list as ( select 
/*+ materialize */ c.corp_num from corporation c @@ -673,7 +673,7 @@ join OFFICES_HELD oh on oh.corp_party_id = p.corp_party_id; -- completing_party -transfer public.completing_party from cprd using +transfer colin_extract.completing_party from ctst using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -712,7 +712,7 @@ join completing_party cp on cp.event_id = e.event_id; -- submitting_party -transfer public.submitting_party from cprd using +transfer colin_extract.submitting_party from ctst using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -759,7 +759,7 @@ join SUBMITTING_PARTY sp on sp.event_id = e.event_id; -- corp_flag -transfer public.corp_flag from cprd using +transfer colin_extract.corp_flag from ctst using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -795,7 +795,7 @@ join corp_flag cf on cf.corp_num = c.corp_num; -- cont_out -transfer public.cont_out from cprd using +transfer colin_extract.cont_out from ctst using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -834,7 +834,7 @@ join CONT_OUT co on co.corp_num = c.corp_num; -- conv_event -transfer public.conv_event from cprd using +transfer colin_extract.conv_event from ctst using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -879,7 +879,7 @@ join CONV_EVENT ce on ce.event_id = e.event_id; -- conv_ledger -transfer public.conv_ledger from cprd using +transfer colin_extract.conv_ledger from ctst using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -916,7 +916,7 @@ join CONV_LEDGER cl on cl.event_id = e.event_id; -- corp_involved - amalgamaTING_businesses -transfer public.corp_involved_amalgamating from cprd using +transfer colin_extract.corp_involved_amalgamating from ctst using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -990,7 +990,7 @@ where f.filing_typ_cd in ('AMALH', 'AMALV', 
'AMALR', 'AMLHU', 'AMLVU', 'AMLRU', -- corp_involved - continue_in_historical_xpro -transfer public.corp_involved_cont_in from cprd using +transfer colin_extract.corp_involved_cont_in from ctst using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -1027,7 +1027,7 @@ where f.filing_typ_cd in ('CONTI', 'CONTU', 'CONTC') -- corp_restriction -transfer public.corp_restriction from cprd using +transfer colin_extract.corp_restriction from ctst using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -1067,7 +1067,7 @@ join CORP_RESTRICTION cr on cr.corp_num = c.corp_num; -- correction -transfer public.correction from cprd using +transfer colin_extract.correction from ctst using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -1103,7 +1103,7 @@ join CORRECTION corr on corr.event_id = e.event_id; -- continued_in_from_jurisdiction -transfer public.jurisdiction from cprd using +transfer colin_extract.jurisdiction from ctst using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -1144,7 +1144,7 @@ join JURISDICTION j on j.corp_num = c.corp_num; -- resolution -transfer public.resolution from cprd using +transfer colin_extract.resolution from ctst using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -1181,7 +1181,7 @@ join RESOLUTION r on r.corp_num = c.corp_num; -- share_struct -transfer public.share_struct from cprd using +transfer colin_extract.share_struct from ctst using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -1216,7 +1216,7 @@ join SHARE_STRUCT ss on ss.corp_num = c.corp_num; -- share_struct_cls -transfer public.share_struct_cls from cprd using +transfer colin_extract.share_struct_cls from ctst using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -1271,7 +1271,7 @@ join SHARE_STRUCT_CLS ssc on ssc.corp_num = c.corp_num; -- share_series -transfer 
public.share_series from cprd using +transfer colin_extract.share_series from ctst using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -1319,7 +1319,7 @@ join SHARE_SERIES ss on ss.corp_num = c.corp_num; -- notification -transfer public.notification from cprd using +transfer colin_extract.notification from ctst using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -1361,7 +1361,7 @@ join NOTIFICATION n on n.event_id = e.event_id; -- notification_resend -transfer public.notification_resend from cprd using +transfer colin_extract.notification_resend from ctst using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -1403,7 +1403,7 @@ join NOTIFICATION_RESEND nr on nr.event_id = e.event_id; -- party_notification -transfer public.party_notification from cprd using +transfer colin_extract.party_notification from ctst using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c @@ -1446,7 +1446,7 @@ join PARTY_NOTIFICATION pn on pn.party_id = cp.corp_party_id; -- payment -transfer public.payment from cprd using +transfer colin_extract.payment from ctst using with corp_list as ( select /*+ materialize */ c.corp_num from corporation c diff --git a/data-tool/scripts/generate_cprd_subset_extract.py b/data-tool/scripts/generate_cprd_subset_extract.py index d74dcd9e76..42f04c6c45 100644 --- a/data-tool/scripts/generate_cprd_subset_extract.py +++ b/data-tool/scripts/generate_cprd_subset_extract.py @@ -490,6 +490,7 @@ def chunk_plan_chunks( # ========================= def gen_write_text(path: Path, text: str) -> None: + path.parent.mkdir(parents=True, exist_ok=True) path.write_text(text, encoding="utf-8") From b43b5b4adaf2588ec1e9491e9ba5e626fa77c7dc Mon Sep 17 00:00:00 2001 From: ketaki-deodhar Date: Wed, 17 Jun 2026 11:40:58 -0700 Subject: [PATCH 03/10] 31328 - initial commit 3 --- data-tool/scripts/_generated/subset_load.sql | 4 ++-- 
data-tool/scripts/generate_cprd_subset_extract.py | 14 ++++++++++---- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/data-tool/scripts/_generated/subset_load.sql b/data-tool/scripts/_generated/subset_load.sql index f8ba3c891e..20336dfe31 100644 --- a/data-tool/scripts/_generated/subset_load.sql +++ b/data-tool/scripts/_generated/subset_load.sql @@ -4,7 +4,7 @@ vset cli.settings.transfer_threads=4 vset format.date=YYYY-MM-dd'T'hh:mm:ss'Z' vset format.timestamp=YYYY-MM-dd'T'hh:mm:ss'Z' -connect cprd_pg; +connect ctst_pg_subset; -- Serialize subset runs on this target DB. execute /home/kdeodhar/repos/lear/data-tool/scripts/subset/subset_pg_acquire_advisory_lock.sql @@ -12,7 +12,7 @@ execute /home/kdeodhar/repos/lear/data-tool/scripts/subset/subset_pg_acquire_adv execute /home/kdeodhar/repos/lear/data-tool/scripts/_generated/subset_load_chunks/support/subset_pg_prepare_address_stage.sql learn schema colin_extract; -truncate table public.colin_extract_version; insert into public.colin_extract_version (extracted_at) values (current_timestamp); +truncate table colin_extract.colin_extract_version; insert into colin_extract.colin_extract_version (extracted_at) values (current_timestamp); -- Postgres fast-load mode (session-level settings) execute /home/kdeodhar/repos/lear/data-tool/scripts/subset/subset_pg_fastload_begin.sql diff --git a/data-tool/scripts/generate_cprd_subset_extract.py b/data-tool/scripts/generate_cprd_subset_extract.py index 42f04c6c45..11a0b84634 100644 --- a/data-tool/scripts/generate_cprd_subset_extract.py +++ b/data-tool/scripts/generate_cprd_subset_extract.py @@ -775,8 +775,8 @@ def gen_build_master_script_inline( lines.append(f"learn schema {cfg.target_schema};") lines.append("") - lines.append("truncate table public.colin_extract_version; " - "insert into public.colin_extract_version (extracted_at) values (current_timestamp); " + lines.append(f"truncate table {cfg.target_schema}.colin_extract_version; " + f"insert into 
{cfg.target_schema}.colin_extract_version (extracted_at) values (current_timestamp); " ) lines.append("") @@ -879,8 +879,8 @@ def gen_build_master_script_vset( lines.append(f"learn schema {cfg.target_schema};") lines.append("") - lines.append("truncate table public.colin_extract_version; " - "insert into public.colin_extract_version (extracted_at) values (current_timestamp); " + lines.append(f"truncate table {cfg.target_schema}.colin_extract_version; " + f"insert into {cfg.target_schema}.colin_extract_version (extracted_at) values (current_timestamp); " ) lines.append("") @@ -1038,6 +1038,12 @@ def cli_parse_args(argv: List[str] | None = None) -> argparse.Namespace: action="store_true", help="If set, any all-numeric corp id lines will be normalized to BC for the TARGET/Postgres corp_num.", ) + parser.add_argument( + "--include-cars", + dest="include_cars", + action="store_true", + help="Include global cars* refresh step (carsfile/carsbox/carsrept/carindiv; generator default).", + ) parser.add_argument( "--no-cars", dest="include_cars", From 151b1ad13e278673cd3d7713e3d901a338039b98 Mon Sep 17 00:00:00 2001 From: ketaki-deodhar Date: Fri, 19 Jun 2026 13:26:29 -0700 Subject: [PATCH 04/10] 31328 - initial commit 4 --- .../scripts/generate_cprd_subset_extract.py | 74 ++++++++++++------- 1 file changed, 48 insertions(+), 26 deletions(-) diff --git a/data-tool/scripts/generate_cprd_subset_extract.py b/data-tool/scripts/generate_cprd_subset_extract.py index 11a0b84634..ff949861a7 100644 --- a/data-tool/scripts/generate_cprd_subset_extract.py +++ b/data-tool/scripts/generate_cprd_subset_extract.py @@ -30,9 +30,8 @@ from __future__ import annotations import argparse -import os import re -from dataclasses import dataclass,replace +from dataclasses import dataclass, replace from enum import Enum from pathlib import Path from typing import Dict, Iterable, List, Sequence @@ -312,30 +311,37 @@ def tmpl_default_bundle(repo_root: Path) -> tmpl_TemplateBundle: 
pg_prepare_address_stage = tmpl_TemplateSpec( name="subset_pg_prepare_address_stage", path=subset_dir / "subset_pg_prepare_address_stage.sql", + required_tokens=(TMPL_TOKEN_TARGET_SCHEMA,), ) pg_cleanup_address_stage = tmpl_TemplateSpec( name="subset_pg_cleanup_address_stage", path=subset_dir / "subset_pg_cleanup_address_stage.sql", + required_tokens=(TMPL_TOKEN_TARGET_SCHEMA,), ) pg_cleanup_orphan_children = tmpl_TemplateSpec( name="subset_pg_cleanup_orphan_children", path=subset_dir / "subset_pg_cleanup_orphan_children.sql", + required_tokens=(TMPL_TOKEN_TARGET_SCHEMA,), ) disable_triggers = tmpl_TemplateSpec( name="subset_disable_triggers", path=subset_dir / "subset_disable_triggers.sql", + required_tokens=(TMPL_TOKEN_TARGET_SCHEMA,), ) enable_triggers = tmpl_TemplateSpec( name="subset_enable_triggers", path=subset_dir / "subset_enable_triggers.sql", + required_tokens=(TMPL_TOKEN_TARGET_SCHEMA,), ) pg_boolean_casts = tmpl_TemplateSpec( name="subset_pg_boolean_casts", path=subset_dir / "subset_pg_boolean_casts.sql", + required_tokens=(TMPL_TOKEN_TARGET_SCHEMA,), ) pg_fastload_begin = tmpl_TemplateSpec( name="subset_pg_fastload_begin", path=subset_dir / "subset_pg_fastload_begin.sql", + required_tokens=(TMPL_TOKEN_TARGET_SCHEMA,), ) pg_fastload_end = tmpl_TemplateSpec( name="subset_pg_fastload_end", @@ -344,24 +350,33 @@ def tmpl_default_bundle(repo_root: Path) -> tmpl_TemplateBundle: pg_purge_bcomps_excluded = tmpl_TemplateSpec( name="subset_pg_purge_bcomps_excluded", path=subset_dir / "subset_pg_purge_bcomps_excluded.sql", + required_tokens=(TMPL_TOKEN_TARGET_SCHEMA,), ) delete_chunk = tmpl_TemplateSpec( name="subset_delete_chunk", path=subset_dir / "subset_delete_chunk.sql", - required_tokens=(TMPL_TOKEN_CORP_IDS,), + required_tokens=(TMPL_TOKEN_CORP_IDS, TMPL_TOKEN_TARGET_SCHEMA), ) transfer_chunk = tmpl_TemplateSpec( name="subset_transfer_chunk", path=subset_dir / "subset_transfer_chunk.sql", - required_tokens=(TMPL_TOKEN_TARGET_PRED, TMPL_TOKEN_ORACLE_PRED, 
TMPL_TOKEN_ORACLE_CORP_TYPE_PRED), + required_tokens=( + TMPL_TOKEN_TARGET_PRED, + TMPL_TOKEN_ORACLE_PRED, + TMPL_TOKEN_ORACLE_CORP_TYPE_PRED, + TMPL_TOKEN_SOURCE_CONNECTION, + TMPL_TOKEN_TARGET_SCHEMA, + ), ) delete_cars = tmpl_TemplateSpec( name="subset_delete_cars", path=subset_dir / "subset_delete_cars.sql", + required_tokens=(TMPL_TOKEN_TARGET_SCHEMA,), ) transfer_cars = tmpl_TemplateSpec( name="subset_transfer_cars", path=subset_dir / "subset_transfer_cars.sql", + required_tokens=(TMPL_TOKEN_SOURCE_CONNECTION, TMPL_TOKEN_TARGET_SCHEMA), ) return tmpl_TemplateBundle( @@ -626,7 +641,9 @@ def gen_build_chunk_sql( rendered_transfer = tmpl_render(transfer_template_text, replacements=replacements) if (TMPL_TOKEN_TARGET_PRED in rendered_transfer or TMPL_TOKEN_ORACLE_PRED in rendered_transfer or - TMPL_TOKEN_ORACLE_CORP_TYPE_PRED in rendered_transfer): + TMPL_TOKEN_ORACLE_CORP_TYPE_PRED in rendered_transfer or + TMPL_TOKEN_SOURCE_CONNECTION in rendered_transfer or + TMPL_TOKEN_TARGET_SCHEMA in rendered_transfer): raise SystemExit( f"Internal error: token(s) remained after rendering transfer template for chunk {chunk.index:03d}." 
) @@ -690,9 +707,12 @@ def _gen_emit_pg_disable_begin(lines: List[str], *, cfg: cfg_GenerationConfig, t lines.append(f"execute {templates.disable_triggers.path.as_posix()}") if cfg.mode == cfg_GenerationMode.REFRESH: lines.append("-- Refresh-only: preserved processing/tracking tables still reference corporation/event rows.") - lines.append("ALTER TABLE corp_processing DISABLE TRIGGER ALL;") - # lines.append("ALTER TABLE auth_processing DISABLE TRIGGER ALL;") - lines.append("ALTER TABLE colin_tracking DISABLE TRIGGER ALL;") + lines.append(f"ALTER TABLE {cfg.target_schema}.corp_processing DISABLE TRIGGER ALL;") + lines.append("-- Deferred: DDL/docs identify auth_processing and affiliation_processing as preserved FK-owning") + lines.append("-- tables too, but auth_processing was introduced here as commented-out with no repo-history") + lines.append("-- rationale. Do not extend table-trigger suppression until Cloud SQL privilege/runtime") + lines.append("-- validation confirms the intended preserved-table set.") + lines.append(f"ALTER TABLE {cfg.target_schema}.colin_tracking DISABLE TRIGGER ALL;") lines.append("") return @@ -710,9 +730,9 @@ def _gen_emit_pg_disable_end(lines: List[str], *, cfg: cfg_GenerationConfig, tem lines.append(f"execute {templates.enable_triggers.path.as_posix()}") if cfg.mode == cfg_GenerationMode.REFRESH: lines.append("-- Refresh-only: restore preserved processing/tracking table triggers too.") - lines.append("ALTER TABLE corp_processing ENABLE TRIGGER ALL;") - # lines.append("ALTER TABLE auth_processing ENABLE TRIGGER ALL;") - lines.append("ALTER TABLE colin_tracking ENABLE TRIGGER ALL;") + lines.append(f"ALTER TABLE {cfg.target_schema}.corp_processing ENABLE TRIGGER ALL;") + lines.append("-- Deferred: see matching disable-side note for auth_processing/affiliation_processing.") + lines.append(f"ALTER TABLE {cfg.target_schema}.colin_tracking ENABLE TRIGGER ALL;") lines.append("") return @@ -902,10 +922,6 @@ def gen_build_master_script_vset( 
_gen_emit_pg_disable_begin(lines, cfg=cfg, templates=templates) _gen_emit_refresh_fk_note(lines, cfg=cfg) - if cfg.mode == cfg_GenerationMode.REFRESH: - lines.append("-- Cleanup stale orphan child rows before chunked refresh deletes.") - lines.append(f"execute {templates.pg_cleanup_orphan_children.path.as_posix()}") - lines.append("") if cfg.include_cars: lines.append("-- global cars* refresh (not corp-scoped; full dataset truncate + reload)") @@ -1135,16 +1151,19 @@ def cfg_build_config(args: argparse.Namespace) -> cfg_GenerationConfig: ) out_master.parent.mkdir(parents=True, exist_ok=True) - # Chunk scripts dir is always derived from master output stem for determinism. + # Chunk/support scripts dir is always derived from master output stem for determinism. out_chunks_dir = out_master.parent / f"{out_master.stem}_chunks" - if render_mode == cfg_RenderMode.INLINE: - out_chunks_dir.mkdir(parents=True, exist_ok=True) + out_chunks_dir.mkdir(parents=True, exist_ok=True) if args.chunk_size <= 0: raise SystemExit("--chunk-size must be > 0") if args.threads <= 0: raise SystemExit("--threads must be > 0") + source_connection = cfg_validate_dbschema_identifier("--source-connection", str(args.source_connection)) + target_connection = cfg_validate_dbschema_identifier("--target-connection", str(args.target_connection)) + target_schema = cfg_validate_pg_schema_identifier("--target-schema", str(args.target_schema)) + return cfg_GenerationConfig( repo_root=repo_root, corp_file=corp_file, @@ -1162,9 +1181,9 @@ def cfg_build_config(args: argparse.Namespace) -> cfg_GenerationConfig: or_of_in_max_ids=or_of_in_max_ids, out_master=out_master, out_chunks_dir=out_chunks_dir, - source_connection=str(args.source_connection), - target_connection=str(args.target_connection), - target_schema=str(args.target_schema), + source_connection=source_connection, + target_connection=target_connection, + target_schema=target_schema, ) @@ -1177,9 +1196,6 @@ def _effective_oracle_strategy(cfg: 
cfg_GenerationConfig, total_ids: int) -> cfg def run(cfg: cfg_GenerationConfig) -> int: templates = tmpl_default_bundle(cfg.repo_root) templates = gen_write_rendered_connection_templates(cfg=cfg, templates=templates) - - DBSCHEMA_SOURCE_CONNECTION = os.getenv('DBSCHEMA_SOURCE_CONNECTION', '') - DBSCHEMA_TARGET_SCHEMA = os.getenv('DBSCHEMA_TARGET_SCHEMA', '') if cfg.pg_debug_session_probes and cfg.render_mode != cfg_RenderMode.INLINE: raise SystemExit("--pg-debug-session-probes currently supports only --render-mode inline.") @@ -1303,6 +1319,9 @@ def run(cfg: cfg_GenerationConfig) -> int: print(" - Corp ids in the file should match the TARGET Postgres extract corp_num format (e.g. BC0460007).") print(" - If you have numeric-only corp ids, consider --prefix-numeric-bc.") print(f" - corp ids: {n_ids} => ceil({n_ids}/{cfg.chunk_size}) = {in_groups} chunk(s)") + print(f" - Source DbSchema connection: {cfg.source_connection}") + print(f" - Target DbSchema connection: {cfg.target_connection}") + print(f" - Target schema: {cfg.target_schema}") print(f" - Oracle IN-list handling: {effective_strategy.value} (configured: {cfg.oracle_in_strategy.value})") print(f" - chunk-size (max items per IN list): {cfg.chunk_size}") if effective_strategy == cfg_OracleInStrategy.CHUNK_FILES: @@ -1322,7 +1341,7 @@ def run(cfg: cfg_GenerationConfig) -> int: print(f" - Postgres fast-load session settings: {'ENABLED' if cfg.pg_fastload else 'disabled'} (--pg-fastload)") print(f" - Postgres trigger suppression: {cfg.pg_disable_method.value} (--pg-disable-method)") print(" - subset runs acquire a session-level advisory lock on the target DB to prevent overlap.") - print(" - Address loads use the predeclared helper table public.subset_address_stage and merge into public.address by addr_id.") + print(f" - Address loads use the predeclared helper table {cfg.target_schema}.subset_address_stage and merge into {cfg.target_schema}.address by addr_id.") print(" - BCOMPS purge keysets also use predeclared 
helper tables in the extract schema (subset_excluded_*).") print(" - subset runs should not overlap on the same target DB, and the runtime role must be able to truncate/read/write those helper tables.") if cfg.pg_debug_session_probes: @@ -1361,6 +1380,9 @@ def run(cfg: cfg_GenerationConfig) -> int: print("Notes:") print(" - This script relies on DbSchemaCLI vset variables and runtime substitution.") print(f" - corp ids: {n_ids} => ceil({n_ids}/{cfg.chunk_size}) = {in_groups} chunk(s)") + print(f" - Source DbSchema connection: {cfg.source_connection}") + print(f" - Target DbSchema connection: {cfg.target_connection}") + print(f" - Target schema: {cfg.target_schema}") print(f" - Oracle IN-list handling: {effective_strategy.value} (configured: {cfg.oracle_in_strategy.value})") print(f" - chunk-size (max items per IN list): {cfg.chunk_size}") if effective_strategy == cfg_OracleInStrategy.CHUNK_FILES: @@ -1376,7 +1398,7 @@ def run(cfg: cfg_GenerationConfig) -> int: print(f" - Postgres fast-load session settings: {'ENABLED' if cfg.pg_fastload else 'disabled'} (--pg-fastload)") print(f" - Postgres trigger suppression: {cfg.pg_disable_method.value} (--pg-disable-method)") print(" - subset runs acquire a session-level advisory lock on the target DB to prevent overlap.") - print(" - Address loads use the predeclared helper table public.subset_address_stage and merge into public.address by addr_id.") + print(f" - Address loads use the predeclared helper table {cfg.target_schema}.subset_address_stage and merge into {cfg.target_schema}.address by addr_id.") print(" - BCOMPS purge keysets also use predeclared helper tables in the extract schema (subset_excluded_*).") print(" - subset runs should not overlap on the same target DB, and the runtime role must be able to truncate/read/write those helper tables.") if cfg.pg_debug_session_probes: From 16ad839625bd1ddfb7ae9092f044e2879071bdfe Mon Sep 17 00:00:00 2001 From: ketaki-deodhar Date: Wed, 24 Jun 2026 15:15:10 -0700 Subject: 
[PATCH 05/10] 31328 - refresh extract flow updates --- .../flows/refresh_extract_subset_flow.py | 139 +++++++++++++++++- 1 file changed, 135 insertions(+), 4 deletions(-) diff --git a/data-tool/flows/refresh_extract_subset_flow.py b/data-tool/flows/refresh_extract_subset_flow.py index c3eedc8d6b..e5ce9e2b6a 100644 --- a/data-tool/flows/refresh_extract_subset_flow.py +++ b/data-tool/flows/refresh_extract_subset_flow.py @@ -1,6 +1,8 @@ import argparse import os +from dataclasses import dataclass from pathlib import Path +from enum import Enum import re import subprocess import sys @@ -26,6 +28,122 @@ _BUILD_VIEWS_SCRIPT = _REPO_ROOT / 'data-tool' / 'scripts' / 'colin_corps_extract_postgres_views_ddl' +# ========================= +# cfg_* (types & config) +# ========================= + +class cfg_GenerationMode(str, Enum): + REFRESH = "refresh" # delete + reload + LOAD = "load" # load only + + +class cfg_RenderMode(str, Enum): + INLINE = "inline" # render templates into chunk files (no vset) + VSET = "vset" # legacy behavior (runtime vset substitution) + + +class cfg_OracleInStrategy(str, Enum): + AUTO = "auto" + CHUNK_FILES = "chunk_files" + OR_OF_IN_LISTS = "or_of_in_lists" + + +class cfg_PgDisableMethod(str, Enum): + TABLE_TRIGGERS = "table_triggers" # ALTER TABLE ... 
DISABLE/ENABLE TRIGGER ALL (default) + REPLICA_ROLE = "replica_role" # SET session_replication_role=replica|origin (superuser only) + +@dataclass +class SubsetConfig: + corp_file: Path + mode: cfg_GenerationMode + delta_scope: str = 'batch' + chunk_size: int + threads: int + prefix_numeric_bc: bool + include_cp: bool + + pg_fastload: bool + pg_disable_method: cfg_PgDisableMethod + + out_master: Path + run_dbschemacli: bool + dbschemacli_cmd: str + + refresh_views: bool + + reset_extract_postgres: bool + + source_connection: str + target_connection: str + target_schema: str + + + +# @dataclass +# class ExtractSubsetConfig: +# """Configuration for Extract-Subset-Flow execution.""" +# corp_file: str +# mode: str = 'refresh' +# delta_scope: str = 'batch' +# chunk_size: int = 900 +# threads: int = 4 +# pg_fastload: bool = False +# include_cp: bool = False +# pg_disable_method: str = 'table_triggers' +# out: str | None = None +# run_dbschemacli: bool = False +# refresh_views: bool = True +# dbschemacli_cmd: str = 'dbschemacli' +# reset_extract_postgres: bool = True +# source_connection: str = 'ctst' +# target_connection: str = _DEFAULT_TARGET_CONNECTION +# target_schema: str = 'public' + +# @classmethod +# def from_args(cls, args: argparse.Namespace) -> 'ExtractSubsetConfig': +# """Create config from parsed arguments.""" +# return cls( +# corp_file=args.corp_file, +# mode=args.mode, +# delta_scope=args.delta_scope, +# chunk_size=args.chunk_size, +# threads=args.threads, +# pg_fastload=args.pg_fastload, +# include_cp=args.include_cp, +# pg_disable_method=args.pg_disable_method, +# out=args.out, +# run_dbschemacli=args.run_dbschemacli, +# refresh_views=args.refresh_views, +# dbschemacli_cmd=args.dbschemacli_cmd, +# reset_extract_postgres=args.reset_extract_postgres, +# source_connection=args.source_connection, +# target_connection=args.target_connection, +# target_schema=args.target_schema, +# ) + +def build_configs(args: argparse.Namespace) -> SubsetConfig: + """Build 
SubsetConfig from parsed arguments.""" + return SubsetConfig( + corp_file=Path(args.corp_file).expanduser().resolve(), + mode=cfg_GenerationMode(args.mode), + delta_scope=args.delta_scope, + chunk_size=args.chunk_size, + threads=args.threads, + prefix_numeric_bc=(args.mode == 'refresh'), + include_cp=args.include_cp, + pg_fastload=args.pg_fastload, + pg_disable_method=cfg_PgDisableMethod(args.pg_disable_method), + out_master=_resolve_master_script_path(args.out), + run_dbschemacli=args.run_dbschemacli, + dbschemacli_cmd=args.dbschemacli_cmd, + refresh_views=args.refresh_views, + reset_extract_postgres=args.reset_extract_postgres, + source_connection=args.source_connection, + target_connection=args.target_connection, + target_schema=args.target_schema + ) + + def _resolve_master_script_path(out: str | None) -> Path: if not out: return _SUBSET.resolve() @@ -334,9 +452,9 @@ def extract_pull_flow( prune_identifiers = get_fallen_identifiers(updated_corp_nums) prune_fallen_identifiers(prune_identifiers) -if __name__ == '__main__': +def build_arg_parser() -> argparse.ArgumentParser: p = argparse.ArgumentParser(description='Run Extract-Pull flow....') - p.add_argument('--corp_file', default='../data-tool/scripts/generated/delta_ctst.txt', help='Path to newline-delimited corp identifiers') + p.add_argument('--corp_file', default='../data-tool/scripts/generated/corp_ids_ctst.txt', help='Path to newline-delimited corp identifiers') p.add_argument('--mode', default='refresh', choices=('refresh', 'load')) p.add_argument('--delta-scope', default='batch', choices=('batch', 'full')) p.add_argument('--chunk-size', type=int, default=900, help='Max items per IN list.') @@ -349,5 +467,18 @@ def extract_pull_flow( p.add_argument('--refresh-views', action='store_false') p.add_argument('--dbschemacli-cmd', default='dbschemacli') p.add_argument('--reset-extract-postgres', action='store_false') - p.add_argument('--target-connection', default=_DEFAULT_TARGET_CONNECTION) - 
extract_pull_flow(**vars(p.parse_args())) + p.add_argument('--source-connection', default='ctst') + p.add_argument('--target-connection', default='ctst_pg') + p.add_argument('--target-schema', default='public') + return p + + +def main(argv: list[str] | None = None) -> int: + args = build_arg_parser().parse_args(argv) + config = build_configs(args) + extract_pull_flow(**vars(config)) + return 0 + + +if __name__ == '__main__': + raise SystemExit(main()) From fac932f9fc1679e2b8b7ffcef227f0f9caf40eb5 Mon Sep 17 00:00:00 2001 From: ketaki-deodhar Date: Mon, 29 Jun 2026 07:14:14 -0700 Subject: [PATCH 06/10] 31328 - refresh extract flow and related updates --- data-tool/flows/common/colin_queries.py | 12 +- data-tool/flows/common/query_utils.py | 6 +- .../flows/refresh_extract_subset_flow.py | 251 +++++++++++++----- 3 files changed, 200 insertions(+), 69 deletions(-) diff --git a/data-tool/flows/common/colin_queries.py b/data-tool/flows/common/colin_queries.py index 221b2280a1..f2426e9f5b 100644 --- a/data-tool/flows/common/colin_queries.py +++ b/data-tool/flows/common/colin_queries.py @@ -76,13 +76,15 @@ def get_identifiers_per_batch(mig_batch_id: int) -> str: WHERE mcb.mig_batch_id IN ({mig_batch_id}) """ -def unfreeze_identifiers() -> str: +def unfreeze_identifiers(target_schema: str | None = 'public') -> str: + safe_schema = (target_schema or 'public').strip() or 'public' + safe_schema = '"' + safe_schema.replace('"', '""') + '"' return f""" - UPDATE corporation AS c + UPDATE {safe_schema}.corporation AS c SET corp_frozen_type_cd = NULL - FROM mig_group AS mg - JOIN mig_batch AS mb ON mb.mig_group_id = mg.id - JOIN mig_corp_batch AS mcb ON mcb.mig_batch_id = mb.id + FROM {safe_schema}.mig_group AS mg + JOIN {safe_schema}.mig_batch AS mb ON mb.mig_group_id = mg.id + JOIN {safe_schema}.mig_corp_batch AS mcb ON mcb.mig_batch_id = mb.id WHERE c.corp_num = mcb.corp_num -- cprd and mg.name in ('group_0', 'group_1', 'group_3', 
'group_4','gcp_migration_group_test','misc_group') diff --git a/data-tool/flows/common/query_utils.py b/data-tool/flows/common/query_utils.py index bb04fa378c..aa9ce1596f 100644 --- a/data-tool/flows/common/query_utils.py +++ b/data-tool/flows/common/query_utils.py @@ -108,7 +108,9 @@ def prune_candidates_from_account(pruning_corps_list: list) -> str: WHERE corp_num IN {in_list} """ -def get_cutoff_timestamp_query() -> str: +def get_cutoff_timestamp_query(target_schema: str | None = 'public') -> str: + safe_schema = (target_schema or 'public').strip() or 'public' + safe_schema = '"' + safe_schema.replace('"', '""') + '"' return f""" - SELECT extracted_at FROM colin_extract_version + SELECT extracted_at FROM {safe_schema}.colin_extract_version """ diff --git a/data-tool/flows/refresh_extract_subset_flow.py b/data-tool/flows/refresh_extract_subset_flow.py index e5ce9e2b6a..07f06e83be 100644 --- a/data-tool/flows/refresh_extract_subset_flow.py +++ b/data-tool/flows/refresh_extract_subset_flow.py @@ -52,11 +52,15 @@ class cfg_PgDisableMethod(str, Enum): TABLE_TRIGGERS = "table_triggers" # ALTER TABLE ... 
DISABLE/ENABLE TRIGGER ALL (default) REPLICA_ROLE = "replica_role" # SET session_replication_role=replica|origin (superuser only) + +class cfg_DeltaScope(str, Enum): + BATCH = "batch" # only process identifiers in the current batch + FULL = "full" # process all identifiers in the corp list + @dataclass class SubsetConfig: corp_file: Path mode: cfg_GenerationMode - delta_scope: str = 'batch' chunk_size: int threads: int prefix_numeric_bc: bool @@ -71,62 +75,20 @@ class SubsetConfig: refresh_views: bool - reset_extract_postgres: bool - source_connection: str target_connection: str target_schema: str + reset_extract_postgres: bool = True + delta_scope: cfg_DeltaScope = cfg_DeltaScope.BATCH - -# @dataclass -# class ExtractSubsetConfig: -# """Configuration for Extract-Subset-Flow execution.""" -# corp_file: str -# mode: str = 'refresh' -# delta_scope: str = 'batch' -# chunk_size: int = 900 -# threads: int = 4 -# pg_fastload: bool = False -# include_cp: bool = False -# pg_disable_method: str = 'table_triggers' -# out: str | None = None -# run_dbschemacli: bool = False -# refresh_views: bool = True -# dbschemacli_cmd: str = 'dbschemacli' -# reset_extract_postgres: bool = True -# source_connection: str = 'ctst' -# target_connection: str = _DEFAULT_TARGET_CONNECTION -# target_schema: str = 'public' - -# @classmethod -# def from_args(cls, args: argparse.Namespace) -> 'ExtractSubsetConfig': -# """Create config from parsed arguments.""" -# return cls( -# corp_file=args.corp_file, -# mode=args.mode, -# delta_scope=args.delta_scope, -# chunk_size=args.chunk_size, -# threads=args.threads, -# pg_fastload=args.pg_fastload, -# include_cp=args.include_cp, -# pg_disable_method=args.pg_disable_method, -# out=args.out, -# run_dbschemacli=args.run_dbschemacli, -# refresh_views=args.refresh_views, -# dbschemacli_cmd=args.dbschemacli_cmd, -# reset_extract_postgres=args.reset_extract_postgres, -# source_connection=args.source_connection, -# target_connection=args.target_connection, -# 
target_schema=args.target_schema, -# ) def build_configs(args: argparse.Namespace) -> SubsetConfig: """Build SubsetConfig from parsed arguments.""" return SubsetConfig( corp_file=Path(args.corp_file).expanduser().resolve(), mode=cfg_GenerationMode(args.mode), - delta_scope=args.delta_scope, + delta_scope=cfg_DeltaScope(args.delta_scope), chunk_size=args.chunk_size, threads=args.threads, prefix_numeric_bc=(args.mode == 'refresh'), @@ -165,14 +127,24 @@ def require_file(path: str | Path, description: str) -> Path: return resolved -def _reset_extract_postgres_db() -> None: +def _normalize_target_schema(target_schema: str | None, default_schema: str = 'public') -> str: + normalized = (target_schema or default_schema).strip() + return normalized or default_schema + + +def _reset_extract_postgres_db(target_schema: str | None = 'public') -> None: cfg = get_named_config() dbname = cfg.DB_NAME_COLIN_MIGR host = cfg.DB_HOST_COLIN_MIGR port = str(cfg.DB_PORT_COLIN_MIGR) user = cfg.DB_USER_COLIN_MIGR password = cfg.DB_PASSWORD_COLIN_MIGR - + + target_schema = _normalize_target_schema(target_schema) + safe_schema = '"' + target_schema.replace('"', '""') + '"' + search_path_sql = f'SET search_path TO {safe_schema};' + create_schema_sql = f'CREATE SCHEMA IF NOT EXISTS {safe_schema};' + require_file(_DEFAULT_DDL, 'Extract DDL File') pg_flags = ['-h', host, '-p', str(port), '-U', user] @@ -188,8 +160,37 @@ def _reset_extract_postgres_db() -> None: _run_cmd(['psql', *pg_flags, '-d', 'postgres', '-c', terminate_sql ], env=run_env) _run_cmd(['dropdb', *pg_flags, '--maintenance-db=postgres', '--if-exists', dbname ], env=run_env) _run_cmd(['createdb', *pg_flags, '--maintenance-db=postgres', '-T', 'template0', dbname ], env=run_env) - _run_cmd(['psql', *pg_flags, '-d', dbname, '-v', 'ON_ERROR_STOP=1', '-f', str(_DEFAULT_DDL) ], env=run_env) - _run_cmd(['psql', *pg_flags, '-d', dbname, '-v', 'ON_ERROR_STOP=1', '-f', str(_BUILD_VIEWS_SCRIPT) ], env=run_env) + _run_cmd(['psql', *pg_flags, 
'-d', dbname, '-v', 'ON_ERROR_STOP=1', '-c', create_schema_sql], env=run_env) + _run_cmd(['psql', *pg_flags, '-d', dbname, '-v', 'ON_ERROR_STOP=1', '-c', search_path_sql, '-f', str(_DEFAULT_DDL)], env=run_env) + _run_cmd(['psql', *pg_flags, '-d', dbname, '-v', 'ON_ERROR_STOP=1', '-c', search_path_sql, '-f', str(_BUILD_VIEWS_SCRIPT)], env=run_env) + + + +# def _reset_extract_postgres_db() -> None: +# cfg = get_named_config() +# dbname = cfg.DB_NAME_COLIN_MIGR +# host = cfg.DB_HOST_COLIN_MIGR +# port = str(cfg.DB_PORT_COLIN_MIGR) +# user = cfg.DB_USER_COLIN_MIGR +# password = cfg.DB_PASSWORD_COLIN_MIGR + +# require_file(_DEFAULT_DDL, 'Extract DDL File') + +# pg_flags = ['-h', host, '-p', str(port), '-U', user] +# run_env = dict(os.environ) +# if password and 'PGPASSWORD' not in run_env: +# run_env['PGPASSWORD'] = password +# safe_db = str(dbname).replace("'", "''") +# terminate_sql = ( +# "SELECT pg_terminate_backend(pg_stat_activity.pid) " +# "FROM pg_stat_activity " +# f"WHERE datname = '{safe_db}' AND pid <> pg_backend_pid();" +# ) +# _run_cmd(['psql', *pg_flags, '-d', 'postgres', '-c', terminate_sql ], env=run_env) +# _run_cmd(['dropdb', *pg_flags, '--maintenance-db=postgres', '--if-exists', dbname ], env=run_env) +# _run_cmd(['createdb', *pg_flags, '--maintenance-db=postgres', '-T', 'template0', dbname ], env=run_env) +# _run_cmd(['psql', *pg_flags, '-d', dbname, '-v', 'ON_ERROR_STOP=1', '-f', str(_DEFAULT_DDL) ], env=run_env) +# _run_cmd(['psql', *pg_flags, '-d', dbname, '-v', 'ON_ERROR_STOP=1', '-f', str(_BUILD_VIEWS_SCRIPT) ], env=run_env) @task(name='Get-Fallen-Out-Identifiers', cache_policy=NO_CACHE) def get_fallen_identifiers(updated_corp_nums: list) -> list[dict]: @@ -224,25 +225,28 @@ def prune_fallen_identifiers(fallenout_corp_nums: list) -> list[dict]: prune_account = conn.execute(text(account_query)) print(f"Pruned corp_processing={prune_cp.rowcount}, mig_corp_batch={prune_batch.rowcount}, mig_corp_account={prune_account.rowcount}") -def 
get_cuttoff_timestamp() -> datetime: +def get_cuttoff_timestamp(target_schema: str | None = 'public') -> datetime: cfg = get_named_config() - cuttoff_timestamp = get_cutoff_timestamp_query() + cuttoff_timestamp = get_cutoff_timestamp_query(target_schema) with create_engine(cfg.SQLALCHEMY_DATABASE_URI_COLIN_MIGR).begin() as conn: cuttoff_timestamp_result = conn.execute(text(cuttoff_timestamp)).scalar() print(f"cuttoff timestamp is {cuttoff_timestamp_result}") return cuttoff_timestamp_result +# @task(name='Cleanup-Extract-Postgres', cache_policy=NO_CACHE) +# def cleanup_extract_postgres_db() -> None: +# _reset_extract_postgres_db() @task(name='Cleanup-Extract-Postgres', cache_policy=NO_CACHE) -def cleanup_extract_postgres_db() -> None: - _reset_extract_postgres_db() +def cleanup_extract_postgres_db(target_schema: str | None = 'public') -> None: + _reset_extract_postgres_db(target_schema=target_schema) @task(name='Unfreeze-Identifiers', cache_policy=NO_CACHE) -def run_unfreeze_identifiers() -> None: +def run_unfreeze_identifiers(target_schema: str | None = 'public') -> None: cfg = get_named_config() with create_engine(cfg.SQLALCHEMY_DATABASE_URI_COLIN_MIGR).begin() as conn: - result = conn.execute(text(unfreeze_identifiers())) + result = conn.execute(text(unfreeze_identifiers(target_schema=target_schema))) print(f'Unfroze corporation rows={result.rowcount}') @task(name='Get-Updated-Identifiers-Colin', cache_policy=NO_CACHE) @@ -265,9 +269,51 @@ def get_updated_identifiers_colin(cutoff_timestamp: str, mig_batch_id: int, coli rows = [dict(row) for row in result.mappings()] return rows + +@task(name='Run-CPRD-Subset-Generator', cache_policy=NO_CACHE) +def run_cprd_subset_extract_generator(cfg: SubsetConfig) -> subprocess.CompletedProcess: + """ + Generate Commands + """ + require_file(_SCRIPT_PATH, 'Generated script') + corp_path =require_file(cfg.corp_file, 'Corp list file') + argv = [ + sys.executable, + str(_SCRIPT_PATH), + '--corp-file', + str(corp_path), + '--mode', 
+ cfg.mode, + '--chunk-size', + str(cfg.chunk_size), + '--threads', + str(cfg.threads), + '--pg-disable-method', + cfg.pg_disable_method, + ] + argv.extend(['--target-connection', cfg.target_connection]) + argv.extend(['--source-connection', cfg.source_connection]) + argv.extend(['--target-schema', cfg.target_schema]) + if cfg.pg_fastload: + argv.append('--pg-fastload') + if cfg.include_cp: + argv.append('--include-cp') + if cfg.prefix_numeric_bc: + argv.append('--prefix-numeric-bc') + out_path = _resolve_master_script_path(cfg.out_master) + out_path.parent.mkdir(parents=True, exist_ok=True) + argv.extend(['--out', str(out_path)]) + return subprocess.run( + argv, + cwd=str(_REPO_ROOT), + capture_output=False, + text=True, + ) + + @task(name='Run-CPRD-Subset-Generator', cache_policy=NO_CACHE) -def run_cprd_subset_extract_generator( +def run_cprd_subset_extract_generator_original( corp_file: str, mode: str, chunk_size: int, @@ -355,6 +401,84 @@ def run_refresh_views(mode: str = 'refresh', targets: str = 'all') -> subproces ) +@flow(name='Extract-Subset-Flow', log_prints=True, persist_result=False) +def extract_flow(cfg: SubsetConfig) -> None: + print(f'Running Extract-Subset-Flow with config: {cfg} ') + """ + Generate files + """ + if cfg.mode == 'refresh': + cfg.reset_extract_postgres = False + print('Running in refresh mode: skipping Postgres DB reset') + if cfg.reset_extract_postgres: + cleanup_extract_postgres_db(cfg.target_schema) + + cutoff = get_cuttoff_timestamp(cfg.target_schema) + + config = get_config() + colin_oracle_engine = colin_oracle_init(config) + # Get Identifiers + feed_path: Path | None = None + if cfg.mode == 'refresh': + updated_rows = get_updated_identifiers_colin(cutoff_timestamp=cutoff, + mig_batch_id=config.MIG_BATCH_IDS, + colin_oracle_engine=colin_oracle_engine, + chunk_size=cfg.chunk_size, + scope=cfg.delta_scope) + print(f'Colin updated identifiers : {len(updated_rows)} rows') + _GENERATED_DIR.mkdir(parents=True, exist_ok=True) + 
feed_path = _GENERATED_DIR / f'refresh_corp_feed_{os.getpid()}.tmp' + seen = set() + lines = [] + updated_corp_nums = [] + for row in updated_rows: + for k, v in row.items(): + if k is None or v is None: + continue + if str(k).lower() == 'corp_num': + c = str(v).strip() + if c and c not in seen: + seen.add(c) + lines.append(c) + updated_corp_nums.append('BC'+c) + break + if not lines: + raise ValueError('refresh: no corp_num in updated_rows') + feed_path.write_text('\n'.join(lines) + '\n', encoding='utf-8') + corp_file = str(feed_path) + result: subprocess.CompletedProcess | None = None + print(f'Running CPRD subset extract generator {cfg.corp_file}') + try: + result = run_cprd_subset_extract_generator(cfg) + finally: + if feed_path is not None: + feed_path.unlink(missing_ok=True) + if result.returncode != 0 and result is not None: + raise RuntimeError(f'Generator exited with code {result.returncode}') + print(f'generator completed successfully') + + if cfg.run_dbschemacli: + master_script = _resolve_master_script_path(out=cfg.out_master) + run_result = run_dbschemacli_task( + master_script=str(master_script), + dbschemacli_cmd=cfg.dbschemacli_cmd, + ) + if run_result.returncode != 0: + raise RuntimeError(f'DbSchemaCLI exited with code {run_result.returncode}') + + print('Running Unfreezing Corps.......') + run_unfreeze_identifiers(cfg.target_schema) + + if cfg.refresh_views and cfg.delta_scope == 'batch': + refresh_result = run_refresh_views('refresh', 'all') + if refresh_result.returncode !=0: + raise RuntimeError(f'Refresh-Views exited with code {refresh_result.returncode}') + if cfg.mode == 'refresh' and cfg.delta_scope == 'batch': + prune_identifiers = get_fallen_identifiers(updated_corp_nums) + prune_fallen_identifiers(prune_identifiers) + + + @flow(name='Extract-Subset-Flow', log_prints=True, persist_result=False) def extract_pull_flow( corp_file: str, @@ -370,7 +494,8 @@ def extract_pull_flow( reset_extract_postgres: bool = True, include_cp: bool = False, 
target_connection: str = _DEFAULT_TARGET_CONNECTION, - delta_scope: str = 'batch' + delta_scope: str = 'batch', + target_schema: str = 'colin_extract', ) -> None: """ Generate files @@ -379,9 +504,10 @@ def extract_pull_flow( reset_extract_postgres = False print('Running in refresh mode: skipping Postgres DB reset') if reset_extract_postgres: - cleanup_extract_postgres_db() + # cleanup_extract_postgres_db() + cleanup_extract_postgres_db(target_schema) - cutoff = get_cuttoff_timestamp() + cutoff = get_cuttoff_timestamp(target_schema) config = get_config() colin_oracle_engine = colin_oracle_init(config) @@ -465,7 +591,7 @@ def build_arg_parser() -> argparse.ArgumentParser: p.add_argument('--out', default='data-tool/scripts/subset/generated/subset_refresh.sql', help='Output path for generated master script.') p.add_argument('--run-dbschemacli', action='store_false') p.add_argument('--refresh-views', action='store_false') - p.add_argument('--dbschemacli-cmd', default='dbschemacli') + p.add_argument('--dbschemacli-cmd', default='/usr/local/bin/DbSchemaCLI') p.add_argument('--reset-extract-postgres', action='store_false') p.add_argument('--source-connection', default='ctst') p.add_argument('--target-connection', default='ctst_pg') @@ -476,7 +602,8 @@ def build_arg_parser() -> argparse.ArgumentParser: def main(argv: list[str] | None = None) -> int: args = build_arg_parser().parse_args(argv) config = build_configs(args) - extract_pull_flow(**vars(config)) + extract_flow(config) + # extract_pull_flow(config) return 0 From 2a52d0afa51afe10306993a9c8145909308a914e Mon Sep 17 00:00:00 2001 From: ketaki-deodhar Date: Thu, 2 Jul 2026 08:47:01 -0700 Subject: [PATCH 07/10] 31328 - refresh extract flow and related updates --- .../flows/refresh_extract_subset_flow.py | 17 +- .../colin_corps_extract_postgres_views_ddl | 8 + ...olin_corps_extract_postgres_views_ddl_copy | 1490 +++++++++++++++++ 3 files changed, 1508 insertions(+), 7 deletions(-) create mode 100644 
data-tool/scripts/colin_corps_extract_postgres_views_ddl_copy diff --git a/data-tool/flows/refresh_extract_subset_flow.py b/data-tool/flows/refresh_extract_subset_flow.py index 07f06e83be..5854d4bad9 100644 --- a/data-tool/flows/refresh_extract_subset_flow.py +++ b/data-tool/flows/refresh_extract_subset_flow.py @@ -144,6 +144,7 @@ def _reset_extract_postgres_db(target_schema: str | None = 'public') -> None: safe_schema = '"' + target_schema.replace('"', '""') + '"' search_path_sql = f'SET search_path TO {safe_schema};' create_schema_sql = f'CREATE SCHEMA IF NOT EXISTS {safe_schema};' + psql_schema_vars = ['-v', f'schema_name={target_schema}'] require_file(_DEFAULT_DDL, 'Extract DDL File') @@ -160,9 +161,9 @@ def _reset_extract_postgres_db(target_schema: str | None = 'public') -> None: _run_cmd(['psql', *pg_flags, '-d', 'postgres', '-c', terminate_sql ], env=run_env) _run_cmd(['dropdb', *pg_flags, '--maintenance-db=postgres', '--if-exists', dbname ], env=run_env) _run_cmd(['createdb', *pg_flags, '--maintenance-db=postgres', '-T', 'template0', dbname ], env=run_env) - _run_cmd(['psql', *pg_flags, '-d', dbname, '-v', 'ON_ERROR_STOP=1', '-c', create_schema_sql], env=run_env) - _run_cmd(['psql', *pg_flags, '-d', dbname, '-v', 'ON_ERROR_STOP=1', '-c', search_path_sql, '-f', str(_DEFAULT_DDL)], env=run_env) - _run_cmd(['psql', *pg_flags, '-d', dbname, '-v', 'ON_ERROR_STOP=1', '-c', search_path_sql, '-f', str(_BUILD_VIEWS_SCRIPT)], env=run_env) + _run_cmd(['psql', *pg_flags, '-d', dbname, '-v', 'ON_ERROR_STOP=1', *psql_schema_vars, '-c', create_schema_sql], env=run_env) + _run_cmd(['psql', *pg_flags, '-d', dbname, '-v', 'ON_ERROR_STOP=1', *psql_schema_vars, '-c', search_path_sql, '-f', str(_DEFAULT_DDL)], env=run_env) + _run_cmd(['psql', *pg_flags, '-d', dbname, '-v', 'ON_ERROR_STOP=1', *psql_schema_vars, '-c', search_path_sql, '-f', str(_BUILD_VIEWS_SCRIPT)], env=run_env) @@ -377,9 +378,10 @@ def run_dbschemacli_task(master_script: str, dbschemacli_cmd: str = 'dbschemacli 
) @task(name='Refresh-Views', cache_policy=NO_CACHE) -def run_refresh_views(mode: str = 'refresh', targets: str = 'all') -> subprocess.CompletedProcess: +def run_refresh_views(mode: str = 'refresh', targets: str = 'all', schema: str | None = 'public') -> subprocess.CompletedProcess: cfg = get_named_config() script = require_file(_REFRESH_VIEWS_SCRIPT, 'refresh_colin_extract_views.sh') + normalized_schema = _normalize_target_schema(schema) argv = [ str(script), '--mode', mode, @@ -387,7 +389,8 @@ def run_refresh_views(mode: str = 'refresh', targets: str = 'all') -> subproces '--db', cfg.DB_NAME_COLIN_MIGR, '--host', cfg.DB_HOST_COLIN_MIGR, '--port', str(cfg.DB_PORT_COLIN_MIGR), - '--user', cfg.DB_USER_COLIN_MIGR + '--user', cfg.DB_USER_COLIN_MIGR, + '--schema', normalized_schema, ] run_env = dict(os.environ) if cfg.DB_PASSWORD_COLIN_MIGR and 'PGPASSWORD' not in run_env: @@ -470,7 +473,7 @@ def extract_flow(cfg: SubsetConfig) -> None: run_unfreeze_identifiers(cfg.target_schema) if cfg.refresh_views and cfg.delta_scope == 'batch': - refresh_result = run_refresh_views('refresh', 'all') + refresh_result = run_refresh_views('refresh', 'all', cfg.target_schema) if refresh_result.returncode !=0: raise RuntimeError(f'Refresh-Views exited with code {refresh_result.returncode}') if cfg.mode == 'refresh' and cfg.delta_scope == 'batch': @@ -571,7 +574,7 @@ def extract_pull_flow( run_unfreeze_identifiers() if refresh_views and delta_scope == 'batch': - refresh_result = run_refresh_views('refresh', 'all') + refresh_result = run_refresh_views('refresh', 'all', target_schema) if refresh_result.returncode !=0: raise RuntimeError(f'Refresh-Views exited with code {refresh_result.returncode}') if mode == 'refresh' and delta_scope == 'batch': diff --git a/data-tool/scripts/colin_corps_extract_postgres_views_ddl b/data-tool/scripts/colin_corps_extract_postgres_views_ddl index dd1c1c7f3c..645956c641 100644 --- a/data-tool/scripts/colin_corps_extract_postgres_views_ddl +++ 
b/data-tool/scripts/colin_corps_extract_postgres_views_ddl @@ -1,6 +1,14 @@ -- The following views are used mainly to help with bad data analysis and to help with determining eligibility of -- groups & batches of businesses we can migrate at specific points in time. +\if :{?schema_name} +\else +\set schema_name public +\endif + +CREATE SCHEMA IF NOT EXISTS :"schema_name"; +SET search_path TO :"schema_name", public; + CREATE MATERIALIZED VIEW mv_corps_with_officers AS select distinct cp.corp_num from corporation c diff --git a/data-tool/scripts/colin_corps_extract_postgres_views_ddl_copy b/data-tool/scripts/colin_corps_extract_postgres_views_ddl_copy new file mode 100644 index 0000000000..645956c641 --- /dev/null +++ b/data-tool/scripts/colin_corps_extract_postgres_views_ddl_copy @@ -0,0 +1,1490 @@ +-- The following views are used mainly to help with bad data analysis and to help with determining eligibility of +-- groups & batches of businesses we can migrate at specific points in time. 
+ +\if :{?schema_name} +\else +\set schema_name public +\endif + +CREATE SCHEMA IF NOT EXISTS :"schema_name"; +SET search_path TO :"schema_name", public; + +CREATE MATERIALIZED VIEW mv_corps_with_officers AS +select distinct cp.corp_num +from corporation c + join event e on c.corp_num = e.corp_num + join corp_party cp on cp.start_event_id = e.event_id +where 1 = 1 + and cp.party_typ_cd = 'OFF' +WITH NO DATA +; + +ALTER MATERIALIZED VIEW mv_corps_with_officers + owner to postgres; + + +CREATE MATERIALIZED VIEW mv_corps_party_role_count AS +select c.corp_num, cp.party_typ_cd, count(cp.party_typ_cd) as party_typ_count +from corporation c + join corp_party cp on c.corp_num = cp.corp_num +where 1 = 1 + and cp.end_event_id is null +group by c.corp_num, cp.party_typ_cd +WITH NO DATA +; + +ALTER MATERIALIZED VIEW mv_corps_party_role_count + owner to postgres; + + +CREATE MATERIALIZED VIEW mv_admin_email_count AS +select admin_email, count(*) as email_count +from corporation +group by admin_email +WITH NO DATA +; + +ALTER MATERIALIZED VIEW mv_admin_email_count + owner to postgres; + + +CREATE MATERIALIZED VIEW mv_admin_email_domain_count AS +select LOWER(SPLIT_PART(c.admin_email, '@', 2)) as email_domain, + count(*) as domain_count +from corporation c +group by LOWER(SPLIT_PART(c.admin_email, '@', 2)) +order by domain_count desc +WITH NO DATA +; + +ALTER MATERIALIZED VIEW mv_admin_email_domain_count + owner to postgres; + + +create view v_addr_links as +with current_party as ( + select + cp.corp_num, + 'party'::text as entity_kind, + cp.party_typ_cd as entity_type, + addr.addr_role, + addr.addr_id + from corp_party cp + cross join lateral ( + values + ('mailing'::text, cp.mailing_addr_id), + ('delivery'::text, cp.delivery_addr_id) + ) as addr(addr_role, addr_id) + where cp.end_event_id is null +), +current_office as ( + select + o.corp_num, + 'office'::text as entity_kind, + o.office_typ_cd as entity_type, + addr.addr_role, + addr.addr_id + from office o + cross join 
lateral ( + values + ('mailing'::text, o.mailing_addr_id), + ('delivery'::text, o.delivery_addr_id) + ) as addr(addr_role, addr_id) + where o.end_event_id is null +) +select * +from current_party +union all +select * +from current_office; + +ALTER VIEW v_addr_links + owner to postgres; + +create or replace view v_addr_issues as +with link_rows as ( + select + l.corp_num, + l.entity_kind, + l.entity_type, + l.addr_role, + l.addr_id, + (l.addr_id is null) as is_null, + (l.addr_id = 1) as is_stub, + (l.addr_id is not null and l.addr_id <> 1) as is_real_addr, + a.city, + a.province, + a.country_typ_cd, + a.postal_cd, + a.addr_line_1, + a.addr_line_2, + a.addr_line_3 + from v_addr_links l + left join address a + on a.addr_id = l.addr_id + and l.addr_id is not null + and l.addr_id <> 1 +), +field_flags as ( + select + lr.corp_num, + lr.entity_kind, + lr.entity_type, + lr.addr_role, + lr.addr_id, + lr.is_null, + lr.is_stub, + (lr.is_real_addr and (lr.city is null or is_blank(lr.city))) as missing_city, + (lr.is_real_addr and (lr.province is null or is_blank(lr.province))) as missing_province, + (lr.is_real_addr and (lr.country_typ_cd is null or is_blank(lr.country_typ_cd))) as missing_country, + (lr.is_real_addr and (lr.postal_cd is null or is_blank(lr.postal_cd))) as missing_postal_code, + (lr.is_real_addr and (lr.addr_line_1 is null or is_blank(lr.addr_line_1))) as missing_addr_line_1, + (lr.is_real_addr and (lr.addr_line_2 is null or is_blank(lr.addr_line_2))) as missing_addr_line_2, + (lr.is_real_addr and (lr.addr_line_3 is null or is_blank(lr.addr_line_3))) as missing_addr_line_3 + from link_rows lr +), +row_flags as ( + select + ff.corp_num, + ff.entity_kind, + ff.entity_type, + ff.addr_role, + ff.addr_id, + ff.is_null, + ff.is_stub, + ff.missing_city, + ff.missing_province, + ff.missing_country, + ff.missing_postal_code, + ff.missing_addr_line_1, + ff.missing_addr_line_2, + ff.missing_addr_line_3, + ( + ff.missing_city + or ff.missing_province + or 
ff.missing_country + or ff.missing_postal_code + or ff.missing_addr_line_1 + ) as has_any_key_missing, + ( + ff.is_null + or ff.is_stub + or ff.missing_city + or ff.missing_province + or ff.missing_country + or ff.missing_postal_code + or ff.missing_addr_line_1 + ) as any_bad + from field_flags ff +) +select + rf.corp_num, + rf.entity_kind, + rf.entity_type, + rf.addr_role, + rf.addr_id, + rf.is_null, + rf.is_stub, + rf.missing_city, + rf.missing_province, + rf.missing_country, + rf.missing_postal_code, + rf.missing_addr_line_1, + rf.missing_addr_line_2, + rf.missing_addr_line_3, + rf.has_any_key_missing, + rf.any_bad, + not rf.any_bad as is_healthy +from row_flags rf; + +ALTER VIEW v_addr_issues + owner to postgres; + + +/* Shared per-corp/entity-type address issue counts reused by both the + slim screening MV and the wide/full address-quality MV. */ +CREATE MATERIALIZED VIEW mv_addr_issue_counts_by_entity AS +SELECT + i.corp_num, + i.entity_kind, + i.entity_type, + COUNT(*) AS address_count, + COUNT(*) FILTER (WHERE i.is_null) AS null_count, + COUNT(*) FILTER (WHERE i.is_stub) AS stub_count, + COUNT(*) FILTER (WHERE i.has_any_key_missing) AS any_bad_key_fields_count, + COUNT(*) FILTER (WHERE i.any_bad) AS any_bad_count, + COUNT(*) FILTER (WHERE NOT i.any_bad) AS healthy_count, + COUNT(*) FILTER (WHERE i.missing_city) AS missing_city_count, + COUNT(*) FILTER (WHERE i.missing_province) AS missing_province_count, + COUNT(*) FILTER (WHERE i.missing_country) AS missing_country_count, + COUNT(*) FILTER (WHERE i.missing_postal_code) AS missing_postal_code_count, + COUNT(*) FILTER (WHERE i.missing_addr_line_1) AS missing_addr_line_1_count, + COUNT(*) FILTER (WHERE i.missing_addr_line_2) AS missing_addr_line_2_count, + COUNT(*) FILTER (WHERE i.missing_addr_line_3) AS missing_addr_line_3_count +FROM v_addr_issues i +GROUP BY i.corp_num, i.entity_kind, i.entity_type +WITH NO DATA +; + +CREATE UNIQUE INDEX IF NOT EXISTS ux_mv_addr_issue_counts_by_entity + ON 
mv_addr_issue_counts_by_entity (corp_num, entity_kind, entity_type); + +ALTER MATERIALIZED VIEW mv_addr_issue_counts_by_entity + owner to postgres; + + +create materialized view mv_addr_quality_by_corp as +with +params as ( + select + array['DIR','OFF','LIQ','RCC','RCM']::text[] as party_types, + array['RG','RC','LQ','DS']::text[] as office_types +), +entity_counts as MATERIALIZED ( + select + a.corp_num, + a.entity_kind, + a.entity_type, + a.address_count, + a.null_count, + a.stub_count, + a.any_bad_key_fields_count, + a.any_bad_count, + a.healthy_count, + a.missing_city_count, + a.missing_province_count, + a.missing_country_count, + a.missing_postal_code_count, + a.missing_addr_line_1_count, + a.missing_addr_line_2_count, + a.missing_addr_line_3_count, + COALESCE((a.entity_kind = 'party' AND a.entity_type = ANY(p.party_types)), false) AS is_supported_party, + COALESCE((a.entity_kind = 'office' AND a.entity_type = ANY(p.office_types)), false) AS is_supported_office + from mv_addr_issue_counts_by_entity a + cross join params p +), + +/* --------------------------------------- + Wide aggregates (counts per corp_num) + --------------------------------------- */ +agg as ( + select + ec.corp_num, + CAST(COALESCE(SUM(ec.null_count) FILTER (WHERE ec.is_supported_party OR ec.is_supported_office), 0) AS bigint) AS address_all_null_count, + CAST(COALESCE(SUM(ec.stub_count) FILTER (WHERE ec.is_supported_party OR ec.is_supported_office), 0) AS bigint) AS address_all_stub_count, + CAST(COALESCE(SUM(ec.any_bad_key_fields_count) FILTER (WHERE ec.is_supported_party OR ec.is_supported_office), 0) AS bigint) AS address_all_any_bad_key_fields_count, + CAST(COALESCE(SUM(ec.any_bad_count) FILTER (WHERE ec.is_supported_party OR ec.is_supported_office), 0) AS bigint) AS address_all_any_bad_count, + CAST(COALESCE(SUM(ec.healthy_count) FILTER (WHERE ec.is_supported_party OR ec.is_supported_office), 0) AS bigint) AS address_all_healthy_count, + CAST(COALESCE(SUM(ec.missing_city_count) 
FILTER (WHERE ec.is_supported_party OR ec.is_supported_office), 0) AS bigint) AS address_all_missing_city_count, + CAST(COALESCE(SUM(ec.missing_province_count) FILTER (WHERE ec.is_supported_party OR ec.is_supported_office), 0) AS bigint) AS address_all_missing_province_count, + CAST(COALESCE(SUM(ec.missing_country_count) FILTER (WHERE ec.is_supported_party OR ec.is_supported_office), 0) AS bigint) AS address_all_missing_country_count, + CAST(COALESCE(SUM(ec.missing_postal_code_count) FILTER (WHERE ec.is_supported_party OR ec.is_supported_office), 0) AS bigint) AS address_all_missing_postal_code_count, + CAST(COALESCE(SUM(ec.missing_addr_line_1_count) FILTER (WHERE ec.is_supported_party OR ec.is_supported_office), 0) AS bigint) AS address_all_missing_addr1_count, + + CAST(COALESCE(SUM(ec.null_count) FILTER (WHERE ec.entity_kind='party' AND ec.is_supported_party), 0) AS bigint) AS party_all_null_count, + CAST(COALESCE(SUM(ec.stub_count) FILTER (WHERE ec.entity_kind='party' AND ec.is_supported_party), 0) AS bigint) AS party_all_stub_count, + CAST(COALESCE(SUM(ec.any_bad_key_fields_count) FILTER (WHERE ec.entity_kind='party' AND ec.is_supported_party), 0) AS bigint) AS party_all_any_bad_key_fields_count, + CAST(COALESCE(SUM(ec.any_bad_count) FILTER (WHERE ec.entity_kind='party' AND ec.is_supported_party), 0) AS bigint) AS party_any_bad_count, + CAST(COALESCE(SUM(ec.any_bad_count) FILTER (WHERE ec.entity_kind='party' AND ec.is_supported_party), 0) AS bigint) AS party_all_any_bad_count, + CAST(COALESCE(SUM(ec.healthy_count) FILTER (WHERE ec.entity_kind='party' AND ec.is_supported_party), 0) AS bigint) AS party_all_healthy_count, + CAST(COALESCE(SUM(ec.missing_city_count) FILTER (WHERE ec.entity_kind='party' AND ec.is_supported_party), 0) AS bigint) AS party_all_missing_city_count, + CAST(COALESCE(SUM(ec.missing_province_count) FILTER (WHERE ec.entity_kind='party' AND ec.is_supported_party), 0) AS bigint) AS party_all_missing_province_count, + 
CAST(COALESCE(SUM(ec.missing_country_count) FILTER (WHERE ec.entity_kind='party' AND ec.is_supported_party), 0) AS bigint) AS party_all_missing_country_count, + CAST(COALESCE(SUM(ec.missing_postal_code_count) FILTER (WHERE ec.entity_kind='party' AND ec.is_supported_party), 0) AS bigint) AS party_all_missing_postal_code_count, + CAST(COALESCE(SUM(ec.missing_addr_line_1_count) FILTER (WHERE ec.entity_kind='party' AND ec.is_supported_party), 0) AS bigint) AS party_all_missing_addr1_count, + + CAST(COALESCE(SUM(ec.null_count) FILTER (WHERE ec.entity_kind='office' AND ec.is_supported_office), 0) AS bigint) AS office_all_null_count, + CAST(COALESCE(SUM(ec.stub_count) FILTER (WHERE ec.entity_kind='office' AND ec.is_supported_office), 0) AS bigint) AS office_all_stub_count, + CAST(COALESCE(SUM(ec.any_bad_key_fields_count) FILTER (WHERE ec.entity_kind='office' AND ec.is_supported_office), 0) AS bigint) AS office_all_any_bad_key_fields_count, + CAST(COALESCE(SUM(ec.any_bad_count) FILTER (WHERE ec.entity_kind='office' AND ec.is_supported_office), 0) AS bigint) AS office_any_bad_count, + CAST(COALESCE(SUM(ec.any_bad_count) FILTER (WHERE ec.entity_kind='office' AND ec.is_supported_office), 0) AS bigint) AS office_all_any_bad_count, + CAST(COALESCE(SUM(ec.healthy_count) FILTER (WHERE ec.entity_kind='office' AND ec.is_supported_office), 0) AS bigint) AS office_all_healthy_count, + CAST(COALESCE(SUM(ec.missing_city_count) FILTER (WHERE ec.entity_kind='office' AND ec.is_supported_office), 0) AS bigint) AS office_all_missing_city_count, + CAST(COALESCE(SUM(ec.missing_province_count) FILTER (WHERE ec.entity_kind='office' AND ec.is_supported_office), 0) AS bigint) AS office_all_missing_province_count, + CAST(COALESCE(SUM(ec.missing_country_count) FILTER (WHERE ec.entity_kind='office' AND ec.is_supported_office), 0) AS bigint) AS office_all_missing_country_count, + CAST(COALESCE(SUM(ec.missing_postal_code_count) FILTER (WHERE ec.entity_kind='office' AND ec.is_supported_office), 0) AS 
bigint) AS office_all_missing_postal_code_count, + CAST(COALESCE(SUM(ec.missing_addr_line_1_count) FILTER (WHERE ec.entity_kind='office' AND ec.is_supported_office), 0) AS bigint) AS office_all_missing_addr1_count, + + CAST(COALESCE(SUM(ec.null_count) FILTER (WHERE ec.entity_kind='office' AND ec.is_supported_office AND ec.entity_type NOT IN ('RG','RC')), 0) AS bigint) AS non_base_offices_null_count, + CAST(COALESCE(SUM(ec.stub_count) FILTER (WHERE ec.entity_kind='office' AND ec.is_supported_office AND ec.entity_type NOT IN ('RG','RC')), 0) AS bigint) AS non_base_offices_stub_count, + CAST(COALESCE(SUM(ec.any_bad_key_fields_count) FILTER (WHERE ec.entity_kind='office' AND ec.is_supported_office AND ec.entity_type NOT IN ('RG','RC')), 0) AS bigint) AS non_base_offices_any_bad_key_fields_count, + CAST(COALESCE(SUM(ec.any_bad_count) FILTER (WHERE ec.entity_kind='office' AND ec.is_supported_office AND ec.entity_type NOT IN ('RG','RC')), 0) AS bigint) AS non_base_offices_any_bad_count, + CAST(COALESCE(SUM(ec.healthy_count) FILTER (WHERE ec.entity_kind='office' AND ec.is_supported_office AND ec.entity_type NOT IN ('RG','RC')), 0) AS bigint) AS non_base_offices_healthy_count, + CAST(COALESCE(SUM(ec.missing_city_count) FILTER (WHERE ec.entity_kind='office' AND ec.is_supported_office AND ec.entity_type NOT IN ('RG','RC')), 0) AS bigint) AS non_base_offices_missing_city_count, + CAST(COALESCE(SUM(ec.missing_province_count) FILTER (WHERE ec.entity_kind='office' AND ec.is_supported_office AND ec.entity_type NOT IN ('RG','RC')), 0) AS bigint) AS non_base_offices_missing_province_count, + CAST(COALESCE(SUM(ec.missing_country_count) FILTER (WHERE ec.entity_kind='office' AND ec.is_supported_office AND ec.entity_type NOT IN ('RG','RC')), 0) AS bigint) AS non_base_offices_missing_country_count, + CAST(COALESCE(SUM(ec.missing_postal_code_count) FILTER (WHERE ec.entity_kind='office' AND ec.is_supported_office AND ec.entity_type NOT IN ('RG','RC')), 0) AS bigint) AS 
non_base_offices_missing_postal_code_count, + CAST(COALESCE(SUM(ec.missing_addr_line_1_count) FILTER (WHERE ec.entity_kind='office' AND ec.is_supported_office AND ec.entity_type NOT IN ('RG','RC')), 0) AS bigint) AS non_base_offices_missing_addr1_count, + + CAST(COALESCE(SUM(ec.null_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type NOT IN ('DIR','OFF')), 0) AS bigint) AS non_base_parties_null_count, + CAST(COALESCE(SUM(ec.stub_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type NOT IN ('DIR','OFF')), 0) AS bigint) AS non_base_parties_stub_count, + CAST(COALESCE(SUM(ec.any_bad_key_fields_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type NOT IN ('DIR','OFF')), 0) AS bigint) AS non_base_parties_any_bad_key_fields_count, + CAST(COALESCE(SUM(ec.any_bad_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type NOT IN ('DIR','OFF')), 0) AS bigint) AS non_base_parties_any_bad_count, + CAST(COALESCE(SUM(ec.healthy_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type NOT IN ('DIR','OFF')), 0) AS bigint) AS non_base_parties_healthy_count, + CAST(COALESCE(SUM(ec.missing_city_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type NOT IN ('DIR','OFF')), 0) AS bigint) AS non_base_parties_missing_city_count, + CAST(COALESCE(SUM(ec.missing_province_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type NOT IN ('DIR','OFF')), 0) AS bigint) AS non_base_parties_missing_province_count, + CAST(COALESCE(SUM(ec.missing_country_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type NOT IN ('DIR','OFF')), 0) AS bigint) AS non_base_parties_missing_country_count, + CAST(COALESCE(SUM(ec.missing_postal_code_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type NOT IN ('DIR','OFF')), 0) AS bigint) AS non_base_parties_missing_postal_code_count, + CAST(COALESCE(SUM(ec.missing_addr_line_1_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type NOT IN ('DIR','OFF')), 0) AS bigint) AS 
non_base_parties_missing_addr1_count, + + CAST(COALESCE(SUM(ec.null_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='DIR'), 0) AS bigint) AS dir_null_count, + CAST(COALESCE(SUM(ec.stub_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='DIR'), 0) AS bigint) AS dir_stub_count, + CAST(COALESCE(SUM(ec.any_bad_key_fields_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='DIR'), 0) AS bigint) AS dir_any_bad_key_fields_count, + CAST(COALESCE(SUM(ec.any_bad_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='DIR'), 0) AS bigint) AS dir_any_bad_count, + CAST(COALESCE(SUM(ec.healthy_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='DIR'), 0) AS bigint) AS dir_healthy_count, + CAST(COALESCE(SUM(ec.missing_city_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='DIR'), 0) AS bigint) AS dir_missing_city_count, + CAST(COALESCE(SUM(ec.missing_province_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='DIR'), 0) AS bigint) AS dir_missing_province_count, + CAST(COALESCE(SUM(ec.missing_country_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='DIR'), 0) AS bigint) AS dir_missing_country_count, + CAST(COALESCE(SUM(ec.missing_postal_code_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='DIR'), 0) AS bigint) AS dir_missing_postal_code_count, + CAST(COALESCE(SUM(ec.missing_addr_line_1_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='DIR'), 0) AS bigint) AS dir_missing_addr1_count, + + CAST(COALESCE(SUM(ec.null_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='OFF'), 0) AS bigint) AS officer_null_count, + CAST(COALESCE(SUM(ec.stub_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='OFF'), 0) AS bigint) AS officer_stub_count, + CAST(COALESCE(SUM(ec.any_bad_key_fields_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='OFF'), 0) AS bigint) AS officer_any_bad_key_fields_count, + CAST(COALESCE(SUM(ec.any_bad_count) FILTER 
(WHERE ec.entity_kind='party' AND ec.entity_type='OFF'), 0) AS bigint) AS officer_any_bad_count, + CAST(COALESCE(SUM(ec.healthy_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='OFF'), 0) AS bigint) AS officer_healthy_count, + CAST(COALESCE(SUM(ec.missing_city_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='OFF'), 0) AS bigint) AS officer_missing_city_count, + CAST(COALESCE(SUM(ec.missing_province_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='OFF'), 0) AS bigint) AS officer_missing_province_count, + CAST(COALESCE(SUM(ec.missing_country_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='OFF'), 0) AS bigint) AS officer_missing_country_count, + CAST(COALESCE(SUM(ec.missing_postal_code_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='OFF'), 0) AS bigint) AS officer_missing_postal_code_count, + CAST(COALESCE(SUM(ec.missing_addr_line_1_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='OFF'), 0) AS bigint) AS officer_missing_addr1_count, + + CAST(COALESCE(SUM(ec.null_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='LIQ'), 0) AS bigint) AS liq_null_count, + CAST(COALESCE(SUM(ec.stub_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='LIQ'), 0) AS bigint) AS liq_stub_count, + CAST(COALESCE(SUM(ec.any_bad_key_fields_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='LIQ'), 0) AS bigint) AS liq_any_bad_key_fields_count, + CAST(COALESCE(SUM(ec.any_bad_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='LIQ'), 0) AS bigint) AS liq_any_bad_count, + CAST(COALESCE(SUM(ec.healthy_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='LIQ'), 0) AS bigint) AS liq_healthy_count, + CAST(COALESCE(SUM(ec.missing_city_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='LIQ'), 0) AS bigint) AS liq_missing_city_count, + CAST(COALESCE(SUM(ec.missing_province_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='LIQ'), 0) 
AS bigint) AS liq_missing_province_count, + CAST(COALESCE(SUM(ec.missing_country_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='LIQ'), 0) AS bigint) AS liq_missing_country_count, + CAST(COALESCE(SUM(ec.missing_postal_code_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='LIQ'), 0) AS bigint) AS liq_missing_postal_code_count, + CAST(COALESCE(SUM(ec.missing_addr_line_1_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='LIQ'), 0) AS bigint) AS liq_missing_addr1_count, + + CAST(COALESCE(SUM(ec.null_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='RCC'), 0) AS bigint) AS rcc_null_count, + CAST(COALESCE(SUM(ec.stub_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='RCC'), 0) AS bigint) AS rcc_stub_count, + CAST(COALESCE(SUM(ec.any_bad_key_fields_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='RCC'), 0) AS bigint) AS rcc_any_bad_key_fields_count, + CAST(COALESCE(SUM(ec.any_bad_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='RCC'), 0) AS bigint) AS rcc_any_bad_count, + CAST(COALESCE(SUM(ec.healthy_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='RCC'), 0) AS bigint) AS rcc_healthy_count, + CAST(COALESCE(SUM(ec.missing_city_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='RCC'), 0) AS bigint) AS rcc_missing_city_count, + CAST(COALESCE(SUM(ec.missing_province_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='RCC'), 0) AS bigint) AS rcc_missing_province_count, + CAST(COALESCE(SUM(ec.missing_country_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='RCC'), 0) AS bigint) AS rcc_missing_country_count, + CAST(COALESCE(SUM(ec.missing_postal_code_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='RCC'), 0) AS bigint) AS rcc_missing_postal_code_count, + CAST(COALESCE(SUM(ec.missing_addr_line_1_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='RCC'), 0) AS bigint) AS rcc_missing_addr1_count, + + 
CAST(COALESCE(SUM(ec.null_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='RCM'), 0) AS bigint) AS rcm_null_count, + CAST(COALESCE(SUM(ec.stub_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='RCM'), 0) AS bigint) AS rcm_stub_count, + CAST(COALESCE(SUM(ec.any_bad_key_fields_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='RCM'), 0) AS bigint) AS rcm_any_bad_key_fields_count, + CAST(COALESCE(SUM(ec.any_bad_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='RCM'), 0) AS bigint) AS rcm_any_bad_count, + CAST(COALESCE(SUM(ec.healthy_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='RCM'), 0) AS bigint) AS rcm_healthy_count, + CAST(COALESCE(SUM(ec.missing_city_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='RCM'), 0) AS bigint) AS rcm_missing_city_count, + CAST(COALESCE(SUM(ec.missing_province_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='RCM'), 0) AS bigint) AS rcm_missing_province_count, + CAST(COALESCE(SUM(ec.missing_country_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='RCM'), 0) AS bigint) AS rcm_missing_country_count, + CAST(COALESCE(SUM(ec.missing_postal_code_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='RCM'), 0) AS bigint) AS rcm_missing_postal_code_count, + CAST(COALESCE(SUM(ec.missing_addr_line_1_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='RCM'), 0) AS bigint) AS rcm_missing_addr1_count, + + CAST(COALESCE(SUM(ec.null_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='RG'), 0) AS bigint) AS office_rg_null_count, + CAST(COALESCE(SUM(ec.stub_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='RG'), 0) AS bigint) AS office_rg_stub_count, + CAST(COALESCE(SUM(ec.any_bad_key_fields_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='RG'), 0) AS bigint) AS office_rg_any_bad_key_fields_count, + CAST(COALESCE(SUM(ec.any_bad_count) FILTER (WHERE ec.entity_kind='office' AND 
ec.entity_type='RG'), 0) AS bigint) AS office_rg_any_bad_count, + CAST(COALESCE(SUM(ec.healthy_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='RG'), 0) AS bigint) AS office_rg_healthy_count, + CAST(COALESCE(SUM(ec.missing_city_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='RG'), 0) AS bigint) AS office_rg_missing_city_count, + CAST(COALESCE(SUM(ec.missing_province_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='RG'), 0) AS bigint) AS office_rg_missing_province_count, + CAST(COALESCE(SUM(ec.missing_country_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='RG'), 0) AS bigint) AS office_rg_missing_country_count, + CAST(COALESCE(SUM(ec.missing_postal_code_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='RG'), 0) AS bigint) AS office_rg_missing_postal_code_count, + CAST(COALESCE(SUM(ec.missing_addr_line_1_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='RG'), 0) AS bigint) AS office_rg_missing_addr1_count, + + CAST(COALESCE(SUM(ec.null_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='RC'), 0) AS bigint) AS office_rc_null_count, + CAST(COALESCE(SUM(ec.stub_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='RC'), 0) AS bigint) AS office_rc_stub_count, + CAST(COALESCE(SUM(ec.any_bad_key_fields_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='RC'), 0) AS bigint) AS office_rc_any_bad_key_fields_count, + CAST(COALESCE(SUM(ec.any_bad_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='RC'), 0) AS bigint) AS office_rc_any_bad_count, + CAST(COALESCE(SUM(ec.healthy_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='RC'), 0) AS bigint) AS office_rc_healthy_count, + CAST(COALESCE(SUM(ec.missing_city_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='RC'), 0) AS bigint) AS office_rc_missing_city_count, + CAST(COALESCE(SUM(ec.missing_province_count) FILTER (WHERE ec.entity_kind='office' AND 
ec.entity_type='RC'), 0) AS bigint) AS office_rc_missing_province_count, + CAST(COALESCE(SUM(ec.missing_country_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='RC'), 0) AS bigint) AS office_rc_missing_country_count, + CAST(COALESCE(SUM(ec.missing_postal_code_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='RC'), 0) AS bigint) AS office_rc_missing_postal_code_count, + CAST(COALESCE(SUM(ec.missing_addr_line_1_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='RC'), 0) AS bigint) AS office_rc_missing_addr1_count, + + CAST(COALESCE(SUM(ec.null_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='LQ'), 0) AS bigint) AS office_lq_null_count, + CAST(COALESCE(SUM(ec.stub_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='LQ'), 0) AS bigint) AS office_lq_stub_count, + CAST(COALESCE(SUM(ec.any_bad_key_fields_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='LQ'), 0) AS bigint) AS office_lq_any_bad_key_fields_count, + CAST(COALESCE(SUM(ec.any_bad_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='LQ'), 0) AS bigint) AS office_lq_any_bad_count, + CAST(COALESCE(SUM(ec.healthy_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='LQ'), 0) AS bigint) AS office_lq_healthy_count, + CAST(COALESCE(SUM(ec.missing_city_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='LQ'), 0) AS bigint) AS office_lq_missing_city_count, + CAST(COALESCE(SUM(ec.missing_province_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='LQ'), 0) AS bigint) AS office_lq_missing_province_count, + CAST(COALESCE(SUM(ec.missing_country_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='LQ'), 0) AS bigint) AS office_lq_missing_country_count, + CAST(COALESCE(SUM(ec.missing_postal_code_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='LQ'), 0) AS bigint) AS office_lq_missing_postal_code_count, + CAST(COALESCE(SUM(ec.missing_addr_line_1_count) FILTER 
(WHERE ec.entity_kind='office' AND ec.entity_type='LQ'), 0) AS bigint) AS office_lq_missing_addr1_count, + + CAST(COALESCE(SUM(ec.null_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='DS'), 0) AS bigint) AS office_ds_null_count, + CAST(COALESCE(SUM(ec.stub_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='DS'), 0) AS bigint) AS office_ds_stub_count, + CAST(COALESCE(SUM(ec.any_bad_key_fields_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='DS'), 0) AS bigint) AS office_ds_any_bad_key_fields_count, + CAST(COALESCE(SUM(ec.any_bad_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='DS'), 0) AS bigint) AS office_ds_any_bad_count, + CAST(COALESCE(SUM(ec.healthy_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='DS'), 0) AS bigint) AS office_ds_healthy_count, + CAST(COALESCE(SUM(ec.missing_city_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='DS'), 0) AS bigint) AS office_ds_missing_city_count, + CAST(COALESCE(SUM(ec.missing_province_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='DS'), 0) AS bigint) AS office_ds_missing_province_count, + CAST(COALESCE(SUM(ec.missing_country_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='DS'), 0) AS bigint) AS office_ds_missing_country_count, + CAST(COALESCE(SUM(ec.missing_postal_code_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='DS'), 0) AS bigint) AS office_ds_missing_postal_code_count, + CAST(COALESCE(SUM(ec.missing_addr_line_1_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='DS'), 0) AS bigint) AS office_ds_missing_addr1_count + + from entity_counts ec + group by ec.corp_num +), + +/* --------------------------------------- + JSON breakdowns (supported types only) + --------------------------------------- */ +party_by_type as ( + select + ec.corp_num, + jsonb_object_agg( + ec.entity_type, + jsonb_build_object( + 'addresses', ec.address_count, + 'null_count', ec.null_count, + 
'stub_count', ec.stub_count, + 'any_bad_key_fields_count', ec.any_bad_key_fields_count, + 'any_bad_count', ec.any_bad_count, + 'healthy_count', ec.healthy_count, + 'missing_city', ec.missing_city_count, + 'missing_province', ec.missing_province_count, + 'missing_country', ec.missing_country_count, + 'missing_postal_code', ec.missing_postal_code_count, + 'missing_addr_line_1', ec.missing_addr_line_1_count, + 'missing_addr_line_2', ec.missing_addr_line_2_count, + 'missing_addr_line_3', ec.missing_addr_line_3_count + ) + ORDER BY ec.entity_type + ) as party_issue_breakdown + from entity_counts ec + where ec.entity_kind='party' + and ec.is_supported_party + group by ec.corp_num +), + +office_by_type as ( + select + ec.corp_num, + jsonb_object_agg( + ec.entity_type, + jsonb_build_object( + 'addresses', ec.address_count, + 'null_count', ec.null_count, + 'stub_count', ec.stub_count, + 'any_bad_key_fields_count', ec.any_bad_key_fields_count, + 'any_bad_count', ec.any_bad_count, + 'healthy_count', ec.healthy_count, + 'missing_city', ec.missing_city_count, + 'missing_province', ec.missing_province_count, + 'missing_country', ec.missing_country_count, + 'missing_postal_code', ec.missing_postal_code_count, + 'missing_addr_line_1', ec.missing_addr_line_1_count, + 'missing_addr_line_2', ec.missing_addr_line_2_count, + 'missing_addr_line_3', ec.missing_addr_line_3_count + ) + ORDER BY ec.entity_type + ) as office_issue_breakdown + from entity_counts ec + where ec.entity_kind='office' + and ec.is_supported_office + group by ec.corp_num +) + +-- ========== FINAL SELECT ========== +select + a.corp_num, + + -- JSON first (combined, then each kind) + jsonb_build_object( + 'party', coalesce(p.party_issue_breakdown, '{}'::jsonb), + 'office', coalesce(o.office_issue_breakdown, '{}'::jsonb) + ) as address_issue_breakdown, + p.party_issue_breakdown, + o.office_issue_breakdown, + + -- Address ALL (party + office; supported) + a.address_all_null_count, + a.address_all_stub_count, + 
a.address_all_any_bad_key_fields_count, + a.address_all_any_bad_count, + a.address_all_healthy_count, + a.address_all_missing_city_count, + a.address_all_missing_province_count, + a.address_all_missing_country_count, + a.address_all_missing_postal_code_count, + a.address_all_missing_addr1_count, + + -- Party ALL (supported) + a.party_all_null_count, + a.party_all_stub_count, + a.party_all_any_bad_key_fields_count, + a.party_any_bad_count, + a.party_all_any_bad_count, + a.party_all_healthy_count, + a.party_all_missing_city_count, + a.party_all_missing_province_count, + a.party_all_missing_country_count, + a.party_all_missing_postal_code_count, + a.party_all_missing_addr1_count, + + -- Office ALL (supported) + a.office_all_null_count, + a.office_all_stub_count, + a.office_all_any_bad_key_fields_count, + a.office_any_bad_count, + a.office_all_any_bad_count, + a.office_all_healthy_count, + a.office_all_missing_city_count, + a.office_all_missing_province_count, + a.office_all_missing_country_count, + a.office_all_missing_postal_code_count, + a.office_all_missing_addr1_count, + + -- Non‑base parties (NOT in DIR/OFF; all party codes) + a.non_base_parties_null_count, + a.non_base_parties_stub_count, + a.non_base_parties_any_bad_key_fields_count, + a.non_base_parties_any_bad_count, + a.non_base_parties_healthy_count, + a.non_base_parties_missing_city_count, + a.non_base_parties_missing_province_count, + a.non_base_parties_missing_country_count, + a.non_base_parties_missing_postal_code_count, + a.non_base_parties_missing_addr1_count, + + -- Non‑base offices (supported but not RG/RC → LQ, DS) + a.non_base_offices_null_count, + a.non_base_offices_stub_count, + a.non_base_offices_any_bad_key_fields_count, + a.non_base_offices_any_bad_count, + a.non_base_offices_healthy_count, + a.non_base_offices_missing_city_count, + a.non_base_offices_missing_province_count, + a.non_base_offices_missing_country_count, + a.non_base_offices_missing_postal_code_count, + 
a.non_base_offices_missing_addr1_count, + + -- Parties per type (supported) + a.dir_null_count, a.dir_stub_count, a.dir_any_bad_key_fields_count, a.dir_any_bad_count, a.dir_healthy_count, + a.dir_missing_city_count, a.dir_missing_province_count, a.dir_missing_country_count, a.dir_missing_postal_code_count, a.dir_missing_addr1_count, + + a.officer_null_count, a.officer_stub_count, a.officer_any_bad_key_fields_count, a.officer_any_bad_count, a.officer_healthy_count, + a.officer_missing_city_count, a.officer_missing_province_count, a.officer_missing_country_count, a.officer_missing_postal_code_count, a.officer_missing_addr1_count, + + a.liq_null_count, a.liq_stub_count, a.liq_any_bad_key_fields_count, a.liq_any_bad_count, a.liq_healthy_count, + a.liq_missing_city_count, a.liq_missing_province_count, a.liq_missing_country_count, a.liq_missing_postal_code_count, a.liq_missing_addr1_count, + + a.rcc_null_count, a.rcc_stub_count, a.rcc_any_bad_key_fields_count, a.rcc_any_bad_count, a.rcc_healthy_count, + a.rcc_missing_city_count, a.rcc_missing_province_count, a.rcc_missing_country_count, a.rcc_missing_postal_code_count, a.rcc_missing_addr1_count, + + a.rcm_null_count, a.rcm_stub_count, a.rcm_any_bad_key_fields_count, a.rcm_any_bad_count, a.rcm_healthy_count, + a.rcm_missing_city_count, a.rcm_missing_province_count, a.rcm_missing_country_count, a.rcm_missing_postal_code_count, a.rcm_missing_addr1_count, + + -- Offices per type (supported) + a.office_rg_null_count, a.office_rg_stub_count, a.office_rg_any_bad_key_fields_count, a.office_rg_any_bad_count, a.office_rg_healthy_count, + a.office_rg_missing_city_count, a.office_rg_missing_province_count, a.office_rg_missing_country_count, a.office_rg_missing_postal_code_count, a.office_rg_missing_addr1_count, + + a.office_rc_null_count, a.office_rc_stub_count, a.office_rc_any_bad_key_fields_count, a.office_rc_any_bad_count, a.office_rc_healthy_count, + a.office_rc_missing_city_count, a.office_rc_missing_province_count, 
a.office_rc_missing_country_count, a.office_rc_missing_postal_code_count, a.office_rc_missing_addr1_count,
+
+    a.office_lq_null_count, a.office_lq_stub_count, a.office_lq_any_bad_key_fields_count, a.office_lq_any_bad_count, a.office_lq_healthy_count,
+    a.office_lq_missing_city_count, a.office_lq_missing_province_count, a.office_lq_missing_country_count, a.office_lq_missing_postal_code_count, a.office_lq_missing_addr1_count,
+
+    a.office_ds_null_count, a.office_ds_stub_count, a.office_ds_any_bad_key_fields_count, a.office_ds_any_bad_count, a.office_ds_healthy_count,
+    a.office_ds_missing_city_count, a.office_ds_missing_province_count, a.office_ds_missing_country_count, a.office_ds_missing_postal_code_count, a.office_ds_missing_addr1_count
+
+from agg a
+left join party_by_type p on p.corp_num = a.corp_num
+left join office_by_type o on o.corp_num = a.corp_num
+WITH NO DATA
+;
+
+-- Unique index on the view's one-row-per-corp key. Explicitly named and
+-- guarded with IF NOT EXISTS so it matches the other materialized-view
+-- indexes in this file (ux_<view>_corpnum) instead of an auto-generated name.
+CREATE UNIQUE INDEX IF NOT EXISTS ux_mv_addr_quality_by_corp_corpnum
+    ON mv_addr_quality_by_corp (corp_num);
+
+ALTER MATERIALIZED VIEW mv_addr_quality_by_corp
+    owner to postgres;
+
+
+-- Slim per-corp screening rollup: totals any_bad_count from
+-- mv_addr_issue_counts_by_entity for the supported party/office types only
+-- (combined, office-only and party-only). Created WITH NO DATA, so it has to
+-- be populated with REFRESH MATERIALIZED VIEW before it can be queried.
+CREATE MATERIALIZED VIEW mv_addr_quality_screening_by_corp AS
+WITH
+-- IMPORTANT: keep these supported type lists in sync with mv_addr_quality_by_corp above.
+params AS (
+    SELECT
+        array['DIR','OFF','LIQ','RCC','RCM']::text[] AS party_types,
+        array['RG','RC','LQ','DS']::text[] AS office_types
+)
+SELECT
+    ec.corp_num,
+    -- Supported parties + supported offices.
+    CAST(COALESCE(SUM(ec.any_bad_count) FILTER (
+        WHERE ((ec.entity_kind='party' AND ec.entity_type = ANY(p.party_types))
+            OR (ec.entity_kind='office' AND ec.entity_type = ANY(p.office_types)))
+    ), 0) AS bigint) AS address_all_any_bad_count,
+    -- Supported offices only.
+    CAST(COALESCE(SUM(ec.any_bad_count) FILTER (
+        WHERE ec.entity_kind='office'
+          AND ec.entity_type = ANY(p.office_types)
+    ), 0) AS bigint) AS office_all_any_bad_count,
+    -- Supported parties only.
+    CAST(COALESCE(SUM(ec.any_bad_count) FILTER (
+        WHERE ec.entity_kind='party'
+          AND ec.entity_type = ANY(p.party_types)
+    ), 0) AS bigint) AS party_all_any_bad_count
+FROM mv_addr_issue_counts_by_entity ec
+CROSS JOIN params p  -- single-row CTE: makes the type lists visible to every FILTER
+GROUP BY ec.corp_num
+WITH NO DATA
+;
+
+CREATE UNIQUE INDEX IF NOT EXISTS ux_mv_addr_quality_screening_by_corp_corpnum
+    ON mv_addr_quality_screening_by_corp (corp_num);
+
+ALTER MATERIALIZED VIEW mv_addr_quality_screening_by_corp
+    owner to postgres;
+
+/* ---------------------------------------------------------------------
+   Share class eligibility issue flags (1 row per corp_num)
+
+   - Uses CURRENT share structure only: share_struct.end_event_id IS NULL
+   - Decimal-place checks call significant_decimal_places(par_value_amt),
+     a helper defined outside this view (not scale() directly).
+     NOTE(review): whether trailing zeros are counted depends on that
+     helper - confirm against its definition.
+   --------------------------------------------------------------------- */
+CREATE MATERIALIZED VIEW mv_share_class_issue_flags AS
+WITH current_class_checks AS (
+    -- One row per share class of the current share structure, with each
+    -- eligibility check evaluated at row level.
+    SELECT
+        ss.corp_num,
+
+        -- Check 1: currency is OTHER (code 'OTH' after trim/upper-casing, or
+        -- a populated other_currency value).
+        (
+            COALESCE(upper(btrim(ssc.currency_typ_cd)) = 'OTH', false)
+            OR (ssc.other_currency IS NOT NULL AND NOT is_blank(ssc.other_currency))
+        ) AS other_currency_hit,
+
+        -- Check 2: par value below 1 with more than 6 decimal places.
+        (
+            ssc.par_value_ind IS TRUE
+            AND ssc.par_value_amt IS NOT NULL
+            AND ssc.par_value_amt < 1
+            AND significant_decimal_places(ssc.par_value_amt) > 6
+        ) AS par_lt1_gt6dp_hit,
+
+        -- Check 3: par value above 1 with more than 2 decimal places.
+        (
+            ssc.par_value_ind IS TRUE
+            AND ssc.par_value_amt IS NOT NULL
+            AND ssc.par_value_amt > 1
+            AND significant_decimal_places(ssc.par_value_amt) > 2
+        ) AS par_gt1_gt2dp_hit
+    FROM share_struct ss
+    INNER JOIN share_struct_cls ssc
+        ON ss.corp_num = ssc.corp_num
+       AND ss.start_event_id = ssc.start_event_id
+    WHERE ss.end_event_id IS NULL
+)
+-- A corp is flagged when any one of its current share classes trips the check.
+SELECT
+    chk.corp_num,
+    bool_or(chk.other_currency_hit) AS has_other_currency,
+    bool_or(chk.par_lt1_gt6dp_hit) AS has_par_value_lt1_gt6dp,
+    bool_or(chk.par_gt1_gt2dp_hit) AS has_par_value_gt1_gt2dp
+FROM current_class_checks chk
+GROUP BY chk.corp_num
+WITH NO DATA
+;
+
+CREATE UNIQUE INDEX IF NOT EXISTS ux_mv_share_class_issue_flags_corpnum
+    ON mv_share_class_issue_flags (corp_num);
+
+ALTER MATERIALIZED VIEW mv_share_class_issue_flags
+    owner to postgres;
+
+
+-- Per-corp rollup of events and their filings: counts, latest timestamps,
+-- the ordered list of "<event_type_cd>_<filing_type_cd>" codes, and whether
+-- any code falls outside the allow-list below.
+CREATE MATERIALIZED VIEW mv_corp_event_filing_rollup AS
+WITH allowed_event_file_codes(code) AS (
+    VALUES
+        ('FILE_ICORP'),
+        ('FILE_ICORU'),
+        ('FILE_ICORC'),
+        ('FILE_ANNBC'),
+        ('FILE_AM_AR'),
+        ('FILE_NOCAD'),
+        ('FILE_APTRA'),
+        ('FILE_NOERA'),
+        ('FILE_AM_DO'),
+        ('FILE_AM_RR'),
+        ('FILE_NOCDR'),
+        ('FILE_AM_DI'),
+        ('FILE_NOALA'),
+        ('FILE_NOALB'),
+        ('FILE_NOALU'),
+        ('FILE_NOALC'),
+        ('FILE_AM_BC'),
+        ('FILE_AM_LI'),
+        ('FILE_AM_RM'),
+        ('FILE_AM_SS'),
+        ('ADCORP_NULL'),
+        -- Skipped events/filing types
+        ('FILE_COGS1'),
+        ('FILE_CHGJU'),
+        ('FILE_NWPTA'),
+        ('FILE_PARES'),
+        ('FILE_TILAT'),
+        ('FILE_TILHO'),
+        ('FILE_TILMA'),
+        ('SYST_CANPS'),
+        ('SYST_CHGJU'),
+        ('SYST_CHGPN'),
+        ('SYST_CO_PN'),
+        ('SYST_LNKPS'),
+        ('SYST_NWPTA'),
+        ('SYST_PARES'),
+        ('SYST_RIPFL'),
+        ('SYST_TILAT'),
+        ('SYST_TILHO'),
+        ('SYST_NULL'),
+        ('SYSD1_NULL'),
+        ('SYSD2_NULL'),
+        ('SYST1_NULL'),
+        ('SYST2_NULL'),
+        ('TRESP_NULL'),
+        ('TRESP_COUTI')
+),
+events_with_code AS (
+    -- Every event, tagged with its combined code; an event without a filing
+    -- row gets the literal suffix 'NULL'.
+    SELECT
+        e.corp_num,
+        e.event_id,
+        e.event_type_cd,
+        e.event_timerstamp,
+        e.event_type_cd || '_' || COALESCE(f.filing_type_cd, 'NULL') AS event_file_code
+    FROM event e
+    LEFT JOIN filing f
+        ON e.event_id = f.event_id
+),
+events_flagged AS (
+    -- Disallowed = has a code, and that code is absent from the allow-list.
+    -- A NULL code (NULL event_type_cd) is never counted as disallowed.
+    SELECT
+        ewc.corp_num,
+        ewc.event_id,
+        ewc.event_type_cd,
+        ewc.event_timerstamp,
+        ewc.event_file_code,
+        (ewc.event_file_code IS NOT NULL AND allowed.code IS NULL) AS is_disallowed
+    FROM events_with_code ewc
+    LEFT JOIN allowed_event_file_codes allowed
+        ON allowed.code = ewc.event_file_code
+)
+SELECT
+    ef.corp_num,
+    COUNT(*) AS event_count,
+    -- FILE events within two years of CURRENT_DATE at the time the view is populated.
+    COUNT(*) FILTER (
+        WHERE ef.event_type_cd = 'FILE'
+          AND ef.event_timerstamp >= CURRENT_DATE - INTERVAL '2 years'
+    ) AS file_event_count_last_2yrs,
+    MAX(ef.event_timerstamp) FILTER (WHERE ef.event_type_cd = 'FILE') AS last_file_event_ts,
+    MAX(ef.event_timerstamp) AS last_event_ts,
+    -- Full code history in event_id order (duplicates kept).
+    STRING_AGG(ef.event_file_code, ',' ORDER BY ef.event_id) AS event_file_types,
+    COALESCE(BOOL_OR(ef.is_disallowed), false) AS has_disallowed_event,
+    -- Distinct disallowed codes only, sorted alphabetically.
+    STRING_AGG(DISTINCT ef.event_file_code, ',' ORDER BY ef.event_file_code)
+        FILTER (WHERE ef.is_disallowed) AS failed_events
+FROM events_flagged ef
+GROUP BY ef.corp_num
+WITH NO DATA
+;
+
+CREATE UNIQUE INDEX IF NOT EXISTS ux_mv_corp_event_filing_rollup_corpnum
+    ON mv_corp_event_filing_rollup (corp_num);
+
+ALTER MATERIALIZED VIEW mv_corp_event_filing_rollup
+    owner to postgres;
+
+
+CREATE MATERIALIZED VIEW mv_legacy_corps_data AS
+WITH cp_completed AS (
+    /* One completed migration row per corp in prod */
+    SELECT DISTINCT ON (cp.corp_num)
+        cp.corp_num,
+        cp.mig_batch_id
+    FROM corp_processing cp
+    WHERE
cp.environment = 'prod' + AND cp.processed_status = 'COMPLETED' + ORDER BY + cp.corp_num, + (cp.mig_batch_id IS NULL), + cp.last_modified DESC NULLS LAST, + cp.id DESC +), +mcb_prod AS ( + /* One queued migration batch per corp for prod */ + SELECT DISTINCT ON (mcb.corp_num) + mcb.corp_num, + mcb.mig_batch_id + FROM mig_corp_batch mcb + JOIN mig_batch mb + ON mb.id = mcb.mig_batch_id + LEFT JOIN mig_group mg + ON mg.id = mb.mig_group_id + WHERE mb.target_environment = 'prod' + AND (mg.target_environment = 'prod' OR mg.id IS NULL) + ORDER BY + mcb.corp_num, + mb.migrated_date DESC NULLS LAST, + mb.requested_date DESC NULLS LAST, + mb.created_date DESC NULLS LAST, + mb.id DESC, + mcb.id DESC +), +mig_status AS ( + /* Precedence: Y (completed) over QUEUED */ + SELECT DISTINCT ON (corp_num) + corp_num, + migrated, + mig_batch_id + FROM ( + SELECT corp_num, 'Y'::text AS migrated, mig_batch_id, 1 AS precedence FROM cp_completed + UNION ALL + SELECT corp_num, 'QUEUED'::text AS migrated, mig_batch_id, 2 AS precedence FROM mcb_prod + ) u + ORDER BY corp_num, precedence +) +SELECT COALESCE(edg.group_name, NULL) AS group_name, + tblFe.email_domain, + admin_email, + CAST(email_used_count as integer) as email_used_count, + CAST(EXISTS ( + SELECT 1 + FROM bad_emails be + WHERE btrim(tblFe.admin_email) <> '' + AND lower(btrim(be.email)) = lower(btrim(tblFe.admin_email)) + ) AS boolean) AS is_bad_email, + corp_num, + corp_name, + corp_type_cd, + CAST(is_frozen as boolean) as is_frozen, + CAST(is_active as boolean) as is_active, + CAST(in_dissolution as boolean) as in_dissolution, + CAST(meets_main_criteria as boolean) as meets_main_criteria, + CAST(recognition_dts as date) as recognition_dts, + CAST(last_ar_filed_dt as date) as last_ar_filed_dt, + CAST(director_count as integer) as director_count, + CAST(directors_within_bc as boolean) as directors_within_bc, + CAST(directors_within_ca as boolean) as directors_within_ca, + CAST(filing_cnt as integer) as filing_cnt, + 
CAST(months_since_last_ar_filing as integer) as months_since_last_ar_filing, + CAST(ar_unfiled_over_1yr as boolean) as ar_unfiled_over_1yr, + CAST(file_cnt_last_2yrs as integer) as file_cnt_last_2yrs, + CAST(last_file_event_ts as timestamp) as last_file_event_ts, + CAST(last_event_ts as timestamp) as last_event_ts, + event_file_types, + failed_events, + CAST(has_officers as boolean) as has_officers, + CAST(has_3rd_party as boolean) as has_3rd_party, + vendor, + -- NOTE: `group_name` comes from email_domain_groups; migration tracking fields are prefixed with `mig_` to avoid confusion. + mg.id as mig_group_id, + mg.name as mig_group_name, + mg.display_name as mig_group_display_name, + ms.mig_batch_id as mig_batch_id, + mb.name as mig_batch_name, + mb.display_name as mig_batch_display_name, + mb.migrated_date as mig_date, + COALESCE(ms.migrated, 'N') AS migrated, + CAST(EXISTS ( + SELECT 1 + FROM exclude_corps ec + WHERE ec.corp_num = tblFe.corp_num + ) AS boolean) AS is_migration_excluded, + CAST(has_password as boolean) as has_password, + CAST(COALESCE(sif.has_other_currency, false) as boolean) as has_other_share_currency, + CAST(COALESCE(sif.has_par_value_lt1_gt6dp, false) as boolean) as has_share_par_value_lt1_gt6dp, + CAST(COALESCE(sif.has_par_value_gt1_gt2dp, false) as boolean) as has_share_par_value_gt1_gt2dp, + CAST( + NOT ( + COALESCE(sif.has_other_currency, false) + OR COALESCE(sif.has_par_value_lt1_gt6dp, false) + OR COALESCE(sif.has_par_value_gt1_gt2dp, false) + ) + as boolean) as meets_share_criteria, + CAST(send_ar_ind as boolean) as send_ar_ind, + address_all_any_bad_count, + office_all_any_bad_count, + party_all_any_bad_count, + CAST(has_bar_filing as boolean) as has_bar_filing, + CAST(last_bar_fiscal_year as integer) as last_bar_fiscal_year, + CAST(last_bar_filing_date as timestamp) as last_bar_filing_date, + CAST(months_since_last_bar_filing as integer) as months_since_last_bar_filing, + bar_keycloak_guid, + bar_idp_userid, + bar_account_id, + 
CAST(bar_account_has_mailing_address as boolean) as bar_account_has_mailing_address, + ting_corps +FROM ( + SELECT DISTINCT + LOWER(SPLIT_PART(c.admin_email, '@', 2)) as email_domain, + LOWER(c.admin_email) as admin_email, + aec.email_count AS email_used_count, + ev.corp_num, + cn.corp_name, + c.corp_type_cd, + CASE WHEN c.corp_frozen_type_cd is null THEN 'N' ELSE 'Y' END AS is_frozen, + cs.is_active, + cs.in_dissolution, + CASE WHEN COALESCE(ev.has_disallowed_event, false) THEN 'N' ELSE 'Y' END AS meets_main_criteria, + c.recognition_dts::date, + c.last_ar_filed_dt::date, + CASE + WHEN last_ar_filed_dt IS NOT NULL THEN + EXTRACT(MONTH FROM AGE(CURRENT_DATE::TIMESTAMP, last_ar_filed_dt::TIMESTAMP)) + + EXTRACT(YEAR FROM AGE(CURRENT_DATE::TIMESTAMP, last_ar_filed_dt::TIMESTAMP)) * 12 + ELSE + EXTRACT(MONTH FROM AGE(CURRENT_DATE::TIMESTAMP, recognition_dts::TIMESTAMP)) + + EXTRACT(YEAR FROM AGE(CURRENT_DATE::TIMESTAMP, recognition_dts::TIMESTAMP)) * 12 + END as "months_since_last_ar_filing", + CASE + WHEN c.last_ar_filed_dt IS NULL AND c.recognition_dts < CURRENT_DATE - INTERVAL '1 year' THEN 'Y' + WHEN c.last_ar_filed_dt IS NOT NULL AND c.last_ar_filed_dt < CURRENT_DATE - INTERVAL '1 year' THEN 'Y' + ELSE 'N' + END AS ar_unfiled_over_1yr, + ev.event_count AS filing_cnt, + ev.file_event_count_last_2yrs AS file_cnt_last_2yrs, + ev.last_file_event_ts, + ev.last_event_ts, + ev.event_file_types, + ev.failed_events, + COALESCE(cprt_dir.director_count, 0) AS director_count, + CASE + WHEN COALESCE(dir_mail_loc.dir_cnt, 0) > 0 + AND COALESCE(dir_mail_loc.dir_bc_mailing_cnt, 0) = dir_mail_loc.dir_cnt + THEN 'Y' + ELSE 'N' + END AS directors_within_bc, + CASE + WHEN COALESCE(dir_mail_loc.dir_cnt, 0) > 0 + AND COALESCE(dir_mail_loc.dir_ca_mailing_cnt, 0) = dir_mail_loc.dir_cnt + THEN 'Y' + ELSE 'N' + END AS directors_within_ca, + CASE WHEN cow.corp_num IS NOT NULL THEN 'Y' ELSE 'N' END AS has_officers, + CASE WHEN cowtp.corp_num IS NOT NULL THEN 'Y' ELSE 'N' END AS 
has_3rd_party, + CASE WHEN cowtp.corp_num IS NOT NULL THEN cowtp.vendor ELSE '' END AS vendor, + CASE WHEN c.corp_password IS NOT NULL THEN 'Y' ELSE 'N' END AS has_password, + c.send_ar_ind, + mvaqs.address_all_any_bad_count, + mvaqs.office_all_any_bad_count, + mvaqs.party_all_any_bad_count, + CASE WHEN bc.identifier IS NOT NULL THEN 'Y' ELSE 'N' END AS has_bar_filing, + bc.latest_fiscal_year AS last_bar_fiscal_year, + bc.last_ar_filing_date AS last_bar_filing_date, + CASE + WHEN bc.last_ar_filing_date IS NOT NULL THEN + EXTRACT(MONTH FROM AGE(CURRENT_DATE::TIMESTAMP, bc.last_ar_filing_date::TIMESTAMP)) + + EXTRACT(YEAR FROM AGE(CURRENT_DATE::TIMESTAMP, bc.last_ar_filing_date::TIMESTAMP)) * 12 + ELSE NULL + END AS months_since_last_bar_filing, + bc.sub as bar_keycloak_guid, + bc.idp_userid as bar_idp_userid, + bc.payment_account as bar_account_id, + bc.bar_account_has_mailing_address as bar_account_has_mailing_address, + ciaml.ting_corps as ting_corps + FROM mv_corp_event_filing_rollup ev + JOIN corporation c ON c.corp_num = ev.corp_num + INNER JOIN ( + SELECT + corp_num, + COALESCE(BOOL_OR(COALESCE(op_state_type_cd = 'ACT', false)), false) AS is_active, + COALESCE( + BOOL_OR( + COALESCE( + op_state_type_cd = 'ACT' + AND state_type_cd IN ('D1A', 'D1F', 'D1T', 'D2A', 'D2F', 'D2T'), + false + ) + ), + false + ) AS in_dissolution + FROM corp_state + WHERE end_event_id IS NULL + GROUP BY corp_num + ) cs ON cs.corp_num = c.corp_num + INNER JOIN corp_name cn + ON cn.corp_num = c.corp_num + AND cn.end_event_id IS NULL + AND cn.corp_name_typ_cd IN ('CO', 'NB') + LEFT OUTER JOIN mv_corps_with_officers cow ON c.corp_num = cow.corp_num + LEFT OUTER JOIN corps_with_third_party cowtp ON c.corp_num = cowtp.corp_num + LEFT OUTER JOIN ( + SELECT corp_num, MAX(party_typ_count) AS director_count + FROM mv_corps_party_role_count + WHERE party_typ_cd = 'DIR' + GROUP BY corp_num + ) cprt_dir ON cprt_dir.corp_num = c.corp_num + LEFT OUTER JOIN ( + SELECT + cp.corp_num, + COUNT(*) AS 
dir_cnt, + COUNT(*) Filter ( + WHERE cp.mailing_addr_id IS NOT NULL + AND cp.mailing_addr_id <> 1 + AND UPPER(TRIM(ma.province)) = 'BC' + ) AS dir_bc_mailing_cnt, + COUNT(*) Filter ( + WHERE cp.mailing_addr_id IS NOT NULL + AND cp.mailing_addr_id <> 1 + AND UPPER(TRIM(ma.country_typ_cd)) = 'CA' + ) AS dir_ca_mailing_cnt + FROM corp_party cp + LEFT JOIN address ma ON ma.addr_id = cp.mailing_addr_id + WHERE cp.end_event_id is NULL + AND cp.party_typ_cd = 'DIR' + GROUP BY cp.corp_num + ) dir_mail_loc ON dir_mail_loc.corp_num = c.corp_num + LEFT OUTER JOIN mv_admin_email_count aec ON c.admin_email = aec.admin_email + LEFT OUTER JOIN mv_addr_quality_screening_by_corp mvaqs ON c.corp_num = mvaqs.corp_num + LEFT OUTER JOIN bar_corps bc ON c.corp_num = bc.identifier + LEFT OUTER JOIN ( + SELECT ted_corp_num, array_agg(ting_corp_num) AS ting_corps + FROM corp_involved_amalgamating + GROUP BY ted_corp_num + ) ciaml ON c.corp_num = ciaml.ted_corp_num + WHERE 1 = 1 +) tblFe +LEFT JOIN mig_status ms USING (corp_num) +LEFT JOIN mig_batch mb ON mb.id = ms.mig_batch_id +LEFT JOIN mig_group mg ON mg.id = mb.mig_group_id +LEFT JOIN email_domain_groups edg ON tblFe.email_domain = edg.email_domain +LEFT JOIN mv_share_class_issue_flags sif USING (corp_num) +where 1=1 +WITH NO DATA +; + +ALTER MATERIALIZED VIEW mv_legacy_corps_data + owner to postgres; + +CREATE OR REPLACE VIEW v_business_state AS +SELECT cs.corp_num, + CASE cs.op_state_type_cd + WHEN 'ACT' THEN 'ACTIVE' + WHEN 'HIS' THEN 'HISTORICAL' + ELSE cs.op_state_type_cd + END AS business_state +FROM corp_state cs +WHERE cs.end_event_id IS NULL; + +ALTER VIEW v_business_state + owner to postgres; + +CREATE OR REPLACE VIEW v_auth_component_operation_audit AS +SELECT + aco.id AS component_operation_id, + aco.auth_processing_id, + aco.corp_num, + aco.environment, + aco.flow_name, + aco.flow_run_id, + c.corp_type_cd, + c.recognition_dts, + ap.processed_status AS auth_processing_status, + ap.operation AS processing_operation, + 
ap.operation_scope AS processing_operation_scope, + ap.operation_target AS processing_operation_target, + ap.repeatability, + ap.attempt_key, + ap.dry_run AS processing_dry_run, + ap.attempt_key_context AS processing_attempt_key_context, + ap.mig_batch_id, + ap.claimed_at, + ap.create_date AS processing_create_date, + ap.last_modified AS processing_last_modified, + ap.last_error AS processing_last_error, + ap.entity_action, + ap.contact_action, + ap.affiliation_action, + ap.invite_action, + ap.action_detail AS processing_action_detail, + aco.operation AS component_operation, + aco.operation_scope AS component_operation_scope, + aco.component, + aco.target_type, + aco.target_value, + aco.action, + aco.status_code, + aco.error AS component_error, + aco.detail AS component_detail, + aco.dry_run AS component_dry_run, + aco.create_date AS component_create_date, + aco.corp_num = ap.corp_num AS corp_num_matches_parent, + aco.environment = ap.environment AS environment_matches_parent, + aco.flow_name = ap.flow_name AS flow_name_matches_parent, + aco.flow_run_id IS NOT DISTINCT FROM ap.flow_run_id AS flow_run_id_matches_parent +FROM auth_component_operation aco +JOIN auth_processing ap ON ap.id = aco.auth_processing_id +LEFT JOIN corporation c ON c.corp_num = aco.corp_num; + +ALTER VIEW v_auth_component_operation_audit + owner to postgres; + +CREATE MATERIALIZED VIEW mv_corp_issue_flags AS +SELECT + co.corp_num, + bs.business_state, + co.corp_type_cd, + + /* ---------- UNIVERSAL denominators (ALL links / ALL real) ---------- */ + ( + COALESCE(a.address_all_null_count,0) + + COALESCE(a.address_all_stub_count,0) + + COALESCE(a.address_all_any_bad_key_fields_count,0) + + COALESCE(a.address_all_healthy_count,0) + ) > 0 AS has_any_link, + ( + COALESCE(a.address_all_any_bad_key_fields_count,0) + + COALESCE(a.address_all_healthy_count,0) + ) > 0 AS has_any_real, + + /* ---------- PARTY denominators ---------- */ + ( + COALESCE(a.party_all_null_count,0) + + 
COALESCE(a.party_all_stub_count,0) + + COALESCE(a.party_all_any_bad_key_fields_count,0) + + COALESCE(a.party_all_healthy_count,0) + ) > 0 AS has_party_link, + ( + COALESCE(a.party_all_any_bad_key_fields_count,0) + + COALESCE(a.party_all_healthy_count,0) + ) > 0 AS has_party_real, + + /* ---------- OFFICE denominators ---------- */ + ( + COALESCE(a.office_all_null_count,0) + + COALESCE(a.office_all_stub_count,0) + + COALESCE(a.office_all_any_bad_key_fields_count,0) + + COALESCE(a.office_all_healthy_count,0) + ) > 0 AS has_office_link, + ( + COALESCE(a.office_all_any_bad_key_fields_count,0) + + COALESCE(a.office_all_healthy_count,0) + ) > 0 AS has_office_real, + + /* ---------- ENTITY‑TYPE denominators (party types) ---------- */ + (COALESCE(a.dir_null_count,0) + COALESCE(a.dir_stub_count,0) + + COALESCE(a.dir_any_bad_key_fields_count,0) + COALESCE(a.dir_healthy_count,0)) > 0 AS has_dir_link, + (COALESCE(a.dir_any_bad_key_fields_count,0) + COALESCE(a.dir_healthy_count,0)) > 0 AS has_dir_real, + + (COALESCE(a.officer_null_count,0) + COALESCE(a.officer_stub_count,0) + + COALESCE(a.officer_any_bad_key_fields_count,0) + COALESCE(a.officer_healthy_count,0)) > 0 AS has_officer_link, + (COALESCE(a.officer_any_bad_key_fields_count,0) + COALESCE(a.officer_healthy_count,0)) > 0 AS has_officer_real, + + (COALESCE(a.liq_null_count,0) + COALESCE(a.liq_stub_count,0) + + COALESCE(a.liq_any_bad_key_fields_count,0) + COALESCE(a.liq_healthy_count,0)) > 0 AS has_liq_link, + (COALESCE(a.liq_any_bad_key_fields_count,0) + COALESCE(a.liq_healthy_count,0)) > 0 AS has_liq_real, + + (COALESCE(a.rcc_null_count,0) + COALESCE(a.rcc_stub_count,0) + + COALESCE(a.rcc_any_bad_key_fields_count,0) + COALESCE(a.rcc_healthy_count,0)) > 0 AS has_rcc_link, + (COALESCE(a.rcc_any_bad_key_fields_count,0) + COALESCE(a.rcc_healthy_count,0)) > 0 AS has_rcc_real, + + (COALESCE(a.rcm_null_count,0) + COALESCE(a.rcm_stub_count,0) + + COALESCE(a.rcm_any_bad_key_fields_count,0) + COALESCE(a.rcm_healthy_count,0)) > 0 
AS has_rcm_link, + (COALESCE(a.rcm_any_bad_key_fields_count,0) + COALESCE(a.rcm_healthy_count,0)) > 0 AS has_rcm_real, + + /* ---------- ENTITY‑TYPE denominators (office types) ---------- */ + (COALESCE(a.office_rg_null_count,0) + COALESCE(a.office_rg_stub_count,0) + + COALESCE(a.office_rg_any_bad_key_fields_count,0) + COALESCE(a.office_rg_healthy_count,0)) > 0 AS has_office_rg_link, + (COALESCE(a.office_rg_any_bad_key_fields_count,0) + COALESCE(a.office_rg_healthy_count,0)) > 0 AS has_office_rg_real, + + (COALESCE(a.office_rc_null_count,0) + COALESCE(a.office_rc_stub_count,0) + + COALESCE(a.office_rc_any_bad_key_fields_count,0) + COALESCE(a.office_rc_healthy_count,0)) > 0 AS has_office_rc_link, + (COALESCE(a.office_rc_any_bad_key_fields_count,0) + COALESCE(a.office_rc_healthy_count,0)) > 0 AS has_office_rc_real, + + (COALESCE(a.office_lq_null_count,0) + COALESCE(a.office_lq_stub_count,0) + + COALESCE(a.office_lq_any_bad_key_fields_count,0) + COALESCE(a.office_lq_healthy_count,0)) > 0 AS has_office_lq_link, + (COALESCE(a.office_lq_any_bad_key_fields_count,0) + COALESCE(a.office_lq_healthy_count,0)) > 0 AS has_office_lq_real, + + (COALESCE(a.office_ds_null_count,0) + COALESCE(a.office_ds_stub_count,0) + + COALESCE(a.office_ds_any_bad_key_fields_count,0) + COALESCE(a.office_ds_healthy_count,0)) > 0 AS has_office_ds_link, + (COALESCE(a.office_ds_any_bad_key_fields_count,0) + COALESCE(a.office_ds_healthy_count,0)) > 0 AS has_office_ds_real, + + /* ---------- Any‑bad / Healthy at BUSINESS level ---------- */ + COALESCE(a.address_all_any_bad_count,0) > 0 AS any_bad_business, + COALESCE(a.address_all_any_bad_count,0) = 0 AS healthy_business, + + /* ---------- ALL (party+office) issues as booleans ---------- */ + COALESCE(a.address_all_null_count,0) > 0 AS has_null, + COALESCE(a.address_all_stub_count,0) > 0 AS has_stub, + COALESCE(a.address_all_missing_city_count,0) > 0 AS has_missing_city, + COALESCE(a.address_all_missing_province_count,0)> 0 AS has_missing_province, + 
COALESCE(a.address_all_missing_country_count,0) > 0 AS has_missing_country, + COALESCE(a.address_all_missing_postal_code_count,0) > 0 AS has_missing_postal, + COALESCE(a.address_all_missing_addr1_count,0) > 0 AS has_missing_addr1, + + /* ---------- PARTY aggregate issues ---------- */ + COALESCE(a.party_all_null_count,0) > 0 AS party_has_null, + COALESCE(a.party_all_stub_count,0) > 0 AS party_has_stub, + COALESCE(a.party_all_any_bad_key_fields_count,0)> 0 AS party_has_key_missing, + COALESCE(a.party_all_missing_city_count,0) > 0 AS party_missing_city, + COALESCE(a.party_all_missing_province_count,0) > 0 AS party_missing_province, + COALESCE(a.party_all_missing_country_count,0) > 0 AS party_missing_country, + COALESCE(a.party_all_missing_postal_code_count,0)>0 AS party_missing_postal, + COALESCE(a.party_all_missing_addr1_count,0) > 0 AS party_missing_addr1, + + /* ---------- OFFICE aggregate issues ---------- */ + COALESCE(a.office_all_null_count,0) > 0 AS office_has_null, + COALESCE(a.office_all_stub_count,0) > 0 AS office_has_stub, + COALESCE(a.office_all_any_bad_key_fields_count,0)>0 AS office_has_key_missing, + COALESCE(a.office_all_missing_city_count,0) > 0 AS office_missing_city, + COALESCE(a.office_all_missing_province_count,0) > 0 AS office_missing_province, + COALESCE(a.office_all_missing_country_count,0) > 0 AS office_missing_country, + COALESCE(a.office_all_missing_postal_code_count,0)>0 AS office_missing_postal, + COALESCE(a.office_all_missing_addr1_count,0) > 0 AS office_missing_addr1, + + /* ---------- PARTY per‑type issues ---------- */ + COALESCE(a.dir_null_count,0) > 0 AS dir_null, + COALESCE(a.dir_stub_count,0) > 0 AS dir_stub, + COALESCE(a.dir_missing_city_count,0) > 0 AS dir_missing_city, + COALESCE(a.dir_missing_province_count,0) > 0 AS dir_missing_province, + COALESCE(a.dir_missing_country_count,0) > 0 AS dir_missing_country, + COALESCE(a.dir_missing_postal_code_count,0) > 0 AS dir_missing_postal, + COALESCE(a.dir_missing_addr1_count,0) > 0 AS 
dir_missing_addr1, + + COALESCE(a.officer_null_count,0) > 0 AS officer_null, + COALESCE(a.officer_stub_count,0) > 0 AS officer_stub, + COALESCE(a.officer_missing_city_count,0) > 0 AS officer_missing_city, + COALESCE(a.officer_missing_province_count,0) > 0 AS officer_missing_province, + COALESCE(a.officer_missing_country_count,0) > 0 AS officer_missing_country, + COALESCE(a.officer_missing_postal_code_count,0) > 0 AS officer_missing_postal, + COALESCE(a.officer_missing_addr1_count,0) > 0 AS officer_missing_addr1, + + COALESCE(a.liq_null_count,0) > 0 AS liq_null, + COALESCE(a.liq_stub_count,0) > 0 AS liq_stub, + COALESCE(a.liq_missing_city_count,0) > 0 AS liq_missing_city, + COALESCE(a.liq_missing_province_count,0) > 0 AS liq_missing_province, + COALESCE(a.liq_missing_country_count,0) > 0 AS liq_missing_country, + COALESCE(a.liq_missing_postal_code_count,0) > 0 AS liq_missing_postal, + COALESCE(a.liq_missing_addr1_count,0) > 0 AS liq_missing_addr1, + + COALESCE(a.rcc_null_count,0) > 0 AS rcc_null, + COALESCE(a.rcc_stub_count,0) > 0 AS rcc_stub, + COALESCE(a.rcc_missing_city_count,0) > 0 AS rcc_missing_city, + COALESCE(a.rcc_missing_province_count,0) > 0 AS rcc_missing_province, + COALESCE(a.rcc_missing_country_count,0) > 0 AS rcc_missing_country, + COALESCE(a.rcc_missing_postal_code_count,0) > 0 AS rcc_missing_postal, + COALESCE(a.rcc_missing_addr1_count,0) > 0 AS rcc_missing_addr1, + + COALESCE(a.rcm_null_count,0) > 0 AS rcm_null, + COALESCE(a.rcm_stub_count,0) > 0 AS rcm_stub, + COALESCE(a.rcm_missing_city_count,0) > 0 AS rcm_missing_city, + COALESCE(a.rcm_missing_province_count,0) > 0 AS rcm_missing_province, + COALESCE(a.rcm_missing_country_count,0) > 0 AS rcm_missing_country, + COALESCE(a.rcm_missing_postal_code_count,0) > 0 AS rcm_missing_postal, + COALESCE(a.rcm_missing_addr1_count,0) > 0 AS rcm_missing_addr1, + + /* ---------- OFFICE per‑type issues ---------- */ + COALESCE(a.office_rg_null_count,0) > 0 AS office_rg_null, + COALESCE(a.office_rg_stub_count,0) 
> 0 AS office_rg_stub, + COALESCE(a.office_rg_missing_city_count,0) > 0 AS office_rg_missing_city, + COALESCE(a.office_rg_missing_province_count,0) > 0 AS office_rg_missing_province, + COALESCE(a.office_rg_missing_country_count,0) > 0 AS office_rg_missing_country, + COALESCE(a.office_rg_missing_postal_code_count,0) > 0 AS office_rg_missing_postal, + COALESCE(a.office_rg_missing_addr1_count,0) > 0 AS office_rg_missing_addr1, + + COALESCE(a.office_rc_null_count,0) > 0 AS office_rc_null, + COALESCE(a.office_rc_stub_count,0) > 0 AS office_rc_stub, + COALESCE(a.office_rc_missing_city_count,0) > 0 AS office_rc_missing_city, + COALESCE(a.office_rc_missing_province_count,0) > 0 AS office_rc_missing_province, + COALESCE(a.office_rc_missing_country_count,0) > 0 AS office_rc_missing_country, + COALESCE(a.office_rc_missing_postal_code_count,0) > 0 AS office_rc_missing_postal, + COALESCE(a.office_rc_missing_addr1_count,0) > 0 AS office_rc_missing_addr1, + + COALESCE(a.office_lq_null_count,0) > 0 AS office_lq_null, + COALESCE(a.office_lq_stub_count,0) > 0 AS office_lq_stub, + COALESCE(a.office_lq_missing_city_count,0) > 0 AS office_lq_missing_city, + COALESCE(a.office_lq_missing_province_count,0) > 0 AS office_lq_missing_province, + COALESCE(a.office_lq_missing_country_count,0) > 0 AS office_lq_missing_country, + COALESCE(a.office_lq_missing_postal_code_count,0) > 0 AS office_lq_missing_postal, + COALESCE(a.office_lq_missing_addr1_count,0) > 0 AS office_lq_missing_addr1, + + COALESCE(a.office_ds_null_count,0) > 0 AS office_ds_null, + COALESCE(a.office_ds_stub_count,0) > 0 AS office_ds_stub, + COALESCE(a.office_ds_missing_city_count,0) > 0 AS office_ds_missing_city, + COALESCE(a.office_ds_missing_province_count,0) > 0 AS office_ds_missing_province, + COALESCE(a.office_ds_missing_country_count,0) > 0 AS office_ds_missing_country, + COALESCE(a.office_ds_missing_postal_code_count,0) > 0 AS office_ds_missing_postal, + COALESCE(a.office_ds_missing_addr1_count,0) > 0 AS 
office_ds_missing_addr1 + +FROM corporation co +LEFT JOIN v_business_state bs ON bs.corp_num = co.corp_num +LEFT JOIN mv_addr_quality_by_corp a ON a.corp_num = co.corp_num +WITH NO DATA +; + +-- Indexes for the MV (fast filtering + group‑bys) +CREATE UNIQUE INDEX IF NOT EXISTS ux_mv_corp_issue_flags_corp + ON mv_corp_issue_flags (corp_num); +CREATE INDEX IF NOT EXISTS ix_mv_corp_issue_flags_state_type + ON mv_corp_issue_flags (business_state, corp_type_cd); + +ALTER MATERIALIZED VIEW mv_corp_issue_flags + owner to postgres; + +CREATE OR REPLACE VIEW v_corp_issue_flags_long AS +WITH base AS ( + SELECT f.*, lcd.group_name, lcd.recognition_dts + FROM mv_corp_issue_flags f + LEFT JOIN mv_legacy_corps_data lcd USING (corp_num) +) +/* ----- ALL (party+office combined) ----- */ +SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, + 'ALL'::text AS entity_kind, NULL::text AS entity_type, + 'Null'::text AS issue, has_null AS has_issue, + has_any_link AS denom_link, has_any_real AS denom_real +FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'ALL', NULL, 'Stub', has_stub, has_any_link, has_any_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'ALL', NULL, 'Missing city', has_missing_city, has_any_link, has_any_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'ALL', NULL, 'Missing province',has_missing_province,has_any_link, has_any_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'ALL', NULL, 'Missing country', has_missing_country, has_any_link, has_any_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'ALL', NULL, 'Missing postal', has_missing_postal, has_any_link, has_any_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'ALL', NULL, 'Missing 
addr1', has_missing_addr1, has_any_link, has_any_real FROM base + +/* ----- PARTY (aggregate) ----- */ +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', NULL, 'Null', party_has_null, has_party_link, has_party_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', NULL, 'Stub', party_has_stub, has_party_link, has_party_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', NULL, 'Missing city', party_missing_city, has_party_link, has_party_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', NULL, 'Missing province', party_missing_province, has_party_link, has_party_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', NULL, 'Missing country', party_missing_country, has_party_link, has_party_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', NULL, 'Missing postal', party_missing_postal, has_party_link, has_party_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', NULL, 'Missing addr1', party_missing_addr1, has_party_link, has_party_real FROM base + +/* ----- OFFICE (aggregate) ----- */ +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office', NULL, 'Null', office_has_null, has_office_link, has_office_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office', NULL, 'Stub', office_has_stub, has_office_link, has_office_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office', NULL, 'Missing city', office_missing_city, has_office_link, has_office_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, 
recognition_dts, 'office', NULL, 'Missing province', office_missing_province, has_office_link, has_office_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office', NULL, 'Missing country', office_missing_country, has_office_link, has_office_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office', NULL, 'Missing postal', office_missing_postal, has_office_link, has_office_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office', NULL, 'Missing addr1', office_missing_addr1, has_office_link, has_office_real FROM base + +/* ----- PARTY per entity_type ----- */ +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'DIR','Null', dir_null, has_dir_link, has_dir_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'DIR','Stub', dir_stub, has_dir_link, has_dir_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'DIR','Missing city', dir_missing_city, has_dir_link, has_dir_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'DIR','Missing province', dir_missing_province, has_dir_link, has_dir_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'DIR','Missing country', dir_missing_country, has_dir_link, has_dir_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'DIR','Missing postal', dir_missing_postal, has_dir_link, has_dir_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'DIR','Missing addr1', dir_missing_addr1, has_dir_link, has_dir_real FROM base + +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, 
recognition_dts, 'party', 'OFF','Null', officer_null, has_officer_link, has_officer_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'OFF','Stub', officer_stub, has_officer_link, has_officer_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'OFF','Missing city', officer_missing_city, has_officer_link, has_officer_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'OFF','Missing province', officer_missing_province, has_officer_link, has_officer_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'OFF','Missing country', officer_missing_country, has_officer_link, has_officer_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'OFF','Missing postal', officer_missing_postal, has_officer_link, has_officer_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'OFF','Missing addr1', officer_missing_addr1, has_officer_link, has_officer_real FROM base + +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'LIQ','Null', liq_null, has_liq_link, has_liq_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'LIQ','Stub', liq_stub, has_liq_link, has_liq_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'LIQ','Missing city', liq_missing_city, has_liq_link, has_liq_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'LIQ','Missing province', liq_missing_province, has_liq_link, has_liq_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'LIQ','Missing 
country', liq_missing_country, has_liq_link, has_liq_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'LIQ','Missing postal', liq_missing_postal, has_liq_link, has_liq_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'LIQ','Missing addr1', liq_missing_addr1, has_liq_link, has_liq_real FROM base + +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'RCC','Null', rcc_null, has_rcc_link, has_rcc_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'RCC','Stub', rcc_stub, has_rcc_link, has_rcc_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'RCC','Missing city', rcc_missing_city, has_rcc_link, has_rcc_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'RCC','Missing province', rcc_missing_province, has_rcc_link, has_rcc_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'RCC','Missing country', rcc_missing_country, has_rcc_link, has_rcc_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'RCC','Missing postal', rcc_missing_postal, has_rcc_link, has_rcc_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'RCC','Missing addr1', rcc_missing_addr1, has_rcc_link, has_rcc_real FROM base + +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'RCM','Null', rcm_null, has_rcm_link, has_rcm_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'RCM','Stub', rcm_stub, has_rcm_link, has_rcm_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, 
group_name, recognition_dts, 'party', 'RCM','Missing city', rcm_missing_city, has_rcm_link, has_rcm_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'RCM','Missing province', rcm_missing_province, has_rcm_link, has_rcm_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'RCM','Missing country', rcm_missing_country, has_rcm_link, has_rcm_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'RCM','Missing postal', rcm_missing_postal, has_rcm_link, has_rcm_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'RCM','Missing addr1', rcm_missing_addr1, has_rcm_link, has_rcm_real FROM base + +/* ----- OFFICE per entity_type ----- */ +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','RG','Null', office_rg_null, has_office_rg_link, has_office_rg_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','RG','Stub', office_rg_stub, has_office_rg_link, has_office_rg_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','RG','Missing city', office_rg_missing_city, has_office_rg_link, has_office_rg_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','RG','Missing province', office_rg_missing_province, has_office_rg_link, has_office_rg_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','RG','Missing country', office_rg_missing_country, has_office_rg_link, has_office_rg_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','RG','Missing postal', office_rg_missing_postal, has_office_rg_link, has_office_rg_real FROM base 
+UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','RG','Missing addr1', office_rg_missing_addr1, has_office_rg_link, has_office_rg_real FROM base + +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','RC','Null', office_rc_null, has_office_rc_link, has_office_rc_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','RC','Stub', office_rc_stub, has_office_rc_link, has_office_rc_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','RC','Missing city', office_rc_missing_city, has_office_rc_link, has_office_rc_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','RC','Missing province', office_rc_missing_province, has_office_rc_link, has_office_rc_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','RC','Missing country', office_rc_missing_country, has_office_rc_link, has_office_rc_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','RC','Missing postal', office_rc_missing_postal, has_office_rc_link, has_office_rc_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','RC','Missing addr1', office_rc_missing_addr1, has_office_rc_link, has_office_rc_real FROM base + +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','LQ','Null', office_lq_null, has_office_lq_link, has_office_lq_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','LQ','Stub', office_lq_stub, has_office_lq_link, has_office_lq_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','LQ','Missing city', 
office_lq_missing_city, has_office_lq_link, has_office_lq_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','LQ','Missing province', office_lq_missing_province, has_office_lq_link, has_office_lq_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','LQ','Missing country', office_lq_missing_country, has_office_lq_link, has_office_lq_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','LQ','Missing postal', office_lq_missing_postal, has_office_lq_link, has_office_lq_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','LQ','Missing addr1', office_lq_missing_addr1, has_office_lq_link, has_office_lq_real FROM base + +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','DS','Null', office_ds_null, has_office_ds_link, has_office_ds_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','DS','Stub', office_ds_stub, has_office_ds_link, has_office_ds_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','DS','Missing city', office_ds_missing_city, has_office_ds_link, has_office_ds_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','DS','Missing province', office_ds_missing_province, has_office_ds_link, has_office_ds_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','DS','Missing country', office_ds_missing_country, has_office_ds_link, has_office_ds_real FROM base +UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','DS','Missing postal', office_ds_missing_postal, has_office_ds_link, has_office_ds_real FROM base +UNION ALL SELECT 
corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','DS','Missing addr1', office_ds_missing_addr1, has_office_ds_link, has_office_ds_real FROM base
+;
+
+ALTER VIEW v_corp_issue_flags_long
+    owner to postgres;
+
+-- Rollup of mv_corp_issue_flags: one output row per (business_state, corp_type_cd).
+-- Each c_* column counts the source rows whose matching has_* flag is true.
+-- Created WITH NO DATA; it is filled by the REFRESH list further down.
+CREATE MATERIALIZED VIEW mv_issue_counts_by_corp_type AS
+SELECT
+    business_state,
+    corp_type_cd,
+    -- "den_" presumably means denominator (rows with any link / any real address) - TODO confirm with the consumer of this view
+    COUNT(*) FILTER (WHERE has_any_link) AS den_link,
+    COUNT(*) FILTER (WHERE has_any_real) AS den_real,
+    -- every source row in the group; the name suggests one mv_corp_issue_flags row per corp - verify against that view
+    COUNT(*) AS total_corps,
+    COUNT(*) FILTER (WHERE has_null) AS c_null,
+    COUNT(*) FILTER (WHERE has_stub) AS c_stub,
+    COUNT(*) FILTER (WHERE has_missing_city) AS c_mcity,
+    COUNT(*) FILTER (WHERE has_missing_province) AS c_mprov,
+    COUNT(*) FILTER (WHERE has_missing_country) AS c_mcountry,
+    COUNT(*) FILTER (WHERE has_missing_postal) AS c_mpostal,
+    COUNT(*) FILTER (WHERE has_missing_addr1) AS c_maddr1
+FROM mv_corp_issue_flags
+GROUP BY business_state, corp_type_cd
+WITH NO DATA
+;
+
+-- Plain (non-unique) lookup index; its key order (corp_type_cd, business_state) is the reverse of the GROUP BY order.
+CREATE INDEX IF NOT EXISTS ix_mv_issue_counts_by_corp_type
+    ON mv_issue_counts_by_corp_type (corp_type_cd, business_state);
+-- NOTE(review): the lone ';' below is an empty statement, apparently a leftover - confirm it can be dropped.
+;
+
+ALTER MATERIALIZED VIEW mv_issue_counts_by_corp_type
+    owner to postgres;
+
+-- Indexes on mv_legacy_corps_data (the view itself is defined outside this section):
+-- unique on corp_num, plus lookups by group_name and by recognition_dts.
+CREATE UNIQUE INDEX IF NOT EXISTS ux_legacy_corps_data_corpnum ON mv_legacy_corps_data (corp_num);
+
+CREATE INDEX IF NOT EXISTS ix_legacy_group_name ON mv_legacy_corps_data (group_name);
+
+CREATE INDEX IF NOT EXISTS ix_legacy_recog_dts ON mv_legacy_corps_data (recognition_dts);
+
+-- Populate the materialized views with data.
+-- mv_issue_counts_by_corp_type selects from mv_corp_issue_flags, so it must stay after it in this list.
+REFRESH MATERIALIZED VIEW mv_corps_with_officers;
+REFRESH MATERIALIZED VIEW mv_corps_party_role_count;
+REFRESH MATERIALIZED VIEW mv_admin_email_count;
+REFRESH MATERIALIZED VIEW mv_admin_email_domain_count;
+REFRESH MATERIALIZED VIEW mv_addr_issue_counts_by_entity;
+REFRESH MATERIALIZED VIEW mv_addr_quality_by_corp;
+REFRESH MATERIALIZED VIEW mv_addr_quality_screening_by_corp;
+REFRESH MATERIALIZED VIEW mv_share_class_issue_flags;
+REFRESH MATERIALIZED VIEW mv_corp_event_filing_rollup;
+REFRESH MATERIALIZED VIEW mv_legacy_corps_data;
+REFRESH MATERIALIZED VIEW mv_corp_issue_flags;
+REFRESH MATERIALIZED VIEW mv_issue_counts_by_corp_type;
+
+-- Collect planner statistics for the materialized views refreshed above.
+ANALYZE mv_corps_with_officers;
+ANALYZE mv_corps_party_role_count;
+ANALYZE mv_admin_email_count;
+ANALYZE mv_admin_email_domain_count;
+ANALYZE mv_addr_issue_counts_by_entity;
+ANALYZE mv_addr_quality_by_corp;
+ANALYZE mv_addr_quality_screening_by_corp;
+ANALYZE mv_share_class_issue_flags;
+ANALYZE mv_corp_event_filing_rollup;
+ANALYZE mv_legacy_corps_data;
+ANALYZE mv_corp_issue_flags;
+ANALYZE mv_issue_counts_by_corp_type;
+
From cf7e9455e4ff1023496152472b780483bc5fba86 Mon Sep 17 00:00:00 2001
From: ketaki-deodhar
Date: Thu, 2 Jul 2026 11:47:15 -0700
Subject: [PATCH 08/10] remove file

---
 ...olin_corps_extract_postgres_views_ddl_copy | 1490 -----------------
 1 file changed, 1490 deletions(-)
 delete mode 100644 data-tool/scripts/colin_corps_extract_postgres_views_ddl_copy

diff --git a/data-tool/scripts/colin_corps_extract_postgres_views_ddl_copy b/data-tool/scripts/colin_corps_extract_postgres_views_ddl_copy
deleted file mode 100644
index 645956c641..0000000000
--- a/data-tool/scripts/colin_corps_extract_postgres_views_ddl_copy
+++ /dev/null
@@ -1,1490 +0,0 @@
--- The following views are used mainly to help with bad data analysis and to help with determining eligibility of
--- groups & batches of businesses we can migrate at specific points in time.
-
--- Default the psql variable schema_name to "public" when the caller did not define it.
-\if :{?schema_name}
-\else
-\set schema_name public
-\endif
-
--- Create the target schema if needed and resolve unqualified names in it first, then in public.
-CREATE SCHEMA IF NOT EXISTS :"schema_name";
-SET search_path TO :"schema_name", public;
-
--- Distinct corp_num values that have at least one corp_party row with party_typ_cd = 'OFF'
--- whose start_event_id is an event of that corporation.
--- Unlike mv_corps_party_role_count there is no end_event_id filter here.
-CREATE MATERIALIZED VIEW mv_corps_with_officers AS
-select distinct cp.corp_num
-from corporation c
-         join event e on c.corp_num = e.corp_num
-         join corp_party cp on cp.start_event_id = e.event_id
-where 1 = 1
-  and cp.party_typ_cd = 'OFF'
-WITH NO DATA
-;
-
-ALTER MATERIALIZED VIEW mv_corps_with_officers
-    owner to postgres;
-
-
--- Number of corp_party rows per (corp_num, party_typ_cd), restricted to rows with end_event_id is null.
-CREATE MATERIALIZED VIEW mv_corps_party_role_count AS
-select c.corp_num, cp.party_typ_cd, count(cp.party_typ_cd) as party_typ_count
-from corporation c
-         join corp_party cp on c.corp_num = cp.corp_num
-where 1 = 1
-  and cp.end_event_id is null
-group by c.corp_num, cp.party_typ_cd
-WITH NO DATA
-;
-
-ALTER MATERIALIZED VIEW mv_corps_party_role_count
-    owner to postgres;
-
-
--- Number of corporation rows per admin_email value (rows with a NULL admin_email form one group of their own).
-CREATE MATERIALIZED VIEW mv_admin_email_count AS
-select admin_email, count(*) as email_count
-from corporation
-group by admin_email
-WITH NO DATA
-;
-
-ALTER MATERIALIZED VIEW mv_admin_email_count
-    owner to postgres;
-
-
--- Number of corporation rows per email_domain, where email_domain is the lower-cased
--- second '@'-separated field of admin_email.
--- NOTE(review): the ORDER BY applies to the defining query only; readers that need a
--- particular order should still sort themselves - confirm nobody relies on it.
-CREATE MATERIALIZED VIEW mv_admin_email_domain_count AS
-select LOWER(SPLIT_PART(c.admin_email, '@', 2)) as email_domain,
-       count(*) as domain_count
-from corporation c
-group by LOWER(SPLIT_PART(c.admin_email, '@', 2))
-order by domain_count desc
-WITH NO DATA
-;
-
-ALTER MATERIALIZED VIEW mv_admin_email_domain_count
-    owner to postgres;
-
-
--- Address links of parties and offices whose end_event_id is null.
--- Each source row yields two output rows, one per addr_role ('mailing', 'delivery'),
--- so addr_id is NULL in the output when the corresponding *_addr_id column is NULL.
-create view v_addr_links as
-with current_party as (
-    select
-        cp.corp_num,
-        'party'::text as entity_kind,
-        cp.party_typ_cd as entity_type,
-        addr.addr_role,
-        addr.addr_id
-    from corp_party cp
-    -- unpivot the two address id columns into (addr_role, addr_id) rows
-    cross join lateral (
-        values
-            ('mailing'::text, cp.mailing_addr_id),
-            ('delivery'::text, cp.delivery_addr_id)
-    ) as addr(addr_role, addr_id)
-    where cp.end_event_id is null
-),
-current_office as (
-    select
-        o.corp_num,
-        'office'::text as entity_kind,
-        o.office_typ_cd as entity_type,
-        addr.addr_role,
-        addr.addr_id
-    from office o
-    -- same unpivot as in current_party
-    cross join lateral (
-        values
-            ('mailing'::text, o.mailing_addr_id),
-            ('delivery'::text, o.delivery_addr_id)
-    ) as addr(addr_role, addr_id)
-    where o.end_event_id is null
-)
-select *
-from current_party
-union all
-select *
-from current_office;
-
-ALTER VIEW v_addr_links
-    owner to postgres;
-
--- One row per v_addr_links row, with boolean quality flags for the linked address.
--- is_blank() is not defined in this section; it is expected to exist already.
-create or replace view v_addr_issues as
-with link_rows as (
-    select
-        l.corp_num,
-        l.entity_kind,
-        l.entity_type,
-        l.addr_role,
-        l.addr_id,
-        (l.addr_id is null) as is_null,
-        -- addr_id = 1 is treated as a stub; presumably a shared placeholder address row - TODO confirm
-        (l.addr_id = 1) as is_stub,
-        (l.addr_id is not null and l.addr_id <> 1) as is_real_addr,
-        a.city,
-        a.province,
-        a.country_typ_cd,
-        a.postal_cd,
-        a.addr_line_1,
-        a.addr_line_2,
-        a.addr_line_3
-    from v_addr_links l
-    -- null and stub links keep their row; their address columns simply stay NULL
-    left join address a
-        on a.addr_id = l.addr_id
-        and l.addr_id is not null
-        and l.addr_id <> 1
-),
-field_flags as (
-    -- missing_* can only be true for real addresses; a real addr_id without a matching
-    -- address row has NULL in every field and is therefore flagged on all of them
-    select
-        lr.corp_num,
-        lr.entity_kind,
-        lr.entity_type,
-        lr.addr_role,
-        lr.addr_id,
-        lr.is_null,
-        lr.is_stub,
-        (lr.is_real_addr and (lr.city is null or is_blank(lr.city))) as missing_city,
-        (lr.is_real_addr and (lr.province is null or is_blank(lr.province))) as missing_province,
-        (lr.is_real_addr and (lr.country_typ_cd is null or is_blank(lr.country_typ_cd))) as missing_country,
-        (lr.is_real_addr and (lr.postal_cd is null or is_blank(lr.postal_cd))) as missing_postal_code,
-        (lr.is_real_addr and (lr.addr_line_1 is null or is_blank(lr.addr_line_1))) as missing_addr_line_1,
-        (lr.is_real_addr and (lr.addr_line_2 is null or is_blank(lr.addr_line_2))) as missing_addr_line_2,
-        (lr.is_real_addr and (lr.addr_line_3 is null or is_blank(lr.addr_line_3))) as missing_addr_line_3
-    from link_rows lr
-),
-row_flags as (
-    select
-        ff.corp_num,
-        ff.entity_kind,
-        ff.entity_type,
-        ff.addr_role,
-        ff.addr_id,
-        ff.is_null,
-        ff.is_stub,
-        ff.missing_city,
-        ff.missing_province,
-        ff.missing_country,
-        ff.missing_postal_code,
-        ff.missing_addr_line_1,
-        ff.missing_addr_line_2,
-        ff.missing_addr_line_3,
-        -- key fields are city, province, country, postal code and addr_line_1; lines 2 and 3 are not included
-        (
-            ff.missing_city
-            or ff.missing_province
-            or ff.missing_country
-            or ff.missing_postal_code
-            or ff.missing_addr_line_1
-        ) as has_any_key_missing,
-        -- any_bad = null link, stub link, or any key field missing
-        (
-            ff.is_null
-            or ff.is_stub
-            or ff.missing_city
-            or ff.missing_province
-            or ff.missing_country
-            or ff.missing_postal_code
-            or ff.missing_addr_line_1
-        ) as any_bad
-    from field_flags ff
-)
-select
-    rf.corp_num,
-    rf.entity_kind,
-    rf.entity_type,
-    rf.addr_role,
-    rf.addr_id,
-    rf.is_null,
-    rf.is_stub,
-    rf.missing_city,
-    rf.missing_province,
-    rf.missing_country,
-    rf.missing_postal_code,
-    rf.missing_addr_line_1,
-    rf.missing_addr_line_2,
-    rf.missing_addr_line_3,
-    rf.has_any_key_missing,
-    rf.any_bad,
-    not rf.any_bad as is_healthy
-from row_flags rf;
-
-ALTER VIEW v_addr_issues
-    owner to postgres;
-
-
-/* Shared per-corp/entity-type address issue counts reused by both the
-   slim screening MV and the wide/full address-quality MV. */
--- One row per (corp_num, entity_kind, entity_type); each *_count is the number of
--- v_addr_issues rows in the group with the corresponding flag set.
-CREATE MATERIALIZED VIEW mv_addr_issue_counts_by_entity AS
-SELECT
-    i.corp_num,
-    i.entity_kind,
-    i.entity_type,
-    COUNT(*) AS address_count,
-    COUNT(*) FILTER (WHERE i.is_null) AS null_count,
-    COUNT(*) FILTER (WHERE i.is_stub) AS stub_count,
-    COUNT(*) FILTER (WHERE i.has_any_key_missing) AS any_bad_key_fields_count,
-    COUNT(*) FILTER (WHERE i.any_bad) AS any_bad_count,
-    COUNT(*) FILTER (WHERE NOT i.any_bad) AS healthy_count,
-    COUNT(*) FILTER (WHERE i.missing_city) AS missing_city_count,
-    COUNT(*) FILTER (WHERE i.missing_province) AS missing_province_count,
-    COUNT(*) FILTER (WHERE i.missing_country) AS missing_country_count,
-    COUNT(*) FILTER (WHERE i.missing_postal_code) AS missing_postal_code_count,
-    COUNT(*) FILTER (WHERE i.missing_addr_line_1) AS missing_addr_line_1_count,
-    COUNT(*) FILTER (WHERE i.missing_addr_line_2) AS missing_addr_line_2_count,
-    COUNT(*) FILTER (WHERE i.missing_addr_line_3) AS missing_addr_line_3_count
-FROM v_addr_issues i
-GROUP BY i.corp_num, i.entity_kind, i.entity_type
-WITH NO DATA
-;
-
--- Unique index over the view's GROUP BY columns.
-CREATE UNIQUE INDEX IF NOT EXISTS ux_mv_addr_issue_counts_by_entity
-    ON
mv_addr_issue_counts_by_entity (corp_num, entity_kind, entity_type); - -ALTER MATERIALIZED VIEW mv_addr_issue_counts_by_entity - owner to postgres; - - -create materialized view mv_addr_quality_by_corp as -with -params as ( - select - array['DIR','OFF','LIQ','RCC','RCM']::text[] as party_types, - array['RG','RC','LQ','DS']::text[] as office_types -), -entity_counts as MATERIALIZED ( - select - a.corp_num, - a.entity_kind, - a.entity_type, - a.address_count, - a.null_count, - a.stub_count, - a.any_bad_key_fields_count, - a.any_bad_count, - a.healthy_count, - a.missing_city_count, - a.missing_province_count, - a.missing_country_count, - a.missing_postal_code_count, - a.missing_addr_line_1_count, - a.missing_addr_line_2_count, - a.missing_addr_line_3_count, - COALESCE((a.entity_kind = 'party' AND a.entity_type = ANY(p.party_types)), false) AS is_supported_party, - COALESCE((a.entity_kind = 'office' AND a.entity_type = ANY(p.office_types)), false) AS is_supported_office - from mv_addr_issue_counts_by_entity a - cross join params p -), - -/* --------------------------------------- - Wide aggregates (counts per corp_num) - --------------------------------------- */ -agg as ( - select - ec.corp_num, - CAST(COALESCE(SUM(ec.null_count) FILTER (WHERE ec.is_supported_party OR ec.is_supported_office), 0) AS bigint) AS address_all_null_count, - CAST(COALESCE(SUM(ec.stub_count) FILTER (WHERE ec.is_supported_party OR ec.is_supported_office), 0) AS bigint) AS address_all_stub_count, - CAST(COALESCE(SUM(ec.any_bad_key_fields_count) FILTER (WHERE ec.is_supported_party OR ec.is_supported_office), 0) AS bigint) AS address_all_any_bad_key_fields_count, - CAST(COALESCE(SUM(ec.any_bad_count) FILTER (WHERE ec.is_supported_party OR ec.is_supported_office), 0) AS bigint) AS address_all_any_bad_count, - CAST(COALESCE(SUM(ec.healthy_count) FILTER (WHERE ec.is_supported_party OR ec.is_supported_office), 0) AS bigint) AS address_all_healthy_count, - CAST(COALESCE(SUM(ec.missing_city_count) 
FILTER (WHERE ec.is_supported_party OR ec.is_supported_office), 0) AS bigint) AS address_all_missing_city_count, - CAST(COALESCE(SUM(ec.missing_province_count) FILTER (WHERE ec.is_supported_party OR ec.is_supported_office), 0) AS bigint) AS address_all_missing_province_count, - CAST(COALESCE(SUM(ec.missing_country_count) FILTER (WHERE ec.is_supported_party OR ec.is_supported_office), 0) AS bigint) AS address_all_missing_country_count, - CAST(COALESCE(SUM(ec.missing_postal_code_count) FILTER (WHERE ec.is_supported_party OR ec.is_supported_office), 0) AS bigint) AS address_all_missing_postal_code_count, - CAST(COALESCE(SUM(ec.missing_addr_line_1_count) FILTER (WHERE ec.is_supported_party OR ec.is_supported_office), 0) AS bigint) AS address_all_missing_addr1_count, - - CAST(COALESCE(SUM(ec.null_count) FILTER (WHERE ec.entity_kind='party' AND ec.is_supported_party), 0) AS bigint) AS party_all_null_count, - CAST(COALESCE(SUM(ec.stub_count) FILTER (WHERE ec.entity_kind='party' AND ec.is_supported_party), 0) AS bigint) AS party_all_stub_count, - CAST(COALESCE(SUM(ec.any_bad_key_fields_count) FILTER (WHERE ec.entity_kind='party' AND ec.is_supported_party), 0) AS bigint) AS party_all_any_bad_key_fields_count, - CAST(COALESCE(SUM(ec.any_bad_count) FILTER (WHERE ec.entity_kind='party' AND ec.is_supported_party), 0) AS bigint) AS party_any_bad_count, - CAST(COALESCE(SUM(ec.any_bad_count) FILTER (WHERE ec.entity_kind='party' AND ec.is_supported_party), 0) AS bigint) AS party_all_any_bad_count, - CAST(COALESCE(SUM(ec.healthy_count) FILTER (WHERE ec.entity_kind='party' AND ec.is_supported_party), 0) AS bigint) AS party_all_healthy_count, - CAST(COALESCE(SUM(ec.missing_city_count) FILTER (WHERE ec.entity_kind='party' AND ec.is_supported_party), 0) AS bigint) AS party_all_missing_city_count, - CAST(COALESCE(SUM(ec.missing_province_count) FILTER (WHERE ec.entity_kind='party' AND ec.is_supported_party), 0) AS bigint) AS party_all_missing_province_count, - 
CAST(COALESCE(SUM(ec.missing_country_count) FILTER (WHERE ec.entity_kind='party' AND ec.is_supported_party), 0) AS bigint) AS party_all_missing_country_count, - CAST(COALESCE(SUM(ec.missing_postal_code_count) FILTER (WHERE ec.entity_kind='party' AND ec.is_supported_party), 0) AS bigint) AS party_all_missing_postal_code_count, - CAST(COALESCE(SUM(ec.missing_addr_line_1_count) FILTER (WHERE ec.entity_kind='party' AND ec.is_supported_party), 0) AS bigint) AS party_all_missing_addr1_count, - - CAST(COALESCE(SUM(ec.null_count) FILTER (WHERE ec.entity_kind='office' AND ec.is_supported_office), 0) AS bigint) AS office_all_null_count, - CAST(COALESCE(SUM(ec.stub_count) FILTER (WHERE ec.entity_kind='office' AND ec.is_supported_office), 0) AS bigint) AS office_all_stub_count, - CAST(COALESCE(SUM(ec.any_bad_key_fields_count) FILTER (WHERE ec.entity_kind='office' AND ec.is_supported_office), 0) AS bigint) AS office_all_any_bad_key_fields_count, - CAST(COALESCE(SUM(ec.any_bad_count) FILTER (WHERE ec.entity_kind='office' AND ec.is_supported_office), 0) AS bigint) AS office_any_bad_count, - CAST(COALESCE(SUM(ec.any_bad_count) FILTER (WHERE ec.entity_kind='office' AND ec.is_supported_office), 0) AS bigint) AS office_all_any_bad_count, - CAST(COALESCE(SUM(ec.healthy_count) FILTER (WHERE ec.entity_kind='office' AND ec.is_supported_office), 0) AS bigint) AS office_all_healthy_count, - CAST(COALESCE(SUM(ec.missing_city_count) FILTER (WHERE ec.entity_kind='office' AND ec.is_supported_office), 0) AS bigint) AS office_all_missing_city_count, - CAST(COALESCE(SUM(ec.missing_province_count) FILTER (WHERE ec.entity_kind='office' AND ec.is_supported_office), 0) AS bigint) AS office_all_missing_province_count, - CAST(COALESCE(SUM(ec.missing_country_count) FILTER (WHERE ec.entity_kind='office' AND ec.is_supported_office), 0) AS bigint) AS office_all_missing_country_count, - CAST(COALESCE(SUM(ec.missing_postal_code_count) FILTER (WHERE ec.entity_kind='office' AND ec.is_supported_office), 0) AS 
bigint) AS office_all_missing_postal_code_count, - CAST(COALESCE(SUM(ec.missing_addr_line_1_count) FILTER (WHERE ec.entity_kind='office' AND ec.is_supported_office), 0) AS bigint) AS office_all_missing_addr1_count, - - CAST(COALESCE(SUM(ec.null_count) FILTER (WHERE ec.entity_kind='office' AND ec.is_supported_office AND ec.entity_type NOT IN ('RG','RC')), 0) AS bigint) AS non_base_offices_null_count, - CAST(COALESCE(SUM(ec.stub_count) FILTER (WHERE ec.entity_kind='office' AND ec.is_supported_office AND ec.entity_type NOT IN ('RG','RC')), 0) AS bigint) AS non_base_offices_stub_count, - CAST(COALESCE(SUM(ec.any_bad_key_fields_count) FILTER (WHERE ec.entity_kind='office' AND ec.is_supported_office AND ec.entity_type NOT IN ('RG','RC')), 0) AS bigint) AS non_base_offices_any_bad_key_fields_count, - CAST(COALESCE(SUM(ec.any_bad_count) FILTER (WHERE ec.entity_kind='office' AND ec.is_supported_office AND ec.entity_type NOT IN ('RG','RC')), 0) AS bigint) AS non_base_offices_any_bad_count, - CAST(COALESCE(SUM(ec.healthy_count) FILTER (WHERE ec.entity_kind='office' AND ec.is_supported_office AND ec.entity_type NOT IN ('RG','RC')), 0) AS bigint) AS non_base_offices_healthy_count, - CAST(COALESCE(SUM(ec.missing_city_count) FILTER (WHERE ec.entity_kind='office' AND ec.is_supported_office AND ec.entity_type NOT IN ('RG','RC')), 0) AS bigint) AS non_base_offices_missing_city_count, - CAST(COALESCE(SUM(ec.missing_province_count) FILTER (WHERE ec.entity_kind='office' AND ec.is_supported_office AND ec.entity_type NOT IN ('RG','RC')), 0) AS bigint) AS non_base_offices_missing_province_count, - CAST(COALESCE(SUM(ec.missing_country_count) FILTER (WHERE ec.entity_kind='office' AND ec.is_supported_office AND ec.entity_type NOT IN ('RG','RC')), 0) AS bigint) AS non_base_offices_missing_country_count, - CAST(COALESCE(SUM(ec.missing_postal_code_count) FILTER (WHERE ec.entity_kind='office' AND ec.is_supported_office AND ec.entity_type NOT IN ('RG','RC')), 0) AS bigint) AS 
non_base_offices_missing_postal_code_count, - CAST(COALESCE(SUM(ec.missing_addr_line_1_count) FILTER (WHERE ec.entity_kind='office' AND ec.is_supported_office AND ec.entity_type NOT IN ('RG','RC')), 0) AS bigint) AS non_base_offices_missing_addr1_count, - - CAST(COALESCE(SUM(ec.null_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type NOT IN ('DIR','OFF')), 0) AS bigint) AS non_base_parties_null_count, - CAST(COALESCE(SUM(ec.stub_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type NOT IN ('DIR','OFF')), 0) AS bigint) AS non_base_parties_stub_count, - CAST(COALESCE(SUM(ec.any_bad_key_fields_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type NOT IN ('DIR','OFF')), 0) AS bigint) AS non_base_parties_any_bad_key_fields_count, - CAST(COALESCE(SUM(ec.any_bad_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type NOT IN ('DIR','OFF')), 0) AS bigint) AS non_base_parties_any_bad_count, - CAST(COALESCE(SUM(ec.healthy_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type NOT IN ('DIR','OFF')), 0) AS bigint) AS non_base_parties_healthy_count, - CAST(COALESCE(SUM(ec.missing_city_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type NOT IN ('DIR','OFF')), 0) AS bigint) AS non_base_parties_missing_city_count, - CAST(COALESCE(SUM(ec.missing_province_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type NOT IN ('DIR','OFF')), 0) AS bigint) AS non_base_parties_missing_province_count, - CAST(COALESCE(SUM(ec.missing_country_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type NOT IN ('DIR','OFF')), 0) AS bigint) AS non_base_parties_missing_country_count, - CAST(COALESCE(SUM(ec.missing_postal_code_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type NOT IN ('DIR','OFF')), 0) AS bigint) AS non_base_parties_missing_postal_code_count, - CAST(COALESCE(SUM(ec.missing_addr_line_1_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type NOT IN ('DIR','OFF')), 0) AS bigint) AS 
non_base_parties_missing_addr1_count, - - CAST(COALESCE(SUM(ec.null_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='DIR'), 0) AS bigint) AS dir_null_count, - CAST(COALESCE(SUM(ec.stub_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='DIR'), 0) AS bigint) AS dir_stub_count, - CAST(COALESCE(SUM(ec.any_bad_key_fields_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='DIR'), 0) AS bigint) AS dir_any_bad_key_fields_count, - CAST(COALESCE(SUM(ec.any_bad_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='DIR'), 0) AS bigint) AS dir_any_bad_count, - CAST(COALESCE(SUM(ec.healthy_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='DIR'), 0) AS bigint) AS dir_healthy_count, - CAST(COALESCE(SUM(ec.missing_city_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='DIR'), 0) AS bigint) AS dir_missing_city_count, - CAST(COALESCE(SUM(ec.missing_province_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='DIR'), 0) AS bigint) AS dir_missing_province_count, - CAST(COALESCE(SUM(ec.missing_country_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='DIR'), 0) AS bigint) AS dir_missing_country_count, - CAST(COALESCE(SUM(ec.missing_postal_code_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='DIR'), 0) AS bigint) AS dir_missing_postal_code_count, - CAST(COALESCE(SUM(ec.missing_addr_line_1_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='DIR'), 0) AS bigint) AS dir_missing_addr1_count, - - CAST(COALESCE(SUM(ec.null_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='OFF'), 0) AS bigint) AS officer_null_count, - CAST(COALESCE(SUM(ec.stub_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='OFF'), 0) AS bigint) AS officer_stub_count, - CAST(COALESCE(SUM(ec.any_bad_key_fields_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='OFF'), 0) AS bigint) AS officer_any_bad_key_fields_count, - CAST(COALESCE(SUM(ec.any_bad_count) FILTER 
(WHERE ec.entity_kind='party' AND ec.entity_type='OFF'), 0) AS bigint) AS officer_any_bad_count, - CAST(COALESCE(SUM(ec.healthy_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='OFF'), 0) AS bigint) AS officer_healthy_count, - CAST(COALESCE(SUM(ec.missing_city_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='OFF'), 0) AS bigint) AS officer_missing_city_count, - CAST(COALESCE(SUM(ec.missing_province_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='OFF'), 0) AS bigint) AS officer_missing_province_count, - CAST(COALESCE(SUM(ec.missing_country_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='OFF'), 0) AS bigint) AS officer_missing_country_count, - CAST(COALESCE(SUM(ec.missing_postal_code_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='OFF'), 0) AS bigint) AS officer_missing_postal_code_count, - CAST(COALESCE(SUM(ec.missing_addr_line_1_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='OFF'), 0) AS bigint) AS officer_missing_addr1_count, - - CAST(COALESCE(SUM(ec.null_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='LIQ'), 0) AS bigint) AS liq_null_count, - CAST(COALESCE(SUM(ec.stub_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='LIQ'), 0) AS bigint) AS liq_stub_count, - CAST(COALESCE(SUM(ec.any_bad_key_fields_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='LIQ'), 0) AS bigint) AS liq_any_bad_key_fields_count, - CAST(COALESCE(SUM(ec.any_bad_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='LIQ'), 0) AS bigint) AS liq_any_bad_count, - CAST(COALESCE(SUM(ec.healthy_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='LIQ'), 0) AS bigint) AS liq_healthy_count, - CAST(COALESCE(SUM(ec.missing_city_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='LIQ'), 0) AS bigint) AS liq_missing_city_count, - CAST(COALESCE(SUM(ec.missing_province_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='LIQ'), 0) 
AS bigint) AS liq_missing_province_count, - CAST(COALESCE(SUM(ec.missing_country_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='LIQ'), 0) AS bigint) AS liq_missing_country_count, - CAST(COALESCE(SUM(ec.missing_postal_code_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='LIQ'), 0) AS bigint) AS liq_missing_postal_code_count, - CAST(COALESCE(SUM(ec.missing_addr_line_1_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='LIQ'), 0) AS bigint) AS liq_missing_addr1_count, - - CAST(COALESCE(SUM(ec.null_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='RCC'), 0) AS bigint) AS rcc_null_count, - CAST(COALESCE(SUM(ec.stub_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='RCC'), 0) AS bigint) AS rcc_stub_count, - CAST(COALESCE(SUM(ec.any_bad_key_fields_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='RCC'), 0) AS bigint) AS rcc_any_bad_key_fields_count, - CAST(COALESCE(SUM(ec.any_bad_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='RCC'), 0) AS bigint) AS rcc_any_bad_count, - CAST(COALESCE(SUM(ec.healthy_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='RCC'), 0) AS bigint) AS rcc_healthy_count, - CAST(COALESCE(SUM(ec.missing_city_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='RCC'), 0) AS bigint) AS rcc_missing_city_count, - CAST(COALESCE(SUM(ec.missing_province_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='RCC'), 0) AS bigint) AS rcc_missing_province_count, - CAST(COALESCE(SUM(ec.missing_country_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='RCC'), 0) AS bigint) AS rcc_missing_country_count, - CAST(COALESCE(SUM(ec.missing_postal_code_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='RCC'), 0) AS bigint) AS rcc_missing_postal_code_count, - CAST(COALESCE(SUM(ec.missing_addr_line_1_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='RCC'), 0) AS bigint) AS rcc_missing_addr1_count, - - 
CAST(COALESCE(SUM(ec.null_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='RCM'), 0) AS bigint) AS rcm_null_count, - CAST(COALESCE(SUM(ec.stub_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='RCM'), 0) AS bigint) AS rcm_stub_count, - CAST(COALESCE(SUM(ec.any_bad_key_fields_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='RCM'), 0) AS bigint) AS rcm_any_bad_key_fields_count, - CAST(COALESCE(SUM(ec.any_bad_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='RCM'), 0) AS bigint) AS rcm_any_bad_count, - CAST(COALESCE(SUM(ec.healthy_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='RCM'), 0) AS bigint) AS rcm_healthy_count, - CAST(COALESCE(SUM(ec.missing_city_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='RCM'), 0) AS bigint) AS rcm_missing_city_count, - CAST(COALESCE(SUM(ec.missing_province_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='RCM'), 0) AS bigint) AS rcm_missing_province_count, - CAST(COALESCE(SUM(ec.missing_country_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='RCM'), 0) AS bigint) AS rcm_missing_country_count, - CAST(COALESCE(SUM(ec.missing_postal_code_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='RCM'), 0) AS bigint) AS rcm_missing_postal_code_count, - CAST(COALESCE(SUM(ec.missing_addr_line_1_count) FILTER (WHERE ec.entity_kind='party' AND ec.entity_type='RCM'), 0) AS bigint) AS rcm_missing_addr1_count, - - CAST(COALESCE(SUM(ec.null_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='RG'), 0) AS bigint) AS office_rg_null_count, - CAST(COALESCE(SUM(ec.stub_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='RG'), 0) AS bigint) AS office_rg_stub_count, - CAST(COALESCE(SUM(ec.any_bad_key_fields_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='RG'), 0) AS bigint) AS office_rg_any_bad_key_fields_count, - CAST(COALESCE(SUM(ec.any_bad_count) FILTER (WHERE ec.entity_kind='office' AND 
ec.entity_type='RG'), 0) AS bigint) AS office_rg_any_bad_count, - CAST(COALESCE(SUM(ec.healthy_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='RG'), 0) AS bigint) AS office_rg_healthy_count, - CAST(COALESCE(SUM(ec.missing_city_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='RG'), 0) AS bigint) AS office_rg_missing_city_count, - CAST(COALESCE(SUM(ec.missing_province_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='RG'), 0) AS bigint) AS office_rg_missing_province_count, - CAST(COALESCE(SUM(ec.missing_country_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='RG'), 0) AS bigint) AS office_rg_missing_country_count, - CAST(COALESCE(SUM(ec.missing_postal_code_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='RG'), 0) AS bigint) AS office_rg_missing_postal_code_count, - CAST(COALESCE(SUM(ec.missing_addr_line_1_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='RG'), 0) AS bigint) AS office_rg_missing_addr1_count, - - CAST(COALESCE(SUM(ec.null_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='RC'), 0) AS bigint) AS office_rc_null_count, - CAST(COALESCE(SUM(ec.stub_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='RC'), 0) AS bigint) AS office_rc_stub_count, - CAST(COALESCE(SUM(ec.any_bad_key_fields_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='RC'), 0) AS bigint) AS office_rc_any_bad_key_fields_count, - CAST(COALESCE(SUM(ec.any_bad_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='RC'), 0) AS bigint) AS office_rc_any_bad_count, - CAST(COALESCE(SUM(ec.healthy_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='RC'), 0) AS bigint) AS office_rc_healthy_count, - CAST(COALESCE(SUM(ec.missing_city_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='RC'), 0) AS bigint) AS office_rc_missing_city_count, - CAST(COALESCE(SUM(ec.missing_province_count) FILTER (WHERE ec.entity_kind='office' AND 
ec.entity_type='RC'), 0) AS bigint) AS office_rc_missing_province_count, - CAST(COALESCE(SUM(ec.missing_country_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='RC'), 0) AS bigint) AS office_rc_missing_country_count, - CAST(COALESCE(SUM(ec.missing_postal_code_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='RC'), 0) AS bigint) AS office_rc_missing_postal_code_count, - CAST(COALESCE(SUM(ec.missing_addr_line_1_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='RC'), 0) AS bigint) AS office_rc_missing_addr1_count, - - CAST(COALESCE(SUM(ec.null_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='LQ'), 0) AS bigint) AS office_lq_null_count, - CAST(COALESCE(SUM(ec.stub_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='LQ'), 0) AS bigint) AS office_lq_stub_count, - CAST(COALESCE(SUM(ec.any_bad_key_fields_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='LQ'), 0) AS bigint) AS office_lq_any_bad_key_fields_count, - CAST(COALESCE(SUM(ec.any_bad_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='LQ'), 0) AS bigint) AS office_lq_any_bad_count, - CAST(COALESCE(SUM(ec.healthy_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='LQ'), 0) AS bigint) AS office_lq_healthy_count, - CAST(COALESCE(SUM(ec.missing_city_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='LQ'), 0) AS bigint) AS office_lq_missing_city_count, - CAST(COALESCE(SUM(ec.missing_province_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='LQ'), 0) AS bigint) AS office_lq_missing_province_count, - CAST(COALESCE(SUM(ec.missing_country_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='LQ'), 0) AS bigint) AS office_lq_missing_country_count, - CAST(COALESCE(SUM(ec.missing_postal_code_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='LQ'), 0) AS bigint) AS office_lq_missing_postal_code_count, - CAST(COALESCE(SUM(ec.missing_addr_line_1_count) FILTER 
(WHERE ec.entity_kind='office' AND ec.entity_type='LQ'), 0) AS bigint) AS office_lq_missing_addr1_count, - - CAST(COALESCE(SUM(ec.null_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='DS'), 0) AS bigint) AS office_ds_null_count, - CAST(COALESCE(SUM(ec.stub_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='DS'), 0) AS bigint) AS office_ds_stub_count, - CAST(COALESCE(SUM(ec.any_bad_key_fields_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='DS'), 0) AS bigint) AS office_ds_any_bad_key_fields_count, - CAST(COALESCE(SUM(ec.any_bad_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='DS'), 0) AS bigint) AS office_ds_any_bad_count, - CAST(COALESCE(SUM(ec.healthy_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='DS'), 0) AS bigint) AS office_ds_healthy_count, - CAST(COALESCE(SUM(ec.missing_city_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='DS'), 0) AS bigint) AS office_ds_missing_city_count, - CAST(COALESCE(SUM(ec.missing_province_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='DS'), 0) AS bigint) AS office_ds_missing_province_count, - CAST(COALESCE(SUM(ec.missing_country_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='DS'), 0) AS bigint) AS office_ds_missing_country_count, - CAST(COALESCE(SUM(ec.missing_postal_code_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='DS'), 0) AS bigint) AS office_ds_missing_postal_code_count, - CAST(COALESCE(SUM(ec.missing_addr_line_1_count) FILTER (WHERE ec.entity_kind='office' AND ec.entity_type='DS'), 0) AS bigint) AS office_ds_missing_addr1_count - - from entity_counts ec - group by ec.corp_num -), - -/* --------------------------------------- - JSON breakdowns (supported types only) - --------------------------------------- */ -party_by_type as ( - select - ec.corp_num, - jsonb_object_agg( - ec.entity_type, - jsonb_build_object( - 'addresses', ec.address_count, - 'null_count', ec.null_count, - 
'stub_count', ec.stub_count, - 'any_bad_key_fields_count', ec.any_bad_key_fields_count, - 'any_bad_count', ec.any_bad_count, - 'healthy_count', ec.healthy_count, - 'missing_city', ec.missing_city_count, - 'missing_province', ec.missing_province_count, - 'missing_country', ec.missing_country_count, - 'missing_postal_code', ec.missing_postal_code_count, - 'missing_addr_line_1', ec.missing_addr_line_1_count, - 'missing_addr_line_2', ec.missing_addr_line_2_count, - 'missing_addr_line_3', ec.missing_addr_line_3_count - ) - ORDER BY ec.entity_type - ) as party_issue_breakdown - from entity_counts ec - where ec.entity_kind='party' - and ec.is_supported_party - group by ec.corp_num -), - -office_by_type as ( - select - ec.corp_num, - jsonb_object_agg( - ec.entity_type, - jsonb_build_object( - 'addresses', ec.address_count, - 'null_count', ec.null_count, - 'stub_count', ec.stub_count, - 'any_bad_key_fields_count', ec.any_bad_key_fields_count, - 'any_bad_count', ec.any_bad_count, - 'healthy_count', ec.healthy_count, - 'missing_city', ec.missing_city_count, - 'missing_province', ec.missing_province_count, - 'missing_country', ec.missing_country_count, - 'missing_postal_code', ec.missing_postal_code_count, - 'missing_addr_line_1', ec.missing_addr_line_1_count, - 'missing_addr_line_2', ec.missing_addr_line_2_count, - 'missing_addr_line_3', ec.missing_addr_line_3_count - ) - ORDER BY ec.entity_type - ) as office_issue_breakdown - from entity_counts ec - where ec.entity_kind='office' - and ec.is_supported_office - group by ec.corp_num -) - --- ========== FINAL SELECT ========== -select - a.corp_num, - - -- JSON first (combined, then each kind) - jsonb_build_object( - 'party', coalesce(p.party_issue_breakdown, '{}'::jsonb), - 'office', coalesce(o.office_issue_breakdown, '{}'::jsonb) - ) as address_issue_breakdown, - p.party_issue_breakdown, - o.office_issue_breakdown, - - -- Address ALL (party + office; supported) - a.address_all_null_count, - a.address_all_stub_count, - 
a.address_all_any_bad_key_fields_count, - a.address_all_any_bad_count, - a.address_all_healthy_count, - a.address_all_missing_city_count, - a.address_all_missing_province_count, - a.address_all_missing_country_count, - a.address_all_missing_postal_code_count, - a.address_all_missing_addr1_count, - - -- Party ALL (supported) - a.party_all_null_count, - a.party_all_stub_count, - a.party_all_any_bad_key_fields_count, - a.party_any_bad_count, - a.party_all_any_bad_count, - a.party_all_healthy_count, - a.party_all_missing_city_count, - a.party_all_missing_province_count, - a.party_all_missing_country_count, - a.party_all_missing_postal_code_count, - a.party_all_missing_addr1_count, - - -- Office ALL (supported) - a.office_all_null_count, - a.office_all_stub_count, - a.office_all_any_bad_key_fields_count, - a.office_any_bad_count, - a.office_all_any_bad_count, - a.office_all_healthy_count, - a.office_all_missing_city_count, - a.office_all_missing_province_count, - a.office_all_missing_country_count, - a.office_all_missing_postal_code_count, - a.office_all_missing_addr1_count, - - -- Non‑base parties (NOT in DIR/OFF; all party codes) - a.non_base_parties_null_count, - a.non_base_parties_stub_count, - a.non_base_parties_any_bad_key_fields_count, - a.non_base_parties_any_bad_count, - a.non_base_parties_healthy_count, - a.non_base_parties_missing_city_count, - a.non_base_parties_missing_province_count, - a.non_base_parties_missing_country_count, - a.non_base_parties_missing_postal_code_count, - a.non_base_parties_missing_addr1_count, - - -- Non‑base offices (supported but not RG/RC → LQ, DS) - a.non_base_offices_null_count, - a.non_base_offices_stub_count, - a.non_base_offices_any_bad_key_fields_count, - a.non_base_offices_any_bad_count, - a.non_base_offices_healthy_count, - a.non_base_offices_missing_city_count, - a.non_base_offices_missing_province_count, - a.non_base_offices_missing_country_count, - a.non_base_offices_missing_postal_code_count, - 
a.non_base_offices_missing_addr1_count, - - -- Parties per type (supported) - a.dir_null_count, a.dir_stub_count, a.dir_any_bad_key_fields_count, a.dir_any_bad_count, a.dir_healthy_count, - a.dir_missing_city_count, a.dir_missing_province_count, a.dir_missing_country_count, a.dir_missing_postal_code_count, a.dir_missing_addr1_count, - - a.officer_null_count, a.officer_stub_count, a.officer_any_bad_key_fields_count, a.officer_any_bad_count, a.officer_healthy_count, - a.officer_missing_city_count, a.officer_missing_province_count, a.officer_missing_country_count, a.officer_missing_postal_code_count, a.officer_missing_addr1_count, - - a.liq_null_count, a.liq_stub_count, a.liq_any_bad_key_fields_count, a.liq_any_bad_count, a.liq_healthy_count, - a.liq_missing_city_count, a.liq_missing_province_count, a.liq_missing_country_count, a.liq_missing_postal_code_count, a.liq_missing_addr1_count, - - a.rcc_null_count, a.rcc_stub_count, a.rcc_any_bad_key_fields_count, a.rcc_any_bad_count, a.rcc_healthy_count, - a.rcc_missing_city_count, a.rcc_missing_province_count, a.rcc_missing_country_count, a.rcc_missing_postal_code_count, a.rcc_missing_addr1_count, - - a.rcm_null_count, a.rcm_stub_count, a.rcm_any_bad_key_fields_count, a.rcm_any_bad_count, a.rcm_healthy_count, - a.rcm_missing_city_count, a.rcm_missing_province_count, a.rcm_missing_country_count, a.rcm_missing_postal_code_count, a.rcm_missing_addr1_count, - - -- Offices per type (supported) - a.office_rg_null_count, a.office_rg_stub_count, a.office_rg_any_bad_key_fields_count, a.office_rg_any_bad_count, a.office_rg_healthy_count, - a.office_rg_missing_city_count, a.office_rg_missing_province_count, a.office_rg_missing_country_count, a.office_rg_missing_postal_code_count, a.office_rg_missing_addr1_count, - - a.office_rc_null_count, a.office_rc_stub_count, a.office_rc_any_bad_key_fields_count, a.office_rc_any_bad_count, a.office_rc_healthy_count, - a.office_rc_missing_city_count, a.office_rc_missing_province_count, 
a.office_rc_missing_country_count, a.office_rc_missing_postal_code_count, a.office_rc_missing_addr1_count, - - a.office_lq_null_count, a.office_lq_stub_count, a.office_lq_any_bad_key_fields_count, a.office_lq_any_bad_count, a.office_lq_healthy_count, - a.office_lq_missing_city_count, a.office_lq_missing_province_count, a.office_lq_missing_country_count, a.office_lq_missing_postal_code_count, a.office_lq_missing_addr1_count, - - a.office_ds_null_count, a.office_ds_stub_count, a.office_ds_any_bad_key_fields_count, a.office_ds_any_bad_count, a.office_ds_healthy_count, - a.office_ds_missing_city_count, a.office_ds_missing_province_count, a.office_ds_missing_country_count, a.office_ds_missing_postal_code_count, a.office_ds_missing_addr1_count - -from agg a -left join party_by_type p on p.corp_num = a.corp_num -left join office_by_type o on o.corp_num = a.corp_num -WITH NO DATA -; - -create unique index on mv_addr_quality_by_corp (corp_num); - -ALTER MATERIALIZED VIEW mv_addr_quality_by_corp - owner to postgres; - - -CREATE MATERIALIZED VIEW mv_addr_quality_screening_by_corp AS -WITH --- IMPORTANT: keep these supported type lists in sync with mv_addr_quality_by_corp above. 
-params AS ( - SELECT - array['DIR','OFF','LIQ','RCC','RCM']::text[] AS party_types, - array['RG','RC','LQ','DS']::text[] AS office_types -) -SELECT - ec.corp_num, - CAST(COALESCE(SUM(ec.any_bad_count) FILTER ( - WHERE ((ec.entity_kind='party' AND ec.entity_type = ANY(p.party_types)) - OR (ec.entity_kind='office' AND ec.entity_type = ANY(p.office_types))) - ), 0) AS bigint) AS address_all_any_bad_count, - CAST(COALESCE(SUM(ec.any_bad_count) FILTER ( - WHERE ec.entity_kind='office' - AND ec.entity_type = ANY(p.office_types) - ), 0) AS bigint) AS office_all_any_bad_count, - CAST(COALESCE(SUM(ec.any_bad_count) FILTER ( - WHERE ec.entity_kind='party' - AND ec.entity_type = ANY(p.party_types) - ), 0) AS bigint) AS party_all_any_bad_count -FROM mv_addr_issue_counts_by_entity ec -CROSS JOIN params p -GROUP BY ec.corp_num -WITH NO DATA -; - -CREATE UNIQUE INDEX IF NOT EXISTS ux_mv_addr_quality_screening_by_corp_corpnum - ON mv_addr_quality_screening_by_corp (corp_num); - -ALTER MATERIALIZED VIEW mv_addr_quality_screening_by_corp - owner to postgres; - -/* --------------------------------------------------------------------- - Share class eligibility issue flags (1 row per corp_num) - - - Uses CURRENT share structure only: share_struct.end_event_id IS NULL - - Decimal-place checks use scale(par_value_amt) and therefore count - trailing zeros exactly as persisted. 
- --------------------------------------------------------------------- */ -CREATE MATERIALIZED VIEW mv_share_class_issue_flags AS -SELECT - ss.corp_num, - - /* 1) Currency type = OTHER (currency_typ_cd='OTH' OR other_currency populated) */ - bool_or( - (ssc.currency_typ_cd IS NOT NULL AND upper(btrim(ssc.currency_typ_cd)) = 'OTH') - OR (ssc.other_currency IS NOT NULL AND NOT is_blank(ssc.other_currency)) - ) AS has_other_currency, - - /* 2) Par value < 1 and more than 6 decimal places */ - bool_or( - ssc.par_value_ind IS TRUE - AND ssc.par_value_amt IS NOT NULL - AND ssc.par_value_amt < 1 - AND significant_decimal_places(ssc.par_value_amt) > 6 - ) AS has_par_value_lt1_gt6dp, - - /* 3) Par value > 1 and more than 2 decimal places */ - bool_or( - ssc.par_value_ind IS TRUE - AND ssc.par_value_amt IS NOT NULL - AND ssc.par_value_amt > 1 - AND significant_decimal_places(ssc.par_value_amt) > 2 - ) AS has_par_value_gt1_gt2dp - -FROM share_struct ss -JOIN share_struct_cls ssc - ON ssc.corp_num = ss.corp_num - AND ssc.start_event_id = ss.start_event_id -WHERE ss.end_event_id IS NULL -GROUP BY ss.corp_num -WITH NO DATA -; - -CREATE UNIQUE INDEX IF NOT EXISTS ux_mv_share_class_issue_flags_corpnum - ON mv_share_class_issue_flags (corp_num); - -ALTER MATERIALIZED VIEW mv_share_class_issue_flags - owner to postgres; - - -CREATE MATERIALIZED VIEW mv_corp_event_filing_rollup AS -WITH allowed_event_file_codes(code) AS ( - VALUES - ('FILE_ICORP'), - ('FILE_ICORU'), - ('FILE_ICORC'), - ('FILE_ANNBC'), - ('FILE_AM_AR'), - ('FILE_NOCAD'), - ('FILE_APTRA'), - ('FILE_NOERA'), - ('FILE_AM_DO'), - ('FILE_AM_RR'), - ('FILE_NOCDR'), - ('FILE_AM_DI'), - ('FILE_NOALA'), - ('FILE_NOALB'), - ('FILE_NOALU'), - ('FILE_NOALC'), - ('FILE_AM_BC'), - ('FILE_AM_LI'), - ('FILE_AM_RM'), - ('FILE_AM_SS'), - ('ADCORP_NULL'), - -- Skipped events/filing types - ('FILE_COGS1'), - ('FILE_CHGJU'), - ('FILE_NWPTA'), - ('FILE_PARES'), - ('FILE_TILAT'), - ('FILE_TILHO'), - ('FILE_TILMA'), - ('SYST_CANPS'), - 
('SYST_CHGJU'), - ('SYST_CHGPN'), - ('SYST_CO_PN'), - ('SYST_LNKPS'), - ('SYST_NWPTA'), - ('SYST_PARES'), - ('SYST_RIPFL'), - ('SYST_TILAT'), - ('SYST_TILHO'), - ('SYST_NULL'), - ('SYSD1_NULL'), - ('SYSD2_NULL'), - ('SYST1_NULL'), - ('SYST2_NULL'), - ('TRESP_NULL'), - ('TRESP_COUTI') -), -event_rows AS ( - SELECT - e.corp_num, - e.event_id, - e.event_type_cd, - e.event_timerstamp, - e.event_type_cd || '_' || COALESCE(f.filing_type_cd, 'NULL') AS event_file_code - FROM event e - LEFT JOIN filing f - ON f.event_id = e.event_id -), -classified_event_rows AS ( - SELECT - er.corp_num, - er.event_id, - er.event_type_cd, - er.event_timerstamp, - er.event_file_code, - CASE - WHEN er.event_file_code IS NULL THEN false - ELSE ac.code IS NULL - END AS is_disallowed - FROM event_rows er - LEFT JOIN allowed_event_file_codes ac - ON ac.code = er.event_file_code -) -SELECT - cer.corp_num, - COUNT(*) AS event_count, - COUNT(*) FILTER ( - WHERE cer.event_type_cd = 'FILE' - AND cer.event_timerstamp >= CURRENT_DATE - INTERVAL '2 years' - ) AS file_event_count_last_2yrs, - MAX(CASE WHEN cer.event_type_cd = 'FILE' THEN cer.event_timerstamp END) AS last_file_event_ts, - MAX(cer.event_timerstamp) AS last_event_ts, - STRING_AGG(cer.event_file_code, ',' ORDER BY cer.event_id) AS event_file_types, - COALESCE(BOOL_OR(cer.is_disallowed), false) AS has_disallowed_event, - STRING_AGG(DISTINCT cer.event_file_code, ',' ORDER BY cer.event_file_code) - FILTER (WHERE cer.is_disallowed) AS failed_events -FROM classified_event_rows cer -GROUP BY cer.corp_num -WITH NO DATA -; - -CREATE UNIQUE INDEX IF NOT EXISTS ux_mv_corp_event_filing_rollup_corpnum - ON mv_corp_event_filing_rollup (corp_num); - -ALTER MATERIALIZED VIEW mv_corp_event_filing_rollup - owner to postgres; - - -CREATE MATERIALIZED VIEW mv_legacy_corps_data AS -WITH cp_completed AS ( - /* One completed migration row per corp in prod */ - SELECT DISTINCT ON (cp.corp_num) - cp.corp_num, - cp.mig_batch_id - FROM corp_processing cp - WHERE 
cp.environment = 'prod' - AND cp.processed_status = 'COMPLETED' - ORDER BY - cp.corp_num, - (cp.mig_batch_id IS NULL), - cp.last_modified DESC NULLS LAST, - cp.id DESC -), -mcb_prod AS ( - /* One queued migration batch per corp for prod */ - SELECT DISTINCT ON (mcb.corp_num) - mcb.corp_num, - mcb.mig_batch_id - FROM mig_corp_batch mcb - JOIN mig_batch mb - ON mb.id = mcb.mig_batch_id - LEFT JOIN mig_group mg - ON mg.id = mb.mig_group_id - WHERE mb.target_environment = 'prod' - AND (mg.target_environment = 'prod' OR mg.id IS NULL) - ORDER BY - mcb.corp_num, - mb.migrated_date DESC NULLS LAST, - mb.requested_date DESC NULLS LAST, - mb.created_date DESC NULLS LAST, - mb.id DESC, - mcb.id DESC -), -mig_status AS ( - /* Precedence: Y (completed) over QUEUED */ - SELECT DISTINCT ON (corp_num) - corp_num, - migrated, - mig_batch_id - FROM ( - SELECT corp_num, 'Y'::text AS migrated, mig_batch_id, 1 AS precedence FROM cp_completed - UNION ALL - SELECT corp_num, 'QUEUED'::text AS migrated, mig_batch_id, 2 AS precedence FROM mcb_prod - ) u - ORDER BY corp_num, precedence -) -SELECT COALESCE(edg.group_name, NULL) AS group_name, - tblFe.email_domain, - admin_email, - CAST(email_used_count as integer) as email_used_count, - CAST(EXISTS ( - SELECT 1 - FROM bad_emails be - WHERE btrim(tblFe.admin_email) <> '' - AND lower(btrim(be.email)) = lower(btrim(tblFe.admin_email)) - ) AS boolean) AS is_bad_email, - corp_num, - corp_name, - corp_type_cd, - CAST(is_frozen as boolean) as is_frozen, - CAST(is_active as boolean) as is_active, - CAST(in_dissolution as boolean) as in_dissolution, - CAST(meets_main_criteria as boolean) as meets_main_criteria, - CAST(recognition_dts as date) as recognition_dts, - CAST(last_ar_filed_dt as date) as last_ar_filed_dt, - CAST(director_count as integer) as director_count, - CAST(directors_within_bc as boolean) as directors_within_bc, - CAST(directors_within_ca as boolean) as directors_within_ca, - CAST(filing_cnt as integer) as filing_cnt, - 
CAST(months_since_last_ar_filing as integer) as months_since_last_ar_filing, - CAST(ar_unfiled_over_1yr as boolean) as ar_unfiled_over_1yr, - CAST(file_cnt_last_2yrs as integer) as file_cnt_last_2yrs, - CAST(last_file_event_ts as timestamp) as last_file_event_ts, - CAST(last_event_ts as timestamp) as last_event_ts, - event_file_types, - failed_events, - CAST(has_officers as boolean) as has_officers, - CAST(has_3rd_party as boolean) as has_3rd_party, - vendor, - -- NOTE: `group_name` comes from email_domain_groups; migration tracking fields are prefixed with `mig_` to avoid confusion. - mg.id as mig_group_id, - mg.name as mig_group_name, - mg.display_name as mig_group_display_name, - ms.mig_batch_id as mig_batch_id, - mb.name as mig_batch_name, - mb.display_name as mig_batch_display_name, - mb.migrated_date as mig_date, - COALESCE(ms.migrated, 'N') AS migrated, - CAST(EXISTS ( - SELECT 1 - FROM exclude_corps ec - WHERE ec.corp_num = tblFe.corp_num - ) AS boolean) AS is_migration_excluded, - CAST(has_password as boolean) as has_password, - CAST(COALESCE(sif.has_other_currency, false) as boolean) as has_other_share_currency, - CAST(COALESCE(sif.has_par_value_lt1_gt6dp, false) as boolean) as has_share_par_value_lt1_gt6dp, - CAST(COALESCE(sif.has_par_value_gt1_gt2dp, false) as boolean) as has_share_par_value_gt1_gt2dp, - CAST( - NOT ( - COALESCE(sif.has_other_currency, false) - OR COALESCE(sif.has_par_value_lt1_gt6dp, false) - OR COALESCE(sif.has_par_value_gt1_gt2dp, false) - ) - as boolean) as meets_share_criteria, - CAST(send_ar_ind as boolean) as send_ar_ind, - address_all_any_bad_count, - office_all_any_bad_count, - party_all_any_bad_count, - CAST(has_bar_filing as boolean) as has_bar_filing, - CAST(last_bar_fiscal_year as integer) as last_bar_fiscal_year, - CAST(last_bar_filing_date as timestamp) as last_bar_filing_date, - CAST(months_since_last_bar_filing as integer) as months_since_last_bar_filing, - bar_keycloak_guid, - bar_idp_userid, - bar_account_id, - 
CAST(bar_account_has_mailing_address as boolean) as bar_account_has_mailing_address, - ting_corps -FROM ( - SELECT DISTINCT - LOWER(SPLIT_PART(c.admin_email, '@', 2)) as email_domain, - LOWER(c.admin_email) as admin_email, - aec.email_count AS email_used_count, - ev.corp_num, - cn.corp_name, - c.corp_type_cd, - CASE WHEN c.corp_frozen_type_cd is null THEN 'N' ELSE 'Y' END AS is_frozen, - cs.is_active, - cs.in_dissolution, - CASE WHEN COALESCE(ev.has_disallowed_event, false) THEN 'N' ELSE 'Y' END AS meets_main_criteria, - c.recognition_dts::date, - c.last_ar_filed_dt::date, - CASE - WHEN last_ar_filed_dt IS NOT NULL THEN - EXTRACT(MONTH FROM AGE(CURRENT_DATE::TIMESTAMP, last_ar_filed_dt::TIMESTAMP)) + - EXTRACT(YEAR FROM AGE(CURRENT_DATE::TIMESTAMP, last_ar_filed_dt::TIMESTAMP)) * 12 - ELSE - EXTRACT(MONTH FROM AGE(CURRENT_DATE::TIMESTAMP, recognition_dts::TIMESTAMP)) + - EXTRACT(YEAR FROM AGE(CURRENT_DATE::TIMESTAMP, recognition_dts::TIMESTAMP)) * 12 - END as "months_since_last_ar_filing", - CASE - WHEN c.last_ar_filed_dt IS NULL AND c.recognition_dts < CURRENT_DATE - INTERVAL '1 year' THEN 'Y' - WHEN c.last_ar_filed_dt IS NOT NULL AND c.last_ar_filed_dt < CURRENT_DATE - INTERVAL '1 year' THEN 'Y' - ELSE 'N' - END AS ar_unfiled_over_1yr, - ev.event_count AS filing_cnt, - ev.file_event_count_last_2yrs AS file_cnt_last_2yrs, - ev.last_file_event_ts, - ev.last_event_ts, - ev.event_file_types, - ev.failed_events, - COALESCE(cprt_dir.director_count, 0) AS director_count, - CASE - WHEN COALESCE(dir_mail_loc.dir_cnt, 0) > 0 - AND COALESCE(dir_mail_loc.dir_bc_mailing_cnt, 0) = dir_mail_loc.dir_cnt - THEN 'Y' - ELSE 'N' - END AS directors_within_bc, - CASE - WHEN COALESCE(dir_mail_loc.dir_cnt, 0) > 0 - AND COALESCE(dir_mail_loc.dir_ca_mailing_cnt, 0) = dir_mail_loc.dir_cnt - THEN 'Y' - ELSE 'N' - END AS directors_within_ca, - CASE WHEN cow.corp_num IS NOT NULL THEN 'Y' ELSE 'N' END AS has_officers, - CASE WHEN cowtp.corp_num IS NOT NULL THEN 'Y' ELSE 'N' END AS 
has_3rd_party, - CASE WHEN cowtp.corp_num IS NOT NULL THEN cowtp.vendor ELSE '' END AS vendor, - CASE WHEN c.corp_password IS NOT NULL THEN 'Y' ELSE 'N' END AS has_password, - c.send_ar_ind, - mvaqs.address_all_any_bad_count, - mvaqs.office_all_any_bad_count, - mvaqs.party_all_any_bad_count, - CASE WHEN bc.identifier IS NOT NULL THEN 'Y' ELSE 'N' END AS has_bar_filing, - bc.latest_fiscal_year AS last_bar_fiscal_year, - bc.last_ar_filing_date AS last_bar_filing_date, - CASE - WHEN bc.last_ar_filing_date IS NOT NULL THEN - EXTRACT(MONTH FROM AGE(CURRENT_DATE::TIMESTAMP, bc.last_ar_filing_date::TIMESTAMP)) + - EXTRACT(YEAR FROM AGE(CURRENT_DATE::TIMESTAMP, bc.last_ar_filing_date::TIMESTAMP)) * 12 - ELSE NULL - END AS months_since_last_bar_filing, - bc.sub as bar_keycloak_guid, - bc.idp_userid as bar_idp_userid, - bc.payment_account as bar_account_id, - bc.bar_account_has_mailing_address as bar_account_has_mailing_address, - ciaml.ting_corps as ting_corps - FROM mv_corp_event_filing_rollup ev - JOIN corporation c ON c.corp_num = ev.corp_num - INNER JOIN ( - SELECT - corp_num, - COALESCE(BOOL_OR(COALESCE(op_state_type_cd = 'ACT', false)), false) AS is_active, - COALESCE( - BOOL_OR( - COALESCE( - op_state_type_cd = 'ACT' - AND state_type_cd IN ('D1A', 'D1F', 'D1T', 'D2A', 'D2F', 'D2T'), - false - ) - ), - false - ) AS in_dissolution - FROM corp_state - WHERE end_event_id IS NULL - GROUP BY corp_num - ) cs ON cs.corp_num = c.corp_num - INNER JOIN corp_name cn - ON cn.corp_num = c.corp_num - AND cn.end_event_id IS NULL - AND cn.corp_name_typ_cd IN ('CO', 'NB') - LEFT OUTER JOIN mv_corps_with_officers cow ON c.corp_num = cow.corp_num - LEFT OUTER JOIN corps_with_third_party cowtp ON c.corp_num = cowtp.corp_num - LEFT OUTER JOIN ( - SELECT corp_num, MAX(party_typ_count) AS director_count - FROM mv_corps_party_role_count - WHERE party_typ_cd = 'DIR' - GROUP BY corp_num - ) cprt_dir ON cprt_dir.corp_num = c.corp_num - LEFT OUTER JOIN ( - SELECT - cp.corp_num, - COUNT(*) AS 
dir_cnt, - COUNT(*) Filter ( - WHERE cp.mailing_addr_id IS NOT NULL - AND cp.mailing_addr_id <> 1 - AND UPPER(TRIM(ma.province)) = 'BC' - ) AS dir_bc_mailing_cnt, - COUNT(*) Filter ( - WHERE cp.mailing_addr_id IS NOT NULL - AND cp.mailing_addr_id <> 1 - AND UPPER(TRIM(ma.country_typ_cd)) = 'CA' - ) AS dir_ca_mailing_cnt - FROM corp_party cp - LEFT JOIN address ma ON ma.addr_id = cp.mailing_addr_id - WHERE cp.end_event_id is NULL - AND cp.party_typ_cd = 'DIR' - GROUP BY cp.corp_num - ) dir_mail_loc ON dir_mail_loc.corp_num = c.corp_num - LEFT OUTER JOIN mv_admin_email_count aec ON c.admin_email = aec.admin_email - LEFT OUTER JOIN mv_addr_quality_screening_by_corp mvaqs ON c.corp_num = mvaqs.corp_num - LEFT OUTER JOIN bar_corps bc ON c.corp_num = bc.identifier - LEFT OUTER JOIN ( - SELECT ted_corp_num, array_agg(ting_corp_num) AS ting_corps - FROM corp_involved_amalgamating - GROUP BY ted_corp_num - ) ciaml ON c.corp_num = ciaml.ted_corp_num - WHERE 1 = 1 -) tblFe -LEFT JOIN mig_status ms USING (corp_num) -LEFT JOIN mig_batch mb ON mb.id = ms.mig_batch_id -LEFT JOIN mig_group mg ON mg.id = mb.mig_group_id -LEFT JOIN email_domain_groups edg ON tblFe.email_domain = edg.email_domain -LEFT JOIN mv_share_class_issue_flags sif USING (corp_num) -where 1=1 -WITH NO DATA -; - -ALTER MATERIALIZED VIEW mv_legacy_corps_data - owner to postgres; - -CREATE OR REPLACE VIEW v_business_state AS -SELECT cs.corp_num, - CASE cs.op_state_type_cd - WHEN 'ACT' THEN 'ACTIVE' - WHEN 'HIS' THEN 'HISTORICAL' - ELSE cs.op_state_type_cd - END AS business_state -FROM corp_state cs -WHERE cs.end_event_id IS NULL; - -ALTER VIEW v_business_state - owner to postgres; - -CREATE OR REPLACE VIEW v_auth_component_operation_audit AS -SELECT - aco.id AS component_operation_id, - aco.auth_processing_id, - aco.corp_num, - aco.environment, - aco.flow_name, - aco.flow_run_id, - c.corp_type_cd, - c.recognition_dts, - ap.processed_status AS auth_processing_status, - ap.operation AS processing_operation, - 
ap.operation_scope AS processing_operation_scope, - ap.operation_target AS processing_operation_target, - ap.repeatability, - ap.attempt_key, - ap.dry_run AS processing_dry_run, - ap.attempt_key_context AS processing_attempt_key_context, - ap.mig_batch_id, - ap.claimed_at, - ap.create_date AS processing_create_date, - ap.last_modified AS processing_last_modified, - ap.last_error AS processing_last_error, - ap.entity_action, - ap.contact_action, - ap.affiliation_action, - ap.invite_action, - ap.action_detail AS processing_action_detail, - aco.operation AS component_operation, - aco.operation_scope AS component_operation_scope, - aco.component, - aco.target_type, - aco.target_value, - aco.action, - aco.status_code, - aco.error AS component_error, - aco.detail AS component_detail, - aco.dry_run AS component_dry_run, - aco.create_date AS component_create_date, - aco.corp_num = ap.corp_num AS corp_num_matches_parent, - aco.environment = ap.environment AS environment_matches_parent, - aco.flow_name = ap.flow_name AS flow_name_matches_parent, - aco.flow_run_id IS NOT DISTINCT FROM ap.flow_run_id AS flow_run_id_matches_parent -FROM auth_component_operation aco -JOIN auth_processing ap ON ap.id = aco.auth_processing_id -LEFT JOIN corporation c ON c.corp_num = aco.corp_num; - -ALTER VIEW v_auth_component_operation_audit - owner to postgres; - -CREATE MATERIALIZED VIEW mv_corp_issue_flags AS -SELECT - co.corp_num, - bs.business_state, - co.corp_type_cd, - - /* ---------- UNIVERSAL denominators (ALL links / ALL real) ---------- */ - ( - COALESCE(a.address_all_null_count,0) - + COALESCE(a.address_all_stub_count,0) - + COALESCE(a.address_all_any_bad_key_fields_count,0) - + COALESCE(a.address_all_healthy_count,0) - ) > 0 AS has_any_link, - ( - COALESCE(a.address_all_any_bad_key_fields_count,0) - + COALESCE(a.address_all_healthy_count,0) - ) > 0 AS has_any_real, - - /* ---------- PARTY denominators ---------- */ - ( - COALESCE(a.party_all_null_count,0) - + 
COALESCE(a.party_all_stub_count,0) - + COALESCE(a.party_all_any_bad_key_fields_count,0) - + COALESCE(a.party_all_healthy_count,0) - ) > 0 AS has_party_link, - ( - COALESCE(a.party_all_any_bad_key_fields_count,0) - + COALESCE(a.party_all_healthy_count,0) - ) > 0 AS has_party_real, - - /* ---------- OFFICE denominators ---------- */ - ( - COALESCE(a.office_all_null_count,0) - + COALESCE(a.office_all_stub_count,0) - + COALESCE(a.office_all_any_bad_key_fields_count,0) - + COALESCE(a.office_all_healthy_count,0) - ) > 0 AS has_office_link, - ( - COALESCE(a.office_all_any_bad_key_fields_count,0) - + COALESCE(a.office_all_healthy_count,0) - ) > 0 AS has_office_real, - - /* ---------- ENTITY‑TYPE denominators (party types) ---------- */ - (COALESCE(a.dir_null_count,0) + COALESCE(a.dir_stub_count,0) - + COALESCE(a.dir_any_bad_key_fields_count,0) + COALESCE(a.dir_healthy_count,0)) > 0 AS has_dir_link, - (COALESCE(a.dir_any_bad_key_fields_count,0) + COALESCE(a.dir_healthy_count,0)) > 0 AS has_dir_real, - - (COALESCE(a.officer_null_count,0) + COALESCE(a.officer_stub_count,0) - + COALESCE(a.officer_any_bad_key_fields_count,0) + COALESCE(a.officer_healthy_count,0)) > 0 AS has_officer_link, - (COALESCE(a.officer_any_bad_key_fields_count,0) + COALESCE(a.officer_healthy_count,0)) > 0 AS has_officer_real, - - (COALESCE(a.liq_null_count,0) + COALESCE(a.liq_stub_count,0) - + COALESCE(a.liq_any_bad_key_fields_count,0) + COALESCE(a.liq_healthy_count,0)) > 0 AS has_liq_link, - (COALESCE(a.liq_any_bad_key_fields_count,0) + COALESCE(a.liq_healthy_count,0)) > 0 AS has_liq_real, - - (COALESCE(a.rcc_null_count,0) + COALESCE(a.rcc_stub_count,0) - + COALESCE(a.rcc_any_bad_key_fields_count,0) + COALESCE(a.rcc_healthy_count,0)) > 0 AS has_rcc_link, - (COALESCE(a.rcc_any_bad_key_fields_count,0) + COALESCE(a.rcc_healthy_count,0)) > 0 AS has_rcc_real, - - (COALESCE(a.rcm_null_count,0) + COALESCE(a.rcm_stub_count,0) - + COALESCE(a.rcm_any_bad_key_fields_count,0) + COALESCE(a.rcm_healthy_count,0)) > 0 
AS has_rcm_link, - (COALESCE(a.rcm_any_bad_key_fields_count,0) + COALESCE(a.rcm_healthy_count,0)) > 0 AS has_rcm_real, - - /* ---------- ENTITY‑TYPE denominators (office types) ---------- */ - (COALESCE(a.office_rg_null_count,0) + COALESCE(a.office_rg_stub_count,0) - + COALESCE(a.office_rg_any_bad_key_fields_count,0) + COALESCE(a.office_rg_healthy_count,0)) > 0 AS has_office_rg_link, - (COALESCE(a.office_rg_any_bad_key_fields_count,0) + COALESCE(a.office_rg_healthy_count,0)) > 0 AS has_office_rg_real, - - (COALESCE(a.office_rc_null_count,0) + COALESCE(a.office_rc_stub_count,0) - + COALESCE(a.office_rc_any_bad_key_fields_count,0) + COALESCE(a.office_rc_healthy_count,0)) > 0 AS has_office_rc_link, - (COALESCE(a.office_rc_any_bad_key_fields_count,0) + COALESCE(a.office_rc_healthy_count,0)) > 0 AS has_office_rc_real, - - (COALESCE(a.office_lq_null_count,0) + COALESCE(a.office_lq_stub_count,0) - + COALESCE(a.office_lq_any_bad_key_fields_count,0) + COALESCE(a.office_lq_healthy_count,0)) > 0 AS has_office_lq_link, - (COALESCE(a.office_lq_any_bad_key_fields_count,0) + COALESCE(a.office_lq_healthy_count,0)) > 0 AS has_office_lq_real, - - (COALESCE(a.office_ds_null_count,0) + COALESCE(a.office_ds_stub_count,0) - + COALESCE(a.office_ds_any_bad_key_fields_count,0) + COALESCE(a.office_ds_healthy_count,0)) > 0 AS has_office_ds_link, - (COALESCE(a.office_ds_any_bad_key_fields_count,0) + COALESCE(a.office_ds_healthy_count,0)) > 0 AS has_office_ds_real, - - /* ---------- Any‑bad / Healthy at BUSINESS level ---------- */ - COALESCE(a.address_all_any_bad_count,0) > 0 AS any_bad_business, - COALESCE(a.address_all_any_bad_count,0) = 0 AS healthy_business, - - /* ---------- ALL (party+office) issues as booleans ---------- */ - COALESCE(a.address_all_null_count,0) > 0 AS has_null, - COALESCE(a.address_all_stub_count,0) > 0 AS has_stub, - COALESCE(a.address_all_missing_city_count,0) > 0 AS has_missing_city, - COALESCE(a.address_all_missing_province_count,0)> 0 AS has_missing_province, - 
COALESCE(a.address_all_missing_country_count,0) > 0 AS has_missing_country, - COALESCE(a.address_all_missing_postal_code_count,0) > 0 AS has_missing_postal, - COALESCE(a.address_all_missing_addr1_count,0) > 0 AS has_missing_addr1, - - /* ---------- PARTY aggregate issues ---------- */ - COALESCE(a.party_all_null_count,0) > 0 AS party_has_null, - COALESCE(a.party_all_stub_count,0) > 0 AS party_has_stub, - COALESCE(a.party_all_any_bad_key_fields_count,0)> 0 AS party_has_key_missing, - COALESCE(a.party_all_missing_city_count,0) > 0 AS party_missing_city, - COALESCE(a.party_all_missing_province_count,0) > 0 AS party_missing_province, - COALESCE(a.party_all_missing_country_count,0) > 0 AS party_missing_country, - COALESCE(a.party_all_missing_postal_code_count,0)>0 AS party_missing_postal, - COALESCE(a.party_all_missing_addr1_count,0) > 0 AS party_missing_addr1, - - /* ---------- OFFICE aggregate issues ---------- */ - COALESCE(a.office_all_null_count,0) > 0 AS office_has_null, - COALESCE(a.office_all_stub_count,0) > 0 AS office_has_stub, - COALESCE(a.office_all_any_bad_key_fields_count,0)>0 AS office_has_key_missing, - COALESCE(a.office_all_missing_city_count,0) > 0 AS office_missing_city, - COALESCE(a.office_all_missing_province_count,0) > 0 AS office_missing_province, - COALESCE(a.office_all_missing_country_count,0) > 0 AS office_missing_country, - COALESCE(a.office_all_missing_postal_code_count,0)>0 AS office_missing_postal, - COALESCE(a.office_all_missing_addr1_count,0) > 0 AS office_missing_addr1, - - /* ---------- PARTY per‑type issues ---------- */ - COALESCE(a.dir_null_count,0) > 0 AS dir_null, - COALESCE(a.dir_stub_count,0) > 0 AS dir_stub, - COALESCE(a.dir_missing_city_count,0) > 0 AS dir_missing_city, - COALESCE(a.dir_missing_province_count,0) > 0 AS dir_missing_province, - COALESCE(a.dir_missing_country_count,0) > 0 AS dir_missing_country, - COALESCE(a.dir_missing_postal_code_count,0) > 0 AS dir_missing_postal, - COALESCE(a.dir_missing_addr1_count,0) > 0 AS 
dir_missing_addr1, - - COALESCE(a.officer_null_count,0) > 0 AS officer_null, - COALESCE(a.officer_stub_count,0) > 0 AS officer_stub, - COALESCE(a.officer_missing_city_count,0) > 0 AS officer_missing_city, - COALESCE(a.officer_missing_province_count,0) > 0 AS officer_missing_province, - COALESCE(a.officer_missing_country_count,0) > 0 AS officer_missing_country, - COALESCE(a.officer_missing_postal_code_count,0) > 0 AS officer_missing_postal, - COALESCE(a.officer_missing_addr1_count,0) > 0 AS officer_missing_addr1, - - COALESCE(a.liq_null_count,0) > 0 AS liq_null, - COALESCE(a.liq_stub_count,0) > 0 AS liq_stub, - COALESCE(a.liq_missing_city_count,0) > 0 AS liq_missing_city, - COALESCE(a.liq_missing_province_count,0) > 0 AS liq_missing_province, - COALESCE(a.liq_missing_country_count,0) > 0 AS liq_missing_country, - COALESCE(a.liq_missing_postal_code_count,0) > 0 AS liq_missing_postal, - COALESCE(a.liq_missing_addr1_count,0) > 0 AS liq_missing_addr1, - - COALESCE(a.rcc_null_count,0) > 0 AS rcc_null, - COALESCE(a.rcc_stub_count,0) > 0 AS rcc_stub, - COALESCE(a.rcc_missing_city_count,0) > 0 AS rcc_missing_city, - COALESCE(a.rcc_missing_province_count,0) > 0 AS rcc_missing_province, - COALESCE(a.rcc_missing_country_count,0) > 0 AS rcc_missing_country, - COALESCE(a.rcc_missing_postal_code_count,0) > 0 AS rcc_missing_postal, - COALESCE(a.rcc_missing_addr1_count,0) > 0 AS rcc_missing_addr1, - - COALESCE(a.rcm_null_count,0) > 0 AS rcm_null, - COALESCE(a.rcm_stub_count,0) > 0 AS rcm_stub, - COALESCE(a.rcm_missing_city_count,0) > 0 AS rcm_missing_city, - COALESCE(a.rcm_missing_province_count,0) > 0 AS rcm_missing_province, - COALESCE(a.rcm_missing_country_count,0) > 0 AS rcm_missing_country, - COALESCE(a.rcm_missing_postal_code_count,0) > 0 AS rcm_missing_postal, - COALESCE(a.rcm_missing_addr1_count,0) > 0 AS rcm_missing_addr1, - - /* ---------- OFFICE per‑type issues ---------- */ - COALESCE(a.office_rg_null_count,0) > 0 AS office_rg_null, - COALESCE(a.office_rg_stub_count,0) 
> 0 AS office_rg_stub, - COALESCE(a.office_rg_missing_city_count,0) > 0 AS office_rg_missing_city, - COALESCE(a.office_rg_missing_province_count,0) > 0 AS office_rg_missing_province, - COALESCE(a.office_rg_missing_country_count,0) > 0 AS office_rg_missing_country, - COALESCE(a.office_rg_missing_postal_code_count,0) > 0 AS office_rg_missing_postal, - COALESCE(a.office_rg_missing_addr1_count,0) > 0 AS office_rg_missing_addr1, - - COALESCE(a.office_rc_null_count,0) > 0 AS office_rc_null, - COALESCE(a.office_rc_stub_count,0) > 0 AS office_rc_stub, - COALESCE(a.office_rc_missing_city_count,0) > 0 AS office_rc_missing_city, - COALESCE(a.office_rc_missing_province_count,0) > 0 AS office_rc_missing_province, - COALESCE(a.office_rc_missing_country_count,0) > 0 AS office_rc_missing_country, - COALESCE(a.office_rc_missing_postal_code_count,0) > 0 AS office_rc_missing_postal, - COALESCE(a.office_rc_missing_addr1_count,0) > 0 AS office_rc_missing_addr1, - - COALESCE(a.office_lq_null_count,0) > 0 AS office_lq_null, - COALESCE(a.office_lq_stub_count,0) > 0 AS office_lq_stub, - COALESCE(a.office_lq_missing_city_count,0) > 0 AS office_lq_missing_city, - COALESCE(a.office_lq_missing_province_count,0) > 0 AS office_lq_missing_province, - COALESCE(a.office_lq_missing_country_count,0) > 0 AS office_lq_missing_country, - COALESCE(a.office_lq_missing_postal_code_count,0) > 0 AS office_lq_missing_postal, - COALESCE(a.office_lq_missing_addr1_count,0) > 0 AS office_lq_missing_addr1, - - COALESCE(a.office_ds_null_count,0) > 0 AS office_ds_null, - COALESCE(a.office_ds_stub_count,0) > 0 AS office_ds_stub, - COALESCE(a.office_ds_missing_city_count,0) > 0 AS office_ds_missing_city, - COALESCE(a.office_ds_missing_province_count,0) > 0 AS office_ds_missing_province, - COALESCE(a.office_ds_missing_country_count,0) > 0 AS office_ds_missing_country, - COALESCE(a.office_ds_missing_postal_code_count,0) > 0 AS office_ds_missing_postal, - COALESCE(a.office_ds_missing_addr1_count,0) > 0 AS 
office_ds_missing_addr1 - -FROM corporation co -LEFT JOIN v_business_state bs ON bs.corp_num = co.corp_num -LEFT JOIN mv_addr_quality_by_corp a ON a.corp_num = co.corp_num -WITH NO DATA -; - --- Indexes for the MV (fast filtering + group‑bys) -CREATE UNIQUE INDEX IF NOT EXISTS ux_mv_corp_issue_flags_corp - ON mv_corp_issue_flags (corp_num); -CREATE INDEX IF NOT EXISTS ix_mv_corp_issue_flags_state_type - ON mv_corp_issue_flags (business_state, corp_type_cd); - -ALTER MATERIALIZED VIEW mv_corp_issue_flags - owner to postgres; - -CREATE OR REPLACE VIEW v_corp_issue_flags_long AS -WITH base AS ( - SELECT f.*, lcd.group_name, lcd.recognition_dts - FROM mv_corp_issue_flags f - LEFT JOIN mv_legacy_corps_data lcd USING (corp_num) -) -/* ----- ALL (party+office combined) ----- */ -SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, - 'ALL'::text AS entity_kind, NULL::text AS entity_type, - 'Null'::text AS issue, has_null AS has_issue, - has_any_link AS denom_link, has_any_real AS denom_real -FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'ALL', NULL, 'Stub', has_stub, has_any_link, has_any_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'ALL', NULL, 'Missing city', has_missing_city, has_any_link, has_any_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'ALL', NULL, 'Missing province',has_missing_province,has_any_link, has_any_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'ALL', NULL, 'Missing country', has_missing_country, has_any_link, has_any_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'ALL', NULL, 'Missing postal', has_missing_postal, has_any_link, has_any_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'ALL', NULL, 'Missing 
addr1', has_missing_addr1, has_any_link, has_any_real FROM base - -/* ----- PARTY (aggregate) ----- */ -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', NULL, 'Null', party_has_null, has_party_link, has_party_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', NULL, 'Stub', party_has_stub, has_party_link, has_party_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', NULL, 'Missing city', party_missing_city, has_party_link, has_party_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', NULL, 'Missing province', party_missing_province, has_party_link, has_party_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', NULL, 'Missing country', party_missing_country, has_party_link, has_party_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', NULL, 'Missing postal', party_missing_postal, has_party_link, has_party_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', NULL, 'Missing addr1', party_missing_addr1, has_party_link, has_party_real FROM base - -/* ----- OFFICE (aggregate) ----- */ -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office', NULL, 'Null', office_has_null, has_office_link, has_office_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office', NULL, 'Stub', office_has_stub, has_office_link, has_office_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office', NULL, 'Missing city', office_missing_city, has_office_link, has_office_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, 
recognition_dts, 'office', NULL, 'Missing province', office_missing_province, has_office_link, has_office_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office', NULL, 'Missing country', office_missing_country, has_office_link, has_office_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office', NULL, 'Missing postal', office_missing_postal, has_office_link, has_office_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office', NULL, 'Missing addr1', office_missing_addr1, has_office_link, has_office_real FROM base - -/* ----- PARTY per entity_type ----- */ -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'DIR','Null', dir_null, has_dir_link, has_dir_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'DIR','Stub', dir_stub, has_dir_link, has_dir_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'DIR','Missing city', dir_missing_city, has_dir_link, has_dir_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'DIR','Missing province', dir_missing_province, has_dir_link, has_dir_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'DIR','Missing country', dir_missing_country, has_dir_link, has_dir_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'DIR','Missing postal', dir_missing_postal, has_dir_link, has_dir_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'DIR','Missing addr1', dir_missing_addr1, has_dir_link, has_dir_real FROM base - -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, 
recognition_dts, 'party', 'OFF','Null', officer_null, has_officer_link, has_officer_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'OFF','Stub', officer_stub, has_officer_link, has_officer_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'OFF','Missing city', officer_missing_city, has_officer_link, has_officer_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'OFF','Missing province', officer_missing_province, has_officer_link, has_officer_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'OFF','Missing country', officer_missing_country, has_officer_link, has_officer_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'OFF','Missing postal', officer_missing_postal, has_officer_link, has_officer_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'OFF','Missing addr1', officer_missing_addr1, has_officer_link, has_officer_real FROM base - -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'LIQ','Null', liq_null, has_liq_link, has_liq_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'LIQ','Stub', liq_stub, has_liq_link, has_liq_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'LIQ','Missing city', liq_missing_city, has_liq_link, has_liq_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'LIQ','Missing province', liq_missing_province, has_liq_link, has_liq_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'LIQ','Missing 
country', liq_missing_country, has_liq_link, has_liq_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'LIQ','Missing postal', liq_missing_postal, has_liq_link, has_liq_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'LIQ','Missing addr1', liq_missing_addr1, has_liq_link, has_liq_real FROM base - -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'RCC','Null', rcc_null, has_rcc_link, has_rcc_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'RCC','Stub', rcc_stub, has_rcc_link, has_rcc_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'RCC','Missing city', rcc_missing_city, has_rcc_link, has_rcc_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'RCC','Missing province', rcc_missing_province, has_rcc_link, has_rcc_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'RCC','Missing country', rcc_missing_country, has_rcc_link, has_rcc_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'RCC','Missing postal', rcc_missing_postal, has_rcc_link, has_rcc_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'RCC','Missing addr1', rcc_missing_addr1, has_rcc_link, has_rcc_real FROM base - -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'RCM','Null', rcm_null, has_rcm_link, has_rcm_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'RCM','Stub', rcm_stub, has_rcm_link, has_rcm_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, 
group_name, recognition_dts, 'party', 'RCM','Missing city', rcm_missing_city, has_rcm_link, has_rcm_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'RCM','Missing province', rcm_missing_province, has_rcm_link, has_rcm_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'RCM','Missing country', rcm_missing_country, has_rcm_link, has_rcm_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'RCM','Missing postal', rcm_missing_postal, has_rcm_link, has_rcm_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'party', 'RCM','Missing addr1', rcm_missing_addr1, has_rcm_link, has_rcm_real FROM base - -/* ----- OFFICE per entity_type ----- */ -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','RG','Null', office_rg_null, has_office_rg_link, has_office_rg_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','RG','Stub', office_rg_stub, has_office_rg_link, has_office_rg_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','RG','Missing city', office_rg_missing_city, has_office_rg_link, has_office_rg_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','RG','Missing province', office_rg_missing_province, has_office_rg_link, has_office_rg_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','RG','Missing country', office_rg_missing_country, has_office_rg_link, has_office_rg_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','RG','Missing postal', office_rg_missing_postal, has_office_rg_link, has_office_rg_real FROM base 
-UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','RG','Missing addr1', office_rg_missing_addr1, has_office_rg_link, has_office_rg_real FROM base - -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','RC','Null', office_rc_null, has_office_rc_link, has_office_rc_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','RC','Stub', office_rc_stub, has_office_rc_link, has_office_rc_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','RC','Missing city', office_rc_missing_city, has_office_rc_link, has_office_rc_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','RC','Missing province', office_rc_missing_province, has_office_rc_link, has_office_rc_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','RC','Missing country', office_rc_missing_country, has_office_rc_link, has_office_rc_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','RC','Missing postal', office_rc_missing_postal, has_office_rc_link, has_office_rc_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','RC','Missing addr1', office_rc_missing_addr1, has_office_rc_link, has_office_rc_real FROM base - -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','LQ','Null', office_lq_null, has_office_lq_link, has_office_lq_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','LQ','Stub', office_lq_stub, has_office_lq_link, has_office_lq_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','LQ','Missing city', 
office_lq_missing_city, has_office_lq_link, has_office_lq_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','LQ','Missing province', office_lq_missing_province, has_office_lq_link, has_office_lq_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','LQ','Missing country', office_lq_missing_country, has_office_lq_link, has_office_lq_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','LQ','Missing postal', office_lq_missing_postal, has_office_lq_link, has_office_lq_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','LQ','Missing addr1', office_lq_missing_addr1, has_office_lq_link, has_office_lq_real FROM base - -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','DS','Null', office_ds_null, has_office_ds_link, has_office_ds_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','DS','Stub', office_ds_stub, has_office_ds_link, has_office_ds_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','DS','Missing city', office_ds_missing_city, has_office_ds_link, has_office_ds_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','DS','Missing province', office_ds_missing_province, has_office_ds_link, has_office_ds_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','DS','Missing country', office_ds_missing_country, has_office_ds_link, has_office_ds_real FROM base -UNION ALL SELECT corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','DS','Missing postal', office_ds_missing_postal, has_office_ds_link, has_office_ds_real FROM base -UNION ALL SELECT 
corp_num, business_state, corp_type_cd, group_name, recognition_dts, 'office','DS','Missing addr1', office_ds_missing_addr1, has_office_ds_link, has_office_ds_real FROM base -; - -ALTER VIEW v_corp_issue_flags_long - owner to postgres; - -CREATE MATERIALIZED VIEW mv_issue_counts_by_corp_type AS -SELECT - business_state, - corp_type_cd, - COUNT(*) FILTER (WHERE has_any_link) AS den_link, - COUNT(*) FILTER (WHERE has_any_real) AS den_real, - COUNT(*) AS total_corps, - COUNT(*) FILTER (WHERE has_null) AS c_null, - COUNT(*) FILTER (WHERE has_stub) AS c_stub, - COUNT(*) FILTER (WHERE has_missing_city) AS c_mcity, - COUNT(*) FILTER (WHERE has_missing_province) AS c_mprov, - COUNT(*) FILTER (WHERE has_missing_country) AS c_mcountry, - COUNT(*) FILTER (WHERE has_missing_postal) AS c_mpostal, - COUNT(*) FILTER (WHERE has_missing_addr1) AS c_maddr1 -FROM mv_corp_issue_flags -GROUP BY business_state, corp_type_cd -WITH NO DATA -; - -CREATE INDEX IF NOT EXISTS ix_mv_issue_counts_by_corp_type - ON mv_issue_counts_by_corp_type (corp_type_cd, business_state); -; - -ALTER MATERIALIZED VIEW mv_issue_counts_by_corp_type - owner to postgres; - -CREATE UNIQUE INDEX IF NOT EXISTS ux_legacy_corps_data_corpnum ON mv_legacy_corps_data (corp_num); - -CREATE INDEX IF NOT EXISTS ix_legacy_group_name ON mv_legacy_corps_data (group_name); - -CREATE INDEX IF NOT EXISTS ix_legacy_recog_dts ON mv_legacy_corps_data (recognition_dts); - --- Populated materialized views with data -REFRESH MATERIALIZED VIEW mv_corps_with_officers; -REFRESH MATERIALIZED VIEW mv_corps_party_role_count; -REFRESH MATERIALIZED VIEW mv_admin_email_count; -REFRESH MATERIALIZED VIEW mv_admin_email_domain_count; -REFRESH MATERIALIZED VIEW mv_addr_issue_counts_by_entity; -REFRESH MATERIALIZED VIEW mv_addr_quality_by_corp; -REFRESH MATERIALIZED VIEW mv_addr_quality_screening_by_corp; -REFRESH MATERIALIZED VIEW mv_share_class_issue_flags; -REFRESH MATERIALIZED VIEW mv_corp_event_filing_rollup; -REFRESH MATERIALIZED VIEW 
mv_legacy_corps_data; -REFRESH MATERIALIZED VIEW mv_corp_issue_flags; -REFRESH MATERIALIZED VIEW mv_issue_counts_by_corp_type; - -ANALYZE mv_corps_with_officers; -ANALYZE mv_corps_party_role_count; -ANALYZE mv_admin_email_count; -ANALYZE mv_admin_email_domain_count; -ANALYZE mv_addr_issue_counts_by_entity; -ANALYZE mv_addr_quality_by_corp; -ANALYZE mv_addr_quality_screening_by_corp; -ANALYZE mv_share_class_issue_flags; -ANALYZE mv_corp_event_filing_rollup; -ANALYZE mv_legacy_corps_data; -ANALYZE mv_corp_issue_flags; -ANALYZE mv_issue_counts_by_corp_type; - From bc8526e0d5c1440ffc8c25d2de1dec21ff066876 Mon Sep 17 00:00:00 2001 From: ketaki-deodhar Date: Thu, 2 Jul 2026 12:02:53 -0700 Subject: [PATCH 09/10] 31328 - refresh extract views updates --- data-tool/refresh_colin_extract_views.sh | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/data-tool/refresh_colin_extract_views.sh b/data-tool/refresh_colin_extract_views.sh index d09c8bbfd1..0320e20f06 100755 --- a/data-tool/refresh_colin_extract_views.sh +++ b/data-tool/refresh_colin_extract_views.sh @@ -461,8 +461,13 @@ while [[ $# -gt 0 ]]; do shift 2 ;; --schema) - PGSCHEMA="${2:-}" - shift 2 + if [[ $# -gt 1 && "$2" != -* ]]; then + PGSCHEMA="$2" + shift 2 + else + PGSCHEMA="${PGSCHEMA:-public}" + shift + fi ;; --psql-bin) PSQL_BIN="${2:-}" From c9958a9b152f31f19e89c19160ef49cfa10db29e Mon Sep 17 00:00:00 2001 From: ketaki-deodhar Date: Thu, 2 Jul 2026 12:06:42 -0700 Subject: [PATCH 10/10] 31328 - remove file --- data-tool/scripts/corp_ids_ctst.txt | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 data-tool/scripts/corp_ids_ctst.txt diff --git a/data-tool/scripts/corp_ids_ctst.txt b/data-tool/scripts/corp_ids_ctst.txt deleted file mode 100644 index be5a749550..0000000000 --- a/data-tool/scripts/corp_ids_ctst.txt +++ /dev/null @@ -1,2 +0,0 @@ -BC0008367 -BC0008368 \ No newline at end of file