Skip to content
This repository was archived by the owner on Aug 8, 2025. It is now read-only.

Commit 7edf562

Browse files
committed
Update to 0.12.2, add in ODCS v3 example
1 parent c3c2b76 commit 7edf562

5 files changed

Lines changed: 254 additions & 4 deletions

File tree

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
FROM datacatering/data-caterer:0.12.1
1+
FROM datacatering/data-caterer:0.12.2
22

33
COPY --chown=app:app build/libs/data-caterer-example-0.1.0.jar /opt/app/job.jar
Lines changed: 236 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,236 @@
1+
# What's this data contract about?
2+
domain: seller # Domain
3+
dataProduct: my quantum # Data product name
4+
version: 1.1.0 # Version (follows semantic versioning)
5+
status: active
6+
id: 53581432-6c55-4ba2-a65f-72344a91553a
7+
8+
# Description: purpose, limitations, usage and authoritative definitions
9+
description:
10+
purpose: Views built on top of the seller tables.
11+
limitations: Data based on seller perspective, no buyer information
12+
usage: Predict sales over time
13+
authoritativeDefinitions:
14+
type: privacy-statement
15+
url: https://example.com/gdpr.pdf
16+
tenant: ClimateQuantumInc
17+
18+
kind: DataContract
19+
apiVersion: v3.0.0 # Standard version (follows semantic versioning)
20+
21+
# Infrastructure & servers
22+
servers:
23+
- server: my-postgres
24+
type: postgres
25+
host: localhost
26+
port: 5432
27+
database: pypl-edw
28+
schema: pp_access_views
29+
30+
# Dataset, schema and quality
31+
schema:
32+
- name: tbl
33+
physicalName: tbl_1
34+
physicalType: table
35+
description: Provides core payment metrics
36+
authoritativeDefinitions:
37+
- url: https://catalog.data.gov/dataset/air-quality
38+
type: businessDefinition
39+
- url: https://youtu.be/jbY1BKFj9ec
40+
type: videoTutorial
41+
tags: [ ]
42+
dataGranularityDescription: Aggregation on columns txn_ref_dt, pmt_txn_id
43+
properties:
44+
- name: txn_ref_dt
45+
primaryKey: false
46+
primaryKeyPosition: -1
47+
businessName: transaction reference date
48+
logicalType: date
49+
physicalType: date
50+
required: false
51+
description: Reference date for transaction
52+
partitioned: true
53+
partitionKeyPosition: 1
54+
criticalDataElement: false
55+
tags: [ ]
56+
classification: public
57+
transformSourceObjects:
58+
- table_name_1
59+
- table_name_2
60+
- table_name_3
61+
transformLogic: sel t1.txn_dt as txn_ref_dt from table_name_1 as t1, table_name_2 as t2, table_name_3 as t3 where t1.txn_dt=date-3
62+
transformDescription: defines the logic in business terms; logic for dummies
63+
examples:
64+
- "2022-10-03"
65+
- "2020-01-28"
66+
customProperties:
67+
- property: anonymizationStrategy
68+
value: none
69+
- name: rcvr_id
70+
primaryKey: true
71+
primaryKeyPosition: 1
72+
businessName: receiver id
73+
logicalType: string
74+
physicalType: varchar(18)
75+
required: false
76+
description: A description for column rcvr_id.
77+
partitioned: false
78+
partitionKeyPosition: -1
79+
criticalDataElement: false
80+
tags: [ ]
81+
classification: restricted
82+
- name: rcvr_cntry_code
83+
primaryKey: false
84+
primaryKeyPosition: -1
85+
businessName: receiver country code
86+
logicalType: string
87+
physicalType: varchar(2)
88+
required: false
89+
description: Country code
90+
partitioned: false
91+
partitionKeyPosition: -1
92+
criticalDataElement: false
93+
tags: [ ]
94+
classification: public
95+
authoritativeDefinitions:
96+
- url: https://collibra.com/asset/742b358f-71a5-4ab1-bda4-dcdba9418c25
97+
type: businessDefinition
98+
- url: https://github.com/myorg/myrepo
99+
type: transformationImplementation
100+
- url: jdbc:postgresql://localhost:5432/adventureworks/tbl_1/rcvr_cntry_code
101+
type: implementation
102+
encryptedName: rcvr_cntry_code_encrypted
103+
quality:
104+
- rule: nullCheck
105+
description: column should not contain null values
106+
dimension: completeness # one of the seven allowed data quality dimensions
107+
type: library
108+
severity: error
109+
businessImpact: operational
110+
schedule: 0 20 * * *
111+
scheduler: cron
112+
customProperties:
113+
- property: FIELD_NAME
114+
value:
115+
- property: COMPARE_TO
116+
value:
117+
- property: COMPARISON_TYPE
118+
value: Greater than
119+
quality:
120+
- rule: countCheck
121+
type: library
122+
description: Ensure row count is within expected volume range
123+
dimension: completeness
124+
method: reconciliation
125+
severity: error
126+
businessImpact: operational
127+
schedule: 0 20 * * *
128+
scheduler: cron
129+
customProperties:
130+
- property: business-key
131+
value:
132+
- txn_ref_dt
133+
- rcvr_id
134+
135+
136+
# Pricing
137+
price:
138+
priceAmount: 9.95
139+
priceCurrency: USD
140+
priceUnit: megabyte
141+
142+
143+
# Team
144+
team:
145+
- username: ceastwood
146+
role: Data Scientist
147+
dateIn: "2022-08-02"
148+
dateOut: "2022-10-01"
149+
replacedByUsername: mhopper
150+
- username: mhopper
151+
role: Data Scientist
152+
dateIn: "2022-10-01"
153+
- username: daustin
154+
role: Owner
155+
comment: Keeper of the grail
156+
dateIn: "2022-10-01"
157+
158+
159+
# Roles
160+
roles:
161+
- role: microstrategy_user_opr
162+
access: read
163+
firstLevelApprovers: Reporting Manager
164+
secondLevelApprovers: 'mandolorian'
165+
- role: bq_queryman_user_opr
166+
access: read
167+
firstLevelApprovers: Reporting Manager
168+
secondLevelApprovers: na
169+
- role: risk_data_access_opr
170+
access: read
171+
firstLevelApprovers: Reporting Manager
172+
secondLevelApprovers: 'dathvador'
173+
- role: bq_unica_user_opr
174+
access: write
175+
firstLevelApprovers: Reporting Manager
176+
secondLevelApprovers: 'mickey'
177+
178+
# SLA
179+
slaDefaultElement: tab1.txn_ref_dt
180+
slaProperties:
181+
- property: latency # Property, see list of values in DP QoS
182+
value: 4
183+
unit: d # d, day, days for days; y, yr, years for years
184+
element: tab1.txn_ref_dt # Optional here: same table.column as slaDefaultElement
185+
- property: generalAvailability
186+
value: "2022-05-12T09:30:10-08:00"
187+
- property: endOfSupport
188+
value: "2032-05-12T09:30:10-08:00"
189+
- property: endOfLife
190+
value: "2042-05-12T09:30:10-08:00"
191+
- property: retention
192+
value: 3
193+
unit: y
194+
element: tab1.txn_ref_dt
195+
- property: frequency
196+
value: 1
197+
valueExt: 1
198+
unit: d
199+
element: tab1.txn_ref_dt
200+
- property: timeOfAvailability
201+
value: 09:00-08:00
202+
element: tab1.txn_ref_dt
203+
driver: regulatory # Describes the importance of the SLA: [regulatory|analytics|operational|...]
204+
- property: timeOfAvailability
205+
value: 08:00-08:00
206+
element: tab1.txn_ref_dt
207+
driver: analytics
208+
209+
210+
# Support
211+
support:
212+
- channel: '#product-help' # Simple Slack communication channel
213+
tool: slack
214+
url: https://aidaug.slack.com/archives/C05UZRSBKLY
215+
- channel: datacontract-ann # Simple distribution list
216+
tool: email
217+
url: mailto:datacontract-ann@bitol.io
218+
- channel: Feedback # Product Feedback
219+
description: General Product Feedback (Public)
220+
url: https://product-feedback.com
221+
222+
# Tags
223+
tags:
224+
- transactions
225+
226+
227+
# Custom properties
228+
customProperties:
229+
- property: refRulesetName
230+
value: gcsc.ruleset.name
231+
- property: somePropertyName
232+
value: property.value
233+
- property: dataprocClusterName # Used for specific applications like Elevate
234+
value: [ cluster name ]
235+
236+
contractCreatedTs: "2022-11-15T02:59:43+00:00"

gradle.properties

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,5 @@ version=0.1.0
88

99
scalaVersion=2.12
1010
scalaSpecificVersion=2.12.19
11-
dataCatererVersion=0.12.1
12-
sparkVersion=3.5.1
11+
dataCatererVersion=0.12.2
1312
sparkMajorVersion=3.5

helm/data-caterer/values.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ image:
88
repository: "datacatering/data-caterer"
99
pullPolicy: "IfNotPresent"
1010
# Overrides the image tag whose default is the chart appVersion.
11-
tag: "0.12.1"
11+
tag: "0.12.2"
1212

1313
imagePullSecrets: []
1414
nameOverride: ""
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
package io.github.datacatering.plan
2+
3+
import io.github.datacatering.datacaterer.api.PlanRun
4+
5+
class AdvancedODCSV3PlanRun extends PlanRun {
6+
7+
val accountTask = csv("customer_accounts", "/opt/app/data/customer/account-odcs-v3", Map("header" -> "true"))
8+
.schema(metadataSource.openDataContractStandard("/opt/app/mount/odcs/full-example-v3.odcs.yaml"))
9+
.count(count.records(100))
10+
11+
val conf = configuration.enableGeneratePlanAndTasks(true)
12+
.generatedReportsFolderPath("/opt/app/data/report")
13+
14+
execute(conf, accountTask)
15+
}

0 commit comments

Comments
 (0)