Skip to content
This repository was archived by the owner on Aug 8, 2025. It is now read-only.

Commit c654f30

Browse files
committed
Add in example validations for YAML
1 parent c04b45c commit c654f30

5 files changed

Lines changed: 338 additions & 0 deletions

File tree

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
---
2+
name: "json_account_checks"
3+
description: "Check that account-related fields have gone through the system correctly"
4+
dataSources:
5+
json:
6+
- options:
7+
path: "app/src/test/resources/sample/json/account-gen"
8+
validations:
9+
- expr: "year BETWEEN 2021 AND 2022"
10+
- expr: "amount < 90"
11+
- expr: "updated_time == details.updated_by.time"
12+
- aggType: "count"
13+
aggExpr: "count == 1000"
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
---
2+
name: "account_checks"
3+
description: "Check that account-related fields have gone through the system correctly"
4+
dataSources:
5+
json:
6+
- options:
7+
path: "app/src/test/resources/sample/json/txn-gen"
8+
waitCondition:
9+
pauseInSeconds: 1
10+
validations:
11+
- expr: "amount < 100"
12+
- expr: "year == 2021"
13+
errorThreshold: 0.1
14+
- expr: "regexp_like(name, 'Peter .*')"
15+
errorThreshold: 200
16+
- preFilterExpr: "name == 'peter'"
17+
expr: "amount > 50"
18+
- groupByCols: ["account_id"]
19+
aggType: "count"
20+
aggExpr: "count == 1"
21+
- columnNameType: "columnCountEqual"
22+
count: "3"
23+
- columnNameType: "columnCountBetween"
24+
minCount: "1"
25+
maxCount: "2"
26+
- columnNameType: "columnNameMatchOrder"
27+
names: ["account_id", "amount", "name"]
28+
- columnNameType: "columnNameMatchSet"
29+
names: ["account_id", "my_name"]
30+
- upstreamDataSource: "my_first_json"
31+
upstreamReadOptions: {}
32+
joinColumns: ["account_id"]
33+
joinType: "outer"
34+
validation:
35+
expr: "my_first_json_customer_details.name == name"
36+
- upstreamDataSource: "my_first_json"
37+
upstreamReadOptions: {}
38+
joinColumns: ["account_id"]
39+
joinType: "outer"
40+
validation:
41+
expr: "amount != my_first_json_balance"
42+
- upstreamDataSource: "my_first_json"
43+
upstreamReadOptions: {}
44+
joinColumns: ["expr:account_id == my_first_json_account_id"]
45+
joinType: "outer"
46+
validation:
47+
groupByCols: ["account_id", "my_first_json_balance"]
48+
aggExpr: "sum(amount) BETWEEN my_first_json_balance * 0.8 AND my_first_json_balance * 1.2"
49+
- upstreamDataSource: "my_first_json"
50+
upstreamReadOptions: {}
51+
joinColumns: ["account_id"]
52+
joinType: "outer"
53+
validation:
54+
groupByCols: ["account_id", "my_first_json_balance"]
55+
aggExpr: "sum(amount) BETWEEN my_first_json_balance * 0.8 AND my_first_json_balance * 1.2"
56+
- upstreamDataSource: "my_first_json"
57+
upstreamReadOptions: {}
58+
joinColumns: ["account_id"]
59+
joinType: "anti"
60+
validation:
61+
aggType: "count"
62+
aggExpr: "count == 0"
63+
- upstreamDataSource: "my_first_json"
64+
upstreamReadOptions: {}
65+
joinColumns: ["account_id"]
66+
joinType: "outer"
67+
validation:
68+
aggType: "count"
69+
aggExpr: "count == 30"
70+
- upstreamDataSource: "my_first_json"
71+
upstreamReadOptions: {}
72+
joinColumns: ["account_id"]
73+
joinType: "outer"
74+
validation:
75+
upstreamDataSource: "my_third_json"
76+
upstreamReadOptions: {}
77+
joinColumns: ["account_id"]
78+
joinType: "outer"
79+
validation:
80+
aggType: "count"
81+
aggExpr: "count == 30"
Lines changed: 222 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,222 @@
1+
{
2+
"data_asset_type": null,
3+
"expectation_suite_name": "taxi.demo",
4+
"expectations": [
5+
{
6+
"expectation_type": "expect_table_columns_to_match_ordered_list",
7+
"kwargs": {
8+
"column_list": [
9+
"vendor_id",
10+
"pickup_datetime",
11+
"dropoff_datetime",
12+
"passenger_count",
13+
"trip_distance",
14+
"rate_code_id",
15+
"store_and_fwd_flag",
16+
"pickup_location_id",
17+
"dropoff_location_id",
18+
"payment_type",
19+
"fare_amount",
20+
"extra",
21+
"mta_tax",
22+
"tip_amount",
23+
"tolls_amount",
24+
"improvement_surcharge",
25+
"total_amount",
26+
"congestion_surcharge"
27+
]
28+
},
29+
"meta": {}
30+
},
31+
{
32+
"expectation_type": "expect_table_row_count_to_be_between",
33+
"kwargs": {
34+
"max_value": 10000,
35+
"min_value": 10000
36+
},
37+
"meta": {}
38+
},
39+
{
40+
"expectation_type": "expect_column_min_to_be_between",
41+
"kwargs": {
42+
"column": "passenger_count",
43+
"max_value": 1,
44+
"min_value": 1
45+
},
46+
"meta": {}
47+
},
48+
{
49+
"expectation_type": "expect_column_max_to_be_between",
50+
"kwargs": {
51+
"column": "passenger_count",
52+
"max_value": 6,
53+
"min_value": 6
54+
},
55+
"meta": {}
56+
},
57+
{
58+
"expectation_type": "expect_column_mean_to_be_between",
59+
"kwargs": {
60+
"column": "passenger_count",
61+
"max_value": 1.5716,
62+
"min_value": 1.5716
63+
},
64+
"meta": {}
65+
},
66+
{
67+
"expectation_type": "expect_column_median_to_be_between",
68+
"kwargs": {
69+
"column": "passenger_count",
70+
"max_value": 1.0,
71+
"min_value": 1.0
72+
},
73+
"meta": {}
74+
},
75+
{
76+
"expectation_type": "expect_column_quantile_values_to_be_between",
77+
"kwargs": {
78+
"allow_relative_error": "lower",
79+
"column": "passenger_count",
80+
"quantile_ranges": {
81+
"quantiles": [0.05, 0.25, 0.5, 0.75, 0.95],
82+
"value_ranges": [[1, 1], [1, 1], [1, 1], [2, 2], [5, 5]]
83+
}
84+
},
85+
"meta": {}
86+
},
87+
{
88+
"expectation_type": "expect_column_values_to_be_in_set",
89+
"kwargs": {
90+
"column": "passenger_count",
91+
"value_set": [1, 2, 3, 4, 5, 6]
92+
},
93+
"meta": {}
94+
},
95+
{
96+
"expectation_type": "expect_column_values_to_not_be_null",
97+
"kwargs": {
98+
"column": "passenger_count"
99+
},
100+
"meta": {}
101+
},
102+
{
103+
"expectation_type": "expect_column_proportion_of_unique_values_to_be_between",
104+
"kwargs": {
105+
"column": "passenger_count",
106+
"max_value": 0.0006,
107+
"min_value": 0.0006
108+
},
109+
"meta": {}
110+
},
111+
{
112+
"expectation_type": "expect_column_values_to_be_in_type_list",
113+
"kwargs": {
114+
"column": "passenger_count",
115+
"type_list": [
116+
"INTEGER",
117+
"integer",
118+
"int",
119+
"int_",
120+
"int8",
121+
"int16",
122+
"int32",
123+
"int64",
124+
"uint8",
125+
"uint16",
126+
"uint32",
127+
"uint64",
128+
"Int8Dtype",
129+
"Int16Dtype",
130+
"Int32Dtype",
131+
"Int64Dtype",
132+
"UInt8Dtype",
133+
"UInt16Dtype",
134+
"UInt32Dtype",
135+
"UInt64Dtype",
136+
"INT",
137+
"INTEGER",
138+
"INT64",
139+
"TINYINT",
140+
"BYTEINT",
141+
"SMALLINT",
142+
"BIGINT",
143+
"IntegerType",
144+
"LongType"
145+
]
146+
},
147+
"meta": {}
148+
}
149+
],
150+
"ge_cloud_id": null,
151+
"meta": {
152+
"citations": [
153+
{
154+
"batch_request": {
155+
"data_asset_name": "yellow_tripdata_sample_2019-01.csv",
156+
"data_connector_name": "default_inferred_data_connector_name",
157+
"datasource_name": "data__dir",
158+
"limit": 1000
159+
},
160+
"citation_date": "2022-08-23T13:56:02.653975Z",
161+
"comment": "Created suite added via CLI"
162+
}
163+
],
164+
"columns": {
165+
"congestion_surcharge": {
166+
"description": ""
167+
},
168+
"dropoff_datetime": {
169+
"description": ""
170+
},
171+
"dropoff_location_id": {
172+
"description": ""
173+
},
174+
"extra": {
175+
"description": ""
176+
},
177+
"fare_amount": {
178+
"description": ""
179+
},
180+
"improvement_surcharge": {
181+
"description": ""
182+
},
183+
"mta_tax": {
184+
"description": ""
185+
},
186+
"passenger_count": {
187+
"description": ""
188+
},
189+
"payment_type": {
190+
"description": ""
191+
},
192+
"pickup_datetime": {
193+
"description": ""
194+
},
195+
"pickup_location_id": {
196+
"description": ""
197+
},
198+
"rate_code_id": {
199+
"description": ""
200+
},
201+
"store_and_fwd_flag": {
202+
"description": ""
203+
},
204+
"tip_amount": {
205+
"description": ""
206+
},
207+
"tolls_amount": {
208+
"description": ""
209+
},
210+
"total_amount": {
211+
"description": ""
212+
},
213+
"trip_distance": {
214+
"description": ""
215+
},
216+
"vendor_id": {
217+
"description": ""
218+
}
219+
},
220+
"great_expectations_version": "0.15.19"
221+
}
222+
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
---
2+
name: "account_checks"
3+
description: "Check that account-related fields have gone through the system correctly"
4+
dataSources:
5+
json:
6+
- options:
7+
path: "/tmp/yaml-validation-json-test"
8+
validations:
9+
- expr: "STARTSWITH(transaction_id, 'txn')"
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
---
2+
name: "account_checks"
3+
description: "Check that account-related fields have gone through the system correctly"
4+
dataSources:
5+
json:
6+
- options:
7+
path: "app/src/test/resources/sample/json/txn-gen"
8+
validations:
9+
- expr: "amount < 100"
10+
- expr: "year == 2021"
11+
errorThreshold: 0.1
12+
- expr: "regexp_like(name, 'Peter .*')"
13+
errorThreshold: 200

0 commit comments

Comments
 (0)