Skip to content
This repository was archived by the owner on Aug 8, 2025. It is now read-only.

Commit c63f974

Browse files
authored
Merge pull request #34 from data-catering/upgrade-0.14
Upgrade 0.14
2 parents 521221e + 67d02f0 commit c63f974

41 files changed

Lines changed: 542 additions & 360 deletions

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ docker/data/custom/generated
3131
docker/data/custom/recordTracking
3232
docker/data/custom/report
3333
docker/sample
34+
docker/tmp
3435

3536
benchmark/jars
3637

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
FROM datacatering/data-caterer:0.13.1
1+
FROM datacatering/data-caterer:0.14.2
22

33
COPY --chown=app:app build/libs/data-caterer-example-0.1.0.jar /opt/app/job.jar

benchmark/run_benchmark.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ case "${uname_out}" in
1818
*) sed_option="-r";;
1919
esac
2020
data_sizes=(10000 100000 1000000)
21-
job_names=("BenchmarkForeignKeyPlanRun" "BenchmarkJsonPlanRun" "BenchmarkParquetPlanRun") #"BenchmarkAdvancedKafkaPlanRun"
21+
job_names=("BenchmarkForeignKeyPlanRun" "BenchmarkJsonPlanRun" "BenchmarkParquetPlanRun")
2222

2323
spark_query_execution_engines=("default" "blaze" "comet" "gluten")
2424
gluten_spark_conf="--conf \"spark.plugins=io.glutenproject.GlutenPlugin\" --conf \"spark.memory.offHeap.enabled=true\" --conf \"spark.memory.offHeap.size=1024mb\" --conf \"spark.shuffle.manager=org.apache.spark.shuffle.sort.ColumnarShuffleManager\""
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
name: "simple_http"
2+
steps:
3+
- name: "account"
4+
count:
5+
records: 50
6+
fields:
7+
- name: "httpUrl"
8+
fields:
9+
- name: "url"
10+
static: "http://localhost:80/anything/{id}"
11+
- name: "method"
12+
static: "PUT"
13+
- name: "pathParam"
14+
fields:
15+
- name: "id"
16+
options:
17+
sql: "body.account_id"
18+
- name: "queryParam"
19+
fields:
20+
- name: "limit"
21+
type: "integer"
22+
options:
23+
min: 1
24+
max: 10
25+
- name: "httpHeaders"
26+
fields:
27+
- name: "Content-Type"
28+
static: "application/json"
29+
- name: "Content-Length"
30+
- name: "X-Account-Id"
31+
options:
32+
sql: "body.account_id"
33+
- name: "X-Updated"
34+
type: "timestamp"
35+
options:
36+
sql: "body.details.updated_by.time"
37+
- name: "httpBody"
38+
fields:
39+
- name: "account_id"
40+
options:
41+
regex: "ACC[0-9]{8}"
42+
- name: "year"
43+
type: "int"
44+
options:
45+
min: 2021
46+
max: 2022
47+
- name: "amount"
48+
type: "double"
49+
options:
50+
min: 10.0
51+
max: 100.0
52+
- name: "details"
53+
fields:
54+
- name: "name"
55+
- name: "txn_date"
56+
type: "date"
57+
options:
58+
min: "2021-01-01"
59+
max: "2021-12-31"
60+
- name: "updated_by"
61+
fields:
62+
- name: "user"
63+
- name: "time"
64+
type: "timestamp"
65+
- name: "transactions"
66+
type: "array"
67+
fields:
68+
- name: "txn_date"
69+
type: "date"
70+
- name: "amount"
71+
type: "double"
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
name: "simple_kafka"
2+
steps:
3+
- name: "kafka_account"
4+
type: "json"
5+
count:
6+
records: "10"
7+
options:
8+
topic: "account-topic"
9+
fields:
10+
- name: "key"
11+
type: "string"
12+
options:
13+
sql: "body.account_id"
14+
- name: "messageBody"
15+
fields:
16+
- name: "account_id"
17+
- name: "year"
18+
type: "int"
19+
options:
20+
min: "2021"
21+
max: "2022"
22+
- name: "amount"
23+
type: "double"
24+
options:
25+
min: "10.0"
26+
max: "100.0"
27+
- name: "details"
28+
fields:
29+
- name: "name"
30+
- name: "first_txn_date"
31+
type: "date"
32+
options:
33+
sql: "ELEMENT_AT(SORT_ARRAY(body.transactions.txn_date), 1)"
34+
- name: "updated_by"
35+
fields:
36+
- name: "user"
37+
- name: "time"
38+
type: "timestamp"
39+
- name: "transactions"
40+
type: "array"
41+
fields:
42+
- name: "txn_date"
43+
type: "date"
44+
- name: "amount"
45+
type: "double"
46+
- name: "messageHeaders"
47+
fields:
48+
- name: "account-id"
49+
options:
50+
sql: "body.account_id"
51+
- name: "updated"
52+
options:
53+
sql: "body.details.updated_by.time"

docker/data/custom/validation/simple-validation.yaml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,20 @@ dataSources:
77
path: "app/src/test/resources/sample/json/txn-gen"
88
validations:
99
- expr: "amount < 100"
10+
- field: amount
11+
validation:
12+
- type: "null"
13+
negate: true
14+
- type: "between"
15+
min: 1
16+
max: 10
17+
description: "hello"
18+
errorThreshold: 2
19+
- type: "quantileValuesBetween"
20+
quantileRanges:
21+
0.1:
22+
- - 1.0
23+
- 10.0
1024
- expr: "year == 2021"
1125
errorThreshold: 0.1
1226
- expr: "regexp_like(name, 'Peter .*')"

gradle.properties

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,5 +8,5 @@ version=0.1.0
88

99
scalaVersion=2.12
1010
scalaSpecificVersion=2.12.19
11-
dataCatererVersion=0.13.1
11+
dataCatererVersion=0.14.2
1212
sparkMajorVersion=3.5

helm/data-caterer/values.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ image:
88
repository: "datacatering/data-caterer"
99
pullPolicy: "IfNotPresent"
1010
# Overrides the image tag whose default is the chart appVersion.
11-
tag: "0.13.1"
11+
tag: "0.14.2"
1212

1313
imagePullSecrets: []
1414
nameOverride: ""

run.sh

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,27 @@
11
#!/usr/bin/env bash
22

3+
DATA_CATERER_ENV_FILE="$HOME/.data-caterer-env"
4+
35
data_caterer_version=$(grep dataCatererVersion gradle.properties | cut -d= -f2)
6+
data_caterer_user=${DATA_CATERER_API_USER:-}
7+
data_caterer_token=${DATA_CATERER_API_TOKEN:-}
8+
9+
echo "Checking for Data Caterer user and token..."
10+
if [[ -f "$DATA_CATERER_ENV_FILE" ]]; then
11+
source "$DATA_CATERER_ENV_FILE"
12+
else
13+
if [[ -z ${DATA_CATERER_API_USER} ]]; then
14+
read -p "Data Caterer user: " data_caterer_user
15+
echo "export DATA_CATERER_API_USER=$data_caterer_user" > "$DATA_CATERER_ENV_FILE"
16+
fi
17+
if [[ -z ${DATA_CATERER_API_TOKEN} ]]; then
18+
read -p "Data Caterer token: " -s data_caterer_token
19+
echo "export DATA_CATERER_API_TOKEN=$data_caterer_token" >> "$DATA_CATERER_ENV_FILE"
20+
echo
21+
fi
22+
fi
23+
source "$DATA_CATERER_ENV_FILE"
24+
425
if [[ -s ".tmp_prev_class_name" ]]; then
526
prev_class_name=$(cat .tmp_prev_class_name)
627
else
@@ -38,12 +59,15 @@ DOCKER_CMD=(
3859
-v "$(pwd)/docker/sample/tracking:/opt/app/record-tracking"
3960
-v "$(pwd)/docker/mount:/opt/app/mount"
4061
-v "$(pwd)/docker/data/custom:/opt/app/custom"
62+
-v "$(pwd)/docker/tmp:/tmp"
4163
-e "APPLICATION_CONFIG_PATH=/opt/app/custom/application.conf"
4264
-e "$full_class_name"
4365
-e "DEPLOY_MODE=client"
4466
-e "DRIVER_MEMORY=2g"
4567
-e "EXECUTOR_MEMORY=2g"
46-
--network "docker_default"
68+
-e "DATA_CATERER_API_USER=$DATA_CATERER_API_USER"
69+
-e "DATA_CATERER_API_TOKEN=$DATA_CATERER_API_TOKEN"
70+
--network "insta-infra_default"
4771
datacatering/data-caterer:"$data_caterer_version"
4872
)
4973

src/main/java/io/github/datacatering/plan/AdvancedBatchEventJavaPlanRun.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
public class AdvancedBatchEventJavaPlanRun extends PlanRun {
99
{
10-
var kafkaTask = new AdvancedKafkaJavaPlanRun().getKafkaTask();
10+
var kafkaTask = new KafkaJavaPlanRun().getKafkaTask();
1111

1212
var csvTask = csv("my_csv", "/opt/app/data/csv/account")
1313
.fields(

0 commit comments

Comments
 (0)