@@ -32,8 +32,11 @@ This project inherits the [Apache License 2.0](LICENSE) from Data Juicer.
3232
3333```
3434docker build -t dataflow . -f Dockerfile
35- ```
3635
36+ docker buildx build --provenance false --platform linux/amd64 -t dataflow . -f Dockerfile
37+
38+ docker buildx build --provenance false --platform linux/arm64 -t dataflow . -f Dockerfile
39+ ```
3740
3841## Prerequisites
3942
@@ -42,7 +45,7 @@ Launch postgres container
4245``` bash
4346docker run -d --name dataflow-pg \
4447 -p 5433:5432 \
45- -v /home/pgdata:/var/lib/postgresql/data \
48+ -v /tmp/data_flow/pgdata:/var/lib/postgresql/data \
4649 -e POSTGRES_DB=data_flow \
4750 -e POSTGRES_USER=postgres \
4851 -e POSTGRES_PASSWORD=postgres \
@@ -54,7 +57,7 @@ Launch mongoDB container
5457``` bash
5558docker run -d --name dataflow-mongo \
5659 -p 27017:27017 \
57- -v /home/mongodata:/data/db \
60+ -v /tmp/data_flow/mongodata:/data/db \
5861 -e MONGO_INITDB_ROOT_USERNAME=root \
5962 -e MONGO_INITDB_ROOT_PASSWORD=example \
6063 opencsg-registry.cn-beijing.cr.aliyuncs.com/opencsghq/mongo:8.0.12
@@ -64,8 +67,8 @@ Launch redis container
6467
6568``` bash
6669docker run -d --name dataflow-redis \
67- -p 6379:6379 \
68- -v /home/redisdata:/data \
70+ -p 16379:6379 \
71+ -v /tmp/data_flow/redisdata:/data \
6972 opencsg-registry.cn-beijing.cr.aliyuncs.com/opencsghq/redis:7.2.5
7073```
7174
@@ -74,14 +77,14 @@ docker run -d --name dataflow-redis \
7477``` bash
7578
7679docker run -d --name dataflow-api -p 8000:8000 \
77- -v /home/apidata:/data/dataflow_data \
80+ -v /tmp/data_flow/apidata:/data/dataflow_data \
7881 -c " uvicorn data_server.main:app --host 0.0.0.0 --port 8000" \
7982 -e DATA_DIR=/data/dataflow_data \
8083 -e CSGHUB_ENDPOINT=https://hub.opencsg.com \
8184 -e MAX_WORKERS=99 \
8285 -e RAY_ADDRESS=auto \
8386 -e RAY_ENABLE=False \
84- -e RAY_LOG_DIR=/home/output \
87+ -e RAY_LOG_DIR=/data/ray_output \
8588 -e API_SERVER=0.0.0.0 \
8689 -e API_PORT=8000 \
8790 -e ENABLE_OPENTELEMETRY=False \
@@ -91,7 +94,7 @@ docker run -d --name dataflow-api -p 8000:8000 \
9194 -e DATABASE_HOSTNAME=127.0.0.1 \
9295 -e DATABASE_PORT=5433 \
9396 -e STUDIO_JUMP_URL=https://data-label.opencsg.com \
94- -e REDIS_HOST_URL=redis://127.0.0.1:6379 \
97+ -e REDIS_HOST_URL=redis://127.0.0.1:16379 \
9598 -e MONG_HOST_URL=mongodb://root:example@127.0.0.1:27017 \
9699 dataflow
97100
@@ -102,14 +105,14 @@ docker run -d --name dataflow-api -p 8000:8000 \
102105``` bash
103106
104107docker run -d --name celery-work -p 8001:8001 \
105- -v /home/celery-data:/data/dataflow_celery \
108+ -v /tmp/data_flow/celery-data:/data/dataflow_celery \
106109 -c " celery -A data_celery.main:celery_app worker --loglevel=info --pool=gevent" \
107110 -e DATA_DIR=/data/dataflow_celery \
108111 -e CSGHUB_ENDPOINT=https://hub.opencsg.com \
109112 -e MAX_WORKERS=99 \
110113 -e RAY_ADDRESS=auto \
111114 -e RAY_ENABLE=False \
112- -e RAY_LOG_DIR=/home/output \
115+ -e RAY_LOG_DIR=/data/ray_output \
113116 -e API_SERVER=0.0.0.0 \
114117 -e API_PORT=8001 \
115118 -e ENABLE_OPENTELEMETRY=False \
@@ -118,23 +121,35 @@ docker run -d --name celery-work -p 8001:8001 \
118121 -e DATABASE_PASSWORD=postgres \
119122 -e DATABASE_HOSTNAME=127.0.0.1 \
120123 -e DATABASE_PORT=5433 \
121- -e REDIS_HOST_URL=redis://127.0.0.1:6379 \
124+ -e REDIS_HOST_URL=redis://127.0.0.1:16379 \
122125 -e MONG_HOST_URL=mongodb://root:example@127.0.0.1:27017 \
123126 dataflow-celery
124127
125128```
126129
127130## Run data-flow server in development mode locally
128131
132+ ### Create a Virtual Environment
133+
129134``` bash
130- # Create virtual python 3.10 environment
135+ uv venv --python 3.10
136+
137+ source .venv/bin/activate
138+
139+ # or
140+
131141conda create -n dataflow python=3.10
142+ ```
143+
144+ ``` bash
132145
133146# Install dependencies
134- pip install ' .[dist]' -i https://pypi.tuna.tsinghua.edu.cn/simple/
135- pip install ' .[tools]' -i https://pypi.tuna.tsinghua.edu.cn/simple/
136- pip install ' .[sci]' -i https://pypi.tuna.tsinghua.edu.cn/simple/
137- pip install -r docker/requirements.txt
147+ # pip install '.[dist]' -i https://pypi.tuna.tsinghua.edu.cn/simple/
148+ # pip install '.[tools]' -i https://pypi.tuna.tsinghua.edu.cn/simple/
149+ # pip install '.[sci]' -i https://pypi.tuna.tsinghua.edu.cn/simple/
150+ # pip install -r docker/requirements.txt
151+
152+ uv pip install -r docker/dataflow_requirements.txt -i https://mirrors.aliyun.com/pypi/simple/
138153
139154# Run the server locally
140155uvicorn data_server.main:app --reload
0 commit comments