diff --git a/docker-compose.deploy.yml b/docker-compose.deploy.yml deleted file mode 100644 index 057f2fa..0000000 --- a/docker-compose.deploy.yml +++ /dev/null @@ -1,148 +0,0 @@ -services: - storage-service: - image: ghcr.io/weoses/storage-service:${RELEASE_VERSION:-latest} - environment: - METADATA_DB_ELASTIC_CLOUDID: ${ELASTIC_CLOUDID} - METADATA_DB_ELASTIC_APIKEY: ${ELASTIC_APIKEY} - METADATA_DB_INDEX: ${METADATA_DB_INDEX} - - TAG_DB_ELASTIC_CLOUDID: ${ELASTIC_CLOUDID} - TAG_DB_ELASTIC_APIKEY: ${ELASTIC_APIKEY} - TAG_DB_INDEX: ${TAG_DB_INDEX} - - MEDIA_STORAGE_ENDPOINT: ${S3_ENDPOINT} - MEDIA_STORAGE_ACCESSKEY: ${S3_ACCESSKEY} - MEDIA_STORAGE_SECRETKEY: ${S3_SECRETKEY} - MEDIA_STORAGE_SECURE: ${S3_SECURE} - MEDIA_STORAGE_BUCKET: ${S3_MEDIA_BUCKET} - - TEMP_STORAGE_ENDPOINT: ${S3_ENDPOINT} - TEMP_STORAGE_ACCESSKEY: ${S3_ACCESSKEY} - TEMP_STORAGE_SECRETKEY: ${S3_SECRETKEY} - TEMP_STORAGE_SECURE: ${S3_SECURE} - TEMP_STORAGE_BUCKET: ${S3_TEMP_BUCKET} - - GEMINI_EMBEDDING_APIKEY: ${GEMINI_EMBEDDING_APIKEY} - GEMINI_EMBEDDING_APIENDPOINT: ${GEMINI_EMBEDDING_APIENDPOINT} - GEMINI_EMBEDDING_MODEL: ${GEMINI_EMBEDDING_MODEL} - - GEMINI_EXTRACTOR_APIKEY: ${GEMINI_EXTRACTOR_APIKEY} - GEMINI_EXTRACTOR_APIENDPOINT: ${GEMINI_EXTRACTOR_APIENDPOINT} - GEMINI_EXTRACTOR_MODEL_IMAGE: ${GEMINI_EXTRACTOR_MODEL_IMAGE} - GEMINI_EXTRACTOR_MODEL_VIDEO: ${GEMINI_EXTRACTOR_MODEL_VIDEO} - GEMINI_EXTRACTOR_MODEL_AUDIO: ${GEMINI_EXTRACTOR_MODEL_AUDIO} - - EXTRACTOR_PROVIDER: ${EXTRACTOR_PROVIDER} - EMBEDDER_PROVIDER: ${EMBEDDER_PROVIDER} - - OPENROUTER_EMBEDDING_APIKEY: ${OPENROUTER_EMBEDDING_APIKEY} - OPENROUTER_EMBEDDING_MODEL: ${OPENROUTER_EMBEDDING_MODEL} - - OPENROUTER_EXTRACTOR_APIKEY: ${OPENROUTER_EXTRACTOR_APIKEY} - OPENROUTER_EXTRACTOR_MODEL_IMAGE: ${OPENROUTER_EXTRACTOR_MODEL_IMAGE} - OPENROUTER_EXTRACTOR_MODEL_VIDEO: ${OPENROUTER_EXTRACTOR_MODEL_VIDEO} - OPENROUTER_EXTRACTOR_MODEL_AUDIO: ${OPENROUTER_EXTRACTOR_MODEL_AUDIO} - - EXTRACTING_SEPARATE_AUDIO: 
${EXTRACTING_SEPARATE_AUDIO} - - FFMPEG_BINARY: ${FFMPEG_BINARY} - FFMPEG_CPULIMIT: ${FFMPEG_CPULIMIT} - FFMPEG_THREADSLIMIT: ${FFMPEG_THREADSLIMIT} - - SERVER_LISTENADDRESS: :${STORAGE_SERVICE_PORT} - - restart: unless-stopped - ports: - - "127.0.0.1:${STORAGE_SERVICE_PORT}:${STORAGE_SERVICE_PORT}" - volumes: - - "./storage-service/config.yaml:/app/config/config.yaml" - networks: - - env - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:${STORAGE_SERVICE_PORT}/health"] - interval: 30s - timeout: 10s - retries: 6 - start_period: 20s - - postgres: - image: postgres:16-alpine - environment: - POSTGRES_USER: ${POSTGRES_USER} - POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} - POSTGRES_DB: ${POSTGRES_DB} - ports: - - "127.0.0.1:5432:5432" - networks: - - env - volumes: - - "postgres_data:/var/lib/postgresql/data" - healthcheck: - test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER} -d ${POSTGRES_DB}"] - interval: 10s - timeout: 5s - retries: 5 - - telegram-service: - image: ghcr.io/weoses/telegram-service:${RELEASE_VERSION:-latest} - environment: - TELEGRAM_TOKEN: ${TELEGRAM_TOKEN} - WEBHOOK_EXTERNALURL: ${WEBHOOK_EXTERNALURL} - STORAGE_SERVICE_URI: http://storage-service:${STORAGE_SERVICE_PORT} - POSTGRES_DSN: postgres://${POSTGRES_USER}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB}?sslmode=disable - TEMP_STORAGE_ENDPOINT: ${S3_ENDPOINT} - TEMP_STORAGE_ACCESSKEY: ${S3_ACCESSKEY} - TEMP_STORAGE_SECRETKEY: ${S3_SECRETKEY} - TEMP_STORAGE_SECURE: ${S3_SECURE} - TEMP_STORAGE_BUCKET: ${S3_TEMP_BUCKET} - - SERVER_LISTENADDRESS: :${TELEGRAM_SERVICE_PORT} - - restart: unless-stopped - ports: - - "127.0.0.1:${TELEGRAM_SERVICE_PORT}:${TELEGRAM_SERVICE_PORT}" - volumes: - - "./telegram-service/config.yaml:/app/config/config.yaml" - networks: - - env - depends_on: - postgres: - condition: service_healthy - storage-service: - condition: service_healthy - - webapp: - image: ghcr.io/weoses/webapp-service:${RELEASE_VERSION:-latest} - environment: - SERVER_LISTENADDRESS: 
:${WEBAPP_SERVICE_PORT} - STORAGE_SERVICE_URI: http://storage-service:${STORAGE_SERVICE_PORT} - ACCOUNT_ID: ${WEBAPP_ACCOUNT_ID} - JWT_SECRET: ${WEBAPP_JWT_SECRET} - FRONTEND_BASEURL: ${WEBAPP_BASE_URL} - TEMP_STORAGE_ENDPOINT: ${S3_ENDPOINT} - TEMP_STORAGE_ACCESSKEY: ${S3_ACCESSKEY} - TEMP_STORAGE_SECRETKEY: ${S3_SECRETKEY} - TEMP_STORAGE_SECURE: ${S3_SECURE} - TEMP_STORAGE_BUCKET: ${S3_TEMP_BUCKET} - restart: unless-stopped - ports: - - "127.0.0.1:${WEBAPP_SERVICE_PORT}:${WEBAPP_SERVICE_PORT}" - volumes: - - "./webapp-service/config.yaml:/app/config/config.yaml" - networks: - - env - depends_on: - storage-service: - condition: service_healthy - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:${WEBAPP_SERVICE_PORT}/api/health"] - interval: 30s - timeout: 10s - retries: 6 - start_period: 20s - -networks: - env: - -volumes: - postgres_data: diff --git a/group_vars/all.yml b/group_vars/all.yml index b2556e2..7b34383 100644 --- a/group_vars/all.yml +++ b/group_vars/all.yml @@ -10,14 +10,17 @@ s3_secure: "true" postgres_db: telegram # FFmpeg -ffmpeg_binary: ffmpeg ffmpeg_cpulimit: 80 ffmpeg_threadslimit: # LLM provider selection (gemini | openrouter) extractor_provider: "openrouter" embedder_provider: "gemini" + +# Extracting configuration extracting_separate_audio: false +extracting_video_slice_interval: 15 + # Gemini embedding model gemini_embedding_model: "gemini-embedding-2-preview" @@ -56,3 +59,12 @@ telegram_service_port: 7002 webapp_service_port: 7003 webapp_base_url: "/.proxy/webapp-service" + + +# Secrets +s3_access_key: +s3_secret_key: +elastic_cloudid: +elastic_apikey: +gemini_api_key: +openrouter_api_key: \ No newline at end of file diff --git a/roles/deploy/tasks/main.yml b/roles/deploy/tasks/main.yml index 36db0bf..ef2b00e 100644 --- a/roles/deploy/tasks/main.yml +++ b/roles/deploy/tasks/main.yml @@ -13,15 +13,27 @@ force: true register: git_result -- name: Template .env file +- name: Template storage-service config file template: - src: 
.env.j2 - dest: "{{ deploy_path }}/.env" + src: storage-service-config.yaml.j2 + dest: "{{ deploy_path }}/storage-service-config.yaml" mode: "0600" -- name: Copy ansible docker-compose file - copy: - src: "{{ playbook_dir }}/docker-compose.deploy.yml" +- name: Template telegram-service config file + template: + src: telegram-service-config.yaml.j2 + dest: "{{ deploy_path }}/telegram-service-config.yaml" + mode: "0600" + +- name: Template webapp-service config file + template: + src: webapp-service-config.yaml.j2 + dest: "{{ deploy_path }}/webapp-service-config.yaml" + mode: "0600" + +- name: Template docker-compose file + template: + src: docker-compose.deploy.yml.j2 dest: "{{ deploy_path }}/docker-compose.deploy.yml" - mode: "0644" + mode: "0600"  # rendered compose file now embeds POSTGRES_PASSWORD, keep it owner-only diff --git a/roles/deploy/templates/.env.j2 b/roles/deploy/templates/.env.j2 deleted file mode 100644 index 08a44e8..0000000 --- a/roles/deploy/templates/.env.j2 +++ /dev/null @@ -1,67 +0,0 @@ -# Generated by Ansible — do not edit manually -RELEASE_VERSION={{ release_version }} - -# Elastic -ELASTIC_CLOUDID={{ elastic_cloudid }} -ELASTIC_APIKEY={{ elastic_apikey }} -METADATA_DB_INDEX={{ metadata_elastic_index }} -TAG_DB_INDEX={{ tag_elastic_index }} - -# S3 storage -S3_ENDPOINT={{ s3_endpoint }} -S3_ACCESSKEY={{ s3_access_key }} -S3_SECRETKEY={{ s3_secret_key }} -S3_SECURE={{ s3_secure }} -S3_MEDIA_BUCKET={{ s3_media_bucket }} -S3_TEMP_BUCKET={{ s3_temp_bucket }} - -EXTRACTING_SEPARATE_AUDIO: {{ extracting_separate_audio }} - -# Gemini embedding -GEMINI_EMBEDDING_APIKEY={{ gemini_api_key }} -GEMINI_EMBEDDING_APIENDPOINT={{ gemini_embedding_api_endpoint }} -GEMINI_EMBEDDING_MODEL={{ gemini_embedding_model }} - -# Gemini extractor -GEMINI_EXTRACTOR_APIKEY={{ gemini_api_key }} -GEMINI_EXTRACTOR_APIENDPOINT={{ gemini_extractor_api_endpoint }} -GEMINI_EXTRACTOR_MODEL_IMAGE={{ gemini_extractor_model_image }} -GEMINI_EXTRACTOR_MODEL_VIDEO={{ gemini_extractor_model_video }} -GEMINI_EXTRACTOR_MODEL_AUDIO={{ gemini_extractor_model_audio }} - -# LLM 
provider selection -EXTRACTOR_PROVIDER={{ extractor_provider }} -EMBEDDER_PROVIDER={{ embedder_provider }} - -# OpenRouter embedding -OPENROUTER_EMBEDDING_APIKEY={{ openrouter_api_key | default('') }} -OPENROUTER_EMBEDDING_MODEL={{ openrouter_embedding_model }} - -# OpenRouter extractor -OPENROUTER_EXTRACTOR_APIKEY={{ openrouter_api_key | default('') }} -OPENROUTER_EXTRACTOR_MODEL_IMAGE={{ openrouter_extractor_model_image }} -OPENROUTER_EXTRACTOR_MODEL_VIDEO={{ openrouter_extractor_model_video }} -OPENROUTER_EXTRACTOR_MODEL_AUDIO={{ openrouter_extractor_model_audio }} - -# PostgreSQL -POSTGRES_DB={{ postgres_db }} -POSTGRES_USER={{ postgres_user }} -POSTGRES_PASSWORD={{ postgres_password }} - -# Telegram -TELEGRAM_TOKEN={{ telegram_token }} -WEBHOOK_EXTERNALURL={{ telegram_webhook_externalurl }} - -# FFmpeg -FFMPEG_BINARY={{ ffmpeg_binary }} -FFMPEG_CPULIMIT={{ ffmpeg_cpulimit }} -FFMPEG_THREADSLIMIT={{ ffmpeg_threadslimit }} - -# Ports -STORAGE_SERVICE_PORT={{ storage_service_port }} -TELEGRAM_SERVICE_PORT={{ telegram_service_port }} -WEBAPP_SERVICE_PORT={{ webapp_service_port }} - -WEBAPP_ACCOUNT_ID={{ webapp_account_id }} -WEBAPP_JWT_SECRET={{ webapp_jwt_secret }} -WEBAPP_BASE_URL={{ webapp_base_url }} diff --git a/roles/deploy/templates/docker-compose.deploy.yml.j2 b/roles/deploy/templates/docker-compose.deploy.yml.j2 new file mode 100644 index 0000000..4edcdcc --- /dev/null +++ b/roles/deploy/templates/docker-compose.deploy.yml.j2 @@ -0,0 +1,74 @@ +services: + storage-service: + image: "ghcr.io/weoses/storage-service:{{ release_version | default('latest', true) }}"  # empty release_version also falls back to latest + restart: unless-stopped + ports: + - "127.0.0.1:{{ storage_service_port }}:{{ storage_service_port }}" + volumes: + - "./storage-service-config.yaml:/app/config.yaml" + networks: + - env + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:{{ storage_service_port }}/health"] + interval: 30s + timeout: 10s + retries: 6 + start_period: 20s + + postgres: + image: postgres:16-alpine + environment: 
+ POSTGRES_USER: "{{ postgres_user }}" + POSTGRES_PASSWORD: "{{ postgres_password | replace('$', '$$') }}"  # "$" doubled so Compose does not interpolate it + POSTGRES_DB: "{{ postgres_db }}" + ports: + - "127.0.0.1:5432:5432" + networks: + - env + volumes: + - "postgres_data:/var/lib/postgresql/data" + healthcheck: + test: ["CMD-SHELL", "pg_isready -U '{{ postgres_user }}' -d '{{ postgres_db }}'"] + interval: 10s + timeout: 5s + retries: 5 + + telegram-service: + image: "ghcr.io/weoses/telegram-service:{{ release_version | default('latest', true) }}" + restart: unless-stopped + ports: + - "127.0.0.1:{{ telegram_service_port }}:{{ telegram_service_port }}" + volumes: + - "./telegram-service-config.yaml:/app/config.yaml" + networks: + - env + depends_on: + postgres: + condition: service_healthy + storage-service: + condition: service_healthy + + webapp: + image: "ghcr.io/weoses/webapp-service:{{ release_version | default('latest', true) }}" + restart: unless-stopped + ports: + - "127.0.0.1:{{ webapp_service_port }}:{{ webapp_service_port }}" + volumes: + - "./webapp-service-config.yaml:/app/config.yaml" + networks: + - env + depends_on: + storage-service: + condition: service_healthy + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:{{ webapp_service_port }}/api/health"] + interval: 30s + timeout: 10s + retries: 6 + start_period: 20s + +networks: + env: + +volumes: + postgres_data: diff --git a/roles/deploy/templates/storage-service-config.yaml.j2 b/roles/deploy/templates/storage-service-config.yaml.j2 new file mode 100644 index 0000000..991cb03 --- /dev/null +++ b/roles/deploy/templates/storage-service-config.yaml.j2 @@ -0,0 +1,104 @@ +log: + Level: info + +server: + ListenAddress: :{{ storage_service_port }} + +extracting: + embedding-dimensions: 1408 + video-slice-interval-sec: {{ extracting_video_slice_interval }} + separate-audio: {{ extracting_separate_audio }} + +search: + SemanticDuplicateThreshold: 0.955 + PercentageDuplicatePartsThreshold: 0.7 + SemanticTextSearchThreshold: 0.5 + Fuzziness: 
"AUTO:4,8" + + +media-storage: + Endpoint: {{ s3_endpoint }} + AccessKey: {{ s3_access_key }} + SecretKey: {{ s3_secret_key }} + Bucket: {{ s3_media_bucket }} + Secure: {{ s3_secure }} + +temp-storage: + Endpoint: {{ s3_endpoint }} + AccessKey: {{ s3_access_key }} + SecretKey: {{ s3_secret_key }} + Bucket: {{ s3_temp_bucket }} + Secure: {{ s3_secure }} + + +metadata-db: + Elastic: + Addresses: + Username: + Password: + CloudID: {{ elastic_cloudid }} + ApiKey: {{ elastic_apikey }} + Index: {{ metadata_elastic_index }} + +tag-db: + Elastic: + Addresses: + Username: + Password: + CloudID: {{ elastic_cloudid }} + ApiKey: {{ elastic_apikey }} + Index: {{ tag_elastic_index }} + +image-converter: + ThumbSize: 360 + +ffmpeg: + FfmpegBinary: ffmpeg + FfprobeBinary: ffprobe + CpuLimit: {{ ffmpeg_cpulimit }} + ThreadsLimit: {{ ffmpeg_threadslimit }} + +extractor-provider: {{ extractor_provider }} +embedder-provider: {{ embedder_provider }} + +gemini-extractor: + apikey: {{ gemini_api_key }} + apiendpoint: {{ gemini_extractor_api_endpoint }} + model-image: {{ gemini_extractor_model_image }} + model-video: {{ gemini_extractor_model_video }} + model-audio: {{ gemini_extractor_model_audio }} + image-extract-prompt: "Analyze this media and call the extract_metadata function with your findings. If you found nothing for parameter, leave parameter empty (except caption). Always preserve original language, and use correct alphabet (cyrillic, latin)" + video-extract-prompt: "Analyze this media and call the extract_metadata function with your findings. If you found nothing for parameter, leave parameter empty (except caption). Always preserve original language, and use correct alphabet (cyrillic, latin)" + audio-extract-prompt: "Analyze this media and call the extract_metadata function with your findings. If you found nothing for parameter, leave parameter empty (except caption). 
Always preserve original language, and use correct alphabet (cyrillic, latin)" + output-tool-description: "Extract structured metadata from the media" + output-tool-transcription-desc: "Audio transcription or speech-to-text content from the media. Preserve original language. Capture speech and music lyrics." + output-tool-on-screen-text-desc: "Any text visible on screen (OCR). Preserve original language" + output-tool-caption-desc: "A small caption summarizing the media content. Must not be more than 2-3 words" + output-tool-audio-track-desc: "An audio track, song, that is at sound background. If it is not famous, or it to quiet, or you not sure, ignore it" + combine-prompt: "Multiple extractions of video segments are provided below. Segments may overlap one another. Merge on-screen text and transcripts. Pick the most fitting caption. Synthesize them into a single portion of data and call extract_metadata." + duplicate-prompt: "Are these two images is same, has same idea, same meaning, same characters, etc? Pass true to tool check_duplicate if images is same, or false otherwise." + +gemini-embedding: + apikey: {{ gemini_api_key }} + apiendpoint: {{ gemini_embedding_api_endpoint }} + model: {{ gemini_embedding_model }} + +openrouter-extractor: + apikey: {{ openrouter_api_key }} + model-image: {{ openrouter_extractor_model_image }} + model-video: {{ openrouter_extractor_model_video }} + model-audio: {{ openrouter_extractor_model_audio }} + image-extract-prompt: "Analyze this media and call the extract_metadata function with your findings. If you found nothing for parameter, leave parameter empty (except caption). Always preserve original language, and use correct alphabet (cyrillic, latin)" + video-extract-prompt: "Analyze this media and call the extract_metadata function with your findings. If you found nothing for parameter, leave parameter empty (except caption). 
Always preserve original language, and use correct alphabet (cyrillic, latin)" + audio-extract-prompt: "Analyze this media and call the extract_metadata function with your findings. If you found nothing for parameter, leave parameter empty (except caption). Always preserve original language, and use correct alphabet (cyrillic, latin)" + output-tool-description: "Extract structured metadata from the media" + output-tool-transcription-desc: "Audio transcription or speech-to-text content from the media. Preserve original language. Capture speech and music lyrics." + output-tool-on-screen-text-desc: "Any text visible on screen (OCR). Preserve original language" + output-tool-caption-desc: "A small caption summarizing the media content. Must not be more than 2-3 words" + output-tool-audio-track-desc: "An audio track, song, that is at sound background. If it is not famous, or it to quiet, or you not sure, ignore it" + combine-prompt: "Multiple extractions of video segments are provided below. Segments may overlap one another. Merge on-screen text and transcripts. Pick the most fitting caption. Synthesize them into a single portion of data and call extract_metadata." + duplicate-prompt: "Are these two images is same, has same idea, same meaning, same characters, etc? Pass true to tool check_duplicate if images is same, or false otherwise." 
+ +openrouter-embedding: + apikey: {{ openrouter_api_key | default('') }} + model: {{ openrouter_embedding_model }} diff --git a/roles/deploy/templates/telegram-service-config.yaml.j2 b/roles/deploy/templates/telegram-service-config.yaml.j2 new file mode 100644 index 0000000..2dd77a6 --- /dev/null +++ b/roles/deploy/templates/telegram-service-config.yaml.j2 @@ -0,0 +1,32 @@ +log: + Level: info + +server: + ListenAddress: :{{ telegram_service_port }} + +telegram: + Token: {{ telegram_token }} + Debug: false + +postgres: + DSN: "postgres://{{ postgres_user | urlencode | replace('/', '%2F') }}:{{ postgres_password | urlencode | replace('/', '%2F') }}@postgres:5432/{{ postgres_db }}?sslmode=disable"  # credentials percent-encoded so reserved URI characters cannot break the DSN + +inline: + PageSize: 20 + + +storage-service: + Uri: "http://storage-service:{{ storage_service_port }}" + +user-account: + StaticUuid: "00000000-0000-0000-0000-000000000000" + +webhook: + ExternalUrl: "{{ telegram_webhook_externalurl }}" + +temp-storage: + Endpoint: {{ s3_endpoint }} + AccessKey: {{ s3_access_key }} + SecretKey: {{ s3_secret_key }} + Bucket: {{ s3_temp_bucket }} + Secure: {{ s3_secure }} diff --git a/roles/deploy/templates/webapp-service-config.yaml.j2 b/roles/deploy/templates/webapp-service-config.yaml.j2 new file mode 100644 index 0000000..7765275 --- /dev/null +++ b/roles/deploy/templates/webapp-service-config.yaml.j2 @@ -0,0 +1,24 @@ +log: + Level: info + +server: + ListenAddress: :{{ webapp_service_port }} + +storage-service: + Uri: "http://storage-service:{{ storage_service_port }}" + +account: + Id: "{{ webapp_account_id }}" + +temp-storage: + Endpoint: {{ s3_endpoint }} + AccessKey: {{ s3_access_key }} + SecretKey: {{ s3_secret_key }} + Bucket: {{ s3_temp_bucket }} + Secure: {{ s3_secure }} + +jwt: + Secret: "{{ webapp_jwt_secret }}" + +frontend: + BaseUrl: {{ webapp_base_url }}