gruporaia · Petroncini · May 26, 2026 · May 20, 2026 · May 22, 2026 · May 22, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -3,123 +3,163 @@ name: CI
 on:
   pull_request:
   push:
-    branches:
-      - main
-      - dev
+    branches: [main, dev]
   workflow_dispatch:
   schedule:
     - cron: "0 3 * * *"
 
 jobs:
   tests-vcr:
-    name: Tests (VCR deterministic)
+    name: Tests (VCR deterministic) - ${{ matrix.provider }} / ${{ matrix.model }}
     runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false  # a failing provider leg does not cancel the other one
+      matrix:
+        include:  # one leg per provider/model pair
+          - provider: openai
+            model: gpt-4o-mini
+          - provider: google
+            model: gemini-2.5-flash
     env:
-      GOOGLE_API_KEY: dummy-key
+      TEXT_TO_INSIGHT_TEST_PROVIDER: ${{ matrix.provider }}  # presumably read by the test suite to pick the provider - TODO confirm
+      TEXT_TO_INSIGHT_TEST_MODEL: ${{ matrix.model }}
+      OPENAI_API_KEY: ${{ matrix.provider == 'openai' && 'dummy-key' || '' }}  # placeholder for the selected provider, empty otherwise
+      GOOGLE_API_KEY: ${{ matrix.provider == 'google' && 'dummy-key' || '' }}
     steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-
-      - name: Setup Python
-        uses: actions/setup-python@v5
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
         with:
           python-version: "3.11"
-
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install -r requirements.txt
-          pip install -e .
-
-      - name: Ensure that the data folder exists
-        run: mkdir -p data
-
-      - name: Download the relase database for test
-        run: gh release download dados-teste -p "olist_relational.db" -D data/
+          pip install -r requirements.txt && pip install -e .
+      - name: Download test database  # pulls olist_relational.db from the dados-teste release into data/
+        run: |
+          mkdir -p data
+          gh release download dados-teste -p "olist_relational.db" -D data/
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Run deterministic test layers
+      - name: Run deterministic tests  # uses --record-mode=none, so no cassette is recorded in this job
         run: |
-          pytest \
-            tests/test_componentes.py \
-            tests/test_nodes.py \
-            tests/test_integracao.py \
-            tests/test_main_engine_integracao.py \
+          pytest tests/test_componentes.py tests/test_nodes.py \
+            tests/test_integracao.py tests/test_main_engine_integracao.py \
             -v -s --record-mode=none -m "not real_api"
 
   record-vcr-cassettes:
-    name: Record VCR cassettes (manual)
+    name: Record VCR cassettes - ${{ matrix.provider }} / ${{ matrix.model }}
     runs-on: ubuntu-latest
     if: github.event_name == 'workflow_dispatch'
+    permissions:
+      contents: write  # the last step pushes the re-recorded cassettes with GITHUB_TOKEN
+    strategy:
+      fail-fast: false  # a failing provider leg does not cancel the other one
+      matrix:
+        include:
+          - provider: openai
+            model: gpt-4o-mini
+          - provider: google
+            model: gemini-2.5-flash
     env:
-      GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
+      TEXT_TO_INSIGHT_TEST_PROVIDER: ${{ matrix.provider }}
+      TEXT_TO_INSIGHT_TEST_MODEL: ${{ matrix.model }}
+      OPENAI_API_KEY: ${{ matrix.provider == 'openai' && secrets.OPENAI_API_KEY || '' }}  # only the selected provider receives its secret
+      GOOGLE_API_KEY: ${{ matrix.provider == 'google' && secrets.GOOGLE_API_KEY || '' }}
     steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-
-      - name: Setup Python
-        uses: actions/setup-python@v5
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
         with:
           python-version: "3.11"
-
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install -r requirements.txt
-          pip install -e .
+          pip install -r requirements.txt && pip install -e .
 
-      - name: Validate API key
+      - name: Download test database  # same database setup as the other jobs
         run: |
-          if [[ -z "${GOOGLE_API_KEY}" ]]; then
-            echo "GOOGLE_API_KEY secret ausente; nao e possivel gravar cassetes."
-            exit 1
+          mkdir -p data
+          gh release download dados-teste -p "olist_relational.db" -D data/
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Check API key availability  # its "available" output gates the following steps
+        id: check_key
+        run: |
+          if [[ "${{ matrix.provider }}" == "openai" && -z "${OPENAI_API_KEY}" ]]; then
+            echo "available=false" >> "$GITHUB_OUTPUT"
+            echo "⚠️ OPENAI_API_KEY ausente; pulando gravação."
+          elif [[ "${{ matrix.provider }}" == "google" && -z "${GOOGLE_API_KEY}" ]]; then
+            echo "available=false" >> "$GITHUB_OUTPUT"
+            echo "⚠️ GOOGLE_API_KEY ausente; pulando gravação."
+          else
+            echo "available=true" >> "$GITHUB_OUTPUT"
           fi
 
       - name: Record or update VCR cassettes
+        if: steps.check_key.outputs.available == 'true'  # only runs when the key is present
         run: |
-          pytest \
-            tests/test_nodes.py \
-            tests/test_integracao.py \
-            -v -s --record-mode=new_episodes -m "not real_api"
+          pytest tests/test_nodes.py tests/test_integracao.py \
+            -v -s --record-mode=rewrite -m "not real_api"
 
-      - name: Show cassette status
+      - name: Commit updated cassettes  # commits the cassettes back to the repository
+        if: steps.check_key.outputs.available == 'true'
         run: |
-          git status -- tests/cassettes || true
-
-      - name: Upload recorded cassettes artifact
-        uses: actions/upload-artifact@v4
-        with:
-          name: vcr-cassettes-${{ github.run_id }}
-          path: tests/cassettes
-          if-no-files-found: warn
+          git config user.name "github-actions[bot]"
+          git config user.email "github-actions[bot]@users.noreply.github.com"
+          git add tests/cassettes
+          git diff --cached --quiet || git commit -m "chore: update VCR cassettes [${{ matrix.provider }}/${{ matrix.model }}]"
+          git pull --rebase --autostash  # the other matrix leg may have pushed first; avoid a non-fast-forward rejection
+          git push
 
   tests-real-api:
-    name: Tests (real API optional)
+    name: Tests (real API) - ${{ matrix.provider }} / ${{ matrix.model }}
     runs-on: ubuntu-latest
     needs: tests-vcr
     if: github.event_name == 'workflow_dispatch' || github.event_name == 'schedule'
+    strategy:
+      fail-fast: false  # a failing provider leg does not cancel the other one
+      matrix:
+        include:  # one leg per provider/model pair
+          - provider: openai
+            model: gpt-4o-mini
+          - provider: google
+            model: gemini-2.5-flash
     env:
-      GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
+      TEXT_TO_INSIGHT_TEST_PROVIDER: ${{ matrix.provider }}
+      TEXT_TO_INSIGHT_TEST_MODEL: ${{ matrix.model }}
+      OPENAI_API_KEY: ${{ matrix.provider == 'openai' && secrets.OPENAI_API_KEY || '' }}  # only the selected provider receives its secret
+      GOOGLE_API_KEY: ${{ matrix.provider == 'google' && secrets.GOOGLE_API_KEY || '' }}
     steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-
-      - name: Setup Python
-        uses: actions/setup-python@v5
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
         with:
           python-version: "3.11"
-
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install -r requirements.txt
-          pip install -e .
+          pip install -r requirements.txt && pip install -e .
 
-      - name: Run optional real API smoke
+      # The test database is downloaded in this job as well
+      - name: Download test database
         run: |
-          if [[ -z "${GOOGLE_API_KEY}" ]]; then
-            echo "GOOGLE_API_KEY secret ausente; pulando job real_api."
-            exit 0
+          mkdir -p data
+          gh release download dados-teste -p "olist_relational.db" -D data/
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Check API key availability
+        id: check_key  # referenced by the if: condition of the smoke-test step
+        run: |
+          if [[ "${{ matrix.provider }}" == "openai" && -z "${OPENAI_API_KEY}" ]]; then
+            echo "available=false" >> $GITHUB_OUTPUT
+            echo "⚠️ OPENAI_API_KEY ausente; pulando smoke test."
+          elif [[ "${{ matrix.provider }}" == "google" && -z "${GOOGLE_API_KEY}" ]]; then
+            echo "available=false" >> $GITHUB_OUTPUT
+            echo "⚠️ GOOGLE_API_KEY ausente; pulando smoke test."
+          else
+            echo "available=true" >> $GITHUB_OUTPUT
           fi
-          pytest tests/test_real_api_smoke.py -v -s -m real_api
+
+      - name: Run real API smoke tests
+        if: steps.check_key.outputs.available == 'true'  # skipped when the key check reported available=false
+        run: pytest tests/test_real_api_smoke.py -v -s -m real_api
diff --git a/.gitignore b/.gitignore
@@ -49,6 +49,7 @@ coverage.xml
 .hypothesis/
 .pytest_cache/
 results/
+graphs/
 
 # Translations
 *.mo
@@ -149,4 +150,7 @@ chroma_db/
 /meus_testes/*
 
 # reports testes spider
-/reports/
+/reports/
+
+# commit.txt (arquivo temporário usado para textos de commit personalizados)
+commit.txt
diff --git a/ARQUITETURA.md b/ARQUITETURA.md
@@ -4,9 +4,10 @@
 
 O sistema combina:
 
-- um grafo LangGraph com 7 nos;
+- um grafo LangGraph com 9 nos (inclui salvamento de CSV e geracao de graficos);
 - uma camada de runtime compartilhada (`text_to_insight/runtime.py`);
 - duas interfaces de entrada: biblioteca (`InsightEngine`) e CLI (`main.py` -> `text_to_insight/cli.py`).
+- um modulo de benchmark para Spider 1.0 e Spider 2.0 Lite (`scripts/` + `src/spider/`).
 
 Fluxo principal:
 
@@ -17,6 +18,9 @@ START
   -> Agente de Codigo
   -> Executor
   -> Critico
+  -> Salvar CSV
+  -> Roteador Grafico
+  -> Gerador Grafico (quando aplicavel)
   -> Resposta
 END
 ```
@@ -123,13 +127,23 @@ Arquivos:
 
 - gera resposta natural final quando status aprovado
 
+### Salvar CSV (`text_to_insight/nodes/csv_saver.py`)
+
+- salva o resultado completo em `results/` e registra `caminho_csv_resultado`
+
+### Gerador de Graficos (`text_to_insight/nodes/graph_generator.py`)
+
+- gera codigo matplotlib via LLM, executa em subprocesso e salva imagem em `graphs/`
+- registra `grafico_gerado` e `caminho_grafico` no estado
+
 ## Roteadores
 
 Arquivo: `text_to_insight/routers/edges.py`
 
 - `roteador_sandbox`: controla retry apos execucao
 - `roteador_planejador`: decide schema, codificacao, HITL ou fim
-- `roteador_critico` (interno em `graph.py`): aprovado -> resposta; senao -> planejador
+- `roteador_grafico`: decide entre gerar grafico ou ir direto para resposta
+- `roteador_critico` (interno em `graph.py`): aprovado -> salvar_csv (ou resposta); senao -> planejador
 
 ## Estado compartilhado
 
@@ -143,11 +157,18 @@ Campos obrigatorios:
 
 Campos principais do fluxo:
 
-- `contexto_schema`, `sql_gerada`, `linhas_resultado_preview`, `total_linhas_resultado`
+- `contexto_schema`, `sql_gerada`, `linhas_resultado_preview`, `linhas_resultado_completo`, `total_linhas_resultado`
 - `erro_execucao`, `saida_terminal`, `feedback_critico`, `resposta_natural`
 - `status`, `tentativas_loop`, `historico_conversa`, `espera_humana`, `pergunta_ao_usuario`
+- `caminho_csv_resultado`, `grafico_gerado`, `caminho_grafico`
 - telemetria: `tokens_input`, `tokens_output`, `tokens_total`
 
+## Benchmark Spider
+
+- Spider 1.0: `scripts/test_spider_eval.py`
+- Spider 2.0 Lite: `scripts/test_spider2_eval.py`
+- Componentes: `src/spider/` (loader, executor, metrics, csv_reporter, analise_empirica)
+
 ## HITL e perguntas
 
 - `pergunta_original` e a pergunta inicial da thread (imutavel apos o primeiro set).
@@ -170,4 +191,4 @@ Os testes marcados com `@pytest.mark.vcr` usam cassetes em `tests/cassettes/`.
 - para gravar ou atualizar cassetes: `--record-mode=new_episodes`;
 - apos gravacao: execute novamente com `--record-mode=none` para validar reproducibilidade.
 
-Na CI existe um job manual `record-vcr-cassettes` (workflow_dispatch) para gravacao/atualizacao controlada.
+Na CI existe um job manual `record-vcr-cassettes` (workflow_dispatch) para gravação/atualização controlada.