From 57cdc287322cfd348059aa98007ac5951769a7f8 Mon Sep 17 00:00:00 2001 From: LuizCorrei4 Date: Wed, 20 May 2026 13:37:32 -0300 Subject: [PATCH 1/6] =?UTF-8?q?Adiciona=20testes=20de=20integra=C3=A7?= =?UTF-8?q?=C3=A3o=20e=20de=20n=C3=B3s=20para=20gera=C3=A7=C3=A3o=20de=20g?= =?UTF-8?q?r=C3=A1ficos=20e=20atualiza=20fixtures=20para=20provedores=20LL?= =?UTF-8?q?M?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Novos casos de teste para OpenAI e Google Gemini nos testes de integração - Criação de cassettes YAML para o GPT-4o-mini e para perguntas fora do escopo - `conftest.py` passa a suportar seleção dinâmica de provedor e modelo via variáveis de ambiente - Fixture `grafo` atualizada para usar o provedor LLM selecionado - Testes refatorados para compatibilidade com as APIs da OpenAI e do Google - Removida a instanciação direta de LLMs nos testes de nós: agora dependem de fixtures - Smoke test de API real simplificado para usar a fixture de LLM --- .github/workflows/ci.yml | 176 +++++++----- .gitignore | 1 + ARQUITETURA.md | 29 +- DESENVOLVIMENTO.md | 126 ++++++++- INDICE.py | 80 ++++-- README.md | 58 +++- pyproject.toml | 2 + requirements.txt | 5 +- ...de_agent_executor__openai-gpt-4o-mini.yaml | 145 ++++++++++ ...feedback_regenera__openai-gpt-4o-mini.yaml | 147 ++++++++++ ...de_agent_gera_sql__openai-gpt-4o-mini.yaml | 145 ++++++++++ ...resultado_correto__openai-gpt-4o-mini.yaml | 173 ++++++++++++ ...m_feedback_revisa__openai-gpt-4o-mini.yaml | 153 ++++++++++ ..._decide_codificar__openai-gpt-4o-mini.yaml | 153 ++++++++++ ...ta_fora_de_escopo__openai-gpt-4o-mini.yaml | 152 ++++++++++ tests/conftest.py | 154 +++++++++- tests/test_integracao.py | 125 +++++++- tests/test_nodes.py | 266 +++++++++++++++++- tests/test_real_api_smoke.py | 13 +- 19 files changed, 1949 insertions(+), 154 deletions(-) create mode 100644 tests/cassettes/test_nodes/test_cadeia_code_agent_executor__openai-gpt-4o-mini.yaml create mode 100644 
tests/cassettes/test_nodes/test_code_agent_com_feedback_regenera__openai-gpt-4o-mini.yaml create mode 100644 tests/cassettes/test_nodes/test_code_agent_gera_sql__openai-gpt-4o-mini.yaml create mode 100644 tests/cassettes/test_nodes/test_critic_avalia_resultado_correto__openai-gpt-4o-mini.yaml create mode 100644 tests/cassettes/test_nodes/test_planner_com_feedback_revisa__openai-gpt-4o-mini.yaml create mode 100644 tests/cassettes/test_nodes/test_planner_com_schema_decide_codificar__openai-gpt-4o-mini.yaml create mode 100644 tests/cassettes/test_nodes/test_planner_pergunta_fora_de_escopo__openai-gpt-4o-mini.yaml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index eea8b4f..134f9fe 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -3,123 +3,163 @@ name: CI on: pull_request: push: - branches: - - main - - dev + branches: [main, dev] workflow_dispatch: schedule: - cron: "0 3 * * *" jobs: tests-vcr: - name: Tests (VCR deterministic) + name: Tests (VCR deterministic) - ${{ matrix.provider }} / ${{ matrix.model }} runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + include: + - provider: openai + model: gpt-4o-mini + - provider: google + model: gemini-2.5-flash env: - GOOGLE_API_KEY: dummy-key + TEXT_TO_INSIGHT_TEST_PROVIDER: ${{ matrix.provider }} + TEXT_TO_INSIGHT_TEST_MODEL: ${{ matrix.model }} + OPENAI_API_KEY: ${{ matrix.provider == 'openai' && 'dummy-key' || '' }} + GOOGLE_API_KEY: ${{ matrix.provider == 'google' && 'dummy-key' || '' }} steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Setup Python - uses: actions/setup-python@v5 + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 with: python-version: "3.11" - - name: Install dependencies run: | python -m pip install --upgrade pip - pip install -r requirements.txt - pip install -e . 
- - - name: Ensure that the data folder exists - run: mkdir -p data - - - name: Download the relase database for test - run: gh release download dados-teste -p "olist_relational.db" -D data/ + pip install -r requirements.txt && pip install -e . + - name: Download test database + run: | + mkdir -p data + gh release download dados-teste -p "olist_relational.db" -D data/ env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - - name: Run deterministic test layers + - name: Run deterministic tests run: | - pytest \ - tests/test_componentes.py \ - tests/test_nodes.py \ - tests/test_integracao.py \ - tests/test_main_engine_integracao.py \ + pytest tests/test_componentes.py tests/test_nodes.py \ + tests/test_integracao.py tests/test_main_engine_integracao.py \ -v -s --record-mode=none -m "not real_api" record-vcr-cassettes: - name: Record VCR cassettes (manual) + name: Record VCR cassettes - ${{ matrix.provider }} / ${{ matrix.model }} runs-on: ubuntu-latest if: github.event_name == 'workflow_dispatch' + strategy: + fail-fast: false + matrix: + include: + - provider: openai + model: gpt-4o-mini + - provider: google + model: gemini-2.5-flash env: - GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} + TEXT_TO_INSIGHT_TEST_PROVIDER: ${{ matrix.provider }} + TEXT_TO_INSIGHT_TEST_MODEL: ${{ matrix.model }} + OPENAI_API_KEY: ${{ matrix.provider == 'openai' && secrets.OPENAI_API_KEY || '' }} + GOOGLE_API_KEY: ${{ matrix.provider == 'google' && secrets.GOOGLE_API_KEY || '' }} steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Setup Python - uses: actions/setup-python@v5 + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 with: python-version: "3.11" - - name: Install dependencies run: | python -m pip install --upgrade pip - pip install -r requirements.txt - pip install -e . + pip install -r requirements.txt && pip install -e . 
- - name: Validate API key + # ✅ Mesmo setup de banco dos outros jobs + - name: Download test database run: | - if [[ -z "${GOOGLE_API_KEY}" ]]; then - echo "GOOGLE_API_KEY secret ausente; nao e possivel gravar cassetes." - exit 1 + mkdir -p data + gh release download dados-teste -p "olist_relational.db" -D data/ + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + # ✅ Verificação no nível do step com saída que controla os próximos + - name: Check API key availability + id: check_key + run: | + if [[ "${{ matrix.provider }}" == "openai" && -z "${OPENAI_API_KEY}" ]]; then + echo "available=false" >> $GITHUB_OUTPUT + echo "⚠️ OPENAI_API_KEY ausente; pulando gravação." + elif [[ "${{ matrix.provider }}" == "google" && -z "${GOOGLE_API_KEY}" ]]; then + echo "available=false" >> $GITHUB_OUTPUT + echo "⚠️ GOOGLE_API_KEY ausente; pulando gravação." + else + echo "available=true" >> $GITHUB_OUTPUT fi - name: Record or update VCR cassettes + if: steps.check_key.outputs.available == 'true' # ✅ só roda se tiver key run: | - pytest \ - tests/test_nodes.py \ - tests/test_integracao.py \ - -v -s --record-mode=new_episodes -m "not real_api" + pytest tests/test_nodes.py tests/test_integracao.py \ + -v -s --record-mode=rewrite -m "not real_api" - - name: Show cassette status + # ✅ Commita os cassettes de volta no repo + - name: Commit updated cassettes + if: steps.check_key.outputs.available == 'true' run: | - git status -- tests/cassettes || true - - - name: Upload recorded cassettes artifact - uses: actions/upload-artifact@v4 - with: - name: vcr-cassettes-${{ github.run_id }} - path: tests/cassettes - if-no-files-found: warn + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + git add tests/cassettes + git diff --cached --quiet || git commit -m "chore: update VCR cassettes [${{ matrix.provider }}/${{ matrix.model }}]" + git push tests-real-api: - name: Tests (real API optional) + name: Tests (real API) - ${{ 
matrix.provider }} / ${{ matrix.model }} runs-on: ubuntu-latest needs: tests-vcr if: github.event_name == 'workflow_dispatch' || github.event_name == 'schedule' + strategy: + fail-fast: false + matrix: + include: + - provider: openai + model: gpt-4o-mini + - provider: google + model: gemini-2.5-flash env: - GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} + TEXT_TO_INSIGHT_TEST_PROVIDER: ${{ matrix.provider }} + TEXT_TO_INSIGHT_TEST_MODEL: ${{ matrix.model }} + OPENAI_API_KEY: ${{ matrix.provider == 'openai' && secrets.OPENAI_API_KEY || '' }} + GOOGLE_API_KEY: ${{ matrix.provider == 'google' && secrets.GOOGLE_API_KEY || '' }} steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Setup Python - uses: actions/setup-python@v5 + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 with: python-version: "3.11" - - name: Install dependencies run: | python -m pip install --upgrade pip - pip install -r requirements.txt - pip install -e . + pip install -r requirements.txt && pip install -e . - - name: Run optional real API smoke + # ✅ Banco de dados presente aqui também + - name: Download test database run: | - if [[ -z "${GOOGLE_API_KEY}" ]]; then - echo "GOOGLE_API_KEY secret ausente; pulando job real_api." - exit 0 + mkdir -p data + gh release download dados-teste -p "olist_relational.db" -D data/ + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - name: Check API key availability + id: check_key + run: | + if [[ "${{ matrix.provider }}" == "openai" && -z "${OPENAI_API_KEY}" ]]; then + echo "available=false" >> $GITHUB_OUTPUT + echo "⚠️ OPENAI_API_KEY ausente; pulando smoke test." + elif [[ "${{ matrix.provider }}" == "google" && -z "${GOOGLE_API_KEY}" ]]; then + echo "available=false" >> $GITHUB_OUTPUT + echo "⚠️ GOOGLE_API_KEY ausente; pulando smoke test." 
+ else + echo "available=true" >> $GITHUB_OUTPUT fi - pytest tests/test_real_api_smoke.py -v -s -m real_api + + - name: Run real API smoke tests + if: steps.check_key.outputs.available == 'true' # ✅ condição correta + run: pytest tests/test_real_api_smoke.py -v -s -m real_api \ No newline at end of file diff --git a/.gitignore b/.gitignore index 0e323d0..4713292 100644 --- a/.gitignore +++ b/.gitignore @@ -49,6 +49,7 @@ coverage.xml .hypothesis/ .pytest_cache/ results/ +graphs/ # Translations *.mo diff --git a/ARQUITETURA.md b/ARQUITETURA.md index b174fda..1f25534 100644 --- a/ARQUITETURA.md +++ b/ARQUITETURA.md @@ -4,9 +4,10 @@ O sistema combina: -- um grafo LangGraph com 7 nos; +- um grafo LangGraph com 9 nos (inclui salvamento de CSV e geracao de graficos); - uma camada de runtime compartilhada (`text_to_insight/runtime.py`); - duas interfaces de entrada: biblioteca (`InsightEngine`) e CLI (`main.py` -> `text_to_insight/cli.py`). +- um modulo de benchmark para Spider 1.0 e Spider 2.0 Lite (`scripts/` + `src/spider/`). 
Fluxo principal: @@ -17,6 +18,9 @@ START -> Agente de Codigo -> Executor -> Critico + -> Salvar CSV + -> Roteador Grafico + -> Gerador Grafico (quando aplicavel) -> Resposta END ``` @@ -123,13 +127,23 @@ Arquivos: - gera resposta natural final quando status aprovado +### Salvar CSV (`text_to_insight/nodes/csv_saver.py`) + +- salva o resultado completo em `results/` e registra `caminho_csv_resultado` + +### Gerador de Graficos (`text_to_insight/nodes/graph_generator.py`) + +- gera codigo matplotlib via LLM, executa em subprocesso e salva imagem em `graphs/` +- registra `grafico_gerado` e `caminho_grafico` no estado + ## Roteadores Arquivo: `text_to_insight/routers/edges.py` - `roteador_sandbox`: controla retry apos execucao - `roteador_planejador`: decide schema, codificacao, HITL ou fim -- `roteador_critico` (interno em `graph.py`): aprovado -> resposta; senao -> planejador +- `roteador_grafico`: decide entre gerar grafico ou ir direto para resposta +- `roteador_critico` (interno em `graph.py`): aprovado -> salvar_csv (ou resposta); senao -> planejador ## Estado compartilhado @@ -143,11 +157,18 @@ Campos obrigatorios: Campos principais do fluxo: -- `contexto_schema`, `sql_gerada`, `linhas_resultado_preview`, `total_linhas_resultado` +- `contexto_schema`, `sql_gerada`, `linhas_resultado_preview`, `linhas_resultado_completo`, `total_linhas_resultado` - `erro_execucao`, `saida_terminal`, `feedback_critico`, `resposta_natural` - `status`, `tentativas_loop`, `historico_conversa`, `espera_humana`, `pergunta_ao_usuario` +- `caminho_csv_resultado`, `grafico_gerado`, `caminho_grafico` - telemetria: `tokens_input`, `tokens_output`, `tokens_total` +## Benchmark Spider + +- Spider 1.0: `scripts/test_spider_eval.py` +- Spider 2.0 Lite: `scripts/test_spider2_eval.py` +- Componentes: `src/spider/` (loader, executor, metrics, csv_reporter, analise_empirica) + ## HITL e perguntas - `pergunta_original` e a pergunta inicial da thread (imutavel apos o primeiro set). 
@@ -170,4 +191,4 @@ Os testes marcados com `@pytest.mark.vcr` usam cassetes em `tests/cassettes/`. - para gravar ou atualizar cassetes: `--record-mode=new_episodes`; - apos gravacao: execute novamente com `--record-mode=none` para validar reproducibilidade. -Na CI existe um job manual `record-vcr-cassettes` (workflow_dispatch) para gravacao/atualizacao controlada. +Na CI existe um job manual `record-vcr-cassettes` (workflow_dispatch) para gravação/atualização controlada. diff --git a/DESENVOLVIMENTO.md b/DESENVOLVIMENTO.md index 3984c8d..fc559c7 100644 --- a/DESENVOLVIMENTO.md +++ b/DESENVOLVIMENTO.md @@ -7,13 +7,15 @@ Imports antigos via `src` nao devem mais ser usados. ## Setup local -### Pre-requisitos +### Pré-requisitos - Python 3.10+ - venv/conda - chave de API (Gemini ou OpenAI, conforme modelo escolhido) -### Instalacao +### Instalação + +Cria um ambiente isolado, instala as dependências e registra o pacote local em modo editável. ```bash python -m venv .venv @@ -23,13 +25,18 @@ pip install -r requirements.txt pip install -e . ``` -### Configuracao +- `python -m venv .venv`: cria um ambiente isolado. +- `source .venv/bin/activate`: ativa esse ambiente. +- `pip install -r requirements.txt`: instala dependências de runtime e ferramentas de teste usadas no projeto. +- `pip install -e .`: instala o TextToInsight em modo editável usando os metadados do `pyproject.toml`. + +### Configuração ```bash echo "GOOGLE_API_KEY=sua_chave" > .env ``` -Banco SQLite esperado por padrao: `data/olist_relational.db`. +Banco SQLite esperado por padrão: `data/olist_relational.db`. ### Verificação Rápida (Smoke) de import @@ -45,12 +52,13 @@ API estável da engine: - `resume(thread_id, user_response)` para retomar HITL; - `get_insight(...)` mantido como API base. -Criterios minimos: +Critérios mínimos: - fluxo completo do grafo; - HITL on/off; - retomada por `thread_id`; -- gravacao de metricas em CSV. 
+- gravacao de metricas em CSV; +- geracao opcional de graficos quando a visualizacao for relevante. ## Contrato HITL (perguntas) @@ -59,7 +67,7 @@ Criterios minimos: - Em HITL, se a resposta do usuario for classificada como "nova pergunta", o sistema atualiza `pergunta_atual` e reinicia o ciclo, sem alterar a original. -## Execucao +## Execução ```bash # adaptador local @@ -73,7 +81,16 @@ set -a && source .env && set +a python main.py --hitl off --model gpt-4o-mini --api-key-env OPENAI_API_KEY "Quantos pedidos existem?" ``` -O resultado e exibido no terminal em formato tabular sob o bloco `RESULTADO:`, junto com SQL gerada, feedback do critico e resposta natural. +O resultado é exibido no terminal em formato tabular sob o bloco `RESULTADO:`, junto com SQL gerada, feedback do crítico e resposta natural. + +### Geração de gráficos + +Quando o roteador de gráficos decide que a visualização é útil, o sistema gera um gráfico a partir do CSV de resultados. + +- CSV completo: `results/` +- Gráficos salvos: `graphs/` + +Nos benchmarks Spider, use `--with-graphs` para ativar geração de gráficos e salvamento de CSV. ## Estrutura relevante @@ -137,21 +154,110 @@ Observacoes: - `new_episodes` grava apenas chamadas que ainda nao existem no YAML; - `none` falha se faltar cassette, garantindo execucao reproduzivel. +#### Quando usar `--record-mode=rewrite` + +Use `rewrite` quando quiser **substituir completamente** as cassetes existentes, por exemplo, após uma mudança grande de prompt que torna as respostas gravadas incompatíveis com os testes atuais. + +```bash +TEXT_TO_INSIGHT_TEST_PROVIDER=openai TEXT_TO_INSIGHT_TEST_MODEL=gpt-4o-mini \ +pytest tests/test_nodes.py tests/test_integracao.py -v -s --record-mode=rewrite +``` + +> ⚠️ **Atenção:** `rewrite` apaga e regrava **todas** as cassetes do escopo, mesmo as que ainda funcionariam. +> Use `new_episodes` na dúvida, ele só grava o que está faltando e preserva o restante. 
+ +Resumo dos modos disponíveis: + +| Modo | O que faz | Quando usar | +|---|---|---| +| `new_episodes` | Grava só chamadas sem cassete; preserva as existentes | Fluxo normal: adicionou testes ou mudou um nó | +| `none` | Nunca chama a API; falha se faltar cassete | CI e revisão local para garantir determinismo | +| `rewrite` | Apaga e regrava todas as cassetes do escopo | Mudança grande de prompt que invalidou as respostas antigas | + + + +### Escolha de provider/modelo para gravar novas cassetes + +Os testes de API real usam a fixture compartilhada em `tests/conftest.py` para resolver automaticamente o provider e o modelo. A ordem de prioridade é: + +1. `TEXT_TO_INSIGHT_TEST_MODEL` define o modelo explicitamente. +2. Se `TEXT_TO_INSIGHT_TEST_PROVIDER` estiver definido, ele força o provider (`google` ou `openai`). +3. Se o modelo não deixar o provider óbvio, o provider precisa ser informado. +4. Sem overrides, o sistema tenta `GOOGLE_API_KEY` e depois `OPENAI_API_KEY`. + +Para gravar novas cassetes de forma previsível, escolha explicitamente provider e modelo: + +```bash +TEXT_TO_INSIGHT_TEST_PROVIDER=google \ +TEXT_TO_INSIGHT_TEST_MODEL=gemini-2.5-flash \ +pytest tests/test_nodes.py tests/test_integracao.py -v -s --record-mode=new_episodes +``` + +```bash +TEXT_TO_INSIGHT_TEST_PROVIDER=openai \ +TEXT_TO_INSIGHT_TEST_MODEL=gpt-4o-mini \ +pytest tests/test_nodes.py tests/test_integracao.py -v -s --record-mode=new_episodes +``` + +Se você definir as variáveis em linhas separadas no shell, use `export` para que o `pytest` e os processos filhos enxerguem os valores. Sem `export`, a atribuição fica só no shell atual e a suite não herda a configuração. 
+ +Uso recomendado: + +```bash +export TEXT_TO_INSIGHT_TEST_PROVIDER=openai +export TEXT_TO_INSIGHT_TEST_MODEL=gpt-4o-mini +pytest tests/test_nodes.py tests/test_integracao.py -v -s --record-mode=new_episodes +``` + +Tambem funciona em uma linha só, sem `export`: + +```bash +TEXT_TO_INSIGHT_TEST_PROVIDER=openai TEXT_TO_INSIGHT_TEST_MODEL=gpt-4o-mini \ +pytest tests/test_nodes.py tests/test_integracao.py -v -s --record-mode=new_episodes +``` + +Se o provider/modelo sair do padrão Gemini, os nomes dos cassetes ganham sufixo automático para evitar colisão com as cassetes existentes. + ### Drift provider/modelo (opcional) para verificar se API real ainda responde conforme esperado: ```bash pytest tests/test_real_api_smoke.py -v -s -m real_api ``` +### Benchmark Spider (1.0) + +Requer o dataset Spider em `spider_data/` (com `dev.json` e `database/`). + +```bash +python scripts/test_spider_eval.py --sample-size 10 --seed 42 --data-dir spider_data +``` + +Opções úteis: `--db-filter`, `--question-filter`, `--model`, `--with-graphs`, `--report-dir`. + +### Benchmark Spider 2.0 Lite + +Requer o dataset Spider 2.0 Lite em `spider2-lite/` e os bancos SQLite em +`spider2-lite/resource/databases/spider2-localdb`. + +```bash +python scripts/test_spider2_eval.py --sample-size 10 --seed 42 \ + --data-dir spider2-lite \ + --sqlite-dir spider2-lite/resource/databases/spider2-localdb +``` + +Opções úteis: `--db-filter`, `--question-filter`, `--model`, `--with-graphs`, `--report-dir`. + ## CI hibrida Arquivo: `.github/workflows/ci.yml` -- job padrao deterministico em PR/push (VCR + `--record-mode=none`); +- job padrão determinístico em PR/push (VCR + `--record-mode=none`); - job manual `record-vcr-cassettes` em `workflow_dispatch` para gravar/atualizar cassetes com API real; - job opcional real API em `workflow_dispatch` e `schedule`. 
-## Build e distribuicao +## Build e distribuição + +Esse teste garante que o pacote pode ser construído e instalado a partir do wheel, simulando o processo de distribuição real a um usuário novo instalando o pacote pela primeira vez. Se algo estiver faltando no wheel (como arquivos, dependências ou configurações), esse teste deve falhar, indicando que o pacote não está pronto para distribuição. ```bash python -m build diff --git a/INDICE.py b/INDICE.py index 9b71553..2ed153e 100644 --- a/INDICE.py +++ b/INDICE.py @@ -22,28 +22,46 @@ │ ├── 🚀 main.py Script de execução principal │ -└── 📁 src/ Código-fonte principal - │ - ├── __init__.py Package root - ├── state.py ⭐ TypedDict EstadoTextToInsight - ├── graph.py ⭐ Grafo compilado (entry point) - ├── model_selection.py Seleção de modelo/provedor LLM - ├── utils.py Telemetria de tokens e latência - │ - ├── 📁 nodes/ Nós do grafo - │ ├── __init__.py - │ ├── planner.py 🧠 Nó: Planejador (Supervisor) - │ ├── response.py 💬 Nó: Resposta Natural Final - │ ├── schema.py 📊 Nó: Extrator de Schema - │ ├── 📁 code_agent/ - │ │ ├── code_agent.py 💻 Nó: Gerador de SQL - │ │ └── code_sql.py 🔐 Validação + Execução SQL segura - │ ├── sandbox.py 🏖️ Nó: Executor Seguro - │ └── critic.py 🎯 Nó: Avaliador de Qualidade - │ - └── 📁 routers/ Roteadores Condicionais - ├── __init__.py - └── edges.py ➡️ Funções de roteamento +├── 📁 text_to_insight/ Código-fonte principal (pacote) +│ ├── __init__.py Package root +│ ├── state.py ⭐ TypedDict EstadoTextToInsight +│ ├── graph.py ⭐ Grafo compilado (entry point) +│ ├── InsightEngine.py API pública da biblioteca +│ ├── model_selection.py Seleção de modelo/provedor LLM +│ ├── runtime.py Runtime compartilhado +│ ├── utils.py Telemetria de tokens e latência +│ │ +│ ├── 📁 nodes/ Nós do grafo +│ │ ├── __init__.py +│ │ ├── planner.py 🧠 Nó: Planejador (Supervisor) +│ │ ├── schema.py 📊 Nó: Extrator de Schema +│ │ ├── 📁 code_agent/ +│ │ │ ├── code_agent.py 💻 Nó: Gerador de SQL +│ │ │ └── code_sql.py 🔐 Validação + 
Execução SQL segura +│ │ ├── sandbox.py 🏖️ Nó: Executor Seguro +│ │ ├── critic.py 🎯 Nó: Avaliador de Qualidade +│ │ ├── csv_saver.py 🧾 Nó: Salvar CSV +│ │ ├── graph_generator.py 📈 Nó: Gerador de Gráficos +│ │ └── response.py 💬 Nó: Resposta Natural Final +│ │ +│ └── 📁 routers/ Roteadores Condicionais +│ ├── __init__.py +│ └── edges.py ➡️ Funções de roteamento +│ +├── 📁 src/ Módulos de benchmark Spider +│ └── 📁 spider/ +│ ├── data_loader.py Loader do Spider 1.0 +│ ├── query_executor.py Executor SQL +│ ├── metrics.py Métricas (similarity/match/F1) +│ ├── csv_reporter.py Relatórios CSV +│ └── analise_empirica.py Pós-processamento e gráficos +│ +├── 📁 scripts/ Orquestração de benchmarks +│ ├── test_spider_eval.py Benchmark Spider 1.0 +│ └── test_spider2_eval.py Benchmark Spider 2.0 Lite +│ +├── 📁 results/ Resultados CSV das execuções +└── 📁 graphs/ Gráficos gerados ═══════════════════════════════════════════════════════════════════════════════ @@ -54,26 +72,28 @@ └─► Leia: README.md → DESENVOLVIMENTO.md → main.py 2️⃣ Desenvolvedor? - └─► Leia: ARQUITETURA.md → src/state.py → src/graph.py + └─► Leia: ARQUITETURA.md → text_to_insight/state.py → text_to_insight/graph.py 3️⃣ Operacional? └─► Leia: DESENVOLVIMENTO.md → main.py → execute! 4️⃣ Estudo Profundo? 
- └─► src/state.py → src/nodes/* → src/routers/edges.py → src/graph.py + └─► text_to_insight/state.py → text_to_insight/nodes/* → text_to_insight/routers/edges.py → text_to_insight/graph.py ═══════════════════════════════════════════════════════════════════════════════ O QUE FOI CRIADO: ═════════════════ -✅ ESTRUTURA: Projeto modular com src/, nós, roteadores e suíte de testes em 3 camadas +✅ ESTRUTURA: Projeto modular com text_to_insight/ + src/spider + suíte de testes em 3 camadas ✅ ESTADO: TypedDict EstadoTextToInsight com campos de SQL, HITL, resposta e telemetria -✅ 7 NÓS: Planejador, EsperaHumana, Schema, AgenteCódigo, Sandbox, Crítico, Resposta -✅ 3 ROTEADORES: Sandbox, Planejador e Crítico +✅ 9 NÓS: Planejador, EsperaHumana, Schema, AgenteCódigo, Sandbox, Crítico, SalvarCSV, GeradorGráfico, Resposta +✅ 4 ROTEADORES: Sandbox, Planejador, Gráfico e Crítico ✅ GRAFO COMPILADO: StateGraph + MemorySaver + interrupt_before para HITL ✅ DOCUMENTAÇÃO: 3 guias: README, ARQUITETURA, DESENVOLVIMENTO ✅ TELEMETRIA: Tokens (input/output/total), tentativas e latência em CSV +✅ GRÁFICOS: Geração automática com matplotlib quando aplicável +✅ BENCHMARKS: Spider 1.0 e Spider 2.0 Lite (scripts/ + src/spider) ✅ TODA EM PT-BR: Código, variáveis, docstrings, comentários ═══════════════════════════════════════════════════════════════════════════════ @@ -82,7 +102,7 @@ ═════════════ 📊 Linhas de Código: ~1.400+ (incluindo nós, roteadores e utilitários) -📚 Arquivos Python: 15+ (src/ + main.py + testes) +📚 Arquivos Python: 15+ (text_to_insight/ + src/spider + scripts + testes) 📖 Documentação: 3 guias principais 🔄 Fluxos de Grafo: 4+ cenários (normal, retry, HITL, bloqueado_hitl) 🧠 Tentativas max: 3 por padrão (configurável) @@ -93,11 +113,11 @@ PRÓXIMOS PASSOS (NÃO IMPLEMENTADOS AGORA): ═══════════════════════════════════════════ -❌ LLMs reais (OpenAI, Anthropic, etc) +❌ Novos provedores LLM (Anthropic, etc) ❌ Banco de dados real (PostgreSQL, MySQL, etc) ❌ Docker/Containerização ❌ Cache 
de schemas -❌ Métricasde produção +❌ Métricas de produção ❌ Observabilidade (LangSmith, DataDog, etc) ❌ Autenticação/Autorização ❌ Testes unitários (estrutura preparada) diff --git a/README.md b/README.md index 13015cd..cc1965d 100644 --- a/README.md +++ b/README.md @@ -8,10 +8,11 @@ O namespace oficial do pacote e `text_to_insight`. O runtime padrao garante: -- fluxo completo do grafo (planejador -> schema -> agente de codigo -> executor -> critico -> resposta); +- fluxo completo do grafo (planejador -> schema -> agente de codigo -> executor -> critico -> salvar CSV -> roteador grafico -> gerador grafico (quando aplicavel) -> resposta); - HITL ligado e desligado; - retomada por `thread_id`; -- persistencia de metricas em `data/metricas_execucao.csv`. +- persistencia de metricas em `data/metricas_execucao.csv`; +- geracao opcional de graficos quando a visualizacao for relevante. ## Contrato HITL (perguntas) @@ -101,6 +102,10 @@ O template de apresentacao usa `tabulate` para montar as linhas da query: - Acima de 5 linhas: mostra as 3 primeiras, omite as intermediarias, exibe as 2 ultimas - Resultado completo e exportado em CSV em `results/` automaticamente +### Geração de gráficos + +Quando o roteador de gráficos decide que a visualização é útil, um gráfico é salvo em `graphs/` a partir do CSV de resultados. + ## Testes Camadas atuais: @@ -137,12 +142,61 @@ pytest tests/test_nodes.py tests/test_integracao.py -v -s --record-mode=none As cassetes ficam em `tests/cassettes/`. 
+Para gravar cassetes com um provider/modelo especifico, sobrescreva o ambiente antes de rodar o pytest: + +```bash +TEXT_TO_INSIGHT_TEST_PROVIDER=google \ +TEXT_TO_INSIGHT_TEST_MODEL=gemini-2.5-flash \ +pytest tests/test_nodes.py tests/test_integracao.py -v -s --record-mode=new_episodes +``` + +```bash +TEXT_TO_INSIGHT_TEST_PROVIDER=openai \ +TEXT_TO_INSIGHT_TEST_MODEL=gpt-4o-mini \ +pytest tests/test_nodes.py tests/test_integracao.py -v -s --record-mode=new_episodes +``` + +Se você preferir definir em comandos separados, use `export` antes de rodar o `pytest`. Sem `export`, a variavel fica apenas no shell atual e os testes nao herdam o valor. + +Uso recomendado: + +```bash +export TEXT_TO_INSIGHT_TEST_PROVIDER=google +export TEXT_TO_INSIGHT_TEST_MODEL=gemini-2.5-flash +pytest tests/test_nodes.py tests/test_integracao.py -v -s --record-mode=new_episodes +``` + +Se quiser tudo em uma linha só, sem `export`, use: + +```bash +TEXT_TO_INSIGHT_TEST_PROVIDER=google TEXT_TO_INSIGHT_TEST_MODEL=gemini-2.5-flash \ +pytest tests/test_nodes.py tests/test_integracao.py -v -s --record-mode=new_episodes +``` + +Sem esses overrides, a suite tenta `GOOGLE_API_KEY` primeiro e depois `OPENAI_API_KEY`. 
+ Teste opcional com API real (drift provider/modelo): ```bash pytest tests/test_real_api_smoke.py -v -s -m real_api ``` +## Benchmark Spider + +Spider 1.0 (requer `spider_data/` com `dev.json` e `database/`): + +```bash +python scripts/test_spider_eval.py --sample-size 10 --seed 42 --data-dir spider_data +``` + +Spider 2.0 Lite (requer `spider2-lite/` e bancos em `spider2-lite/resource/databases/spider2-localdb`): + +```bash +python scripts/test_spider2_eval.py --sample-size 10 --seed 42 \ + --data-dir spider2-lite \ + --sqlite-dir spider2-lite/resource/databases/spider2-localdb +``` + ## CI hibrida Workflow em `.github/workflows/ci.yml`: diff --git a/pyproject.toml b/pyproject.toml index 15a1d34..d0fdc86 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,6 +31,8 @@ dependencies = [ "langchain-google-genai>=2.0.0", "langchain-openai>=0.1.0", "python-dotenv>=1.0.0", + "pandas>=2.0.0", # uso na geração de gráficos e análise de dados + "matplotlib>=3.8.0" # uso na geração de gráficos e visualizações ] [project.scripts] diff --git a/requirements.txt b/requirements.txt index 3aadb90..1f4db36 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,10 +4,11 @@ langchain-core>=0.2.0 python-dotenv>=1.0.0 langchain-google-genai>=2.0.0 langchain-openai>=0.1.0 +pandas>=2.0.0 +matplotlib>=3.8.0 pytest>=9.0.2 pytest-recording>=0.13.0 pytest-timeout>=2.3.0 numpy -pandas>=2.0.0 tabulate>=0.9.0 -chromadb +chromadb \ No newline at end of file diff --git a/tests/cassettes/test_nodes/test_cadeia_code_agent_executor__openai-gpt-4o-mini.yaml b/tests/cassettes/test_nodes/test_cadeia_code_agent_executor__openai-gpt-4o-mini.yaml new file mode 100644 index 0000000..8dd75ec --- /dev/null +++ b/tests/cassettes/test_nodes/test_cadeia_code_agent_executor__openai-gpt-4o-mini.yaml @@ -0,0 +1,145 @@ +interactions: +- request: + body: "{\"messages\":[{\"content\":\"Voc\xEA \xE9 um especialista em SQL para + bancos SQLite.\\n\\nSua tarefa: gerar UMA \xFAnica consulta SQL SELECT que responda 
+ \xE0 pergunta do usu\xE1rio,\\nusando o schema do banco de dados fornecido abaixo.\\n\\nRegras:\\n- + Gere APENAS uma consulta SELECT (ou WITH/CTE seguido de SELECT).\\n- N\xC3O + use INSERT, UPDATE, DELETE, DROP, ALTER ou qualquer comando de escrita.\\n- + N\xC3O inclua explica\xE7\xF5es, apenas a SQL pura.\\n- Use nomes de tabelas + e colunas EXATAMENTE como aparecem no schema.\\n- Se a pergunta for amb\xEDgua, + fa\xE7a a interpreta\xE7\xE3o mais razo\xE1vel.\\n\\n=== SCHEMA DO BANCO ===\\n=== + SCHEMA SQLITE (INTROSPECCAO REAL) ===\\n\\nTabela: customers\\n- customer_id: + TEXT (PK)\\n- customer_unique_id: TEXT\\n- customer_zip_code_prefix: INTEGER\\n- + customer_city: TEXT\\n- customer_state: TEXT\\n\\nTabela: geolocation\\n- geolocation_zip_code_prefix: + INTEGER\\n- geolocation_lat: REAL\\n- geolocation_lng: REAL\\n- geolocation_city: + TEXT\\n- geolocation_state: TEXT\\n\\nTabela: order_items\\n- order_id: TEXT + (PK)\\n- order_item_id: INTEGER (PK)\\n- product_id: TEXT\\n- seller_id: TEXT\\n- + shipping_limit_date: TEXT\\n- price: REAL\\n- freight_value: REAL\\n Foreign + keys:\\n - seller_id -> sellers.seller_id (on_update=NO ACTION, on_delete=NO + ACTION)\\n - product_id -> products.product_id (on_update=NO ACTION, on_delete=NO + ACTION)\\n - order_id -> orders.order_id (on_update=NO ACTION, on_delete=NO + ACTION)\\n\\nTabela: order_payments\\n- order_id: TEXT (PK)\\n- payment_sequential: + INTEGER (PK)\\n- payment_type: TEXT\\n- payment_installments: INTEGER\\n- payment_value: + REAL\\n Foreign keys:\\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\\n\\nTabela: order_reviews\\n- review_id: TEXT (PK)\\n- + order_id: TEXT\\n- review_score: INTEGER\\n- review_comment_title: TEXT\\n- + review_comment_message: TEXT\\n- review_creation_date: TEXT\\n- review_answer_timestamp: + TEXT\\n Foreign keys:\\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\\n\\nTabela: orders\\n- order_id: TEXT (PK)\\n- 
customer_id: + TEXT\\n- order_status: TEXT\\n- order_purchase_timestamp: TEXT\\n- order_approved_at: + TEXT\\n- order_delivered_carrier_date: TEXT\\n- order_delivered_customer_date: + TEXT\\n- order_estimated_delivery_date: TEXT\\n Foreign keys:\\n - customer_id + -> customers.customer_id (on_update=NO ACTION, on_delete=NO ACTION)\\n\\nTabela: + products\\n- product_id: TEXT (PK)\\n- product_category_name: TEXT\\n- product_name_length: + REAL\\n- product_description_length: REAL\\n- product_photos_qty: REAL\\n- product_weight_g: + REAL\\n- product_length_cm: REAL\\n- product_height_cm: REAL\\n- product_width_cm: + REAL\\n\\nTabela: sellers\\n- seller_id: TEXT (PK)\\n- seller_zip_code_prefix: + INTEGER\\n- seller_city: TEXT\\n- seller_state: TEXT\\n\\n\\n=== PERGUNTA DO + USU\xC1RIO ===\\nQuantos clientes existem no banco?\\n\\n=== CONVERSA PR\xC9VIA + (CONTEXTO ADICIONAL) ===\\nNenhuma\\n\\n=== HIST\xD3RICO DE TENTATIVAS ANTERIORES + ===\\nNenhuma tentativa anterior.\\n\\nResponda APENAS com a consulta SQL, sem + markdown, sem explica\xE7\xE3o.\",\"role\":\"user\"}],\"model\":\"gpt-4o-mini\",\"stream\":false}" + headers: + Accept: + - a + Accept-Encoding: + - g + Connection: + - k + Content-Length: + - '3' + Content-Type: + - a + Cookie: + - _ + Host: + - a + User-Agent: + - O + X-Stainless-Arch: + - x + X-Stainless-Async: + - f + X-Stainless-Lang: + - p + X-Stainless-OS: + - L + X-Stainless-Package-Version: + - '2' + X-Stainless-Raw-Response: + - t + X-Stainless-Runtime: + - C + X-Stainless-Runtime-Version: + - '3' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdclItTBJSwUAAAD//4xSy07DMBC89ysinwA1 + KH2mFSeoijjwEM8LQpHrbFODY0deB4FQ/5110pIAReJiyTs765nxjrPxzQBtD04uo+PIXc9G6sGe + sa5nmMUzCLdlHQpDPHDS6BoWFrgDP7UXx9P+dBgP+hWQmxSUp2WFC4cmzKWWYT/qD8MoDnuTDXtl + pACktke6BsFHdXqdOoU3KkfdbSUHRJ4B1bZNVLRG+QrjiBId1451G1AY7UBX0m/n5/PZXTC7ur+8 
+ 2zvYD05vri4CUaIzOVg8arMsLEvkXrkulWoBXGvjuHde6X3aIOsvhcpkhTUL/EFlS3KOq4SCQkqN + 1NCzBavQNZ1PVRLlN3OMBuWFS5x5geq5OO7V81jzAQ063mCOBKo2Ke7uGJek4LhU2IqSCS5WkDbU + JndeptK0gE7L9G8xu2bXxqXO/jO+AYSAgjYrKSykUnw33LRZ8Ov5V9tXyJVghmBfad8SJ8H6j0hh + yUtVLw3Dd3SQJ/RbGdjCynpzlkUynQxisRhNRpx11p1PAAAA//8DALb873xHAwAA + headers: + Access-Control-Expose-Headers: + - CF-Ray + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Wed, 20 May 2026 16:32:12 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + - CF-Ray + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - + openai-processing-ms: + - '571' + openai-project: + - + openai-version: + - '2020-10-01' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-limit-tokens: + - '200000' + x-ratelimit-remaining-requests: + - + x-ratelimit-remaining-tokens: + - + x-ratelimit-reset-requests: + - 5m38.18s + x-ratelimit-reset-tokens: + - 216ms + x-request-id: + - + status: + code: 200 + message: OK +version: 1 diff --git a/tests/cassettes/test_nodes/test_code_agent_com_feedback_regenera__openai-gpt-4o-mini.yaml b/tests/cassettes/test_nodes/test_code_agent_com_feedback_regenera__openai-gpt-4o-mini.yaml new file mode 100644 index 0000000..c338c23 --- /dev/null +++ b/tests/cassettes/test_nodes/test_code_agent_com_feedback_regenera__openai-gpt-4o-mini.yaml @@ -0,0 +1,147 @@ +interactions: +- request: + body: "{\"messages\":[{\"content\":\"Voc\xEA \xE9 um especialista em SQL para + bancos SQLite.\\n\\nSua tarefa: gerar UMA \xFAnica consulta SQL SELECT que responda + \xE0 pergunta do usu\xE1rio,\\nusando o schema do banco de dados fornecido abaixo.\\n\\nRegras:\\n- + Gere APENAS uma consulta SELECT (ou WITH/CTE seguido de SELECT).\\n- 
N\xC3O + use INSERT, UPDATE, DELETE, DROP, ALTER ou qualquer comando de escrita.\\n- + N\xC3O inclua explica\xE7\xF5es, apenas a SQL pura.\\n- Use nomes de tabelas + e colunas EXATAMENTE como aparecem no schema.\\n- Se a pergunta for amb\xEDgua, + fa\xE7a a interpreta\xE7\xE3o mais razo\xE1vel.\\n\\n=== SCHEMA DO BANCO ===\\n=== + SCHEMA SQLITE (INTROSPECCAO REAL) ===\\n\\nTabela: customers\\n- customer_id: + TEXT (PK)\\n- customer_unique_id: TEXT\\n- customer_zip_code_prefix: INTEGER\\n- + customer_city: TEXT\\n- customer_state: TEXT\\n\\nTabela: geolocation\\n- geolocation_zip_code_prefix: + INTEGER\\n- geolocation_lat: REAL\\n- geolocation_lng: REAL\\n- geolocation_city: + TEXT\\n- geolocation_state: TEXT\\n\\nTabela: order_items\\n- order_id: TEXT + (PK)\\n- order_item_id: INTEGER (PK)\\n- product_id: TEXT\\n- seller_id: TEXT\\n- + shipping_limit_date: TEXT\\n- price: REAL\\n- freight_value: REAL\\n Foreign + keys:\\n - seller_id -> sellers.seller_id (on_update=NO ACTION, on_delete=NO + ACTION)\\n - product_id -> products.product_id (on_update=NO ACTION, on_delete=NO + ACTION)\\n - order_id -> orders.order_id (on_update=NO ACTION, on_delete=NO + ACTION)\\n\\nTabela: order_payments\\n- order_id: TEXT (PK)\\n- payment_sequential: + INTEGER (PK)\\n- payment_type: TEXT\\n- payment_installments: INTEGER\\n- payment_value: + REAL\\n Foreign keys:\\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\\n\\nTabela: order_reviews\\n- review_id: TEXT (PK)\\n- + order_id: TEXT\\n- review_score: INTEGER\\n- review_comment_title: TEXT\\n- + review_comment_message: TEXT\\n- review_creation_date: TEXT\\n- review_answer_timestamp: + TEXT\\n Foreign keys:\\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\\n\\nTabela: orders\\n- order_id: TEXT (PK)\\n- customer_id: + TEXT\\n- order_status: TEXT\\n- order_purchase_timestamp: TEXT\\n- order_approved_at: + TEXT\\n- order_delivered_carrier_date: TEXT\\n- 
order_delivered_customer_date: + TEXT\\n- order_estimated_delivery_date: TEXT\\n Foreign keys:\\n - customer_id + -> customers.customer_id (on_update=NO ACTION, on_delete=NO ACTION)\\n\\nTabela: + products\\n- product_id: TEXT (PK)\\n- product_category_name: TEXT\\n- product_name_length: + REAL\\n- product_description_length: REAL\\n- product_photos_qty: REAL\\n- product_weight_g: + REAL\\n- product_length_cm: REAL\\n- product_height_cm: REAL\\n- product_width_cm: + REAL\\n\\nTabela: sellers\\n- seller_id: TEXT (PK)\\n- seller_zip_code_prefix: + INTEGER\\n- seller_city: TEXT\\n- seller_state: TEXT\\n\\n\\n=== PERGUNTA DO + USU\xC1RIO ===\\nQuais as 5 categorias de produtos mais vendidas?\\n\\n=== CONVERSA + PR\xC9VIA (CONTEXTO ADICIONAL) ===\\nNenhuma\\n\\n=== HIST\xD3RICO DE TENTATIVAS + ANTERIORES ===\\nNenhuma tentativa anterior.\\n\\nResponda APENAS com a consulta + SQL, sem markdown, sem explica\xE7\xE3o.\",\"role\":\"user\"}],\"model\":\"gpt-4o-mini\",\"stream\":false}" + headers: + Accept: + - a + Accept-Encoding: + - g + Connection: + - k + Content-Length: + - '3' + Content-Type: + - a + Cookie: + - _ + Host: + - a + User-Agent: + - O + X-Stainless-Arch: + - x + X-Stainless-Async: + - f + X-Stainless-Lang: + - p + X-Stainless-OS: + - L + X-Stainless-Package-Version: + - '2' + X-Stainless-Raw-Response: + - t + X-Stainless-Runtime: + - C + X-Stainless-Runtime-Version: + - '3' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdclINS/0LQEAAAD//4xTUW/aMBB+51dYftok + QJDCEjTtoQM6wVpSAX2YRmUZ+wjuEtuynW5o4r/PSYCkWyvtJVLuu/vuu/vO1/PgZn4lng4/p0Ls + 7jX/mk1gjNtFhdo+AXPnqi5Tvg6cULKCmQHqoGDth+EoGA3CflQCmeKQFmWJdp2B6mRCik7QCwad + XtjpR6fqvRIMrE/77n8R+l1+C52Swy8f7rXPkQyspQn42DnJB41Kiwim1grrqHS4XYNMSQeylL6a + 3k7Ha6S72iieM0eYF50ocyCSZtBG4/hhsX6nRFcZDoYI/h5dr5BTjqbkGSSndiNvlvEdOuEOMouU + 
2Mh5PFugE6lFGsULH750ERx9ajQVfCO/LOOHe/T521taNjJeTqbLIqPZHk2mq/FG3s7uZms0/Ngc + 08Aut7RYtczTtAFQKT1DYVW54McTcrysNFWJ17C1f5XinbfK7ol31nqb/fqsUxqX6NF/H0vr8hdu + YE+UaUec+gFluzD8UPHh+mJqdBicwGrGSzwKovYrfISDoyK1DfMxo2wPvC6tL4XmXKgG0GpM/a+a + 17iryYVM/oe+BhgD7d8C0Qa4YC8nrtMMFA/qrbTLlkvB2IJ59i+EOAGmcILDjuZpdebYHqw/ROLt + SsBoI6pb32kyiq5Cth1GQ4pbx9YfAAAA//8DAImrOBP5AwAA + headers: + Access-Control-Expose-Headers: + - CF-Ray + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Wed, 20 May 2026 16:32:00 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + - CF-Ray + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - + openai-processing-ms: + - '1521' + openai-project: + - + openai-version: + - '2020-10-01' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-limit-tokens: + - '200000' + x-ratelimit-remaining-requests: + - + x-ratelimit-remaining-tokens: + - + x-ratelimit-reset-requests: + - 5m33.642s + x-ratelimit-reset-tokens: + - 217ms + x-request-id: + - + status: + code: 200 + message: OK +version: 1 diff --git a/tests/cassettes/test_nodes/test_code_agent_gera_sql__openai-gpt-4o-mini.yaml b/tests/cassettes/test_nodes/test_code_agent_gera_sql__openai-gpt-4o-mini.yaml new file mode 100644 index 0000000..6f549cf --- /dev/null +++ b/tests/cassettes/test_nodes/test_code_agent_gera_sql__openai-gpt-4o-mini.yaml @@ -0,0 +1,145 @@ +interactions: +- request: + body: "{\"messages\":[{\"content\":\"Voc\xEA \xE9 um especialista em SQL para + bancos SQLite.\\n\\nSua tarefa: gerar UMA \xFAnica consulta SQL SELECT que responda + \xE0 pergunta do usu\xE1rio,\\nusando o schema do banco de dados fornecido abaixo.\\n\\nRegras:\\n- + Gere APENAS uma consulta SELECT (ou WITH/CTE 
seguido de SELECT).\\n- N\xC3O + use INSERT, UPDATE, DELETE, DROP, ALTER ou qualquer comando de escrita.\\n- + N\xC3O inclua explica\xE7\xF5es, apenas a SQL pura.\\n- Use nomes de tabelas + e colunas EXATAMENTE como aparecem no schema.\\n- Se a pergunta for amb\xEDgua, + fa\xE7a a interpreta\xE7\xE3o mais razo\xE1vel.\\n\\n=== SCHEMA DO BANCO ===\\n=== + SCHEMA SQLITE (INTROSPECCAO REAL) ===\\n\\nTabela: customers\\n- customer_id: + TEXT (PK)\\n- customer_unique_id: TEXT\\n- customer_zip_code_prefix: INTEGER\\n- + customer_city: TEXT\\n- customer_state: TEXT\\n\\nTabela: geolocation\\n- geolocation_zip_code_prefix: + INTEGER\\n- geolocation_lat: REAL\\n- geolocation_lng: REAL\\n- geolocation_city: + TEXT\\n- geolocation_state: TEXT\\n\\nTabela: order_items\\n- order_id: TEXT + (PK)\\n- order_item_id: INTEGER (PK)\\n- product_id: TEXT\\n- seller_id: TEXT\\n- + shipping_limit_date: TEXT\\n- price: REAL\\n- freight_value: REAL\\n Foreign + keys:\\n - seller_id -> sellers.seller_id (on_update=NO ACTION, on_delete=NO + ACTION)\\n - product_id -> products.product_id (on_update=NO ACTION, on_delete=NO + ACTION)\\n - order_id -> orders.order_id (on_update=NO ACTION, on_delete=NO + ACTION)\\n\\nTabela: order_payments\\n- order_id: TEXT (PK)\\n- payment_sequential: + INTEGER (PK)\\n- payment_type: TEXT\\n- payment_installments: INTEGER\\n- payment_value: + REAL\\n Foreign keys:\\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\\n\\nTabela: order_reviews\\n- review_id: TEXT (PK)\\n- + order_id: TEXT\\n- review_score: INTEGER\\n- review_comment_title: TEXT\\n- + review_comment_message: TEXT\\n- review_creation_date: TEXT\\n- review_answer_timestamp: + TEXT\\n Foreign keys:\\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\\n\\nTabela: orders\\n- order_id: TEXT (PK)\\n- customer_id: + TEXT\\n- order_status: TEXT\\n- order_purchase_timestamp: TEXT\\n- order_approved_at: + TEXT\\n- order_delivered_carrier_date: TEXT\\n- 
order_delivered_customer_date: + TEXT\\n- order_estimated_delivery_date: TEXT\\n Foreign keys:\\n - customer_id + -> customers.customer_id (on_update=NO ACTION, on_delete=NO ACTION)\\n\\nTabela: + products\\n- product_id: TEXT (PK)\\n- product_category_name: TEXT\\n- product_name_length: + REAL\\n- product_description_length: REAL\\n- product_photos_qty: REAL\\n- product_weight_g: + REAL\\n- product_length_cm: REAL\\n- product_height_cm: REAL\\n- product_width_cm: + REAL\\n\\nTabela: sellers\\n- seller_id: TEXT (PK)\\n- seller_zip_code_prefix: + INTEGER\\n- seller_city: TEXT\\n- seller_state: TEXT\\n\\n\\n=== PERGUNTA DO + USU\xC1RIO ===\\nQuantos pedidos existem no banco?\\n\\n=== CONVERSA PR\xC9VIA + (CONTEXTO ADICIONAL) ===\\nNenhuma\\n\\n=== HIST\xD3RICO DE TENTATIVAS ANTERIORES + ===\\nNenhuma tentativa anterior.\\n\\nResponda APENAS com a consulta SQL, sem + markdown, sem explica\xE7\xE3o.\",\"role\":\"user\"}],\"model\":\"gpt-4o-mini\",\"stream\":false}" + headers: + Accept: + - a + Accept-Encoding: + - g + Connection: + - k + Content-Length: + - '3' + Content-Type: + - a + Cookie: + - _ + Host: + - a + User-Agent: + - O + X-Stainless-Arch: + - x + X-Stainless-Async: + - f + X-Stainless-Lang: + - p + X-Stainless-OS: + - L + X-Stainless-Package-Version: + - '2' + X-Stainless-Raw-Response: + - t + X-Stainless-Runtime: + - C + X-Stainless-Runtime-Version: + - '3' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdclINc8xdQQAAAD//4xS0U7CMBR95yuWPqnZ + DAO0EB8JJCYIieKTIUvp7kZxa2dvZzSGf7fdgA3FxJcmveee23NOr5yaPNFvi8VWbjDV/Xs6V8sZ + 8R1DrbfAzYF1zZXlgRFK1jDXwAy4qSGlo95oQMN+BeQqhszR0sIEAxXkQoqg1+0Ngi4NwuGevVGC + A9q2F3v1vK/qdDplDB+23PUPlRwQWQq2dmiyRa0yVyEMUaBh0hC/AbmSBmQl/Wkym4yX3njxPF9e + XF1608fFg6d0DBrv2hQNSYnMyZZllrUAJqUyzNmuxK72yO4oL1NpodUaf1BJYm3jJrIpoY3MSkGj + 
ClKhO3uuqhjKE2fEDsoLExn1CtVzlIb1PNKk36C3e8xYgVmbRP0z46IYDBMZtnIknPENxA21CZ2V + sVAtoNMy/VvMudm1cSHT/4xvAM6hsGsVFRpiwU8NN20a3G7+1XYMuRJMEPS7XbbICNDuI2JIWJnV + G0PwEw3kkf2tFHShRb02SRGNhn3K1zfDG0Y6u843AAAA//8DAAL88xtEAwAA + headers: + Access-Control-Expose-Headers: + - CF-Ray + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Wed, 20 May 2026 16:31:54 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + - CF-Ray + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - + openai-processing-ms: + - '501' + openai-project: + - + openai-version: + - '2020-10-01' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-limit-tokens: + - '200000' + x-ratelimit-remaining-requests: + - + x-ratelimit-remaining-tokens: + - + x-ratelimit-reset-requests: + - 5m13.091s + x-ratelimit-reset-tokens: + - 216ms + x-request-id: + - + status: + code: 200 + message: OK +version: 1 diff --git a/tests/cassettes/test_nodes/test_critic_avalia_resultado_correto__openai-gpt-4o-mini.yaml b/tests/cassettes/test_nodes/test_critic_avalia_resultado_correto__openai-gpt-4o-mini.yaml new file mode 100644 index 0000000..36e39dd --- /dev/null +++ b/tests/cassettes/test_nodes/test_critic_avalia_resultado_correto__openai-gpt-4o-mini.yaml @@ -0,0 +1,173 @@ +interactions: +- request: + body: "{\"messages\":[{\"content\":\"Voc\xEA \xE9 um revisor de qualidade para + consultas SQL geradas por IA.\\n\\nSua tarefa: avaliar se a consulta SQL e seus + resultados respondem adequadamente\\n\xE0 pergunta original do usu\xE1rio.\\n\\n=== + PERGUNTA DO USU\xC1RIO ===\\nQuantos pedidos existem no banco?\\n\\n=== SCHEMA + DO BANCO ===\\n\\n\\n=== CONVERSA COM O AGENTE (se houver) ===\\nNenhuma\\n\\n=== + SQL 
GERADA ===\\nSELECT COUNT(*) as total_pedidos FROM orders\\n\\n=== RESULTADO + DA EXECU\xC7\xC3O ===\\nStatus: exec_ok\\nTotal de linhas: 1\\nAmostra dos resultados + (primeiras linhas):\\n[{'total_pedidos': 99441}]\\n\\n=== ERROS (se houver) + ===\\nNenhum\\n\\n=== TENTATIVAS ANTERIORES ===\\nNenhuma tentativa anterior + (esta \xE9 a primeira).\\n\\n=== EXEMPLOS DE AVALIA\xC7\xC3O ===\\n\\n-- EXEMPLO + 1: REPROVADO (escopo incompleto) --\\nPergunta: \\\"Which airport has the least + number of flights?\\\"\\nSQL: SELECT SourceAirport FROM flights GROUP BY SourceAirport + ORDER BY COUNT(*) ASC LIMIT 1\\nResultado: [('AID',)]\\nVEREDITO: REPROVADO\\nRaz\xE3o: + A query conta apenas voos com partida (SourceAirport) e ignora voos com chegada + (DestAirport).\\nO escopo da pergunta \xE9 \\\"flights\\\" em geral \u2014 a + query responde a uma pergunta diferente.\\n\\n-- EXEMPLO 2: REPROVADO (erro + sem\xE2ntico: MIN vs MAX) --\\nPergunta: \\\"Which Asian countries have a population + larger than any country in Africa?\\\"\\nSQL: SELECT Name FROM country WHERE + Continent='Asia' AND Population > (SELECT MAX(Population) FROM country WHERE + Continent='Africa')\\nResultado: [] (vazio)\\nVEREDITO: REPROVADO\\nRaz\xE3o: + \\\"Larger than any country in Africa\\\" significa maior que pelo menos um + pa\xEDs africano (MIN),\\nn\xE3o maior que todos os pa\xEDses africanos (MAX). + A l\xF3gica est\xE1 semanticamente errada.\\n\\n-- EXEMPLO 3: REPROVADO (resultado + vazio suspeito) --\\nPergunta: \\\"Find the last name of students who live in + North Carolina and are not enrolled in any degree.\\\"\\nSQL: SELECT last_name + FROM Students WHERE state_province_county = 'North Carolina' AND ...\\nResultado: + [] (vazio)\\nVEREDITO: REPROVADO\\nRaz\xE3o: Resultado vazio quando a pergunta + espera dados reais \xE9 suspeito. 
Verifique se o filtro\\nde string corresponde + exatamente ao valor no banco (ex: 'NorthCarolina' vs 'North Carolina').\\n\\n-- + EXEMPLO 4: REPROVADO (JOIN incorreto muda o que est\xE1 sendo contado) --\\nPergunta: + \\\"Find the name of makers that produced some cars in 1970.\\\"\\nSQL: SELECT + DISTINCT Maker FROM car_makers JOIN car_names ON car_makers.Id = car_names.MakeId + JOIN cars_data ON car_names.MakeId = cars_data.Id WHERE cars_data.Year = 1970\\nResultado: + [('chevrolet',), ('buick',)]\\nVEREDITO: REPROVADO\\nRaz\xE3o: O JOIN usa car_names.MakeId + para conectar a cars_data, mas cars_data.Id refere-se\\nao ID do carro, n\xE3o + do fabricante. O caminho correto seria via model_list. Os resultados\\nparecem + plaus\xEDveis mas derivam de uma jun\xE7\xE3o incorreta.\\n\\n-- EXEMPLO 5: + APROVADO (formato diferente, resposta correta) --\\nPergunta: \\\"On average, + when were the transcripts printed?\\\"\\nSQL: SELECT AVG(transcript_date) AS + average_transcript_date FROM Transcripts\\nResultado: [('1989.9333333333334',)]\\nVEREDITO: + APROVADO\\nRaz\xE3o: O resultado \xE9 um n\xFAmero que representa a m\xE9dia + das datas (formato num\xE9rico do SQLite).\\nEmbora n\xE3o seja uma data formatada, + responde corretamente \xE0 pergunta. Diferen\xE7a de\\nrepresenta\xE7\xE3o n\xE3o + \xE9 motivo de reprova\xE7\xE3o.\\n\\n-- EXEMPLO 6: APROVADO (query mais simples + que o gold, resultado equivalente) --\\nPergunta: \\\"Which model of car has + the minimum horsepower?\\\"\\nSQL: SELECT Model FROM car_names JOIN cars_data + ON car_names.MakeId = cars_data.Id WHERE Horsepower = (SELECT MIN(Horsepower) + FROM cars_data) LIMIT 1\\nResultado: [('triumph',)]\\nVEREDITO: APROVADO\\nRaz\xE3o: + A query retorna corretamente o modelo com menor pot\xEAncia. O LIMIT 1 garante + unicidade\\ne o resultado \xE9 semanticamente correto. 
Aprovar.\\n\\n=== CRIT\xC9RIOS + DE AVALIA\xC7\xC3O ===\\n\\nREPROVE quando houver:\\n- Escopo incompleto: query + cobre apenas parte do que a pergunta pede\\n- Erro sem\xE2ntico: l\xF3gica correta + na forma mas errada no significado (MIN vs MAX, ANY vs ALL)\\n- JOIN incorreto + que altera os dados sendo agregados ou filtrados\\n- Resultado vazio quando + a pergunta claramente espera dados\\n- Filtro com valor literal diferente do + que est\xE1 no banco\\n- M\xE9trica errada (SUM vs AVG, COUNT vs COUNT DISTINCT, + etc.)\\n- Erro de execu\xE7\xE3o SQL\\n\\nAPROVE quando:\\n- O resultado responde + \xE0 pergunta, mesmo com formato ou representa\xE7\xE3o diferente\\n- H\xE1 + colunas extras que n\xE3o prejudicam a resposta\\n- A precis\xE3o num\xE9rica + difere mas o valor est\xE1 correto\\n- A query \xE9 mais simples que o esperado + mas semanticamente equivalente\\n\\nAvalie com rigor sem\xE2ntico. Resultados + que parecem plaus\xEDveis mas derivam de l\xF3gica\\nincorreta devem ser reprovados. 
+ N\xE3o presuma que uma query bem-formada est\xE1 correta.\\n\\nResponda no formato:\\nVEREDITO: + APROVADO ou REPROVADO\\nFEEDBACK: \",\"role\":\"user\"}],\"model\":\"gpt-4o-mini\",\"stream\":false}" + headers: + Accept: + - a + Accept-Encoding: + - g + Connection: + - k + Content-Length: + - '4' + Content-Type: + - a + Cookie: + - _ + Host: + - a + User-Agent: + - O + X-Stainless-Arch: + - x + X-Stainless-Async: + - f + X-Stainless-Lang: + - p + X-Stainless-OS: + - L + X-Stainless-Package-Version: + - '2' + X-Stainless-Raw-Response: + - t + X-Stainless-Runtime: + - C + X-Stainless-Runtime-Version: + - '3' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdclINa8oNQYAAAD//41T227TQBB971eM/FJA + SZSYhBDeQhMkBDS9S1wqa7w7cRfsHbO7rrgo/wK/wWt/jLGT1CkUCclayXPmnD07l9np9G31zuDC + VOfx4Zvl+fG3o5GPOjWD04+kwpbVUyw8CobtGlaOMFCtOhiPJ/FkOI5HDVCwprymZWXoDrlbGGu6 + cT8edvvj7uDphn3FRpGXtPfyC/C9OWufVtMXCfc720hB3mNGEtsmSdBxXkci9N74gDZEnRZUbAPZ + xvrF/GQ+e3m2eAbTo5PFxXS2APhgX8zns+fTg1cSBUn2VR4QTo9fw8Hi/PDswaOHEnWOJEiAmj5X + qLEQRQJHvmSxCDc/oCSXVVaSNEPlq5ufzjB4Th0Bg735VZBjCBwwByGUpI1mD5YhRau4I1qBnUUr + dIZrzNnVeZPJcDjowQLIKy4ZNLYWMXVoM9q6W1sKXMvK5yiTWrj6DoSAKeUI++w0Ob/fEU7Boimm + UXNvt1yOlpXHumW2yvMdAK0V93XLm0ZdbpDVbWtyzkrHqf+DGi2l5f4qkQnxMi7SBh+4jBp0Jedl + MwLVna5GIlSUIQn8iZrrBvEgXgtG7ei18JPHG7Ap8C5tPOrco5hoKZjJ/c4YRQrVFemW284cVtrw + DrC38+6/7dynvX67sdn/yLeAUlTKViWlk3FRd5/cpjmqV/Nfabd1bgxHnty17FoSDLm6F5qWKNO0 + 3kP/1QcqEmlYRq50Zr01yzKJY+yroU5HOtpb7f0G9PywcUMEAAA= + headers: + Access-Control-Expose-Headers: + - CF-Ray + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Wed, 20 May 2026 16:32:06 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + 
X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + - CF-Ray + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - + openai-processing-ms: + - '1528' + openai-project: + - + openai-version: + - '2020-10-01' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-limit-tokens: + - '200000' + x-ratelimit-remaining-requests: + - + x-ratelimit-remaining-tokens: + - + x-ratelimit-reset-requests: + - 5m35.928s + x-ratelimit-reset-tokens: + - 355ms + x-request-id: + - + status: + code: 200 + message: OK +version: 1 diff --git a/tests/cassettes/test_nodes/test_planner_com_feedback_revisa__openai-gpt-4o-mini.yaml b/tests/cassettes/test_nodes/test_planner_com_feedback_revisa__openai-gpt-4o-mini.yaml new file mode 100644 index 0000000..242c553 --- /dev/null +++ b/tests/cassettes/test_nodes/test_planner_com_feedback_revisa__openai-gpt-4o-mini.yaml @@ -0,0 +1,153 @@ +interactions: +- request: + body: "{\"messages\":[{\"content\":\"Voc\xEA \xE9 o planejador de um sistema que + transforma perguntas em consultas SQL.\\n\\nSeu papel: analisar a situa\xE7\xE3o + atual e decidir a pr\xF3xima a\xE7\xE3o.\\n\\nContexto atual:\\n- Pergunta do + usu\xE1rio: \\\"Quantos pedidos existem no banco?\\\"\\n\\n- conversa_previa: + Nenhuma\\n\\n- Schema: === SCHEMA SQLITE (INTROSPECCAO REAL) ===\\n\\nTabela: + customers\\n- customer_id: TEXT (PK)\\n- customer_unique_id: TEXT\\n- customer_zip_code_prefix: + INTEGER\\n- customer_city: TEXT\\n- customer_state: TEXT\\n\\nTabela: geolocation\\n- + geolocation_zip_code_prefix: INTEGER\\n- geolocation_lat: REAL\\n- geolocation_lng: + REAL\\n- geolocation_city: TEXT\\n- geolocation_state: TEXT\\n\\nTabela: order_items\\n- + order_id: TEXT (PK)\\n- order_item_id: INTEGER (PK)\\n- product_id: TEXT\\n- + seller_id: TEXT\\n- shipping_limit_date: TEXT\\n- price: REAL\\n- freight_value: + REAL\\n Foreign keys:\\n - seller_id -> sellers.seller_id (on_update=NO ACTION, + on_delete=NO 
ACTION)\\n - product_id -> products.product_id (on_update=NO ACTION, + on_delete=NO ACTION)\\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\\n\\nTabela: order_payments\\n- order_id: TEXT (PK)\\n- + payment_sequential: INTEGER (PK)\\n- payment_type: TEXT\\n- payment_installments: + INTEGER\\n- payment_value: REAL\\n Foreign keys:\\n - order_id -> orders.order_id + (on_update=NO ACTION, on_delete=NO ACTION)\\n\\nTabela: order_reviews\\n- review_id: + TEXT (PK)\\n- order_id: TEXT\\n- review_score: INTEGER\\n- review_comment_title: + TEXT\\n- review_comment_message: TEXT\\n- review_creation_date: TEXT\\n- review_answer_timestamp: + TEXT\\n Foreign keys:\\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\\n\\nTabela: orders\\n- order_id: TEXT (PK)\\n- customer_id: + TEXT\\n- order_status: TEXT\\n- order_purchase_timestamp: TEXT\\n- order_approved_at: + TEXT\\n- order_delivered_carrier_date: TEXT\\n- order_delivered_customer_date: + TEXT\\n- order_estimated_delivery_date: TEXT\\n Foreign keys:\\n - customer_id + -> customers.customer_id (on_update=NO ACTION, on_delete=NO ACTION)\\n\\nTabela: + products\\n- product_id: TEXT (PK)\\n- product_category_name: TEXT\\n- product_name_length: + REAL\\n- product_description_length: REAL\\n- product_photos_qty: REAL\\n- product_weight_g: + REAL\\n- product_length_cm: REAL\\n- product_height_cm: REAL\\n- product_width_cm: + REAL\\n\\nTabela: sellers\\n- seller_id: TEXT (PK)\\n- seller_zip_code_prefix: + INTEGER\\n- seller_city: TEXT\\n- seller_state: TEXT\\n\\n\\n- Feedback do cr\xEDtico: + A SQL retornou dados incorretos, faltou filtrar por status.\\n- Tentativas realizadas: + 1\\n- Status atual: reprovado\\n- Erro anterior: Nenhum\\n\\nAVALIA\xC7\xC3O + CR\xCDTICA:\\nVerifique se a \\\"Pergunta do usu\xE1rio\\\" pode ser respondida + com as tabelas e colunas do Schema.\\nSe houver ambiguidade, conceitos n\xE3o + mapeados no banco de dados, ou se a inten\xE7\xE3o do 
usu\xE1rio n\xE3o estiver + clara, voc\xEA DEVE pedir mais informa\xE7\xF5es.\\n\\nResponda EXATAMENTE no + formato JSON abaixo, sem formata\xE7\xE3o markdown (```json):\\n{{\\n \\\"decisao\\\": + \\\"escolha_uma_opcao\\\",\\n \\\"pergunta_ao_usuario\\\": \\\"escreva a + pergunta aqui se precisar de ajuda, ou deixe vazio se n\xE3o precisar\\\"\\n}}\\n\\nOp\xE7\xF5es + v\xE1lidas para 'decis\xE3o':\\n- \\\"pronto_codificacao\\\" \u2192 se temos + schema, a pergunta faz sentido e devemos gerar/regenerar SQL\\n- \\\"revisando_estrategia\\\" + \u2192 se o cr\xEDtico reprovou e devemos tentar uma abordagem diferente\\n- + \\\"necessita_ajuda\\\" \u2192 a pergunta n\xE3o \xE9 clara, n\xE3o faz sentido, + falta contexto ou n\xE3o h\xE1 dados no schema para responder.\\n\",\"role\":\"user\"}],\"model\":\"gpt-4o-mini\",\"stream\":false}" + headers: + Accept: + - a + Accept-Encoding: + - g + Connection: + - k + Content-Length: + - '3' + Content-Type: + - a + Cookie: + - _ + Host: + - a + User-Agent: + - O + X-Stainless-Arch: + - x + X-Stainless-Async: + - f + X-Stainless-Lang: + - p + X-Stainless-OS: + - L + X-Stainless-Package-Version: + - '2' + X-Stainless-Raw-Response: + - t + X-Stainless-Runtime: + - C + X-Stainless-Runtime-Version: + - '3' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdclINY/yDQcAAAD//4xTTW/bMAy951cIOsdD + 4qawfSqKYqcBw3ZZMcyFoUi0o8YWDVEONhT5Q/sb+2OjP1K7XQf0IkN8fE8kH317//kjYvwl/n5v + 93cuuW2y46ejXPcM3D+CDhfWB43Mg2DRjbD2oAL0qtskyeJsl2y2A9CggbqnVW2Idhg11tko3sS7 + aJNE23RiH9BqIE77wVchnoazr9MZ+MnhzfoSaYBIVcCxSxIHPdZ9RCoiS0G5INczqNEFcEPpT7nr + Q7k0oC0pzDmYSw8nvjiDBVDw3EZlVS7Xl9wWfNW5oAqFRUed8nbifUP957eokF/0VgkDorQ1C3iB + JFow1vRf9IITQkc34munLE03cRrYBggeleAayRpg7k0uc3de1u+h7Ej1M3RdXS8A5RyyFnswTO5h + Qs7Ps6qxaj3u6RVVluwBHQq2jNg/ngsFbOWAnvl8GDzpXoxZslDThiLgEYbn0iQd9eS8CjO6u5rA + 
wBXWczyLt+s39AoDQdmaFq5KrfQBzEydV0B1xuICWC26/reat7THzq2r3iM/A1pDy0tetJ7N1S87 + ntM89H/K/9KepzwULAn8iVe/CBZ874SBUnX1uL+SflGApmC7KvCtt+MSl22RpVeJ3l+n10quzqu/ + AAAA//8DALqYXOLSAwAA + headers: + Access-Control-Expose-Headers: + - CF-Ray + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Wed, 20 May 2026 16:31:42 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + - CF-Ray + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - + openai-processing-ms: + - '1120' + openai-project: + - + openai-version: + - '2020-10-01' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-limit-tokens: + - '200000' + x-ratelimit-remaining-requests: + - + x-ratelimit-remaining-tokens: + - + x-ratelimit-reset-requests: + - 4m16.184s + x-ratelimit-reset-tokens: + - 250ms + x-request-id: + - + status: + code: 200 + message: OK +version: 1 diff --git a/tests/cassettes/test_nodes/test_planner_com_schema_decide_codificar__openai-gpt-4o-mini.yaml b/tests/cassettes/test_nodes/test_planner_com_schema_decide_codificar__openai-gpt-4o-mini.yaml new file mode 100644 index 0000000..6de32c7 --- /dev/null +++ b/tests/cassettes/test_nodes/test_planner_com_schema_decide_codificar__openai-gpt-4o-mini.yaml @@ -0,0 +1,153 @@ +interactions: +- request: + body: "{\"messages\":[{\"content\":\"Voc\xEA \xE9 o planejador de um sistema que + transforma perguntas em consultas SQL.\\n\\nSeu papel: analisar a situa\xE7\xE3o + atual e decidir a pr\xF3xima a\xE7\xE3o.\\n\\nContexto atual:\\n- Pergunta do + usu\xE1rio: \\\"Quantos pedidos existem no banco?\\\"\\n\\n- conversa_previa: + Nenhuma\\n\\n- Schema: === SCHEMA SQLITE (INTROSPECCAO REAL) ===\\n\\nTabela: + customers\\n- customer_id: TEXT (PK)\\n- 
customer_unique_id: TEXT\\n- customer_zip_code_prefix: + INTEGER\\n- customer_city: TEXT\\n- customer_state: TEXT\\n\\nTabela: geolocation\\n- + geolocation_zip_code_prefix: INTEGER\\n- geolocation_lat: REAL\\n- geolocation_lng: + REAL\\n- geolocation_city: TEXT\\n- geolocation_state: TEXT\\n\\nTabela: order_items\\n- + order_id: TEXT (PK)\\n- order_item_id: INTEGER (PK)\\n- product_id: TEXT\\n- + seller_id: TEXT\\n- shipping_limit_date: TEXT\\n- price: REAL\\n- freight_value: + REAL\\n Foreign keys:\\n - seller_id -> sellers.seller_id (on_update=NO ACTION, + on_delete=NO ACTION)\\n - product_id -> products.product_id (on_update=NO ACTION, + on_delete=NO ACTION)\\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\\n\\nTabela: order_payments\\n- order_id: TEXT (PK)\\n- + payment_sequential: INTEGER (PK)\\n- payment_type: TEXT\\n- payment_installments: + INTEGER\\n- payment_value: REAL\\n Foreign keys:\\n - order_id -> orders.order_id + (on_update=NO ACTION, on_delete=NO ACTION)\\n\\nTabela: order_reviews\\n- review_id: + TEXT (PK)\\n- order_id: TEXT\\n- review_score: INTEGER\\n- review_comment_title: + TEXT\\n- review_comment_message: TEXT\\n- review_creation_date: TEXT\\n- review_answer_timestamp: + TEXT\\n Foreign keys:\\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\\n\\nTabela: orders\\n- order_id: TEXT (PK)\\n- customer_id: + TEXT\\n- order_status: TEXT\\n- order_purchase_timestamp: TEXT\\n- order_approved_at: + TEXT\\n- order_delivered_carrier_date: TEXT\\n- order_delivered_customer_date: + TEXT\\n- order_estimated_delivery_date: TEXT\\n Foreign keys:\\n - customer_id + -> customers.customer_id (on_update=NO ACTION, on_delete=NO ACTION)\\n\\nTabela: + products\\n- product_id: TEXT (PK)\\n- product_category_name: TEXT\\n- product_name_length: + REAL\\n- product_description_length: REAL\\n- product_photos_qty: REAL\\n- product_weight_g: + REAL\\n- product_length_cm: REAL\\n- product_height_cm: REAL\\n- 
product_width_cm: + REAL\\n\\nTabela: sellers\\n- seller_id: TEXT (PK)\\n- seller_zip_code_prefix: + INTEGER\\n- seller_city: TEXT\\n- seller_state: TEXT\\n\\n\\n- Feedback do cr\xEDtico: + Nenhum\\n- Tentativas realizadas: 0\\n- Status atual: schema_obtido\\n- Erro + anterior: Nenhum\\n\\nAVALIA\xC7\xC3O CR\xCDTICA:\\nVerifique se a \\\"Pergunta + do usu\xE1rio\\\" pode ser respondida com as tabelas e colunas do Schema.\\nSe + houver ambiguidade, conceitos n\xE3o mapeados no banco de dados, ou se a inten\xE7\xE3o + do usu\xE1rio n\xE3o estiver clara, voc\xEA DEVE pedir mais informa\xE7\xF5es.\\n\\nResponda + EXATAMENTE no formato JSON abaixo, sem formata\xE7\xE3o markdown (```json):\\n{{\\n + \ \\\"decisao\\\": \\\"escolha_uma_opcao\\\",\\n \\\"pergunta_ao_usuario\\\": + \\\"escreva a pergunta aqui se precisar de ajuda, ou deixe vazio se n\xE3o precisar\\\"\\n}}\\n\\nOp\xE7\xF5es + v\xE1lidas para 'decis\xE3o':\\n- \\\"pronto_codificacao\\\" \u2192 se temos + schema, a pergunta faz sentido e devemos gerar/regenerar SQL\\n- \\\"revisando_estrategia\\\" + \u2192 se o cr\xEDtico reprovou e devemos tentar uma abordagem diferente\\n- + \\\"necessita_ajuda\\\" \u2192 a pergunta n\xE3o \xE9 clara, n\xE3o faz sentido, + falta contexto ou n\xE3o h\xE1 dados no schema para responder.\\n\",\"role\":\"user\"}],\"model\":\"gpt-4o-mini\",\"stream\":false}" + headers: + Accept: + - a + Accept-Encoding: + - g + Connection: + - k + Content-Length: + - '3' + Content-Type: + - a + Host: + - a + User-Agent: + - O + X-Stainless-Arch: + - x + X-Stainless-Async: + - f + X-Stainless-Lang: + - p + X-Stainless-OS: + - L + X-Stainless-Package-Version: + - '2' + X-Stainless-Raw-Response: + - t + X-Stainless-Runtime: + - C + X-Stainless-Runtime-Version: + - '3' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdclINQ8pdgIAAAD//4xSTWvjMBC951cUne0l + 
TZPYPhfK3rqnHtoUoUhjR1tbo9XIZZeS/74jO6ndLygGgd+bN5r3NLc39d3N+r67/hO34emu+HW/ + /GluRZYUuP8NOp5VPzSyDqJFN9I6gIqQul4WRbWq1ttqMxAdGmiTrPExX2PeWWfz1XK1zpdFflme + 1Ae0GojLHvj34uJlONOczsBfhpfZGemASDXA2LmIwYBtQoQishSViyKbSI0ughtGf9m5BO2EAW1J + 4Y7BnfCBK1BqNLa2WumEZ+dKD6HpXVRSoeypV8GeVPy54/yeAHVPKnl1fdvOCOUcRpWyGhw+npjj + q6cWG55hT++kouas6CA5WuKceX6K6MXAHvl8HLLr38SRzHQ+yohPMFxXbrdjPzE92cSurk5k5Anb + maqssk/6SQNR2ZZm6QsO7ABmkk5PpXpjcUYsZq4/TvNZ79G5dc132k+E1uB5GaUPYKx+63gqC5A2 + +quy15SHgQVBeOYVldFCSC9hoFZ9O+6ZoH8UoZP8XA0EH+y4bLWXVXlV6P2m3CixOC7+AwAA//8D + AOO2dWF6AwAA + headers: + Access-Control-Expose-Headers: + - CF-Ray + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Wed, 20 May 2026 16:31:36 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + - CF-Ray + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - + openai-processing-ms: + - '911' + openai-project: + - + openai-version: + - '2020-10-01' + set-cookie: + - __cf_bm=2XGZi8XxCTtNcmNd0CGshkXoSLHNL3wEniErjFXfAWA-1779294695.1340203-1.0.1.1-Xg7.x.7HfMzJJXbSFY0DXW2lNFfqHY4X7F2fOj4R.k4X.Z6Lc3T3.yyA8ysFjZBxWlcAUiKyD1L4tWsnBtSTyq8nheJaruWP_fWjBa8610VZPMIbs.KuYoEuJ5P0mo9F; + HttpOnly; SameSite=None; Secure; Path=/; Domain=api.openai.com; Expires=Wed, + 20 May 2026 17:01:36 GMT + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-limit-tokens: + - '200000' + x-ratelimit-remaining-requests: + - + x-ratelimit-remaining-tokens: + - + x-ratelimit-reset-requests: + - 3m47.483s + x-ratelimit-reset-tokens: + - 247ms + x-request-id: + - + status: + code: 200 + message: OK +version: 1 diff --git a/tests/cassettes/test_nodes/test_planner_pergunta_fora_de_escopo__openai-gpt-4o-mini.yaml 
b/tests/cassettes/test_nodes/test_planner_pergunta_fora_de_escopo__openai-gpt-4o-mini.yaml new file mode 100644 index 0000000..eb51ff5 --- /dev/null +++ b/tests/cassettes/test_nodes/test_planner_pergunta_fora_de_escopo__openai-gpt-4o-mini.yaml @@ -0,0 +1,152 @@ +interactions: +- request: + body: "{\"messages\":[{\"content\":\"Voc\xEA \xE9 o planejador de um sistema que + transforma perguntas em consultas SQL.\\n\\nSeu papel: analisar a situa\xE7\xE3o + atual e decidir a pr\xF3xima a\xE7\xE3o.\\n\\nContexto atual:\\n- Pergunta do + usu\xE1rio: \\\"Quantas vezes a Ahri ganhou o CBLOL?\\\"\\n\\n- conversa_previa: + Nenhuma\\n\\n- Schema: === SCHEMA SQLITE (INTROSPECCAO REAL) ===\\n\\nTabela: + customers\\n- customer_id: TEXT (PK)\\n- customer_unique_id: TEXT\\n- customer_zip_code_prefix: + INTEGER\\n- customer_city: TEXT\\n- customer_state: TEXT\\n\\nTabela: geolocation\\n- + geolocation_zip_code_prefix: INTEGER\\n- geolocation_lat: REAL\\n- geolocation_lng: + REAL\\n- geolocation_city: TEXT\\n- geolocation_state: TEXT\\n\\nTabela: order_items\\n- + order_id: TEXT (PK)\\n- order_item_id: INTEGER (PK)\\n- product_id: TEXT\\n- + seller_id: TEXT\\n- shipping_limit_date: TEXT\\n- price: REAL\\n- freight_value: + REAL\\n Foreign keys:\\n - seller_id -> sellers.seller_id (on_update=NO ACTION, + on_delete=NO ACTION)\\n - product_id -> products.product_id (on_update=NO ACTION, + on_delete=NO ACTION)\\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\\n\\nTabela: order_payments\\n- order_id: TEXT (PK)\\n- + payment_sequential: INTEGER (PK)\\n- payment_type: TEXT\\n- payment_installments: + INTEGER\\n- payment_value: REAL\\n Foreign keys:\\n - order_id -> orders.order_id + (on_update=NO ACTION, on_delete=NO ACTION)\\n\\nTabela: order_reviews\\n- review_id: + TEXT (PK)\\n- order_id: TEXT\\n- review_score: INTEGER\\n- review_comment_title: + TEXT\\n- review_comment_message: TEXT\\n- review_creation_date: TEXT\\n- review_answer_timestamp: + TEXT\\n 
Foreign keys:\\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\\n\\nTabela: orders\\n- order_id: TEXT (PK)\\n- customer_id: + TEXT\\n- order_status: TEXT\\n- order_purchase_timestamp: TEXT\\n- order_approved_at: + TEXT\\n- order_delivered_carrier_date: TEXT\\n- order_delivered_customer_date: + TEXT\\n- order_estimated_delivery_date: TEXT\\n Foreign keys:\\n - customer_id + -> customers.customer_id (on_update=NO ACTION, on_delete=NO ACTION)\\n\\nTabela: + products\\n- product_id: TEXT (PK)\\n- product_category_name: TEXT\\n- product_name_length: + REAL\\n- product_description_length: REAL\\n- product_photos_qty: REAL\\n- product_weight_g: + REAL\\n- product_length_cm: REAL\\n- product_height_cm: REAL\\n- product_width_cm: + REAL\\n\\nTabela: sellers\\n- seller_id: TEXT (PK)\\n- seller_zip_code_prefix: + INTEGER\\n- seller_city: TEXT\\n- seller_state: TEXT\\n\\n\\n- Feedback do cr\xEDtico: + Nenhum\\n- Tentativas realizadas: 0\\n- Status atual: schema_obtido\\n- Erro + anterior: Nenhum\\n\\nAVALIA\xC7\xC3O CR\xCDTICA:\\nVerifique se a \\\"Pergunta + do usu\xE1rio\\\" pode ser respondida com as tabelas e colunas do Schema.\\nSe + houver ambiguidade, conceitos n\xE3o mapeados no banco de dados, ou se a inten\xE7\xE3o + do usu\xE1rio n\xE3o estiver clara, voc\xEA DEVE pedir mais informa\xE7\xF5es.\\n\\nResponda + EXATAMENTE no formato JSON abaixo, sem formata\xE7\xE3o markdown (```json):\\n{{\\n + \ \\\"decisao\\\": \\\"escolha_uma_opcao\\\",\\n \\\"pergunta_ao_usuario\\\": + \\\"escreva a pergunta aqui se precisar de ajuda, ou deixe vazio se n\xE3o precisar\\\"\\n}}\\n\\nOp\xE7\xF5es + v\xE1lidas para 'decis\xE3o':\\n- \\\"pronto_codificacao\\\" \u2192 se temos + schema, a pergunta faz sentido e devemos gerar/regenerar SQL\\n- \\\"revisando_estrategia\\\" + \u2192 se o cr\xEDtico reprovou e devemos tentar uma abordagem diferente\\n- + \\\"necessita_ajuda\\\" \u2192 a pergunta n\xE3o \xE9 clara, n\xE3o faz sentido, + falta contexto ou n\xE3o 
h\xE1 dados no schema para responder.\\n\",\"role\":\"user\"}],\"model\":\"gpt-4o-mini\",\"stream\":false}" + headers: + Accept: + - a + Accept-Encoding: + - g + Connection: + - k + Content-Length: + - '3' + Content-Type: + - a + Cookie: + - _ + Host: + - a + User-Agent: + - O + X-Stainless-Arch: + - x + X-Stainless-Async: + - f + X-Stainless-Lang: + - p + X-Stainless-OS: + - L + X-Stainless-Package-Version: + - '2' + X-Stainless-Raw-Response: + - t + X-Stainless-Runtime: + - C + X-Stainless-Runtime-Version: + - '3' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAA/4xTy27bMBC8+ysIXnKxCtl1K7uXAn0cCqQtemhzqAJhTa5kJhTJcqkgaeAfKtCv + yI91JduR3QfQCyHN7Cx3d5b3EyGk0fKFkGoDSbXBZm82WNRxtn562364uNPv337XC/yE8ebi3edr + Oe0Vfn2FKh1UT5RnHSbj3Y5WESFhn3VWFKv5alHkxUC0XqPtZU1I2cJnrXEmm+fzRZYX2Wy5V2+8 + UUgc9pV/hbgfzr5Op/GW4Xx6QFokggYZOwQxGL3tEQlEhhK4JKcjqbxL6IbS70vXQ6XUqAyBLxks + pUO+m0yCCq46DaWcHsICxqZzPeGrjjqIZi/54tXDDxG4N4GkLEROEYUX3zoUZBpnaqNAnL1+df7x + /Eyg4HF5YYi8IBQRLSieHPSwABIJ1gzREGc7x1/aUPDu4ecNGhKOVWqDLbwsZem2x71FrDuCfr6u + s/aIAOd8gt6fYaqXe2b7OEfrmxD9mn6Typr9oU3FdhJ7yzOj5IMc2C2fl4Nf3YkFkhO1IVXJX+Nw + 3bLY+yXHNRnZZ7M9mbhCO+Kr+YE4yVdpTGAsHTkuFfA89Cgd1wM6bfwRMTnq+s9q/pZ717lxzf+k + HwmlMPADqEJEbdRpx2NYxP4V/SvsccpDwZL4AfKzqJLB2DuhsYbO7nZb0h0lbCu2q8EYotkteB2q + +fJ5gbM8r9dysp38AgAA//8DABmaEc/uAwAA + headers: + Access-Control-Expose-Headers: + - CF-Ray + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Wed, 20 May 2026 16:31:48 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + - CF-Ray + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - + openai-processing-ms: + - '1363' + openai-project: 
+ - + openai-version: + - '2020-10-01' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-limit-tokens: + - '200000' + x-ratelimit-remaining-requests: + - + x-ratelimit-remaining-tokens: + - + x-ratelimit-reset-requests: + - 4m44.741s + x-ratelimit-reset-tokens: + - 247ms + x-request-id: + - + status: + code: 200 + message: OK +version: 1 diff --git a/tests/conftest.py b/tests/conftest.py index 65faed1..45df2ab 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,8 +1,17 @@ +import os import re +from dataclasses import dataclass import pytest +from dotenv import load_dotenv + +load_dotenv() REDACTED = "" +DEFAULT_GEMINI_MODEL = "gemini-2.5-flash" +DEFAULT_OPENAI_MODEL = "gpt-4o-mini" +TEST_PROVIDER_ENV = "TEXT_TO_INSIGHT_TEST_PROVIDER" +TEST_MODEL_ENV = "TEXT_TO_INSIGHT_TEST_MODEL" # Cobrem query string (key=...), payload JSON e eventuais tokens em texto. SENSITIVE_TEXT_PATTERNS = [ @@ -20,6 +29,15 @@ } +@dataclass(frozen=True) +class TestLLMProfile: + provider: str + model_name: str + api_key: str + api_key_env: str + cassette_suffix: str + + def _mask_sensitive_text(text: str) -> str: """Aplica mascaramento de segredos em strings gravadas pelo VCR.""" masked = text @@ -31,6 +49,96 @@ def _mask_sensitive_text(text: str) -> str: return masked +def _slugify_for_filename(value: str) -> str: + slug = re.sub(r"[^a-z0-9._-]+", "-", value.lower()).strip("-") + return slug or "default" + + +def _normalize_provider(provider: str) -> str: + normalized = provider.strip().lower() + if normalized in {"openai", "gpt"}: + return "openai" + if normalized in {"google", "gemini"}: + return "google" + pytest.skip(f"Provider de teste '{provider}' inválido. 
Use 'google' ou 'openai'.") + + +def _profile_for_provider(provider: str, model_name: str) -> TestLLMProfile: + api_key_env = "OPENAI_API_KEY" if provider == "openai" else "GOOGLE_API_KEY" + api_key = os.getenv(api_key_env) + if not api_key: + pytest.skip( + f"{api_key_env} não encontrada. Defina a chave para executar os testes com provider {provider}." + ) + + cassette_suffix = "" + if not (provider == "google" and model_name == DEFAULT_GEMINI_MODEL): + cassette_suffix = f"__{provider}-{_slugify_for_filename(model_name)}" + + return TestLLMProfile( + provider=provider, + model_name=model_name, + api_key=api_key, + api_key_env=api_key_env, + cassette_suffix=cassette_suffix, + ) + + +def resolve_test_llm_profile() -> TestLLMProfile: + explicit_provider = os.getenv(TEST_PROVIDER_ENV) + explicit_model = os.getenv(TEST_MODEL_ENV) + + if explicit_model: + model_name = explicit_model.strip() + lowered = model_name.lower() + + if explicit_provider: + provider = _normalize_provider(explicit_provider) + elif "gpt" in lowered or "openai" in lowered: + provider = "openai" + elif "gemini" in lowered: + provider = "google" + else: + pytest.skip( + f"Não foi possível inferir o provider a partir de {TEST_MODEL_ENV}={model_name!r}. Defina também {TEST_PROVIDER_ENV}." 
+ ) + + return _profile_for_provider(provider, model_name) + + if explicit_provider: + provider = _normalize_provider(explicit_provider) + model_name = DEFAULT_OPENAI_MODEL if provider == "openai" else DEFAULT_GEMINI_MODEL + return _profile_for_provider(provider, model_name) + + google_key = os.getenv("GOOGLE_API_KEY") + if google_key: + return TestLLMProfile( + provider="google", + model_name=DEFAULT_GEMINI_MODEL, + api_key=google_key, + api_key_env="GOOGLE_API_KEY", + cassette_suffix="", + ) + + openai_key = os.getenv("OPENAI_API_KEY") + if openai_key: + return _profile_for_provider("openai", DEFAULT_OPENAI_MODEL) + + pytest.skip("Defina GOOGLE_API_KEY ou OPENAI_API_KEY para executar os testes de API real.") + + +def _default_cassette_name(test_class, test_name: str) -> str: + if test_class: + cassette_name = f"{test_class.__name__}.{test_name}" + else: + cassette_name = test_name + + for ch in ["<", ">", "?", "%", "*", ":", "|", '"', "'", "/", "\\"]: + cassette_name = cassette_name.replace(ch, "-") + + return cassette_name + + def _sanitize_vcr_request(request): """Censura dados sensíveis antes do request ser gravado em cassette.""" # 1) Headers (lista de valores por header) @@ -55,6 +163,23 @@ def _sanitize_vcr_request(request): return request +SENSITIVE_RESPONSE_HEADERS = { + "openai-organization", + "openai-project", + "x-request-id", + "cf-ray", # Cloudflare Ray ID (fingerprint da requisição) + "x-ratelimit-remaining-requests", # opcional: não vaza key mas revela uso + "x-ratelimit-remaining-tokens", +} + +def _sanitize_vcr_response(response): + """Censura headers sensíveis da resposta antes de gravar no cassette.""" + headers = response.get("headers", {}) + for header_name in list(headers.keys()): + if header_name.lower() in SENSITIVE_RESPONSE_HEADERS: + headers[header_name] = [REDACTED] + return response + @pytest.fixture(scope="module") def vcr_config(): @@ -69,4 +194,31 @@ def vcr_config(): "filter_query_parameters": ["key", "api_key", "api-key", 
"access_token", "token"], # Sanitização extra para payload JSON/URI em formatos não cobertos pelos filtros nativos. "before_record_request": _sanitize_vcr_request, - } \ No newline at end of file + "before_record_response": _sanitize_vcr_response, + } + + +@pytest.fixture(scope="session") +def llm_profile(): + return resolve_test_llm_profile() + + +@pytest.fixture(scope="session") +def llm(llm_profile): + from text_to_insight.model_selection import get_model + + return get_model(llm_profile.model_name, llm_profile.api_key) + + +@pytest.fixture +def default_cassette_name(request, llm_profile): + marker = request.node.get_closest_marker("default_cassette") + if marker is not None: + assert marker.args, ( + "You should pass the cassette name as an argument to the `pytest.mark.default_cassette` marker" + ) + base_name = marker.args[0] + else: + base_name = _default_cassette_name(request.cls, request.node.name) + + return f"{base_name}{llm_profile.cassette_suffix}" \ No newline at end of file diff --git a/tests/test_integracao.py b/tests/test_integracao.py index 11820df..5de36cc 100644 --- a/tests/test_integracao.py +++ b/tests/test_integracao.py @@ -1,5 +1,5 @@ """ -Testes de integração do Text-to-Insight (usa API real do Gemini + banco real). +Testes de integração do Text-to-Insight (usa API real + banco real). Estes testes validam o pipeline completo: pergunta → planner → schema → code agent → executor → critic → resposta. @@ -8,6 +8,7 @@ import os import sys import time +from pathlib import Path import pytest from dotenv import load_dotenv @@ -19,15 +20,11 @@ load_dotenv() @pytest.fixture -def grafo(): - """Retorna o grafo compilado.""" - api_key = os.getenv("GOOGLE_API_KEY") #como estamos usando vcr, não haverá mais requisição direta, apenas repetição - #do primeiro resultado da requisição, é possível verificar isso em test/cassettes - if not api_key: - pytest.skip("Variável GOOGLE_API_KEY não encontrada. 
Pulando testes de integração.") - +def grafo(llm_profile): + """Retorna o grafo compilado com o provider de teste configurado no ambiente.""" from text_to_insight.graph import Graph - return Graph(api_key, "gemini-2.5-flash", hitl=True) + + return Graph(llm_profile.api_key, llm_profile.model_name, hitl=True) @pytest.fixture(autouse=True) @@ -126,4 +123,112 @@ def test_hitl_nova_pergunta_substitui(grafo): assert resultado_final.get("pergunta_original") == "Quem e o Brad Pitt?" assert resultado_final.get("pergunta_atual") == "Quero saber quantos clientes existem" - assert "Brad Pitt" not in str(resultado_final.get("resposta_natural", "")) \ No newline at end of file + assert "Brad Pitt" not in str(resultado_final.get("resposta_natural", "")) + + +# ============================================================ +# INTEGRAÇÃO — fluxo de gráficos (sem LLM real) +# ============================================================ + +"""Testes de integração focados no fluxo de geração de gráficos, usando mocks para o LLM e nós do grafo.""" + +def _montar_grafo_fake(monkeypatch, tmp_path, enable_graphs: bool): + import text_to_insight.graph as graph_module + import text_to_insight.model_selection as model_selection + from text_to_insight.nodes import csv_saver as csv_module + + def _fake_get_model(model, api_key): + return object() + + monkeypatch.setattr(model_selection, "get_model", _fake_get_model) + monkeypatch.setattr(graph_module, "get_model", _fake_get_model) + + monkeypatch.setattr(csv_module, "RESULTS_DIR", tmp_path / "results") + + def _fake_planejador(estado, llm=None, hitl=True): + return {"status": "pronto_codificacao"} + + def _fake_agente_codigo(estado, llm=None): + return {"sql_gerada": "SELECT 1", "status": "sql_gerada", "tentativas_loop": 1} + + def _fake_sandbox(estado): + return { + "status": "exec_ok", + "linhas_resultado_preview": [{"valor": 1}, {"valor": 2}], + "linhas_resultado_completo": [{"valor": 1}, {"valor": 2}], + "total_linhas_resultado": 2, + 
"saida_terminal": "ok", + } + + def _fake_critico(estado, llm=None): + return {"status": "aprovado", "feedback_critico": "Aprovado"} + + def _fake_resposta(estado, llm=None): + return {"resposta_natural": "ok"} + + def _fake_gerador_grafico(estado, llm=None): + graphs_dir = tmp_path / "graphs" + graphs_dir.mkdir(exist_ok=True) + output_path = graphs_dir / "grafico_teste.png" + output_path.write_bytes(b"fakepng") + return {"grafico_gerado": True, "caminho_grafico": str(output_path)} + + def _fake_roteador_planejador(estado): + return "agente_codigo" + + def _fake_roteador_sandbox(estado): + return "critico" + + def _fake_roteador_grafico(estado, llm=None): + return "gerador_grafico" + + monkeypatch.setattr(graph_module, "nos_nodo_planejador", _fake_planejador) + monkeypatch.setattr(graph_module, "nos_nodo_agente_codigo", _fake_agente_codigo) + monkeypatch.setattr(graph_module, "nos_nodo_sandbox", _fake_sandbox) + monkeypatch.setattr(graph_module, "nos_nodo_critico", _fake_critico) + monkeypatch.setattr(graph_module, "nos_nodo_resposta", _fake_resposta) + monkeypatch.setattr(graph_module, "nos_nodo_gerador_grafico", _fake_gerador_grafico) + monkeypatch.setattr(graph_module, "nos_nodo_salvar_csv", csv_module.nos_nodo_salvar_csv) + monkeypatch.setattr(graph_module, "roteador_planejador", _fake_roteador_planejador) + monkeypatch.setattr(graph_module, "roteador_sandbox", _fake_roteador_sandbox) + monkeypatch.setattr(graph_module, "roteador_grafico", _fake_roteador_grafico) + + return graph_module.Graph(api_key="fake", model="fake", hitl=False, enable_graphs=enable_graphs) + + +def test_grafo_com_graficos_gera_csv_e_png(monkeypatch, tmp_path): + grafo = _montar_grafo_fake(monkeypatch, tmp_path, enable_graphs=True) + config = {"configurable": {"thread_id": "grafo_graficos_true"}} + + estado = { + "pergunta_original": "Teste", + "pergunta_atual": "Teste", + "db_path": "fake.db", + } + resultado = grafo.grafo_text_to_insight.invoke(estado, config) + + csv_path = 
resultado.get("caminho_csv_resultado", "") + assert csv_path + assert Path(csv_path).exists() + + assert resultado.get("grafico_gerado") is True + grafico_path = resultado.get("caminho_grafico", "") + assert grafico_path + assert Path(grafico_path).exists() + assert Path(grafico_path).stat().st_size > 0 + + +def test_grafo_sem_graficos_bypassa(monkeypatch, tmp_path): + grafo = _montar_grafo_fake(monkeypatch, tmp_path, enable_graphs=False) + config = {"configurable": {"thread_id": "grafo_graficos_false"}} + + estado = { + "pergunta_original": "Teste", + "pergunta_atual": "Teste", + "db_path": "fake.db", + } + resultado = grafo.grafo_text_to_insight.invoke(estado, config) + + assert resultado.get("caminho_csv_resultado", "") == "" + assert resultado.get("grafico_gerado", False) is False + assert resultado.get("caminho_grafico", "") == "" \ No newline at end of file diff --git a/tests/test_nodes.py b/tests/test_nodes.py index 1934363..aaf0729 100644 --- a/tests/test_nodes.py +++ b/tests/test_nodes.py @@ -4,16 +4,18 @@ Cada teste exercita UM ÚNICO nó isoladamente, com estado construído manualmente. Se um teste falha, você sabe exatamente qual nó quebrou. -Usa API real do Gemini — requer GOOGLE_API_KEY e quota disponível. +Usa API real — requer GOOGLE_API_KEY ou OPENAI_API_KEY e quota disponível. 
Executa: pytest tests/test_nodes.py -v -s """ import os import sys import time +import csv +import subprocess +from pathlib import Path import pytest from dotenv import load_dotenv -from langchain_google_genai import ChatGoogleGenerativeAI load_dotenv() @@ -21,19 +23,6 @@ DB_PATH = os.path.join(os.path.dirname(__file__), "..", "data", "olist_relational.db") -@pytest.fixture -def llm(): - """Retorna uma instância real do Gemini para os testes dos nós.""" #como estamos usando vcr, não haverá mais requisição direta, apenas repetição - #do primeiro resultado da requisição, é possível verificar isso em test/cassettes - api_key = os.getenv("GOOGLE_API_KEY") - if not api_key: - pytest.skip("GOOGLE_API_KEY não encontrada no .env. Pulando teste.") - - return ChatGoogleGenerativeAI( - model="gemini-2.5-flash", - google_api_key=api_key - ) - def _obter_schema_real() -> str: """Helper: extrai schema real do olist DB (sem API, só SQLite).""" from text_to_insight.nodes.schema import nos_nodo_esquema @@ -294,3 +283,250 @@ def test_cadeia_code_agent_executor(llm): assert resultado_exec["status"] == "exec_ok" assert resultado_exec["total_linhas_resultado"] >= 1 + + +# ============================================================ +# GRÁFICOS — helpers determinísticos +# ============================================================ + +def test_extrair_codigo_python_com_markdown(): + from text_to_insight.nodes import graph_generator as gg + + # Extract only the code block, without markdown wrappers. + texto = """```python +print('ok') +```""" + assert gg._extrair_codigo_python(texto) == "print('ok')" + + +def test_extrair_codigo_python_sem_markdown(): + from text_to_insight.nodes import graph_generator as gg + + # Fallback path: when no markdown is present, return the original string. 
+ texto = "print('ok')" + assert gg._extrair_codigo_python(texto) == "print('ok')" + + +def test_construir_script_inclui_csv_e_saida(): + from text_to_insight.nodes import graph_generator as gg + + # Validate that the script includes the CSV load, plotting code, and output path. + script = gg._construir_script("/tmp/dados.csv", "/tmp/saida.png", "plt.plot([1],[2])") + assert "pd.read_csv(\"/tmp/dados.csv\")" in script + assert "plt.savefig(\"/tmp/saida.png\"" in script + assert "plt.plot([1],[2])" in script + assert "matplotlib.use('Agg')" in script + + +# ============================================================ +# GRÁFICOS — csv_saver +# ============================================================ + +def test_csv_saver_sem_linhas(tmp_path, monkeypatch): + from text_to_insight.nodes import csv_saver as csv_module + + # Redirect output to tmp_path and ensure empty input returns no file. + monkeypatch.setattr(csv_module, "RESULTS_DIR", tmp_path / "results") + resultado = csv_module.nos_nodo_salvar_csv({"linhas_resultado_completo": []}) + + assert resultado["caminho_csv_resultado"] == "" + + +def test_csv_saver_cria_arquivo(tmp_path, monkeypatch): + from text_to_insight.nodes import csv_saver as csv_module + + # Redirect output to tmp_path and validate header/row count and UTF-8 data. 
+ monkeypatch.setattr(csv_module, "RESULTS_DIR", tmp_path / "results") + linhas = [ + {"categoria": "café", "valor": 10}, + {"categoria": "açaí", "valor": 12}, + ] + + resultado = csv_module.nos_nodo_salvar_csv({"linhas_resultado_completo": linhas}) + caminho = Path(resultado["caminho_csv_resultado"]) + + assert caminho.exists() + with caminho.open("r", encoding="utf-8", newline="") as f: + reader = csv.DictReader(f) + rows = list(reader) + + assert reader.fieldnames == ["categoria", "valor"] + assert len(rows) == 2 + assert rows[0]["categoria"] == "café" + + +# ============================================================ +# GRÁFICOS — roteador_grafico (determinístico) +# ============================================================ + +class _FakeResponse: + def __init__(self, content: str): + self.content = content + self.usage_metadata = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0} + + +class _FakeLLM: + def __init__(self, content: str): + self._content = content + self.called = False + + def invoke(self, prompt: str): + # Use a deterministic response without caring about the prompt text. + self.called = True + return _FakeResponse(self._content) + + +class _NoCallLLM: + def invoke(self, prompt: str): + # If invoked, the test should fail because this path should short-circuit. + raise AssertionError("LLM não deveria ser chamado") + + +class _ErrorLLM: + def invoke(self, prompt: str): + # Force the fallback path by raising an exception. + raise RuntimeError("boom") + + +def test_roteador_grafico_sem_csv_nao_chama_llm(): + from text_to_insight.routers.edges import roteador_grafico + + # Missing CSV should bypass LLM and return resposta. 
+ estado = { + "pergunta_usuario": "Teste", + "linhas_resultado_preview": [{"a": 1}], + "total_linhas_resultado": 2, + "caminho_csv_resultado": "", + } + assert roteador_grafico(estado, _NoCallLLM()) == "resposta" + + +def test_roteador_grafico_uma_linha_nao_chama_llm(): + from text_to_insight.routers.edges import roteador_grafico + + # Single-row results should not trigger visualization. + estado = { + "pergunta_usuario": "Teste", + "linhas_resultado_preview": [{"a": 1}], + "total_linhas_resultado": 1, + "caminho_csv_resultado": "/tmp/resultado.csv", + } + assert roteador_grafico(estado, _NoCallLLM()) == "resposta" + + +def test_roteador_grafico_fallback_em_erro(): + from text_to_insight.routers.edges import roteador_grafico + + # Any LLM failure should fall back to resposta. + estado = { + "pergunta_usuario": "Teste", + "linhas_resultado_preview": [{"a": 1}, {"a": 2}], + "total_linhas_resultado": 2, + "caminho_csv_resultado": "/tmp/resultado.csv", + } + assert roteador_grafico(estado, _ErrorLLM()) == "resposta" + + +def test_roteador_grafico_decisao_sim(): + from text_to_insight.routers.edges import roteador_grafico + + # Deterministic positive decision from fake LLM. + llm = _FakeLLM("SIM") + estado = { + "pergunta_usuario": "Teste", + "linhas_resultado_preview": [{"a": 1}, {"a": 2}], + "total_linhas_resultado": 2, + "caminho_csv_resultado": "/tmp/resultado.csv", + } + assert roteador_grafico(estado, llm) == "gerador_grafico" + assert llm.called is True + + +def test_roteador_grafico_decisao_nao(): + from text_to_insight.routers.edges import roteador_grafico + + # Deterministic negative decision from fake LLM. 
+ llm = _FakeLLM("NAO") + estado = { + "pergunta_usuario": "Teste", + "linhas_resultado_preview": [{"a": 1}, {"a": 2}], + "total_linhas_resultado": 2, + "caminho_csv_resultado": "/tmp/resultado.csv", + } + assert roteador_grafico(estado, llm) == "resposta" + assert llm.called is True + + +# ============================================================ +# GRAFICOS — executador de script (subprocesso) +# ============================================================ + +def test_executar_script_sucesso(): + from text_to_insight.nodes import graph_generator as gg + + # Minimal script that signals success via GRAPH_OK. + ok, saida = gg._executar_script("print('GRAPH_OK')") + assert ok is True + assert "GRAPH_OK" in saida + + +def test_executar_script_timeout(monkeypatch): + from text_to_insight.nodes import graph_generator as gg + + # Simulate a subprocess timeout to validate the error path. + def _fake_run(*args, **kwargs): + raise subprocess.TimeoutExpired(cmd=args[0], timeout=30) + + monkeypatch.setattr(gg.subprocess, "run", _fake_run) + ok, saida = gg._executar_script("print('GRAPH_OK')") + + assert ok is False + assert "Timeout" in saida + + +def test_executar_script_falha_retorno(monkeypatch): + from text_to_insight.nodes import graph_generator as gg + + # Simulate a non-zero return code and stderr output. 
+ def _fake_run(*args, **kwargs): + return subprocess.CompletedProcess(args=args[0], returncode=1, stdout="", stderr="erro") + + monkeypatch.setattr(gg.subprocess, "run", _fake_run) + ok, saida = gg._executar_script("print('GRAPH_OK')") + + assert ok is False + assert "erro" in saida + + +def test_executar_script_limpa_temporario(monkeypatch, tmp_path): + from text_to_insight.nodes import graph_generator as gg + + temp_path = tmp_path / "temp_script.py" + + class _TempFile: + def __init__(self, path: Path): + self.name = str(path) + self._fh = open(self.name, "w", encoding="utf-8") + + def write(self, data: str) -> None: + self._fh.write(data) + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + self._fh.close() + + def _fake_named_tempfile(*args, **kwargs): + # Use a deterministic temp path to verify cleanup. + return _TempFile(temp_path) + + def _fake_run(*args, **kwargs): + return subprocess.CompletedProcess(args=args[0], returncode=0, stdout="GRAPH_OK", stderr="") + + monkeypatch.setattr(gg.tempfile, "NamedTemporaryFile", _fake_named_tempfile) + monkeypatch.setattr(gg.subprocess, "run", _fake_run) + + ok, _ = gg._executar_script("print('GRAPH_OK')") + assert ok is True + assert temp_path.exists() is False diff --git a/tests/test_real_api_smoke.py b/tests/test_real_api_smoke.py index 2012c63..1e6d287 100644 --- a/tests/test_real_api_smoke.py +++ b/tests/test_real_api_smoke.py @@ -1,19 +1,8 @@ -"""Smoke test opcional com API real para detectar drift de provider/modelo.""" - -import os import pytest -from text_to_insight.model_selection import get_model -from dotenv import load_dotenv -load_dotenv() @pytest.mark.real_api @pytest.mark.timeout(60) -def test_provider_model_smoke_real_api(): - api_key = os.getenv("GOOGLE_API_KEY") - if not api_key: - pytest.skip("GOOGLE_API_KEY não encontrada para teste real_api.") - - llm = get_model("gemini-2.5-flash", api_key) +def test_provider_model_smoke_real_api(llm): resposta = 
llm.invoke("Responda apenas com: OK") conteudo = str(getattr(resposta, "content", "")).strip().upper() From d500059db49d977f33d98cf6147c958b9dd9b35a Mon Sep 17 00:00:00 2001 From: LuizCorrei4 Date: Thu, 21 May 2026 21:35:15 -0300 Subject: [PATCH 2/6] - chore(tests): grava cassetes VCR para gpt-4o-mini e gemini-2.5-flash MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Atualiza os cassetes de integração para os perfis OpenAI gpt-4o-mini e Gemini 2.5 Flash e ajusta a documentação da CI para deixar claro que os jobs rodam em matriz com os dois conjuntos de cassetes. - No momento, a CI ainda não tem acesso aos secrets OPENAI_API_KEY e GOOGLE_API_KEY, então apenas o job determinístico com --record-mode=none funciona. Os jobs record-vcr-cassettes e tests-real-api não funcionam ainda sem essa configuração no GitHub Actions; por enquanto, a gravação e a validação real desses fluxos devem ser feitas localmente pelo desenvolvedor. --- .github/workflows/ci.yml | 8 +- .gitignore | 5 +- DESENVOLVIMENTO.md | 16 +- .../test_estado_final_completo.yaml | 682 ++++++++- ...do_final_completo__openai-gpt-4o-mini.yaml | 495 +++++++ .../test_hitl_nova_pergunta_substitui.yaml | 1061 ++++++++++++-- ...ergunta_substitui__openai-gpt-4o-mini.yaml | 765 ++++++++++ .../test_pergunta_com_ranking.yaml | 333 ++++- ...gunta_com_ranking__openai-gpt-4o-mini.yaml | 1289 +++++++++++++++++ .../test_pergunta_simples.yaml | 238 ++- ..._pergunta_simples__openai-gpt-4o-mini.yaml | 493 +++++++ tests/test_integracao.py | 9 +- 12 files changed, 5083 insertions(+), 311 deletions(-) create mode 100644 tests/cassettes/test_integracao/test_estado_final_completo__openai-gpt-4o-mini.yaml create mode 100644 tests/cassettes/test_integracao/test_hitl_nova_pergunta_substitui__openai-gpt-4o-mini.yaml create mode 100644 tests/cassettes/test_integracao/test_pergunta_com_ranking__openai-gpt-4o-mini.yaml create mode 100644
tests/cassettes/test_integracao/test_pergunta_simples__openai-gpt-4o-mini.yaml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 134f9fe..6908034 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -73,7 +73,7 @@ jobs: python -m pip install --upgrade pip pip install -r requirements.txt && pip install -e . - # ✅ Mesmo setup de banco dos outros jobs + # Mesmo setup de banco dos outros jobs - name: Download test database run: | mkdir -p data @@ -81,7 +81,7 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # ✅ Verificação no nível do step com saída que controla os próximos + # Verificação no nível do step com saída que controla os próximos - name: Check API key availability id: check_key run: | @@ -96,12 +96,12 @@ jobs: fi - name: Record or update VCR cassettes - if: steps.check_key.outputs.available == 'true' # ✅ só roda se tiver key + if: steps.check_key.outputs.available == 'true' # só roda se tiver key run: | pytest tests/test_nodes.py tests/test_integracao.py \ -v -s --record-mode=rewrite -m "not real_api" - # ✅ Commita os cassettes de volta no repo + # Commita os cassettes de volta no repo - name: Commit updated cassettes if: steps.check_key.outputs.available == 'true' run: | diff --git a/.gitignore b/.gitignore index 4713292..ca540ac 100644 --- a/.gitignore +++ b/.gitignore @@ -149,4 +149,7 @@ chroma_db/ /meus_testes/* # reports testes spider -/reports/ \ No newline at end of file +/reports/ + +# commit.txt (arquivo temporário usado para textos de commit personalizados) +commit.txt \ No newline at end of file diff --git a/DESENVOLVIMENTO.md b/DESENVOLVIMENTO.md index fc559c7..c7c3066 100644 --- a/DESENVOLVIMENTO.md +++ b/DESENVOLVIMENTO.md @@ -159,7 +159,6 @@ Observacoes: Use `rewrite` quando quiser **substituir completamente** as cassetes existentes, por exemplo, após uma mudança grande de prompt que torna as respostas gravadas incompatíveis com os testes atuais. 
```bash -TEXT_TO_INSIGHT_TEST_PROVIDER=openai TEXT_TO_INSIGHT_TEST_MODEL=gpt-4o-mini \ pytest tests/test_nodes.py tests/test_integracao.py -v -s --record-mode=rewrite ``` @@ -182,9 +181,16 @@ Os testes de API real usam a fixture compartilhada em `tests/conftest.py` para r 1. `TEXT_TO_INSIGHT_TEST_MODEL` define o modelo explicitamente. 2. Se `TEXT_TO_INSIGHT_TEST_PROVIDER` estiver definido, ele força o provider (`google` ou `openai`). -3. Se o modelo nao deixar o provider obvio, o provider precisa ser informado. +3. Se o modelo não deixar o provider óbvio, o provider precisa ser informado. 4. Sem overrides, o sistema tenta `GOOGLE_API_KEY` e depois `OPENAI_API_KEY`. +Resumo do funcionamento local: + +- `pytest tests/test_nodes.py tests/test_integracao.py -v -s --record-mode=none` faz replay apenas; se faltar cassette, o teste falha. +- `pytest tests/test_nodes.py tests/test_integracao.py -v -s --record-mode=new_episodes` grava só as chamadas que ainda não existem no YAML. +- `pytest tests/test_nodes.py tests/test_integracao.py -v -s --record-mode=rewrite` regrava tudo do escopo. +- a fixture monta o nome do cassette a partir do teste e adiciona sufixo quando o provider/modelo sai do padrão Gemini; por isso `gpt-4o-mini` usa cassettes com `__openai-gpt-4o-mini` e `gemini-2.5-flash` usa os nomes sem sufixo. + Para gravar novas cassetes de forma previsível, escolha explicitamente provider e modelo: ```bash @@ -199,7 +205,7 @@ TEXT_TO_INSIGHT_TEST_MODEL=gpt-4o-mini \ pytest tests/test_nodes.py tests/test_integracao.py -v -s --record-mode=new_episodes ``` -Se voce definir as variaveis em linhas separadas no shell, use `export` para que o `pytest` e os processos filhos enxerguem os valores. Sem `export`, a atribuicao fica so no shell atual e a suite nao herda a configuracao. +Se você definir as variáveis em linhas separadas no shell, use `export` para que o `pytest` e os processos filhos enxerguem os valores.
Sem `export`, a atribuição fica só no shell atual e a suíte de testes não herda a configuração. Uso recomendado: @@ -251,8 +257,8 @@ Opções úteis: `--db-filter`, `--question-filter`, `--model`, `--with-graphs`, Arquivo: `.github/workflows/ci.yml` -- job padrão determinístico em PR/push (VCR + `--record-mode=none`); -- job manual `record-vcr-cassettes` em `workflow_dispatch` para gravar/atualizar cassetes com API real; +- job padrão determinístico em PR/push roda em matriz com as duas combinações de cassette: `openai / gpt-4o-mini` e `google / gemini-2.5-flash`, sempre em `--record-mode=none`; +- job manual `record-vcr-cassettes` em `workflow_dispatch` tambem roda as duas combinações e pode gravar/atualizar os dois conjuntos de cassetes; - job opcional real API em `workflow_dispatch` e `schedule`. ## Build e distribuição diff --git a/tests/cassettes/test_integracao/test_estado_final_completo.yaml b/tests/cassettes/test_integracao/test_estado_final_completo.yaml index fc6f433..d9a6ea5 100644 --- a/tests/cassettes/test_integracao/test_estado_final_completo.yaml +++ b/tests/cassettes/test_integracao/test_estado_final_completo.yaml @@ -3,16 +3,42 @@ interactions: body: '{"contents": [{"parts": [{"text": "Voc\u00ea \u00e9 o planejador de um sistema que transforma perguntas em consultas SQL.\n\nSeu papel: analisar a situa\u00e7\u00e3o atual e decidir a pr\u00f3xima a\u00e7\u00e3o.\n\nContexto - atual:\n- Pergunta do usu\u00e1rio: \"Qual o valor medio dos pedidos?\"\n- Schema - dispon\u00edvel: Sim\n- Feedback do cr\u00edtico: Nenhum\n- Tentativas realizadas: - 0\n- Status atual: schema_obtido\n- Erro anterior: Nenhum\n\nAVALIA\u00c7\u00c3O + atual:\n- Pergunta do usu\u00e1rio: \"Qual o valor medio dos pedidos?\"\n\n- + conversa_previa: Nenhuma\n\n- Schema: === SCHEMA SQLITE (INTROSPECCAO REAL) + ===\n\nTabela: customers\n- customer_id: TEXT (PK)\n- customer_unique_id: TEXT\n- + customer_zip_code_prefix: INTEGER\n- customer_city: TEXT\n- customer_state: + TEXT\n\nTabela: 
geolocation\n- geolocation_zip_code_prefix: INTEGER\n- geolocation_lat: + REAL\n- geolocation_lng: REAL\n- geolocation_city: TEXT\n- geolocation_state: + TEXT\n\nTabela: order_items\n- order_id: TEXT (PK)\n- order_item_id: INTEGER + (PK)\n- product_id: TEXT\n- seller_id: TEXT\n- shipping_limit_date: TEXT\n- + price: REAL\n- freight_value: REAL\n Foreign keys:\n - seller_id -> sellers.seller_id + (on_update=NO ACTION, on_delete=NO ACTION)\n - product_id -> products.product_id + (on_update=NO ACTION, on_delete=NO ACTION)\n - order_id -> orders.order_id + (on_update=NO ACTION, on_delete=NO ACTION)\n\nTabela: order_payments\n- order_id: + TEXT (PK)\n- payment_sequential: INTEGER (PK)\n- payment_type: TEXT\n- payment_installments: + INTEGER\n- payment_value: REAL\n Foreign keys:\n - order_id -> orders.order_id + (on_update=NO ACTION, on_delete=NO ACTION)\n\nTabela: order_reviews\n- review_id: + TEXT (PK)\n- order_id: TEXT\n- review_score: INTEGER\n- review_comment_title: + TEXT\n- review_comment_message: TEXT\n- review_creation_date: TEXT\n- review_answer_timestamp: + TEXT\n Foreign keys:\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\n\nTabela: orders\n- order_id: TEXT (PK)\n- customer_id: + TEXT\n- order_status: TEXT\n- order_purchase_timestamp: TEXT\n- order_approved_at: + TEXT\n- order_delivered_carrier_date: TEXT\n- order_delivered_customer_date: + TEXT\n- order_estimated_delivery_date: TEXT\n Foreign keys:\n - customer_id + -> customers.customer_id (on_update=NO ACTION, on_delete=NO ACTION)\n\nTabela: + products\n- product_id: TEXT (PK)\n- product_category_name: TEXT\n- product_name_length: + REAL\n- product_description_length: REAL\n- product_photos_qty: REAL\n- product_weight_g: + REAL\n- product_length_cm: REAL\n- product_height_cm: REAL\n- product_width_cm: + REAL\n\nTabela: sellers\n- seller_id: TEXT (PK)\n- seller_zip_code_prefix: INTEGER\n- + seller_city: TEXT\n- seller_state: TEXT\n\n\n- Feedback do cr\u00edtico: Nenhum\n- + 
Tentativas realizadas: 0\n- Status atual: schema_obtido\n- Erro anterior: Nenhum\n\nAVALIA\u00c7\u00c3O CR\u00cdTICA:\nVerifique se a \"Pergunta do usu\u00e1rio\" pode ser respondida com as tabelas e colunas do Schema.\nSe houver ambiguidade, conceitos n\u00e3o mapeados no banco de dados, ou se a inten\u00e7\u00e3o do usu\u00e1rio n\u00e3o estiver clara, voc\u00ea DEVE pedir mais informa\u00e7\u00f5es.\n\nResponda - EXATAMENTE no formato JSON abaixo, sem formata\u00e7\u00e3o markdown (```json):\n{\n \"decisao\": + EXATAMENTE no formato JSON abaixo, sem formata\u00e7\u00e3o markdown (```json):\n{{\n \"decisao\": \"escolha_uma_opcao\",\n \"pergunta_ao_usuario\": \"escreva a pergunta aqui - se precisar de ajuda, ou deixe vazio se n\u00e3o precisar\"\n}\n\nOp\u00e7\u00f5es + se precisar de ajuda, ou deixe vazio se n\u00e3o precisar\"\n}}\n\nOp\u00e7\u00f5es v\u00e1lidas para ''decis\u00e3o'':\n- \"pronto_codificacao\" \u2192 se temos schema, a pergunta faz sentido e devemos gerar/regenerar SQL\n- \"revisando_estrategia\" \u2192 se o cr\u00edtico reprovou e devemos tentar uma abordagem diferente\n- @@ -22,32 +48,37 @@ interactions: 1}}' headers: Accept: - - '*/*' + - '*' Accept-Encoding: - - gzip, deflate, zstd + - g Connection: - - keep-alive + - k Content-Length: - - '1549' + - '3' Content-Type: - - application/json + - a Host: - - generativelanguage.googleapis.com + - g User-Agent: - - google-genai-sdk/1.68.0 gl-python/3.10.12 + - g x-goog-api-client: - - google-genai-sdk/1.68.0 gl-python/3.10.12 + - g method: POST uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent response: body: string: !!binary | - H4sIAAAAAAAC/2WRXU/CMBSG7/crll6DUQQJ3qqJXCCIi5E4Q47rYWsY7dKeGnTZf7fdGAxdk6V9 - z9vz8bQMwpAlILngQGjYbfjulDAs67+PKUkoyQVayYkFaDp5m6/s7J2FcO8vsTKW/hwzjokwoGIn - xqzQLq1aJ4qLjUgg8XqvdRaoUysJ1qDW1ljQ4nDLLVmxTp3quP/onbrTKkdfeqc45q29ag1sI6Qw - 2RLBKOltL9F8wY5RITnunXwZtAXq1MwaSHGGBI4THGn4QXYFRWqL8k7ZmtP11U2TrcP1zDCYHOKk - 
CPKz0Hgy7v1LbO5dWZF3gXfewk0JuaBvP0r08BaxDgn601fLIuggY5Qpm2Z03uNwNAgO0BqOr6iN - aICluHMI+4OLUX+Tg8nqikyjKZQ0OOXe8zSM5rDS8nH2ufwZTxdffbDmecuCKvgFSgPWCnMCAAA= + H4sIAAAAAAAC/7VUXW/TMBR936+4yiNKEyhU2/rERNutEhOl6xCIocp1blMzx878UVFN/e9cO9nW + jkoIBFKl2O71Offcc5L7I4AEjdEm6cM9bWjLdYG0e9M9TZuDCq1lZThLvmgP+IMjFljARnsD3BuD + ysGd146lUEtkFoGvkN82BbVkCpgqYCGkFKqEAh0T0mYw0gYqbRCEWmpTMSe0Avq5lbAQm0phhawA + p/uwcq62/TxnIiu1LiVmBa7zEiuhRIfVIi80t7lhDjtSVMIR/kwTvBKOaPZa9UHNYeiA+YSRwY16 + AR+DsifVy9A1OiN4H0pUSOVijSSy9ATbNkcN2YzrKm8rcM61ckQ+XxrEuRNo5gbvPFpnU4hkfeil + 1G+BMuBGWd2s11lKZlc3atLM1RDxhuYF3exV9+XxyfHJ61ObJa1R1jHnbfBpOrz6cD19N5wPP1+c + XV/NhoOHmnb6VPQ1HkBre/zzrdvU0efw/EVKM3ZT8+wCZd0Cxnvk6+0u5D5sy2u5EXXwOBC8R2ZU + 4z5baO/gPEqGs8m4iZLdwY8A3shw8Y9zkOzAbB/X39rVNv37KcRgjGia3uDuNNZCyxjm34wk6ryM + UQps/yRMz6cWOcZFwD9vb0/b0gmaS6G8Q1pMjP6O3IWjkMDOiIBnhHsQbiAqVLbVt68pZEFz9uBy + KfWCyWcg4YsSSBrN+0FP9iq3h9g/MemjO73/5+w0vGZj+irt2hrfvQGZswkI3cdgNdSBdnu0PfoJ + dL39d1IFAAA= headers: Alt-Svc: - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 @@ -56,11 +87,447 @@ interactions: Content-Type: - application/json; charset=UTF-8 Date: - - Thu, 02 Apr 2026 14:59:06 GMT + - Thu, 21 May 2026 23:49:57 GMT Server: - scaffolding on HTTPServer2 Server-Timing: - - gfet4t7; dur=2966 + - gfet4t7; dur=282 + Transfer-Encoding: + - chunked + Vary: + - Origin + - X-Origin + - Referer + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - SAMEORIGIN + X-Gemini-Service-Tier: + - standard + X-XSS-Protection: + - '0' + status: + code: 429 + message: Too Many Requests +- request: + body: '{"contents": [{"parts": [{"text": "Voc\u00ea \u00e9 o planejador de um + sistema que transforma perguntas em consultas SQL.\n\nSeu papel: analisar a + situa\u00e7\u00e3o atual e decidir a pr\u00f3xima a\u00e7\u00e3o.\n\nContexto + atual:\n- Pergunta do usu\u00e1rio: \"Qual o valor medio dos pedidos?\"\n\n- + conversa_previa: Nenhuma\n\n- Schema: === SCHEMA SQLITE (INTROSPECCAO REAL) + ===\n\nTabela: customers\n- customer_id: 
TEXT (PK)\n- customer_unique_id: TEXT\n- + customer_zip_code_prefix: INTEGER\n- customer_city: TEXT\n- customer_state: + TEXT\n\nTabela: geolocation\n- geolocation_zip_code_prefix: INTEGER\n- geolocation_lat: + REAL\n- geolocation_lng: REAL\n- geolocation_city: TEXT\n- geolocation_state: + TEXT\n\nTabela: order_items\n- order_id: TEXT (PK)\n- order_item_id: INTEGER + (PK)\n- product_id: TEXT\n- seller_id: TEXT\n- shipping_limit_date: TEXT\n- + price: REAL\n- freight_value: REAL\n Foreign keys:\n - seller_id -> sellers.seller_id + (on_update=NO ACTION, on_delete=NO ACTION)\n - product_id -> products.product_id + (on_update=NO ACTION, on_delete=NO ACTION)\n - order_id -> orders.order_id + (on_update=NO ACTION, on_delete=NO ACTION)\n\nTabela: order_payments\n- order_id: + TEXT (PK)\n- payment_sequential: INTEGER (PK)\n- payment_type: TEXT\n- payment_installments: + INTEGER\n- payment_value: REAL\n Foreign keys:\n - order_id -> orders.order_id + (on_update=NO ACTION, on_delete=NO ACTION)\n\nTabela: order_reviews\n- review_id: + TEXT (PK)\n- order_id: TEXT\n- review_score: INTEGER\n- review_comment_title: + TEXT\n- review_comment_message: TEXT\n- review_creation_date: TEXT\n- review_answer_timestamp: + TEXT\n Foreign keys:\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\n\nTabela: orders\n- order_id: TEXT (PK)\n- customer_id: + TEXT\n- order_status: TEXT\n- order_purchase_timestamp: TEXT\n- order_approved_at: + TEXT\n- order_delivered_carrier_date: TEXT\n- order_delivered_customer_date: + TEXT\n- order_estimated_delivery_date: TEXT\n Foreign keys:\n - customer_id + -> customers.customer_id (on_update=NO ACTION, on_delete=NO ACTION)\n\nTabela: + products\n- product_id: TEXT (PK)\n- product_category_name: TEXT\n- product_name_length: + REAL\n- product_description_length: REAL\n- product_photos_qty: REAL\n- product_weight_g: + REAL\n- product_length_cm: REAL\n- product_height_cm: REAL\n- product_width_cm: + REAL\n\nTabela: sellers\n- seller_id: 
TEXT (PK)\n- seller_zip_code_prefix: INTEGER\n- + seller_city: TEXT\n- seller_state: TEXT\n\n\n- Feedback do cr\u00edtico: Nenhum\n- + Tentativas realizadas: 0\n- Status atual: schema_obtido\n- Erro anterior: Nenhum\n\nAVALIA\u00c7\u00c3O + CR\u00cdTICA:\nVerifique se a \"Pergunta do usu\u00e1rio\" pode ser respondida + com as tabelas e colunas do Schema.\nSe houver ambiguidade, conceitos n\u00e3o + mapeados no banco de dados, ou se a inten\u00e7\u00e3o do usu\u00e1rio n\u00e3o + estiver clara, voc\u00ea DEVE pedir mais informa\u00e7\u00f5es.\n\nResponda + EXATAMENTE no formato JSON abaixo, sem formata\u00e7\u00e3o markdown (```json):\n{{\n \"decisao\": + \"escolha_uma_opcao\",\n \"pergunta_ao_usuario\": \"escreva a pergunta aqui + se precisar de ajuda, ou deixe vazio se n\u00e3o precisar\"\n}}\n\nOp\u00e7\u00f5es + v\u00e1lidas para ''decis\u00e3o'':\n- \"pronto_codificacao\" \u2192 se temos + schema, a pergunta faz sentido e devemos gerar/regenerar SQL\n- \"revisando_estrategia\" + \u2192 se o cr\u00edtico reprovou e devemos tentar uma abordagem diferente\n- + \"necessita_ajuda\" \u2192 a pergunta n\u00e3o \u00e9 clara, n\u00e3o faz sentido, + falta contexto ou n\u00e3o h\u00e1 dados no schema para responder.\n"}], "role": + "user"}], "safetySettings": [], "generationConfig": {"temperature": 0.7, "candidateCount": + 1}}' + headers: + Accept: + - '*' + Accept-Encoding: + - g + Connection: + - k + Content-Length: + - '3' + Content-Type: + - a + Host: + - g + User-Agent: + - g + x-goog-api-client: + - g + method: POST + uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent + response: + body: + string: !!binary | + H4sIAAAAAAAC/7VUXW/aMBR951dc8TiFBHVlGzytGtAirRqjdNq0Tsg4l+DVsVN/oKGK/75rJ22h + Q5o2bRJSbHN9zj33nOS+BdBGY7RpD+CeNrTlOkfanZ70k/qgRGtZEc7aX7QH/MERc8xhq70B7o1B + 5eDOa8cSqCQyi8DXyG/rgkoyBUzlsBRSClVAjo4JaVMYawOlNghCrbQpmRNaAf3cWliITSWwRpaD + 0wNYO1fZQZYxkRZaFxLTHDdZgaVQosMqkeWa28wwhx0pSuEIf64JXglHNAet+qDmOHTAfMJI4Ua9 + 
gI9B2ZPqVeganRF8AAUqpHKxQRJZeIJtmqOGbMp1mTUVuOBaOSJfrAziwgk0C4N3Hq2zCUSyAfQS + 6jdHGXCjrJO011lJZtc3alrP1RDxluYFvX76+lW3333TPX1p03bjlHXMeRuMmo2uPlzP3o0Wo88X + Z9dX89HwoaYZPxV9jQfQ+B7/fOu2VTQ6PH/RUs/dVDy9QFk1gPEeGXu7D3kI2/BabkQVTA4E75EZ + VdvPlto7OI+a4Ww6qbNk9/AjgDcyXPzjILT3YHaP62/Napf8/RRiMsY0TW9wfxoboWVM829GEnVe + xiwFtn+SpudTixyTPOCfN7dnTekUzaVQ3iEtpkZ/R+7CUYhgZ0zAc8I9CjcUJSrb6DvUFLKgOXtw + uZB6yeQzkPBJCSS15sOktw8qd8fYPzHpozu9/+fsLLxnE/os7dsaX74hmbON7P3HZNXcgXfX2rV+ + AnH8mmBUBQAA + headers: + Alt-Svc: + - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 + Content-Encoding: + - gzip + Content-Type: + - application/json; charset=UTF-8 + Date: + - Thu, 21 May 2026 23:50:00 GMT + Server: + - scaffolding on HTTPServer2 + Server-Timing: + - gfet4t7; dur=939 + Transfer-Encoding: + - chunked + Vary: + - Origin + - X-Origin + - Referer + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - SAMEORIGIN + X-Gemini-Service-Tier: + - standard + X-XSS-Protection: + - '0' + status: + code: 429 + message: Too Many Requests +- request: + body: '{"contents": [{"parts": [{"text": "Voc\u00ea \u00e9 o planejador de um + sistema que transforma perguntas em consultas SQL.\n\nSeu papel: analisar a + situa\u00e7\u00e3o atual e decidir a pr\u00f3xima a\u00e7\u00e3o.\n\nContexto + atual:\n- Pergunta do usu\u00e1rio: \"Qual o valor medio dos pedidos?\"\n\n- + conversa_previa: Nenhuma\n\n- Schema: === SCHEMA SQLITE (INTROSPECCAO REAL) + ===\n\nTabela: customers\n- customer_id: TEXT (PK)\n- customer_unique_id: TEXT\n- + customer_zip_code_prefix: INTEGER\n- customer_city: TEXT\n- customer_state: + TEXT\n\nTabela: geolocation\n- geolocation_zip_code_prefix: INTEGER\n- geolocation_lat: + REAL\n- geolocation_lng: REAL\n- geolocation_city: TEXT\n- geolocation_state: + TEXT\n\nTabela: order_items\n- order_id: TEXT (PK)\n- order_item_id: INTEGER + (PK)\n- product_id: TEXT\n- seller_id: TEXT\n- shipping_limit_date: TEXT\n- + price: REAL\n- freight_value: REAL\n Foreign keys:\n - seller_id -> 
sellers.seller_id + (on_update=NO ACTION, on_delete=NO ACTION)\n - product_id -> products.product_id + (on_update=NO ACTION, on_delete=NO ACTION)\n - order_id -> orders.order_id + (on_update=NO ACTION, on_delete=NO ACTION)\n\nTabela: order_payments\n- order_id: + TEXT (PK)\n- payment_sequential: INTEGER (PK)\n- payment_type: TEXT\n- payment_installments: + INTEGER\n- payment_value: REAL\n Foreign keys:\n - order_id -> orders.order_id + (on_update=NO ACTION, on_delete=NO ACTION)\n\nTabela: order_reviews\n- review_id: + TEXT (PK)\n- order_id: TEXT\n- review_score: INTEGER\n- review_comment_title: + TEXT\n- review_comment_message: TEXT\n- review_creation_date: TEXT\n- review_answer_timestamp: + TEXT\n Foreign keys:\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\n\nTabela: orders\n- order_id: TEXT (PK)\n- customer_id: + TEXT\n- order_status: TEXT\n- order_purchase_timestamp: TEXT\n- order_approved_at: + TEXT\n- order_delivered_carrier_date: TEXT\n- order_delivered_customer_date: + TEXT\n- order_estimated_delivery_date: TEXT\n Foreign keys:\n - customer_id + -> customers.customer_id (on_update=NO ACTION, on_delete=NO ACTION)\n\nTabela: + products\n- product_id: TEXT (PK)\n- product_category_name: TEXT\n- product_name_length: + REAL\n- product_description_length: REAL\n- product_photos_qty: REAL\n- product_weight_g: + REAL\n- product_length_cm: REAL\n- product_height_cm: REAL\n- product_width_cm: + REAL\n\nTabela: sellers\n- seller_id: TEXT (PK)\n- seller_zip_code_prefix: INTEGER\n- + seller_city: TEXT\n- seller_state: TEXT\n\n\n- Feedback do cr\u00edtico: Nenhum\n- + Tentativas realizadas: 0\n- Status atual: schema_obtido\n- Erro anterior: Nenhum\n\nAVALIA\u00c7\u00c3O + CR\u00cdTICA:\nVerifique se a \"Pergunta do usu\u00e1rio\" pode ser respondida + com as tabelas e colunas do Schema.\nSe houver ambiguidade, conceitos n\u00e3o + mapeados no banco de dados, ou se a inten\u00e7\u00e3o do usu\u00e1rio n\u00e3o + estiver clara, voc\u00ea DEVE 
pedir mais informa\u00e7\u00f5es.\n\nResponda + EXATAMENTE no formato JSON abaixo, sem formata\u00e7\u00e3o markdown (```json):\n{{\n \"decisao\": + \"escolha_uma_opcao\",\n \"pergunta_ao_usuario\": \"escreva a pergunta aqui + se precisar de ajuda, ou deixe vazio se n\u00e3o precisar\"\n}}\n\nOp\u00e7\u00f5es + v\u00e1lidas para ''decis\u00e3o'':\n- \"pronto_codificacao\" \u2192 se temos + schema, a pergunta faz sentido e devemos gerar/regenerar SQL\n- \"revisando_estrategia\" + \u2192 se o cr\u00edtico reprovou e devemos tentar uma abordagem diferente\n- + \"necessita_ajuda\" \u2192 a pergunta n\u00e3o \u00e9 clara, n\u00e3o faz sentido, + falta contexto ou n\u00e3o h\u00e1 dados no schema para responder.\n"}], "role": + "user"}], "safetySettings": [], "generationConfig": {"temperature": 0.7, "candidateCount": + 1}}' + headers: + Accept: + - '*' + Accept-Encoding: + - g + Connection: + - k + Content-Length: + - '3' + Content-Type: + - a + Host: + - g + User-Agent: + - g + x-goog-api-client: + - g + method: POST + uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent + response: + body: + string: !!binary | + H4sIAAAAAAAC/7VUXW/aMBR951dc8TiFpEKjFTytGtAirRqjdNq0Tsg4l+DVsVN/oKKK/75rJ22h + Q5o2bRJSbHN9zj33nOSxBdBGY7RpD+CRNrTlOkfave32k/qgRGtZEc7aX7UHfOCIOeaw1d4A98ag + cnDvtWMJVBKZReBr5Hd1QSWZAqZyWAophSogR8eEtCmMtYFSGwShVtqUzAmtgH5uLSzEphJYI8vB + 6QGsnavsIMuYSAutC4lpjpuswFIo0WGVyHLNbWaYw44UpXCEP9cEr4QjmoNWfVBzHDpgvmCkcKve + wKeg7EX1KnSNzgg+gAIVUrnYIIksPME2zVFDNuW6zJoKXHCtHJEvVgZx4QSahcF7j9bZBCLZAHoJ + 9ZujDLhRVjftdVaS2fWtmtZzNUS8pXlB7zTtn/S7Z/3uyZlN241T1jHnbTBqNrr+eDN7P1qMvlye + 31zPR8Onmmb8VPQtHkDje/zzndtW0ejw/EVLPXdT8fQSZdUAxntk7N0+5CFsw2u5EVUwORB8QGZU + bT9bau/gImqG8+mkzpLdw48A3shw8Y+D0N6D2T2vvzerXfL3U4jJGNM0vcH9aWyEljHNvxlJ1HkV + sxTY/kmaXk8tckzygH/R3J41pVM0V0J5h7SYGv0DuQtHIYKdMQHPCfco3FCUqGyj71BTyILm7Mnl + Quolk69AwiclkNSaD5PePqjcHWP/zKSP7vT+n7Oz8J5N6LO0b2t8+YZkzjaynz4nq+YOvLvWrvUT + 4diF7lQFAAA= + headers: + Alt-Svc: + - 
h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 + Content-Encoding: + - gzip + Content-Type: + - application/json; charset=UTF-8 + Date: + - Thu, 21 May 2026 23:50:03 GMT + Server: + - scaffolding on HTTPServer2 + Server-Timing: + - gfet4t7; dur=315 + Transfer-Encoding: + - chunked + Vary: + - Origin + - X-Origin + - Referer + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - SAMEORIGIN + X-Gemini-Service-Tier: + - standard + X-XSS-Protection: + - '0' + status: + code: 429 + message: Too Many Requests +- request: + body: '{"contents": [{"parts": [{"text": "Voc\u00ea \u00e9 o planejador de um + sistema que transforma perguntas em consultas SQL.\n\nSeu papel: analisar a + situa\u00e7\u00e3o atual e decidir a pr\u00f3xima a\u00e7\u00e3o.\n\nContexto + atual:\n- Pergunta do usu\u00e1rio: \"Qual o valor medio dos pedidos?\"\n\n- + conversa_previa: Nenhuma\n\n- Schema: === SCHEMA SQLITE (INTROSPECCAO REAL) + ===\n\nTabela: customers\n- customer_id: TEXT (PK)\n- customer_unique_id: TEXT\n- + customer_zip_code_prefix: INTEGER\n- customer_city: TEXT\n- customer_state: + TEXT\n\nTabela: geolocation\n- geolocation_zip_code_prefix: INTEGER\n- geolocation_lat: + REAL\n- geolocation_lng: REAL\n- geolocation_city: TEXT\n- geolocation_state: + TEXT\n\nTabela: order_items\n- order_id: TEXT (PK)\n- order_item_id: INTEGER + (PK)\n- product_id: TEXT\n- seller_id: TEXT\n- shipping_limit_date: TEXT\n- + price: REAL\n- freight_value: REAL\n Foreign keys:\n - seller_id -> sellers.seller_id + (on_update=NO ACTION, on_delete=NO ACTION)\n - product_id -> products.product_id + (on_update=NO ACTION, on_delete=NO ACTION)\n - order_id -> orders.order_id + (on_update=NO ACTION, on_delete=NO ACTION)\n\nTabela: order_payments\n- order_id: + TEXT (PK)\n- payment_sequential: INTEGER (PK)\n- payment_type: TEXT\n- payment_installments: + INTEGER\n- payment_value: REAL\n Foreign keys:\n - order_id -> orders.order_id + (on_update=NO ACTION, on_delete=NO ACTION)\n\nTabela: order_reviews\n- 
review_id: + TEXT (PK)\n- order_id: TEXT\n- review_score: INTEGER\n- review_comment_title: + TEXT\n- review_comment_message: TEXT\n- review_creation_date: TEXT\n- review_answer_timestamp: + TEXT\n Foreign keys:\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\n\nTabela: orders\n- order_id: TEXT (PK)\n- customer_id: + TEXT\n- order_status: TEXT\n- order_purchase_timestamp: TEXT\n- order_approved_at: + TEXT\n- order_delivered_carrier_date: TEXT\n- order_delivered_customer_date: + TEXT\n- order_estimated_delivery_date: TEXT\n Foreign keys:\n - customer_id + -> customers.customer_id (on_update=NO ACTION, on_delete=NO ACTION)\n\nTabela: + products\n- product_id: TEXT (PK)\n- product_category_name: TEXT\n- product_name_length: + REAL\n- product_description_length: REAL\n- product_photos_qty: REAL\n- product_weight_g: + REAL\n- product_length_cm: REAL\n- product_height_cm: REAL\n- product_width_cm: + REAL\n\nTabela: sellers\n- seller_id: TEXT (PK)\n- seller_zip_code_prefix: INTEGER\n- + seller_city: TEXT\n- seller_state: TEXT\n\n\n- Feedback do cr\u00edtico: Nenhum\n- + Tentativas realizadas: 0\n- Status atual: schema_obtido\n- Erro anterior: Nenhum\n\nAVALIA\u00c7\u00c3O + CR\u00cdTICA:\nVerifique se a \"Pergunta do usu\u00e1rio\" pode ser respondida + com as tabelas e colunas do Schema.\nSe houver ambiguidade, conceitos n\u00e3o + mapeados no banco de dados, ou se a inten\u00e7\u00e3o do usu\u00e1rio n\u00e3o + estiver clara, voc\u00ea DEVE pedir mais informa\u00e7\u00f5es.\n\nResponda + EXATAMENTE no formato JSON abaixo, sem formata\u00e7\u00e3o markdown (```json):\n{{\n \"decisao\": + \"escolha_uma_opcao\",\n \"pergunta_ao_usuario\": \"escreva a pergunta aqui + se precisar de ajuda, ou deixe vazio se n\u00e3o precisar\"\n}}\n\nOp\u00e7\u00f5es + v\u00e1lidas para ''decis\u00e3o'':\n- \"pronto_codificacao\" \u2192 se temos + schema, a pergunta faz sentido e devemos gerar/regenerar SQL\n- \"revisando_estrategia\" + \u2192 se o cr\u00edtico 
reprovou e devemos tentar uma abordagem diferente\n- + \"necessita_ajuda\" \u2192 a pergunta n\u00e3o \u00e9 clara, n\u00e3o faz sentido, + falta contexto ou n\u00e3o h\u00e1 dados no schema para responder.\n"}], "role": + "user"}], "safetySettings": [], "generationConfig": {"temperature": 0.7, "candidateCount": + 1}}' + headers: + Accept: + - '*' + Accept-Encoding: + - g + Connection: + - k + Content-Length: + - '3' + Content-Type: + - a + Host: + - g + User-Agent: + - g + x-goog-api-client: + - g + method: POST + uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent + response: + body: + string: !!binary | + H4sIAAAAAAAC/7VUXWvbMBR9z6+45HE4dtvNjOZpZUnawMqyNB0b6wiKfONolSVXH6Gh5L/vSnbb + pAuMjQ0ClpSrc+6559gPHYAuGqNNtw8PtKEt1wXS7s3JadIcVGgtK8NZ96v2gPccscACNtob4N4Y + VA7uvHYsgVoiswh8hfy2KaglU8BUAQshpVAlFOiYkDaFkTZQaYMg1FKbijmhFdDPrYSF2FQCK2QF + ON2HlXO17WcZE2mpdSkxLXCdlVgJJXqsFlmhuc0Mc9iTohKO8Gea4JVwRLPXqg9qDkMHzGeMFG7U + K/gUlD2rXoau0RnB+1CiQioXaySRpSfYtjlqyKZcV1lbgXOulSPy+dIgzp1AMzd459E6m0Ak60Oe + UL8FyoAbZZ2keW8pmV3dqEkzV0PEG5oX5Mdp/jo/zY/eHuU27bZOWcect8Go6fDq4/X0/XA+/HJx + dn01Gw4ea9rxU9G3eACt7/HPd25TR6PD8xctzdxNzdMLlHULGO+Rsbe7kPuwLa/lRtTB5EDwAZlR + jf1sob2D86gZzibjJkt2Bz8CeCPDxT8OQncHZvu0/t6utsnfTyEmY0TT9AZ3p7EWWsY0/2YkUedl + zFJg+ydpejm1yDEuAv55e3valk7QXArlHdJiYvQP5C4chQj2RgQ8I9yDcANRobKtvn1N4XMRABo9 + +yl+gRViozl7DEQp9YLJ7l7J9hD7ZyZ9dCf/f85Ow3s2ps/Srq3x5RuQOZvIfvyUrIY78G47285P + y2LTAFQFAAA= + headers: + Alt-Svc: + - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 + Content-Encoding: + - gzip + Content-Type: + - application/json; charset=UTF-8 + Date: + - Thu, 21 May 2026 23:50:08 GMT + Server: + - scaffolding on HTTPServer2 + Server-Timing: + - gfet4t7; dur=271 + Transfer-Encoding: + - chunked + Vary: + - Origin + - X-Origin + - Referer + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - SAMEORIGIN + X-Gemini-Service-Tier: + - standard + X-XSS-Protection: + - '0' + status: + code: 429 + message: Too Many 
Requests +- request: + body: '{"contents": [{"parts": [{"text": "Voc\u00ea \u00e9 o planejador de um + sistema que transforma perguntas em consultas SQL.\n\nSeu papel: analisar a + situa\u00e7\u00e3o atual e decidir a pr\u00f3xima a\u00e7\u00e3o.\n\nContexto + atual:\n- Pergunta do usu\u00e1rio: \"Qual o valor medio dos pedidos?\"\n\n- + conversa_previa: Nenhuma\n\n- Schema: === SCHEMA SQLITE (INTROSPECCAO REAL) + ===\n\nTabela: customers\n- customer_id: TEXT (PK)\n- customer_unique_id: TEXT\n- + customer_zip_code_prefix: INTEGER\n- customer_city: TEXT\n- customer_state: + TEXT\n\nTabela: geolocation\n- geolocation_zip_code_prefix: INTEGER\n- geolocation_lat: + REAL\n- geolocation_lng: REAL\n- geolocation_city: TEXT\n- geolocation_state: + TEXT\n\nTabela: order_items\n- order_id: TEXT (PK)\n- order_item_id: INTEGER + (PK)\n- product_id: TEXT\n- seller_id: TEXT\n- shipping_limit_date: TEXT\n- + price: REAL\n- freight_value: REAL\n Foreign keys:\n - seller_id -> sellers.seller_id + (on_update=NO ACTION, on_delete=NO ACTION)\n - product_id -> products.product_id + (on_update=NO ACTION, on_delete=NO ACTION)\n - order_id -> orders.order_id + (on_update=NO ACTION, on_delete=NO ACTION)\n\nTabela: order_payments\n- order_id: + TEXT (PK)\n- payment_sequential: INTEGER (PK)\n- payment_type: TEXT\n- payment_installments: + INTEGER\n- payment_value: REAL\n Foreign keys:\n - order_id -> orders.order_id + (on_update=NO ACTION, on_delete=NO ACTION)\n\nTabela: order_reviews\n- review_id: + TEXT (PK)\n- order_id: TEXT\n- review_score: INTEGER\n- review_comment_title: + TEXT\n- review_comment_message: TEXT\n- review_creation_date: TEXT\n- review_answer_timestamp: + TEXT\n Foreign keys:\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\n\nTabela: orders\n- order_id: TEXT (PK)\n- customer_id: + TEXT\n- order_status: TEXT\n- order_purchase_timestamp: TEXT\n- order_approved_at: + TEXT\n- order_delivered_carrier_date: TEXT\n- order_delivered_customer_date: + 
TEXT\n- order_estimated_delivery_date: TEXT\n Foreign keys:\n - customer_id + -> customers.customer_id (on_update=NO ACTION, on_delete=NO ACTION)\n\nTabela: + products\n- product_id: TEXT (PK)\n- product_category_name: TEXT\n- product_name_length: + REAL\n- product_description_length: REAL\n- product_photos_qty: REAL\n- product_weight_g: + REAL\n- product_length_cm: REAL\n- product_height_cm: REAL\n- product_width_cm: + REAL\n\nTabela: sellers\n- seller_id: TEXT (PK)\n- seller_zip_code_prefix: INTEGER\n- + seller_city: TEXT\n- seller_state: TEXT\n\n\n- Feedback do cr\u00edtico: Nenhum\n- + Tentativas realizadas: 0\n- Status atual: schema_obtido\n- Erro anterior: Nenhum\n\nAVALIA\u00c7\u00c3O + CR\u00cdTICA:\nVerifique se a \"Pergunta do usu\u00e1rio\" pode ser respondida + com as tabelas e colunas do Schema.\nSe houver ambiguidade, conceitos n\u00e3o + mapeados no banco de dados, ou se a inten\u00e7\u00e3o do usu\u00e1rio n\u00e3o + estiver clara, voc\u00ea DEVE pedir mais informa\u00e7\u00f5es.\n\nResponda + EXATAMENTE no formato JSON abaixo, sem formata\u00e7\u00e3o markdown (```json):\n{{\n \"decisao\": + \"escolha_uma_opcao\",\n \"pergunta_ao_usuario\": \"escreva a pergunta aqui + se precisar de ajuda, ou deixe vazio se n\u00e3o precisar\"\n}}\n\nOp\u00e7\u00f5es + v\u00e1lidas para ''decis\u00e3o'':\n- \"pronto_codificacao\" \u2192 se temos + schema, a pergunta faz sentido e devemos gerar/regenerar SQL\n- \"revisando_estrategia\" + \u2192 se o cr\u00edtico reprovou e devemos tentar uma abordagem diferente\n- + \"necessita_ajuda\" \u2192 a pergunta n\u00e3o \u00e9 clara, n\u00e3o faz sentido, + falta contexto ou n\u00e3o h\u00e1 dados no schema para responder.\n"}], "role": + "user"}], "safetySettings": [], "generationConfig": {"temperature": 0.7, "candidateCount": + 1}}' + headers: + Accept: + - '*' + Accept-Encoding: + - g + Connection: + - k + Content-Length: + - '3' + Content-Type: + - a + Host: + - g + User-Agent: + - g + x-goog-api-client: + - g + method: 
POST + uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent + response: + body: + string: !!binary | + H4sIAAAAAAAC/2WRXW+CMBSG7/kVpNe6qJlx7nJfiReLbpJl2VjMGT1CM2yxpxg3wn9fC6J1g4TA + +749Hw9VEIYsAckFB4PErsN3q4Rh1Tydp6RBaazRSVYsQJtTtr0q791GDO7dIVbF0n3HjGMiCFRs + xZgV2pZVq0RxsRYJJE7vdckCdVpKAytQq5JK0OJwyt6yZl6f+vj+0TtNp1WOrvVGccy7eN0F2FpI + QdkzAinpYstovmBHV0iOeysPgq5BU5qVBCk+ogHLCY403CKbwkTqC+WtKhtO0+mkreZxPQuMpgff + KAP5mTUcD4a9f5XpzvYVuU/c+xl2TciF+Xa7RPevEfNQmD+DdTACjxkzmSrTzJwPeTkZH2xCvRMJ + RgK1a0HGbgWaO7AN1JbzC2oSLdAUNxZxf3Qx7q9zoKwZiGmkQknCGXeZ+ZtaAC0/b7b48DOZLXY0 + v9o+EQvq4BdB7uIkkwIAAA== + headers: + Alt-Svc: + - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 + Content-Encoding: + - gzip + Content-Type: + - application/json; charset=UTF-8 + Date: + - Thu, 21 May 2026 23:50:19 GMT + Server: + - scaffolding on HTTPServer2 + Server-Timing: + - gfet4t7; dur=2862 Transfer-Encoding: - chunked Vary: @@ -114,38 +581,40 @@ interactions: REAL\n- product_height_cm: REAL\n- product_width_cm: REAL\n\nTabela: sellers\n- seller_id: TEXT (PK)\n- seller_zip_code_prefix: INTEGER\n- seller_city: TEXT\n- seller_state: TEXT\n\n\n=== PERGUNTA DO USU\u00c1RIO ===\nQual o valor medio - dos pedidos?\n\n\n\nResponda APENAS com a consulta SQL, sem markdown, sem explica\u00e7\u00e3o."}], - "role": "user"}], "safetySettings": [], "generationConfig": {"temperature": - 0.7, "candidateCount": 1}}' + dos pedidos?\n\n=== CONVERSA PR\u00c9VIA (CONTEXTO ADICIONAL) ===\nNenhuma\n\n=== + HIST\u00d3RICO DE TENTATIVAS ANTERIORES ===\nNenhuma tentativa anterior.\n\nResponda + APENAS com a consulta SQL, sem markdown, sem explica\u00e7\u00e3o."}], "role": + "user"}], "safetySettings": [], "generationConfig": {"temperature": 0.7, "candidateCount": + 1}}' headers: Accept: - - '*/*' + - '*' Accept-Encoding: - - gzip, deflate, zstd + - g Connection: - - keep-alive + - k Content-Length: - - '3052' + - '3' Content-Type: - - application/json + - a Host: - - 
generativelanguage.googleapis.com + - g User-Agent: - - google-genai-sdk/1.68.0 gl-python/3.10.12 + - g x-goog-api-client: - - google-genai-sdk/1.68.0 gl-python/3.10.12 + - g method: POST uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent response: body: string: !!binary | - H4sIAAAAAAAC/2WRXU/CMBSG7/crml5BIoYPUaNXCEhIJCNsEI0xpNDDVt3aZe0MSPbf7T4KRXex - NOd9z9dzjg5CeEs4ZZQokPgBvesIQsfyX2iCK+BKCyakgwlJ1dlbfUfrrS0K9kUS9sYv46GPBqtJ - QwlForVIKaTrbxJl0ETPC3eGGrXHW84aCTnEup3RBx76l1UlVYHaLtFk4S7n6OmtjjPafMTWQPnp - /XF1XiMVERQzxoJCZOy5MeAd40yGCyBS8HIV353jk8o4hb0Otx3ToCyNM0kCmIEiGig5YcNJKuJE - +eIL+FBkJdD7/k1VzTrAhaFn9JLBhdRpd41oVZYj3ZdF9mmsq+k1ScTUodjFH7/62EKh/gxmYDgW - M6xCkQWhuhyy07t1amoVyBWkklXEAog1w1b3ut/aRUSGZUecgkwElzClhccVvku2Gz7dfiY/d9N5 - uPFGbBBgJ3d+AdJxKmKdAgAA + H4sIAAAAAAAC/2WRX0/CMBTF3/cpmj5BIgYURH1DnIARmTCIxhjS0MvWuLVL2xGQ8N3t/kHRPSzN + Oef23vvr3kEIrwinjBINCt+jT6MgtM//mSe4Bq6NUUlGTIjUp2zx7a2ziWjYZkV45r64fR/1FoOa + FppESyEpyOWGRCnU0dN0Mka1MjObj2sJ2cWmXeX3ZuhfVVFUCGVcocF0MvfQw0epM1rH1jyH4/nr + 4rSFFBFkI8aCQlTFD1UArxlnKpwCUYLnm/gTDx9dxilsjdx0qgb51ThVJIAxaGJ4kiM1nEgRJ9oX + 38D7Is153t51itss/meB63bp5wjOrFazWxVbN6tH05dF9stYj2bWJBHTu2wX3333sYVC/xmsguFY + zLAORRqE+nzIVvumtBXIDVuBz0BmLZQ2WxFJM7A51ILzAqRiBdAAYoO4cXXZaawjosJ8ICxBJYIr + GNEs4z0Lj6ihdNdD8dMdeZ3XTRy8jbFzcH4BOzSJsrsCAAA= headers: Alt-Svc: - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 @@ -154,11 +623,11 @@ interactions: Content-Type: - application/json; charset=UTF-8 Date: - - Thu, 02 Apr 2026 14:59:07 GMT + - Thu, 21 May 2026 23:50:21 GMT Server: - scaffolding on HTTPServer2 Server-Timing: - - gfet4t7; dur=1501 + - gfet4t7; dur=1766 Transfer-Encoding: - chunked Vary: @@ -180,46 +649,127 @@ interactions: body: '{"contents": [{"parts": [{"text": "Voc\u00ea \u00e9 um revisor de qualidade para consultas SQL geradas por IA.\n\nSua tarefa: avaliar se a consulta SQL e seus resultados respondem adequadamente\n\u00e0 pergunta original do usu\u00e1rio.\n\n=== - PERGUNTA 
DO USU\u00c1RIO ===\nQual o valor medio dos pedidos?\n\n=== SQL GERADA - ===\nSELECT AVG(total_order_value) FROM (SELECT SUM(payment_value) AS total_order_value - FROM order_payments GROUP BY order_id);\n\n=== RESULTADO DA EXECU\u00c7\u00c3O - ===\nStatus: exec_ok\nTotal de linhas: 1\nAmostra dos resultados (primeiras - linhas):\n[{''AVG(total_order_value)'': 160.99026669347316}]\n\n=== ERROS (se - houver) ===\nNenhum\n\nAvalie:\n1. A SQL responde \u00e0 pergunta do usu\u00e1rio?\n2. - Os resultados fazem sentido?\n3. H\u00e1 algum erro l\u00f3gico ou de interpreta\u00e7\u00e3o?\n\nResponda + PERGUNTA DO USU\u00c1RIO ===\nQual o valor medio dos pedidos?\n\n=== SCHEMA + DO BANCO ===\n=== SCHEMA SQLITE (INTROSPECCAO REAL) ===\n\nTabela: customers\n- + customer_id: TEXT (PK)\n- customer_unique_id: TEXT\n- customer_zip_code_prefix: + INTEGER\n- customer_city: TEXT\n- customer_state: TEXT\n\nTabela: geolocation\n- + geolocation_zip_code_prefix: INTEGER\n- geolocation_lat: REAL\n- geolocation_lng: + REAL\n- geolocation_city: TEXT\n- geolocation_state: TEXT\n\nTabela: order_items\n- + order_id: TEXT (PK)\n- order_item_id: INTEGER (PK)\n- product_id: TEXT\n- seller_id: + TEXT\n- shipping_limit_date: TEXT\n- price: REAL\n- freight_value: REAL\n Foreign + keys:\n - seller_id -> sellers.seller_id (on_update=NO ACTION, on_delete=NO + ACTION)\n - product_id -> products.product_id (on_update=NO ACTION, on_delete=NO + ACTION)\n - order_id -> orders.order_id (on_update=NO ACTION, on_delete=NO + ACTION)\n\nTabela: order_payments\n- order_id: TEXT (PK)\n- payment_sequential: + INTEGER (PK)\n- payment_type: TEXT\n- payment_installments: INTEGER\n- payment_value: + REAL\n Foreign keys:\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\n\nTabela: order_reviews\n- review_id: TEXT (PK)\n- order_id: + TEXT\n- review_score: INTEGER\n- review_comment_title: TEXT\n- review_comment_message: + TEXT\n- review_creation_date: TEXT\n- review_answer_timestamp: TEXT\n 
Foreign + keys:\n - order_id -> orders.order_id (on_update=NO ACTION, on_delete=NO ACTION)\n\nTabela: + orders\n- order_id: TEXT (PK)\n- customer_id: TEXT\n- order_status: TEXT\n- + order_purchase_timestamp: TEXT\n- order_approved_at: TEXT\n- order_delivered_carrier_date: + TEXT\n- order_delivered_customer_date: TEXT\n- order_estimated_delivery_date: + TEXT\n Foreign keys:\n - customer_id -> customers.customer_id (on_update=NO + ACTION, on_delete=NO ACTION)\n\nTabela: products\n- product_id: TEXT (PK)\n- + product_category_name: TEXT\n- product_name_length: REAL\n- product_description_length: + REAL\n- product_photos_qty: REAL\n- product_weight_g: REAL\n- product_length_cm: + REAL\n- product_height_cm: REAL\n- product_width_cm: REAL\n\nTabela: sellers\n- + seller_id: TEXT (PK)\n- seller_zip_code_prefix: INTEGER\n- seller_city: TEXT\n- + seller_state: TEXT\n\n\n=== CONVERSA COM O AGENTE (se houver) ===\nNenhuma\n\n=== + SQL GERADA ===\nSELECT AVG(total_order_value) FROM (SELECT SUM(payment_value) + AS total_order_value FROM order_payments GROUP BY order_id)\n\n=== RESULTADO + DA EXECU\u00c7\u00c3O ===\nStatus: exec_ok\nTotal de linhas: 1\nAmostra dos + resultados (primeiras linhas):\n[{''AVG(total_order_value)'': 160.9902666934835}]\n\n=== + ERROS (se houver) ===\nNenhum\n\n=== TENTATIVAS ANTERIORES ===\nNenhuma tentativa + anterior (esta \u00e9 a primeira).\n\n=== EXEMPLOS DE AVALIA\u00c7\u00c3O ===\n\n-- + EXEMPLO 1: REPROVADO (escopo incompleto) --\nPergunta: \"Which airport has the + least number of flights?\"\nSQL: SELECT SourceAirport FROM flights GROUP BY + SourceAirport ORDER BY COUNT(*) ASC LIMIT 1\nResultado: [(''AID'',)]\nVEREDITO: + REPROVADO\nRaz\u00e3o: A query conta apenas voos com partida (SourceAirport) + e ignora voos com chegada (DestAirport).\nO escopo da pergunta \u00e9 \"flights\" + em geral \u2014 a query responde a uma pergunta diferente.\n\n-- EXEMPLO 2: + REPROVADO (erro sem\u00e2ntico: MIN vs MAX) --\nPergunta: \"Which Asian countries + 
have a population larger than any country in Africa?\"\nSQL: SELECT Name FROM + country WHERE Continent=''Asia'' AND Population > (SELECT MAX(Population) FROM + country WHERE Continent=''Africa'')\nResultado: [] (vazio)\nVEREDITO: REPROVADO\nRaz\u00e3o: + \"Larger than any country in Africa\" significa maior que pelo menos um pa\u00eds + africano (MIN),\nn\u00e3o maior que todos os pa\u00edses africanos (MAX). A + l\u00f3gica est\u00e1 semanticamente errada.\n\n-- EXEMPLO 3: REPROVADO (resultado + vazio suspeito) --\nPergunta: \"Find the last name of students who live in North + Carolina and are not enrolled in any degree.\"\nSQL: SELECT last_name FROM Students + WHERE state_province_county = ''North Carolina'' AND ...\nResultado: [] (vazio)\nVEREDITO: + REPROVADO\nRaz\u00e3o: Resultado vazio quando a pergunta espera dados reais + \u00e9 suspeito. Verifique se o filtro\nde string corresponde exatamente ao + valor no banco (ex: ''NorthCarolina'' vs ''North Carolina'').\n\n-- EXEMPLO + 4: REPROVADO (JOIN incorreto muda o que est\u00e1 sendo contado) --\nPergunta: + \"Find the name of makers that produced some cars in 1970.\"\nSQL: SELECT DISTINCT + Maker FROM car_makers JOIN car_names ON car_makers.Id = car_names.MakeId JOIN + cars_data ON car_names.MakeId = cars_data.Id WHERE cars_data.Year = 1970\nResultado: + [(''chevrolet'',), (''buick'',)]\nVEREDITO: REPROVADO\nRaz\u00e3o: O JOIN usa + car_names.MakeId para conectar a cars_data, mas cars_data.Id refere-se\nao ID + do carro, n\u00e3o do fabricante. O caminho correto seria via model_list. 
Os + resultados\nparecem plaus\u00edveis mas derivam de uma jun\u00e7\u00e3o incorreta.\n\n-- + EXEMPLO 5: APROVADO (formato diferente, resposta correta) --\nPergunta: \"On + average, when were the transcripts printed?\"\nSQL: SELECT AVG(transcript_date) + AS average_transcript_date FROM Transcripts\nResultado: [(''1989.9333333333334'',)]\nVEREDITO: + APROVADO\nRaz\u00e3o: O resultado \u00e9 um n\u00famero que representa a m\u00e9dia + das datas (formato num\u00e9rico do SQLite).\nEmbora n\u00e3o seja uma data + formatada, responde corretamente \u00e0 pergunta. Diferen\u00e7a de\nrepresenta\u00e7\u00e3o + n\u00e3o \u00e9 motivo de reprova\u00e7\u00e3o.\n\n-- EXEMPLO 6: APROVADO (query + mais simples que o gold, resultado equivalente) --\nPergunta: \"Which model + of car has the minimum horsepower?\"\nSQL: SELECT Model FROM car_names JOIN + cars_data ON car_names.MakeId = cars_data.Id WHERE Horsepower = (SELECT MIN(Horsepower) + FROM cars_data) LIMIT 1\nResultado: [(''triumph'',)]\nVEREDITO: APROVADO\nRaz\u00e3o: + A query retorna corretamente o modelo com menor pot\u00eancia. O LIMIT 1 garante + unicidade\ne o resultado \u00e9 semanticamente correto. 
Aprovar.\n\n=== CRIT\u00c9RIOS + DE AVALIA\u00c7\u00c3O ===\n\nREPROVE quando houver:\n- Escopo incompleto: query + cobre apenas parte do que a pergunta pede\n- Erro sem\u00e2ntico: l\u00f3gica + correta na forma mas errada no significado (MIN vs MAX, ANY vs ALL)\n- JOIN + incorreto que altera os dados sendo agregados ou filtrados\n- Resultado vazio + quando a pergunta claramente espera dados\n- Filtro com valor literal diferente + do que est\u00e1 no banco\n- M\u00e9trica errada (SUM vs AVG, COUNT vs COUNT + DISTINCT, etc.)\n- Erro de execu\u00e7\u00e3o SQL\n\nAPROVE quando:\n- O resultado + responde \u00e0 pergunta, mesmo com formato ou representa\u00e7\u00e3o diferente\n- + H\u00e1 colunas extras que n\u00e3o prejudicam a resposta\n- A precis\u00e3o + num\u00e9rica difere mas o valor est\u00e1 correto\n- A query \u00e9 mais simples + que o esperado mas semanticamente equivalente\n\nAvalie com rigor sem\u00e2ntico. + Resultados que parecem plaus\u00edveis mas derivam de l\u00f3gica\nincorreta + devem ser reprovados. 
N\u00e3o presuma que uma query bem-formada est\u00e1 correta.\n\nResponda no formato:\nVEREDITO: APROVADO ou REPROVADO\nFEEDBACK: "}], "role": "user"}], "safetySettings": [], "generationConfig": {"temperature": 0.7, "candidateCount": 1}}' headers: Accept: - - '*/*' + - '*' Accept-Encoding: - - gzip, deflate, zstd + - g Connection: - - keep-alive + - k Content-Length: - - '1039' + - '7' Content-Type: - - application/json + - a Host: - - generativelanguage.googleapis.com + - g User-Agent: - - google-genai-sdk/1.68.0 gl-python/3.10.12 + - g x-goog-api-client: - - google-genai-sdk/1.68.0 gl-python/3.10.12 + - g method: POST uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent response: body: string: !!binary | - H4sIAAAAAAAC/2VSTW/TQBC9+1eM9uxEUFKl9BYSgyJAdlIrQgIOi3fiLKx3zX5UpVH+SznyO/LH - GNtx6oAP1mrmzZs3b2YfAbCCayEF9+jYLXymCMC+/Tc5oz1qT4k+RMGaW/+M7b794E0Qjw9NEdsk - 62SxzNNbmGXrdDNbpF/02yRZvJnN31MMiN8F5TncrT6ARVcbLRC4wJ+BC15Ra4TjE9Roy6AJJgwE - F46/rTQxFFwVQZF6QzzWoj8VGLjnyljwxnMFxFcQF3HQlAYwBqzAYRlo5hg4VMc/QhIzOoeuq0Q3 - htQ1ehptwjjY8se2SnvZdqtIR3Wu1YEeVhZ8zAY2HM7vr/GzedYobJypjEDVww89gG2llm63Ru6M - bmB3eZqxc1aSOw8UfhH1DVpqFhwv8SMZQGvk52Wx2pqq9rn5gXpuQrvGq8lNxzZY+wXg+uUp37p3 - kZpOX8X/EbsFtZVqeA+DU6EpuZL+VzNKnnzK2cAJ/4+u3otoYBnzOxPKnb/UOJlOopNpnY8btE52 - hpVYkYWjq/H1aKu427UdWXdaDpeiwWR0kVyMVu/w2/fH6TK7f53Nw+qGRYfoLw2rwCASAwAA + H4sIAAAAAAAC/11S0W7aUAx95yus+7RJFHVdt9K+sUIntFUwGqFJ21QM18DVkji9vqnaIv5lr/uO + /tichEBYFCVX9rk+x8fetADMAlPrLAYScwU/NAKwKb9FjtNAadBEHdJghj4csNWzaZwVEuipuGSm + g8mgP4xGV9AbT0bTXn/0M70ZDPqfetdfNAZaX/I4INx9+woLjBd5jBr0ngImSkzA8IgxewgcMAZL + irIIGalkhjfCiapnmGX4XODvFZzTDDK9MWNvyd87O3sL1AZKQGiVa6ftPRNC8vrXOtS6IiQVlf4L + MicdGIowaCDjVJn1XbJPlN3TwgnC6x8V4ld5qg0Iz/1BbVmWwbLspErHNBza7s+/2gdfPcdUmJaw + pbiGb2uAWbrUyXpCKJwWsLtoNDb7rFOFTxo+bdUEZWmTC67oVu3UCeN+jibznGQh4t+UXnNeTvjd + 5eVFVa6xEkeIj+e7fDmMo9TZ5XmdbJSWvhK7uLksjT3SPjF24bloJhp8j0zDi/C/stqOVsM1E9ac + 
r9bhWGW3+36XFvKPbkGRI19wSNC20NvC2tLWyukpeXGVpStK1OSTs86Hk2WMsi4VmWoBhIa2wIwz + HmM2Dze3n+nlYjg+nT+kD72uaW1b/wDfu+eoTwMAAA== headers: Alt-Svc: - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 @@ -228,11 +778,11 @@ interactions: Content-Type: - application/json; charset=UTF-8 Date: - - Thu, 02 Apr 2026 14:59:11 GMT + - Thu, 21 May 2026 23:50:27 GMT Server: - scaffolding on HTTPServer2 Server-Timing: - - gfet4t7; dur=2871 + - gfet4t7; dur=4936 Transfer-Encoding: - chunked Vary: diff --git a/tests/cassettes/test_integracao/test_estado_final_completo__openai-gpt-4o-mini.yaml b/tests/cassettes/test_integracao/test_estado_final_completo__openai-gpt-4o-mini.yaml new file mode 100644 index 0000000..a374ca6 --- /dev/null +++ b/tests/cassettes/test_integracao/test_estado_final_completo__openai-gpt-4o-mini.yaml @@ -0,0 +1,495 @@ +interactions: +- request: + body: "{\"messages\":[{\"content\":\"Voc\xEA \xE9 o planejador de um sistema que + transforma perguntas em consultas SQL.\\n\\nSeu papel: analisar a situa\xE7\xE3o + atual e decidir a pr\xF3xima a\xE7\xE3o.\\n\\nContexto atual:\\n- Pergunta do + usu\xE1rio: \\\"Considerando o valor total cobrado por pedido, qual \xE9 a m\xE9dia + de valor dos pedidos?\\\"\\n\\n- conversa_previa: Nenhuma\\n\\n- Schema: === + SCHEMA SQLITE (INTROSPECCAO REAL) ===\\n\\nTabela: customers\\n- customer_id: + TEXT (PK)\\n- customer_unique_id: TEXT\\n- customer_zip_code_prefix: INTEGER\\n- + customer_city: TEXT\\n- customer_state: TEXT\\n\\nTabela: geolocation\\n- geolocation_zip_code_prefix: + INTEGER\\n- geolocation_lat: REAL\\n- geolocation_lng: REAL\\n- geolocation_city: + TEXT\\n- geolocation_state: TEXT\\n\\nTabela: order_items\\n- order_id: TEXT + (PK)\\n- order_item_id: INTEGER (PK)\\n- product_id: TEXT\\n- seller_id: TEXT\\n- + shipping_limit_date: TEXT\\n- price: REAL\\n- freight_value: REAL\\n Foreign + keys:\\n - seller_id -> sellers.seller_id (on_update=NO ACTION, on_delete=NO + ACTION)\\n - product_id -> products.product_id 
(on_update=NO ACTION, on_delete=NO + ACTION)\\n - order_id -> orders.order_id (on_update=NO ACTION, on_delete=NO + ACTION)\\n\\nTabela: order_payments\\n- order_id: TEXT (PK)\\n- payment_sequential: + INTEGER (PK)\\n- payment_type: TEXT\\n- payment_installments: INTEGER\\n- payment_value: + REAL\\n Foreign keys:\\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\\n\\nTabela: order_reviews\\n- review_id: TEXT (PK)\\n- + order_id: TEXT\\n- review_score: INTEGER\\n- review_comment_title: TEXT\\n- + review_comment_message: TEXT\\n- review_creation_date: TEXT\\n- review_answer_timestamp: + TEXT\\n Foreign keys:\\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\\n\\nTabela: orders\\n- order_id: TEXT (PK)\\n- customer_id: + TEXT\\n- order_status: TEXT\\n- order_purchase_timestamp: TEXT\\n- order_approved_at: + TEXT\\n- order_delivered_carrier_date: TEXT\\n- order_delivered_customer_date: + TEXT\\n- order_estimated_delivery_date: TEXT\\n Foreign keys:\\n - customer_id + -> customers.customer_id (on_update=NO ACTION, on_delete=NO ACTION)\\n\\nTabela: + products\\n- product_id: TEXT (PK)\\n- product_category_name: TEXT\\n- product_name_length: + REAL\\n- product_description_length: REAL\\n- product_photos_qty: REAL\\n- product_weight_g: + REAL\\n- product_length_cm: REAL\\n- product_height_cm: REAL\\n- product_width_cm: + REAL\\n\\nTabela: sellers\\n- seller_id: TEXT (PK)\\n- seller_zip_code_prefix: + INTEGER\\n- seller_city: TEXT\\n- seller_state: TEXT\\n\\n\\n- Feedback do cr\xEDtico: + Nenhum\\n- Tentativas realizadas: 0\\n- Status atual: schema_obtido\\n- Erro + anterior: Nenhum\\n\\nAVALIA\xC7\xC3O CR\xCDTICA:\\nVerifique se a \\\"Pergunta + do usu\xE1rio\\\" pode ser respondida com as tabelas e colunas do Schema.\\nSe + houver ambiguidade, conceitos n\xE3o mapeados no banco de dados, ou se a inten\xE7\xE3o + do usu\xE1rio n\xE3o estiver clara, voc\xEA DEVE pedir mais informa\xE7\xF5es.\\n\\nResponda + EXATAMENTE no 
formato JSON abaixo, sem formata\xE7\xE3o markdown (```json):\\n{{\\n + \ \\\"decisao\\\": \\\"escolha_uma_opcao\\\",\\n \\\"pergunta_ao_usuario\\\": + \\\"escreva a pergunta aqui se precisar de ajuda, ou deixe vazio se n\xE3o precisar\\\"\\n}}\\n\\nOp\xE7\xF5es + v\xE1lidas para 'decis\xE3o':\\n- \\\"pronto_codificacao\\\" \u2192 se temos + schema, a pergunta faz sentido e devemos gerar/regenerar SQL\\n- \\\"revisando_estrategia\\\" + \u2192 se o cr\xEDtico reprovou e devemos tentar uma abordagem diferente\\n- + \\\"necessita_ajuda\\\" \u2192 a pergunta n\xE3o \xE9 clara, n\xE3o faz sentido, + falta contexto ou n\xE3o h\xE1 dados no schema para responder.\\n\",\"role\":\"user\"}],\"model\":\"gpt-4o-mini\",\"stream\":false}" + headers: + Accept: + - a + Accept-Encoding: + - g + Connection: + - k + Content-Length: + - '3' + Content-Type: + - a + Cookie: + - _ + Host: + - a + User-Agent: + - O + X-Stainless-Arch: + - x + X-Stainless-Async: + - f + X-Stainless-Lang: + - p + X-Stainless-OS: + - L + X-Stainless-Package-Version: + - '2' + X-Stainless-Raw-Response: + - t + X-Stainless-Runtime: + - C + X-Stainless-Runtime-Version: + - '3' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdck0z3GzrAQAAAD//41SwW7bMAy95ysKne3B + SQzbPW7D0MN23WkpBEWiHbW2qIlysaHIv4+yk9rtOmAwIMDv8VF8T0RXff12evyCTz8fPu6rT8fh + 7u7zd5ElBR4fQMer6oNG1kG06GZaB1ARUtdtXd+WRVPt64kY0ECfZJ2PeYn5YJ3Nd8WuzIs63zYX + 9QmtBuKyH/x7c/M8nWlOZ+AXw0V2RQYgUh0wdi1iMGCfEKGILEXlosgWUqOL4KbRnw8uQQdhQFtS + eGDwIHzgCpQajW2tVjrh2bXSQ+hGF5VUKEcaVbAXFX/uvL4nQDuSSl7d2PcrQjmHUaWsJof3F+b8 + 4qnHjmc40hupaDkrOkmOljhnnp8iejGxZz7vp+zGV3EkM4OPMuIjTNc1dTP3E8uTLexufyEjT9gv + +G2xzd7pJw1EZXtapS84sBOYRbo8lRqNxRWxWbn+e5r3es/Orev+p/1CaA2el1H6AMbq146XsgBp + o/9V9pLyNLAgCE+8ojJaCOklDLRq7Oc9E/SbIgySn6uD4IOdl631smrKUlUt1I3YnDd/AAdji0N6 + AwAA + headers: + Access-Control-Expose-Headers: + - CF-Ray + 
CF-Cache-Status: + - DYNAMIC + CF-Ray: + - + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Fri, 22 May 2026 00:10:37 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + - CF-Ray + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - + openai-processing-ms: + - '453' + openai-project: + - + openai-version: + - '2020-10-01' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-limit-tokens: + - '200000' + x-ratelimit-remaining-requests: + - + x-ratelimit-remaining-tokens: + - + x-ratelimit-reset-requests: + - 49.93s + x-ratelimit-reset-tokens: + - 251ms + x-request-id: + - + status: + code: 200 + message: OK +- request: + body: "{\"messages\":[{\"content\":\"Voc\xEA \xE9 um especialista em SQL para + bancos SQLite.\\n\\nSua tarefa: gerar UMA \xFAnica consulta SQL SELECT que responda + \xE0 pergunta do usu\xE1rio,\\nusando o schema do banco de dados fornecido abaixo.\\n\\nRegras:\\n- + Gere APENAS uma consulta SELECT (ou WITH/CTE seguido de SELECT).\\n- N\xC3O + use INSERT, UPDATE, DELETE, DROP, ALTER ou qualquer comando de escrita.\\n- + N\xC3O inclua explica\xE7\xF5es, apenas a SQL pura.\\n- Use nomes de tabelas + e colunas EXATAMENTE como aparecem no schema.\\n- Se a pergunta for amb\xEDgua, + fa\xE7a a interpreta\xE7\xE3o mais razo\xE1vel.\\n\\n=== SCHEMA DO BANCO ===\\n=== + SCHEMA SQLITE (INTROSPECCAO REAL) ===\\n\\nTabela: customers\\n- customer_id: + TEXT (PK)\\n- customer_unique_id: TEXT\\n- customer_zip_code_prefix: INTEGER\\n- + customer_city: TEXT\\n- customer_state: TEXT\\n\\nTabela: geolocation\\n- geolocation_zip_code_prefix: + INTEGER\\n- geolocation_lat: REAL\\n- geolocation_lng: REAL\\n- geolocation_city: + TEXT\\n- geolocation_state: TEXT\\n\\nTabela: order_items\\n- order_id: 
TEXT + (PK)\\n- order_item_id: INTEGER (PK)\\n- product_id: TEXT\\n- seller_id: TEXT\\n- + shipping_limit_date: TEXT\\n- price: REAL\\n- freight_value: REAL\\n Foreign + keys:\\n - seller_id -> sellers.seller_id (on_update=NO ACTION, on_delete=NO + ACTION)\\n - product_id -> products.product_id (on_update=NO ACTION, on_delete=NO + ACTION)\\n - order_id -> orders.order_id (on_update=NO ACTION, on_delete=NO + ACTION)\\n\\nTabela: order_payments\\n- order_id: TEXT (PK)\\n- payment_sequential: + INTEGER (PK)\\n- payment_type: TEXT\\n- payment_installments: INTEGER\\n- payment_value: + REAL\\n Foreign keys:\\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\\n\\nTabela: order_reviews\\n- review_id: TEXT (PK)\\n- + order_id: TEXT\\n- review_score: INTEGER\\n- review_comment_title: TEXT\\n- + review_comment_message: TEXT\\n- review_creation_date: TEXT\\n- review_answer_timestamp: + TEXT\\n Foreign keys:\\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\\n\\nTabela: orders\\n- order_id: TEXT (PK)\\n- customer_id: + TEXT\\n- order_status: TEXT\\n- order_purchase_timestamp: TEXT\\n- order_approved_at: + TEXT\\n- order_delivered_carrier_date: TEXT\\n- order_delivered_customer_date: + TEXT\\n- order_estimated_delivery_date: TEXT\\n Foreign keys:\\n - customer_id + -> customers.customer_id (on_update=NO ACTION, on_delete=NO ACTION)\\n\\nTabela: + products\\n- product_id: TEXT (PK)\\n- product_category_name: TEXT\\n- product_name_length: + REAL\\n- product_description_length: REAL\\n- product_photos_qty: REAL\\n- product_weight_g: + REAL\\n- product_length_cm: REAL\\n- product_height_cm: REAL\\n- product_width_cm: + REAL\\n\\nTabela: sellers\\n- seller_id: TEXT (PK)\\n- seller_zip_code_prefix: + INTEGER\\n- seller_city: TEXT\\n- seller_state: TEXT\\n\\n\\n=== PERGUNTA DO + USU\xC1RIO ===\\nConsiderando o valor total cobrado por pedido, qual \xE9 a + m\xE9dia de valor dos pedidos?\\n\\n=== CONVERSA PR\xC9VIA (CONTEXTO 
ADICIONAL) + ===\\nNenhuma\\n\\n=== HIST\xD3RICO DE TENTATIVAS ANTERIORES ===\\nNenhuma tentativa + anterior.\\n\\nResponda APENAS com a consulta SQL, sem markdown, sem explica\xE7\xE3o.\",\"role\":\"user\"}],\"model\":\"gpt-4o-mini\",\"stream\":false}" + headers: + Accept: + - a + Accept-Encoding: + - g + Connection: + - k + Content-Length: + - '3' + Content-Type: + - a + Cookie: + - _ + Host: + - a + User-Agent: + - O + X-Stainless-Arch: + - x + X-Stainless-Async: + - f + X-Stainless-Lang: + - p + X-Stainless-OS: + - L + X-Stainless-Package-Version: + - '2' + X-Stainless-Raw-Response: + - t + X-Stainless-Runtime: + - C + X-Stainless-Runtime-Version: + - '3' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdck0z3H3qQQAAAD//41SwW7UMBC971dEPoG0 + qbLdlARxWpUFDiDULu2lVJZjz2bdJrblcSoqtP/OONltUihSL1b8nt/LzJvx9+LC5OsrvPhSf7y8 + 24WzVVVt2DwqbHUHMhxVJ9KSDoK2ZqClBxEgui6K4n2ele+WZU+0VkETZbULaW7TVhudnmaneZoV + 6aI8qHdWS0B6dkPXJPndn7FOo+AXwdn8iLSAKGog7PiIQG+biDCBqDEIE9h8JKU1AUxf+mb9dX3+ + I1ldf37jxGNLKH8QTQdvk9UmaUFpEe/Wc0ffyuJP8+ny+7fEegWEDQr8MDX3sO1QxAZN1zQTQhhj + g4gB9W3dHpj9UyONrZ23Ff4lZVsKCHec8kQKl4rGYB3r2T2dt31g3bMMGBm1LvBg76H/XVEuBz82 + zmlkF+WBDFRhM+Jltpi/4McVBKEbnETOpJA7UKN0nI/olLYTYjbp+t9qXvIeOtemfo39SEgJjjaQ + O0+zk887Hp95iGv8v2dPKfcFMwT/QHvJgwYfJ6FgK7pmWC6Gjxig5TSuGrzzetiwreOlyCWoanlW + sNl+9gcEcc2DbwMAAA== + headers: + Access-Control-Expose-Headers: + - CF-Ray + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Fri, 22 May 2026 00:10:38 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + - CF-Ray + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - + 
openai-processing-ms: + - '556' + openai-project: + - + openai-version: + - '2020-10-01' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-limit-tokens: + - '200000' + x-ratelimit-remaining-requests: + - + x-ratelimit-remaining-tokens: + - + x-ratelimit-reset-requests: + - 49.102s + x-ratelimit-reset-tokens: + - 220ms + x-request-id: + - + status: + code: 200 + message: OK +- request: + body: "{\"messages\":[{\"content\":\"Voc\xEA \xE9 um revisor de qualidade para + consultas SQL geradas por IA.\\n\\nSua tarefa: avaliar se a consulta SQL e seus + resultados respondem adequadamente\\n\xE0 pergunta original do usu\xE1rio.\\n\\n=== + PERGUNTA DO USU\xC1RIO ===\\nConsiderando o valor total cobrado por pedido, + qual \xE9 a m\xE9dia de valor dos pedidos?\\n\\n=== SCHEMA DO BANCO ===\\n=== + SCHEMA SQLITE (INTROSPECCAO REAL) ===\\n\\nTabela: customers\\n- customer_id: + TEXT (PK)\\n- customer_unique_id: TEXT\\n- customer_zip_code_prefix: INTEGER\\n- + customer_city: TEXT\\n- customer_state: TEXT\\n\\nTabela: geolocation\\n- geolocation_zip_code_prefix: + INTEGER\\n- geolocation_lat: REAL\\n- geolocation_lng: REAL\\n- geolocation_city: + TEXT\\n- geolocation_state: TEXT\\n\\nTabela: order_items\\n- order_id: TEXT + (PK)\\n- order_item_id: INTEGER (PK)\\n- product_id: TEXT\\n- seller_id: TEXT\\n- + shipping_limit_date: TEXT\\n- price: REAL\\n- freight_value: REAL\\n Foreign + keys:\\n - seller_id -> sellers.seller_id (on_update=NO ACTION, on_delete=NO + ACTION)\\n - product_id -> products.product_id (on_update=NO ACTION, on_delete=NO + ACTION)\\n - order_id -> orders.order_id (on_update=NO ACTION, on_delete=NO + ACTION)\\n\\nTabela: order_payments\\n- order_id: TEXT (PK)\\n- payment_sequential: + INTEGER (PK)\\n- payment_type: TEXT\\n- payment_installments: INTEGER\\n- payment_value: + REAL\\n Foreign keys:\\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\\n\\nTabela: order_reviews\\n- review_id: TEXT 
(PK)\\n- + order_id: TEXT\\n- review_score: INTEGER\\n- review_comment_title: TEXT\\n- + review_comment_message: TEXT\\n- review_creation_date: TEXT\\n- review_answer_timestamp: + TEXT\\n Foreign keys:\\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\\n\\nTabela: orders\\n- order_id: TEXT (PK)\\n- customer_id: + TEXT\\n- order_status: TEXT\\n- order_purchase_timestamp: TEXT\\n- order_approved_at: + TEXT\\n- order_delivered_carrier_date: TEXT\\n- order_delivered_customer_date: + TEXT\\n- order_estimated_delivery_date: TEXT\\n Foreign keys:\\n - customer_id + -> customers.customer_id (on_update=NO ACTION, on_delete=NO ACTION)\\n\\nTabela: + products\\n- product_id: TEXT (PK)\\n- product_category_name: TEXT\\n- product_name_length: + REAL\\n- product_description_length: REAL\\n- product_photos_qty: REAL\\n- product_weight_g: + REAL\\n- product_length_cm: REAL\\n- product_height_cm: REAL\\n- product_width_cm: + REAL\\n\\nTabela: sellers\\n- seller_id: TEXT (PK)\\n- seller_zip_code_prefix: + INTEGER\\n- seller_city: TEXT\\n- seller_state: TEXT\\n\\n\\n=== CONVERSA COM + O AGENTE (se houver) ===\\nNenhuma\\n\\n=== SQL GERADA ===\\nSELECT AVG(payment_value) + AS media_valor_pedidos\\nFROM order_payments;\\n\\n=== RESULTADO DA EXECU\xC7\xC3O + ===\\nStatus: exec_ok\\nTotal de linhas: 1\\nAmostra dos resultados (primeiras + linhas):\\n[{'media_valor_pedidos': 154.10038041699553}]\\n\\n=== ERROS (se + houver) ===\\nNenhum\\n\\n=== TENTATIVAS ANTERIORES ===\\nNenhuma tentativa + anterior (esta \xE9 a primeira).\\n\\n=== EXEMPLOS DE AVALIA\xC7\xC3O ===\\n\\n-- + EXEMPLO 1: REPROVADO (escopo incompleto) --\\nPergunta: \\\"Which airport has + the least number of flights?\\\"\\nSQL: SELECT SourceAirport FROM flights GROUP + BY SourceAirport ORDER BY COUNT(*) ASC LIMIT 1\\nResultado: [('AID',)]\\nVEREDITO: + REPROVADO\\nRaz\xE3o: A query conta apenas voos com partida (SourceAirport) + e ignora voos com chegada (DestAirport).\\nO escopo da pergunta \xE9 
\\\"flights\\\" + em geral \u2014 a query responde a uma pergunta diferente.\\n\\n-- EXEMPLO 2: + REPROVADO (erro sem\xE2ntico: MIN vs MAX) --\\nPergunta: \\\"Which Asian countries + have a population larger than any country in Africa?\\\"\\nSQL: SELECT Name + FROM country WHERE Continent='Asia' AND Population > (SELECT MAX(Population) + FROM country WHERE Continent='Africa')\\nResultado: [] (vazio)\\nVEREDITO: REPROVADO\\nRaz\xE3o: + \\\"Larger than any country in Africa\\\" significa maior que pelo menos um + pa\xEDs africano (MIN),\\nn\xE3o maior que todos os pa\xEDses africanos (MAX). + A l\xF3gica est\xE1 semanticamente errada.\\n\\n-- EXEMPLO 3: REPROVADO (resultado + vazio suspeito) --\\nPergunta: \\\"Find the last name of students who live in + North Carolina and are not enrolled in any degree.\\\"\\nSQL: SELECT last_name + FROM Students WHERE state_province_county = 'North Carolina' AND ...\\nResultado: + [] (vazio)\\nVEREDITO: REPROVADO\\nRaz\xE3o: Resultado vazio quando a pergunta + espera dados reais \xE9 suspeito. Verifique se o filtro\\nde string corresponde + exatamente ao valor no banco (ex: 'NorthCarolina' vs 'North Carolina').\\n\\n-- + EXEMPLO 4: REPROVADO (JOIN incorreto muda o que est\xE1 sendo contado) --\\nPergunta: + \\\"Find the name of makers that produced some cars in 1970.\\\"\\nSQL: SELECT + DISTINCT Maker FROM car_makers JOIN car_names ON car_makers.Id = car_names.MakeId + JOIN cars_data ON car_names.MakeId = cars_data.Id WHERE cars_data.Year = 1970\\nResultado: + [('chevrolet',), ('buick',)]\\nVEREDITO: REPROVADO\\nRaz\xE3o: O JOIN usa car_names.MakeId + para conectar a cars_data, mas cars_data.Id refere-se\\nao ID do carro, n\xE3o + do fabricante. O caminho correto seria via model_list. 
Os resultados\\nparecem + plaus\xEDveis mas derivam de uma jun\xE7\xE3o incorreta.\\n\\n-- EXEMPLO 5: + APROVADO (formato diferente, resposta correta) --\\nPergunta: \\\"On average, + when were the transcripts printed?\\\"\\nSQL: SELECT AVG(transcript_date) AS + average_transcript_date FROM Transcripts\\nResultado: [('1989.9333333333334',)]\\nVEREDITO: + APROVADO\\nRaz\xE3o: O resultado \xE9 um n\xFAmero que representa a m\xE9dia + das datas (formato num\xE9rico do SQLite).\\nEmbora n\xE3o seja uma data formatada, + responde corretamente \xE0 pergunta. Diferen\xE7a de\\nrepresenta\xE7\xE3o n\xE3o + \xE9 motivo de reprova\xE7\xE3o.\\n\\n-- EXEMPLO 6: APROVADO (query mais simples + que o gold, resultado equivalente) --\\nPergunta: \\\"Which model of car has + the minimum horsepower?\\\"\\nSQL: SELECT Model FROM car_names JOIN cars_data + ON car_names.MakeId = cars_data.Id WHERE Horsepower = (SELECT MIN(Horsepower) + FROM cars_data) LIMIT 1\\nResultado: [('triumph',)]\\nVEREDITO: APROVADO\\nRaz\xE3o: + A query retorna corretamente o modelo com menor pot\xEAncia. O LIMIT 1 garante + unicidade\\ne o resultado \xE9 semanticamente correto. 
Aprovar.\\n\\n=== CRIT\xC9RIOS + DE AVALIA\xC7\xC3O ===\\n\\nREPROVE quando houver:\\n- Escopo incompleto: query + cobre apenas parte do que a pergunta pede\\n- Erro sem\xE2ntico: l\xF3gica correta + na forma mas errada no significado (MIN vs MAX, ANY vs ALL)\\n- JOIN incorreto + que altera os dados sendo agregados ou filtrados\\n- Resultado vazio quando + a pergunta claramente espera dados\\n- Filtro com valor literal diferente do + que est\xE1 no banco\\n- M\xE9trica errada (SUM vs AVG, COUNT vs COUNT DISTINCT, + etc.)\\n- Erro de execu\xE7\xE3o SQL\\n\\nAPROVE quando:\\n- O resultado responde + \xE0 pergunta, mesmo com formato ou representa\xE7\xE3o diferente\\n- H\xE1 + colunas extras que n\xE3o prejudicam a resposta\\n- A precis\xE3o num\xE9rica + difere mas o valor est\xE1 correto\\n- A query \xE9 mais simples que o esperado + mas semanticamente equivalente\\n\\nAvalie com rigor sem\xE2ntico. Resultados + que parecem plaus\xEDveis mas derivam de l\xF3gica\\nincorreta devem ser reprovados. 
+ N\xE3o presuma que uma query bem-formada est\xE1 correta.\\n\\nResponda no formato:\\nVEREDITO: + APROVADO ou REPROVADO\\nFEEDBACK: \",\"role\":\"user\"}],\"model\":\"gpt-4o-mini\",\"stream\":false}" + headers: + Accept: + - a + Accept-Encoding: + - g + Connection: + - k + Content-Length: + - '7' + Content-Type: + - a + Cookie: + - _ + Host: + - a + User-Agent: + - O + X-Stainless-Arch: + - x + X-Stainless-Async: + - f + X-Stainless-Lang: + - p + X-Stainless-OS: + - L + X-Stainless-Package-Version: + - '2' + X-Stainless-Raw-Response: + - t + X-Stainless-Runtime: + - C + X-Stainless-Runtime-Version: + - '3' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdck0z/FISgMAAAD//4xTTY/aMBC976+wcgYE + bMpHb7RQabWtoLsVh7araLCH4Naxje2grSr+S/e4v4M/1kkCJNtSqZfIeW/mzdhv5nbyuJDL6+7N + Z7XNPizve4ul7W+jVpFhVt+Qh1NWhxvKwyCNrmjuEAIWqr3hcBx3R4PrcUlkRqAq0lIb2rFpZ1LL + dr/bj9vdYbs3OmZvjOToKewL/TL2s/wWfWqBjwR3WyckQ+8hRcJOQQQ6owokAu+lD6BD1KpJbnRA + Xba+nN3Npjef5q/ZZHE3X06mc8a+6nez2fTN5O0toYyCfa4CsPuP7xkHxXMFBDqHATJSQQYsOzwL + CUwYtgNlHB08s5CWPB0FMotCEtpi2xyrbG8NXYUJWQsdflGcS3MdSq3c54cnJw3zZuUulQkmgCK1 + lQPCLCFVmQ6bMypQdE04WDqTfnE+PLPd4UlRDEN27gEEbnMQF7owTqZSg+o0n8/hOvdQWKhzpRoE + aE0dFSNQGvdwZPZnq5RJrTMr/0dqtKYR8JuEJsbT+JAtPhgbleyevg/lSOQvXI5IKLMhCeY7luV6 + w3G/EozqUazpV8MjWT5aI20Uj1sXFBNBtkjlG2MVceAbFHVuPYOQC2kaxFXj3n+3c0m7urvU6f/I + 1wTnaGnLEvJYSP7yynWYw2JV/xV2fuey4cij29HuJUGiK7wQuAYapWov/Q8fMEvIsBSddbLaorVN + BqM4hsEah6Poan/1GwAA//8DAAXD/FtTBAAA + headers: + Access-Control-Expose-Headers: + - CF-Ray + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Fri, 22 May 2026 00:10:40 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + 
access-control-expose-headers: + - X-Request-ID + - CF-Ray + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - + openai-processing-ms: + - '945' + openai-project: + - + openai-version: + - '2020-10-01' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-limit-tokens: + - '200000' + x-ratelimit-remaining-requests: + - + x-ratelimit-remaining-tokens: + - + x-ratelimit-reset-requests: + - 1m5.146s + x-ratelimit-reset-tokens: + - 516ms + x-request-id: + - + status: + code: 200 + message: OK +version: 1 diff --git a/tests/cassettes/test_integracao/test_hitl_nova_pergunta_substitui.yaml b/tests/cassettes/test_integracao/test_hitl_nova_pergunta_substitui.yaml index 69ec996..23774f0 100644 --- a/tests/cassettes/test_integracao/test_hitl_nova_pergunta_substitui.yaml +++ b/tests/cassettes/test_integracao/test_hitl_nova_pergunta_substitui.yaml @@ -10,12 +10,32 @@ interactions: geolocation_zip_code_prefix: INTEGER\n- geolocation_lat: REAL\n- geolocation_lng: REAL\n- geolocation_city: TEXT\n- geolocation_state: TEXT\n\nTabela: order_items\n- order_id: TEXT (PK)\n- order_item_id: INTEGER (PK)\n- product_id: TEXT\n- seller_id: - TEXT\n- shipping_limit_date: TEXT\n- price: \n\n- Feedback do cr\u00edtico: - Nenhum\n- Tentativas realizadas: 0\n- Status atual: schema_obtido\n- Erro anterior: - Nenhum\n\nAVALIA\u00c7\u00c3O CR\u00cdTICA:\nVerifique se a \"Pergunta do usu\u00e1rio\" - pode ser respondida com as tabelas e colunas do Schema.\nSe houver ambiguidade, - conceitos n\u00e3o mapeados no banco de dados, ou se a inten\u00e7\u00e3o do - usu\u00e1rio n\u00e3o estiver clara, voc\u00ea DEVE pedir mais informa\u00e7\u00f5es.\n\nResponda + TEXT\n- shipping_limit_date: TEXT\n- price: REAL\n- freight_value: REAL\n Foreign + keys:\n - seller_id -> sellers.seller_id (on_update=NO ACTION, on_delete=NO + ACTION)\n - product_id -> products.product_id (on_update=NO ACTION, on_delete=NO + ACTION)\n - order_id -> orders.order_id (on_update=NO 
ACTION, on_delete=NO + ACTION)\n\nTabela: order_payments\n- order_id: TEXT (PK)\n- payment_sequential: + INTEGER (PK)\n- payment_type: TEXT\n- payment_installments: INTEGER\n- payment_value: + REAL\n Foreign keys:\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\n\nTabela: order_reviews\n- review_id: TEXT (PK)\n- order_id: + TEXT\n- review_score: INTEGER\n- review_comment_title: TEXT\n- review_comment_message: + TEXT\n- review_creation_date: TEXT\n- review_answer_timestamp: TEXT\n Foreign + keys:\n - order_id -> orders.order_id (on_update=NO ACTION, on_delete=NO ACTION)\n\nTabela: + orders\n- order_id: TEXT (PK)\n- customer_id: TEXT\n- order_status: TEXT\n- + order_purchase_timestamp: TEXT\n- order_approved_at: TEXT\n- order_delivered_carrier_date: + TEXT\n- order_delivered_customer_date: TEXT\n- order_estimated_delivery_date: + TEXT\n Foreign keys:\n - customer_id -> customers.customer_id (on_update=NO + ACTION, on_delete=NO ACTION)\n\nTabela: products\n- product_id: TEXT (PK)\n- + product_category_name: TEXT\n- product_name_length: REAL\n- product_description_length: + REAL\n- product_photos_qty: REAL\n- product_weight_g: REAL\n- product_length_cm: + REAL\n- product_height_cm: REAL\n- product_width_cm: REAL\n\nTabela: sellers\n- + seller_id: TEXT (PK)\n- seller_zip_code_prefix: INTEGER\n- seller_city: TEXT\n- + seller_state: TEXT\n\n\n- Feedback do cr\u00edtico: Nenhum\n- Tentativas realizadas: + 0\n- Status atual: schema_obtido\n- Erro anterior: Nenhum\n\nAVALIA\u00c7\u00c3O + CR\u00cdTICA:\nVerifique se a \"Pergunta do usu\u00e1rio\" pode ser respondida + com as tabelas e colunas do Schema.\nSe houver ambiguidade, conceitos n\u00e3o + mapeados no banco de dados, ou se a inten\u00e7\u00e3o do usu\u00e1rio n\u00e3o + estiver clara, voc\u00ea DEVE pedir mais informa\u00e7\u00f5es.\n\nResponda EXATAMENTE no formato JSON abaixo, sem formata\u00e7\u00e3o markdown (```json):\n{{\n \"decisao\": \"escolha_uma_opcao\",\n 
\"pergunta_ao_usuario\": \"escreva a pergunta aqui se precisar de ajuda, ou deixe vazio se n\u00e3o precisar\"\n}}\n\nOp\u00e7\u00f5es @@ -34,7 +54,7 @@ interactions: Connection: - k Content-Length: - - '2' + - '3' Content-Type: - a Host: @@ -48,15 +68,15 @@ interactions: response: body: string: !!binary | - H4sIAAAAAAAC/2WSz2rbQBDG73qKYS+5OKEJNja9lPQPJYdS05pQqEoYa8f2ptKO2FmVJMYPU3oI - BHrsE+jFOmtbjtzqIHZnvtlv9rezzgBMgd46i5HEvISvGgFYb/8pxz6Sj5roQhqsMcRn7e5b99Yq - iXSXisw692mfG0uFE+Rcg7nxVJCIi3iDt43F3Aw6WU1h2fiU4JtGGgxuX3IpEHFOJQpYJzX79vcP - cgK+/cWQumyfKnB+waHC9rH9QwLC80BQqxNrVcEVw8nrgBamLsaTM7jmon0Cktj+hDpw0QQFwVBz - AIuWBYLaFY79doNQlE5RkAyg5AJL95CM1J0bcApJG0tuipJf5Sb3G9NDsjmsvw2eQQYuKVGq2FLZ - yTedwCycd7L6RCjsk+zz7OPUHLLOW7rT8IusM9gebRrBJX2giPqkeHg4ozes6jjj7+TfcLN90uFk - uDutNwJHgtFkn48csTxKjS/Gg/8Olrdq68r+bPTGRm+p1OJ9usrs3ZeZ6ZGI//TVsch6yExccbNc - xeMezyejbA9tx/GagrgdsCVVivD04mx0utDJWW0dTaA0P0JXNmnK2+k54vzhvb1fPoyvpkOZT9zl - 0GSb7C9qGqlMHgMAAA== + H4sIAAAAAAAC/11Sy64SQRDd8xWV3twN3ICJEtzcXB8LFkZUvDE65qaYLobWma6xHwQhfI0rTfwK + fszqgYHBXnS66pyqPn26dj0AlaPVRmMgr57DF8kA7Jo9YWwD2SBAm5JkjS5cuMe165yFEmiTitQu + synOlKbceORMkpmylJP3JuAjfosaM9VvaTW5ItoE8GP0EZ05ldyDRNDCcPMuUgWH38DwwqGGmQnh + 7gZEmbQG8gEdLNkhaIbmDZvAcvagsdmNr9ke/q7JeLBCJ/BG9gr78CNKcPjF4HnhCPLSiAHk+1A7 + 1jFINcGarEZpQxAroEHOVUUup1t44PzwBwpOAgwmQivZAZYFg6MSc8NWZABe9FCnS6NAtLC/y1Rm + 96pj7f58/tq/fIjjkpLbFWsqW/q+Jailscav3hN6ton2Yf52ps6osZo2kh722gua1ip6LOgNBZTR + wPMAKHGhqsOcv5N9ybEZjcnk2bFbZ5SuCOPJCQ8csLyCRqNxW9zp7F/JvabsDlln/uSZWJrwM71l + /vrTXHWsCP8Ja83odTxTYcWxWIVrkaPh6AR7cmuT09yQS1fIX8pvO52MbUw9+vxAzpujoQVVYvHg + ye3TwbJEv2oEKUdpyDxNdeJ8rHmGcfF5GhYP2/F0tl1uF8P7oerte/8AFNm8i4YDAAA= headers: Alt-Svc: - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 @@ -65,11 +85,11 @@ interactions: Content-Type: - application/json; charset=UTF-8 Date: - - Fri, 01 May 2026 23:13:28 GMT + - Thu, 21 May 2026 23:50:44 GMT Server: - scaffolding on HTTPServer2 Server-Timing: - - gfet4t7; dur=1922 + - gfet4t7; dur=1631 
Transfer-Encoding: - chunked Vary: @@ -122,13 +142,13 @@ interactions: response: body: string: !!binary | - H4sIAAAAAAAC/2WR0U+DMBDG3/kryD1vZqLbkr0Y43yYiXEqMSZilsu4QSO0pD10G+F/t8DYOu0D - Kfd9ve/6a+X5PqxRxiJGJgMz/8NWfL9qv42mJJNkK/QlWyxQ88nbrcrZWwvTtjkEVQQsChXBLAKp - vnFVkE5KyRjBIIL+ZyWVzjETe4yxtT6XKFkZf50Jm07Gp60wTPlNBDU4SfVx/zk4zadVRk14rmLK - envdG2AjpDDpC6FRsrG9hk9LOKpCxrS15ZHXB7StoTSY0COxHZHxyAMKrfKCQ/VF8k6VLakgGHfd - HLLnhtFBZ8WYnUmTIBj8a2zmNlZkLnLnNewtLTreNVcJ799DcEjwn7l6Fp6DDDhVZZLy+YxX06l3 - gNZxfCNtRAcsodwiHAYX4+EmQ5O2iaDJFEoaWsQt+/nyEjfEi8nO7KeLpXjYi5/ba/Bq7xdrmUMi - dQIAAA== + H4sIAAAAAAAC/2WR0U/CMBDG3/dXLPcMhiBE5MUYNAaNYcpCTJwhDTu2xq1t2htBlv3vthuDoXtY + uvu+3nf3W+n5PmyYiHnMCA1M/U9b8f2yfjtNCkJBVmhLtqiYprO3ecrO2VoI9+4SlBEQVzKCaQRC + 7thaoU4KQSyCXgTtx1pInbOMH1jMautbwQRJ428ybtPR+LjnhjC/i6CCTlJ1On/1zvNpmaELz2WM + WWuvWgNsueAmfUdmpHC2ZbgI4KRyEePelgdeG1C3hsKwBF+R7IjETjxAaZkrCuU3ipksalLD4bjp + 1iF7aRgcdZLEsgtpNJn0/jU2DzaWZ13knb9ht7To6MetEj5+hNAhQX/mall4HWRAqSySlP7MOLo+ + ygb1jm8w5KhdhCG7FNOx41ozbTCvUBve8Ewwt4T7w6txf5sxk9YDgUajpDA4j51n9SwDphby6SVI + Djfz4DZeqtk9eJX3C+vokf2TAgAA headers: Alt-Svc: - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 @@ -137,11 +157,11 @@ interactions: Content-Type: - application/json; charset=UTF-8 Date: - - Fri, 01 May 2026 23:13:30 GMT + - Thu, 21 May 2026 23:50:45 GMT Server: - scaffolding on HTTPServer2 Server-Timing: - - gfet4t7; dur=2302 + - gfet4t7; dur=1712 Transfer-Encoding: - chunked Vary: @@ -163,25 +183,46 @@ interactions: body: '{"contents": [{"parts": [{"text": "Voc\u00ea \u00e9 o planejador de um sistema que transforma perguntas em consultas SQL.\n\nSeu papel: analisar a situa\u00e7\u00e3o atual e decidir a pr\u00f3xima a\u00e7\u00e3o.\n\nContexto - atual:\n- Pergunta do usu\u00e1rio: \"Quantos clientes existem?\"\n\n- conversa_previa: - [[\"ai: As tabelas dispon\u00edveis n\u00e3o cont\u00eam informa\u00e7\u00f5es - sobre pessoas como ''Brad Pitt''. 
Voc\u00ea est\u00e1 procurando por dados relacionados - a clientes, localiza\u00e7\u00e3o ou itens de pedido?\", ''user: Quero saber - quantos clientes existem'']]\n\n- Schema: === SCHEMA SQLITE (INTROSPECCAO REAL) - ===\n\nTabela: customers\n- customer_id: TEXT (PK)\n- customer_unique_id: TEXT\n- - customer_zip_code_prefix: INTEGER\n- customer_city: TEXT\n- customer_state: - TEXT\n\nTabela: geolocation\n- geolocation_zip_code_prefix: INTEGER\n- geolocation_lat: - REAL\n- geolocation_lng: REAL\n- geolocation_city: TEXT\n- geolocation_state: - TEXT\n\nTabela: order_items\n- order_id: TEXT (PK)\n- order_item_id: INTEGER - (PK)\n- product_id: TEXT\n- seller_id: TEXT\n- shipping_limit_date: TEXT\n- - price: \n\n- Feedback do cr\u00edtico: Nenhum\n- Tentativas realizadas: 0\n- - Status atual: iniciado\n- Erro anterior: Nenhum\n\nAVALIA\u00c7\u00c3O CR\u00cdTICA:\nVerifique - se a \"Pergunta do usu\u00e1rio\" pode ser respondida com as tabelas e colunas - do Schema.\nSe houver ambiguidade, conceitos n\u00e3o mapeados no banco de dados, - ou se a inten\u00e7\u00e3o do usu\u00e1rio n\u00e3o estiver clara, voc\u00ea - DEVE pedir mais informa\u00e7\u00f5es.\n\nResponda EXATAMENTE no formato JSON - abaixo, sem formata\u00e7\u00e3o markdown (```json):\n{{\n \"decisao\": \"escolha_uma_opcao\",\n \"pergunta_ao_usuario\": - \"escreva a pergunta aqui se precisar de ajuda, ou deixe vazio se n\u00e3o precisar\"\n}}\n\nOp\u00e7\u00f5es + atual:\n- Pergunta do usu\u00e1rio: \"Quero saber quantos clientes existem\"\n\n- + conversa_previa: [[\"ai: A sua pergunta ''Quem \u00e9 o Brad Pitt?'' parece + estar fora do contexto dos dados dispon\u00edveis neste sistema, que s\u00e3o + sobre clientes, produtos e vendas de um e-commerce. 
Voc\u00ea gostaria de perguntar + algo relacionado aos dados de e-commerce que temos?\", ''user: Quero saber quantos + clientes existem'']]\n\n- Schema: === SCHEMA SQLITE (INTROSPECCAO REAL) ===\n\nTabela: + customers\n- customer_id: TEXT (PK)\n- customer_unique_id: TEXT\n- customer_zip_code_prefix: + INTEGER\n- customer_city: TEXT\n- customer_state: TEXT\n\nTabela: geolocation\n- + geolocation_zip_code_prefix: INTEGER\n- geolocation_lat: REAL\n- geolocation_lng: + REAL\n- geolocation_city: TEXT\n- geolocation_state: TEXT\n\nTabela: order_items\n- + order_id: TEXT (PK)\n- order_item_id: INTEGER (PK)\n- product_id: TEXT\n- seller_id: + TEXT\n- shipping_limit_date: TEXT\n- price: REAL\n- freight_value: REAL\n Foreign + keys:\n - seller_id -> sellers.seller_id (on_update=NO ACTION, on_delete=NO + ACTION)\n - product_id -> products.product_id (on_update=NO ACTION, on_delete=NO + ACTION)\n - order_id -> orders.order_id (on_update=NO ACTION, on_delete=NO + ACTION)\n\nTabela: order_payments\n- order_id: TEXT (PK)\n- payment_sequential: + INTEGER (PK)\n- payment_type: TEXT\n- payment_installments: INTEGER\n- payment_value: + REAL\n Foreign keys:\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\n\nTabela: order_reviews\n- review_id: TEXT (PK)\n- order_id: + TEXT\n- review_score: INTEGER\n- review_comment_title: TEXT\n- review_comment_message: + TEXT\n- review_creation_date: TEXT\n- review_answer_timestamp: TEXT\n Foreign + keys:\n - order_id -> orders.order_id (on_update=NO ACTION, on_delete=NO ACTION)\n\nTabela: + orders\n- order_id: TEXT (PK)\n- customer_id: TEXT\n- order_status: TEXT\n- + order_purchase_timestamp: TEXT\n- order_approved_at: TEXT\n- order_delivered_carrier_date: + TEXT\n- order_delivered_customer_date: TEXT\n- order_estimated_delivery_date: + TEXT\n Foreign keys:\n - customer_id -> customers.customer_id (on_update=NO + ACTION, on_delete=NO ACTION)\n\nTabela: products\n- product_id: TEXT (PK)\n- + product_category_name: TEXT\n- 
product_name_length: REAL\n- product_description_length: + REAL\n- product_photos_qty: REAL\n- product_weight_g: REAL\n- product_length_cm: + REAL\n- product_height_cm: REAL\n- product_width_cm: REAL\n\nTabela: sellers\n- + seller_id: TEXT (PK)\n- seller_zip_code_prefix: INTEGER\n- seller_city: TEXT\n- + seller_state: TEXT\n\n\n- Feedback do cr\u00edtico: Nenhum\n- Tentativas realizadas: + 0\n- Status atual: iniciado\n- Erro anterior: Nenhum\n\nAVALIA\u00c7\u00c3O + CR\u00cdTICA:\nVerifique se a \"Pergunta do usu\u00e1rio\" pode ser respondida + com as tabelas e colunas do Schema.\nSe houver ambiguidade, conceitos n\u00e3o + mapeados no banco de dados, ou se a inten\u00e7\u00e3o do usu\u00e1rio n\u00e3o + estiver clara, voc\u00ea DEVE pedir mais informa\u00e7\u00f5es.\n\nResponda + EXATAMENTE no formato JSON abaixo, sem formata\u00e7\u00e3o markdown (```json):\n{{\n \"decisao\": + \"escolha_uma_opcao\",\n \"pergunta_ao_usuario\": \"escreva a pergunta aqui + se precisar de ajuda, ou deixe vazio se n\u00e3o precisar\"\n}}\n\nOp\u00e7\u00f5es v\u00e1lidas para ''decis\u00e3o'':\n- \"pronto_codificacao\" \u2192 se temos schema, a pergunta faz sentido e devemos gerar/regenerar SQL\n- \"revisando_estrategia\" \u2192 se o cr\u00edtico reprovou e devemos tentar uma abordagem diferente\n- @@ -197,7 +238,7 @@ interactions: Connection: - k Content-Length: - - '2' + - '4' Content-Type: - a Host: @@ -211,12 +252,13 @@ interactions: response: body: string: !!binary | - H4sIAAAAAAAC/2WRXU+DMBSG7/kVpNebccwZ9c5ML7wwLo4YE1mWM3oGzaAl7cFsEv67LYytU5qQ - 9j1vz8fTJghDloLkggOhYQ/hl1XCsOn+LqYkoSQbGCQrVqDp7O2/xttbC+HeXWJNIt05YRxTYUAl - VkxYpW1atU4VF1uRQur00eCsUGe1JFiDWtemBi2Ot+ySLfPqtKf9anTuTqsCXelScSwGezsY2FZI - YfJ3BKOksy3jtwU7RYXkuLfydTAU6FKz2kCGr0hgOcGJhhukrChWO5RzVXecZlHUZ/O4Xhii+2Oc - FEFxEbq9m47+JTZPtqwofODeW9gpoRB0cKPEz58x80jQn74GFoGHjFGu6iynyx4nU2fuoPUcP1Ab - 0QPLsLQIx9HVbLwtwORdRabRVEoafOEd+5/FBFK5mX8vb0raLeRGzw6PigVt8AtwLS/ocwIAAA== + 
H4sIAAAAAAAC/2WRXU/CMBSG7/crll6DARSm3hnkggsj0YWYOENO1rOtcbRLT0eQZf/ddh8wdEuW + 9j3v+XpWeb7PYpBccDBI7NH/tIrvV83XxZQ0KI0N9JIVC9Dm4m2fanC2FoNHl8SqSLp7xDjGgkBF + VoxYoW1ZtYsVF4mIIXb6qHcWqNNSGtiB2pVUghZdln1lzQZ96vP5a3SZTqscXeu94pj39ro3sERI + QdkbAinpbO/h64ado0JyPFp54vUNmtKsJEjxBQ1YTnCm4RbZFyZU3yiXqmw4TSfzoC03AHvlmD10 + caMM5NfJ08Xd6F9peraNRT5EPvgbdk/Ihflxy4Srj5ANWJi/k/U4vAE1ZjJVppm5njK476KE+iBi + DAVq14KM3Qo0d2Qbqi3oLWoSLdEU95bxeHYzHyc5UNYMxDRSoSThmjvPtlAboNvDCpfJKVhvaBGc + xk9r5tXeL0QJ8ymUAgAA headers: Alt-Svc: - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 @@ -225,11 +267,11 @@ interactions: Content-Type: - application/json; charset=UTF-8 Date: - - Fri, 01 May 2026 23:13:32 GMT + - Thu, 21 May 2026 23:50:47 GMT Server: - scaffolding on HTTPServer2 Server-Timing: - - gfet4t7; dur=1455 + - gfet4t7; dur=1189 Transfer-Encoding: - chunked Vary: @@ -282,14 +324,124 @@ interactions: REAL\n- product_photos_qty: REAL\n- product_weight_g: REAL\n- product_length_cm: REAL\n- product_height_cm: REAL\n- product_width_cm: REAL\n\nTabela: sellers\n- seller_id: TEXT (PK)\n- seller_zip_code_prefix: INTEGER\n- seller_city: TEXT\n- - seller_state: TEXT\n\n\n=== PERGUNTA DO USU\u00c1RIO ===\nQuantos clientes existem?\n\n=== - CONVERSA PR\u00c9VIA (CONTEXTO ADICIONAL) ===\n[[\"ai: As tabelas dispon\u00edveis - n\u00e3o cont\u00eam informa\u00e7\u00f5es sobre pessoas como ''Brad Pitt''. 
- Voc\u00ea est\u00e1 procurando por dados relacionados a clientes, localiza\u00e7\u00e3o - ou itens de pedido?\", ''user: Quero saber quantos clientes existem'']]\n\n=== - FEEDBACK CR\u00cdTICO (SE HOUVER) ===\n\n\nResponda APENAS com a consulta SQL, - sem markdown, sem explica\u00e7\u00e3o."}], "role": "user"}], "safetySettings": - [], "generationConfig": {"temperature": 0.7, "candidateCount": 1}}' + seller_state: TEXT\n\n\n=== PERGUNTA DO USU\u00c1RIO ===\nQuero saber quantos + clientes existem\n\n=== CONVERSA PR\u00c9VIA (CONTEXTO ADICIONAL) ===\n[[\"ai: + A sua pergunta ''Quem \u00e9 o Brad Pitt?'' parece estar fora do contexto dos + dados dispon\u00edveis neste sistema, que s\u00e3o sobre clientes, produtos + e vendas de um e-commerce. Voc\u00ea gostaria de perguntar algo relacionado + aos dados de e-commerce que temos?\", ''user: Quero saber quantos clientes existem'']]\n\n=== + HIST\u00d3RICO DE TENTATIVAS ANTERIORES ===\nNenhuma tentativa anterior.\n\nResponda + APENAS com a consulta SQL, sem markdown, sem explica\u00e7\u00e3o."}], "role": + "user"}], "safetySettings": [], "generationConfig": {"temperature": 0.7, "candidateCount": + 1}}' + headers: + Accept: + - '*' + Accept-Encoding: + - g + Connection: + - k + Content-Length: + - '3' + Content-Type: + - a + Host: + - g + User-Agent: + - g + x-goog-api-client: + - g + method: POST + uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent + response: + body: + string: !!binary | + H4sIAAAAAAAC/7VUXWvbMBR976+45HE4Nm6XsuVpZUnawMqyNB0b6wiKfONolSVXH6Gh5L/vSnbb + pAuMjQ0ClpSrc+6559gPRwAdNEabTh8eaENbrguk3evjt0lzUKG1rAxnna/aA95zxAIL2GhvgHtj + UDm489qxBGqJzCLwFfLbpqCWTAFTBSyElEKVUKBjQtoURtpApQ2CUEttKuaEVkA/txIWYlMJrJAV + 4HQfVs7Vtp9lTKSl1qXEtMB1VmIllOiyWmSF5jYzzGFXiko4wp9pglfCEc1eqz6oOQwdMJ8xUrhR + r+BTUPasehm6RmcE70OJCqlcrJFElp5g2+aoIZtyXWVtBc65Vo7I50uDOHcCzdzgnUfrbAKRrA+9 + hPotUAbcKOs47XWXktnVjZo0czVEvKF5QZ6np6f5SX7ypmfTTmuUdcx5G3yaDq8+Xk/fD+fDLxdn + 
11ez4eCxpp0+FX2LB9DaHv985zZ19Dk8f5HSjN3UPL1AWbeA8R75ersLuQ/b8lpuRB08DgQfkBnV + uM8W2js4j5LhbDJuomR38COANzJc/OMcdHZgtk/r7+1qm/z9FGIwRjRNb3B3GmuhZQzzb0YSdV7G + KAW2fxKml1OLHOMi4J+3t6dt6QTNpVDeIS0mRv9A7sJRSGB3RMAzwj0INxAVKtvq29cUsqA5e3S5 + lHrB5AuQ8EUJJI3m/aB39iq3h9g/M+mjO73/5+w0vGZj+irt2hrfvQGZswkIef6UrIY78G6Ptkc/ + AbD65EFTBQAA + headers: + Alt-Svc: + - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 + Content-Encoding: + - gzip + Content-Type: + - application/json; charset=UTF-8 + Date: + - Thu, 21 May 2026 23:50:48 GMT + Server: + - scaffolding on HTTPServer2 + Server-Timing: + - gfet4t7; dur=936 + Transfer-Encoding: + - chunked + Vary: + - Origin + - X-Origin + - Referer + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - SAMEORIGIN + X-Gemini-Service-Tier: + - standard + X-XSS-Protection: + - '0' + status: + code: 429 + message: Too Many Requests +- request: + body: '{"contents": [{"parts": [{"text": "Voc\u00ea \u00e9 um especialista em + SQL para bancos SQLite.\n\nSua tarefa: gerar UMA \u00fanica consulta SQL SELECT + que responda \u00e0 pergunta do usu\u00e1rio,\nusando o schema do banco de dados + fornecido abaixo.\n\nRegras:\n- Gere APENAS uma consulta SELECT (ou WITH/CTE + seguido de SELECT).\n- N\u00c3O use INSERT, UPDATE, DELETE, DROP, ALTER ou qualquer + comando de escrita.\n- N\u00c3O inclua explica\u00e7\u00f5es, apenas a SQL pura.\n- + Use nomes de tabelas e colunas EXATAMENTE como aparecem no schema.\n- Se a pergunta + for amb\u00edgua, fa\u00e7a a interpreta\u00e7\u00e3o mais razo\u00e1vel.\n\n=== + SCHEMA DO BANCO ===\n=== SCHEMA SQLITE (INTROSPECCAO REAL) ===\n\nTabela: customers\n- + customer_id: TEXT (PK)\n- customer_unique_id: TEXT\n- customer_zip_code_prefix: + INTEGER\n- customer_city: TEXT\n- customer_state: TEXT\n\nTabela: geolocation\n- + geolocation_zip_code_prefix: INTEGER\n- geolocation_lat: REAL\n- geolocation_lng: + REAL\n- geolocation_city: TEXT\n- geolocation_state: TEXT\n\nTabela: order_items\n- + order_id: TEXT (PK)\n- 
order_item_id: INTEGER (PK)\n- product_id: TEXT\n- seller_id: + TEXT\n- shipping_limit_date: TEXT\n- price: REAL\n- freight_value: REAL\n Foreign + keys:\n - seller_id -> sellers.seller_id (on_update=NO ACTION, on_delete=NO + ACTION)\n - product_id -> products.product_id (on_update=NO ACTION, on_delete=NO + ACTION)\n - order_id -> orders.order_id (on_update=NO ACTION, on_delete=NO + ACTION)\n\nTabela: order_payments\n- order_id: TEXT (PK)\n- payment_sequential: + INTEGER (PK)\n- payment_type: TEXT\n- payment_installments: INTEGER\n- payment_value: + REAL\n Foreign keys:\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\n\nTabela: order_reviews\n- review_id: TEXT (PK)\n- order_id: + TEXT\n- review_score: INTEGER\n- review_comment_title: TEXT\n- review_comment_message: + TEXT\n- review_creation_date: TEXT\n- review_answer_timestamp: TEXT\n Foreign + keys:\n - order_id -> orders.order_id (on_update=NO ACTION, on_delete=NO ACTION)\n\nTabela: + orders\n- order_id: TEXT (PK)\n- customer_id: TEXT\n- order_status: TEXT\n- + order_purchase_timestamp: TEXT\n- order_approved_at: TEXT\n- order_delivered_carrier_date: + TEXT\n- order_delivered_customer_date: TEXT\n- order_estimated_delivery_date: + TEXT\n Foreign keys:\n - customer_id -> customers.customer_id (on_update=NO + ACTION, on_delete=NO ACTION)\n\nTabela: products\n- product_id: TEXT (PK)\n- + product_category_name: TEXT\n- product_name_length: REAL\n- product_description_length: + REAL\n- product_photos_qty: REAL\n- product_weight_g: REAL\n- product_length_cm: + REAL\n- product_height_cm: REAL\n- product_width_cm: REAL\n\nTabela: sellers\n- + seller_id: TEXT (PK)\n- seller_zip_code_prefix: INTEGER\n- seller_city: TEXT\n- + seller_state: TEXT\n\n\n=== PERGUNTA DO USU\u00c1RIO ===\nQuero saber quantos + clientes existem\n\n=== CONVERSA PR\u00c9VIA (CONTEXTO ADICIONAL) ===\n[[\"ai: + A sua pergunta ''Quem \u00e9 o Brad Pitt?'' parece estar fora do contexto dos + dados dispon\u00edveis neste 
sistema, que s\u00e3o sobre clientes, produtos + e vendas de um e-commerce. Voc\u00ea gostaria de perguntar algo relacionado + aos dados de e-commerce que temos?\", ''user: Quero saber quantos clientes existem'']]\n\n=== + HIST\u00d3RICO DE TENTATIVAS ANTERIORES ===\nNenhuma tentativa anterior.\n\nResponda + APENAS com a consulta SQL, sem markdown, sem explica\u00e7\u00e3o."}], "role": + "user"}], "safetySettings": [], "generationConfig": {"temperature": 0.7, "candidateCount": + 1}}' headers: Accept: - '*' @@ -312,12 +464,12 @@ interactions: response: body: string: !!binary | - H4sIAAAAAAAC/12QXUvDMBSG7/srQq4UrKzD7zvpJgzcBzOKKCJxOVvD0qQ0p7BZ9t9N22XLzEUI - 7/uec3KeOiKELrgWUnAESx/Ip1MIqdu78YxG0OgMLzmx4CUes92pg7eLIGyaIvoyfB6mjKTT1wk7 - W1QWTQ7ltxTn5Gk+HROvWBqU7w7vr4vj0NIoaDrmRoDy8Z0P0KXU0mZz4NbodjCbzujBlVrAxsm9 - yA9oW9PK8hWMAblbnx+WpEVp8gKZWYNOTdWuf9+/6boFuE4DexsNcnXiJL3E1waN7cCNlSrkGCB2 - W3IlcduswobvjAYk8N+/PIsoQEYxM9Uqw9M/3iXRnlmH8c2Rlx2vFeSOYNy/vI6XitusHUhLsIXR - FkaiyejBLOEfVTX5iZe/t6NZfrXN1o+GRrvoDyEzHVFJAgAA + H4sIAAAAAAAC/12RX0/CMBTF3/cpmj5pIgaI4J9HARMSkKlVTIwxDb2wxq1d2juDEL677UahuIem + uef03nt+2yaE0AVXQgqOYOkd+XAVQrb16TWtEBQ6IZRcseQGj97m20Z3Z0FY+0f0ZTQZDRgZzF4f + 2dmisqgLMF9SnJOH59mUhIql0fPd4f55cRxqdA6+Y6EF5MG+Cwa6lEra7Bm41aoezGYpPahSCVi7 + cjsJA+rWtLJ8BVNA7uLzQ0haGl2UyPQ3qIGu6vi3vX7TLcJ1atjLqJHnJ0qnfRPEqLEdurEyjzlG + iF1Knkv89VHY6J3RiAT+2yuwSCJkFDNdrTI83bHTvdrLFsyPXACTYPwIiy4UN8JzrZk2mN/cn5EN + zxUUjnCre9lrLXNus3ohasCWWlkYC++ZlzrlFb8fTua4uR6ncrJJ+0+aJrvkD5whZOppAgAA headers: Alt-Svc: - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 @@ -326,11 +478,11 @@ interactions: Content-Type: - application/json; charset=UTF-8 Date: - - Fri, 01 May 2026 23:13:33 GMT + - Thu, 21 May 2026 23:50:52 GMT Server: - scaffolding on HTTPServer2 Server-Timing: - - gfet4t7; dur=1115 + - gfet4t7; dur=2460 Transfer-Encoding: - chunked Vary: @@ -352,17 +504,737 @@ interactions: body: '{"contents": [{"parts": [{"text": "Voc\u00ea \u00e9 um revisor de qualidade para consultas SQL 
geradas por IA.\n\nSua tarefa: avaliar se a consulta SQL e seus resultados respondem adequadamente\n\u00e0 pergunta original do usu\u00e1rio.\n\n=== - PERGUNTA DO USU\u00c1RIO ===\nQuantos clientes existem?\n\n=== CONVERSA COM - O AGENTE (se houver) ===\n[[\"ai: As tabelas dispon\u00edveis n\u00e3o cont\u00eam - informa\u00e7\u00f5es sobre pessoas como ''Brad Pitt''. Voc\u00ea est\u00e1 - procurando por dados relacionados a clientes, localiza\u00e7\u00e3o ou itens - de pedido?\", ''user: Quero saber quantos clientes existem'']]\n\n=== SQL GERADA - ===\nSELECT COUNT(customer_id) FROM customers\n\n=== RESULTADO DA EXECU\u00c7\u00c3O - ===\nStatus: exec_ok\nTotal de linhas: 1\nAmostra dos resultados (primeiras - linhas):\n[{''COUNT(customer_id)'': 99441}]\n\n=== ERROS (se houver) ===\nNenhum\n\nAvalie:\n1. - A SQL responde \u00e0 pergunta do usu\u00e1rio?\n2. Os resultados fazem sentido?\n3. - H\u00e1 algum erro l\u00f3gico ou de interpreta\u00e7\u00e3o?\n\nResponda no - formato:\nVEREDITO: APROVADO ou REPROVADO\nFEEDBACK: sellers.seller_id (on_update=NO ACTION, on_delete=NO + ACTION)\n - product_id -> products.product_id (on_update=NO ACTION, on_delete=NO + ACTION)\n - order_id -> orders.order_id (on_update=NO ACTION, on_delete=NO + ACTION)\n\nTabela: order_payments\n- order_id: TEXT (PK)\n- payment_sequential: + INTEGER (PK)\n- payment_type: TEXT\n- payment_installments: INTEGER\n- payment_value: + REAL\n Foreign keys:\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\n\nTabela: order_reviews\n- review_id: TEXT (PK)\n- order_id: + TEXT\n- review_score: INTEGER\n- review_comment_title: TEXT\n- review_comment_message: + TEXT\n- review_creation_date: TEXT\n- review_answer_timestamp: TEXT\n Foreign + keys:\n - order_id -> orders.order_id (on_update=NO ACTION, on_delete=NO ACTION)\n\nTabela: + orders\n- order_id: TEXT (PK)\n- customer_id: TEXT\n- order_status: TEXT\n- + order_purchase_timestamp: TEXT\n- order_approved_at: TEXT\n- 
order_delivered_carrier_date: + TEXT\n- order_delivered_customer_date: TEXT\n- order_estimated_delivery_date: + TEXT\n Foreign keys:\n - customer_id -> customers.customer_id (on_update=NO + ACTION, on_delete=NO ACTION)\n\nTabela: products\n- product_id: TEXT (PK)\n- + product_category_name: TEXT\n- product_name_length: REAL\n- product_description_length: + REAL\n- product_photos_qty: REAL\n- product_weight_g: REAL\n- product_length_cm: + REAL\n- product_height_cm: REAL\n- product_width_cm: REAL\n\nTabela: sellers\n- + seller_id: TEXT (PK)\n- seller_zip_code_prefix: INTEGER\n- seller_city: TEXT\n- + seller_state: TEXT\n\n\n=== CONVERSA COM O AGENTE (se houver) ===\n[[\"ai: A + sua pergunta ''Quem \u00e9 o Brad Pitt?'' parece estar fora do contexto dos + dados dispon\u00edveis neste sistema, que s\u00e3o sobre clientes, produtos + e vendas de um e-commerce. Voc\u00ea gostaria de perguntar algo relacionado + aos dados de e-commerce que temos?\", ''user: Quero saber quantos clientes existem'']]\n\n=== + SQL GERADA ===\nSELECT COUNT(customer_id) FROM customers\n\n=== RESULTADO DA + EXECU\u00c7\u00c3O ===\nStatus: exec_ok\nTotal de linhas: 1\nAmostra dos resultados + (primeiras linhas):\n[{''COUNT(customer_id)'': 99441}]\n\n=== ERROS (se houver) + ===\nNenhum\n\n=== TENTATIVAS ANTERIORES ===\nNenhuma tentativa anterior (esta + \u00e9 a primeira).\n\n=== EXEMPLOS DE AVALIA\u00c7\u00c3O ===\n\n-- EXEMPLO + 1: REPROVADO (escopo incompleto) --\nPergunta: \"Which airport has the least + number of flights?\"\nSQL: SELECT SourceAirport FROM flights GROUP BY SourceAirport + ORDER BY COUNT(*) ASC LIMIT 1\nResultado: [(''AID'',)]\nVEREDITO: REPROVADO\nRaz\u00e3o: + A query conta apenas voos com partida (SourceAirport) e ignora voos com chegada + (DestAirport).\nO escopo da pergunta \u00e9 \"flights\" em geral \u2014 a query + responde a uma pergunta diferente.\n\n-- EXEMPLO 2: REPROVADO (erro sem\u00e2ntico: + MIN vs MAX) --\nPergunta: \"Which Asian countries have a population 
larger than + any country in Africa?\"\nSQL: SELECT Name FROM country WHERE Continent=''Asia'' + AND Population > (SELECT MAX(Population) FROM country WHERE Continent=''Africa'')\nResultado: + [] (vazio)\nVEREDITO: REPROVADO\nRaz\u00e3o: \"Larger than any country in Africa\" + significa maior que pelo menos um pa\u00eds africano (MIN),\nn\u00e3o maior + que todos os pa\u00edses africanos (MAX). A l\u00f3gica est\u00e1 semanticamente + errada.\n\n-- EXEMPLO 3: REPROVADO (resultado vazio suspeito) --\nPergunta: + \"Find the last name of students who live in North Carolina and are not enrolled + in any degree.\"\nSQL: SELECT last_name FROM Students WHERE state_province_county + = ''North Carolina'' AND ...\nResultado: [] (vazio)\nVEREDITO: REPROVADO\nRaz\u00e3o: + Resultado vazio quando a pergunta espera dados reais \u00e9 suspeito. Verifique + se o filtro\nde string corresponde exatamente ao valor no banco (ex: ''NorthCarolina'' + vs ''North Carolina'').\n\n-- EXEMPLO 4: REPROVADO (JOIN incorreto muda o que + est\u00e1 sendo contado) --\nPergunta: \"Find the name of makers that produced + some cars in 1970.\"\nSQL: SELECT DISTINCT Maker FROM car_makers JOIN car_names + ON car_makers.Id = car_names.MakeId JOIN cars_data ON car_names.MakeId = cars_data.Id + WHERE cars_data.Year = 1970\nResultado: [(''chevrolet'',), (''buick'',)]\nVEREDITO: + REPROVADO\nRaz\u00e3o: O JOIN usa car_names.MakeId para conectar a cars_data, + mas cars_data.Id refere-se\nao ID do carro, n\u00e3o do fabricante. O caminho + correto seria via model_list. 
Os resultados\nparecem plaus\u00edveis mas derivam + de uma jun\u00e7\u00e3o incorreta.\n\n-- EXEMPLO 5: APROVADO (formato diferente, + resposta correta) --\nPergunta: \"On average, when were the transcripts printed?\"\nSQL: + SELECT AVG(transcript_date) AS average_transcript_date FROM Transcripts\nResultado: + [(''1989.9333333333334'',)]\nVEREDITO: APROVADO\nRaz\u00e3o: O resultado \u00e9 + um n\u00famero que representa a m\u00e9dia das datas (formato num\u00e9rico + do SQLite).\nEmbora n\u00e3o seja uma data formatada, responde corretamente + \u00e0 pergunta. Diferen\u00e7a de\nrepresenta\u00e7\u00e3o n\u00e3o \u00e9 + motivo de reprova\u00e7\u00e3o.\n\n-- EXEMPLO 6: APROVADO (query mais simples + que o gold, resultado equivalente) --\nPergunta: \"Which model of car has the + minimum horsepower?\"\nSQL: SELECT Model FROM car_names JOIN cars_data ON car_names.MakeId + = cars_data.Id WHERE Horsepower = (SELECT MIN(Horsepower) FROM cars_data) LIMIT + 1\nResultado: [(''triumph'',)]\nVEREDITO: APROVADO\nRaz\u00e3o: A query retorna + corretamente o modelo com menor pot\u00eancia. O LIMIT 1 garante unicidade\ne + o resultado \u00e9 semanticamente correto. 
Aprovar.\n\n=== CRIT\u00c9RIOS DE + AVALIA\u00c7\u00c3O ===\n\nREPROVE quando houver:\n- Escopo incompleto: query + cobre apenas parte do que a pergunta pede\n- Erro sem\u00e2ntico: l\u00f3gica + correta na forma mas errada no significado (MIN vs MAX, ANY vs ALL)\n- JOIN + incorreto que altera os dados sendo agregados ou filtrados\n- Resultado vazio + quando a pergunta claramente espera dados\n- Filtro com valor literal diferente + do que est\u00e1 no banco\n- M\u00e9trica errada (SUM vs AVG, COUNT vs COUNT + DISTINCT, etc.)\n- Erro de execu\u00e7\u00e3o SQL\n\nAPROVE quando:\n- O resultado + responde \u00e0 pergunta, mesmo com formato ou representa\u00e7\u00e3o diferente\n- + H\u00e1 colunas extras que n\u00e3o prejudicam a resposta\n- A precis\u00e3o + num\u00e9rica difere mas o valor est\u00e1 correto\n- A query \u00e9 mais simples + que o esperado mas semanticamente equivalente\n\nAvalie com rigor sem\u00e2ntico. + Resultados que parecem plaus\u00edveis mas derivam de l\u00f3gica\nincorreta + devem ser reprovados. 
N\u00e3o presuma que uma query bem-formada est\u00e1 correta.\n\nResponda + no formato:\nVEREDITO: APROVADO ou REPROVADO\nFEEDBACK: "}], "role": "user"}], "safetySettings": [], "generationConfig": + {"temperature": 0.7, "candidateCount": 1}}' + headers: + Accept: + - '*' + Accept-Encoding: + - g + Connection: + - k + Content-Length: + - '7' + Content-Type: + - a + Host: + - g + User-Agent: + - g + x-goog-api-client: + - g + method: POST + uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent + response: + body: + string: !!binary | + H4sIAAAAAAAC/7VUXWvbMBR976+4+HE4NgvN2uZpZUnawMqyNB0b6wiKfONolSVXH2Gh5L/vSnbb + pAuMjQ0ClpSrc+6559gPRwAJGqNN0ocH2tCW6wJpd9w9S5uDCq1lZThLvmgP+IMjFljARnsD3BuD + ysG9146lUEtkFoGvkN81BbVkCpgqYCGkFKqEAh0T0mYw0gYqbRCEWmpTMSe0Avq5lbAQm0phhawA + p/uwcq62/TxnIiu1LiVmBa7zEiuhRIfVIi80t7lhDjtSVMIR/kwTvBKOaPZa9UHNYeiA+YyRwa16 + BR+DsmfVy9A1OiN4H0pUSOVijSSy9ATbNkcN2YzrKm8rcM61ckQ+XxrEuRNo5gbvPVpnU4hkfeil + 1G+BMuBGWd2s11lKZle3atLM1RDxhuYFJ1n3+E23d3r2+tRmSWuUdcx5G3yaDq8/3EzfDefDz5fn + N9ez4eCxpp0+FX2NB9DaHv986zZ19Dk8f5HSjN3UPLtEWbeA8R75ercLuQ/b8lpuRB08DgTvkRnV + uM8W2ju4iJLhfDJuomR38COANzJc/OMcJDsw26f1t3a1Tf9+CjEYI5qmN7g7jbXQMob5NyOJOq9i + lALbPwnTy6lFjnER8C/a29O2dILmSijvkBYTo78jd+EoJLAzIuAZ4R6EG4gKlW317WsKWdCcPbpc + Sr1g8gVI+KIEkkbzftCTvcrtIfZPTProTu//OTsNr9mYvkq7tsZ3b0DmbALCyVOwGupAuz3aHv0E + YzWY6VIFAAA= + headers: + Alt-Svc: + - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 + Content-Encoding: + - gzip + Content-Type: + - application/json; charset=UTF-8 + Date: + - Thu, 21 May 2026 23:50:52 GMT + Server: + - scaffolding on HTTPServer2 + Server-Timing: + - gfet4t7; dur=283 + Transfer-Encoding: + - chunked + Vary: + - Origin + - X-Origin + - Referer + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - SAMEORIGIN + X-Gemini-Service-Tier: + - standard + X-XSS-Protection: + - '0' + status: + code: 429 + message: Too Many Requests +- request: + body: '{"contents": [{"parts": [{"text": "Voc\u00ea \u00e9 um 
revisor de qualidade + para consultas SQL geradas por IA.\n\nSua tarefa: avaliar se a consulta SQL + e seus resultados respondem adequadamente\n\u00e0 pergunta original do usu\u00e1rio.\n\n=== + PERGUNTA DO USU\u00c1RIO ===\nQuero saber quantos clientes existem\n\n=== SCHEMA + DO BANCO ===\n=== SCHEMA SQLITE (INTROSPECCAO REAL) ===\n\nTabela: customers\n- + customer_id: TEXT (PK)\n- customer_unique_id: TEXT\n- customer_zip_code_prefix: + INTEGER\n- customer_city: TEXT\n- customer_state: TEXT\n\nTabela: geolocation\n- + geolocation_zip_code_prefix: INTEGER\n- geolocation_lat: REAL\n- geolocation_lng: + REAL\n- geolocation_city: TEXT\n- geolocation_state: TEXT\n\nTabela: order_items\n- + order_id: TEXT (PK)\n- order_item_id: INTEGER (PK)\n- product_id: TEXT\n- seller_id: + TEXT\n- shipping_limit_date: TEXT\n- price: REAL\n- freight_value: REAL\n Foreign + keys:\n - seller_id -> sellers.seller_id (on_update=NO ACTION, on_delete=NO + ACTION)\n - product_id -> products.product_id (on_update=NO ACTION, on_delete=NO + ACTION)\n - order_id -> orders.order_id (on_update=NO ACTION, on_delete=NO + ACTION)\n\nTabela: order_payments\n- order_id: TEXT (PK)\n- payment_sequential: + INTEGER (PK)\n- payment_type: TEXT\n- payment_installments: INTEGER\n- payment_value: + REAL\n Foreign keys:\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\n\nTabela: order_reviews\n- review_id: TEXT (PK)\n- order_id: + TEXT\n- review_score: INTEGER\n- review_comment_title: TEXT\n- review_comment_message: + TEXT\n- review_creation_date: TEXT\n- review_answer_timestamp: TEXT\n Foreign + keys:\n - order_id -> orders.order_id (on_update=NO ACTION, on_delete=NO ACTION)\n\nTabela: + orders\n- order_id: TEXT (PK)\n- customer_id: TEXT\n- order_status: TEXT\n- + order_purchase_timestamp: TEXT\n- order_approved_at: TEXT\n- order_delivered_carrier_date: + TEXT\n- order_delivered_customer_date: TEXT\n- order_estimated_delivery_date: + TEXT\n Foreign keys:\n - customer_id -> 
customers.customer_id (on_update=NO + ACTION, on_delete=NO ACTION)\n\nTabela: products\n- product_id: TEXT (PK)\n- + product_category_name: TEXT\n- product_name_length: REAL\n- product_description_length: + REAL\n- product_photos_qty: REAL\n- product_weight_g: REAL\n- product_length_cm: + REAL\n- product_height_cm: REAL\n- product_width_cm: REAL\n\nTabela: sellers\n- + seller_id: TEXT (PK)\n- seller_zip_code_prefix: INTEGER\n- seller_city: TEXT\n- + seller_state: TEXT\n\n\n=== CONVERSA COM O AGENTE (se houver) ===\n[[\"ai: A + sua pergunta ''Quem \u00e9 o Brad Pitt?'' parece estar fora do contexto dos + dados dispon\u00edveis neste sistema, que s\u00e3o sobre clientes, produtos + e vendas de um e-commerce. Voc\u00ea gostaria de perguntar algo relacionado + aos dados de e-commerce que temos?\", ''user: Quero saber quantos clientes existem'']]\n\n=== + SQL GERADA ===\nSELECT COUNT(customer_id) FROM customers\n\n=== RESULTADO DA + EXECU\u00c7\u00c3O ===\nStatus: exec_ok\nTotal de linhas: 1\nAmostra dos resultados + (primeiras linhas):\n[{''COUNT(customer_id)'': 99441}]\n\n=== ERROS (se houver) + ===\nNenhum\n\n=== TENTATIVAS ANTERIORES ===\nNenhuma tentativa anterior (esta + \u00e9 a primeira).\n\n=== EXEMPLOS DE AVALIA\u00c7\u00c3O ===\n\n-- EXEMPLO + 1: REPROVADO (escopo incompleto) --\nPergunta: \"Which airport has the least + number of flights?\"\nSQL: SELECT SourceAirport FROM flights GROUP BY SourceAirport + ORDER BY COUNT(*) ASC LIMIT 1\nResultado: [(''AID'',)]\nVEREDITO: REPROVADO\nRaz\u00e3o: + A query conta apenas voos com partida (SourceAirport) e ignora voos com chegada + (DestAirport).\nO escopo da pergunta \u00e9 \"flights\" em geral \u2014 a query + responde a uma pergunta diferente.\n\n-- EXEMPLO 2: REPROVADO (erro sem\u00e2ntico: + MIN vs MAX) --\nPergunta: \"Which Asian countries have a population larger than + any country in Africa?\"\nSQL: SELECT Name FROM country WHERE Continent=''Asia'' + AND Population > (SELECT MAX(Population) FROM country 
WHERE Continent=''Africa'')\nResultado: + [] (vazio)\nVEREDITO: REPROVADO\nRaz\u00e3o: \"Larger than any country in Africa\" + significa maior que pelo menos um pa\u00eds africano (MIN),\nn\u00e3o maior + que todos os pa\u00edses africanos (MAX). A l\u00f3gica est\u00e1 semanticamente + errada.\n\n-- EXEMPLO 3: REPROVADO (resultado vazio suspeito) --\nPergunta: + \"Find the last name of students who live in North Carolina and are not enrolled + in any degree.\"\nSQL: SELECT last_name FROM Students WHERE state_province_county + = ''North Carolina'' AND ...\nResultado: [] (vazio)\nVEREDITO: REPROVADO\nRaz\u00e3o: + Resultado vazio quando a pergunta espera dados reais \u00e9 suspeito. Verifique + se o filtro\nde string corresponde exatamente ao valor no banco (ex: ''NorthCarolina'' + vs ''North Carolina'').\n\n-- EXEMPLO 4: REPROVADO (JOIN incorreto muda o que + est\u00e1 sendo contado) --\nPergunta: \"Find the name of makers that produced + some cars in 1970.\"\nSQL: SELECT DISTINCT Maker FROM car_makers JOIN car_names + ON car_makers.Id = car_names.MakeId JOIN cars_data ON car_names.MakeId = cars_data.Id + WHERE cars_data.Year = 1970\nResultado: [(''chevrolet'',), (''buick'',)]\nVEREDITO: + REPROVADO\nRaz\u00e3o: O JOIN usa car_names.MakeId para conectar a cars_data, + mas cars_data.Id refere-se\nao ID do carro, n\u00e3o do fabricante. O caminho + correto seria via model_list. Os resultados\nparecem plaus\u00edveis mas derivam + de uma jun\u00e7\u00e3o incorreta.\n\n-- EXEMPLO 5: APROVADO (formato diferente, + resposta correta) --\nPergunta: \"On average, when were the transcripts printed?\"\nSQL: + SELECT AVG(transcript_date) AS average_transcript_date FROM Transcripts\nResultado: + [(''1989.9333333333334'',)]\nVEREDITO: APROVADO\nRaz\u00e3o: O resultado \u00e9 + um n\u00famero que representa a m\u00e9dia das datas (formato num\u00e9rico + do SQLite).\nEmbora n\u00e3o seja uma data formatada, responde corretamente + \u00e0 pergunta. 
Diferen\u00e7a de\nrepresenta\u00e7\u00e3o n\u00e3o \u00e9 + motivo de reprova\u00e7\u00e3o.\n\n-- EXEMPLO 6: APROVADO (query mais simples + que o gold, resultado equivalente) --\nPergunta: \"Which model of car has the + minimum horsepower?\"\nSQL: SELECT Model FROM car_names JOIN cars_data ON car_names.MakeId + = cars_data.Id WHERE Horsepower = (SELECT MIN(Horsepower) FROM cars_data) LIMIT + 1\nResultado: [(''triumph'',)]\nVEREDITO: APROVADO\nRaz\u00e3o: A query retorna + corretamente o modelo com menor pot\u00eancia. O LIMIT 1 garante unicidade\ne + o resultado \u00e9 semanticamente correto. Aprovar.\n\n=== CRIT\u00c9RIOS DE + AVALIA\u00c7\u00c3O ===\n\nREPROVE quando houver:\n- Escopo incompleto: query + cobre apenas parte do que a pergunta pede\n- Erro sem\u00e2ntico: l\u00f3gica + correta na forma mas errada no significado (MIN vs MAX, ANY vs ALL)\n- JOIN + incorreto que altera os dados sendo agregados ou filtrados\n- Resultado vazio + quando a pergunta claramente espera dados\n- Filtro com valor literal diferente + do que est\u00e1 no banco\n- M\u00e9trica errada (SUM vs AVG, COUNT vs COUNT + DISTINCT, etc.)\n- Erro de execu\u00e7\u00e3o SQL\n\nAPROVE quando:\n- O resultado + responde \u00e0 pergunta, mesmo com formato ou representa\u00e7\u00e3o diferente\n- + H\u00e1 colunas extras que n\u00e3o prejudicam a resposta\n- A precis\u00e3o + num\u00e9rica difere mas o valor est\u00e1 correto\n- A query \u00e9 mais simples + que o esperado mas semanticamente equivalente\n\nAvalie com rigor sem\u00e2ntico. + Resultados que parecem plaus\u00edveis mas derivam de l\u00f3gica\nincorreta + devem ser reprovados. 
N\u00e3o presuma que uma query bem-formada est\u00e1 correta.\n\nResponda + no formato:\nVEREDITO: APROVADO ou REPROVADO\nFEEDBACK: "}], "role": "user"}], "safetySettings": [], "generationConfig": + {"temperature": 0.7, "candidateCount": 1}}' + headers: + Accept: + - '*' + Accept-Encoding: + - g + Connection: + - k + Content-Length: + - '7' + Content-Type: + - a + Host: + - g + User-Agent: + - g + x-goog-api-client: + - g + method: POST + uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent + response: + body: + string: !!binary | + H4sIAAAAAAAC/7VUXWvbMBR976+4+HE49tY2jORpZUnawMqyNB0b6wiKfONolSVXH6Gh5L/vSnbb + pAuMjQ0ClpSrc+6559gPRwAJGqNN0ocH2tCW6wJpd3rcS5uDCq1lZThLvmoPeM8RCyxgo70B7o1B + 5eDOa8dSqCUyi8BXyG+bgloyBUwVsBBSClVCgY4JaTMYaQOVNghCLbWpmBNaAf3cSliITaWwQlaA + 031YOVfbfp4zkZValxKzAtd5iZVQosNqkRea29wwhx0pKuEIf6YJXglHNHut+qDmMHTAfMbI4Ea9 + gk9B2bPqZeganRG8DyUqpHKxRhJZeoJtm6OGbMZ1lbcVOOdaOSKfLw3i3Ak0c4N3Hq2zKUSyPnRT + 6rdAGXCjrOOs21lKZlc3atLM1RDxhuYF3ex17/TkTa/39sRmSWuUdcx5G3yaDq8+Xk/fD+fDLxdn + 11ez4eCxpp0+FX2LB9DaHv985zZ19Dk8f5HSjN3UPLtAWbeA8R75ersLuQ/b8lpuRB08DgQfkBnV + uM8W2js4j5LhbDJuomR38COANzJc/OMcJDsw26f193a1Tf9+CjEYI5qmN7g7jbXQMob5NyOJOi9j + lALbPwnTy6lFjnER8M/b29O2dILmUijvkBYTo38gd+EoJLAzIuAZ4R6EG4gKlW317WsKWdCcPbpc + Sr1g8gVI+KIEkkbzftCTvcrtIfbPTProTvf/OTsNr9mYvkq7tsZ3b0DmbCL7U7Aa6kC7Pdoe/QTm + 7WK/UgUAAA== + headers: + Alt-Svc: + - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 + Content-Encoding: + - gzip + Content-Type: + - application/json; charset=UTF-8 + Date: + - Thu, 21 May 2026 23:50:54 GMT + Server: + - scaffolding on HTTPServer2 + Server-Timing: + - gfet4t7; dur=288 + Transfer-Encoding: + - chunked + Vary: + - Origin + - X-Origin + - Referer + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - SAMEORIGIN + X-Gemini-Service-Tier: + - standard + X-XSS-Protection: + - '0' + status: + code: 429 + message: Too Many Requests +- request: + body: '{"contents": [{"parts": [{"text": "Voc\u00ea \u00e9 um 
revisor de qualidade + para consultas SQL geradas por IA.\n\nSua tarefa: avaliar se a consulta SQL + e seus resultados respondem adequadamente\n\u00e0 pergunta original do usu\u00e1rio.\n\n=== + PERGUNTA DO USU\u00c1RIO ===\nQuero saber quantos clientes existem\n\n=== SCHEMA + DO BANCO ===\n=== SCHEMA SQLITE (INTROSPECCAO REAL) ===\n\nTabela: customers\n- + customer_id: TEXT (PK)\n- customer_unique_id: TEXT\n- customer_zip_code_prefix: + INTEGER\n- customer_city: TEXT\n- customer_state: TEXT\n\nTabela: geolocation\n- + geolocation_zip_code_prefix: INTEGER\n- geolocation_lat: REAL\n- geolocation_lng: + REAL\n- geolocation_city: TEXT\n- geolocation_state: TEXT\n\nTabela: order_items\n- + order_id: TEXT (PK)\n- order_item_id: INTEGER (PK)\n- product_id: TEXT\n- seller_id: + TEXT\n- shipping_limit_date: TEXT\n- price: REAL\n- freight_value: REAL\n Foreign + keys:\n - seller_id -> sellers.seller_id (on_update=NO ACTION, on_delete=NO + ACTION)\n - product_id -> products.product_id (on_update=NO ACTION, on_delete=NO + ACTION)\n - order_id -> orders.order_id (on_update=NO ACTION, on_delete=NO + ACTION)\n\nTabela: order_payments\n- order_id: TEXT (PK)\n- payment_sequential: + INTEGER (PK)\n- payment_type: TEXT\n- payment_installments: INTEGER\n- payment_value: + REAL\n Foreign keys:\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\n\nTabela: order_reviews\n- review_id: TEXT (PK)\n- order_id: + TEXT\n- review_score: INTEGER\n- review_comment_title: TEXT\n- review_comment_message: + TEXT\n- review_creation_date: TEXT\n- review_answer_timestamp: TEXT\n Foreign + keys:\n - order_id -> orders.order_id (on_update=NO ACTION, on_delete=NO ACTION)\n\nTabela: + orders\n- order_id: TEXT (PK)\n- customer_id: TEXT\n- order_status: TEXT\n- + order_purchase_timestamp: TEXT\n- order_approved_at: TEXT\n- order_delivered_carrier_date: + TEXT\n- order_delivered_customer_date: TEXT\n- order_estimated_delivery_date: + TEXT\n Foreign keys:\n - customer_id -> 
customers.customer_id (on_update=NO + ACTION, on_delete=NO ACTION)\n\nTabela: products\n- product_id: TEXT (PK)\n- + product_category_name: TEXT\n- product_name_length: REAL\n- product_description_length: + REAL\n- product_photos_qty: REAL\n- product_weight_g: REAL\n- product_length_cm: + REAL\n- product_height_cm: REAL\n- product_width_cm: REAL\n\nTabela: sellers\n- + seller_id: TEXT (PK)\n- seller_zip_code_prefix: INTEGER\n- seller_city: TEXT\n- + seller_state: TEXT\n\n\n=== CONVERSA COM O AGENTE (se houver) ===\n[[\"ai: A + sua pergunta ''Quem \u00e9 o Brad Pitt?'' parece estar fora do contexto dos + dados dispon\u00edveis neste sistema, que s\u00e3o sobre clientes, produtos + e vendas de um e-commerce. Voc\u00ea gostaria de perguntar algo relacionado + aos dados de e-commerce que temos?\", ''user: Quero saber quantos clientes existem'']]\n\n=== + SQL GERADA ===\nSELECT COUNT(customer_id) FROM customers\n\n=== RESULTADO DA + EXECU\u00c7\u00c3O ===\nStatus: exec_ok\nTotal de linhas: 1\nAmostra dos resultados + (primeiras linhas):\n[{''COUNT(customer_id)'': 99441}]\n\n=== ERROS (se houver) + ===\nNenhum\n\n=== TENTATIVAS ANTERIORES ===\nNenhuma tentativa anterior (esta + \u00e9 a primeira).\n\n=== EXEMPLOS DE AVALIA\u00c7\u00c3O ===\n\n-- EXEMPLO + 1: REPROVADO (escopo incompleto) --\nPergunta: \"Which airport has the least + number of flights?\"\nSQL: SELECT SourceAirport FROM flights GROUP BY SourceAirport + ORDER BY COUNT(*) ASC LIMIT 1\nResultado: [(''AID'',)]\nVEREDITO: REPROVADO\nRaz\u00e3o: + A query conta apenas voos com partida (SourceAirport) e ignora voos com chegada + (DestAirport).\nO escopo da pergunta \u00e9 \"flights\" em geral \u2014 a query + responde a uma pergunta diferente.\n\n-- EXEMPLO 2: REPROVADO (erro sem\u00e2ntico: + MIN vs MAX) --\nPergunta: \"Which Asian countries have a population larger than + any country in Africa?\"\nSQL: SELECT Name FROM country WHERE Continent=''Asia'' + AND Population > (SELECT MAX(Population) FROM country 
WHERE Continent=''Africa'')\nResultado: + [] (vazio)\nVEREDITO: REPROVADO\nRaz\u00e3o: \"Larger than any country in Africa\" + significa maior que pelo menos um pa\u00eds africano (MIN),\nn\u00e3o maior + que todos os pa\u00edses africanos (MAX). A l\u00f3gica est\u00e1 semanticamente + errada.\n\n-- EXEMPLO 3: REPROVADO (resultado vazio suspeito) --\nPergunta: + \"Find the last name of students who live in North Carolina and are not enrolled + in any degree.\"\nSQL: SELECT last_name FROM Students WHERE state_province_county + = ''North Carolina'' AND ...\nResultado: [] (vazio)\nVEREDITO: REPROVADO\nRaz\u00e3o: + Resultado vazio quando a pergunta espera dados reais \u00e9 suspeito. Verifique + se o filtro\nde string corresponde exatamente ao valor no banco (ex: ''NorthCarolina'' + vs ''North Carolina'').\n\n-- EXEMPLO 4: REPROVADO (JOIN incorreto muda o que + est\u00e1 sendo contado) --\nPergunta: \"Find the name of makers that produced + some cars in 1970.\"\nSQL: SELECT DISTINCT Maker FROM car_makers JOIN car_names + ON car_makers.Id = car_names.MakeId JOIN cars_data ON car_names.MakeId = cars_data.Id + WHERE cars_data.Year = 1970\nResultado: [(''chevrolet'',), (''buick'',)]\nVEREDITO: + REPROVADO\nRaz\u00e3o: O JOIN usa car_names.MakeId para conectar a cars_data, + mas cars_data.Id refere-se\nao ID do carro, n\u00e3o do fabricante. O caminho + correto seria via model_list. Os resultados\nparecem plaus\u00edveis mas derivam + de uma jun\u00e7\u00e3o incorreta.\n\n-- EXEMPLO 5: APROVADO (formato diferente, + resposta correta) --\nPergunta: \"On average, when were the transcripts printed?\"\nSQL: + SELECT AVG(transcript_date) AS average_transcript_date FROM Transcripts\nResultado: + [(''1989.9333333333334'',)]\nVEREDITO: APROVADO\nRaz\u00e3o: O resultado \u00e9 + um n\u00famero que representa a m\u00e9dia das datas (formato num\u00e9rico + do SQLite).\nEmbora n\u00e3o seja uma data formatada, responde corretamente + \u00e0 pergunta. 
Diferen\u00e7a de\nrepresenta\u00e7\u00e3o n\u00e3o \u00e9 + motivo de reprova\u00e7\u00e3o.\n\n-- EXEMPLO 6: APROVADO (query mais simples + que o gold, resultado equivalente) --\nPergunta: \"Which model of car has the + minimum horsepower?\"\nSQL: SELECT Model FROM car_names JOIN cars_data ON car_names.MakeId + = cars_data.Id WHERE Horsepower = (SELECT MIN(Horsepower) FROM cars_data) LIMIT + 1\nResultado: [(''triumph'',)]\nVEREDITO: APROVADO\nRaz\u00e3o: A query retorna + corretamente o modelo com menor pot\u00eancia. O LIMIT 1 garante unicidade\ne + o resultado \u00e9 semanticamente correto. Aprovar.\n\n=== CRIT\u00c9RIOS DE + AVALIA\u00c7\u00c3O ===\n\nREPROVE quando houver:\n- Escopo incompleto: query + cobre apenas parte do que a pergunta pede\n- Erro sem\u00e2ntico: l\u00f3gica + correta na forma mas errada no significado (MIN vs MAX, ANY vs ALL)\n- JOIN + incorreto que altera os dados sendo agregados ou filtrados\n- Resultado vazio + quando a pergunta claramente espera dados\n- Filtro com valor literal diferente + do que est\u00e1 no banco\n- M\u00e9trica errada (SUM vs AVG, COUNT vs COUNT + DISTINCT, etc.)\n- Erro de execu\u00e7\u00e3o SQL\n\nAPROVE quando:\n- O resultado + responde \u00e0 pergunta, mesmo com formato ou representa\u00e7\u00e3o diferente\n- + H\u00e1 colunas extras que n\u00e3o prejudicam a resposta\n- A precis\u00e3o + num\u00e9rica difere mas o valor est\u00e1 correto\n- A query \u00e9 mais simples + que o esperado mas semanticamente equivalente\n\nAvalie com rigor sem\u00e2ntico. + Resultados que parecem plaus\u00edveis mas derivam de l\u00f3gica\nincorreta + devem ser reprovados. 
N\u00e3o presuma que uma query bem-formada est\u00e1 correta.\n\nResponda + no formato:\nVEREDITO: APROVADO ou REPROVADO\nFEEDBACK: "}], "role": "user"}], "safetySettings": [], "generationConfig": + {"temperature": 0.7, "candidateCount": 1}}' + headers: + Accept: + - '*' + Accept-Encoding: + - g + Connection: + - k + Content-Length: + - '7' + Content-Type: + - a + Host: + - g + User-Agent: + - g + x-goog-api-client: + - g + method: POST + uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent + response: + body: + string: !!binary | + H4sIAAAAAAAC/7VUXW/aMBR951dc8TiFRKIwDZ5WDWiRVo1ROm1aJ2ScS/Dq2Kk/0FDFf9+1k7bQ + IU2bNgkptrk+5557TvLQAmijMdq0h/BAG9pynSPtet1BUh+UaC0rwln7i/aAPzhijjnstDfAvTGo + HNx77VgClURmEfgG+V1dUEmmgKkcVkJKoQrI0TEhbQoTbaDUBkGotTYlc0IroJ/bCAuxqQQ2yHJw + eggb5yo7zDIm0kLrQmKa4zYrsBRKdFglslxzmxnmsCNFKRzhLzTBK+GI5qhVH9Schg6Yzxgp3KpX + 8DEoe1a9Dl2jM4IPoUCFVC62SCILT7BNc9SQTbkus6YCl1wrR+TLtUFcOoFmafDeo3U2gUg2hH5C + /eYoA26U1U37nbVkdnOrZvVcDRHvaF7QTbtvemdng97rgU3bjVHWMedt8Gk+vv5wM383Xo4/X57f + XC/Go8eaZvpU9DUeQGN7/POt21XR5/D8RUo9dlPx9BJl1QDGe+Tr3SHkMWzDa7kRVfA4ELxHZlTt + Pltp7+AiSobz2bSOkj3AjwDeyHDxj3PQPoDZP62/Nat98vdTiMGY0DS9wcNpbIWWMcy/GUnUeRWj + FNj+SZheTi1yTPOAf9HcnjelMzRXQnmHtJgZ/R25C0chgZ0JAS8I9yTcSJSobKPvWFP4WgSAWs9x + iF9ghdhozh4DUUi9YrJ9VLI/xf6JSR/d6f8/Z+fhNZvSV+nQ1vjujcicXUDoPgWrpg60+9a+9RNJ + alMYUgUAAA== + headers: + Alt-Svc: + - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 + Content-Encoding: + - gzip + Content-Type: + - application/json; charset=UTF-8 + Date: + - Thu, 21 May 2026 23:50:57 GMT + Server: + - scaffolding on HTTPServer2 + Server-Timing: + - gfet4t7; dur=296 + Transfer-Encoding: + - chunked + Vary: + - Origin + - X-Origin + - Referer + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - SAMEORIGIN + X-Gemini-Service-Tier: + - standard + X-XSS-Protection: + - '0' + status: + code: 429 + message: Too Many Requests +- request: + body: '{"contents": [{"parts": [{"text": "Voc\u00ea \u00e9 um 
revisor de qualidade + para consultas SQL geradas por IA.\n\nSua tarefa: avaliar se a consulta SQL + e seus resultados respondem adequadamente\n\u00e0 pergunta original do usu\u00e1rio.\n\n=== + PERGUNTA DO USU\u00c1RIO ===\nQuero saber quantos clientes existem\n\n=== SCHEMA + DO BANCO ===\n=== SCHEMA SQLITE (INTROSPECCAO REAL) ===\n\nTabela: customers\n- + customer_id: TEXT (PK)\n- customer_unique_id: TEXT\n- customer_zip_code_prefix: + INTEGER\n- customer_city: TEXT\n- customer_state: TEXT\n\nTabela: geolocation\n- + geolocation_zip_code_prefix: INTEGER\n- geolocation_lat: REAL\n- geolocation_lng: + REAL\n- geolocation_city: TEXT\n- geolocation_state: TEXT\n\nTabela: order_items\n- + order_id: TEXT (PK)\n- order_item_id: INTEGER (PK)\n- product_id: TEXT\n- seller_id: + TEXT\n- shipping_limit_date: TEXT\n- price: REAL\n- freight_value: REAL\n Foreign + keys:\n - seller_id -> sellers.seller_id (on_update=NO ACTION, on_delete=NO + ACTION)\n - product_id -> products.product_id (on_update=NO ACTION, on_delete=NO + ACTION)\n - order_id -> orders.order_id (on_update=NO ACTION, on_delete=NO + ACTION)\n\nTabela: order_payments\n- order_id: TEXT (PK)\n- payment_sequential: + INTEGER (PK)\n- payment_type: TEXT\n- payment_installments: INTEGER\n- payment_value: + REAL\n Foreign keys:\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\n\nTabela: order_reviews\n- review_id: TEXT (PK)\n- order_id: + TEXT\n- review_score: INTEGER\n- review_comment_title: TEXT\n- review_comment_message: + TEXT\n- review_creation_date: TEXT\n- review_answer_timestamp: TEXT\n Foreign + keys:\n - order_id -> orders.order_id (on_update=NO ACTION, on_delete=NO ACTION)\n\nTabela: + orders\n- order_id: TEXT (PK)\n- customer_id: TEXT\n- order_status: TEXT\n- + order_purchase_timestamp: TEXT\n- order_approved_at: TEXT\n- order_delivered_carrier_date: + TEXT\n- order_delivered_customer_date: TEXT\n- order_estimated_delivery_date: + TEXT\n Foreign keys:\n - customer_id -> 
customers.customer_id (on_update=NO + ACTION, on_delete=NO ACTION)\n\nTabela: products\n- product_id: TEXT (PK)\n- + product_category_name: TEXT\n- product_name_length: REAL\n- product_description_length: + REAL\n- product_photos_qty: REAL\n- product_weight_g: REAL\n- product_length_cm: + REAL\n- product_height_cm: REAL\n- product_width_cm: REAL\n\nTabela: sellers\n- + seller_id: TEXT (PK)\n- seller_zip_code_prefix: INTEGER\n- seller_city: TEXT\n- + seller_state: TEXT\n\n\n=== CONVERSA COM O AGENTE (se houver) ===\n[[\"ai: A + sua pergunta ''Quem \u00e9 o Brad Pitt?'' parece estar fora do contexto dos + dados dispon\u00edveis neste sistema, que s\u00e3o sobre clientes, produtos + e vendas de um e-commerce. Voc\u00ea gostaria de perguntar algo relacionado + aos dados de e-commerce que temos?\", ''user: Quero saber quantos clientes existem'']]\n\n=== + SQL GERADA ===\nSELECT COUNT(customer_id) FROM customers\n\n=== RESULTADO DA + EXECU\u00c7\u00c3O ===\nStatus: exec_ok\nTotal de linhas: 1\nAmostra dos resultados + (primeiras linhas):\n[{''COUNT(customer_id)'': 99441}]\n\n=== ERROS (se houver) + ===\nNenhum\n\n=== TENTATIVAS ANTERIORES ===\nNenhuma tentativa anterior (esta + \u00e9 a primeira).\n\n=== EXEMPLOS DE AVALIA\u00c7\u00c3O ===\n\n-- EXEMPLO + 1: REPROVADO (escopo incompleto) --\nPergunta: \"Which airport has the least + number of flights?\"\nSQL: SELECT SourceAirport FROM flights GROUP BY SourceAirport + ORDER BY COUNT(*) ASC LIMIT 1\nResultado: [(''AID'',)]\nVEREDITO: REPROVADO\nRaz\u00e3o: + A query conta apenas voos com partida (SourceAirport) e ignora voos com chegada + (DestAirport).\nO escopo da pergunta \u00e9 \"flights\" em geral \u2014 a query + responde a uma pergunta diferente.\n\n-- EXEMPLO 2: REPROVADO (erro sem\u00e2ntico: + MIN vs MAX) --\nPergunta: \"Which Asian countries have a population larger than + any country in Africa?\"\nSQL: SELECT Name FROM country WHERE Continent=''Asia'' + AND Population > (SELECT MAX(Population) FROM country 
WHERE Continent=''Africa'')\nResultado: + [] (vazio)\nVEREDITO: REPROVADO\nRaz\u00e3o: \"Larger than any country in Africa\" + significa maior que pelo menos um pa\u00eds africano (MIN),\nn\u00e3o maior + que todos os pa\u00edses africanos (MAX). A l\u00f3gica est\u00e1 semanticamente + errada.\n\n-- EXEMPLO 3: REPROVADO (resultado vazio suspeito) --\nPergunta: + \"Find the last name of students who live in North Carolina and are not enrolled + in any degree.\"\nSQL: SELECT last_name FROM Students WHERE state_province_county + = ''North Carolina'' AND ...\nResultado: [] (vazio)\nVEREDITO: REPROVADO\nRaz\u00e3o: + Resultado vazio quando a pergunta espera dados reais \u00e9 suspeito. Verifique + se o filtro\nde string corresponde exatamente ao valor no banco (ex: ''NorthCarolina'' + vs ''North Carolina'').\n\n-- EXEMPLO 4: REPROVADO (JOIN incorreto muda o que + est\u00e1 sendo contado) --\nPergunta: \"Find the name of makers that produced + some cars in 1970.\"\nSQL: SELECT DISTINCT Maker FROM car_makers JOIN car_names + ON car_makers.Id = car_names.MakeId JOIN cars_data ON car_names.MakeId = cars_data.Id + WHERE cars_data.Year = 1970\nResultado: [(''chevrolet'',), (''buick'',)]\nVEREDITO: + REPROVADO\nRaz\u00e3o: O JOIN usa car_names.MakeId para conectar a cars_data, + mas cars_data.Id refere-se\nao ID do carro, n\u00e3o do fabricante. O caminho + correto seria via model_list. Os resultados\nparecem plaus\u00edveis mas derivam + de uma jun\u00e7\u00e3o incorreta.\n\n-- EXEMPLO 5: APROVADO (formato diferente, + resposta correta) --\nPergunta: \"On average, when were the transcripts printed?\"\nSQL: + SELECT AVG(transcript_date) AS average_transcript_date FROM Transcripts\nResultado: + [(''1989.9333333333334'',)]\nVEREDITO: APROVADO\nRaz\u00e3o: O resultado \u00e9 + um n\u00famero que representa a m\u00e9dia das datas (formato num\u00e9rico + do SQLite).\nEmbora n\u00e3o seja uma data formatada, responde corretamente + \u00e0 pergunta. 
Diferen\u00e7a de\nrepresenta\u00e7\u00e3o n\u00e3o \u00e9 + motivo de reprova\u00e7\u00e3o.\n\n-- EXEMPLO 6: APROVADO (query mais simples + que o gold, resultado equivalente) --\nPergunta: \"Which model of car has the + minimum horsepower?\"\nSQL: SELECT Model FROM car_names JOIN cars_data ON car_names.MakeId + = cars_data.Id WHERE Horsepower = (SELECT MIN(Horsepower) FROM cars_data) LIMIT + 1\nResultado: [(''triumph'',)]\nVEREDITO: APROVADO\nRaz\u00e3o: A query retorna + corretamente o modelo com menor pot\u00eancia. O LIMIT 1 garante unicidade\ne + o resultado \u00e9 semanticamente correto. Aprovar.\n\n=== CRIT\u00c9RIOS DE + AVALIA\u00c7\u00c3O ===\n\nREPROVE quando houver:\n- Escopo incompleto: query + cobre apenas parte do que a pergunta pede\n- Erro sem\u00e2ntico: l\u00f3gica + correta na forma mas errada no significado (MIN vs MAX, ANY vs ALL)\n- JOIN + incorreto que altera os dados sendo agregados ou filtrados\n- Resultado vazio + quando a pergunta claramente espera dados\n- Filtro com valor literal diferente + do que est\u00e1 no banco\n- M\u00e9trica errada (SUM vs AVG, COUNT vs COUNT + DISTINCT, etc.)\n- Erro de execu\u00e7\u00e3o SQL\n\nAPROVE quando:\n- O resultado + responde \u00e0 pergunta, mesmo com formato ou representa\u00e7\u00e3o diferente\n- + H\u00e1 colunas extras que n\u00e3o prejudicam a resposta\n- A precis\u00e3o + num\u00e9rica difere mas o valor est\u00e1 correto\n- A query \u00e9 mais simples + que o esperado mas semanticamente equivalente\n\nAvalie com rigor sem\u00e2ntico. + Resultados que parecem plaus\u00edveis mas derivam de l\u00f3gica\nincorreta + devem ser reprovados. 
N\u00e3o presuma que uma query bem-formada est\u00e1 correta.\n\nResponda + no formato:\nVEREDITO: APROVADO ou REPROVADO\nFEEDBACK: "}], "role": "user"}], "safetySettings": [], "generationConfig": + {"temperature": 0.7, "candidateCount": 1}}' + headers: + Accept: + - '*' + Accept-Encoding: + - g + Connection: + - k + Content-Length: + - '7' + Content-Type: + - a + Host: + - g + User-Agent: + - g + x-goog-api-client: + - g + method: POST + uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent + response: + body: + string: !!binary | + H4sIAAAAAAAC/7VUXW/aMBR951dc8TiFREWjHTytGtAirRqjdNq0Tsg4l+DVsVN/oKGK/75rJ22h + Q5o2bRJSbHN9zj33nOShBdBGY7RpD+CBNrTlOkfave72k/qgRGtZEc7aX7QH/MERc8xhq70B7o1B + 5eDea8cSqCQyi8DXyO/qgkoyBUzlsBRSClVAjo4JaVMYawOlNghCrbQpmRNaAf3cWliITSWwRpaD + 0wNYO1fZQZYxkRZaFxLTHDdZgaVQosMqkeWa28wwhx0pSuEIf64JXglHNAet+qDmOHTAfMZI4Va9 + go9B2bPqVeganRF8AAUqpHKxQRJZeIJtmqOGbMp1mTUVuOBaOSJfrAziwgk0C4P3Hq2zCUSyAfQS + 6jdHGXCjrG7a66wks+tbNa3naoh4S/OC3ll60n1zetrv9k9s2m6cso45b4NRs9H1h5vZu9Fi9Pny + /OZ6Pho+1jTjp6Kv8QAa3+Ofb922ikaH5y9a6rmbiqeXKKsGMN4jY+/2IQ9hG17LjaiCyYHgPTKj + avvZUnsHF1EznE8ndZbsHn4E8EaGi38chPYezO5p/a1Z7ZK/n0JMxpim6Q3uT2MjtIxp/s1Ios6r + mKXA9k/S9HJqkWOSB/yL5vasKZ2iuRLKO6TF1OjvyF04ChHsjAl4TrhH4YaiRGUbfYeaQhY0Z48u + F1IvmXwBEj4pgaTWfJj09kHl7hj7JyZ9dKf3/5ydhfdsQp+lfVvjyzckc7aR/ewpWTV34N21dq2f + 4DhuFlQFAAA= + headers: + Alt-Svc: + - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 + Content-Encoding: + - gzip + Content-Type: + - application/json; charset=UTF-8 + Date: + - Thu, 21 May 2026 23:51:02 GMT + Server: + - scaffolding on HTTPServer2 + Server-Timing: + - gfet4t7; dur=270 + Transfer-Encoding: + - chunked + Vary: + - Origin + - X-Origin + - Referer + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - SAMEORIGIN + X-Gemini-Service-Tier: + - standard + X-XSS-Protection: + - '0' + status: + code: 429 + message: Too Many Requests +- request: + body: '{"contents": [{"parts": [{"text": "Voc\u00ea \u00e9 um 
revisor de qualidade + para consultas SQL geradas por IA.\n\nSua tarefa: avaliar se a consulta SQL + e seus resultados respondem adequadamente\n\u00e0 pergunta original do usu\u00e1rio.\n\n=== + PERGUNTA DO USU\u00c1RIO ===\nQuero saber quantos clientes existem\n\n=== SCHEMA + DO BANCO ===\n=== SCHEMA SQLITE (INTROSPECCAO REAL) ===\n\nTabela: customers\n- + customer_id: TEXT (PK)\n- customer_unique_id: TEXT\n- customer_zip_code_prefix: + INTEGER\n- customer_city: TEXT\n- customer_state: TEXT\n\nTabela: geolocation\n- + geolocation_zip_code_prefix: INTEGER\n- geolocation_lat: REAL\n- geolocation_lng: + REAL\n- geolocation_city: TEXT\n- geolocation_state: TEXT\n\nTabela: order_items\n- + order_id: TEXT (PK)\n- order_item_id: INTEGER (PK)\n- product_id: TEXT\n- seller_id: + TEXT\n- shipping_limit_date: TEXT\n- price: REAL\n- freight_value: REAL\n Foreign + keys:\n - seller_id -> sellers.seller_id (on_update=NO ACTION, on_delete=NO + ACTION)\n - product_id -> products.product_id (on_update=NO ACTION, on_delete=NO + ACTION)\n - order_id -> orders.order_id (on_update=NO ACTION, on_delete=NO + ACTION)\n\nTabela: order_payments\n- order_id: TEXT (PK)\n- payment_sequential: + INTEGER (PK)\n- payment_type: TEXT\n- payment_installments: INTEGER\n- payment_value: + REAL\n Foreign keys:\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\n\nTabela: order_reviews\n- review_id: TEXT (PK)\n- order_id: + TEXT\n- review_score: INTEGER\n- review_comment_title: TEXT\n- review_comment_message: + TEXT\n- review_creation_date: TEXT\n- review_answer_timestamp: TEXT\n Foreign + keys:\n - order_id -> orders.order_id (on_update=NO ACTION, on_delete=NO ACTION)\n\nTabela: + orders\n- order_id: TEXT (PK)\n- customer_id: TEXT\n- order_status: TEXT\n- + order_purchase_timestamp: TEXT\n- order_approved_at: TEXT\n- order_delivered_carrier_date: + TEXT\n- order_delivered_customer_date: TEXT\n- order_estimated_delivery_date: + TEXT\n Foreign keys:\n - customer_id -> 
customers.customer_id (on_update=NO + ACTION, on_delete=NO ACTION)\n\nTabela: products\n- product_id: TEXT (PK)\n- + product_category_name: TEXT\n- product_name_length: REAL\n- product_description_length: + REAL\n- product_photos_qty: REAL\n- product_weight_g: REAL\n- product_length_cm: + REAL\n- product_height_cm: REAL\n- product_width_cm: REAL\n\nTabela: sellers\n- + seller_id: TEXT (PK)\n- seller_zip_code_prefix: INTEGER\n- seller_city: TEXT\n- + seller_state: TEXT\n\n\n=== CONVERSA COM O AGENTE (se houver) ===\n[[\"ai: A + sua pergunta ''Quem \u00e9 o Brad Pitt?'' parece estar fora do contexto dos + dados dispon\u00edveis neste sistema, que s\u00e3o sobre clientes, produtos + e vendas de um e-commerce. Voc\u00ea gostaria de perguntar algo relacionado + aos dados de e-commerce que temos?\", ''user: Quero saber quantos clientes existem'']]\n\n=== + SQL GERADA ===\nSELECT COUNT(customer_id) FROM customers\n\n=== RESULTADO DA + EXECU\u00c7\u00c3O ===\nStatus: exec_ok\nTotal de linhas: 1\nAmostra dos resultados + (primeiras linhas):\n[{''COUNT(customer_id)'': 99441}]\n\n=== ERROS (se houver) + ===\nNenhum\n\n=== TENTATIVAS ANTERIORES ===\nNenhuma tentativa anterior (esta + \u00e9 a primeira).\n\n=== EXEMPLOS DE AVALIA\u00c7\u00c3O ===\n\n-- EXEMPLO + 1: REPROVADO (escopo incompleto) --\nPergunta: \"Which airport has the least + number of flights?\"\nSQL: SELECT SourceAirport FROM flights GROUP BY SourceAirport + ORDER BY COUNT(*) ASC LIMIT 1\nResultado: [(''AID'',)]\nVEREDITO: REPROVADO\nRaz\u00e3o: + A query conta apenas voos com partida (SourceAirport) e ignora voos com chegada + (DestAirport).\nO escopo da pergunta \u00e9 \"flights\" em geral \u2014 a query + responde a uma pergunta diferente.\n\n-- EXEMPLO 2: REPROVADO (erro sem\u00e2ntico: + MIN vs MAX) --\nPergunta: \"Which Asian countries have a population larger than + any country in Africa?\"\nSQL: SELECT Name FROM country WHERE Continent=''Asia'' + AND Population > (SELECT MAX(Population) FROM country 
WHERE Continent=''Africa'')\nResultado: + [] (vazio)\nVEREDITO: REPROVADO\nRaz\u00e3o: \"Larger than any country in Africa\" + significa maior que pelo menos um pa\u00eds africano (MIN),\nn\u00e3o maior + que todos os pa\u00edses africanos (MAX). A l\u00f3gica est\u00e1 semanticamente + errada.\n\n-- EXEMPLO 3: REPROVADO (resultado vazio suspeito) --\nPergunta: + \"Find the last name of students who live in North Carolina and are not enrolled + in any degree.\"\nSQL: SELECT last_name FROM Students WHERE state_province_county + = ''North Carolina'' AND ...\nResultado: [] (vazio)\nVEREDITO: REPROVADO\nRaz\u00e3o: + Resultado vazio quando a pergunta espera dados reais \u00e9 suspeito. Verifique + se o filtro\nde string corresponde exatamente ao valor no banco (ex: ''NorthCarolina'' + vs ''North Carolina'').\n\n-- EXEMPLO 4: REPROVADO (JOIN incorreto muda o que + est\u00e1 sendo contado) --\nPergunta: \"Find the name of makers that produced + some cars in 1970.\"\nSQL: SELECT DISTINCT Maker FROM car_makers JOIN car_names + ON car_makers.Id = car_names.MakeId JOIN cars_data ON car_names.MakeId = cars_data.Id + WHERE cars_data.Year = 1970\nResultado: [(''chevrolet'',), (''buick'',)]\nVEREDITO: + REPROVADO\nRaz\u00e3o: O JOIN usa car_names.MakeId para conectar a cars_data, + mas cars_data.Id refere-se\nao ID do carro, n\u00e3o do fabricante. O caminho + correto seria via model_list. Os resultados\nparecem plaus\u00edveis mas derivam + de uma jun\u00e7\u00e3o incorreta.\n\n-- EXEMPLO 5: APROVADO (formato diferente, + resposta correta) --\nPergunta: \"On average, when were the transcripts printed?\"\nSQL: + SELECT AVG(transcript_date) AS average_transcript_date FROM Transcripts\nResultado: + [(''1989.9333333333334'',)]\nVEREDITO: APROVADO\nRaz\u00e3o: O resultado \u00e9 + um n\u00famero que representa a m\u00e9dia das datas (formato num\u00e9rico + do SQLite).\nEmbora n\u00e3o seja uma data formatada, responde corretamente + \u00e0 pergunta. 
Diferen\u00e7a de\nrepresenta\u00e7\u00e3o n\u00e3o \u00e9 + motivo de reprova\u00e7\u00e3o.\n\n-- EXEMPLO 6: APROVADO (query mais simples + que o gold, resultado equivalente) --\nPergunta: \"Which model of car has the + minimum horsepower?\"\nSQL: SELECT Model FROM car_names JOIN cars_data ON car_names.MakeId + = cars_data.Id WHERE Horsepower = (SELECT MIN(Horsepower) FROM cars_data) LIMIT + 1\nResultado: [(''triumph'',)]\nVEREDITO: APROVADO\nRaz\u00e3o: A query retorna + corretamente o modelo com menor pot\u00eancia. O LIMIT 1 garante unicidade\ne + o resultado \u00e9 semanticamente correto. Aprovar.\n\n=== CRIT\u00c9RIOS DE + AVALIA\u00c7\u00c3O ===\n\nREPROVE quando houver:\n- Escopo incompleto: query + cobre apenas parte do que a pergunta pede\n- Erro sem\u00e2ntico: l\u00f3gica + correta na forma mas errada no significado (MIN vs MAX, ANY vs ALL)\n- JOIN + incorreto que altera os dados sendo agregados ou filtrados\n- Resultado vazio + quando a pergunta claramente espera dados\n- Filtro com valor literal diferente + do que est\u00e1 no banco\n- M\u00e9trica errada (SUM vs AVG, COUNT vs COUNT + DISTINCT, etc.)\n- Erro de execu\u00e7\u00e3o SQL\n\nAPROVE quando:\n- O resultado + responde \u00e0 pergunta, mesmo com formato ou representa\u00e7\u00e3o diferente\n- + H\u00e1 colunas extras que n\u00e3o prejudicam a resposta\n- A precis\u00e3o + num\u00e9rica difere mas o valor est\u00e1 correto\n- A query \u00e9 mais simples + que o esperado mas semanticamente equivalente\n\nAvalie com rigor sem\u00e2ntico. + Resultados que parecem plaus\u00edveis mas derivam de l\u00f3gica\nincorreta + devem ser reprovados. 
N\u00e3o presuma que uma query bem-formada est\u00e1 correta.\n\nResponda + no formato:\nVEREDITO: APROVADO ou REPROVADO\nFEEDBACK: "}], "role": "user"}], "safetySettings": [], "generationConfig": {"temperature": 0.7, "candidateCount": 1}}' headers: @@ -373,7 +1245,7 @@ interactions: Connection: - k Content-Length: - - '1' + - '7' Content-Type: - a Host: @@ -387,16 +1259,15 @@ interactions: response: body: string: !!binary | - H4sIAAAAAAAC/2VSTW/TQBC951eM9gRSGjVVQtXeQuKKiBaniQlIgMgSb5It6x2zH1WbKP8FxAH1 - zI2r/xizsZw44IO1mnn73pu3s2kAsDnXqUy5E5ZdwgeqAGx2/9BD7YR21KhKVMy5cQds+W1qZ4I4 - 8RAusWk0jgbDJL6E3mgcT3uD+KO+iqLBy17/NdWA+K1XjsPk9hpmk+g66ifQj9++SZ7NvXWYCfNZ - ps/hahzfQFWxMzDC5qhTAak0wvGMPAoovkMuzNJr4ksRvPXFDyOxGVQcDYmAoIs/xIBAV+dKhmsW - NAfHvwjFYXaQaEEcVII5uji7uOh02jMonsBncM8VGtA+K56MnCN88wIWfA2W+CSh55iRfMZL4aXI - QJDwTwSeE6UI/oQxaEEVv5dEYAF9cCTJjsnDQMUvgrdYLdTt/vypeXgKg0qEnDNMharg2wrAFlJL - uxoLblEH2CSJR2zflRTgA5VPG5XAjpp5S55vyAUtBd8/PcsNZrlL8KvQffS7pTh70S3Zakt0BDiv - +g4dV0etTve0+R+xHZCsVPXtqi0eTcmVdI9hlCR6n7BaEu4fX1UWjVpkzK3QL1fu2GO7HQLYhVbm - OKX3l2Vg9HYU4clZq3uyUNyudoqs3D4rhmnA6LtRm6ej1av7u+n6fDhau3fj7NayxrbxF7u4rjZg - AwAA + H4sIAAAAAAAC/2VSwY7aMBC98xUjn1qJRYDU7XZvNMlWtEtDIUWVSlW8yQBWEzvYTkWL+Jde9xf2 + yo/tOGlIaHNwrHkzb+Y9z6EDwGIuE5Fwi4bdwleKABzK02FKWpSWgDpEwZxr2+RW36F1pxSLe1fE + FsEs8MdReAuj6SxcjPxwKe+CwH878j5QDIjfFKnlMP90D6t5cB94EXjh54/Ri7gwVmWov4vkJdzN + wgnUEbNydVQUK63R8owmRFAgT0+EKkgQxr5xvzgVDjMgOVj+gCmHVcPSpZpdgRWNyZWkikQ0jKc/ + kKPeFK7Vku0KLq0yDSfuhbGYLVkPQiACpyNRsFZaYizodnqEIjtPdXqSIlbdsiMhvNKwwQyssjwF + mgA1T3iPtaw8nu/fus0DaJWiczdTCaZ1+rFOYGshhdnOkBslXdo8CqfsjAqSuadwv1M3KKlZYWiY + CYmnVeDnB2e5VlluI/UDpaeKchWG/cGbiq61OxcZ1zVeSrssHvavu/9RG58ai7S9Va2FI508FfaX + ExMFXyLW8sL+O1ltR6flGrNbVWy29nLKweDmL2xQ/xQxRgK162EsyeI6cdaWtlZOL2hpRGUpPRuZ + fDXsvbpap9xsy4lYtUUGx4nLid+rKS945k3e4e/X42n/YSd3oxvWOXaeAZUmwpl4AwAA headers: Alt-Svc: - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 @@ -405,11 +1276,11 @@ interactions: Content-Type: - application/json; 
charset=UTF-8 Date: - - Fri, 01 May 2026 23:13:35 GMT + - Thu, 21 May 2026 23:51:13 GMT Server: - scaffolding on HTTPServer2 Server-Timing: - - gfet4t7; dur=1607 + - gfet4t7; dur=1735 Transfer-Encoding: - chunked Vary: diff --git a/tests/cassettes/test_integracao/test_hitl_nova_pergunta_substitui__openai-gpt-4o-mini.yaml b/tests/cassettes/test_integracao/test_hitl_nova_pergunta_substitui__openai-gpt-4o-mini.yaml new file mode 100644 index 0000000..4baf254 --- /dev/null +++ b/tests/cassettes/test_integracao/test_hitl_nova_pergunta_substitui__openai-gpt-4o-mini.yaml @@ -0,0 +1,765 @@ +interactions: +- request: + body: "{\"messages\":[{\"content\":\"Voc\xEA \xE9 o planejador de um sistema que + transforma perguntas em consultas SQL.\\n\\nSeu papel: analisar a situa\xE7\xE3o + atual e decidir a pr\xF3xima a\xE7\xE3o.\\n\\nContexto atual:\\n- Pergunta do + usu\xE1rio: \\\"Quem e o Brad Pitt?\\\"\\n\\n- conversa_previa: Nenhuma\\n\\n- + Schema: === SCHEMA SQLITE (INTROSPECCAO REAL) ===\\n\\nTabela: customers\\n- + customer_id: TEXT (PK)\\n- customer_unique_id: TEXT\\n- customer_zip_code_prefix: + INTEGER\\n- customer_city: TEXT\\n- customer_state: TEXT\\n\\nTabela: geolocation\\n- + geolocation_zip_code_prefix: INTEGER\\n- geolocation_lat: REAL\\n- geolocation_lng: + REAL\\n- geolocation_city: TEXT\\n- geolocation_state: TEXT\\n\\nTabela: order_items\\n- + order_id: TEXT (PK)\\n- order_item_id: INTEGER (PK)\\n- product_id: TEXT\\n- + seller_id: TEXT\\n- shipping_limit_date: TEXT\\n- price: REAL\\n- freight_value: + REAL\\n Foreign keys:\\n - seller_id -> sellers.seller_id (on_update=NO ACTION, + on_delete=NO ACTION)\\n - product_id -> products.product_id (on_update=NO ACTION, + on_delete=NO ACTION)\\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\\n\\nTabela: order_payments\\n- order_id: TEXT (PK)\\n- + payment_sequential: INTEGER (PK)\\n- payment_type: TEXT\\n- payment_installments: + INTEGER\\n- payment_value: REAL\\n Foreign keys:\\n - 
order_id -> orders.order_id + (on_update=NO ACTION, on_delete=NO ACTION)\\n\\nTabela: order_reviews\\n- review_id: + TEXT (PK)\\n- order_id: TEXT\\n- review_score: INTEGER\\n- review_comment_title: + TEXT\\n- review_comment_message: TEXT\\n- review_creation_date: TEXT\\n- review_answer_timestamp: + TEXT\\n Foreign keys:\\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\\n\\nTabela: orders\\n- order_id: TEXT (PK)\\n- customer_id: + TEXT\\n- order_status: TEXT\\n- order_purchase_timestamp: TEXT\\n- order_approved_at: + TEXT\\n- order_delivered_carrier_date: TEXT\\n- order_delivered_customer_date: + TEXT\\n- order_estimated_delivery_date: TEXT\\n Foreign keys:\\n - customer_id + -> customers.customer_id (on_update=NO ACTION, on_delete=NO ACTION)\\n\\nTabela: + products\\n- product_id: TEXT (PK)\\n- product_category_name: TEXT\\n- product_name_length: + REAL\\n- product_description_length: REAL\\n- product_photos_qty: REAL\\n- product_weight_g: + REAL\\n- product_length_cm: REAL\\n- product_height_cm: REAL\\n- product_width_cm: + REAL\\n\\nTabela: sellers\\n- seller_id: TEXT (PK)\\n- seller_zip_code_prefix: + INTEGER\\n- seller_city: TEXT\\n- seller_state: TEXT\\n\\n\\n- Feedback do cr\xEDtico: + Nenhum\\n- Tentativas realizadas: 0\\n- Status atual: schema_obtido\\n- Erro + anterior: Nenhum\\n\\nAVALIA\xC7\xC3O CR\xCDTICA:\\nVerifique se a \\\"Pergunta + do usu\xE1rio\\\" pode ser respondida com as tabelas e colunas do Schema.\\nSe + houver ambiguidade, conceitos n\xE3o mapeados no banco de dados, ou se a inten\xE7\xE3o + do usu\xE1rio n\xE3o estiver clara, voc\xEA DEVE pedir mais informa\xE7\xF5es.\\n\\nResponda + EXATAMENTE no formato JSON abaixo, sem formata\xE7\xE3o markdown (```json):\\n{{\\n + \ \\\"decisao\\\": \\\"escolha_uma_opcao\\\",\\n \\\"pergunta_ao_usuario\\\": + \\\"escreva a pergunta aqui se precisar de ajuda, ou deixe vazio se n\xE3o precisar\\\"\\n}}\\n\\nOp\xE7\xF5es + v\xE1lidas para 'decis\xE3o':\\n- 
\\\"pronto_codificacao\\\" \u2192 se temos + schema, a pergunta faz sentido e devemos gerar/regenerar SQL\\n- \\\"revisando_estrategia\\\" + \u2192 se o cr\xEDtico reprovou e devemos tentar uma abordagem diferente\\n- + \\\"necessita_ajuda\\\" \u2192 a pergunta n\xE3o \xE9 clara, n\xE3o faz sentido, + falta contexto ou n\xE3o h\xE1 dados no schema para responder.\\n\",\"role\":\"user\"}],\"model\":\"gpt-4o-mini\",\"stream\":false}" + headers: + Accept: + - a + Accept-Encoding: + - g + Connection: + - k + Content-Length: + - '3' + Content-Type: + - a + Cookie: + - _ + Host: + - a + User-Agent: + - O + X-Stainless-Arch: + - x + X-Stainless-Async: + - f + X-Stainless-Lang: + - p + X-Stainless-OS: + - L + X-Stainless-Package-Version: + - '2' + X-Stainless-Raw-Response: + - t + X-Stainless-Runtime: + - C + X-Stainless-Runtime-Version: + - '3' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdck0z4m0CAcAAAD//41TTW/TQBC951es9tKL + g5zgpMmpEgIJxAUqtRdcWZPdibPF3jH7gaBVfgwnJO79BfljzDpJnUKROGQVvzdvdnbezDq+7N7e + 4btLfd1e4dX7y9cF2JXMkoJWt6jCUfVCEeswGLJ7WjmEgCnr5Px8WeSL+WzeEy1pbJKs7sK4oHFr + rBlP82kxzs/Hk8VBvSGj0HPYJ/4U4r4/U51W4zeG8+yItOg91MjYMYhBR01CJHhvfAAbZDaQimxA + 25d+X9oElVKjMh6oZLCUFvlubwJUcBs1lDI7hnXo6mgTQVX0EZw5SK5J7X6Jjt/mDAj0qgHHWZwg + 8SVi+jmhzR2f3Chx9jFiK5DJVw60+GBCOLsQb3zY/RCdIxUdWE3C2DW5FnY/dw/ohaeVQxFboRrD + 5WOW/nO0joEERQFNTcJhA4pNAJaD6FAbTf6ilKXdnrbA4Tp6SDbY2DQnBFhLAZKNffNvDsz2sd0N + 1Xznyv8hlWu20W8qdt3zCHBrfaBO9uyWz5ve1vjEKcmJ2i5UgT5jfx3PyD6fHKZpYGfFgQxcYTPg + y8kyeyZfpTGAafzJYEgFaoN6kA5TBFEbOiFGJ6/+u5rncu9fbmz9P+kHQinseE+qzrFX6umLhzCH + adn+FfbY5b5g6dF95e2pgkGXnNC4htjsV0D67z5gW7FdNbrOmf0erLsqL+aLZQ5TNZOj7eg3PVS5 + WhUEAAA= + headers: + Access-Control-Expose-Headers: + - CF-Ray + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - 
Fri, 22 May 2026 00:10:58 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + - CF-Ray + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - + openai-processing-ms: + - '2115' + openai-project: + - + openai-version: + - '2020-10-01' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-limit-tokens: + - '200000' + x-ratelimit-remaining-requests: + - + x-ratelimit-remaining-tokens: + - + x-ratelimit-reset-requests: + - 57.024s + x-ratelimit-reset-tokens: + - 246ms + x-request-id: + - + status: + code: 200 + message: OK +- request: + body: '{"messages":[{"content":"Voce e um classificador de respostas de usuario + em um fluxo HITL.\n\nTarefa:\n- Dado a pergunta original, a pergunta atual e + a resposta do usuario,\n classifique se o usuario fez uma NOVA PERGUNTA ou + se apenas ESCLARECEU\n algo da pergunta atual.\n\nRegras:\n- Responda ESTRITAMENTE + em JSON valido, sem markdown.\n- Campos obrigatorios: \"tipo\" e \"pergunta_normalizada\".\n- + \"tipo\" deve ser: \"nova_pergunta\" ou \"esclarecimento\".\n- Se for \"esclarecimento\", + mantenha \"pergunta_normalizada\" como a pergunta atual.\n- Se for \"nova_pergunta\", + normalize a nova pergunta a partir da resposta do usuario.\n\nEntrada:\nPergunta + original: \"Quem e o Brad Pitt?\"\nPergunta atual: \"Quem e o Brad Pitt?\"\nResposta + do usuario: \"Quero saber quantos clientes existem\"\n\nRetorne apenas:\n{\"tipo\":\"...\",\"pergunta_normalizada\":\"...\"}\n","role":"user"}],"model":"gpt-4o-mini","stream":false}' + headers: + Accept: + - a + Accept-Encoding: + - g + Connection: + - k + Content-Length: + - '9' + Content-Type: + - a + Cookie: + - _ + Host: + - a + User-Agent: + - O + X-Stainless-Arch: + - x + X-Stainless-Async: + - f + X-Stainless-Lang: + - p + X-Stainless-OS: + - L + 
X-Stainless-Package-Version: + - '2' + X-Stainless-Raw-Response: + - t + X-Stainless-Runtime: + - C + X-Stainless-Runtime-Version: + - '3' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdck0z0lKjAQAAAD//4xSTW/bMAy951cEOttD + nI8m3aWnXnYp2ks31IWgSIyjVhYFSS7SFvnvo+SkdrcO2EWA+PgeyUfe3B/uXg/uJ15cz7tfP5rr + /d3TjWZFYuD2CWQ8s75JJB5EjbaHpQcRIalW6/Xlcra5WF1moEUFJtEaF8sllq22upzP5styti6r + zYm9Ry0hUNoDfafT9/ymPq2CA4VnxTnSQgiiAYqdkyjo0aQIEyHoEIWNrBhAiTaCza2/1yxqhzX7 + XjOLL4I78E1no6hZUbPzh1v0rTD6TSiRU287ksQwlUaTEIQpHKgMtFc1O44redh1QaRpbWfMCBDW + YhTJrTzj4wk5fkxlsHEet+EPKtuRW2HPydxATtMEIaJjGT3S+5jd6z4ZwkiodZFHfIZcbl6tej02 + LG1Aq80JjNShGbEWi+ILPa4gCm3CyH8mhdyDGqjDskSnNI6AyWjqv7v5SrufXNvmf+QHQEpwdI7c + eVBafp54SPOQbvpfaR8u54ZZAP9CR8qjBp82oWAnOtNfGguv6R44rasB77zuz23nuFhsq+1yJdWG + TY6T3wAAAP//AwDia/AOfAMAAA== + headers: + Access-Control-Expose-Headers: + - CF-Ray + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Fri, 22 May 2026 00:10:59 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + - CF-Ray + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - + openai-processing-ms: + - '657' + openai-project: + - + openai-version: + - '2020-10-01' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-limit-tokens: + - '200000' + x-ratelimit-remaining-requests: + - + x-ratelimit-remaining-tokens: + - + x-ratelimit-reset-requests: + - 1m2.818s + x-ratelimit-reset-tokens: + - 60ms + x-request-id: + - + status: + code: 200 + message: OK +- request: + body: "{\"messages\":[{\"content\":\"Voc\xEA \xE9 o planejador de um sistema que + 
transforma perguntas em consultas SQL.\\n\\nSeu papel: analisar a situa\xE7\xE3o + atual e decidir a pr\xF3xima a\xE7\xE3o.\\n\\nContexto atual:\\n- Pergunta do + usu\xE1rio: \\\"Quero saber quantos clientes existem\\\"\\n\\n- conversa_previa: + [[\\\"ai: Voc\xEA poderia esclarecer o que quer dizer com 'Quem e o Brad Pitt'? + Est\xE1 procurando informa\xE7\xF5es sobre um cliente, um produto ou algo relacionado + a pedidos?\\\", 'user: Quero saber quantos clientes existem']]\\n\\n- Schema: + === SCHEMA SQLITE (INTROSPECCAO REAL) ===\\n\\nTabela: customers\\n- customer_id: + TEXT (PK)\\n- customer_unique_id: TEXT\\n- customer_zip_code_prefix: INTEGER\\n- + customer_city: TEXT\\n- customer_state: TEXT\\n\\nTabela: geolocation\\n- geolocation_zip_code_prefix: + INTEGER\\n- geolocation_lat: REAL\\n- geolocation_lng: REAL\\n- geolocation_city: + TEXT\\n- geolocation_state: TEXT\\n\\nTabela: order_items\\n- order_id: TEXT + (PK)\\n- order_item_id: INTEGER (PK)\\n- product_id: TEXT\\n- seller_id: TEXT\\n- + shipping_limit_date: TEXT\\n- price: REAL\\n- freight_value: REAL\\n Foreign + keys:\\n - seller_id -> sellers.seller_id (on_update=NO ACTION, on_delete=NO + ACTION)\\n - product_id -> products.product_id (on_update=NO ACTION, on_delete=NO + ACTION)\\n - order_id -> orders.order_id (on_update=NO ACTION, on_delete=NO + ACTION)\\n\\nTabela: order_payments\\n- order_id: TEXT (PK)\\n- payment_sequential: + INTEGER (PK)\\n- payment_type: TEXT\\n- payment_installments: INTEGER\\n- payment_value: + REAL\\n Foreign keys:\\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\\n\\nTabela: order_reviews\\n- review_id: TEXT (PK)\\n- + order_id: TEXT\\n- review_score: INTEGER\\n- review_comment_title: TEXT\\n- + review_comment_message: TEXT\\n- review_creation_date: TEXT\\n- review_answer_timestamp: + TEXT\\n Foreign keys:\\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\\n\\nTabela: orders\\n- order_id: TEXT (PK)\\n- customer_id: 
+ TEXT\\n- order_status: TEXT\\n- order_purchase_timestamp: TEXT\\n- order_approved_at: + TEXT\\n- order_delivered_carrier_date: TEXT\\n- order_delivered_customer_date: + TEXT\\n- order_estimated_delivery_date: TEXT\\n Foreign keys:\\n - customer_id + -> customers.customer_id (on_update=NO ACTION, on_delete=NO ACTION)\\n\\nTabela: + products\\n- product_id: TEXT (PK)\\n- product_category_name: TEXT\\n- product_name_length: + REAL\\n- product_description_length: REAL\\n- product_photos_qty: REAL\\n- product_weight_g: + REAL\\n- product_length_cm: REAL\\n- product_height_cm: REAL\\n- product_width_cm: + REAL\\n\\nTabela: sellers\\n- seller_id: TEXT (PK)\\n- seller_zip_code_prefix: + INTEGER\\n- seller_city: TEXT\\n- seller_state: TEXT\\n\\n\\n- Feedback do cr\xEDtico: + Nenhum\\n- Tentativas realizadas: 0\\n- Status atual: iniciado\\n- Erro anterior: + Nenhum\\n\\nAVALIA\xC7\xC3O CR\xCDTICA:\\nVerifique se a \\\"Pergunta do usu\xE1rio\\\" + pode ser respondida com as tabelas e colunas do Schema.\\nSe houver ambiguidade, + conceitos n\xE3o mapeados no banco de dados, ou se a inten\xE7\xE3o do usu\xE1rio + n\xE3o estiver clara, voc\xEA DEVE pedir mais informa\xE7\xF5es.\\n\\nResponda + EXATAMENTE no formato JSON abaixo, sem formata\xE7\xE3o markdown (```json):\\n{{\\n + \ \\\"decisao\\\": \\\"escolha_uma_opcao\\\",\\n \\\"pergunta_ao_usuario\\\": + \\\"escreva a pergunta aqui se precisar de ajuda, ou deixe vazio se n\xE3o precisar\\\"\\n}}\\n\\nOp\xE7\xF5es + v\xE1lidas para 'decis\xE3o':\\n- \\\"pronto_codificacao\\\" \u2192 se temos + schema, a pergunta faz sentido e devemos gerar/regenerar SQL\\n- \\\"revisando_estrategia\\\" + \u2192 se o cr\xEDtico reprovou e devemos tentar uma abordagem diferente\\n- + \\\"necessita_ajuda\\\" \u2192 a pergunta n\xE3o \xE9 clara, n\xE3o faz sentido, + falta contexto ou n\xE3o h\xE1 dados no schema para responder.\\n\",\"role\":\"user\"}],\"model\":\"gpt-4o-mini\",\"stream\":false}" + headers: + Accept: + - a + Accept-Encoding: + - 
g + Connection: + - k + Content-Length: + - '3' + Content-Type: + - a + Cookie: + - _ + Host: + - a + User-Agent: + - O + X-Stainless-Arch: + - x + X-Stainless-Async: + - f + X-Stainless-Lang: + - p + X-Stainless-OS: + - L + X-Stainless-Package-Version: + - '2' + X-Stainless-Raw-Response: + - t + X-Stainless-Runtime: + - C + X-Stainless-Runtime-Version: + - '3' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdck0z0mOCgAAAAD//4xSTWvcMBC9768IOq+L + 427249hALj0VCv2gG4Qijb1KbI3QSCFt2P/ekbwbO20KxSDwe/NGb57mBj89ff2y+XivbzR+u/6F + nz98f0ximRV4dw86nlXvNLIOokU30jqAipC7Xm42u1W9Xa/rQgxooM+yzsdqhdVgna2aullV9aa6 + 3J7UB7QaiMt+8O/FxXM5s09n4Inh0qsgAxCpDhg7FzEYsM+IUESWonJRLCdSo4vgivXnvcvQXhjQ + lhTuGdwLH7gCpUZjW6uVzvjyXOkhdMlFJRXKREkFe1Lx547zewK0iVSe1aW+nxHKOYwqZ1UmvD0x + x5eZeuzYwx39IRUtZ0UHydES58z+KaIXhT3yeVuyS6/iyMMMPsqID1Cu29VXYz8xPdnENu9PZGSH + /UzVbJdv9JMGorI9zdIXHNgBzCSdnkolY3FGLGZT/+3mrd7j5NZ1/9N+IrQGz8sofQBj9euJp7IA + eaP/VfaScjEsCMIjr6iMFkJ+CQOtSv24Z4J+UoRB8nN1EHyw47K1Xtar9XZXq0ZficVx8RsAAP// + AwBMwC8NegMAAA== + headers: + Access-Control-Expose-Headers: + - CF-Ray + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Fri, 22 May 2026 00:11:02 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + - CF-Ray + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - + openai-processing-ms: + - '1624' + openai-project: + - + openai-version: + - '2020-10-01' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-limit-tokens: + - '200000' + x-ratelimit-remaining-requests: + - + x-ratelimit-remaining-tokens: + - + 
x-ratelimit-reset-requests: + - 1m10.146s + x-ratelimit-reset-tokens: + - 262ms + x-request-id: + - + status: + code: 200 + message: OK +- request: + body: "{\"messages\":[{\"content\":\"Voc\xEA \xE9 um especialista em SQL para + bancos SQLite.\\n\\nSua tarefa: gerar UMA \xFAnica consulta SQL SELECT que responda + \xE0 pergunta do usu\xE1rio,\\nusando o schema do banco de dados fornecido abaixo.\\n\\nRegras:\\n- + Gere APENAS uma consulta SELECT (ou WITH/CTE seguido de SELECT).\\n- N\xC3O + use INSERT, UPDATE, DELETE, DROP, ALTER ou qualquer comando de escrita.\\n- + N\xC3O inclua explica\xE7\xF5es, apenas a SQL pura.\\n- Use nomes de tabelas + e colunas EXATAMENTE como aparecem no schema.\\n- Se a pergunta for amb\xEDgua, + fa\xE7a a interpreta\xE7\xE3o mais razo\xE1vel.\\n\\n=== SCHEMA DO BANCO ===\\n=== + SCHEMA SQLITE (INTROSPECCAO REAL) ===\\n\\nTabela: customers\\n- customer_id: + TEXT (PK)\\n- customer_unique_id: TEXT\\n- customer_zip_code_prefix: INTEGER\\n- + customer_city: TEXT\\n- customer_state: TEXT\\n\\nTabela: geolocation\\n- geolocation_zip_code_prefix: + INTEGER\\n- geolocation_lat: REAL\\n- geolocation_lng: REAL\\n- geolocation_city: + TEXT\\n- geolocation_state: TEXT\\n\\nTabela: order_items\\n- order_id: TEXT + (PK)\\n- order_item_id: INTEGER (PK)\\n- product_id: TEXT\\n- seller_id: TEXT\\n- + shipping_limit_date: TEXT\\n- price: REAL\\n- freight_value: REAL\\n Foreign + keys:\\n - seller_id -> sellers.seller_id (on_update=NO ACTION, on_delete=NO + ACTION)\\n - product_id -> products.product_id (on_update=NO ACTION, on_delete=NO + ACTION)\\n - order_id -> orders.order_id (on_update=NO ACTION, on_delete=NO + ACTION)\\n\\nTabela: order_payments\\n- order_id: TEXT (PK)\\n- payment_sequential: + INTEGER (PK)\\n- payment_type: TEXT\\n- payment_installments: INTEGER\\n- payment_value: + REAL\\n Foreign keys:\\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\\n\\nTabela: order_reviews\\n- review_id: TEXT (PK)\\n- + order_id: 
TEXT\\n- review_score: INTEGER\\n- review_comment_title: TEXT\\n- + review_comment_message: TEXT\\n- review_creation_date: TEXT\\n- review_answer_timestamp: + TEXT\\n Foreign keys:\\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\\n\\nTabela: orders\\n- order_id: TEXT (PK)\\n- customer_id: + TEXT\\n- order_status: TEXT\\n- order_purchase_timestamp: TEXT\\n- order_approved_at: + TEXT\\n- order_delivered_carrier_date: TEXT\\n- order_delivered_customer_date: + TEXT\\n- order_estimated_delivery_date: TEXT\\n Foreign keys:\\n - customer_id + -> customers.customer_id (on_update=NO ACTION, on_delete=NO ACTION)\\n\\nTabela: + products\\n- product_id: TEXT (PK)\\n- product_category_name: TEXT\\n- product_name_length: + REAL\\n- product_description_length: REAL\\n- product_photos_qty: REAL\\n- product_weight_g: + REAL\\n- product_length_cm: REAL\\n- product_height_cm: REAL\\n- product_width_cm: + REAL\\n\\nTabela: sellers\\n- seller_id: TEXT (PK)\\n- seller_zip_code_prefix: + INTEGER\\n- seller_city: TEXT\\n- seller_state: TEXT\\n\\n\\n=== PERGUNTA DO + USU\xC1RIO ===\\nQuero saber quantos clientes existem\\n\\n=== CONVERSA PR\xC9VIA + (CONTEXTO ADICIONAL) ===\\n[[\\\"ai: Voc\xEA poderia esclarecer o que quer dizer + com 'Quem e o Brad Pitt'? 
Est\xE1 procurando informa\xE7\xF5es sobre um cliente, + um produto ou algo relacionado a pedidos?\\\", 'user: Quero saber quantos clientes + existem']]\\n\\n=== HIST\xD3RICO DE TENTATIVAS ANTERIORES ===\\nNenhuma tentativa + anterior.\\n\\nResponda APENAS com a consulta SQL, sem markdown, sem explica\xE7\xE3o.\",\"role\":\"user\"}],\"model\":\"gpt-4o-mini\",\"stream\":false}" + headers: + Accept: + - a + Accept-Encoding: + - g + Connection: + - k + Content-Length: + - '3' + Content-Type: + - a + Cookie: + - _ + Host: + - a + User-Agent: + - O + X-Stainless-Arch: + - x + X-Stainless-Async: + - f + X-Stainless-Lang: + - p + X-Stainless-OS: + - L + X-Stainless-Package-Version: + - '2' + X-Stainless-Raw-Response: + - t + X-Stainless-Runtime: + - C + X-Stainless-Runtime-Version: + - '3' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdck0z0k1DAUAAAD//4xSy07DMBC89ysinwA1 + qE1D2opTVQoHXhIUcUBV5Dqb1G1iB9tBINR/Z52kTcpD4mLJOzvrmfH61+PBMp4E0413MRDT1+ds + crU2pGsZcrkGZnasUyaRB4ZLUcFMATVgp/aHw7HfGwWBVwKZjCC1tCQ3ri/djAvuej3Pd3tDtz+q + 2SvJGWhse8Gr43yWp9UpInjHcq+7q2SgNU0Aa7smLCqZ2gqhWnNtqKg01yCTwoAopT/ObmbTuTO9 + f7qbH50cO5cP97cOK7SRGSh93mYpiAtNrXJRpGkLoEJIQ63zUu+iRrZ7halMciWX+huVxOhcr0IM + SmNqqAafzUmJbvFclEkUB+YIDspyExq5gfK5Ud+v5pHmAxo0qDGDAtMWyavjOxwXRmAoT3UrSsIo + W0HUUJvcaRFx2QI6LdM/xfw2uzLORfKf8Q3AGOS4WWGuIOLs0HDTpsCu519t+5BLwUSDesN9Cw0H + ZT8igpgWab3o+kMbyEL8rQRUrni1OXEejqjPIFoOzoaks+18AQAA//8DAMzVsrlHAwAA + headers: + Access-Control-Expose-Headers: + - CF-Ray + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Fri, 22 May 2026 00:11:03 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + 
access-control-expose-headers: + - X-Request-ID + - CF-Ray + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - + openai-processing-ms: + - '375' + openai-project: + - + openai-version: + - '2020-10-01' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-limit-tokens: + - '200000' + x-ratelimit-remaining-requests: + - + x-ratelimit-remaining-tokens: + - + x-ratelimit-reset-requests: + - 1m16.41s + x-ratelimit-reset-tokens: + - 231ms + x-request-id: + - + status: + code: 200 + message: OK +- request: + body: "{\"messages\":[{\"content\":\"Voc\xEA \xE9 um revisor de qualidade para + consultas SQL geradas por IA.\\n\\nSua tarefa: avaliar se a consulta SQL e seus + resultados respondem adequadamente\\n\xE0 pergunta original do usu\xE1rio.\\n\\n=== + PERGUNTA DO USU\xC1RIO ===\\nQuero saber quantos clientes existem\\n\\n=== SCHEMA + DO BANCO ===\\n=== SCHEMA SQLITE (INTROSPECCAO REAL) ===\\n\\nTabela: customers\\n- + customer_id: TEXT (PK)\\n- customer_unique_id: TEXT\\n- customer_zip_code_prefix: + INTEGER\\n- customer_city: TEXT\\n- customer_state: TEXT\\n\\nTabela: geolocation\\n- + geolocation_zip_code_prefix: INTEGER\\n- geolocation_lat: REAL\\n- geolocation_lng: + REAL\\n- geolocation_city: TEXT\\n- geolocation_state: TEXT\\n\\nTabela: order_items\\n- + order_id: TEXT (PK)\\n- order_item_id: INTEGER (PK)\\n- product_id: TEXT\\n- + seller_id: TEXT\\n- shipping_limit_date: TEXT\\n- price: REAL\\n- freight_value: + REAL\\n Foreign keys:\\n - seller_id -> sellers.seller_id (on_update=NO ACTION, + on_delete=NO ACTION)\\n - product_id -> products.product_id (on_update=NO ACTION, + on_delete=NO ACTION)\\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\\n\\nTabela: order_payments\\n- order_id: TEXT (PK)\\n- + payment_sequential: INTEGER (PK)\\n- payment_type: TEXT\\n- payment_installments: + INTEGER\\n- payment_value: REAL\\n Foreign keys:\\n - order_id -> orders.order_id + (on_update=NO ACTION, 
on_delete=NO ACTION)\\n\\nTabela: order_reviews\\n- review_id: + TEXT (PK)\\n- order_id: TEXT\\n- review_score: INTEGER\\n- review_comment_title: + TEXT\\n- review_comment_message: TEXT\\n- review_creation_date: TEXT\\n- review_answer_timestamp: + TEXT\\n Foreign keys:\\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\\n\\nTabela: orders\\n- order_id: TEXT (PK)\\n- customer_id: + TEXT\\n- order_status: TEXT\\n- order_purchase_timestamp: TEXT\\n- order_approved_at: + TEXT\\n- order_delivered_carrier_date: TEXT\\n- order_delivered_customer_date: + TEXT\\n- order_estimated_delivery_date: TEXT\\n Foreign keys:\\n - customer_id + -> customers.customer_id (on_update=NO ACTION, on_delete=NO ACTION)\\n\\nTabela: + products\\n- product_id: TEXT (PK)\\n- product_category_name: TEXT\\n- product_name_length: + REAL\\n- product_description_length: REAL\\n- product_photos_qty: REAL\\n- product_weight_g: + REAL\\n- product_length_cm: REAL\\n- product_height_cm: REAL\\n- product_width_cm: + REAL\\n\\nTabela: sellers\\n- seller_id: TEXT (PK)\\n- seller_zip_code_prefix: + INTEGER\\n- seller_city: TEXT\\n- seller_state: TEXT\\n\\n\\n=== CONVERSA COM + O AGENTE (se houver) ===\\n[[\\\"ai: Voc\xEA poderia esclarecer o que quer dizer + com 'Quem e o Brad Pitt'? 
Est\xE1 procurando informa\xE7\xF5es sobre um cliente, + um produto ou algo relacionado a pedidos?\\\", 'user: Quero saber quantos clientes + existem']]\\n\\n=== SQL GERADA ===\\nSELECT COUNT(*) FROM customers;\\n\\n=== + RESULTADO DA EXECU\xC7\xC3O ===\\nStatus: exec_ok\\nTotal de linhas: 1\\nAmostra + dos resultados (primeiras linhas):\\n[{'COUNT(*)': 99441}]\\n\\n=== ERROS (se + houver) ===\\nNenhum\\n\\n=== TENTATIVAS ANTERIORES ===\\nNenhuma tentativa + anterior (esta \xE9 a primeira).\\n\\n=== EXEMPLOS DE AVALIA\xC7\xC3O ===\\n\\n-- + EXEMPLO 1: REPROVADO (escopo incompleto) --\\nPergunta: \\\"Which airport has + the least number of flights?\\\"\\nSQL: SELECT SourceAirport FROM flights GROUP + BY SourceAirport ORDER BY COUNT(*) ASC LIMIT 1\\nResultado: [('AID',)]\\nVEREDITO: + REPROVADO\\nRaz\xE3o: A query conta apenas voos com partida (SourceAirport) + e ignora voos com chegada (DestAirport).\\nO escopo da pergunta \xE9 \\\"flights\\\" + em geral \u2014 a query responde a uma pergunta diferente.\\n\\n-- EXEMPLO 2: + REPROVADO (erro sem\xE2ntico: MIN vs MAX) --\\nPergunta: \\\"Which Asian countries + have a population larger than any country in Africa?\\\"\\nSQL: SELECT Name + FROM country WHERE Continent='Asia' AND Population > (SELECT MAX(Population) + FROM country WHERE Continent='Africa')\\nResultado: [] (vazio)\\nVEREDITO: REPROVADO\\nRaz\xE3o: + \\\"Larger than any country in Africa\\\" significa maior que pelo menos um + pa\xEDs africano (MIN),\\nn\xE3o maior que todos os pa\xEDses africanos (MAX). + A l\xF3gica est\xE1 semanticamente errada.\\n\\n-- EXEMPLO 3: REPROVADO (resultado + vazio suspeito) --\\nPergunta: \\\"Find the last name of students who live in + North Carolina and are not enrolled in any degree.\\\"\\nSQL: SELECT last_name + FROM Students WHERE state_province_county = 'North Carolina' AND ...\\nResultado: + [] (vazio)\\nVEREDITO: REPROVADO\\nRaz\xE3o: Resultado vazio quando a pergunta + espera dados reais \xE9 suspeito. 
Verifique se o filtro\\nde string corresponde + exatamente ao valor no banco (ex: 'NorthCarolina' vs 'North Carolina').\\n\\n-- + EXEMPLO 4: REPROVADO (JOIN incorreto muda o que est\xE1 sendo contado) --\\nPergunta: + \\\"Find the name of makers that produced some cars in 1970.\\\"\\nSQL: SELECT + DISTINCT Maker FROM car_makers JOIN car_names ON car_makers.Id = car_names.MakeId + JOIN cars_data ON car_names.MakeId = cars_data.Id WHERE cars_data.Year = 1970\\nResultado: + [('chevrolet',), ('buick',)]\\nVEREDITO: REPROVADO\\nRaz\xE3o: O JOIN usa car_names.MakeId + para conectar a cars_data, mas cars_data.Id refere-se\\nao ID do carro, n\xE3o + do fabricante. O caminho correto seria via model_list. Os resultados\\nparecem + plaus\xEDveis mas derivam de uma jun\xE7\xE3o incorreta.\\n\\n-- EXEMPLO 5: + APROVADO (formato diferente, resposta correta) --\\nPergunta: \\\"On average, + when were the transcripts printed?\\\"\\nSQL: SELECT AVG(transcript_date) AS + average_transcript_date FROM Transcripts\\nResultado: [('1989.9333333333334',)]\\nVEREDITO: + APROVADO\\nRaz\xE3o: O resultado \xE9 um n\xFAmero que representa a m\xE9dia + das datas (formato num\xE9rico do SQLite).\\nEmbora n\xE3o seja uma data formatada, + responde corretamente \xE0 pergunta. Diferen\xE7a de\\nrepresenta\xE7\xE3o n\xE3o + \xE9 motivo de reprova\xE7\xE3o.\\n\\n-- EXEMPLO 6: APROVADO (query mais simples + que o gold, resultado equivalente) --\\nPergunta: \\\"Which model of car has + the minimum horsepower?\\\"\\nSQL: SELECT Model FROM car_names JOIN cars_data + ON car_names.MakeId = cars_data.Id WHERE Horsepower = (SELECT MIN(Horsepower) + FROM cars_data) LIMIT 1\\nResultado: [('triumph',)]\\nVEREDITO: APROVADO\\nRaz\xE3o: + A query retorna corretamente o modelo com menor pot\xEAncia. O LIMIT 1 garante + unicidade\\ne o resultado \xE9 semanticamente correto. 
Aprovar.\\n\\n=== CRIT\xC9RIOS + DE AVALIA\xC7\xC3O ===\\n\\nREPROVE quando houver:\\n- Escopo incompleto: query + cobre apenas parte do que a pergunta pede\\n- Erro sem\xE2ntico: l\xF3gica correta + na forma mas errada no significado (MIN vs MAX, ANY vs ALL)\\n- JOIN incorreto + que altera os dados sendo agregados ou filtrados\\n- Resultado vazio quando + a pergunta claramente espera dados\\n- Filtro com valor literal diferente do + que est\xE1 no banco\\n- M\xE9trica errada (SUM vs AVG, COUNT vs COUNT DISTINCT, + etc.)\\n- Erro de execu\xE7\xE3o SQL\\n\\nAPROVE quando:\\n- O resultado responde + \xE0 pergunta, mesmo com formato ou representa\xE7\xE3o diferente\\n- H\xE1 + colunas extras que n\xE3o prejudicam a resposta\\n- A precis\xE3o num\xE9rica + difere mas o valor est\xE1 correto\\n- A query \xE9 mais simples que o esperado + mas semanticamente equivalente\\n\\nAvalie com rigor sem\xE2ntico. Resultados + que parecem plaus\xEDveis mas derivam de l\xF3gica\\nincorreta devem ser reprovados. 
+ N\xE3o presuma que uma query bem-formada est\xE1 correta.\\n\\nResponda no formato:\\nVEREDITO: + APROVADO ou REPROVADO\\nFEEDBACK: \",\"role\":\"user\"}],\"model\":\"gpt-4o-mini\",\"stream\":false}" + headers: + Accept: + - a + Accept-Encoding: + - g + Connection: + - k + Content-Length: + - '7' + Content-Type: + - a + Cookie: + - _ + Host: + - a + User-Agent: + - O + X-Stainless-Arch: + - x + X-Stainless-Async: + - f + X-Stainless-Lang: + - p + X-Stainless-OS: + - L + X-Stainless-Package-Version: + - '2' + X-Stainless-Raw-Response: + - t + X-Stainless-Runtime: + - C + X-Stainless-Runtime-Version: + - '3' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdck0z0kzDwEAAAD//41Ty27bMBC85ysWOrWF + bdiu4kd7cm25KPJwY7u51IFCk2uFjUTKJFUkKPwv7bG/0Kt/rEs5sZw2BXohoJ2dEYez+74ZrW5P + Luz92CSz9dIm2fl43QtqnqGXX5C7R1aDa+Khk1rtYG6QOfSqrW63HzZ7nc7rEsi0wNTTktzVQ13P + pJL1drMd1pvdeutBnN9oydFS22f6BPhWnv6eSuAdlZu1x0qG1rIEqfbYREWjU18JmLXSOqZcUKtA + rpVDVV79MppGow/zyRsYfJxOLgejCcBCjaNo9G4wPKEqULMtUsdgdnEK17PoNBrOYTj5dD5/8eol + jKeTM+CFdTpDY99eU7sx6FhG+ui5RNSgtr8I1uC0YykIAlLpGywoBo4tMWWwCPYyi6AGBm2uyasS + GoSsJLffIUeTFF6YoMIW2x9GarB6aRDWBVnVttLHO7KPWQMmXtH78HoI/X4YtmD7s7TnW7w2AhNI + EkI3Dp/L4KqwzEemijQ9AJhSZMhHXgZ19YBs9tGkOsmNXto/qMGKIrc3MU2IpXGhGMh4HpTohs6r + cgSKJ6kGJJTlLnb6FsvftXrN9k4wqEavgo/7D2D55oe0Tqv2jGIs6Illag/GKOCM36CouNXMsUJI + fQAcHfj++zrPae+8S5X8j3wFcI45bVWcGxSSP7VctRn0q/mvtv07lxcOLJqvtGuxk2h8FgJXjAZl + t4f23o9PTIElaHIjd1uzyuPjcNXuCN7vhsHR5ug3d2teCkMEAAA= + headers: + Access-Control-Expose-Headers: + - CF-Ray + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Fri, 22 May 2026 00:11:05 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + 
X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + - CF-Ray + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - + openai-processing-ms: + - '1330' + openai-project: + - + openai-version: + - '2020-10-01' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-limit-tokens: + - '200000' + x-ratelimit-remaining-requests: + - + x-ratelimit-remaining-tokens: + - + x-ratelimit-reset-requests: + - 1m24.177s + x-ratelimit-reset-tokens: + - 523ms + x-request-id: + - + status: + code: 200 + message: OK +version: 1 diff --git a/tests/cassettes/test_integracao/test_pergunta_com_ranking.yaml b/tests/cassettes/test_integracao/test_pergunta_com_ranking.yaml index cbe1763..d8cbc7b 100644 --- a/tests/cassettes/test_integracao/test_pergunta_com_ranking.yaml +++ b/tests/cassettes/test_integracao/test_pergunta_com_ranking.yaml @@ -3,16 +3,42 @@ interactions: body: '{"contents": [{"parts": [{"text": "Voc\u00ea \u00e9 o planejador de um sistema que transforma perguntas em consultas SQL.\n\nSeu papel: analisar a situa\u00e7\u00e3o atual e decidir a pr\u00f3xima a\u00e7\u00e3o.\n\nContexto - atual:\n- Pergunta do usu\u00e1rio: \"Quais sao as 5 categorias de produtos - mais vendidos por quantidade?\"\n- Schema dispon\u00edvel: Sim\n- Feedback do - cr\u00edtico: Nenhum\n- Tentativas realizadas: 0\n- Status atual: schema_obtido\n- - Erro anterior: Nenhum\n\nAVALIA\u00c7\u00c3O CR\u00cdTICA:\nVerifique se a \"Pergunta - do usu\u00e1rio\" pode ser respondida com as tabelas e colunas do Schema.\nSe - houver ambiguidade, conceitos n\u00e3o mapeados no banco de dados, ou se a inten\u00e7\u00e3o - do usu\u00e1rio n\u00e3o estiver clara, voc\u00ea DEVE pedir mais informa\u00e7\u00f5es.\n\nResponda - EXATAMENTE no formato JSON abaixo, sem formata\u00e7\u00e3o markdown (```json):\n{\n \"decisao\": + atual:\n- Pergunta do usu\u00e1rio: \"Quais s\u00e3o as 5 categorias com a maior + quantidade total de itens 
vendidos?\"\n\n- conversa_previa: Nenhuma\n\n- Schema: + === SCHEMA SQLITE (INTROSPECCAO REAL) ===\n\nTabela: customers\n- customer_id: + TEXT (PK)\n- customer_unique_id: TEXT\n- customer_zip_code_prefix: INTEGER\n- + customer_city: TEXT\n- customer_state: TEXT\n\nTabela: geolocation\n- geolocation_zip_code_prefix: + INTEGER\n- geolocation_lat: REAL\n- geolocation_lng: REAL\n- geolocation_city: + TEXT\n- geolocation_state: TEXT\n\nTabela: order_items\n- order_id: TEXT (PK)\n- + order_item_id: INTEGER (PK)\n- product_id: TEXT\n- seller_id: TEXT\n- shipping_limit_date: + TEXT\n- price: REAL\n- freight_value: REAL\n Foreign keys:\n - seller_id -> + sellers.seller_id (on_update=NO ACTION, on_delete=NO ACTION)\n - product_id + -> products.product_id (on_update=NO ACTION, on_delete=NO ACTION)\n - order_id + -> orders.order_id (on_update=NO ACTION, on_delete=NO ACTION)\n\nTabela: order_payments\n- + order_id: TEXT (PK)\n- payment_sequential: INTEGER (PK)\n- payment_type: TEXT\n- + payment_installments: INTEGER\n- payment_value: REAL\n Foreign keys:\n - order_id + -> orders.order_id (on_update=NO ACTION, on_delete=NO ACTION)\n\nTabela: order_reviews\n- + review_id: TEXT (PK)\n- order_id: TEXT\n- review_score: INTEGER\n- review_comment_title: + TEXT\n- review_comment_message: TEXT\n- review_creation_date: TEXT\n- review_answer_timestamp: + TEXT\n Foreign keys:\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\n\nTabela: orders\n- order_id: TEXT (PK)\n- customer_id: + TEXT\n- order_status: TEXT\n- order_purchase_timestamp: TEXT\n- order_approved_at: + TEXT\n- order_delivered_carrier_date: TEXT\n- order_delivered_customer_date: + TEXT\n- order_estimated_delivery_date: TEXT\n Foreign keys:\n - customer_id + -> customers.customer_id (on_update=NO ACTION, on_delete=NO ACTION)\n\nTabela: + products\n- product_id: TEXT (PK)\n- product_category_name: TEXT\n- product_name_length: + REAL\n- product_description_length: REAL\n- product_photos_qty: REAL\n- 
product_weight_g: + REAL\n- product_length_cm: REAL\n- product_height_cm: REAL\n- product_width_cm: + REAL\n\nTabela: sellers\n- seller_id: TEXT (PK)\n- seller_zip_code_prefix: INTEGER\n- + seller_city: TEXT\n- seller_state: TEXT\n\n\n- Feedback do cr\u00edtico: Nenhum\n- + Tentativas realizadas: 0\n- Status atual: schema_obtido\n- Erro anterior: Nenhum\n\nAVALIA\u00c7\u00c3O + CR\u00cdTICA:\nVerifique se a \"Pergunta do usu\u00e1rio\" pode ser respondida + com as tabelas e colunas do Schema.\nSe houver ambiguidade, conceitos n\u00e3o + mapeados no banco de dados, ou se a inten\u00e7\u00e3o do usu\u00e1rio n\u00e3o + estiver clara, voc\u00ea DEVE pedir mais informa\u00e7\u00f5es.\n\nResponda + EXATAMENTE no formato JSON abaixo, sem formata\u00e7\u00e3o markdown (```json):\n{{\n \"decisao\": \"escolha_uma_opcao\",\n \"pergunta_ao_usuario\": \"escreva a pergunta aqui - se precisar de ajuda, ou deixe vazio se n\u00e3o precisar\"\n}\n\nOp\u00e7\u00f5es + se precisar de ajuda, ou deixe vazio se n\u00e3o precisar\"\n}}\n\nOp\u00e7\u00f5es v\u00e1lidas para ''decis\u00e3o'':\n- \"pronto_codificacao\" \u2192 se temos schema, a pergunta faz sentido e devemos gerar/regenerar SQL\n- \"revisando_estrategia\" \u2192 se o cr\u00edtico reprovou e devemos tentar uma abordagem diferente\n- @@ -22,32 +48,33 @@ interactions: 1}}' headers: Accept: - - '*/*' + - '*' Accept-Encoding: - - gzip, deflate, zstd + - g Connection: - - keep-alive + - k Content-Length: - - '1585' + - '3' Content-Type: - - application/json + - a Host: - - generativelanguage.googleapis.com + - g User-Agent: - - google-genai-sdk/1.68.0 gl-python/3.10.12 + - g x-goog-api-client: - - google-genai-sdk/1.68.0 gl-python/3.10.12 + - g method: POST uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent response: body: string: !!binary | - H4sIAAAAAAAC/2WRa0vDMBSGv/dXlHzeZG66i1+9QAXZpkUFK+PYnLXBNinJKWyW/neTdt06baAk - 
73lzLk8qz/dZDJILDoSG3fgfVvH9qvm7mJKEkmygk6xYgKaTt/2q3t5aCHfuEqsi6c4R4xgLAyqy - YsQKbdOqTay42IoYYqcPOmeBOiklwQbUpjQlaHG4ZZesWa9Ofdx/Dk7daZWhK50rjllnrzsD2wop - TPqMYJR0tpdwuWLHqJAcd1YeeV2BJjUrDST4hASWExxpuEHygkL1jfJWlQ2nyXjSZutxPTOMF4c4 - KYLsLLSYXg7+JTZ3tqzI+sB7b2GnhEzQ3o0S3r+HrEeC/vTVsfB6yBilqkxSOu9xOlp4B2gtx1fU - RrTAEswtwuH44nq4zcCkTUWm0RRKGgy48zwGsyXg8O2B9sHPLFjNv/L1fH3FvNr7BWhmkwNzAgAA + H4sIAAAAAAAC/2WRXUvDMBSG7/srSq432Sau6J1MLyaowwURrIxjc9oGu5ySpDIt/e8m7bp1s4WS + vOc9X0/rIAxZAkpIARYNuwnfnRKGdfv1MVIWlXWBXnJiCdoevd1TD87OYnHnk1gdK3+PmcBEGqDY + iTErtStLm4SETGUCiddHvbNEnVXKwgZoU5kKtNxnuVc1bNCnOZw/RsfpNBXoW29JYNHbm97AUqmk + yV8QDClvW/PnFTtEpRK4c/Ik6Bu0pVllIMNHtOA4wYGGX2RbWk5fqBZUtZymk8m8KzcAe+KYXe/j + liwUp8mX09noX2lz5xrLYoh88DfcnlBI++OX4fdvnA1Y2PPJehzBgBqzOVVZbs+mjKJ92KD+lgly + idr3MNatBVp4tC3WjvQraiM7pBluHeTx7OJqnBZg8nYiptGUpAwuhfcsSlqBjj4fiK9/o+VKpfxp + fkssaII/XdXs1JUCAAA= headers: Alt-Svc: - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 @@ -56,11 +83,11 @@ interactions: Content-Type: - application/json; charset=UTF-8 Date: - - Thu, 02 Apr 2026 15:41:27 GMT + - Thu, 21 May 2026 23:49:32 GMT Server: - scaffolding on HTTPServer2 Server-Timing: - - gfet4t7; dur=3466 + - gfet4t7; dur=2133 Transfer-Encoding: - chunked Vary: @@ -113,40 +140,42 @@ interactions: REAL\n- product_photos_qty: REAL\n- product_weight_g: REAL\n- product_length_cm: REAL\n- product_height_cm: REAL\n- product_width_cm: REAL\n\nTabela: sellers\n- seller_id: TEXT (PK)\n- seller_zip_code_prefix: INTEGER\n- seller_city: TEXT\n- - seller_state: TEXT\n\n\n=== PERGUNTA DO USU\u00c1RIO ===\nQuais sao as 5 categorias - de produtos mais vendidos por quantidade?\n\n\n\nResponda APENAS com a consulta + seller_state: TEXT\n\n\n=== PERGUNTA DO USU\u00c1RIO ===\nQuais s\u00e3o as + 5 categorias com a maior quantidade total de itens vendidos?\n\n=== CONVERSA + PR\u00c9VIA (CONTEXTO ADICIONAL) ===\nNenhuma\n\n=== HIST\u00d3RICO DE TENTATIVAS + ANTERIORES ===\nNenhuma tentativa anterior.\n\nResponda APENAS com 
a consulta SQL, sem markdown, sem explica\u00e7\u00e3o."}], "role": "user"}], "safetySettings": [], "generationConfig": {"temperature": 0.7, "candidateCount": 1}}' headers: Accept: - - '*/*' + - '*' Accept-Encoding: - - gzip, deflate, zstd + - g Connection: - - keep-alive + - k Content-Length: - - '3088' + - '3' Content-Type: - - application/json + - a Host: - - generativelanguage.googleapis.com + - g User-Agent: - - google-genai-sdk/1.68.0 gl-python/3.10.12 + - g x-goog-api-client: - - google-genai-sdk/1.68.0 gl-python/3.10.12 + - g method: POST uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent response: body: string: !!binary | - H4sIAAAAAAAC/31SXWvCMBR9768Iedpgig51MtjDVutQ1Lhaxz46JJirBtuka1KmiP99abUa97A8 - hHDPuefee252DkJ4TgXjjGpQ+B59mghCu+LOMSk0CG2AMmSCCU31mXs4O+ttKBo2eRKeeAPPDUKB - UFJNUsmyuZ7NTamlTLczQWMIRdcnQyRTBumMa4gVepwgyUPRJ70ROuYUwSSXISMDnqQ4Qw+WMmeh - ePbJdIye3v+tSfyO5x9JLpmOgqsL0WvU8SZuKAa9YS9ATWzNtj+9v27OjqQygnzcWDKISvq+JOAF - F1ytfKBKisKVgIzxCeWCwcaEa05ZoJDGmaJLGIKmZjf0tAFs2owTHcg1CFdmxW7arfpBzdrlBaHV - OuJaahpdQPVavUy2lFXH1OWRvWXrA5gxacT1Np8l8N4CbFmh/zRWmuFYnmG9ktlypS+bbDeco2kH - H18hVfxg2BJiY2HlttqsLCKqVkVBnIJKpFDQYzmn37gj9CMjRHa631qN44W7/nlpYGfv/AJstiMM - 5wIAAA== + H4sIAAAAAAAC/31SXU/CMBR9369o+qSJGvEDPxIfEKbOCMMxDcYZ0tArNG7t0t4ZkPDf7QaD4oN7 + WLp7Ts+599wtPELomEkuOEMw9Jq82wohi+pdYkoiSLRAXbLFnGncclfPwjlbCsKsvEQH/pPfjhNJ + SH6Ua8WLMY7G1mqi9HwkWQaJvIvCLlljhrQGJE/kYxj0iNIc9EggZFVZiVIm7DlKgpMbW3e+E3kf + hS99cvv2r2cYdfxoTWqHL714z6ps/azQPun4g3Yin4JuEJNz6oy33Jw/DrahaJVCOXGmOKQ1fVkT + 6KeQwkwjYEbJKpg47NMNKiSHmS0fe7VBJU0LwybQBWR2PWyzBGqnynKM1RfItiqq9Vwdn63UnHXu + EJqXaxwVsnQHajROa9BRNh3rK1J30c4/YMdkqcB5OUvsD2PqRIF/GqvD8JzMKE5VMZnibpONZnMN + G9DfYgyxAF1aGLRTMc3LYKtQVzm/gjZiFegEMhvx4cnR+eFnysy0aohqMLmSBgJecjqPqs9wDuHl + 4OHnIujPusPnomWot/R+AXMww50KAwAA headers: Alt-Svc: - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 @@ -155,11 +184,11 @@ interactions: Content-Type: - application/json; charset=UTF-8 Date: - - Thu, 
02 Apr 2026 15:41:29 GMT + - Thu, 21 May 2026 23:49:34 GMT Server: - scaffolding on HTTPServer2 Server-Timing: - - gfet4t7; dur=1451 + - gfet4t7; dur=1676 Transfer-Encoding: - chunked Vary: @@ -181,51 +210,132 @@ interactions: body: '{"contents": [{"parts": [{"text": "Voc\u00ea \u00e9 um revisor de qualidade para consultas SQL geradas por IA.\n\nSua tarefa: avaliar se a consulta SQL e seus resultados respondem adequadamente\n\u00e0 pergunta original do usu\u00e1rio.\n\n=== - PERGUNTA DO USU\u00c1RIO ===\nQuais sao as 5 categorias de produtos mais vendidos - por quantidade?\n\n=== SQL GERADA ===\nSELECT\n p.product_category_name\nFROM - order_items AS oi\nJOIN products AS p\n ON oi.product_id = p.product_id\nGROUP - BY\n p.product_category_name\nORDER BY\n COUNT(oi.product_id) DESC\nLIMIT + PERGUNTA DO USU\u00c1RIO ===\nQuais s\u00e3o as 5 categorias com a maior quantidade + total de itens vendidos?\n\n=== SCHEMA DO BANCO ===\n=== SCHEMA SQLITE (INTROSPECCAO + REAL) ===\n\nTabela: customers\n- customer_id: TEXT (PK)\n- customer_unique_id: + TEXT\n- customer_zip_code_prefix: INTEGER\n- customer_city: TEXT\n- customer_state: + TEXT\n\nTabela: geolocation\n- geolocation_zip_code_prefix: INTEGER\n- geolocation_lat: + REAL\n- geolocation_lng: REAL\n- geolocation_city: TEXT\n- geolocation_state: + TEXT\n\nTabela: order_items\n- order_id: TEXT (PK)\n- order_item_id: INTEGER + (PK)\n- product_id: TEXT\n- seller_id: TEXT\n- shipping_limit_date: TEXT\n- + price: REAL\n- freight_value: REAL\n Foreign keys:\n - seller_id -> sellers.seller_id + (on_update=NO ACTION, on_delete=NO ACTION)\n - product_id -> products.product_id + (on_update=NO ACTION, on_delete=NO ACTION)\n - order_id -> orders.order_id + (on_update=NO ACTION, on_delete=NO ACTION)\n\nTabela: order_payments\n- order_id: + TEXT (PK)\n- payment_sequential: INTEGER (PK)\n- payment_type: TEXT\n- payment_installments: + INTEGER\n- payment_value: REAL\n Foreign keys:\n - order_id -> orders.order_id + (on_update=NO ACTION, 
on_delete=NO ACTION)\n\nTabela: order_reviews\n- review_id: + TEXT (PK)\n- order_id: TEXT\n- review_score: INTEGER\n- review_comment_title: + TEXT\n- review_comment_message: TEXT\n- review_creation_date: TEXT\n- review_answer_timestamp: + TEXT\n Foreign keys:\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\n\nTabela: orders\n- order_id: TEXT (PK)\n- customer_id: + TEXT\n- order_status: TEXT\n- order_purchase_timestamp: TEXT\n- order_approved_at: + TEXT\n- order_delivered_carrier_date: TEXT\n- order_delivered_customer_date: + TEXT\n- order_estimated_delivery_date: TEXT\n Foreign keys:\n - customer_id + -> customers.customer_id (on_update=NO ACTION, on_delete=NO ACTION)\n\nTabela: + products\n- product_id: TEXT (PK)\n- product_category_name: TEXT\n- product_name_length: + REAL\n- product_description_length: REAL\n- product_photos_qty: REAL\n- product_weight_g: + REAL\n- product_length_cm: REAL\n- product_height_cm: REAL\n- product_width_cm: + REAL\n\nTabela: sellers\n- seller_id: TEXT (PK)\n- seller_zip_code_prefix: INTEGER\n- + seller_city: TEXT\n- seller_state: TEXT\n\n\n=== CONVERSA COM O AGENTE (se houver) + ===\nNenhuma\n\n=== SQL GERADA ===\nSELECT\n p.product_category_name\nFROM + products AS p\nJOIN order_items AS oi\n ON p.product_id = oi.product_id\nGROUP + BY\n p.product_category_name\nORDER BY\n COUNT(oi.order_item_id) DESC\nLIMIT 5\n\n=== RESULTADO DA EXECU\u00c7\u00c3O ===\nStatus: exec_ok\nTotal de linhas: 5\nAmostra dos resultados (primeiras linhas):\n[{''product_category_name'': ''cama_mesa_banho''}, {''product_category_name'': ''beleza_saude''}, {''product_category_name'': ''esporte_lazer''}, {''product_category_name'': ''moveis_decoracao''}, {''product_category_name'': - ''informatica_acessorios''}]\n\n=== ERROS (se houver) ===\nNenhum\n\nAvalie:\n1. - A SQL responde \u00e0 pergunta do usu\u00e1rio?\n2. Os resultados fazem sentido?\n3. 
- H\u00e1 algum erro l\u00f3gico ou de interpreta\u00e7\u00e3o?\n\nResponda no - formato:\nVEREDITO: APROVADO ou REPROVADO\nFEEDBACK: (SELECT MAX(Population) FROM country WHERE Continent=''Africa'')\nResultado: + [] (vazio)\nVEREDITO: REPROVADO\nRaz\u00e3o: \"Larger than any country in Africa\" + significa maior que pelo menos um pa\u00eds africano (MIN),\nn\u00e3o maior + que todos os pa\u00edses africanos (MAX). A l\u00f3gica est\u00e1 semanticamente + errada.\n\n-- EXEMPLO 3: REPROVADO (resultado vazio suspeito) --\nPergunta: + \"Find the last name of students who live in North Carolina and are not enrolled + in any degree.\"\nSQL: SELECT last_name FROM Students WHERE state_province_county + = ''North Carolina'' AND ...\nResultado: [] (vazio)\nVEREDITO: REPROVADO\nRaz\u00e3o: + Resultado vazio quando a pergunta espera dados reais \u00e9 suspeito. Verifique + se o filtro\nde string corresponde exatamente ao valor no banco (ex: ''NorthCarolina'' + vs ''North Carolina'').\n\n-- EXEMPLO 4: REPROVADO (JOIN incorreto muda o que + est\u00e1 sendo contado) --\nPergunta: \"Find the name of makers that produced + some cars in 1970.\"\nSQL: SELECT DISTINCT Maker FROM car_makers JOIN car_names + ON car_makers.Id = car_names.MakeId JOIN cars_data ON car_names.MakeId = cars_data.Id + WHERE cars_data.Year = 1970\nResultado: [(''chevrolet'',), (''buick'',)]\nVEREDITO: + REPROVADO\nRaz\u00e3o: O JOIN usa car_names.MakeId para conectar a cars_data, + mas cars_data.Id refere-se\nao ID do carro, n\u00e3o do fabricante. O caminho + correto seria via model_list. 
Os resultados\nparecem plaus\u00edveis mas derivam + de uma jun\u00e7\u00e3o incorreta.\n\n-- EXEMPLO 5: APROVADO (formato diferente, + resposta correta) --\nPergunta: \"On average, when were the transcripts printed?\"\nSQL: + SELECT AVG(transcript_date) AS average_transcript_date FROM Transcripts\nResultado: + [(''1989.9333333333334'',)]\nVEREDITO: APROVADO\nRaz\u00e3o: O resultado \u00e9 + um n\u00famero que representa a m\u00e9dia das datas (formato num\u00e9rico + do SQLite).\nEmbora n\u00e3o seja uma data formatada, responde corretamente + \u00e0 pergunta. Diferen\u00e7a de\nrepresenta\u00e7\u00e3o n\u00e3o \u00e9 + motivo de reprova\u00e7\u00e3o.\n\n-- EXEMPLO 6: APROVADO (query mais simples + que o gold, resultado equivalente) --\nPergunta: \"Which model of car has the + minimum horsepower?\"\nSQL: SELECT Model FROM car_names JOIN cars_data ON car_names.MakeId + = cars_data.Id WHERE Horsepower = (SELECT MIN(Horsepower) FROM cars_data) LIMIT + 1\nResultado: [(''triumph'',)]\nVEREDITO: APROVADO\nRaz\u00e3o: A query retorna + corretamente o modelo com menor pot\u00eancia. O LIMIT 1 garante unicidade\ne + o resultado \u00e9 semanticamente correto. 
Aprovar.\n\n=== CRIT\u00c9RIOS DE + AVALIA\u00c7\u00c3O ===\n\nREPROVE quando houver:\n- Escopo incompleto: query + cobre apenas parte do que a pergunta pede\n- Erro sem\u00e2ntico: l\u00f3gica + correta na forma mas errada no significado (MIN vs MAX, ANY vs ALL)\n- JOIN + incorreto que altera os dados sendo agregados ou filtrados\n- Resultado vazio + quando a pergunta claramente espera dados\n- Filtro com valor literal diferente + do que est\u00e1 no banco\n- M\u00e9trica errada (SUM vs AVG, COUNT vs COUNT + DISTINCT, etc.)\n- Erro de execu\u00e7\u00e3o SQL\n\nAPROVE quando:\n- O resultado + responde \u00e0 pergunta, mesmo com formato ou representa\u00e7\u00e3o diferente\n- + H\u00e1 colunas extras que n\u00e3o prejudicam a resposta\n- A precis\u00e3o + num\u00e9rica difere mas o valor est\u00e1 correto\n- A query \u00e9 mais simples + que o esperado mas semanticamente equivalente\n\nAvalie com rigor sem\u00e2ntico. + Resultados que parecem plaus\u00edveis mas derivam de l\u00f3gica\nincorreta + devem ser reprovados. 
N\u00e3o presuma que uma query bem-formada est\u00e1 correta.\n\nResponda + no formato:\nVEREDITO: APROVADO ou REPROVADO\nFEEDBACK: "}], "role": "user"}], "safetySettings": [], "generationConfig": {"temperature": 0.7, "candidateCount": 1}}' headers: Accept: - - '*/*' + - '*' Accept-Encoding: - - gzip, deflate, zstd + - g Connection: - - keep-alive + - k Content-Length: - - '1335' + - '7' Content-Type: - - application/json + - a Host: - - generativelanguage.googleapis.com + - g User-Agent: - - google-genai-sdk/1.68.0 gl-python/3.10.12 + - g x-goog-api-client: - - google-genai-sdk/1.68.0 gl-python/3.10.12 + - g method: POST uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent response: body: string: !!binary | - H4sIAAAAAAAC/11S227TQBB991eM9rmNmlZRQ99CYqRQkNPEikDAw5CdJAv2rtlLVIjyL0U88R35 - MWbtOnWwZGs958zMmbOzTwDECrVUEj05cQefOAKwr78RM9qT9gy0IQ5WaP0Lt3n2nTNTPD3GJLFM - 5+lkmmd3MJrNs+Vokn3Wb9J08no0vucYcH0XCo+weHgHllxltCSOWkseS+5McHyCiuwmaGZJA8GF - 42+rzAUoybhaq6jfADoYwIqn2Bir+IfLVNbI4I2DEpWDHdVjOqiMhR8BOVcis9Bb3B3/cgZGOR43 - VJ5la34Vu9DUJC5iepC5KDdKlwy74x/DyWSjYgdctGKUf7CMwjqyeESyyDJ6ouPY4XT+cvHiszUF - RRNLI6lo6YeWINZKK7edEzqjI22RZzNxQhUb+cjhq6RtUJcWwfGA79ldvnE83avgacvK5+Y76bEJ - 9Y3f3Fw31TobckYYDJ9xbzwWZ1B/cNWCncpuwn1V0d2dzlrxmFgo/zPOkqcfctGxwv8nrDUj6Xgm - /NaEzdafi+z3+8Pk2bbGySVZpxrL+KrZxMvr3uByXaDb1i1Fs4eOpjJy7j/eZrhahLfh67dft9PZ - 7tVsHB6GIjkk/wASEdTJPwMAAA== + H4sIAAAAAAAC/2VSwW4aMRC98xUjnwlCUSlVbjRsJdS0ULJCkZqomPVksbq2tx5vlIL4l+Q3euXH + Ol6ysLQcWOvNzJs3b2bbARCZtEorGZDEFXxnBGBb/8eYswFt4EADMVhKH065h9+29eaUgM+xSCyS + eTKepNMrGM3m08VoPL23n5Jk/HF0/ZkxYH6qiiDh9tsNVBYZ8B6DNNwUQRIEucKCv8vSO1VlgZaA + sHReof+hAxpadkHmviollFg4AusMEiguyXik3HnNz8hruYsDu/9j0DsILsgCFAKTWIInjB5wOc8m + uVLJU3kPEgOEecUmdYHVQGxv+UPgsZYfK9HUuGHSjOGsngCh0EZz5/0LwaAtqfTaZrqUmrqRpXSW + OZVjHDNNbwbsX3gqn1dRO8cqqvavXrueaLm9O74fuqcdeVdgXIBxCosmfdckiEdtNa3nKMnZmHab + TmfiGNWs5ZnhfqdpUFOLimSOX3g9fC3yeBOCV2PKkLqfaK9dVV/LZf/D+wNd67zOMoaDt3i9ifPi + 
YX/Y/Y+axtxYF+3Da90kzykLHX7HYdLkLhUtL8K/yho7Oi3XRFi7Kl+Hc5WDd80YhP5JZ5hq9LEH + BR5LehWtrW09OL1AT/pgaY6GTb647A0uHvmC17UicVg14UTFnPHAzeSvm7vk62qxGU5mm9lm1R/1 + RWfX+Qs0AbvPmwMAAA== headers: Alt-Svc: - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 @@ -234,11 +344,90 @@ interactions: Content-Type: - application/json; charset=UTF-8 Date: - - Thu, 02 Apr 2026 15:41:35 GMT + - Thu, 21 May 2026 23:49:38 GMT Server: - scaffolding on HTTPServer2 Server-Timing: - - gfet4t7; dur=5972 + - gfet4t7; dur=3282 + Transfer-Encoding: + - chunked + Vary: + - Origin + - X-Origin + - Referer + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - SAMEORIGIN + X-Gemini-Service-Tier: + - standard + X-XSS-Protection: + - '0' + status: + code: 200 + message: OK +- request: + body: '{"contents": [{"parts": [{"text": "Voc\u00ea \u00e9 um assistente que decide + se os resultados de uma consulta SQL\ndevem ser acompanhados de um gr\u00e1fico + (visualiza\u00e7\u00e3o).\n\nAnalise a pergunta do usu\u00e1rio e as caracter\u00edsticas + dos dados retornados.\n\n=== PERGUNTA DO USU\u00c1RIO ===\n\n\n=== COLUNAS DO + RESULTADO ===\nproduct_category_name\n\n=== TOTAL DE LINHAS ===\n5\n\n=== AMOSTRA + DOS DADOS ===\n[{''product_category_name'': ''cama_mesa_banho''}, {''product_category_name'': + ''beleza_saude''}, {''product_category_name'': ''esporte_lazer''}, {''product_category_name'': + ''moveis_decoracao''}, {''product_category_name'': ''informatica_acessorios''}]\n\nUm + gr\u00e1fico \u00e9 \u00fatil quando:\n- A pergunta envolve compara\u00e7\u00f5es + entre categorias (ex: vendas por regi\u00e3o)\n- H\u00e1 dados temporais ou + tend\u00eancias (ex: evolu\u00e7\u00e3o ao longo dos meses)\n- H\u00e1 distribui\u00e7\u00f5es + ou rankings (ex: top 10 produtos)\n- H\u00e1 agrega\u00e7\u00f5es num\u00e9ricas + que se beneficiam de visualiza\u00e7\u00e3o\n- O resultado tem mais de 1 linha + com pelo menos uma coluna num\u00e9rica\n\nUm gr\u00e1fico N\u00c3O \u00e9 \u00fatil + quando:\n- O 
resultado \u00e9 um \u00fanico valor escalar (ex: total geral)\n- + A pergunta pede um dado espec\u00edfico pontual (ex: nome de um cliente)\n- + O resultado tem apenas 1 linha\n- N\u00e3o h\u00e1 colunas num\u00e9ricas para + plotar\n\nResponda APENAS com uma palavra: SIM ou NAO\n"}], "role": "user"}], + "safetySettings": [], "generationConfig": {"temperature": 0.7, "candidateCount": + 1}}' + headers: + Accept: + - '*' + Accept-Encoding: + - g + Connection: + - k + Content-Length: + - '1' + Content-Type: + - a + Host: + - g + User-Agent: + - g + x-goog-api-client: + - g + method: POST + uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent + response: + body: + string: !!binary | + H4sIAAAAAAAC/2WQQU/DMAyF7/0VVc4b2sqmIW4T7DBNsAoqhIQ4WKvXRqRJSdwxqPrfSdq1yyCH + KPJ7sf2+OghDtgOZ8hQIDbsN32wlDOv2dpqShJKs0JdssQRNZ293au9tLYRH94k9LrfMU5rh/T46 + 99NKoDMXKkXR25vewPZccpM/IRglne052cZsULlM8WjLk6Af0LZmlYEMH5DAJoNhf1ZqVZSUqA+U + d6pqk11H866bR+LCEJ1kUgTiQplOZjejf43NvR3LhY/Io2dTguD07aIkq9eEeSToz149i8BDxihX + VZbT5Y6LaHqSDeoD32HCUbsRhmwo0Knj2jLtML+gNrzjmWFhCY+jq/l4L8Dk7UJMoymVNLhOnWdV + qhiqT7nROvtZrOMDwWa2/GJBE/wC6uEO9EQCAAA= + headers: + Alt-Svc: + - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 + Content-Encoding: + - gzip + Content-Type: + - application/json; charset=UTF-8 + Date: + - Thu, 21 May 2026 23:49:42 GMT + Server: + - scaffolding on HTTPServer2 + Server-Timing: + - gfet4t7; dur=3904 Transfer-Encoding: - chunked Vary: diff --git a/tests/cassettes/test_integracao/test_pergunta_com_ranking__openai-gpt-4o-mini.yaml b/tests/cassettes/test_integracao/test_pergunta_com_ranking__openai-gpt-4o-mini.yaml new file mode 100644 index 0000000..9d6ffb8 --- /dev/null +++ b/tests/cassettes/test_integracao/test_pergunta_com_ranking__openai-gpt-4o-mini.yaml @@ -0,0 +1,1289 @@ +interactions: +- request: + body: "{\"messages\":[{\"content\":\"Voc\xEA \xE9 o planejador de um sistema que + transforma perguntas em consultas SQL.\\n\\nSeu papel: 
analisar a situa\xE7\xE3o + atual e decidir a pr\xF3xima a\xE7\xE3o.\\n\\nContexto atual:\\n- Pergunta do + usu\xE1rio: \\\"Quais s\xE3o as 5 categorias com a maior quantidade total de + itens vendidos?\\\"\\n\\n- conversa_previa: Nenhuma\\n\\n- Schema: === SCHEMA + SQLITE (INTROSPECCAO REAL) ===\\n\\nTabela: customers\\n- customer_id: TEXT + (PK)\\n- customer_unique_id: TEXT\\n- customer_zip_code_prefix: INTEGER\\n- + customer_city: TEXT\\n- customer_state: TEXT\\n\\nTabela: geolocation\\n- geolocation_zip_code_prefix: + INTEGER\\n- geolocation_lat: REAL\\n- geolocation_lng: REAL\\n- geolocation_city: + TEXT\\n- geolocation_state: TEXT\\n\\nTabela: order_items\\n- order_id: TEXT + (PK)\\n- order_item_id: INTEGER (PK)\\n- product_id: TEXT\\n- seller_id: TEXT\\n- + shipping_limit_date: TEXT\\n- price: REAL\\n- freight_value: REAL\\n Foreign + keys:\\n - seller_id -> sellers.seller_id (on_update=NO ACTION, on_delete=NO + ACTION)\\n - product_id -> products.product_id (on_update=NO ACTION, on_delete=NO + ACTION)\\n - order_id -> orders.order_id (on_update=NO ACTION, on_delete=NO + ACTION)\\n\\nTabela: order_payments\\n- order_id: TEXT (PK)\\n- payment_sequential: + INTEGER (PK)\\n- payment_type: TEXT\\n- payment_installments: INTEGER\\n- payment_value: + REAL\\n Foreign keys:\\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\\n\\nTabela: order_reviews\\n- review_id: TEXT (PK)\\n- + order_id: TEXT\\n- review_score: INTEGER\\n- review_comment_title: TEXT\\n- + review_comment_message: TEXT\\n- review_creation_date: TEXT\\n- review_answer_timestamp: + TEXT\\n Foreign keys:\\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\\n\\nTabela: orders\\n- order_id: TEXT (PK)\\n- customer_id: + TEXT\\n- order_status: TEXT\\n- order_purchase_timestamp: TEXT\\n- order_approved_at: + TEXT\\n- order_delivered_carrier_date: TEXT\\n- order_delivered_customer_date: + TEXT\\n- order_estimated_delivery_date: TEXT\\n Foreign keys:\\n - 
customer_id + -> customers.customer_id (on_update=NO ACTION, on_delete=NO ACTION)\\n\\nTabela: + products\\n- product_id: TEXT (PK)\\n- product_category_name: TEXT\\n- product_name_length: + REAL\\n- product_description_length: REAL\\n- product_photos_qty: REAL\\n- product_weight_g: + REAL\\n- product_length_cm: REAL\\n- product_height_cm: REAL\\n- product_width_cm: + REAL\\n\\nTabela: sellers\\n- seller_id: TEXT (PK)\\n- seller_zip_code_prefix: + INTEGER\\n- seller_city: TEXT\\n- seller_state: TEXT\\n\\n\\n- Feedback do cr\xEDtico: + Nenhum\\n- Tentativas realizadas: 0\\n- Status atual: schema_obtido\\n- Erro + anterior: Nenhum\\n\\nAVALIA\xC7\xC3O CR\xCDTICA:\\nVerifique se a \\\"Pergunta + do usu\xE1rio\\\" pode ser respondida com as tabelas e colunas do Schema.\\nSe + houver ambiguidade, conceitos n\xE3o mapeados no banco de dados, ou se a inten\xE7\xE3o + do usu\xE1rio n\xE3o estiver clara, voc\xEA DEVE pedir mais informa\xE7\xF5es.\\n\\nResponda + EXATAMENTE no formato JSON abaixo, sem formata\xE7\xE3o markdown (```json):\\n{{\\n + \ \\\"decisao\\\": \\\"escolha_uma_opcao\\\",\\n \\\"pergunta_ao_usuario\\\": + \\\"escreva a pergunta aqui se precisar de ajuda, ou deixe vazio se n\xE3o precisar\\\"\\n}}\\n\\nOp\xE7\xF5es + v\xE1lidas para 'decis\xE3o':\\n- \\\"pronto_codificacao\\\" \u2192 se temos + schema, a pergunta faz sentido e devemos gerar/regenerar SQL\\n- \\\"revisando_estrategia\\\" + \u2192 se o cr\xEDtico reprovou e devemos tentar uma abordagem diferente\\n- + \\\"necessita_ajuda\\\" \u2192 a pergunta n\xE3o \xE9 clara, n\xE3o faz sentido, + falta contexto ou n\xE3o h\xE1 dados no schema para responder.\\n\",\"role\":\"user\"}],\"model\":\"gpt-4o-mini\",\"stream\":false}" + headers: + Accept: + - a + Accept-Encoding: + - g + Connection: + - k + Content-Length: + - '3' + Content-Type: + - a + Cookie: + - _ + Host: + - a + User-Agent: + - O + X-Stainless-Arch: + - x + X-Stainless-Async: + - f + X-Stainless-Lang: + - p + X-Stainless-OS: + - L + 
X-Stainless-Package-Version: + - '2' + X-Stainless-Raw-Response: + - t + X-Stainless-Runtime: + - C + X-Stainless-Runtime-Version: + - '3' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdck0z0739gYAAAD//41SwU7cMBC971cgnxMU + wkLCtVJvbQ9IcIBFlteeZL1NPJY9QSC0/8442SWhpVIVyVLemzee9zz0Y+/qX9/ue/R0exfM9/2D + /dmILClwuwdNJ9W5RtYBWXQTrQMogtT1oqpu1kV9XZQj0aOBLslaT/ka8946m5dFuc6LKr+oj+od + Wg2Ryx759+zsbTzTnM7AC8NFdkJ6iFG1wNipiMGAXUKEitFGUo5ENpMaHYEbR3/buARthAFto8IN + gxvhA1eg1GhsY7XSCc9OlR5COzhSUqEc4qCCPar4c4flPQGaIark1Q1dtyCUc0gqZTU6fDoyhw9P + HbY8wzb+IRUNZxV3kqONnDPPHwm9GNkDn09jdsOnOJKZ3pMk/A3jdXV1NfUT85PNbHl5JIkn7Baq + mzr7op80QMp2cZG+4MB2YGbp/FRqMBYXxGrh+u9pvuo9Obeu/Z/2M6E1eF5G6QMYqz87nssCpI3+ + V9lHyuPAIkJ45hWVZCGklzDQqKGb9kzE10jQS36uFoIPdlq2xktdNldVXW7NWqwOq3etH6evegMA + AA== + headers: + Access-Control-Expose-Headers: + - CF-Ray + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Fri, 22 May 2026 00:10:03 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + - CF-Ray + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - + openai-processing-ms: + - '719' + openai-project: + - + openai-version: + - '2020-10-01' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-limit-tokens: + - '200000' + x-ratelimit-remaining-requests: + - + x-ratelimit-remaining-tokens: + - + x-ratelimit-reset-requests: + - 15.434s + x-ratelimit-reset-tokens: + - 250ms + x-request-id: + - + status: + code: 200 + message: OK +- request: + body: "{\"messages\":[{\"content\":\"Voc\xEA \xE9 um especialista em SQL para + bancos SQLite.\\n\\nSua tarefa: 
gerar UMA \xFAnica consulta SQL SELECT que responda + \xE0 pergunta do usu\xE1rio,\\nusando o schema do banco de dados fornecido abaixo.\\n\\nRegras:\\n- + Gere APENAS uma consulta SELECT (ou WITH/CTE seguido de SELECT).\\n- N\xC3O + use INSERT, UPDATE, DELETE, DROP, ALTER ou qualquer comando de escrita.\\n- + N\xC3O inclua explica\xE7\xF5es, apenas a SQL pura.\\n- Use nomes de tabelas + e colunas EXATAMENTE como aparecem no schema.\\n- Se a pergunta for amb\xEDgua, + fa\xE7a a interpreta\xE7\xE3o mais razo\xE1vel.\\n\\n=== SCHEMA DO BANCO ===\\n=== + SCHEMA SQLITE (INTROSPECCAO REAL) ===\\n\\nTabela: customers\\n- customer_id: + TEXT (PK)\\n- customer_unique_id: TEXT\\n- customer_zip_code_prefix: INTEGER\\n- + customer_city: TEXT\\n- customer_state: TEXT\\n\\nTabela: geolocation\\n- geolocation_zip_code_prefix: + INTEGER\\n- geolocation_lat: REAL\\n- geolocation_lng: REAL\\n- geolocation_city: + TEXT\\n- geolocation_state: TEXT\\n\\nTabela: order_items\\n- order_id: TEXT + (PK)\\n- order_item_id: INTEGER (PK)\\n- product_id: TEXT\\n- seller_id: TEXT\\n- + shipping_limit_date: TEXT\\n- price: REAL\\n- freight_value: REAL\\n Foreign + keys:\\n - seller_id -> sellers.seller_id (on_update=NO ACTION, on_delete=NO + ACTION)\\n - product_id -> products.product_id (on_update=NO ACTION, on_delete=NO + ACTION)\\n - order_id -> orders.order_id (on_update=NO ACTION, on_delete=NO + ACTION)\\n\\nTabela: order_payments\\n- order_id: TEXT (PK)\\n- payment_sequential: + INTEGER (PK)\\n- payment_type: TEXT\\n- payment_installments: INTEGER\\n- payment_value: + REAL\\n Foreign keys:\\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\\n\\nTabela: order_reviews\\n- review_id: TEXT (PK)\\n- + order_id: TEXT\\n- review_score: INTEGER\\n- review_comment_title: TEXT\\n- + review_comment_message: TEXT\\n- review_creation_date: TEXT\\n- review_answer_timestamp: + TEXT\\n Foreign keys:\\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO 
ACTION)\\n\\nTabela: orders\\n- order_id: TEXT (PK)\\n- customer_id: + TEXT\\n- order_status: TEXT\\n- order_purchase_timestamp: TEXT\\n- order_approved_at: + TEXT\\n- order_delivered_carrier_date: TEXT\\n- order_delivered_customer_date: + TEXT\\n- order_estimated_delivery_date: TEXT\\n Foreign keys:\\n - customer_id + -> customers.customer_id (on_update=NO ACTION, on_delete=NO ACTION)\\n\\nTabela: + products\\n- product_id: TEXT (PK)\\n- product_category_name: TEXT\\n- product_name_length: + REAL\\n- product_description_length: REAL\\n- product_photos_qty: REAL\\n- product_weight_g: + REAL\\n- product_length_cm: REAL\\n- product_height_cm: REAL\\n- product_width_cm: + REAL\\n\\nTabela: sellers\\n- seller_id: TEXT (PK)\\n- seller_zip_code_prefix: + INTEGER\\n- seller_city: TEXT\\n- seller_state: TEXT\\n\\n\\n=== PERGUNTA DO + USU\xC1RIO ===\\nQuais s\xE3o as 5 categorias com a maior quantidade total de + itens vendidos?\\n\\n=== CONVERSA PR\xC9VIA (CONTEXTO ADICIONAL) ===\\nNenhuma\\n\\n=== + HIST\xD3RICO DE TENTATIVAS ANTERIORES ===\\nNenhuma tentativa anterior.\\n\\nResponda + APENAS com a consulta SQL, sem markdown, sem explica\xE7\xE3o.\",\"role\":\"user\"}],\"model\":\"gpt-4o-mini\",\"stream\":false}" + headers: + Accept: + - a + Accept-Encoding: + - g + Connection: + - k + Content-Length: + - '3' + Content-Type: + - a + Cookie: + - _ + Host: + - a + User-Agent: + - O + X-Stainless-Arch: + - x + X-Stainless-Async: + - f + X-Stainless-Lang: + - p + X-Stainless-OS: + - L + X-Stainless-Package-Version: + - '2' + X-Stainless-Raw-Response: + - t + X-Stainless-Runtime: + - C + X-Stainless-Runtime-Version: + - '3' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdck0z84ItQAAAAD//41TUW/aMBB+51dYftok + QGmbEqppD6xlExWUCWilalSWsY/gLbE925lWTfz32kkgoVulvVjxfd9dvvN3N3qMft6OsvhuUBR0 + 
u7IweVDTK9wNGWrzHZg7ZPWZ8nnghJIVzAxQB6HqWZJcxdFwEF2UQK44ZCEt1a4Xq14upOidR+dx + L0p6Z8M6e6cEA+tp3/wVoT/lGXRKDr99OOoeIjlYS1PwsQPJB43KQgRTa4V1VDrcbUCmpANZSl+O + p+PrFdJ9bRQvmCPMi06VeSaS5tBFy/vZOyX6ynAwRDjIieDv0WiJnHI0KyOWWJXxtfy8mM9QQ7RI + ibW8nU/uqqC/o7n/Phbj6CNSx0tNrWVYpGvyQVdJ163rWn5ZzO+/ok+Pb6lfy/niZrwIjNdq0c14 + eb2W08lsskKXH9qPY2BbWBoMkkWWtQAqpa8SDC5teaqR/dGITKVex8a+SsVbb7DdET8P1g+Hf3Tr + lMYluvfnU2l4ceIh9oVy7YhTP6D8XTKsDcfNnDXoIKnBqs9jfBgfgJN6hIOjIrOtkcGMsh3wJrWZ + L1pwoVpAp9X132r+VbvqXMj0f8o3AGOg/QYRbYALdtpxQzMQ1vAt2vGVS8HYgvnl94o4ASY4wWFL + i6xaDmyfbRhwb1cKRhtRbchWkyGNGfDNxWWCO/vOC8UIfCgvBAAA + headers: + Access-Control-Expose-Headers: + - CF-Ray + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Fri, 22 May 2026 00:10:06 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + - CF-Ray + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - + openai-processing-ms: + - '2416' + openai-project: + - + openai-version: + - '2020-10-01' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-limit-tokens: + - '200000' + x-ratelimit-remaining-requests: + - + x-ratelimit-remaining-tokens: + - + x-ratelimit-reset-requests: + - 23.016s + x-ratelimit-reset-tokens: + - 219ms + x-request-id: + - + status: + code: 200 + message: OK +- request: + body: "{\"messages\":[{\"content\":\"Voc\xEA \xE9 um revisor de qualidade para + consultas SQL geradas por IA.\\n\\nSua tarefa: avaliar se a consulta SQL e seus + resultados respondem adequadamente\\n\xE0 pergunta original do usu\xE1rio.\\n\\n=== + PERGUNTA DO USU\xC1RIO ===\\nQuais s\xE3o as 5 categorias com a maior quantidade + total de itens vendidos?\\n\\n=== SCHEMA DO BANCO ===\\n=== SCHEMA SQLITE (INTROSPECCAO + REAL) 
===\\n\\nTabela: customers\\n- customer_id: TEXT (PK)\\n- customer_unique_id: + TEXT\\n- customer_zip_code_prefix: INTEGER\\n- customer_city: TEXT\\n- customer_state: + TEXT\\n\\nTabela: geolocation\\n- geolocation_zip_code_prefix: INTEGER\\n- geolocation_lat: + REAL\\n- geolocation_lng: REAL\\n- geolocation_city: TEXT\\n- geolocation_state: + TEXT\\n\\nTabela: order_items\\n- order_id: TEXT (PK)\\n- order_item_id: INTEGER + (PK)\\n- product_id: TEXT\\n- seller_id: TEXT\\n- shipping_limit_date: TEXT\\n- + price: REAL\\n- freight_value: REAL\\n Foreign keys:\\n - seller_id -> sellers.seller_id + (on_update=NO ACTION, on_delete=NO ACTION)\\n - product_id -> products.product_id + (on_update=NO ACTION, on_delete=NO ACTION)\\n - order_id -> orders.order_id + (on_update=NO ACTION, on_delete=NO ACTION)\\n\\nTabela: order_payments\\n- order_id: + TEXT (PK)\\n- payment_sequential: INTEGER (PK)\\n- payment_type: TEXT\\n- payment_installments: + INTEGER\\n- payment_value: REAL\\n Foreign keys:\\n - order_id -> orders.order_id + (on_update=NO ACTION, on_delete=NO ACTION)\\n\\nTabela: order_reviews\\n- review_id: + TEXT (PK)\\n- order_id: TEXT\\n- review_score: INTEGER\\n- review_comment_title: + TEXT\\n- review_comment_message: TEXT\\n- review_creation_date: TEXT\\n- review_answer_timestamp: + TEXT\\n Foreign keys:\\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\\n\\nTabela: orders\\n- order_id: TEXT (PK)\\n- customer_id: + TEXT\\n- order_status: TEXT\\n- order_purchase_timestamp: TEXT\\n- order_approved_at: + TEXT\\n- order_delivered_carrier_date: TEXT\\n- order_delivered_customer_date: + TEXT\\n- order_estimated_delivery_date: TEXT\\n Foreign keys:\\n - customer_id + -> customers.customer_id (on_update=NO ACTION, on_delete=NO ACTION)\\n\\nTabela: + products\\n- product_id: TEXT (PK)\\n- product_category_name: TEXT\\n- product_name_length: + REAL\\n- product_description_length: REAL\\n- product_photos_qty: REAL\\n- product_weight_g: + REAL\\n- 
product_length_cm: REAL\\n- product_height_cm: REAL\\n- product_width_cm: + REAL\\n\\nTabela: sellers\\n- seller_id: TEXT (PK)\\n- seller_zip_code_prefix: + INTEGER\\n- seller_city: TEXT\\n- seller_state: TEXT\\n\\n\\n=== CONVERSA COM + O AGENTE (se houver) ===\\nNenhuma\\n\\n=== SQL GERADA ===\\nSELECT p.product_category_name, + SUM(oi.order_item_id) AS total_items_sold\\nFROM order_items oi\\nJOIN orders + o ON oi.order_id = o.order_id\\nJOIN products p ON oi.product_id = p.product_id\\nGROUP + BY p.product_category_name\\nORDER BY total_items_sold DESC\\nLIMIT 5;\\n\\n=== + RESULTADO DA EXECU\xC7\xC3O ===\\nStatus: exec_ok\\nTotal de linhas: 5\\nAmostra + dos resultados (primeiras linhas):\\n[{'product_category_name': 'cama_mesa_banho', + 'total_items_sold': 13665}, {'product_category_name': 'moveis_decoracao', 'total_items_sold': + 11540}, {'product_category_name': 'beleza_saude', 'total_items_sold': 11081}, + {'product_category_name': 'esporte_lazer', 'total_items_sold': 9932}, {'product_category_name': + 'informatica_acessorios', 'total_items_sold': 9874}]\\n\\n=== ERROS (se houver) + ===\\nNenhum\\n\\n=== TENTATIVAS ANTERIORES ===\\nNenhuma tentativa anterior + (esta \xE9 a primeira).\\n\\n=== EXEMPLOS DE AVALIA\xC7\xC3O ===\\n\\n-- EXEMPLO + 1: REPROVADO (escopo incompleto) --\\nPergunta: \\\"Which airport has the least + number of flights?\\\"\\nSQL: SELECT SourceAirport FROM flights GROUP BY SourceAirport + ORDER BY COUNT(*) ASC LIMIT 1\\nResultado: [('AID',)]\\nVEREDITO: REPROVADO\\nRaz\xE3o: + A query conta apenas voos com partida (SourceAirport) e ignora voos com chegada + (DestAirport).\\nO escopo da pergunta \xE9 \\\"flights\\\" em geral \u2014 a + query responde a uma pergunta diferente.\\n\\n-- EXEMPLO 2: REPROVADO (erro + sem\xE2ntico: MIN vs MAX) --\\nPergunta: \\\"Which Asian countries have a population + larger than any country in Africa?\\\"\\nSQL: SELECT Name FROM country WHERE + Continent='Asia' AND Population > (SELECT MAX(Population) FROM 
country WHERE + Continent='Africa')\\nResultado: [] (vazio)\\nVEREDITO: REPROVADO\\nRaz\xE3o: + \\\"Larger than any country in Africa\\\" significa maior que pelo menos um + pa\xEDs africano (MIN),\\nn\xE3o maior que todos os pa\xEDses africanos (MAX). + A l\xF3gica est\xE1 semanticamente errada.\\n\\n-- EXEMPLO 3: REPROVADO (resultado + vazio suspeito) --\\nPergunta: \\\"Find the last name of students who live in + North Carolina and are not enrolled in any degree.\\\"\\nSQL: SELECT last_name + FROM Students WHERE state_province_county = 'North Carolina' AND ...\\nResultado: + [] (vazio)\\nVEREDITO: REPROVADO\\nRaz\xE3o: Resultado vazio quando a pergunta + espera dados reais \xE9 suspeito. Verifique se o filtro\\nde string corresponde + exatamente ao valor no banco (ex: 'NorthCarolina' vs 'North Carolina').\\n\\n-- + EXEMPLO 4: REPROVADO (JOIN incorreto muda o que est\xE1 sendo contado) --\\nPergunta: + \\\"Find the name of makers that produced some cars in 1970.\\\"\\nSQL: SELECT + DISTINCT Maker FROM car_makers JOIN car_names ON car_makers.Id = car_names.MakeId + JOIN cars_data ON car_names.MakeId = cars_data.Id WHERE cars_data.Year = 1970\\nResultado: + [('chevrolet',), ('buick',)]\\nVEREDITO: REPROVADO\\nRaz\xE3o: O JOIN usa car_names.MakeId + para conectar a cars_data, mas cars_data.Id refere-se\\nao ID do carro, n\xE3o + do fabricante. O caminho correto seria via model_list. Os resultados\\nparecem + plaus\xEDveis mas derivam de uma jun\xE7\xE3o incorreta.\\n\\n-- EXEMPLO 5: + APROVADO (formato diferente, resposta correta) --\\nPergunta: \\\"On average, + when were the transcripts printed?\\\"\\nSQL: SELECT AVG(transcript_date) AS + average_transcript_date FROM Transcripts\\nResultado: [('1989.9333333333334',)]\\nVEREDITO: + APROVADO\\nRaz\xE3o: O resultado \xE9 um n\xFAmero que representa a m\xE9dia + das datas (formato num\xE9rico do SQLite).\\nEmbora n\xE3o seja uma data formatada, + responde corretamente \xE0 pergunta. 
Diferen\xE7a de\\nrepresenta\xE7\xE3o n\xE3o + \xE9 motivo de reprova\xE7\xE3o.\\n\\n-- EXEMPLO 6: APROVADO (query mais simples + que o gold, resultado equivalente) --\\nPergunta: \\\"Which model of car has + the minimum horsepower?\\\"\\nSQL: SELECT Model FROM car_names JOIN cars_data + ON car_names.MakeId = cars_data.Id WHERE Horsepower = (SELECT MIN(Horsepower) + FROM cars_data) LIMIT 1\\nResultado: [('triumph',)]\\nVEREDITO: APROVADO\\nRaz\xE3o: + A query retorna corretamente o modelo com menor pot\xEAncia. O LIMIT 1 garante + unicidade\\ne o resultado \xE9 semanticamente correto. Aprovar.\\n\\n=== CRIT\xC9RIOS + DE AVALIA\xC7\xC3O ===\\n\\nREPROVE quando houver:\\n- Escopo incompleto: query + cobre apenas parte do que a pergunta pede\\n- Erro sem\xE2ntico: l\xF3gica correta + na forma mas errada no significado (MIN vs MAX, ANY vs ALL)\\n- JOIN incorreto + que altera os dados sendo agregados ou filtrados\\n- Resultado vazio quando + a pergunta claramente espera dados\\n- Filtro com valor literal diferente do + que est\xE1 no banco\\n- M\xE9trica errada (SUM vs AVG, COUNT vs COUNT DISTINCT, + etc.)\\n- Erro de execu\xE7\xE3o SQL\\n\\nAPROVE quando:\\n- O resultado responde + \xE0 pergunta, mesmo com formato ou representa\xE7\xE3o diferente\\n- H\xE1 + colunas extras que n\xE3o prejudicam a resposta\\n- A precis\xE3o num\xE9rica + difere mas o valor est\xE1 correto\\n- A query \xE9 mais simples que o esperado + mas semanticamente equivalente\\n\\nAvalie com rigor sem\xE2ntico. Resultados + que parecem plaus\xEDveis mas derivam de l\xF3gica\\nincorreta devem ser reprovados. 
+ N\xE3o presuma que uma query bem-formada est\xE1 correta.\\n\\nResponda no formato:\\nVEREDITO: + APROVADO ou REPROVADO\\nFEEDBACK: \",\"role\":\"user\"}],\"model\":\"gpt-4o-mini\",\"stream\":false}" + headers: + Accept: + - a + Accept-Encoding: + - g + Connection: + - k + Content-Length: + - '7' + Content-Type: + - a + Cookie: + - _ + Host: + - a + User-Agent: + - O + X-Stainless-Arch: + - x + X-Stainless-Async: + - f + X-Stainless-Lang: + - p + X-Stainless-OS: + - L + X-Stainless-Package-Version: + - '2' + X-Stainless-Raw-Response: + - t + X-Stainless-Runtime: + - C + X-Stainless-Runtime-Version: + - '3' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdck0z85x9QMAAAD//3RUTW/TQBC991eMfAIp + idI2kKa30BoJ8RHULyFBZW92J+629o67u66gqP8FxAH1zI2r/xizTlI7QC8red6bNzM7b+1IqLP4 + +OTDtYzl6bsrcTsdxW+iXsig+SVKv84aSOI89JrMEpYWhceguj0eT0bDvefDcQMUpDAPaVnp+yPq + F9ro/s5wZ9Qfjvvbe6vsC9ISHdM+8ifA1+YMfRqFnzk87K0jBTonMuTYmsRBS3mIRMI57bwwPuq1 + oCTj0TStn8VH8eGrk9k+HMXvj2Zn08MZwCfzMo4PX0wPXu/DFJjtqtwLqLzO9a0AAYvK1D/rHwTp + 8enbJ6QHZBXaRHssEq2epj24rhBMYKDz9fegwU0oAuLo7wItgScvclAInGUc3KBRWpFjqrXoRcEN + Yg9K0g7SDfkU6nuoCtCKKXqhpVBkAVnqBsGhXffJ1S4r44kVC0gPZqfvTlLAgivdhrKh9XTA8+X1 + r4xFQIl2VK4/r++L7ghorVCiB4VwfANFfe9tyFoXE0uuRVcSrwjqb1CizbgBAWR1pg1P62hukbOv + K96IVoJ5j11DyTNJNlDGyWLQ3Z7FReVEcJCp8rwDCGNYLDiw8c35Crl7cEpOWWlp7v5KjRbsQHeR + sGEdu5dd4TyVUYPe8XneOLLaMFnEQkXpE09X2JTbnuyuLBm1L6GFJ7srsJm3jbPv18iGYqLYAzp3 + HVdHUsgLVJ2S48nOwwyiUppabLjVGf3fjv4nvxxfm6yj8qh8C0iJJb/zpLSotNycuqVZDD+Lx2gP + V900HLGJb/j1J16jDetQuBBsy+WfwX1x4R3wzjK0pdXLd7wokz0xkqjmu8/G0dbd1h8AAAD//wMA + kSA9mNUEAAA= + headers: + Access-Control-Expose-Headers: + - CF-Ray + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Fri, 22 May 2026 00:10:08 GMT + Server: + - cloudflare + 
Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + - CF-Ray + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - + openai-processing-ms: + - '1913' + openai-project: + - + openai-version: + - '2020-10-01' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-limit-tokens: + - '200000' + x-ratelimit-remaining-requests: + - + x-ratelimit-remaining-tokens: + - + x-ratelimit-reset-requests: + - 28.574s + x-ratelimit-reset-tokens: + - 554ms + x-request-id: + - + status: + code: 200 + message: OK +- request: + body: "{\"messages\":[{\"content\":\"Voc\xEA \xE9 o planejador de um sistema que + transforma perguntas em consultas SQL.\\n\\nSeu papel: analisar a situa\xE7\xE3o + atual e decidir a pr\xF3xima a\xE7\xE3o.\\n\\nContexto atual:\\n- Pergunta do + usu\xE1rio: \\\"Quais s\xE3o as 5 categorias com a maior quantidade total de + itens vendidos?\\\"\\n\\n- conversa_previa: Nenhuma\\n\\n- Schema: === SCHEMA + SQLITE (INTROSPECCAO REAL) ===\\n\\nTabela: customers\\n- customer_id: TEXT + (PK)\\n- customer_unique_id: TEXT\\n- customer_zip_code_prefix: INTEGER\\n- + customer_city: TEXT\\n- customer_state: TEXT\\n\\nTabela: geolocation\\n- geolocation_zip_code_prefix: + INTEGER\\n- geolocation_lat: REAL\\n- geolocation_lng: REAL\\n- geolocation_city: + TEXT\\n- geolocation_state: TEXT\\n\\nTabela: order_items\\n- order_id: TEXT + (PK)\\n- order_item_id: INTEGER (PK)\\n- product_id: TEXT\\n- seller_id: TEXT\\n- + shipping_limit_date: TEXT\\n- price: REAL\\n- freight_value: REAL\\n Foreign + keys:\\n - seller_id -> sellers.seller_id (on_update=NO ACTION, on_delete=NO + ACTION)\\n - product_id -> products.product_id (on_update=NO ACTION, on_delete=NO + ACTION)\\n - order_id -> orders.order_id (on_update=NO ACTION, on_delete=NO + ACTION)\\n\\nTabela: order_payments\\n- order_id: TEXT (PK)\\n- 
payment_sequential: + INTEGER (PK)\\n- payment_type: TEXT\\n- payment_installments: INTEGER\\n- payment_value: + REAL\\n Foreign keys:\\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\\n\\nTabela: order_reviews\\n- review_id: TEXT (PK)\\n- + order_id: TEXT\\n- review_score: INTEGER\\n- review_comment_title: TEXT\\n- + review_comment_message: TEXT\\n- review_creation_date: TEXT\\n- review_answer_timestamp: + TEXT\\n Foreign keys:\\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\\n\\nTabela: orders\\n- order_id: TEXT (PK)\\n- customer_id: + TEXT\\n- order_status: TEXT\\n- order_purchase_timestamp: TEXT\\n- order_approved_at: + TEXT\\n- order_delivered_carrier_date: TEXT\\n- order_delivered_customer_date: + TEXT\\n- order_estimated_delivery_date: TEXT\\n Foreign keys:\\n - customer_id + -> customers.customer_id (on_update=NO ACTION, on_delete=NO ACTION)\\n\\nTabela: + products\\n- product_id: TEXT (PK)\\n- product_category_name: TEXT\\n- product_name_length: + REAL\\n- product_description_length: REAL\\n- product_photos_qty: REAL\\n- product_weight_g: + REAL\\n- product_length_cm: REAL\\n- product_height_cm: REAL\\n- product_width_cm: + REAL\\n\\nTabela: sellers\\n- seller_id: TEXT (PK)\\n- seller_zip_code_prefix: + INTEGER\\n- seller_city: TEXT\\n- seller_state: TEXT\\n\\n\\n- Feedback do cr\xEDtico: + A consulta utiliza a fun\xE7\xE3o `SUM(oi.order_item_id)`, que n\xE3o est\xE1 + contando o n\xFAmero total de itens vendidos corretamente, pois `order_item_id` + \xE9 um identificador e deve ser utilizado junto com `COUNT` em vez de `SUM`. 
+ A l\xF3gica da consulta tamb\xE9m n\xE3o est\xE1 errada, mas a m\xE9trica utilizada + n\xE3o responde \xE0 pergunta original sobre a quantidade total de itens vendidos + por categoria.\\n- Tentativas realizadas: 1\\n- Status atual: reprovado\\n- + Erro anterior: Nenhum\\n\\nAVALIA\xC7\xC3O CR\xCDTICA:\\nVerifique se a \\\"Pergunta + do usu\xE1rio\\\" pode ser respondida com as tabelas e colunas do Schema.\\nSe + houver ambiguidade, conceitos n\xE3o mapeados no banco de dados, ou se a inten\xE7\xE3o + do usu\xE1rio n\xE3o estiver clara, voc\xEA DEVE pedir mais informa\xE7\xF5es.\\n\\nResponda + EXATAMENTE no formato JSON abaixo, sem formata\xE7\xE3o markdown (```json):\\n{{\\n + \ \\\"decisao\\\": \\\"escolha_uma_opcao\\\",\\n \\\"pergunta_ao_usuario\\\": + \\\"escreva a pergunta aqui se precisar de ajuda, ou deixe vazio se n\xE3o precisar\\\"\\n}}\\n\\nOp\xE7\xF5es + v\xE1lidas para 'decis\xE3o':\\n- \\\"pronto_codificacao\\\" \u2192 se temos + schema, a pergunta faz sentido e devemos gerar/regenerar SQL\\n- \\\"revisando_estrategia\\\" + \u2192 se o cr\xEDtico reprovou e devemos tentar uma abordagem diferente\\n- + \\\"necessita_ajuda\\\" \u2192 a pergunta n\xE3o \xE9 clara, n\xE3o faz sentido, + falta contexto ou n\xE3o h\xE1 dados no schema para responder.\\n\",\"role\":\"user\"}],\"model\":\"gpt-4o-mini\",\"stream\":false}" + headers: + Accept: + - a + Accept-Encoding: + - g + Connection: + - k + Content-Length: + - '3' + Content-Type: + - a + Cookie: + - _ + Host: + - a + User-Agent: + - O + X-Stainless-Arch: + - x + X-Stainless-Async: + - f + X-Stainless-Lang: + - p + X-Stainless-OS: + - L + X-Stainless-Package-Version: + - '2' + X-Stainless-Raw-Response: + - t + X-Stainless-Runtime: + - C + X-Stainless-Runtime-Version: + - '3' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdck0z87zywUAAAD//41TzY7TMBC+9yksn1uU + 
Vi1tT1y4sBxgFwkOZBVN7UnqbmJnPXbFatUX4jV4McZJ22RhkbhY8vfNfPN/yO4+35q7Un96Wn37 + cvPw/sPx4+2NnCYPtzugChevN8qxHwbjbE8rjxAwqc7X6+0y27zNth3ROI11cqvaMFu6WWOsmS2y + xXKWrWfzzdl774xCYrPv/BXiuXtTnlbjD4az6QVpkAgqZOxixKB3dUIkEBkKYIOcDqRyNqDtUn/O + bYJyqVEZApczmEuPR/5Y7Qqk4LmMykAupxfbFn0VbYACXBEpgjdnv69O/fopWi7QGxAcpjS+AS8I + hUbCQ4cFBkA8Rk7KaNAoggtQs4EwnBWJI1pttCPW8UKl4C6pYa9yxKRBhkMknRYtsCX2Ho8RRek8 + NKI0FuqGi0TBj8cqIr3LZW5P40Z4LCNBGoaNdT0iwFpOKg2zG8H9mTldm167qvVuR3+4So5raF/w + 7IkXgRtMwbWyY0/83nfDjS/mJVmoaUMR3AN24barRa8nh50a2CvZtW3A51m2nL4iWGgMYGoa7YdU + oPaoB99hmSBq40bEZFT23+m8pt2Xbmz1P/IDoRS2fC5F63mW6mXJg5nHdHP/Mru2uUtYEvojH1ER + DPo0Co0lxLq/BElPFLApeF4V+tab/hzKtlCLcrXeLHZ6KSenyW+3CY2vHAQAAA== + headers: + Access-Control-Expose-Headers: + - CF-Ray + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Fri, 22 May 2026 00:10:10 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + - CF-Ray + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - + openai-processing-ms: + - '1114' + openai-project: + - + openai-version: + - '2020-10-01' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-limit-tokens: + - '200000' + x-ratelimit-remaining-requests: + - + x-ratelimit-remaining-tokens: + - + x-ratelimit-reset-requests: + - 34.674s + x-ratelimit-reset-tokens: + - 278ms + x-request-id: + - + status: + code: 200 + message: OK +- request: + body: "{\"messages\":[{\"content\":\"Voc\xEA \xE9 um especialista em SQL para + bancos SQLite.\\n\\nSua tarefa: gerar UMA \xFAnica consulta SQL SELECT que responda + \xE0 pergunta do usu\xE1rio,\\nusando o schema do banco de dados fornecido abaixo.\\n\\nRegras:\\n- + Gere APENAS uma consulta SELECT (ou WITH/CTE seguido de SELECT).\\n- 
N\xC3O + use INSERT, UPDATE, DELETE, DROP, ALTER ou qualquer comando de escrita.\\n- + N\xC3O inclua explica\xE7\xF5es, apenas a SQL pura.\\n- Use nomes de tabelas + e colunas EXATAMENTE como aparecem no schema.\\n- Se a pergunta for amb\xEDgua, + fa\xE7a a interpreta\xE7\xE3o mais razo\xE1vel.\\n\\n=== SCHEMA DO BANCO ===\\n=== + SCHEMA SQLITE (INTROSPECCAO REAL) ===\\n\\nTabela: customers\\n- customer_id: + TEXT (PK)\\n- customer_unique_id: TEXT\\n- customer_zip_code_prefix: INTEGER\\n- + customer_city: TEXT\\n- customer_state: TEXT\\n\\nTabela: geolocation\\n- geolocation_zip_code_prefix: + INTEGER\\n- geolocation_lat: REAL\\n- geolocation_lng: REAL\\n- geolocation_city: + TEXT\\n- geolocation_state: TEXT\\n\\nTabela: order_items\\n- order_id: TEXT + (PK)\\n- order_item_id: INTEGER (PK)\\n- product_id: TEXT\\n- seller_id: TEXT\\n- + shipping_limit_date: TEXT\\n- price: REAL\\n- freight_value: REAL\\n Foreign + keys:\\n - seller_id -> sellers.seller_id (on_update=NO ACTION, on_delete=NO + ACTION)\\n - product_id -> products.product_id (on_update=NO ACTION, on_delete=NO + ACTION)\\n - order_id -> orders.order_id (on_update=NO ACTION, on_delete=NO + ACTION)\\n\\nTabela: order_payments\\n- order_id: TEXT (PK)\\n- payment_sequential: + INTEGER (PK)\\n- payment_type: TEXT\\n- payment_installments: INTEGER\\n- payment_value: + REAL\\n Foreign keys:\\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\\n\\nTabela: order_reviews\\n- review_id: TEXT (PK)\\n- + order_id: TEXT\\n- review_score: INTEGER\\n- review_comment_title: TEXT\\n- + review_comment_message: TEXT\\n- review_creation_date: TEXT\\n- review_answer_timestamp: + TEXT\\n Foreign keys:\\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\\n\\nTabela: orders\\n- order_id: TEXT (PK)\\n- customer_id: + TEXT\\n- order_status: TEXT\\n- order_purchase_timestamp: TEXT\\n- order_approved_at: + TEXT\\n- order_delivered_carrier_date: TEXT\\n- 
order_delivered_customer_date: + TEXT\\n- order_estimated_delivery_date: TEXT\\n Foreign keys:\\n - customer_id + -> customers.customer_id (on_update=NO ACTION, on_delete=NO ACTION)\\n\\nTabela: + products\\n- product_id: TEXT (PK)\\n- product_category_name: TEXT\\n- product_name_length: + REAL\\n- product_description_length: REAL\\n- product_photos_qty: REAL\\n- product_weight_g: + REAL\\n- product_length_cm: REAL\\n- product_height_cm: REAL\\n- product_width_cm: + REAL\\n\\nTabela: sellers\\n- seller_id: TEXT (PK)\\n- seller_zip_code_prefix: + INTEGER\\n- seller_city: TEXT\\n- seller_state: TEXT\\n\\n\\n=== PERGUNTA DO + USU\xC1RIO ===\\nQuais s\xE3o as 5 categorias com a maior quantidade total de + itens vendidos?\\n\\n=== CONVERSA PR\xC9VIA (CONTEXTO ADICIONAL) ===\\nNenhuma\\n\\n=== + HIST\xD3RICO DE TENTATIVAS ANTERIORES ===\\n--- Tentativa 1 ---\\nSQL gerada:\\nSELECT + p.product_category_name, SUM(oi.order_item_id) AS total_items_sold\\nFROM order_items + oi\\nJOIN orders o ON oi.order_id = o.order_id\\nJOIN products p ON oi.product_id + = p.product_id\\nGROUP BY p.product_category_name\\nORDER BY total_items_sold + DESC\\nLIMIT 5;\\nFeedback do cr\xEDtico: A consulta utiliza a fun\xE7\xE3o + `SUM(oi.order_item_id)`, que n\xE3o est\xE1 contando o n\xFAmero total de itens + vendidos corretamente, pois `order_item_id` \xE9 um identificador e deve ser + utilizado junto com `COUNT` em vez de `SUM`. A l\xF3gica da consulta tamb\xE9m + n\xE3o est\xE1 errada, mas a m\xE9trica utilizada n\xE3o responde \xE0 pergunta + original sobre a quantidade total de itens vendidos por categoria.\\n\\nN\xC3O + repita os mesmos erros. 
Gere uma SQL diferente e corrigida.\\n\\nResponda APENAS + com a consulta SQL, sem markdown, sem explica\xE7\xE3o.\",\"role\":\"user\"}],\"model\":\"gpt-4o-mini\",\"stream\":false}" + headers: + Accept: + - a + Accept-Encoding: + - g + Connection: + - k + Content-Length: + - '3' + Content-Type: + - a + Cookie: + - _ + Host: + - a + User-Agent: + - O + X-Stainless-Arch: + - x + X-Stainless-Async: + - f + X-Stainless-Lang: + - p + X-Stainless-OS: + - L + X-Stainless-Package-Version: + - '2' + X-Stainless-Raw-Response: + - t + X-Stainless-Runtime: + - C + X-Stainless-Runtime-Version: + - '3' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdck0zy4IdAIAAAD//4xTXW/aMBR951dYftok + QHwWqmoPG2UTUyEd0GnTqCxjX4K7xLZsp1o38d9rJ4GEbpX2YsX3nHtzrs+92eNy+3Vup307+Tb5 + /QUeuLtdSNwMGWr7AMwds9pM+TxwQpUwM0AdhKrd0ehy0BlfdLs5kCoOSUiLtWsNVCsVUrR6nd6g + 1Rm1uuMye68EA+tpP/wVoT/5GXRKDr98uNM8RlKwlsbgY0eSDxqVhAim1grrqHS4WYFMSQcyl76a + 3kwna6Tb2iieMUeYFx0r80QkTaGJJtHdYv1GibYyHAwRDlIi+Fv0foWccjTJI5ZYlfCN/LiM5qgi + WqTERn6OZosi6O8o8t+nYhy9Q+p0KamlEIt0ST4qy+m6dt3IT8vo7hZ9+P6a/o2MltfTZWC8VIuu + p6vJRt7M5rM1Gl7Vn8fALrM0WCSzJKkBVEpfJVicG3NfIoeTFYmKvY6tfZGKd95iuyd+IqwfD//s + 1imNc/Tgz/vc8uzMRewLpdoRp35C/rvLYb+oh6tJq9CLUQkWfZ7i3U6vnJTzgoSDoyKxtanBjLI9 + 8Cq3GjGacaFqQKPW9t9y/lW7aF3I+H/KVwBjoP0SEW2AC3beckUzEDbxNdrpmXPB2IJ59KtFnAAT + rOCwo1lS7Ae2TzZMuPcrBqONKJZkp8mYDhjwbX84wo1D4xkAAP//AwAfgQ2wMgQAAA== + headers: + Access-Control-Expose-Headers: + - CF-Ray + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Fri, 22 May 2026 00:10:12 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + - CF-Ray + 
alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - + openai-processing-ms: + - '1299' + openai-project: + - + openai-version: + - '2020-10-01' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-limit-tokens: + - '200000' + x-ratelimit-remaining-requests: + - + x-ratelimit-remaining-tokens: + - + x-ratelimit-reset-requests: + - 41.573s + x-ratelimit-reset-tokens: + - 274ms + x-request-id: + - + status: + code: 200 + message: OK +- request: + body: "{\"messages\":[{\"content\":\"Voc\xEA \xE9 um revisor de qualidade para + consultas SQL geradas por IA.\\n\\nSua tarefa: avaliar se a consulta SQL e seus + resultados respondem adequadamente\\n\xE0 pergunta original do usu\xE1rio.\\n\\n=== + PERGUNTA DO USU\xC1RIO ===\\nQuais s\xE3o as 5 categorias com a maior quantidade + total de itens vendidos?\\n\\n=== SCHEMA DO BANCO ===\\n=== SCHEMA SQLITE (INTROSPECCAO + REAL) ===\\n\\nTabela: customers\\n- customer_id: TEXT (PK)\\n- customer_unique_id: + TEXT\\n- customer_zip_code_prefix: INTEGER\\n- customer_city: TEXT\\n- customer_state: + TEXT\\n\\nTabela: geolocation\\n- geolocation_zip_code_prefix: INTEGER\\n- geolocation_lat: + REAL\\n- geolocation_lng: REAL\\n- geolocation_city: TEXT\\n- geolocation_state: + TEXT\\n\\nTabela: order_items\\n- order_id: TEXT (PK)\\n- order_item_id: INTEGER + (PK)\\n- product_id: TEXT\\n- seller_id: TEXT\\n- shipping_limit_date: TEXT\\n- + price: REAL\\n- freight_value: REAL\\n Foreign keys:\\n - seller_id -> sellers.seller_id + (on_update=NO ACTION, on_delete=NO ACTION)\\n - product_id -> products.product_id + (on_update=NO ACTION, on_delete=NO ACTION)\\n - order_id -> orders.order_id + (on_update=NO ACTION, on_delete=NO ACTION)\\n\\nTabela: order_payments\\n- order_id: + TEXT (PK)\\n- payment_sequential: INTEGER (PK)\\n- payment_type: TEXT\\n- payment_installments: + INTEGER\\n- payment_value: REAL\\n Foreign keys:\\n - order_id -> orders.order_id + (on_update=NO ACTION, on_delete=NO 
ACTION)\\n\\nTabela: order_reviews\\n- review_id: + TEXT (PK)\\n- order_id: TEXT\\n- review_score: INTEGER\\n- review_comment_title: + TEXT\\n- review_comment_message: TEXT\\n- review_creation_date: TEXT\\n- review_answer_timestamp: + TEXT\\n Foreign keys:\\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\\n\\nTabela: orders\\n- order_id: TEXT (PK)\\n- customer_id: + TEXT\\n- order_status: TEXT\\n- order_purchase_timestamp: TEXT\\n- order_approved_at: + TEXT\\n- order_delivered_carrier_date: TEXT\\n- order_delivered_customer_date: + TEXT\\n- order_estimated_delivery_date: TEXT\\n Foreign keys:\\n - customer_id + -> customers.customer_id (on_update=NO ACTION, on_delete=NO ACTION)\\n\\nTabela: + products\\n- product_id: TEXT (PK)\\n- product_category_name: TEXT\\n- product_name_length: + REAL\\n- product_description_length: REAL\\n- product_photos_qty: REAL\\n- product_weight_g: + REAL\\n- product_length_cm: REAL\\n- product_height_cm: REAL\\n- product_width_cm: + REAL\\n\\nTabela: sellers\\n- seller_id: TEXT (PK)\\n- seller_zip_code_prefix: + INTEGER\\n- seller_city: TEXT\\n- seller_state: TEXT\\n\\n\\n=== CONVERSA COM + O AGENTE (se houver) ===\\nNenhuma\\n\\n=== SQL GERADA ===\\nSELECT p.product_category_name, + COUNT(oi.order_item_id) AS total_items_sold\\nFROM order_items oi\\nJOIN orders + o ON oi.order_id = o.order_id\\nJOIN products p ON oi.product_id = p.product_id\\nGROUP + BY p.product_category_name\\nORDER BY total_items_sold DESC\\nLIMIT 5;\\n\\n=== + RESULTADO DA EXECU\xC7\xC3O ===\\nStatus: exec_ok\\nTotal de linhas: 5\\nAmostra + dos resultados (primeiras linhas):\\n[{'product_category_name': 'cama_mesa_banho', + 'total_items_sold': 11115}, {'product_category_name': 'beleza_saude', 'total_items_sold': + 9670}, {'product_category_name': 'esporte_lazer', 'total_items_sold': 8641}, + {'product_category_name': 'moveis_decoracao', 'total_items_sold': 8334}, {'product_category_name': + 'informatica_acessorios', 
'total_items_sold': 7827}]\\n\\n=== ERROS (se houver) + ===\\nNenhum\\n\\n=== TENTATIVAS ANTERIORES ===\\n--- Tentativa 1 ---\\nSQL: + SELECT p.product_category_name, SUM(oi.order_item_id) AS total_items_sold\\nFROM + order_items oi\\nJOIN orders o ON oi.order_id = o.order_id\\nJOIN products p + ON oi.product_id = p.product_id\\nGROUP BY p.product_category_name\\nORDER BY + total_items_sold DESC\\nLIMIT 5;\\nFeedback: A consulta utiliza a fun\xE7\xE3o + `SUM(oi.order_item_id)`, que n\xE3o est\xE1 contando o n\xFAmero total de itens + vendidos corretamente, pois `order_item_id` \xE9 um identificador e deve ser + utilizado junto com `COUNT` em vez de `SUM`. A l\xF3gica da consulta tamb\xE9m + n\xE3o est\xE1 errada, mas a m\xE9trica utilizada n\xE3o responde \xE0 pergunta + original sobre a quantidade total de itens vendidos por categoria.\\n\\n\\n=== + EXEMPLOS DE AVALIA\xC7\xC3O ===\\n\\n-- EXEMPLO 1: REPROVADO (escopo incompleto) + --\\nPergunta: \\\"Which airport has the least number of flights?\\\"\\nSQL: + SELECT SourceAirport FROM flights GROUP BY SourceAirport ORDER BY COUNT(*) ASC + LIMIT 1\\nResultado: [('AID',)]\\nVEREDITO: REPROVADO\\nRaz\xE3o: A query conta + apenas voos com partida (SourceAirport) e ignora voos com chegada (DestAirport).\\nO + escopo da pergunta \xE9 \\\"flights\\\" em geral \u2014 a query responde a uma + pergunta diferente.\\n\\n-- EXEMPLO 2: REPROVADO (erro sem\xE2ntico: MIN vs + MAX) --\\nPergunta: \\\"Which Asian countries have a population larger than + any country in Africa?\\\"\\nSQL: SELECT Name FROM country WHERE Continent='Asia' + AND Population > (SELECT MAX(Population) FROM country WHERE Continent='Africa')\\nResultado: + [] (vazio)\\nVEREDITO: REPROVADO\\nRaz\xE3o: \\\"Larger than any country in + Africa\\\" significa maior que pelo menos um pa\xEDs africano (MIN),\\nn\xE3o + maior que todos os pa\xEDses africanos (MAX). 
A l\xF3gica est\xE1 semanticamente + errada.\\n\\n-- EXEMPLO 3: REPROVADO (resultado vazio suspeito) --\\nPergunta: + \\\"Find the last name of students who live in North Carolina and are not enrolled + in any degree.\\\"\\nSQL: SELECT last_name FROM Students WHERE state_province_county + = 'North Carolina' AND ...\\nResultado: [] (vazio)\\nVEREDITO: REPROVADO\\nRaz\xE3o: + Resultado vazio quando a pergunta espera dados reais \xE9 suspeito. Verifique + se o filtro\\nde string corresponde exatamente ao valor no banco (ex: 'NorthCarolina' + vs 'North Carolina').\\n\\n-- EXEMPLO 4: REPROVADO (JOIN incorreto muda o que + est\xE1 sendo contado) --\\nPergunta: \\\"Find the name of makers that produced + some cars in 1970.\\\"\\nSQL: SELECT DISTINCT Maker FROM car_makers JOIN car_names + ON car_makers.Id = car_names.MakeId JOIN cars_data ON car_names.MakeId = cars_data.Id + WHERE cars_data.Year = 1970\\nResultado: [('chevrolet',), ('buick',)]\\nVEREDITO: + REPROVADO\\nRaz\xE3o: O JOIN usa car_names.MakeId para conectar a cars_data, + mas cars_data.Id refere-se\\nao ID do carro, n\xE3o do fabricante. O caminho + correto seria via model_list. Os resultados\\nparecem plaus\xEDveis mas derivam + de uma jun\xE7\xE3o incorreta.\\n\\n-- EXEMPLO 5: APROVADO (formato diferente, + resposta correta) --\\nPergunta: \\\"On average, when were the transcripts printed?\\\"\\nSQL: + SELECT AVG(transcript_date) AS average_transcript_date FROM Transcripts\\nResultado: + [('1989.9333333333334',)]\\nVEREDITO: APROVADO\\nRaz\xE3o: O resultado \xE9 + um n\xFAmero que representa a m\xE9dia das datas (formato num\xE9rico do SQLite).\\nEmbora + n\xE3o seja uma data formatada, responde corretamente \xE0 pergunta. 
Diferen\xE7a + de\\nrepresenta\xE7\xE3o n\xE3o \xE9 motivo de reprova\xE7\xE3o.\\n\\n-- EXEMPLO + 6: APROVADO (query mais simples que o gold, resultado equivalente) --\\nPergunta: + \\\"Which model of car has the minimum horsepower?\\\"\\nSQL: SELECT Model FROM + car_names JOIN cars_data ON car_names.MakeId = cars_data.Id WHERE Horsepower + = (SELECT MIN(Horsepower) FROM cars_data) LIMIT 1\\nResultado: [('triumph',)]\\nVEREDITO: + APROVADO\\nRaz\xE3o: A query retorna corretamente o modelo com menor pot\xEAncia. + O LIMIT 1 garante unicidade\\ne o resultado \xE9 semanticamente correto. Aprovar.\\n\\n=== + CRIT\xC9RIOS DE AVALIA\xC7\xC3O ===\\n\\nREPROVE quando houver:\\n- Escopo incompleto: + query cobre apenas parte do que a pergunta pede\\n- Erro sem\xE2ntico: l\xF3gica + correta na forma mas errada no significado (MIN vs MAX, ANY vs ALL)\\n- JOIN + incorreto que altera os dados sendo agregados ou filtrados\\n- Resultado vazio + quando a pergunta claramente espera dados\\n- Filtro com valor literal diferente + do que est\xE1 no banco\\n- M\xE9trica errada (SUM vs AVG, COUNT vs COUNT DISTINCT, + etc.)\\n- Erro de execu\xE7\xE3o SQL\\n\\nAPROVE quando:\\n- O resultado responde + \xE0 pergunta, mesmo com formato ou representa\xE7\xE3o diferente\\n- H\xE1 + colunas extras que n\xE3o prejudicam a resposta\\n- A precis\xE3o num\xE9rica + difere mas o valor est\xE1 correto\\n- A query \xE9 mais simples que o esperado + mas semanticamente equivalente\\n\\nAvalie com rigor sem\xE2ntico. Resultados + que parecem plaus\xEDveis mas derivam de l\xF3gica\\nincorreta devem ser reprovados. 
+ N\xE3o presuma que uma query bem-formada est\xE1 correta.\\n\\nResponda no formato:\\nVEREDITO: + APROVADO ou REPROVADO\\nFEEDBACK: \",\"role\":\"user\"}],\"model\":\"gpt-4o-mini\",\"stream\":false}" + headers: + Accept: + - a + Accept-Encoding: + - g + Connection: + - k + Content-Length: + - '8' + Content-Type: + - a + Cookie: + - _ + Host: + - a + User-Agent: + - O + X-Stainless-Arch: + - x + X-Stainless-Async: + - f + X-Stainless-Lang: + - p + X-Stainless-OS: + - L + X-Stainless-Package-Version: + - '2' + X-Stainless-Raw-Response: + - t + X-Stainless-Runtime: + - C + X-Stainless-Runtime-Version: + - '3' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdck0zy5ySgUAAAD//41TTW/TQBC991eMfE6q + JE3rwK2kqVSBGlpKOUDlTnYnzoK94+6uKyjqfwFx4MQf4Jo/xqyT1CkfEhd/zJs382bnbTk/HyGf + uTcpnR6k/ePZyeJgrJNOZPDsPamwYe0qFh4Fw3YFK0cYKFbtp+mTYW900N9rgJI1FZGWV6E75G5p + rOkOeoNht5d2+6M1e8FGkZe0t/IL8Ll5Rp1W00cJ9zqbSEneY04S2yRJ0HERIwl6b3xAG5JOCyq2 + gWwj/XJyPjk6uZg+hcOX59PLw6MpwDt7PJkcPTscP5coSLKvi4Dw6uwFOPIViwKJOkcBSylDsPwC + Fbm8tpLFzuTGYgHIkRrQAYNd/izJMQQOggjdSH8Pt2S10eyhYgdKTisXNnagDqYwd2g1AwLmjnJc + fl9+401XeZdwPZ6+Pr243hWJ7DTZdQpJu8KU0gAqdEL3sA+VM1aZCo1v23iYs8MSTFxbnAOjEtR0 + U6NeDdaJ88bZoxIqoS4RPC5/aISbmuQzGD/HOxHpuTDKhLUGya59vfzqDO9un7ujee0x7t7WRbEF + oLVyMtE7zcav1sj9w44LzivHM/8bNZmLd/wiE6t58Z3s0weukga9l+dV46X6kT0SKVRWIQv8gZp2 + g146WhVMWg+3cDpYg83ytmj9/bUHH1fMtGzIFH7Lj4lCtSDdclvzYq0NbwE7W3P/KedvtVezG5v/ + T/kWUIoquZ5Z5Ugb9XjkNs1RvOP/Sns450Zw4sndyqXNgiEXd6FpjuKe1YX2n3ygMpOF5eSiH5vr + N6+yEQ4V6dnefprs3O/8AmvySwKMBAAA + headers: + Access-Control-Expose-Headers: + - CF-Ray + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Fri, 22 May 2026 00:10:15 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; 
preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + - CF-Ray + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - + openai-processing-ms: + - '1727' + openai-project: + - + openai-version: + - '2020-10-01' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-limit-tokens: + - '200000' + x-ratelimit-remaining-requests: + - + x-ratelimit-remaining-tokens: + - + x-ratelimit-reset-requests: + - 47.788s + x-ratelimit-reset-tokens: + - 601ms + x-request-id: + - + status: + code: 200 + message: OK +- request: + body: "{\"messages\":[{\"content\":\"Voc\xEA \xE9 um assistente que decide se + os resultados de uma consulta SQL\\ndevem ser acompanhados de um gr\xE1fico + (visualiza\xE7\xE3o).\\n\\nAnalise a pergunta do usu\xE1rio e as caracter\xEDsticas + dos dados retornados.\\n\\n=== PERGUNTA DO USU\xC1RIO ===\\n\\n\\n=== COLUNAS + DO RESULTADO ===\\nproduct_category_name, total_items_sold\\n\\n=== TOTAL DE + LINHAS ===\\n5\\n\\n=== AMOSTRA DOS DADOS ===\\n[{'product_category_name': 'cama_mesa_banho', + 'total_items_sold': 11115}, {'product_category_name': 'beleza_saude', 'total_items_sold': + 9670}, {'product_category_name': 'esporte_lazer', 'total_items_sold': 8641}, + {'product_category_name': 'moveis_decoracao', 'total_items_sold': 8334}, {'product_category_name': + 'informatica_acessorios', 'total_items_sold': 7827}]\\n\\nUm gr\xE1fico \xE9 + \xFAtil quando:\\n- A pergunta envolve compara\xE7\xF5es entre categorias (ex: + vendas por regi\xE3o)\\n- H\xE1 dados temporais ou tend\xEAncias (ex: evolu\xE7\xE3o + ao longo dos meses)\\n- H\xE1 distribui\xE7\xF5es ou rankings (ex: top 10 produtos)\\n- + H\xE1 agrega\xE7\xF5es num\xE9ricas que se beneficiam de visualiza\xE7\xE3o\\n- + O resultado tem mais de 1 linha com pelo menos uma coluna num\xE9rica\\n\\nUm + gr\xE1fico N\xC3O \xE9 \xFAtil quando:\\n- O resultado \xE9 um \xFAnico valor + escalar (ex: 
total geral)\\n- A pergunta pede um dado espec\xEDfico pontual + (ex: nome de um cliente)\\n- O resultado tem apenas 1 linha\\n- N\xE3o h\xE1 + colunas num\xE9ricas para plotar\\n\\nResponda APENAS com uma palavra: SIM ou + NAO\\n\",\"role\":\"user\"}],\"model\":\"gpt-4o-mini\",\"stream\":false}" + headers: + Accept: + - a + Accept-Encoding: + - g + Connection: + - k + Content-Length: + - '1' + Content-Type: + - a + Cookie: + - _ + Host: + - a + User-Agent: + - O + X-Stainless-Arch: + - x + X-Stainless-Async: + - f + X-Stainless-Lang: + - p + X-Stainless-OS: + - L + X-Stainless-Package-Version: + - '2' + X-Stainless-Raw-Response: + - t + X-Stainless-Runtime: + - C + X-Stainless-Runtime-Version: + - '3' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAA/4xSXU/CMBR951csfWZmfGSgr2KiUV/0wURDltJeRqFrS9spSvjv3m7AhmJiljTZ + PfecnnN7t50oIoKTq4iwBfWsMDKeiNGqfFi/rpP+5FreDwfLr/Hm5iN/uk1fctINDD1bAvMH1gXT + yAMvtKphZoF6CKq90ehymIzTXloBheYgAy03Ph7quBBKxP2kP4yTUdwb79kLLRg4bHvD3yjaVmfw + qThssJx0D5UCnKM5YO3QhEWrZagQ6pxwnipPug3ItPKgKuvPd49txMK8dDS4U6WULYAqpT0N6SpP + 0z2yO7qQOjdWz9wPKpljOrfIcBgOJ4M3Oq8NqdAdntMqbXkSgKBQYXzm9Qqq6wZpv9YjzZAbtLfH + PBqUbdKge0Yu4+CpkK41LsIoWwBvqM1sacmFbgGdVujfZs5p18GFyv8j3wCMgcHtyYwFLthp4KbN + QljBv9qOQ64MEwf2HXcq8wJseAgOc1rKejGI+3QeigxfKwdrrKi3Y26yS8bxSwcpkM6u8w0AAP// + AwBTHiohKwMAAA== + headers: + Access-Control-Expose-Headers: + - CF-Ray + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Fri, 22 May 2026 00:10:16 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + - CF-Ray + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - + openai-processing-ms: + - '340' + 
openai-project: + - + openai-version: + - '2020-10-01' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-limit-tokens: + - '200000' + x-ratelimit-remaining-requests: + - + x-ratelimit-remaining-tokens: + - + x-ratelimit-reset-requests: + - 54.176s + x-ratelimit-reset-tokens: + - 104ms + x-request-id: + - + status: + code: 200 + message: OK +- request: + body: "{\"messages\":[{\"content\":\"Voc\xEA \xE9 um especialista em visualiza\xE7\xE3o + de dados com Python e matplotlib.\\n\\nSua tarefa: gerar APENAS o bloco de c\xF3digo + Python que cria um gr\xE1fico matplotlib\\npara visualizar os dados descritos + abaixo.\\n\\n=== PERGUNTA DO USU\xC1RIO ===\\n\\n\\n=== SQL EXECUTADA ===\\nSELECT + p.product_category_name, COUNT(oi.order_item_id) AS total_items_sold\\nFROM + order_items oi\\nJOIN orders o ON oi.order_id = o.order_id\\nJOIN products p + ON oi.product_id = p.product_id\\nGROUP BY p.product_category_name\\nORDER BY + total_items_sold DESC\\nLIMIT 5;\\n\\n=== COLUNAS DO RESULTADO ===\\nproduct_category_name, + total_items_sold\\n\\n=== AMOSTRA DOS DADOS (primeiras linhas do CSV) ===\\n[{'product_category_name': + 'cama_mesa_banho', 'total_items_sold': 11115}, {'product_category_name': 'beleza_saude', + 'total_items_sold': 9670}, {'product_category_name': 'esporte_lazer', 'total_items_sold': + 8641}, {'product_category_name': 'moveis_decoracao', 'total_items_sold': 8334}, + {'product_category_name': 'informatica_acessorios', 'total_items_sold': 7827}]\\n\\n=== + TOTAL DE LINHAS ===\\n5\\n\\nRegras:\\n- O c\xF3digo ser\xE1 inserido dentro + de um script que j\xE1 importou pandas, matplotlib e j\xE1\\n carregou o DataFrame + com `df = pd.read_csv(...)`.\\n- Voc\xEA N\xC3O deve importar nada nem carregar + dados. 
Apenas use a vari\xE1vel `df`.\\n- Use `plt` (j\xE1 importado como `import + matplotlib.pyplot as plt`).\\n- Escolha o tipo de gr\xE1fico mais adequado \xE0 + pergunta e aos dados (barras, linhas, pizza, dispers\xE3o, etc.).\\n- Adicione + t\xEDtulo, labels nos eixos, e legenda quando relevante.\\n- Use cores visualmente + agrad\xE1veis.\\n- Se necess\xE1rio, rotacione labels do eixo X para legibilidade.\\n- + O gr\xE1fico ser\xE1 salvo automaticamente, voc\xEA N\xC3O deve chamar plt.savefig() + nem plt.show().\\n- Responda APENAS com o c\xF3digo Python puro, sem markdown, + sem explica\xE7\xF5es.\\n- Se os dados tiverem muitas categorias (>15), mostre + apenas o top 10-15 mais relevantes.\\n\\nN\xE3o fa\xE7a um gr\xE1fico basico + visualmente, fa\xE7a ele bonito, use cores agradaveis e que ajude o usuario + a entender os dados. Tenta fazer algo com cara profissional! Feito por um analista + apresentando para um grande cliente que julga o livro pela capa.\\n\",\"role\":\"user\"}],\"model\":\"gpt-4o-mini\",\"stream\":false}" + headers: + Accept: + - a + Accept-Encoding: + - g + Connection: + - k + Content-Length: + - '2' + Content-Type: + - a + Cookie: + - _ + Host: + - a + User-Agent: + - O + X-Stainless-Arch: + - x + X-Stainless-Async: + - f + X-Stainless-Lang: + - p + X-Stainless-OS: + - L + X-Stainless-Package-Version: + - '2' + X-Stainless-Raw-Response: + - t + X-Stainless-Runtime: + - C + X-Stainless-Runtime-Version: + - '3' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdck0zy71NgQAAAD//4xUTY/TMBC976+wsoek + Ii1pmjZbpByWikVwQHysAGlbRW4yTc26dmS7uylo/ztjp5/QRVxaZ57f85uxZ75/WF5fC/Hl85vm + 3bd69J6+/VQvBl5oGXL+AwqzY/UKiTwwTIoWLhRQA1a1n6bjJLoa9UcOWMkSuKVVtekmsrtignXj + KE66UdrtX23ZS8kK0LjtDj8J+eV+rU9RQoPhKNxFVqA1rQBju00YVJLbiEe1ZtpQYbzwABZSGBDO + +oJVIaENyUjNTU+v5zWXRgcY1uwnZEE/Csmo05mKQnKpNO678y+TyfXNMPJD4l/e3Ez6UeqWcX88 + 
uhm00SQZDEZuOZ7E6evIn03FnDo+bXq4CsrFnV8rWa4LkxdYqEqqTS7oCvxZSCxopKE8ZwZWOteS + lzbuTGStFfQ0FSimweSGGQ6BfytrMiSTVo1RTUogH+0ZRmryFUTJSqnR1QLzd+nhjbiPR2DV0mT+ + 3J7T2cs2nM6BB/55xROh5MDabFm3NgFLeIfFfub8I1pjWHHvqPrfxVGoa59ZlgxDsqSZr6z5E9nY + FWchFcFSEybsn341Ffb2Nw/oKrORXoXHLl3qQacF0YuBxgQ7tAk65MV+7yMrzTLovIxDpxKisgns + qhOSB2rLZ4xc+a2rAl8YqBNb0flquxJUipUBbZjO/A2SOBOgzYZD5ne7+E15jaJRL8X99qUaK5Fz + upFrNH/8uBUs1praBhNrzo8AKsS2cq6tZlvkad9IXFZY9Ln+g4o9Iphe5tjPGpsbm0YbWXsOfcLf + mWvY9UkPeii0qvFhyntwxw2TuNXzDnPigMZRukXbV78H0mQcnhHMSzCUcX3U815BiyWUB+phQNB1 + yeQRcHGU9t92zmm3qTNR/Y/8ASgKqHEE5rWCkhWnKR+2KbBz9Llt+zI7w54G9YCDETselL2KEhZ0 + zdvp5umNxnGR431VoGrFtiOuzq8GcTJP59AfexdPF78BAAD//wMA3k9YRfAFAAA= + headers: + Access-Control-Expose-Headers: + - CF-Ray + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Fri, 22 May 2026 00:10:21 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + - CF-Ray + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - + openai-processing-ms: + - '4460' + openai-project: + - + openai-version: + - '2020-10-01' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-limit-tokens: + - '200000' + x-ratelimit-remaining-requests: + - + x-ratelimit-remaining-tokens: + - + x-ratelimit-reset-requests: + - 53.457s + x-ratelimit-reset-tokens: + - 159ms + x-request-id: + - + status: + code: 200 + message: OK +version: 1 diff --git a/tests/cassettes/test_integracao/test_pergunta_simples.yaml b/tests/cassettes/test_integracao/test_pergunta_simples.yaml index 184e1a8..e25cb1b 100644 --- a/tests/cassettes/test_integracao/test_pergunta_simples.yaml +++ b/tests/cassettes/test_integracao/test_pergunta_simples.yaml @@ -3,16 +3,42 @@ interactions: body: 
'{"contents": [{"parts": [{"text": "Voc\u00ea \u00e9 o planejador de um sistema que transforma perguntas em consultas SQL.\n\nSeu papel: analisar a situa\u00e7\u00e3o atual e decidir a pr\u00f3xima a\u00e7\u00e3o.\n\nContexto - atual:\n- Pergunta do usu\u00e1rio: \"Quantos pedidos existem no banco?\"\n- - Schema dispon\u00edvel: Sim\n- Feedback do cr\u00edtico: Nenhum\n- Tentativas - realizadas: 0\n- Status atual: schema_obtido\n- Erro anterior: Nenhum\n\nAVALIA\u00c7\u00c3O + atual:\n- Pergunta do usu\u00e1rio: \"Quantos pedidos existem no banco?\"\n\n- + conversa_previa: Nenhuma\n\n- Schema: === SCHEMA SQLITE (INTROSPECCAO REAL) + ===\n\nTabela: customers\n- customer_id: TEXT (PK)\n- customer_unique_id: TEXT\n- + customer_zip_code_prefix: INTEGER\n- customer_city: TEXT\n- customer_state: + TEXT\n\nTabela: geolocation\n- geolocation_zip_code_prefix: INTEGER\n- geolocation_lat: + REAL\n- geolocation_lng: REAL\n- geolocation_city: TEXT\n- geolocation_state: + TEXT\n\nTabela: order_items\n- order_id: TEXT (PK)\n- order_item_id: INTEGER + (PK)\n- product_id: TEXT\n- seller_id: TEXT\n- shipping_limit_date: TEXT\n- + price: REAL\n- freight_value: REAL\n Foreign keys:\n - seller_id -> sellers.seller_id + (on_update=NO ACTION, on_delete=NO ACTION)\n - product_id -> products.product_id + (on_update=NO ACTION, on_delete=NO ACTION)\n - order_id -> orders.order_id + (on_update=NO ACTION, on_delete=NO ACTION)\n\nTabela: order_payments\n- order_id: + TEXT (PK)\n- payment_sequential: INTEGER (PK)\n- payment_type: TEXT\n- payment_installments: + INTEGER\n- payment_value: REAL\n Foreign keys:\n - order_id -> orders.order_id + (on_update=NO ACTION, on_delete=NO ACTION)\n\nTabela: order_reviews\n- review_id: + TEXT (PK)\n- order_id: TEXT\n- review_score: INTEGER\n- review_comment_title: + TEXT\n- review_comment_message: TEXT\n- review_creation_date: TEXT\n- review_answer_timestamp: + TEXT\n Foreign keys:\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO 
ACTION)\n\nTabela: orders\n- order_id: TEXT (PK)\n- customer_id: + TEXT\n- order_status: TEXT\n- order_purchase_timestamp: TEXT\n- order_approved_at: + TEXT\n- order_delivered_carrier_date: TEXT\n- order_delivered_customer_date: + TEXT\n- order_estimated_delivery_date: TEXT\n Foreign keys:\n - customer_id + -> customers.customer_id (on_update=NO ACTION, on_delete=NO ACTION)\n\nTabela: + products\n- product_id: TEXT (PK)\n- product_category_name: TEXT\n- product_name_length: + REAL\n- product_description_length: REAL\n- product_photos_qty: REAL\n- product_weight_g: + REAL\n- product_length_cm: REAL\n- product_height_cm: REAL\n- product_width_cm: + REAL\n\nTabela: sellers\n- seller_id: TEXT (PK)\n- seller_zip_code_prefix: INTEGER\n- + seller_city: TEXT\n- seller_state: TEXT\n\n\n- Feedback do cr\u00edtico: Nenhum\n- + Tentativas realizadas: 0\n- Status atual: schema_obtido\n- Erro anterior: Nenhum\n\nAVALIA\u00c7\u00c3O CR\u00cdTICA:\nVerifique se a \"Pergunta do usu\u00e1rio\" pode ser respondida com as tabelas e colunas do Schema.\nSe houver ambiguidade, conceitos n\u00e3o mapeados no banco de dados, ou se a inten\u00e7\u00e3o do usu\u00e1rio n\u00e3o estiver clara, voc\u00ea DEVE pedir mais informa\u00e7\u00f5es.\n\nResponda - EXATAMENTE no formato JSON abaixo, sem formata\u00e7\u00e3o markdown (```json):\n{\n \"decisao\": + EXATAMENTE no formato JSON abaixo, sem formata\u00e7\u00e3o markdown (```json):\n{{\n \"decisao\": \"escolha_uma_opcao\",\n \"pergunta_ao_usuario\": \"escreva a pergunta aqui - se precisar de ajuda, ou deixe vazio se n\u00e3o precisar\"\n}\n\nOp\u00e7\u00f5es + se precisar de ajuda, ou deixe vazio se n\u00e3o precisar\"\n}}\n\nOp\u00e7\u00f5es v\u00e1lidas para ''decis\u00e3o'':\n- \"pronto_codificacao\" \u2192 se temos schema, a pergunta faz sentido e devemos gerar/regenerar SQL\n- \"revisando_estrategia\" \u2192 se o cr\u00edtico reprovou e devemos tentar uma abordagem diferente\n- @@ -22,32 +48,33 @@ interactions: 1}}' headers: Accept: - - 
'*/*' + - '*' Accept-Encoding: - - gzip, deflate, zstd + - g Connection: - - keep-alive + - k Content-Length: - - '1551' + - '3' Content-Type: - - application/json + - a Host: - - generativelanguage.googleapis.com + - g User-Agent: - - google-genai-sdk/1.68.0 gl-python/3.10.12 + - g x-goog-api-client: - - google-genai-sdk/1.68.0 gl-python/3.10.12 + - g method: POST uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent response: body: string: !!binary | - H4sIAAAAAAAC/12RUU/CMBSF3/crlj6DAUQIvgoxPBhQp9E4s9ysl61xtEt7Z9Bl/912Y1DckqU9 - 5/Te3m91EIYsBckFB0LDbsMPq4Rh3X6dpyShJGv0khVL0HTOdk/trW2E8OAOsTqWbh8zjqkwoGIr - xqzUtqxKUsXFTqSQOn3QJ0vUWSUJElBJZSrQ4njKvrJhXp/mtP4cnG+nVYGu9V5xLPp40wfYTkhh - 8icEo6SLPUebLTu5QnI8WHkU9A3a0qwykOEDElhOcKLhBtmXFKkvlHeqajldj2ddNY/rRWCyOPqk - CIoLaz7rPa+wWdq2ovCBe//CTgmFoB83SrR6i5hHgv7dq2cReMgY5arKcrq843QyDY7QOo6vqI3o - gGW4twiHk6ub4a4Ak7cdmUZTKmlwzV1m+R5tAJLxavH98jtfbxfDezN6VCxogj/pnKO9cwIAAA== + H4sIAAAAAAAC/2WRX0+DMBTF3/kUpM+bkamZ+GY2TfZgXBSNiZjlht5BM9ZiezFTwne3hbF1CgmB + c07vnx9NEIYsA8kFB0LDbsJ3q4Rh0z2dpyShJGsMkhUr0HTM9lfjvdsI4c4dYk0q3XfKOGbCgEqt + mLJK27JqlSku1iKDzOmjIVmhzmtJsAK1qk0NWuxP2Vu2zOvTHt4/RsfptCrRtd4qjuUQb4cAWwsp + TPGEYJR0sefkcckOrpAcd1Y+D4YGXWlWG8jxAQksJzjQcItsK0rUBuVM1R2nOJ721TyuJ4FJvPdJ + EZQnVhRF56N/lc3c9hWlT9z7GXZNKAV9u12Su7eEeSjoz2ADjMBjxqhQdV7Q6ZDXl3vXoP4SGSYC + tetgyC4FmjuuHdMe8ytqI3qeOW4t4fHk7Gq8LsEU3TxMo6mUNLjgLhNXmyWo2cW90fJnulhG8DkX + ty8saINfO4HuK5ICAAA= headers: Alt-Svc: - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 @@ -56,11 +83,11 @@ interactions: Content-Type: - application/json; charset=UTF-8 Date: - - Thu, 02 Apr 2026 14:58:23 GMT + - Thu, 21 May 2026 23:49:11 GMT Server: - scaffolding on HTTPServer2 Server-Timing: - - gfet4t7; dur=2624 + - gfet4t7; dur=1330 Transfer-Encoding: - chunked Vary: @@ -114,37 +141,39 @@ interactions: REAL\n- product_height_cm: REAL\n- product_width_cm: REAL\n\nTabela: sellers\n- seller_id: TEXT (PK)\n- seller_zip_code_prefix: INTEGER\n- 
seller_city: TEXT\n- seller_state: TEXT\n\n\n=== PERGUNTA DO USU\u00c1RIO ===\nQuantos pedidos existem - no banco?\n\n\n\nResponda APENAS com a consulta SQL, sem markdown, sem explica\u00e7\u00e3o."}], - "role": "user"}], "safetySettings": [], "generationConfig": {"temperature": - 0.7, "candidateCount": 1}}' + no banco?\n\n=== CONVERSA PR\u00c9VIA (CONTEXTO ADICIONAL) ===\nNenhuma\n\n=== + HIST\u00d3RICO DE TENTATIVAS ANTERIORES ===\nNenhuma tentativa anterior.\n\nResponda + APENAS com a consulta SQL, sem markdown, sem explica\u00e7\u00e3o."}], "role": + "user"}], "safetySettings": [], "generationConfig": {"temperature": 0.7, "candidateCount": + 1}}' headers: Accept: - - '*/*' + - '*' Accept-Encoding: - - gzip, deflate, zstd + - g Connection: - - keep-alive + - k Content-Length: - - '3054' + - '3' Content-Type: - - application/json + - a Host: - - generativelanguage.googleapis.com + - g User-Agent: - - google-genai-sdk/1.68.0 gl-python/3.10.12 + - g x-goog-api-client: - - google-genai-sdk/1.68.0 gl-python/3.10.12 + - g method: POST uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent response: body: string: !!binary | - H4sIAAAAAAAC/2WQUUvDMBSF3/srQp4UVpHhsPNNaoWCs3NmoohINHdtNEtKcyvTsf9u2q5bpn0o - yb3n3pPzrQNC6DvXQgqOYOkFeXYVQtbtv+kZjaDRNfqSK5a8wr22+9be2UkQVs0QvU9ukpiROJvf - siNTCahepTgm17NsQtqrpd7gZnd+GeztKqOg2bU0AlQv3/QCupBa2mIG3BrdWrJsSnddqQWsXPk0 - 6A3a1bS2PIcJIHfB+S4eLSuzLJGZT9Cxqdvg0eis2+aBOhCMt200yNXhaBQN/u21V85VKh+gx9aF - 5Erid5OEJY+MeiDwz7N6FIFHjGJh6rzAwycOR8EWWUfxwYGXHa4clg5gODwZhQvFbdEa0gpsabSF - VDSaJGUZfwrVZfj28XOeTr/G07i+i2iwCX4BO0FHyUICAAA= + H4sIAAAAAAAC/12RX0vDMBTF3/spQp4UnIiwaX2TOmGyf8xMBBEJy10b1yUluZPp2Hc3Sc2W2YeS + 3HNyb84vu4wQuuBKSMERLL0jb65CyC78vaYVgkInxJIrNtzg0dt+u2TtLAhbf4g+94f9gpFiMh+z + M20EmA8pzsnjbDIiYWtpcnB/WL9fHMcZXYPvtdYC6mjfRwNdSiVtNQNutQoj2WRKD6pUAraufJXF + AaE13VhewgiQu+D8EI82Rq8bZHoFqtCbEPw277bdElAnhvxPRo28PlV6UUv62gc3VdYpwIStC8lr + 
id8+Ceu/MpqAwH/XiiiyhBjFSm/KCk+v2IsRLJgvuQAmwfgJFl0kboSnGoi2kF/cu8iWZglrx7dz + fdntLGtuq3AfasA2WlkYCO/Ju6sp18VsnA/Nz81g2jx9ivLevew++wUPh1vwYQIAAA== headers: Alt-Svc: - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 @@ -153,11 +182,11 @@ interactions: Content-Type: - application/json; charset=UTF-8 Date: - - Thu, 02 Apr 2026 14:58:24 GMT + - Thu, 21 May 2026 23:49:12 GMT Server: - scaffolding on HTTPServer2 Server-Timing: - - gfet4t7; dur=839 + - gfet4t7; dur=1178 Transfer-Encoding: - chunked Vary: @@ -179,45 +208,126 @@ interactions: body: '{"contents": [{"parts": [{"text": "Voc\u00ea \u00e9 um revisor de qualidade para consultas SQL geradas por IA.\n\nSua tarefa: avaliar se a consulta SQL e seus resultados respondem adequadamente\n\u00e0 pergunta original do usu\u00e1rio.\n\n=== - PERGUNTA DO USU\u00c1RIO ===\nQuantos pedidos existem no banco?\n\n=== SQL GERADA - ===\nSELECT COUNT(order_id) FROM orders\n\n=== RESULTADO DA EXECU\u00c7\u00c3O + PERGUNTA DO USU\u00c1RIO ===\nQuantos pedidos existem no banco?\n\n=== SCHEMA + DO BANCO ===\n=== SCHEMA SQLITE (INTROSPECCAO REAL) ===\n\nTabela: customers\n- + customer_id: TEXT (PK)\n- customer_unique_id: TEXT\n- customer_zip_code_prefix: + INTEGER\n- customer_city: TEXT\n- customer_state: TEXT\n\nTabela: geolocation\n- + geolocation_zip_code_prefix: INTEGER\n- geolocation_lat: REAL\n- geolocation_lng: + REAL\n- geolocation_city: TEXT\n- geolocation_state: TEXT\n\nTabela: order_items\n- + order_id: TEXT (PK)\n- order_item_id: INTEGER (PK)\n- product_id: TEXT\n- seller_id: + TEXT\n- shipping_limit_date: TEXT\n- price: REAL\n- freight_value: REAL\n Foreign + keys:\n - seller_id -> sellers.seller_id (on_update=NO ACTION, on_delete=NO + ACTION)\n - product_id -> products.product_id (on_update=NO ACTION, on_delete=NO + ACTION)\n - order_id -> orders.order_id (on_update=NO ACTION, on_delete=NO + ACTION)\n\nTabela: order_payments\n- order_id: TEXT (PK)\n- payment_sequential: + INTEGER (PK)\n- payment_type: TEXT\n- 
payment_installments: INTEGER\n- payment_value: + REAL\n Foreign keys:\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\n\nTabela: order_reviews\n- review_id: TEXT (PK)\n- order_id: + TEXT\n- review_score: INTEGER\n- review_comment_title: TEXT\n- review_comment_message: + TEXT\n- review_creation_date: TEXT\n- review_answer_timestamp: TEXT\n Foreign + keys:\n - order_id -> orders.order_id (on_update=NO ACTION, on_delete=NO ACTION)\n\nTabela: + orders\n- order_id: TEXT (PK)\n- customer_id: TEXT\n- order_status: TEXT\n- + order_purchase_timestamp: TEXT\n- order_approved_at: TEXT\n- order_delivered_carrier_date: + TEXT\n- order_delivered_customer_date: TEXT\n- order_estimated_delivery_date: + TEXT\n Foreign keys:\n - customer_id -> customers.customer_id (on_update=NO + ACTION, on_delete=NO ACTION)\n\nTabela: products\n- product_id: TEXT (PK)\n- + product_category_name: TEXT\n- product_name_length: REAL\n- product_description_length: + REAL\n- product_photos_qty: REAL\n- product_weight_g: REAL\n- product_length_cm: + REAL\n- product_height_cm: REAL\n- product_width_cm: REAL\n\nTabela: sellers\n- + seller_id: TEXT (PK)\n- seller_zip_code_prefix: INTEGER\n- seller_city: TEXT\n- + seller_state: TEXT\n\n\n=== CONVERSA COM O AGENTE (se houver) ===\nNenhuma\n\n=== + SQL GERADA ===\nSELECT COUNT(order_id) FROM orders\n\n=== RESULTADO DA EXECU\u00c7\u00c3O ===\nStatus: exec_ok\nTotal de linhas: 1\nAmostra dos resultados (primeiras - linhas):\n[{''COUNT(order_id)'': 99441}]\n\n=== ERROS (se houver) ===\nNenhum\n\nAvalie:\n1. - A SQL responde \u00e0 pergunta do usu\u00e1rio?\n2. Os resultados fazem sentido?\n3. 
- H\u00e1 algum erro l\u00f3gico ou de interpreta\u00e7\u00e3o?\n\nResponda no - formato:\nVEREDITO: APROVADO ou REPROVADO\nFEEDBACK: (SELECT MAX(Population) FROM country WHERE Continent=''Africa'')\nResultado: + [] (vazio)\nVEREDITO: REPROVADO\nRaz\u00e3o: \"Larger than any country in Africa\" + significa maior que pelo menos um pa\u00eds africano (MIN),\nn\u00e3o maior + que todos os pa\u00edses africanos (MAX). A l\u00f3gica est\u00e1 semanticamente + errada.\n\n-- EXEMPLO 3: REPROVADO (resultado vazio suspeito) --\nPergunta: + \"Find the last name of students who live in North Carolina and are not enrolled + in any degree.\"\nSQL: SELECT last_name FROM Students WHERE state_province_county + = ''North Carolina'' AND ...\nResultado: [] (vazio)\nVEREDITO: REPROVADO\nRaz\u00e3o: + Resultado vazio quando a pergunta espera dados reais \u00e9 suspeito. Verifique + se o filtro\nde string corresponde exatamente ao valor no banco (ex: ''NorthCarolina'' + vs ''North Carolina'').\n\n-- EXEMPLO 4: REPROVADO (JOIN incorreto muda o que + est\u00e1 sendo contado) --\nPergunta: \"Find the name of makers that produced + some cars in 1970.\"\nSQL: SELECT DISTINCT Maker FROM car_makers JOIN car_names + ON car_makers.Id = car_names.MakeId JOIN cars_data ON car_names.MakeId = cars_data.Id + WHERE cars_data.Year = 1970\nResultado: [(''chevrolet'',), (''buick'',)]\nVEREDITO: + REPROVADO\nRaz\u00e3o: O JOIN usa car_names.MakeId para conectar a cars_data, + mas cars_data.Id refere-se\nao ID do carro, n\u00e3o do fabricante. O caminho + correto seria via model_list. 
Os resultados\nparecem plaus\u00edveis mas derivam + de uma jun\u00e7\u00e3o incorreta.\n\n-- EXEMPLO 5: APROVADO (formato diferente, + resposta correta) --\nPergunta: \"On average, when were the transcripts printed?\"\nSQL: + SELECT AVG(transcript_date) AS average_transcript_date FROM Transcripts\nResultado: + [(''1989.9333333333334'',)]\nVEREDITO: APROVADO\nRaz\u00e3o: O resultado \u00e9 + um n\u00famero que representa a m\u00e9dia das datas (formato num\u00e9rico + do SQLite).\nEmbora n\u00e3o seja uma data formatada, responde corretamente + \u00e0 pergunta. Diferen\u00e7a de\nrepresenta\u00e7\u00e3o n\u00e3o \u00e9 + motivo de reprova\u00e7\u00e3o.\n\n-- EXEMPLO 6: APROVADO (query mais simples + que o gold, resultado equivalente) --\nPergunta: \"Which model of car has the + minimum horsepower?\"\nSQL: SELECT Model FROM car_names JOIN cars_data ON car_names.MakeId + = cars_data.Id WHERE Horsepower = (SELECT MIN(Horsepower) FROM cars_data) LIMIT + 1\nResultado: [(''triumph'',)]\nVEREDITO: APROVADO\nRaz\u00e3o: A query retorna + corretamente o modelo com menor pot\u00eancia. O LIMIT 1 garante unicidade\ne + o resultado \u00e9 semanticamente correto. 
Aprovar.\n\n=== CRIT\u00c9RIOS DE + AVALIA\u00c7\u00c3O ===\n\nREPROVE quando houver:\n- Escopo incompleto: query + cobre apenas parte do que a pergunta pede\n- Erro sem\u00e2ntico: l\u00f3gica + correta na forma mas errada no significado (MIN vs MAX, ANY vs ALL)\n- JOIN + incorreto que altera os dados sendo agregados ou filtrados\n- Resultado vazio + quando a pergunta claramente espera dados\n- Filtro com valor literal diferente + do que est\u00e1 no banco\n- M\u00e9trica errada (SUM vs AVG, COUNT vs COUNT + DISTINCT, etc.)\n- Erro de execu\u00e7\u00e3o SQL\n\nAPROVE quando:\n- O resultado + responde \u00e0 pergunta, mesmo com formato ou representa\u00e7\u00e3o diferente\n- + H\u00e1 colunas extras que n\u00e3o prejudicam a resposta\n- A precis\u00e3o + num\u00e9rica difere mas o valor est\u00e1 correto\n- A query \u00e9 mais simples + que o esperado mas semanticamente equivalente\n\nAvalie com rigor sem\u00e2ntico. + Resultados que parecem plaus\u00edveis mas derivam de l\u00f3gica\nincorreta + devem ser reprovados. 
N\u00e3o presuma que uma query bem-formada est\u00e1 correta.\n\nResponda + no formato:\nVEREDITO: APROVADO ou REPROVADO\nFEEDBACK: "}], "role": "user"}], "safetySettings": [], "generationConfig": {"temperature": 0.7, "candidateCount": 1}}' headers: Accept: - - '*/*' + - '*' Accept-Encoding: - - gzip, deflate, zstd + - g Connection: - - keep-alive + - k Content-Length: - - '933' + - '7' Content-Type: - - application/json + - a Host: - - generativelanguage.googleapis.com + - g User-Agent: - - google-genai-sdk/1.68.0 gl-python/3.10.12 + - g x-goog-api-client: - - google-genai-sdk/1.68.0 gl-python/3.10.12 + - g method: POST uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent response: body: string: !!binary | - H4sIAAAAAAAC/2VSwU7bQBC9+ytGe04iQLQp3AJ2pUArh2BFlSgSUzyxLewda3ctAVH+pVX/otf8 - WGftOjHUB2s08/bNvDezCQDUI+q0SNGRVedwJxmATfv3NdaOtJNCn5JkjcYdsN23GcQCcfTsH6lV - tIzCeRKfw2yxjFezMP6uP0dReDG7vJYcCL9tSodwe/MFMjKYIhiyNeuUIC0MOaykP8HuJ9RkskYL - NmVobLP7ZQoeeQYnAlgCc4Az6N2figyD8NQk+tiCRnD4g0qEBzYpGfswgdj6fn4Ej7C730JUopGY - YI2vVIEVQnk+gjUbTY/kezE4dliCTOpn5okayN/u4/vRwTTDJXlHKk6p7OHbHqDWhS5sviS0rD3s - NokXal8txI5nSR8FfYOWWjUWM/oqqmV9uF+Sqg1XtUv4ifQlN+36To4+dWyDdb8BnJ79q7fK3pSm - x9PRf8Q2lLZFObyDwYmISiwL9+KlJNG3RA2ccO/m6r0IBpYpl3OT5e7djB+9Aa1pnY8rWWLRGZZR - JRaOTyYfxusSbd52VN0tWZqnHhPJJSK+nMbjMH+dzhdnV01d3TypYBv8BcYwIfgKAwAA + H4sIAAAAAAAC/2VSzW7bMAy++ykInTYgCdYUQbfessQBgrZTmnjBsB8sWsQkQm3JkOQhXZB32bDD + 3mFXv9gou07dzgeDIj9+JD/yEAGwtdBSSeHRsUv4RB6AQ/UPMaM9ak+BxkXOXFj/iK2/Q8smiMd9 + SGLLeB6Ppwm/hOFszpfDMf+sJ3E8fjscXZEPiN8VqRewuL2G1SK+jkcJjPj7d8kLYyXar0q+hMmc + 30D1dCtA58tflGctUhqCVMHIqEkEiy43WiKUPyFHuy20F51Qw9OIBgzo8m+G1oA3XqRAwBxpdONA + C/DiG6YCVg+FesADX2iOUiWV2uO6KP+Uvw1sjNW4xhYh7oU3LcIea+lxPNlfOo8qWpNikCgzEtMG + fmwAbKO0crs5Cmd0gC0SPmOnqKIp9+R+FTUFKmpWOLHFGxKE9ilOW2O5NVnuE3OHemSKap9nbwav + a7rWATxBnOKVWk9C/f7Zeec/ajemwiptn0bramhOkSp/H4ZJ4g8Ja2nhn3fWyBG1VGN+Z4rtzrtn + 
c1w8hB3a72qNiUIbariwdGFlkLaStVZ6SctVtaRbzEjkbr836G5S4XZVR6w+IodTGTDdj3czYQo7 + GVzFPy6ms3Nuubt1LDpG/wDT+1NSPQMAAA== headers: Alt-Svc: - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 @@ -226,11 +336,11 @@ interactions: Content-Type: - application/json; charset=UTF-8 Date: - - Thu, 02 Apr 2026 14:58:27 GMT + - Thu, 21 May 2026 23:49:15 GMT Server: - scaffolding on HTTPServer2 Server-Timing: - - gfet4t7; dur=2951 + - gfet4t7; dur=1978 Transfer-Encoding: - chunked Vary: diff --git a/tests/cassettes/test_integracao/test_pergunta_simples__openai-gpt-4o-mini.yaml b/tests/cassettes/test_integracao/test_pergunta_simples__openai-gpt-4o-mini.yaml new file mode 100644 index 0000000..78f6a1e --- /dev/null +++ b/tests/cassettes/test_integracao/test_pergunta_simples__openai-gpt-4o-mini.yaml @@ -0,0 +1,493 @@ +interactions: +- request: + body: "{\"messages\":[{\"content\":\"Voc\xEA \xE9 o planejador de um sistema que + transforma perguntas em consultas SQL.\\n\\nSeu papel: analisar a situa\xE7\xE3o + atual e decidir a pr\xF3xima a\xE7\xE3o.\\n\\nContexto atual:\\n- Pergunta do + usu\xE1rio: \\\"Quantos pedidos existem no banco?\\\"\\n\\n- conversa_previa: + Nenhuma\\n\\n- Schema: === SCHEMA SQLITE (INTROSPECCAO REAL) ===\\n\\nTabela: + customers\\n- customer_id: TEXT (PK)\\n- customer_unique_id: TEXT\\n- customer_zip_code_prefix: + INTEGER\\n- customer_city: TEXT\\n- customer_state: TEXT\\n\\nTabela: geolocation\\n- + geolocation_zip_code_prefix: INTEGER\\n- geolocation_lat: REAL\\n- geolocation_lng: + REAL\\n- geolocation_city: TEXT\\n- geolocation_state: TEXT\\n\\nTabela: order_items\\n- + order_id: TEXT (PK)\\n- order_item_id: INTEGER (PK)\\n- product_id: TEXT\\n- + seller_id: TEXT\\n- shipping_limit_date: TEXT\\n- price: REAL\\n- freight_value: + REAL\\n Foreign keys:\\n - seller_id -> sellers.seller_id (on_update=NO ACTION, + on_delete=NO ACTION)\\n - product_id -> products.product_id (on_update=NO ACTION, + on_delete=NO ACTION)\\n - order_id -> orders.order_id (on_update=NO 
ACTION, + on_delete=NO ACTION)\\n\\nTabela: order_payments\\n- order_id: TEXT (PK)\\n- + payment_sequential: INTEGER (PK)\\n- payment_type: TEXT\\n- payment_installments: + INTEGER\\n- payment_value: REAL\\n Foreign keys:\\n - order_id -> orders.order_id + (on_update=NO ACTION, on_delete=NO ACTION)\\n\\nTabela: order_reviews\\n- review_id: + TEXT (PK)\\n- order_id: TEXT\\n- review_score: INTEGER\\n- review_comment_title: + TEXT\\n- review_comment_message: TEXT\\n- review_creation_date: TEXT\\n- review_answer_timestamp: + TEXT\\n Foreign keys:\\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\\n\\nTabela: orders\\n- order_id: TEXT (PK)\\n- customer_id: + TEXT\\n- order_status: TEXT\\n- order_purchase_timestamp: TEXT\\n- order_approved_at: + TEXT\\n- order_delivered_carrier_date: TEXT\\n- order_delivered_customer_date: + TEXT\\n- order_estimated_delivery_date: TEXT\\n Foreign keys:\\n - customer_id + -> customers.customer_id (on_update=NO ACTION, on_delete=NO ACTION)\\n\\nTabela: + products\\n- product_id: TEXT (PK)\\n- product_category_name: TEXT\\n- product_name_length: + REAL\\n- product_description_length: REAL\\n- product_photos_qty: REAL\\n- product_weight_g: + REAL\\n- product_length_cm: REAL\\n- product_height_cm: REAL\\n- product_width_cm: + REAL\\n\\nTabela: sellers\\n- seller_id: TEXT (PK)\\n- seller_zip_code_prefix: + INTEGER\\n- seller_city: TEXT\\n- seller_state: TEXT\\n\\n\\n- Feedback do cr\xEDtico: + Nenhum\\n- Tentativas realizadas: 0\\n- Status atual: schema_obtido\\n- Erro + anterior: Nenhum\\n\\nAVALIA\xC7\xC3O CR\xCDTICA:\\nVerifique se a \\\"Pergunta + do usu\xE1rio\\\" pode ser respondida com as tabelas e colunas do Schema.\\nSe + houver ambiguidade, conceitos n\xE3o mapeados no banco de dados, ou se a inten\xE7\xE3o + do usu\xE1rio n\xE3o estiver clara, voc\xEA DEVE pedir mais informa\xE7\xF5es.\\n\\nResponda + EXATAMENTE no formato JSON abaixo, sem formata\xE7\xE3o markdown (```json):\\n{{\\n + \ \\\"decisao\\\": 
\\\"escolha_uma_opcao\\\",\\n \\\"pergunta_ao_usuario\\\": + \\\"escreva a pergunta aqui se precisar de ajuda, ou deixe vazio se n\xE3o precisar\\\"\\n}}\\n\\nOp\xE7\xF5es + v\xE1lidas para 'decis\xE3o':\\n- \\\"pronto_codificacao\\\" \u2192 se temos + schema, a pergunta faz sentido e devemos gerar/regenerar SQL\\n- \\\"revisando_estrategia\\\" + \u2192 se o cr\xEDtico reprovou e devemos tentar uma abordagem diferente\\n- + \\\"necessita_ajuda\\\" \u2192 a pergunta n\xE3o \xE9 clara, n\xE3o faz sentido, + falta contexto ou n\xE3o h\xE1 dados no schema para responder.\\n\",\"role\":\"user\"}],\"model\":\"gpt-4o-mini\",\"stream\":false}" + headers: + Accept: + - a + Accept-Encoding: + - g + Connection: + - k + Content-Length: + - '3' + Content-Type: + - a + Host: + - a + User-Agent: + - O + X-Stainless-Arch: + - x + X-Stainless-Async: + - f + X-Stainless-Lang: + - p + X-Stainless-OS: + - L + X-Stainless-Package-Version: + - '2' + X-Stainless-Raw-Response: + - t + X-Stainless-Runtime: + - C + X-Stainless-Runtime-Version: + - '3' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdck0z/bzTAEAAAD//4xSy07DMBC89yuQzwlq + m0LDFXFBcOsNiizH3qQGx7a8DipC/XfWSUvCS0KRLGVmZ70z3s1t8aCazd3r83Uh1eJmb/b31YZl + SeGqZ5DxpDqXjnQQtbMDLQOICKnrYr2+Ws3Li7LoidYpMEnW+JivXN5qq/PlfLnK5+t8UR7VO6cl + IJU90u/Z2Xt/pjmtgj3B8+yEtIAoGiDsVERgcCYhTCBqjMJGlo2kdDaC7Ud/39oEbZkCqVG4LYFb + 5gNVOC6d0rWWQiY8O1V6CE1no+DC8Q47EfRRRZ89TO8JUHcoklfbGTMhhLUuipRV7/DpyBw+PRnX + 0AwVfpOymrLCHadokXKm+TE6z3r2QOdTn133JY5kpvWRR/cC/XXl5eXQj41PNrLL4khGmtBMVOVV + 9ks/riAKbXCSPqPAdqBG6fhUolPaTYjZxPXPaX7rPTjXtvlP+5GQEjwtI/cBlJZfHY9lAdJG/1X2 + mXI/MEMIr7SiPGoI6SUU1KIzw54xfMMILafnaiD4oIdlqz0XRbWoVhdSlWx2mH0AAAD//wMAQKe5 + iXoDAAA= + headers: + Access-Control-Expose-Headers: + - CF-Ray + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: 
+ - application/json + Date: + - Fri, 22 May 2026 00:09:44 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + - CF-Ray + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - + openai-processing-ms: + - '734' + openai-project: + - + openai-version: + - '2020-10-01' + set-cookie: + - __cf_bm=xx9AsHIRa0iP5Mq8PaM5py9mwsXqNpBvtk8M.D9ORa0-1779408583.343677-1.0.1.1-R8hyMWGKmashuagH53gAoTtnbiesDED8c2FEXwo76FQiGGjlAiSxcbvNZuX22eg7m03oTbtijpQUKXEts462TulxieWTFlCVoSNwQUSTeWl7lBnYK330FHt.sK79P77P; + HttpOnly; SameSite=None; Secure; Path=/; Domain=api.openai.com; Expires=Fri, + 22 May 2026 00:39:44 GMT + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-limit-tokens: + - '200000' + x-ratelimit-remaining-requests: + - + x-ratelimit-remaining-tokens: + - + x-ratelimit-reset-requests: + - 8.64s + x-ratelimit-reset-tokens: + - 247ms + x-request-id: + - + status: + code: 200 + message: OK +- request: + body: "{\"messages\":[{\"content\":\"Voc\xEA \xE9 um especialista em SQL para + bancos SQLite.\\n\\nSua tarefa: gerar UMA \xFAnica consulta SQL SELECT que responda + \xE0 pergunta do usu\xE1rio,\\nusando o schema do banco de dados fornecido abaixo.\\n\\nRegras:\\n- + Gere APENAS uma consulta SELECT (ou WITH/CTE seguido de SELECT).\\n- N\xC3O + use INSERT, UPDATE, DELETE, DROP, ALTER ou qualquer comando de escrita.\\n- + N\xC3O inclua explica\xE7\xF5es, apenas a SQL pura.\\n- Use nomes de tabelas + e colunas EXATAMENTE como aparecem no schema.\\n- Se a pergunta for amb\xEDgua, + fa\xE7a a interpreta\xE7\xE3o mais razo\xE1vel.\\n\\n=== SCHEMA DO BANCO ===\\n=== + SCHEMA SQLITE (INTROSPECCAO REAL) ===\\n\\nTabela: customers\\n- customer_id: + TEXT (PK)\\n- customer_unique_id: TEXT\\n- customer_zip_code_prefix: INTEGER\\n- + customer_city: TEXT\\n- customer_state: 
TEXT\\n\\nTabela: geolocation\\n- geolocation_zip_code_prefix: + INTEGER\\n- geolocation_lat: REAL\\n- geolocation_lng: REAL\\n- geolocation_city: + TEXT\\n- geolocation_state: TEXT\\n\\nTabela: order_items\\n- order_id: TEXT + (PK)\\n- order_item_id: INTEGER (PK)\\n- product_id: TEXT\\n- seller_id: TEXT\\n- + shipping_limit_date: TEXT\\n- price: REAL\\n- freight_value: REAL\\n Foreign + keys:\\n - seller_id -> sellers.seller_id (on_update=NO ACTION, on_delete=NO + ACTION)\\n - product_id -> products.product_id (on_update=NO ACTION, on_delete=NO + ACTION)\\n - order_id -> orders.order_id (on_update=NO ACTION, on_delete=NO + ACTION)\\n\\nTabela: order_payments\\n- order_id: TEXT (PK)\\n- payment_sequential: + INTEGER (PK)\\n- payment_type: TEXT\\n- payment_installments: INTEGER\\n- payment_value: + REAL\\n Foreign keys:\\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\\n\\nTabela: order_reviews\\n- review_id: TEXT (PK)\\n- + order_id: TEXT\\n- review_score: INTEGER\\n- review_comment_title: TEXT\\n- + review_comment_message: TEXT\\n- review_creation_date: TEXT\\n- review_answer_timestamp: + TEXT\\n Foreign keys:\\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\\n\\nTabela: orders\\n- order_id: TEXT (PK)\\n- customer_id: + TEXT\\n- order_status: TEXT\\n- order_purchase_timestamp: TEXT\\n- order_approved_at: + TEXT\\n- order_delivered_carrier_date: TEXT\\n- order_delivered_customer_date: + TEXT\\n- order_estimated_delivery_date: TEXT\\n Foreign keys:\\n - customer_id + -> customers.customer_id (on_update=NO ACTION, on_delete=NO ACTION)\\n\\nTabela: + products\\n- product_id: TEXT (PK)\\n- product_category_name: TEXT\\n- product_name_length: + REAL\\n- product_description_length: REAL\\n- product_photos_qty: REAL\\n- product_weight_g: + REAL\\n- product_length_cm: REAL\\n- product_height_cm: REAL\\n- product_width_cm: + REAL\\n\\nTabela: sellers\\n- seller_id: TEXT (PK)\\n- seller_zip_code_prefix: + INTEGER\\n- 
seller_city: TEXT\\n- seller_state: TEXT\\n\\n\\n=== PERGUNTA DO + USU\xC1RIO ===\\nQuantos pedidos existem no banco?\\n\\n=== CONVERSA PR\xC9VIA + (CONTEXTO ADICIONAL) ===\\nNenhuma\\n\\n=== HIST\xD3RICO DE TENTATIVAS ANTERIORES + ===\\nNenhuma tentativa anterior.\\n\\nResponda APENAS com a consulta SQL, sem + markdown, sem explica\xE7\xE3o.\",\"role\":\"user\"}],\"model\":\"gpt-4o-mini\",\"stream\":false}" + headers: + Accept: + - a + Accept-Encoding: + - g + Connection: + - k + Content-Length: + - '3' + Content-Type: + - a + Cookie: + - _ + Host: + - a + User-Agent: + - O + X-Stainless-Arch: + - x + X-Stainless-Async: + - f + X-Stainless-Lang: + - p + X-Stainless-OS: + - L + X-Stainless-Package-Version: + - '2' + X-Stainless-Raw-Response: + - t + X-Stainless-Runtime: + - C + X-Stainless-Runtime-Version: + - '3' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdck0zw7IzQQAAAD//41Sy07DMBC89ysinwA1 + qCltU8SRhwQCKiiIA0KRa28SQ2JbXgcVUP8dO2lJgCJxseSdnfXMeKNXFeEYzPzifP5wdpOf3EyW + +p30PUMtnoHZDWufKccDK5RsYGaAWvBTozg+HA2m4+m4BkrFofC0TNtwpMJSSBEOB8NROIjDaLpm + 50owQNf26K5B8FGfXqfksHTlQX9TKQGRZuBqmyZXNKrwFUIRBVoqLem3IFPSgqylz08vT4/vguPZ + /fXdzt5ucHY7uwqU4WDwqEsxkFZIvWxZFUUHoFIqS73tWuzTGll9yStUpo1a4A8qSZ1tzBOXErrI + nBS0SpMaXbnzqY6h+uaMuEGltolVL1A/F8dRM4+06bfoZI1ZJ7DokuL+lnEJB0tFgZ0cCaMsB95S + 29BpxYXqAL2O6d9its1ujAuZ/Wd8CzAG2q1Vog1wwb4bbtsM+N38q+0r5FowQTCvbtkSK8D4j+CQ + 0qpoNobgG1ooE/dbGRhtRLM2qU6mdMSALw7GMemtep+ye8uGRAMAAA== + headers: + Access-Control-Expose-Headers: + - CF-Ray + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Fri, 22 May 2026 00:09:45 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + 
access-control-expose-headers: + - X-Request-ID + - CF-Ray + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - + openai-processing-ms: + - '530' + openai-project: + - + openai-version: + - '2020-10-01' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-limit-tokens: + - '200000' + x-ratelimit-remaining-requests: + - + x-ratelimit-remaining-tokens: + - + x-ratelimit-reset-requests: + - 15.867s + x-ratelimit-reset-tokens: + - 216ms + x-request-id: + - + status: + code: 200 + message: OK +- request: + body: "{\"messages\":[{\"content\":\"Voc\xEA \xE9 um revisor de qualidade para + consultas SQL geradas por IA.\\n\\nSua tarefa: avaliar se a consulta SQL e seus + resultados respondem adequadamente\\n\xE0 pergunta original do usu\xE1rio.\\n\\n=== + PERGUNTA DO USU\xC1RIO ===\\nQuantos pedidos existem no banco?\\n\\n=== SCHEMA + DO BANCO ===\\n=== SCHEMA SQLITE (INTROSPECCAO REAL) ===\\n\\nTabela: customers\\n- + customer_id: TEXT (PK)\\n- customer_unique_id: TEXT\\n- customer_zip_code_prefix: + INTEGER\\n- customer_city: TEXT\\n- customer_state: TEXT\\n\\nTabela: geolocation\\n- + geolocation_zip_code_prefix: INTEGER\\n- geolocation_lat: REAL\\n- geolocation_lng: + REAL\\n- geolocation_city: TEXT\\n- geolocation_state: TEXT\\n\\nTabela: order_items\\n- + order_id: TEXT (PK)\\n- order_item_id: INTEGER (PK)\\n- product_id: TEXT\\n- + seller_id: TEXT\\n- shipping_limit_date: TEXT\\n- price: REAL\\n- freight_value: + REAL\\n Foreign keys:\\n - seller_id -> sellers.seller_id (on_update=NO ACTION, + on_delete=NO ACTION)\\n - product_id -> products.product_id (on_update=NO ACTION, + on_delete=NO ACTION)\\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\\n\\nTabela: order_payments\\n- order_id: TEXT (PK)\\n- + payment_sequential: INTEGER (PK)\\n- payment_type: TEXT\\n- payment_installments: + INTEGER\\n- payment_value: REAL\\n Foreign keys:\\n - order_id -> orders.order_id + (on_update=NO ACTION, 
on_delete=NO ACTION)\\n\\nTabela: order_reviews\\n- review_id: + TEXT (PK)\\n- order_id: TEXT\\n- review_score: INTEGER\\n- review_comment_title: + TEXT\\n- review_comment_message: TEXT\\n- review_creation_date: TEXT\\n- review_answer_timestamp: + TEXT\\n Foreign keys:\\n - order_id -> orders.order_id (on_update=NO ACTION, + on_delete=NO ACTION)\\n\\nTabela: orders\\n- order_id: TEXT (PK)\\n- customer_id: + TEXT\\n- order_status: TEXT\\n- order_purchase_timestamp: TEXT\\n- order_approved_at: + TEXT\\n- order_delivered_carrier_date: TEXT\\n- order_delivered_customer_date: + TEXT\\n- order_estimated_delivery_date: TEXT\\n Foreign keys:\\n - customer_id + -> customers.customer_id (on_update=NO ACTION, on_delete=NO ACTION)\\n\\nTabela: + products\\n- product_id: TEXT (PK)\\n- product_category_name: TEXT\\n- product_name_length: + REAL\\n- product_description_length: REAL\\n- product_photos_qty: REAL\\n- product_weight_g: + REAL\\n- product_length_cm: REAL\\n- product_height_cm: REAL\\n- product_width_cm: + REAL\\n\\nTabela: sellers\\n- seller_id: TEXT (PK)\\n- seller_zip_code_prefix: + INTEGER\\n- seller_city: TEXT\\n- seller_state: TEXT\\n\\n\\n=== CONVERSA COM + O AGENTE (se houver) ===\\nNenhuma\\n\\n=== SQL GERADA ===\\nSELECT COUNT(*) + FROM orders;\\n\\n=== RESULTADO DA EXECU\xC7\xC3O ===\\nStatus: exec_ok\\nTotal + de linhas: 1\\nAmostra dos resultados (primeiras linhas):\\n[{'COUNT(*)': 99441}]\\n\\n=== + ERROS (se houver) ===\\nNenhum\\n\\n=== TENTATIVAS ANTERIORES ===\\nNenhuma + tentativa anterior (esta \xE9 a primeira).\\n\\n=== EXEMPLOS DE AVALIA\xC7\xC3O + ===\\n\\n-- EXEMPLO 1: REPROVADO (escopo incompleto) --\\nPergunta: \\\"Which + airport has the least number of flights?\\\"\\nSQL: SELECT SourceAirport FROM + flights GROUP BY SourceAirport ORDER BY COUNT(*) ASC LIMIT 1\\nResultado: [('AID',)]\\nVEREDITO: + REPROVADO\\nRaz\xE3o: A query conta apenas voos com partida (SourceAirport) + e ignora voos com chegada (DestAirport).\\nO escopo da pergunta \xE9 
\\\"flights\\\" + em geral \u2014 a query responde a uma pergunta diferente.\\n\\n-- EXEMPLO 2: + REPROVADO (erro sem\xE2ntico: MIN vs MAX) --\\nPergunta: \\\"Which Asian countries + have a population larger than any country in Africa?\\\"\\nSQL: SELECT Name + FROM country WHERE Continent='Asia' AND Population > (SELECT MAX(Population) + FROM country WHERE Continent='Africa')\\nResultado: [] (vazio)\\nVEREDITO: REPROVADO\\nRaz\xE3o: + \\\"Larger than any country in Africa\\\" significa maior que pelo menos um + pa\xEDs africano (MIN),\\nn\xE3o maior que todos os pa\xEDses africanos (MAX). + A l\xF3gica est\xE1 semanticamente errada.\\n\\n-- EXEMPLO 3: REPROVADO (resultado + vazio suspeito) --\\nPergunta: \\\"Find the last name of students who live in + North Carolina and are not enrolled in any degree.\\\"\\nSQL: SELECT last_name + FROM Students WHERE state_province_county = 'North Carolina' AND ...\\nResultado: + [] (vazio)\\nVEREDITO: REPROVADO\\nRaz\xE3o: Resultado vazio quando a pergunta + espera dados reais \xE9 suspeito. Verifique se o filtro\\nde string corresponde + exatamente ao valor no banco (ex: 'NorthCarolina' vs 'North Carolina').\\n\\n-- + EXEMPLO 4: REPROVADO (JOIN incorreto muda o que est\xE1 sendo contado) --\\nPergunta: + \\\"Find the name of makers that produced some cars in 1970.\\\"\\nSQL: SELECT + DISTINCT Maker FROM car_makers JOIN car_names ON car_makers.Id = car_names.MakeId + JOIN cars_data ON car_names.MakeId = cars_data.Id WHERE cars_data.Year = 1970\\nResultado: + [('chevrolet',), ('buick',)]\\nVEREDITO: REPROVADO\\nRaz\xE3o: O JOIN usa car_names.MakeId + para conectar a cars_data, mas cars_data.Id refere-se\\nao ID do carro, n\xE3o + do fabricante. O caminho correto seria via model_list. 
Os resultados\\nparecem + plaus\xEDveis mas derivam de uma jun\xE7\xE3o incorreta.\\n\\n-- EXEMPLO 5: + APROVADO (formato diferente, resposta correta) --\\nPergunta: \\\"On average, + when were the transcripts printed?\\\"\\nSQL: SELECT AVG(transcript_date) AS + average_transcript_date FROM Transcripts\\nResultado: [('1989.9333333333334',)]\\nVEREDITO: + APROVADO\\nRaz\xE3o: O resultado \xE9 um n\xFAmero que representa a m\xE9dia + das datas (formato num\xE9rico do SQLite).\\nEmbora n\xE3o seja uma data formatada, + responde corretamente \xE0 pergunta. Diferen\xE7a de\\nrepresenta\xE7\xE3o n\xE3o + \xE9 motivo de reprova\xE7\xE3o.\\n\\n-- EXEMPLO 6: APROVADO (query mais simples + que o gold, resultado equivalente) --\\nPergunta: \\\"Which model of car has + the minimum horsepower?\\\"\\nSQL: SELECT Model FROM car_names JOIN cars_data + ON car_names.MakeId = cars_data.Id WHERE Horsepower = (SELECT MIN(Horsepower) + FROM cars_data) LIMIT 1\\nResultado: [('triumph',)]\\nVEREDITO: APROVADO\\nRaz\xE3o: + A query retorna corretamente o modelo com menor pot\xEAncia. O LIMIT 1 garante + unicidade\\ne o resultado \xE9 semanticamente correto. 
Aprovar.\\n\\n=== CRIT\xC9RIOS + DE AVALIA\xC7\xC3O ===\\n\\nREPROVE quando houver:\\n- Escopo incompleto: query + cobre apenas parte do que a pergunta pede\\n- Erro sem\xE2ntico: l\xF3gica correta + na forma mas errada no significado (MIN vs MAX, ANY vs ALL)\\n- JOIN incorreto + que altera os dados sendo agregados ou filtrados\\n- Resultado vazio quando + a pergunta claramente espera dados\\n- Filtro com valor literal diferente do + que est\xE1 no banco\\n- M\xE9trica errada (SUM vs AVG, COUNT vs COUNT DISTINCT, + etc.)\\n- Erro de execu\xE7\xE3o SQL\\n\\nAPROVE quando:\\n- O resultado responde + \xE0 pergunta, mesmo com formato ou representa\xE7\xE3o diferente\\n- H\xE1 + colunas extras que n\xE3o prejudicam a resposta\\n- A precis\xE3o num\xE9rica + difere mas o valor est\xE1 correto\\n- A query \xE9 mais simples que o esperado + mas semanticamente equivalente\\n\\nAvalie com rigor sem\xE2ntico. Resultados + que parecem plaus\xEDveis mas derivam de l\xF3gica\\nincorreta devem ser reprovados. 
+ N\xE3o presuma que uma query bem-formada est\xE1 correta.\\n\\nResponda no formato:\\nVEREDITO: + APROVADO ou REPROVADO\\nFEEDBACK: \",\"role\":\"user\"}],\"model\":\"gpt-4o-mini\",\"stream\":false}" + headers: + Accept: + - a + Accept-Encoding: + - g + Connection: + - k + Content-Length: + - '7' + Content-Type: + - a + Cookie: + - _ + Host: + - a + User-Agent: + - O + X-Stainless-Arch: + - x + X-Stainless-Async: + - f + X-Stainless-Lang: + - p + X-Stainless-OS: + - L + X-Stainless-Package-Version: + - '2' + X-Stainless-Raw-Response: + - t + X-Stainless-Runtime: + - C + X-Stainless-Runtime-Version: + - '3' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdck0zw40MgAAAAD//4xTTW/TQBC991eMfE6i + fNvlFkgqVSCFFuihpLLW64mzrb1jdtdVEcp/gSN/gWv+GLPOh10oEhdrPW/e25l9Mzq06vaCwnAa + PVx+kvd4dVuOoqDjGZTco3RHVk8S89Ap0ntYGhQOveogDM/H/WgSTWugoBRzT8tK1x1Tt1BadYf9 + 4bjbD7uDg7jckJJoOe0z/wJ8q7++Tp3iE4f7nWOkQGtFhhw7JnHQUO4jgbBWWSe0CzoNKEk71HXp + N4vrxfzy4/IVzN5fL29m8yXASl8sFvPXszdvOQqcbKvcCfhw9c7/8EmSMehEwRoIBHr3q0BD4MiJ + HFKEElOVkgUtwIkEcwGrgEyKxq6CDnypEAzakrgTSFWjtPvOTJNV/oqUoLLV7odRBJYSg0zjLlj0 + KI5P3BgWoAkSoSX1YOllfalMpsRxFux+1vX7TH+DvzjHR+HPpTACxKm9XvuBDK4rK7xJusrzFiC0 + 5ia9ybU1dwdkezIjp6w0lNg/qMGaTbabmGfC8oDww1tHZVCjW/7e1aZXz3wMWKgoXezoAevrBuHk + fC8YNMPWwJPxAax9aNGiwajzgmKc8sur3LYGJ5BCbjBtuM2UiSpV1ALOWn3/Xc5L2vvelc7+R74B + pMSS9yguDRsvn7fcpBn0y/ivtNM71wUHFs0jb1fsFBrvRYprwTOw3zz71Y9VzIZlaEqj9nuyLmMc + DaOpXGN/GJxtz34DAAD//wMAht9PNzUEAAA= + headers: + Access-Control-Expose-Headers: + - CF-Ray + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Fri, 22 May 2026 00:09:47 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + 
access-control-expose-headers: + - X-Request-ID + - CF-Ray + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - + openai-processing-ms: + - '1301' + openai-project: + - + openai-version: + - '2020-10-01' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-limit-tokens: + - '200000' + x-ratelimit-remaining-requests: + - + x-ratelimit-remaining-tokens: + - + x-ratelimit-reset-requests: + - 23.64s + x-ratelimit-reset-tokens: + - 507ms + x-request-id: + - + status: + code: 200 + message: OK +version: 1 diff --git a/tests/test_integracao.py b/tests/test_integracao.py index 5de36cc..a973b9e 100644 --- a/tests/test_integracao.py +++ b/tests/test_integracao.py @@ -74,8 +74,8 @@ def test_pergunta_com_ranking(grafo): """Pergunta com ranking retorna múltiplas linhas.""" config = {"configurable": {"thread_id": "teste_simples"}} resultado = grafo.grafo_text_to_insight.invoke( - _estado_inicial("Quais sao as 5 categorias de produtos mais vendidos por quantidade?"), config - ) + _estado_inicial("Quais são as 5 categorias com a maior quantidade total de itens vendidos?"), config + ) # antes a pergunta era "Quais são as 5 categorias de produtos mais vendidos por quantidade?" 
e ela era uma pergunta que necessitava de mais input de contexto para o gpt4o assert resultado["status"] == "aprovado" assert resultado["sql_gerada"] != "" @@ -88,8 +88,9 @@ def test_estado_final_completo(grafo): """Estado final tem todos os campos-chave preenchidos.""" config = {"configurable": {"thread_id": "teste_estado"}} resultado = grafo.grafo_text_to_insight.invoke( - _estado_inicial("Qual o valor medio dos pedidos?"), config - ) + _estado_inicial("Considerando o valor total cobrado por pedido, qual é a média de valor dos pedidos?"), config + ) + # antes era "Qual o valor médio dos pedidos?", mas o gpt4o não conseguia entender o contexto de "valor dos pedidos" sem mencionar o campo específico "valor total cobrado por pedido" # Campos que devem estar preenchidos ao final assert resultado.get("contexto_schema", "") != "" From 0ec5261e4fb337b4a6498a6ab36beceb2470d8d1 Mon Sep 17 00:00:00 2001 From: LuizCorrei4 Date: Thu, 21 May 2026 21:50:23 -0300 Subject: [PATCH 3/6] add networkx as a project dependency --- pyproject.toml | 1 + requirements.txt | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index d0fdc86..0ca8b25 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,6 +33,7 @@ dependencies = [ "python-dotenv>=1.0.0", "pandas>=2.0.0", # uso na geração de gráficos e análise de dados "matplotlib>=3.8.0" # uso na geração de gráficos e visualizações + "networkx>=3.0" ] [project.scripts] diff --git a/requirements.txt b/requirements.txt index 1f4db36..cf3023b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,4 +11,5 @@ pytest-recording>=0.13.0 pytest-timeout>=2.3.0 numpy tabulate>=0.9.0 -chromadb \ No newline at end of file +chromadb +networkx>=3.0 \ No newline at end of file From fa44b0cd2c990e30d62a7aff53aca5bbd3117e29 Mon Sep 17 00:00:00 2001 From: LuizCorrei4 Date: Thu, 21 May 2026 21:57:56 -0300 Subject: [PATCH 4/6] =?UTF-8?q?fix:=20erro=20de=20sintaxe=20das=20depend?= 
=?UTF-8?q?=C3=AAncias=20do=20pyproject?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 0ca8b25..8a3acfa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,7 +32,7 @@ dependencies = [ "langchain-openai>=0.1.0", "python-dotenv>=1.0.0", "pandas>=2.0.0", # uso na geração de gráficos e análise de dados - "matplotlib>=3.8.0" # uso na geração de gráficos e visualizações + "matplotlib>=3.8.0", # uso na geração de gráficos e visualizações "networkx>=3.0" ] From 78da14ef122a1b14623b69b41e359fabe775fdbb Mon Sep 17 00:00:00 2001 From: LuizCorrei4 Date: Thu, 21 May 2026 22:06:01 -0300 Subject: [PATCH 5/6] add ',' no fim das dependencias do pyproject --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 8a3acfa..2e60505 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,7 +33,7 @@ dependencies = [ "python-dotenv>=1.0.0", "pandas>=2.0.0", # uso na geração de gráficos e análise de dados "matplotlib>=3.8.0", # uso na geração de gráficos e visualizações - "networkx>=3.0" + "networkx>=3.0", ] [project.scripts] From af896f32668cf0700e051140613c61f183920e90 Mon Sep 17 00:00:00 2001 From: LuizCorrei4 Date: Thu, 21 May 2026 23:20:32 -0300 Subject: [PATCH 6/6] =?UTF-8?q?fix:=20update=20test=20do=20n=C3=B3=20retri?= =?UTF-8?q?ever=20para=20esperar=20o=20campo=20de=20contexto=5Frag=5Fschem?= =?UTF-8?q?a=20e=20tratamento=20mais=20robusto=20dos=20campos=20de=20pergu?= =?UTF-8?q?nta=20no=20n=C3=B3=20do=20retriever=20mas=20sem=20mudan=C3=A7a?= =?UTF-8?q?=20de=20l=C3=B3gica,=20al=C3=A9m=20de=20atualiza=C3=A7=C3=A3o?= =?UTF-8?q?=20de=20docstring=20do=20N=C3=B3=20Retriever=20que=20estava=20i?= =?UTF-8?q?nconsistente?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/test_componentes.py | 8 ++++---- 
text_to_insight/nodes/retriever.py | 11 ++++++----- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/tests/test_componentes.py b/tests/test_componentes.py index fb48086..5927fbf 100644 --- a/tests/test_componentes.py +++ b/tests/test_componentes.py @@ -306,11 +306,11 @@ def test_no_retriever_reduz_contexto_schema(): from text_to_insight.nodes.retriever import nos_nodo_retriever from text_to_insight.nodes.schema import nos_nodo_esquema - estado = {"db_path": DB_PATH, "pergunta_usuario": "How many orders does each customer have?"} + estado = {"db_path": DB_PATH, "pergunta_atual": "How many orders does each customer have?"} estado["contexto_schema"] = nos_nodo_esquema(estado)["contexto_schema"] tam_original = len(estado["contexto_schema"]) out = nos_nodo_retriever(estado) - assert "contexto_schema" in out - assert len(out["contexto_schema"]) < tam_original - assert "orders" in out["contexto_schema"].lower() \ No newline at end of file + assert "contexto_rag_schema" in out + assert len(out["contexto_rag_schema"]) < tam_original + assert "orders" in out["contexto_rag_schema"].lower() \ No newline at end of file diff --git a/text_to_insight/nodes/retriever.py b/text_to_insight/nodes/retriever.py index c7ba3c1..c9f6081 100644 --- a/text_to_insight/nodes/retriever.py +++ b/text_to_insight/nodes/retriever.py @@ -3,10 +3,7 @@ Lê o `contexto_schema` produzido pelo nó de schema, recupera o subconjunto de tabelas relevantes para a pergunta via SchemaGraphRAG (vetor + grafo de FKs), -formata o resultado como texto e sobrescreve `contexto_schema` no estado. - -Política: sobrescreve `contexto_schema` para manter o CodeAgent e o template -de prompt inalterados (decisão registrada no plano). +formata o resultado como texto e adiciona ao campo contexto_rag_schema. 
""" from ..state import EstadoTextToInsight @@ -25,7 +22,11 @@ def _formatar_contexto_rag(retrieved, relations) -> str: def nos_nodo_retriever(estado: EstadoTextToInsight) -> dict: - pergunta = estado.get("pergunta_usuario", "") + pergunta = ( + estado.get("pergunta_atual", "") + or estado.get("pergunta_original", "") + or estado.get("pergunta_usuario", "") # campo antigo de pergunta mas manter para compatibilidade com testes antigos + ) # usa pergunta canônica do estado schema_full = estado.get("contexto_schema", "") if not schema_full or not pergunta or len(schema_full) < 500: return {}