diff --git a/.github/.markdownlint.json b/.github/.markdownlint.json
index e9300d1..0292480 100644
--- a/.github/.markdownlint.json
+++ b/.github/.markdownlint.json
@@ -8,5 +8,6 @@
"MD033": false,
"MD048": false,
"MD040": false,
- "MD041": false
+ "MD041": false,
+ "MD060": false
}
diff --git a/.gitignore b/.gitignore
index 6349e36..c95816f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,8 @@
# Local .terraform directories
.terraform/
+*.terraform.lock.hcl
+.terraform.lock.hcl
+*src/.env
# .tfstate files
*.tfstate
diff --git a/TROUBLESHOOTING.md b/TROUBLESHOOTING.md
new file mode 100644
index 0000000..2b69924
--- /dev/null
+++ b/TROUBLESHOOTING.md
@@ -0,0 +1,355 @@
+# Troubleshooting Guide - Overview
+
+Costa Rica
+
+[](https://github.com/)
+[brown9804](https://github.com/brown9804)
+
+Last updated: 2025-11-24
+
+----------
+
+> This guide covers common issues you may encounter when deploying and running this Azure AI Shopping demo application.
+
+
+Table of Contents (Click to expand)
+
+- [Python Not Found](#python-not-found)
+- [Virtual Environment Creation Failed](#virtual-environment-creation-failed)
+- [Package Installation Failed](#package-installation-failed)
+- [Not Logged into Azure CLI](#not-logged-into-azure-cli)
+- [AAD Authentication Failed](#aad-authentication-failed)
+- [Local Authorization Disabled Error](#local-authorization-disabled-error)
+- [Connection Timeout](#connection-timeout)
+- [CSV File Not Found](#csv-file-not-found)
+- [CSV Parsing Error](#csv-parsing-error)
+- [Environment File Missing](#environment-file-missing)
+- [Failed to Authenticate to Cosmos DB](#failed-to-authenticate-to-cosmos-db)
+- [Resource Already Exists](#resource-already-exists)
+- [Insufficient Permissions](#insufficient-permissions)
+- [Provider Configuration Error](#provider-configuration-error)
+- [State Lock Error](#state-lock-error)
+- [Enable Verbose Logging](#enable-verbose-logging)
+- [Check Azure Service Health](#check-azure-service-health)
+- [Clean Up and Retry](#clean-up-and-retry)
+- [Still Having Issues?](#still-having-issues)
+
+
+
+## Python Not Found
+
+```
+ERROR: Python is not installed or not in PATH
+```
+
+**Solution**:
+
+- Install Python 3.8+ from [python.org](https://www.python.org/downloads/)
+- Ensure Python is added to your system PATH during installation
+- Verify installation: `python --version`
+
+## Virtual Environment Creation Failed
+
+```
+ERROR: Failed to create virtual environment
+```
+
+**Solution**:
+
+- Ensure you have write permissions to the `src` directory
+- Try deleting existing `venv` folder: `Remove-Item -Recurse -Force venv`
+- Check if `python -m venv` works manually: `python -m venv test_venv`
+- On Windows, ensure your execution policy allows script execution
+
+## Package Installation Failed
+
+```
+ERROR: Could not install packages due to an OSError
+```
+
+**Solution**:
+
+- Update pip: `python -m pip install --upgrade pip`
+- Clear pip cache: `pip cache purge`
+- Try installing with `--no-cache-dir`: `pip install --no-cache-dir -r requirements.txt`
+- For Windows + pandas issues, use pre-built wheels by ensuring `pandas>=2.2.2` in requirements.txt
+
+## Not Logged into Azure CLI
+
+```
+ERROR: Please run 'az login' to setup account
+```
+
+**Solution**:
+
+```powershell
+# Login to Azure CLI
+az login
+
+# Verify you're logged in with the correct account
+az account show
+
+# If needed, set the correct subscription
+az account set --subscription <subscription-id>
+```
+
+## AAD Authentication Failed
+
+```
+DefaultAzureCredential failed to retrieve a token
+```
+
+**Solution**:
+
+1. Ensure you're logged into Azure CLI: `az login`
+2. Check your account has proper permissions
+3. Verify the resource exists and you have access
+4. Try clearing Azure credentials cache: `az account clear` then `az login` again
+
+## Local Authorization Disabled Error
+
+```
+ERROR: Local Authorization is disabled. Use an AAD token to authorize all requests.
+```
+
+> This error occurs when Cosmos DB requires Azure Active Directory (AAD) authentication instead of key-based authentication.
+
+**Common Causes and Solutions**:
+
+- Not logged into Azure CLI
+
+```powershell
+# Login to Azure CLI
+az login
+
+# Verify you're logged in with the correct account
+az account show
+
+# If needed, set the correct subscription
+az account set --subscription <subscription-id>
+```
+
+> After logging in, try running the script again.
+
+- Public Network Access Disabled
+
+> If your Cosmos DB has public network access disabled, your local machine or Codespace VM cannot connect.
+
+**Solution via Azure Portal**:
+
+- Navigate to your Cosmos DB account in the Azure portal
+- Select **Networking** from the Settings menu
+- Ensure **Public network access** is set to **All networks**
+- Click **Save**
+- Wait a few minutes for the change to propagate
+- Try running the script again
+
+**Solution via Azure CLI**:
+
+```bash
+az cosmosdb update \
+  --name <cosmos-account-name> \
+  --resource-group <resource-group-name> \
+  --enable-public-network true
+```
+
+- Insufficient Permissions: Your Azure account needs appropriate role assignments on the Cosmos DB account.
+
+**Required roles**:
+
+- `Cosmos DB Built-in Data Contributor` (for read/write access)
+- Or `Contributor` at the resource group level
+
+**Solution via Azure CLI**:
+
+```powershell
+# Get your user object ID
+$userId = (az ad signed-in-user show --query id -o tsv)
+
+# Assign Cosmos DB Data Contributor role
+az cosmosdb sql role assignment create \
+  --account-name <cosmos-account-name> \
+  --resource-group <resource-group-name> \
+ --role-definition-id 00000000-0000-0000-0000-000000000002 \
+ --principal-id $userId \
+ --scope "/"
+```
+
+## Connection Timeout
+
+```
+ERROR: Request timeout
+```
+
+**Solution**:
+
+- Check your network connection
+- Verify Cosmos DB firewall settings allow your IP address
+- Ensure public network access is enabled (see above)
+- Check if Azure services are experiencing outages: [Azure status](https://status.azure.com/)
+
+## CSV File Not Found
+
+```
+WARNING: CSV data file not found at data/updated_product_catalog(in).csv
+```
+
+**Solution**:
+Download or place the product catalog CSV file in the `src/data/` directory:
+
+```bash
+curl -o "src/data/updated_product_catalog(in).csv" "https://raw.githubusercontent.com/microsoft/TechWorkshop-L300-AI-Apps-and-agents/main/src/data/updated_product_catalog(in).csv"
+```
+
+## CSV Parsing Error
+
+```
+ERROR: Error tokenizing data. C error: Expected X fields, saw Y
+```
+
+**Solution**:
+
+- Ensure CSV fields with commas are properly quoted
+- Check for special characters or encoding issues
+- Verify the CSV has the correct number of columns (6): ProductID, ProductName, ProductCategory, ProductDescription, Price, ImageUrl
+- Try opening the CSV in a text editor to check for formatting issues
+
+## Environment File Missing
+
+```
+ERROR: .env file not found
+```
+
+**Solution**:
+
+```bash
+# Run Terraform to generate the .env file
+cd terraform-infrastructure
+terraform apply -auto-approve
+```
+
+## Failed to Authenticate to Cosmos DB
+
+```
+ERROR: Failed to authenticate to Cosmos DB using DefaultAzureCredential and no valid COSMOS_DB_KEY was provided
+```
+
+**Solution**:
+
+- Ensure your `.env` file is properly generated with correct keys
+- Run `terraform apply` again if needed
+- Check that `COSMOS_DB_ENDPOINT` and `COSMOS_DB_KEY` are set correctly in `.env`
+- The script will automatically try AAD authentication first, then fall back to key-based auth
+
+## Resource Already Exists
+
+```
+ERROR: A resource with the ID already exists
+```
+
+**Solution**:
+
+- Import the existing resource: `terraform import <resource_address> <resource_id>`
+- Or destroy and recreate: `terraform destroy` then `terraform apply`
+- Check for resources in other resource groups with the same name
+
+## Insufficient Permissions
+
+```
+ERROR: The client does not have authorization to perform action
+```
+
+**Solution**:
+
+- Ensure your Azure account has `Contributor` or `Owner` role on the subscription or resource group
+- Check if specific Azure policies are blocking resource creation
+- Contact your Azure administrator to grant necessary permissions
+
+## Provider Configuration Error
+
+```
+ERROR: Error configuring the backend "azurerm"
+```
+
+**Solution**:
+
+- Verify your Azure credentials are configured: `az login`
+- Check that the specified subscription exists and you have access
+- Ensure the backend storage account and container exist (if using remote state)
+
+## State Lock Error
+
+```
+ERROR: Error acquiring the state lock
+```
+
+**Solution**:
+
+```bash
+# Force unlock (use with caution)
+terraform force-unlock <lock-id>
+```
+
+> Only force-unlock if you're certain no other Terraform process is running.
+
+## Enable Verbose Logging
+
+For more detailed error information:
+
+**Azure CLI**:
+
+```powershell
+az --debug
+```
+
+**Python Scripts**:
+Set environment variable before running:
+
+```powershell
+$env:AZURE_LOG_LEVEL = "DEBUG"
+python pipelines/script.py
+```
+
+**Terraform**:
+
+```bash
+export TF_LOG=DEBUG
+terraform apply
+```
+
+## Check Azure Service Health
+
+> If experiencing unexpected issues, check [Azure service status](https://status.azure.com/)
+
+## Clean Up and Retry
+
+> Sometimes a clean slate helps:
+
+```powershell
+# Clean Python environment
+Remove-Item -Recurse -Force venv
+python -m venv venv
+
+# Clean Terraform state (use with caution)
+terraform destroy
+Remove-Item -Recurse -Force .terraform
+terraform init
+terraform apply
+```
+
+## Still Having Issues?
+
+> If you continue experiencing problems:
+
+1. Check the [GitHub repository issues](https://github.com/MicrosoftCloudEssentials-LearningHub/Agentic-DevOps-AI-Shopping/issues)
+2. Review Azure documentation for specific services
+3. Enable detailed logging as described above
+4. Collect error messages, logs, and configuration details
+5. Create a new issue with detailed information about your problem
+
+
+
+

+
Refresh Date: 2025-11-24
+
+
diff --git a/src/DATA_PIPELINE.md b/src/DATA_PIPELINE.md
new file mode 100644
index 0000000..69a2ff8
--- /dev/null
+++ b/src/DATA_PIPELINE.md
@@ -0,0 +1,283 @@
+# Data Pipeline Automation - Overview
+
+Costa Rica
+
+[](https://github.com/)
+[brown9804](https://github.com/brown9804)
+
+Last updated: 2025-11-24
+
+----------
+
+> This automation handles the complete data pipeline setup for the Azure AI Shopping application.
+
+
+Table of Contents (Click to expand)
+
+- [Usage](#usage)
+- [Data Files](#data-files)
+- [Scripts](#scripts)
+- [Troubleshooting](#troubleshooting)
+- [Configuration](#configuration)
+- [Environment Variable Reference](#environment-variable-reference)
+- [Verification](#verification)
+- [Check Cosmos DB](#check-cosmos-db)
+- [Check Search Index](#check-search-index)
+- [Query Search Index](#query-search-index)
+- [Next Steps](#next-steps)
+
+
+
+> [!NOTE]
+> What It Does? The data pipeline automation performs the following tasks:
+>
+> 1. **Creates Python Virtual Environment**: Sets up an isolated Python environment with all required dependencies
+> 2. **Imports Data to Cosmos DB**: Loads product catalog data from CSV into Cosmos DB container
+> 3. **Creates Azure AI Search Index**: Sets up a search index with vector search capabilities
+> 4. **Imports Data to Search**: Populates the search index from Cosmos DB using an indexer
+
+
+ Prerequisites: (Click to expand)
+
+> - Python 3.8 or higher installed and available in PATH
+> - Product catalog CSV file at `src/data/updated_product_catalog(in).csv` (demo)
+
+
+
+> Automated by Terraform:
+
+- Cosmos DB account and database
+- Azure AI Search service
+- Azure OpenAI model deployments
+- Environment variables in `src/.env`
+
+## Usage
+
+> Option 1: Run Automatically with Terraform → Enable data pipeline automation in `terraform.tfvars`:
+
+```hcl
+enable_data_pipeline = true
+```
+
+Then run:
+
+```bash
+terraform apply -auto-approve
+```
+
+This will:
+
+- Deploy all Azure resources
+- Create AI model deployments
+- Generate `.env` file
+- **Automatically run the complete data pipeline**
+
+> Option 2: Run Manually → If you prefer to run the data pipeline manually or separately:
+
+1. **Ensure `.env` file exists** (created by Terraform):
+
+ ```bash
+ cd terraform-infrastructure
+ terraform apply -auto-approve
+ ```
+
+2. **Navigate to src directory**:
+
+ ```bash
+ cd ../src
+ ```
+
+3. **Create virtual environment and install dependencies**:
+
+ ```powershell
+ python -m venv venv
+ .\venv\Scripts\Activate.ps1
+ pip install --upgrade pip
+ pip install -r requirements.txt
+ ```
+
+4. **Run pipeline scripts in order**:
+
+ ```powershell
+ # Step 1: Import data to Cosmos DB
+ python pipelines/ingest_to_cosmos.py
+
+ # Step 2: Create Azure AI Search index
+ python pipelines/create_search_index.py
+
+ # Step 3: Upload data to search index
+    python pipelines/import_to_search.py
+ ```
+
+## Data Files
+
+> Product Catalog CSV → The product catalog data should be placed at:
+
+```
+src/data/updated_product_catalog(in).csv
+```
+
+> Expected columns:
+
+- `ProductID`: Unique product identifier
+- `ProductName`: Product name
+- `ProductCategory`: Product category
+- `ProductDescription`: Product description
+- `Price`: Product price
+- `ImageUrl`: URL to product image
+
+> Download Data → If you don't have the data file, you can download it from the reference repository [TechWorkshop-L300-AI-Apps-and-agents](https://github.com/microsoft/TechWorkshop-L300-AI-Apps-and-agents/tree/main), please feel free to follow the guide as well [Guide - TechWorkshop L300: AI Apps and Agents](https://microsoft.github.io/TechWorkshop-L300-AI-Apps-and-agents/):
+
+```bash
+# Download the product catalog data
+curl -o "src/data/updated_product_catalog(in).csv" "https://raw.githubusercontent.com/microsoft/TechWorkshop-L300-AI-Apps-and-agents/main/src/data/updated_product_catalog(in).csv"
+```
+
+## Scripts
+
+
+ pipelines/ingest_to_cosmos.py (Click to expand)
+
+- Reads CSV data with product catalog
+- Connects to Cosmos DB (uses AAD or key-based auth)
+- Creates database and container if they don't exist
+- Imports all products with upsert operations
+- Creates `content_for_vector` field for semantic search
+- **Smart Skip Logic**:
+ - By default (`COSMOS_SKIP_IF_EXISTS=true`), checks if container already has data
+ - If data exists, skips import to avoid duplicates and save time
+ - Set `COSMOS_FORCE_INGEST=true` to force re-import even if data exists
+ - Set `COSMOS_SKIP_IF_EXISTS=false` to always import (legacy behavior)
+
+
+
+
+ pipelines/create_search_index.py (Click to expand)
+
+- Creates Azure AI Search index with vector search
+- Configures HNSW algorithm for vector search
+- Sets up Azure OpenAI vectorizer
+- Defines searchable and filterable fields
+
+
+
+
+ pipelines/create_search_index.py (Click to expand)
+
+- Creates Azure AI Search index with vector search capabilities
+- Configures HNSW algorithm for efficient vector similarity search
+- Sets up Azure OpenAI vectorizer with text-embedding-3-small model
+- Defines searchable, filterable, and vector fields
+- Supports hybrid search (keyword + semantic)
+
+
+
+
+ pipelines/create_search_index.py (Click to expand)
+
+- Creates Azure AI Search index with vector search
+- Configures HNSW algorithm for vector search
+- Sets up Azure OpenAI vectorizer
+- Defines searchable and filterable fields
+
+
+
+
+ pipelines/upload_to_search.py (Click to expand)
+
+- Reads all documents from Cosmos DB container
+- Authenticates using AAD or key-based auth (auto-fallback)
+- Maps Cosmos DB fields to Azure AI Search index schema
+- Uploads documents in batches to Azure AI Search
+- Provides detailed success/failure reporting
+- **Note**: This script replaces the traditional indexer approach to avoid managed identity complexity when Cosmos DB local auth is disabled
+
+
+
+## Troubleshooting
+
+> For detailed troubleshooting guidance, see [TROUBLESHOOTING.md](../TROUBLESHOOTING.md). Quick Reference:
+
+- **Python Not Found**: Install Python 3.8+ from [python.org](https://www.python.org/downloads/)
+- **CSV File Not Found**: Download the product catalog CSV file and place it in `src/data/` directory
+- **Authentication Errors**: Run `az login` and ensure you have proper permissions. See [TROUBLESHOOTING.md](../TROUBLESHOOTING.md#aad-authentication-failed) for detailed solutions.
+- **Virtual Environment Issues**: Delete `venv` folder and recreate. See [TROUBLESHOOTING.md](../TROUBLESHOOTING.md#virtual-environment-creation-failed) for details.
+
+## Configuration
+
+> All configuration is pulled from the `.env` file created by Terraform:
+
+```bash
+COSMOS_DB_ENDPOINT=...
+COSMOS_DB_KEY=...
+COSMOS_DB_NAME=...
+COSMOS_DB_CONTAINER_NAME=products
+COSMOS_SKIP_IF_EXISTS=true # Skip import if data already exists
+COSMOS_FORCE_INGEST=false # Force re-import even if data exists
+SEARCH_SERVICE_ENDPOINT=...
+SEARCH_SERVICE_KEY=...
+SEARCH_INDEX_NAME=products-index
+AZURE_OPENAI_ENDPOINT=...
+AZURE_OPENAI_API_KEY=...
+AZURE_OPENAI_EMBEDDING_DEPLOYMENT=text-embedding-3-small
+```
+
+## Environment Variable Reference
+
+| Variable | Default | Description |
+|----------------------------|---------|-------------------------------------------------------|
+| `COSMOS_SKIP_IF_EXISTS` | `true` | Skip import if container already has data |
+| `COSMOS_FORCE_INGEST` | `false` | Force re-import even if data exists (overrides skip) |
+| `COSMOS_DB_ENDPOINT` | - | Cosmos DB account endpoint URL |
+| `COSMOS_DB_KEY` | - | Cosmos DB account key (optional if using AAD) |
+| `COSMOS_DB_NAME` | - | Database name |
+| `COSMOS_DB_CONTAINER_NAME` | - | Container name for product catalog |
+
+## Verification
+
+> After running the pipeline, verify data was imported:
+
+## Check Cosmos DB
+
+```bash
+az cosmosdb sql container show \
+  --account-name <cosmos-account-name> \
+ --database-name zava \
+ --name products \
+  --resource-group <resource-group-name>
+```
+
+## Check Search Index
+
+```bash
+az search index show \
+ --index-name products-index \
+  --service-name <search-service-name> \
+  --resource-group <resource-group-name>
+```
+
+## Query Search Index
+
+```bash
+az search index show-statistics \
+ --index-name products-index \
+  --service-name <search-service-name> \
+  --resource-group <resource-group-name>
+```
+
+## Next Steps
+
+> After the data pipeline completes:
+
+1. Your Cosmos DB container is populated with product data
+2. Azure AI Search index is created with vector search enabled
+3. Search index is populated from Cosmos DB
+4. You can now build AI agents that query this data
+5. Use the search index for hybrid search (keyword + semantic)
+
+
+
+

+
Refresh Date: 2025-11-24
+
+
diff --git a/src/data/updated_product_catalog(in).csv b/src/data/updated_product_catalog(in).csv
new file mode 100644
index 0000000..426e823
--- /dev/null
+++ b/src/data/updated_product_catalog(in).csv
@@ -0,0 +1,21 @@
+ProductID,ProductName,ProductCategory,ProductDescription,Price,ImageUrl
+1001,Zava Smart Speaker,Electronics,Voice-controlled speaker with high-fidelity audio and edge AI noise suppression,89.99,https://example.com/images/speaker.jpg
+1002,Zava Wireless Earbuds,Electronics,Comfort-fit earbuds with adaptive EQ and 30h battery life,129.00,https://example.com/images/earbuds.jpg
+1003,Zava Fitness Tracker,Sports,Water-resistant tracker with heart rate variability and sleep stage insights,59.50,https://example.com/images/tracker.jpg
+1004,Zava Running Shoes,Sports,Breathable mesh performance shoes with responsive foam sole,104.95,https://example.com/images/runningshoes.jpg
+1005,Zava Cotton Hoodie,Apparel,Ultra-soft recycled cotton hoodie with antimicrobial treatment,54.99,https://example.com/images/hoodie.jpg
+1006,Zava Insulated Bottle,Home,Double-wall stainless steel bottle keeps drinks cold 24h / hot 12h,28.00,https://example.com/images/bottle.jpg
+1007,Zava Ceramic Mug,Home,Matte glaze 14oz mug safe for dishwasher and microwave,12.75,https://example.com/images/mug.jpg
+1008,Zava Multi-Tool Outdoor,Sports,Compact 11-in-1 stainless multi-tool with locking blades,36.40,https://example.com/images/multitool.jpg
+1009,Zava Hair Serum,Beauty,Nutrient-rich lightweight serum for frizz control and shine,24.50,https://example.com/images/hairserum.jpg
+1010,Zava Vitamin C Gummies,Grocery,Non-GMO vegan gummies with natural citrus flavor (90 count),18.99,https://example.com/images/vitaminc.jpg
+1011,Zava Gaming Mouse,Electronics,Customizable RGB ergonomic mouse with 12K DPI sensor,64.00,https://example.com/images/mouse.jpg
+1012,Zava Mechanical Keyboard,Electronics,"Hot-swap switches, per-key lighting, and PBT keycaps",139.95,https://example.com/images/keyboard.jpg
+1013,Zava Desk Lamp,Home,Adjustable LED lamp with ambient backlight and USB-C charging port,42.25,https://example.com/images/desklamp.jpg
+1014,Zava Noise Masking Device,Electronics,Generates adaptive ambient sound for focus and sleep environments,79.99,https://example.com/images/noisemask.jpg
+1015,Zava Travel Backpack,Apparel,Weather-resistant 28L backpack with laptop sleeve and hidden pocket,98.00,https://example.com/images/backpack.jpg
+1016,Zava Smart Plug,Electronics,Energy monitoring smart plug with over-current protection,19.95,https://example.com/images/smartplug.jpg
+1017,Zava LED Strip Kit,Electronics,16M color Wi-Fi LED strip with music sync mode,34.50,https://example.com/images/ledstrip.jpg
+1018,Zava Foam Roller,Sports,"High-density recovery roller improves circulation and muscle release",25.00,https://example.com/images/foamroller.jpg
+1019,Zava Sunscreen SPF50,Beauty,Broad-spectrum mineral sunscreen water-resistant for 80 minutes,21.25,https://example.com/images/sunscreen.jpg
+1020,Zava Organic Trail Mix,Grocery,"Blend of roasted nuts, seeds, and dried berries (16oz)",11.49,https://example.com/images/trailmix.jpg
\ No newline at end of file
diff --git a/src/pipelines/create_search_index.py b/src/pipelines/create_search_index.py
new file mode 100644
index 0000000..d9e9333
--- /dev/null
+++ b/src/pipelines/create_search_index.py
@@ -0,0 +1,119 @@
+import logging
+import os
+from azure.search.documents.indexes import SearchIndexClient
+from azure.search.documents.indexes.models import (
+ SearchIndex,
+ SearchField,
+ SearchFieldDataType,
+ SimpleField,
+ SearchableField,
+ VectorSearch,
+ HnswAlgorithmConfiguration,
+ VectorSearchProfile,
+ AzureOpenAIVectorizer,
+ AzureOpenAIVectorizerParameters
+)
+from azure.core.credentials import AzureKeyCredential
+from azure.identity import DefaultAzureCredential
+from dotenv import load_dotenv
+
+load_dotenv()
+
+# Configuration
+SEARCH_ENDPOINT = os.environ.get("SEARCH_SERVICE_ENDPOINT")
+SEARCH_KEY = os.environ.get("SEARCH_SERVICE_KEY")
+INDEX_NAME = os.environ.get("SEARCH_INDEX_NAME", "products-index")
+AZURE_OPENAI_ENDPOINT = os.environ.get("AZURE_OPENAI_ENDPOINT")
+AZURE_OPENAI_API_KEY = os.environ.get("AZURE_OPENAI_API_KEY")
+EMBEDDING_DEPLOYMENT = os.environ.get("AZURE_OPENAI_EMBEDDING_DEPLOYMENT", "text-embedding-3-small")
+
+# Configure logging
+logger = logging.getLogger(__name__)
+logging.basicConfig(level=logging.INFO)
+
+def create_search_index():
+ """Create Azure AI Search index with vector search capabilities."""
+
+ if not SEARCH_ENDPOINT:
+ raise ValueError("SEARCH_SERVICE_ENDPOINT must be provided in environment variables")
+
+ # Create client
+ try:
+ logger.info("Attempting to create Search Index Client...")
+ if SEARCH_KEY:
+ credential = AzureKeyCredential(SEARCH_KEY)
+ else:
+ credential = DefaultAzureCredential()
+
+ index_client = SearchIndexClient(endpoint=SEARCH_ENDPOINT, credential=credential)
+ logger.info("Search Index Client created successfully")
+ except Exception as e:
+ logger.error(f"Failed to create Search Index Client: {e}")
+ raise
+
+ # Define the index fields
+ fields = [
+ SimpleField(name="id", type=SearchFieldDataType.String, key=True, filterable=True),
+ SimpleField(name="ProductID", type=SearchFieldDataType.String, filterable=True),
+ SearchableField(name="ProductName", type=SearchFieldDataType.String, searchable=True),
+ SearchableField(name="ProductCategory", type=SearchFieldDataType.String, searchable=True, filterable=True, facetable=True),
+ SearchableField(name="ProductDescription", type=SearchFieldDataType.String, searchable=True),
+ SimpleField(name="ProductPrice", type=SearchFieldDataType.Double, filterable=True, sortable=True),
+ SimpleField(name="ProductImageURL", type=SearchFieldDataType.String),
+ SearchableField(name="content_for_vector", type=SearchFieldDataType.String, searchable=True),
+ SearchField(
+ name="content_vector",
+ type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
+ searchable=True,
+ vector_search_dimensions=1536, # text-embedding-3-small dimensions
+ vector_search_profile_name="vector-profile"
+ )
+ ]
+
+ # Configure vector search
+ vector_search = VectorSearch(
+ algorithms=[
+ HnswAlgorithmConfiguration(name="hnsw-algorithm")
+ ],
+ profiles=[
+ VectorSearchProfile(
+ name="vector-profile",
+ algorithm_configuration_name="hnsw-algorithm",
+ vectorizer_name="openai-vectorizer"
+ )
+ ],
+ vectorizers=[
+ AzureOpenAIVectorizer(
+ vectorizer_name="openai-vectorizer",
+ parameters=AzureOpenAIVectorizerParameters(
+ resource_url=AZURE_OPENAI_ENDPOINT,
+ deployment_name=EMBEDDING_DEPLOYMENT,
+ model_name="text-embedding-3-small", # Required in API version 2025-09-01
+ api_key=AZURE_OPENAI_API_KEY
+ )
+ )
+ ]
+ )
+
+ # Create the search index
+ index = SearchIndex(
+ name=INDEX_NAME,
+ fields=fields,
+ vector_search=vector_search
+ )
+
+ try:
+ logger.info(f"Creating search index: {INDEX_NAME}...")
+ result = index_client.create_or_update_index(index)
+ logger.info(f"Search index '{result.name}' created successfully")
+ return result
+ except Exception as e:
+ logger.error(f"Failed to create search index: {e}")
+ raise
+
+def main():
+ create_search_index()
+ logger.info("Search index creation completed successfully")
+
+if __name__ == "__main__":
+ main()
diff --git a/src/pipelines/import_to_search.py b/src/pipelines/import_to_search.py
new file mode 100644
index 0000000..873b6ac
--- /dev/null
+++ b/src/pipelines/import_to_search.py
@@ -0,0 +1,149 @@
+import logging
+import os
+from azure.cosmos import CosmosClient
+from azure.search.documents import SearchClient
+from azure.search.documents.indexes import SearchIndexerClient
+from azure.search.documents.indexes.models import (
+ SearchIndexerDataSourceConnection,
+ SearchIndexerDataContainer,
+ SearchIndexer,
+ FieldMapping,
+ IndexingSchedule
+)
+from azure.core.credentials import AzureKeyCredential
+from azure.identity import DefaultAzureCredential
+from dotenv import load_dotenv
+import time
+
+load_dotenv()
+
+# Configuration
+COSMOS_ENDPOINT = os.environ.get("COSMOS_DB_ENDPOINT")
+COSMOS_KEY = os.environ.get("COSMOS_DB_KEY")
+DATABASE_NAME = os.environ.get("COSMOS_DB_NAME")
+CONTAINER_NAME = os.environ.get("COSMOS_DB_CONTAINER_NAME")
+SEARCH_ENDPOINT = os.environ.get("SEARCH_SERVICE_ENDPOINT")
+SEARCH_KEY = os.environ.get("SEARCH_SERVICE_KEY")
+INDEX_NAME = os.environ.get("SEARCH_INDEX_NAME", "products-index")
+DATASOURCE_NAME = f"{INDEX_NAME}-datasource"
+INDEXER_NAME = f"{INDEX_NAME}-indexer"
+
+# Configure logging
+logger = logging.getLogger(__name__)
+logging.basicConfig(level=logging.INFO)
+
+def create_cosmos_datasource():
+ """Create a data source connection to Cosmos DB."""
+
+ if not SEARCH_KEY:
+ credential = DefaultAzureCredential()
+ else:
+ credential = AzureKeyCredential(SEARCH_KEY)
+
+ indexer_client = SearchIndexerClient(endpoint=SEARCH_ENDPOINT, credential=credential)
+
+ # Create the data source connection
+ container = SearchIndexerDataContainer(name=CONTAINER_NAME)
+
+ data_source_connection = SearchIndexerDataSourceConnection(
+ name=DATASOURCE_NAME,
+ type="cosmosdb",
+ connection_string=f"AccountEndpoint={COSMOS_ENDPOINT};AccountKey={COSMOS_KEY};Database={DATABASE_NAME}",
+ container=container
+ )
+
+ try:
+ logger.info(f"Creating data source: {DATASOURCE_NAME}...")
+ result = indexer_client.create_or_update_data_source_connection(data_source_connection)
+ logger.info(f"Data source '{result.name}' created successfully")
+ return result
+ except Exception as e:
+ logger.error(f"Failed to create data source: {e}")
+ raise
+
+def create_indexer():
+ """Create an indexer to import data from Cosmos DB to Azure AI Search."""
+
+ if not SEARCH_KEY:
+ credential = DefaultAzureCredential()
+ else:
+ credential = AzureKeyCredential(SEARCH_KEY)
+
+ indexer_client = SearchIndexerClient(endpoint=SEARCH_ENDPOINT, credential=credential)
+
+ # Create the indexer
+ indexer = SearchIndexer(
+ name=INDEXER_NAME,
+ data_source_name=DATASOURCE_NAME,
+ target_index_name=INDEX_NAME,
+ field_mappings=[
+ FieldMapping(source_field_name="id", target_field_name="id"),
+ FieldMapping(source_field_name="ProductID", target_field_name="ProductID"),
+ FieldMapping(source_field_name="ProductName", target_field_name="ProductName"),
+ FieldMapping(source_field_name="ProductCategory", target_field_name="ProductCategory"),
+ FieldMapping(source_field_name="ProductDescription", target_field_name="ProductDescription"),
+ FieldMapping(source_field_name="ProductPrice", target_field_name="ProductPrice"),
+ FieldMapping(source_field_name="ProductImageURL", target_field_name="ProductImageURL"),
+ FieldMapping(source_field_name="content_for_vector", target_field_name="content_for_vector"),
+ ]
+ )
+
+ try:
+ logger.info(f"Creating indexer: {INDEXER_NAME}...")
+ result = indexer_client.create_or_update_indexer(indexer)
+ logger.info(f"Indexer '{result.name}' created successfully")
+ return result
+ except Exception as e:
+ logger.error(f"Failed to create indexer: {e}")
+ raise
+
+def run_indexer():
+ """Run the indexer to start data import."""
+
+ if not SEARCH_KEY:
+ credential = DefaultAzureCredential()
+ else:
+ credential = AzureKeyCredential(SEARCH_KEY)
+
+ indexer_client = SearchIndexerClient(endpoint=SEARCH_ENDPOINT, credential=credential)
+
+ try:
+ logger.info(f"Running indexer: {INDEXER_NAME}...")
+ indexer_client.run_indexer(INDEXER_NAME)
+ logger.info("Indexer started successfully")
+
+ # Wait for indexer to complete
+ logger.info("Waiting for indexer to complete...")
+ for i in range(30): # Wait up to 5 minutes
+ time.sleep(10)
+ status = indexer_client.get_indexer_status(INDEXER_NAME)
+ last_result = status.last_result
+
+ if last_result:
+ logger.info(f"Indexer status: {last_result.status}")
+ if last_result.status == "success":
+ logger.info(f"Indexer completed successfully. Indexed {last_result.items_processed} items.")
+ return
+ elif last_result.status == "transientFailure" or last_result.status == "persistentFailure":
+ logger.error(f"Indexer failed: {last_result.error_message}")
+ raise Exception(f"Indexer failed: {last_result.error_message}")
+
+ logger.warning("Indexer is still running after timeout")
+ except Exception as e:
+ logger.error(f"Failed to run indexer: {e}")
+ raise
+
+def main():
+ # Step 1: Create Cosmos DB data source
+ create_cosmos_datasource()
+
+ # Step 2: Create indexer
+ create_indexer()
+
+ # Step 3: Run indexer
+ run_indexer()
+
+ logger.info("Data import to Azure AI Search completed successfully")
+
+if __name__ == "__main__":
+ main()
diff --git a/src/pipelines/ingest_to_cosmos.py b/src/pipelines/ingest_to_cosmos.py
new file mode 100644
index 0000000..9c84fe3
--- /dev/null
+++ b/src/pipelines/ingest_to_cosmos.py
@@ -0,0 +1,135 @@
+import logging
+import pandas as pd
+import os
+from azure.cosmos import CosmosClient, PartitionKey
+from azure.identity import DefaultAzureCredential
+from azure.core.exceptions import AzureError
+from dotenv import load_dotenv
+
+load_dotenv()
+
+# Configuration - loaded from environment variables (populated into .env by Terraform)
+COSMOS_ENDPOINT = os.environ.get("COSMOS_DB_ENDPOINT")
+COSMOS_KEY = os.environ.get("COSMOS_DB_KEY")
+DATABASE_NAME = os.environ.get("COSMOS_DB_NAME")
+CONTAINER_NAME = os.environ.get("COSMOS_DB_CONTAINER_NAME")
+SKIP_IF_EXISTS = os.environ.get("COSMOS_SKIP_IF_EXISTS", "true").lower() == "true"
+FORCE_INGEST = os.environ.get("COSMOS_FORCE_INGEST", "false").lower() == "true"
+CSV_FILE = r"data/updated_product_catalog(in).csv"
+
+# Configure logging
+logger = logging.getLogger(__name__)
+logging.basicConfig(level=logging.INFO)
+
+def get_cosmos_client(endpoint: str | None, key: str | None = None):
+ """Try to authenticate to Cosmos DB using DefaultAzureCredential first.
+
+ If that fails, fall back to using the provided key.
+ Returns a connected CosmosClient instance.
+ """
+ if not endpoint:
+ raise ValueError("COSMOS_DB_ENDPOINT must be provided in environment variables")
+
+ # Try AAD first
+ try:
+ logger.info("Attempting to authenticate to Cosmos DB using DefaultAzureCredential (AAD)...")
+ credential = DefaultAzureCredential()
+ client = CosmosClient(endpoint, credential=credential)
+
+ # Perform a light operation to validate the credential
+ _ = list(client.list_databases())
+ logger.info("Authenticated to Cosmos DB with DefaultAzureCredential.")
+ return client
+ except AzureError as ex:
+ logger.warning("AAD authentication failed: %s", ex)
+
+ # Fallback to key
+ if key:
+ try:
+ logger.info("Falling back to endpoint + key authentication for Cosmos DB...")
+ client = CosmosClient(endpoint, key)
+ # Validate key by a light operation
+ _ = list(client.list_databases())
+ logger.info("Authenticated to Cosmos DB with endpoint+key.")
+ return client
+ except Exception as ex:
+ logger.error("Endpoint+key authentication failed: %s", ex)
+ raise
+
+ # If we reach here, both auth methods failed or no key provided
+ raise RuntimeError("Failed to authenticate to Cosmos DB using DefaultAzureCredential and no valid COSMOS_DB_KEY was provided")
+
+def main():
+ # 1. Read data from CSV
+ logger.info(f"Reading data from {CSV_FILE}...")
+ df = pd.read_csv(CSV_FILE, encoding='utf-8', quoting=1) # quoting=1 is csv.QUOTE_ALL
+
+ # Create content for vector search
+ df['content_for_vector'] = (
+ df['ProductName'].fillna('').astype(str) + ' | ' +
+ df['ProductCategory'].fillna('').astype(str) + ' | ' +
+ df['ProductDescription'].fillna('').astype(str)
+ )
+
+ logger.info(f"Loaded {len(df)} products from CSV")
+
+ # 2. Connect to Cosmos DB
+ client = get_cosmos_client(COSMOS_ENDPOINT, COSMOS_KEY)
+
+ if not DATABASE_NAME:
+ raise ValueError("COSMOS_DB_NAME must be provided in environment variables")
+
+ if not CONTAINER_NAME:
+ raise ValueError("COSMOS_DB_CONTAINER_NAME must be provided in environment variables")
+
+ database = client.create_database_if_not_exists(id=DATABASE_NAME)
+ logger.info(f"Connected to database: {DATABASE_NAME}")
+
+ container = database.create_container_if_not_exists(
+ id=CONTAINER_NAME,
+ partition_key=PartitionKey(path="/ProductID")
+ )
+ logger.info(f"Connected to container: {CONTAINER_NAME}")
+
+ # Check existing item count (lightweight)
+ existing_count = 0
+ try:
+ count_query = list(container.query_items(
+ query="SELECT VALUE COUNT(1) FROM c",
+ enable_cross_partition_query=True
+ ))
+ if count_query:
+ raw_val = count_query[0]
+ if isinstance(raw_val, dict):
+ for k in ("$1", "count", "COUNT"):
+ if k in raw_val:
+ raw_val = raw_val[k]
+ break
+ if isinstance(raw_val, (int, float, str)):
+ existing_count = int(raw_val)
+ except Exception as ex:
+ logger.warning(f"Count query failed (will ignore): {ex}")
+
+ if existing_count > 0 and SKIP_IF_EXISTS and not FORCE_INGEST:
+ logger.info(
+ f"Container already has {existing_count} items. Skipping ingestion (SKIP_IF_EXISTS=true, FORCE_INGEST=false)."
+ )
+ return
+
+ # 3. Upload items
+ logger.info("Starting data upload to Cosmos DB...")
+ for idx, row in enumerate(df.itertuples(index=False), start=1):
+ # Convert row to dict
+ item = row._asdict()
+ item['id'] = str(item['ProductID'])
+ item['ProductID'] = str(item['ProductID'])
+
+ # Insert or update item
+ container.upsert_item(body=item)
+ if idx % 10 == 0:
+ logger.info(f"Uploaded {idx}/{len(df)} products")
+
+ logger.info(f"Successfully uploaded all {len(df)} products to Cosmos DB.")
+
+if __name__ == "__main__":
+ main()
diff --git a/src/pipelines/upload_to_search.py b/src/pipelines/upload_to_search.py
new file mode 100644
index 0000000..f689233
--- /dev/null
+++ b/src/pipelines/upload_to_search.py
@@ -0,0 +1,122 @@
+import logging
+import os
+from azure.cosmos import CosmosClient
+from azure.search.documents import SearchClient
+from azure.core.credentials import AzureKeyCredential
+from azure.identity import DefaultAzureCredential
+from dotenv import load_dotenv
+
+load_dotenv()
+
+# Configuration
+COSMOS_ENDPOINT = os.environ.get("COSMOS_DB_ENDPOINT")
+COSMOS_KEY = os.environ.get("COSMOS_DB_KEY")
+DATABASE_NAME = os.environ.get("COSMOS_DB_NAME")
+CONTAINER_NAME = os.environ.get("COSMOS_DB_CONTAINER_NAME")
+SEARCH_ENDPOINT = os.environ.get("SEARCH_SERVICE_ENDPOINT")
+SEARCH_KEY = os.environ.get("SEARCH_SERVICE_KEY")
+INDEX_NAME = os.environ.get("SEARCH_INDEX_NAME", "products-index")
+
+# Configure logging
+logger = logging.getLogger(__name__)
+logging.basicConfig(level=logging.INFO)
+
+def get_cosmos_client(endpoint: str, key: str | None = None):
+ """Get Cosmos DB client with AAD or key-based auth."""
+ if not endpoint:
+ raise ValueError("COSMOS_DB_ENDPOINT must be provided")
+
+ # Try AAD first
+ try:
+ logger.info("Attempting to authenticate to Cosmos DB using DefaultAzureCredential (AAD)...")
+ credential = DefaultAzureCredential()
+ client = CosmosClient(endpoint, credential=credential)
+ # Validate
+ _ = list(client.list_databases())
+ logger.info("Authenticated to Cosmos DB with DefaultAzureCredential.")
+ return client
+ except Exception as ex:
+ logger.warning(f"AAD authentication failed: {ex}")
+
+ # Fallback to key
+ if key:
+ try:
+ logger.info("Falling back to key-based authentication for Cosmos DB...")
+ client = CosmosClient(endpoint, key)
+ # Validate
+ _ = list(client.list_databases())
+ logger.info("Authenticated to Cosmos DB with key.")
+ return client
+ except Exception as ex:
+ logger.error(f"Key authentication failed: {ex}")
+
+ raise RuntimeError("Failed to authenticate to Cosmos DB")
+
+def upload_documents_to_search():
+ """Read documents from Cosmos DB and upload directly to Azure AI Search."""
+
+ # Connect to Cosmos DB
+ cosmos_client = get_cosmos_client(COSMOS_ENDPOINT, COSMOS_KEY)
+ database = cosmos_client.get_database_client(DATABASE_NAME)
+ container = database.get_container_client(CONTAINER_NAME)
+
+ # Get all documents from Cosmos DB
+ logger.info(f"Reading documents from Cosmos DB container: {CONTAINER_NAME}...")
+ query = "SELECT * FROM c"
+ items = list(container.query_items(query=query, enable_cross_partition_query=True))
+ logger.info(f"Retrieved {len(items)} documents from Cosmos DB")
+
+ if len(items) == 0:
+ logger.warning("No documents found in Cosmos DB container")
+ return
+
+ # Connect to Search
+ if SEARCH_KEY:
+ search_credential = AzureKeyCredential(SEARCH_KEY)
+ else:
+ search_credential = DefaultAzureCredential()
+
+ search_client = SearchClient(endpoint=SEARCH_ENDPOINT, index_name=INDEX_NAME, credential=search_credential)
+
+ # Prepare documents for upload
+ documents = []
+ for item in items:
+ # Map Cosmos DB fields to Search index fields
+ doc = {
+ "id": str(item.get("id", item.get("ProductID"))), # Use Cosmos id or ProductID
+ "ProductID": str(item.get("ProductID")),
+ "ProductName": item.get("ProductName"),
+ "ProductCategory": item.get("ProductCategory"),
+ "ProductDescription": item.get("ProductDescription"),
+ "ProductPrice": float(item.get("Price", item.get("ProductPrice", 0.0))),
+ "ProductImageURL": item.get("ImageUrl", item.get("ProductImageURL", "")),
+ "content_for_vector": item.get("content_for_vector", "")
+ }
+ documents.append(doc)
+
+ # Upload documents in a single call (note: Azure AI Search caps one batch at 1000 docs)
+ logger.info(f"Uploading {len(documents)} documents to Azure AI Search index: {INDEX_NAME}...")
+ try:
+ result = search_client.upload_documents(documents=documents)
+ success_count = sum(1 for r in result if r.succeeded)
+ failed_count = len(result) - success_count
+
+ logger.info(f"Upload completed: {success_count} succeeded, {failed_count} failed")
+
+ if failed_count > 0:
+ for r in result:
+ if not r.succeeded:
+ logger.error(f"Failed to upload document {r.key}: {r.error_message}")
+
+ return success_count
+ except Exception as e:
+ logger.error(f"Failed to upload documents to search: {e}")
+ raise
+
+def main():
+ logger.info("Starting data upload from Cosmos DB to Azure AI Search...")
+ count = upload_documents_to_search()
+ logger.info(f"Data upload completed successfully. {count} documents uploaded.")
+
+if __name__ == "__main__":
+ main()
diff --git a/src/requirements.txt b/src/requirements.txt
new file mode 100644
index 0000000..8f79d8d
--- /dev/null
+++ b/src/requirements.txt
@@ -0,0 +1,8 @@
+requests==2.32.3
+python-dotenv==1.0.1
+pandas>=2.2.2
+azure-cosmos==4.9.0
+azure-identity==1.19.0
+azure-search-documents==11.6.0
+openai==1.54.5
+azure-ai-inference==1.0.0b6
diff --git a/src/verify_data.py b/src/verify_data.py
new file mode 100644
index 0000000..e30444e
--- /dev/null
+++ b/src/verify_data.py
@@ -0,0 +1,24 @@
+from azure.cosmos import CosmosClient
+from azure.identity import DefaultAzureCredential
+import os
+from dotenv import load_dotenv
+import json
+
+load_dotenv()
+
+credential = DefaultAzureCredential()
+client = CosmosClient(os.environ['COSMOS_DB_ENDPOINT'], credential)
+db = client.get_database_client(os.environ['COSMOS_DB_NAME'])
+container = db.get_container_client(os.environ['COSMOS_DB_CONTAINER_NAME'])
+
+# Count total items
+count = list(container.query_items('SELECT VALUE COUNT(1) FROM c', enable_cross_partition_query=True))[0]
+print(f'✓ Total items in Cosmos DB container: {count}')
+
+# Get sample products
+items = list(container.query_items('SELECT TOP 3 c.ProductID, c.ProductName, c.ProductCategory, c.Price FROM c ORDER BY c.ProductID', enable_cross_partition_query=True))
+print('\n✓ Sample products:')
+for item in items:
+ print(f" - {item['ProductID']}: {item['ProductName']} ({item['ProductCategory']}) - ${item['Price']}")
+
+print('\n✓ Data successfully loaded into Cosmos DB!')
diff --git a/src/verify_search.py b/src/verify_search.py
new file mode 100644
index 0000000..e11e63e
--- /dev/null
+++ b/src/verify_search.py
@@ -0,0 +1,27 @@
+from azure.search.documents import SearchClient
+from azure.core.credentials import AzureKeyCredential
+from dotenv import load_dotenv
+import os
+
+load_dotenv()
+
+credential = AzureKeyCredential(os.environ['SEARCH_SERVICE_KEY'])
+client = SearchClient(
+ endpoint=os.environ['SEARCH_SERVICE_ENDPOINT'],
+ index_name=os.environ['SEARCH_INDEX_NAME'],
+ credential=credential
+)
+
+# Count documents
+results = client.search(search_text='*', include_total_count=True)
+total_count = results.get_count()
+print(f'✓ Total documents in Azure AI Search index: {total_count}')
+
+# Show sample products
+print('\n✓ Sample products:')
+for i, doc in enumerate(results):
+ print(f" - {doc['ProductID']}: {doc['ProductName']} ({doc['ProductCategory']}) - ${doc['ProductPrice']}")
+ if i >= 2:
+ break
+
+print('\n✓ Data successfully loaded into Azure AI Search!')
diff --git a/terraform-infrastructure/README.md b/terraform-infrastructure/README.md
index cb38b65..39c766e 100644
--- a/terraform-infrastructure/README.md
+++ b/terraform-infrastructure/README.md
@@ -119,7 +119,7 @@ graph TD;
-

-
Refresh Date: 2025-11-22
+

+
Refresh Date: 2025-11-24
diff --git a/terraform-infrastructure/main.tf b/terraform-infrastructure/main.tf
index ece0b39..652b2f0 100644
--- a/terraform-infrastructure/main.tf
+++ b/terraform-infrastructure/main.tf
@@ -15,19 +15,20 @@ resource "random_id" "suffix" {
locals {
# Use provided user_principal_id or default to current Azure CLI user
- principal_id = var.user_principal_id != null ? var.user_principal_id : data.azurerm_client_config.current.object_id
- suffix = substr(random_id.suffix.hex, 0, 8)
- cosmos_account_name = "${var.name_prefix}${local.suffix}cosmosdb"
- cosmos_db_name = "zava"
- storage_account = lower(replace("${var.name_prefix}${local.suffix}sa", "-", ""))
- ai_foundry_name = "aif-${local.suffix}" # custom subdomain
- ai_project_name = "proj-${local.suffix}"
- search_service_name = "${var.name_prefix}-${local.suffix}-search"
- app_service_plan = "${var.name_prefix}-${local.suffix}-asp"
- log_analytics_name = "${var.name_prefix}-${local.suffix}-la"
- app_insights_name = "${var.name_prefix}-${local.suffix}-ai"
- registry_name = lower(replace("${var.name_prefix}${local.suffix}cosureg", "-", ""))
- web_app_name = "${var.name_prefix}-${local.suffix}-app"
+ principal_id = var.user_principal_id != null ? var.user_principal_id : data.azurerm_client_config.current.object_id
+ suffix = substr(random_id.suffix.hex, 0, 8)
+ cosmos_account_name = "${var.name_prefix}${local.suffix}cosmosdb"
+ cosmos_db_name = "zava"
+ storage_account = lower(replace("${var.name_prefix}${local.suffix}sa", "-", ""))
+ ai_foundry_name = "aif-${local.suffix}" # custom subdomain
+ ai_project_name = "proj-${local.suffix}"
+ search_service_name = "${var.name_prefix}-${local.suffix}-search"
+ app_service_plan = "${var.name_prefix}-${local.suffix}-asp"
+ log_analytics_name = "${var.name_prefix}-${local.suffix}-la"
+ app_insights_name = "${var.name_prefix}-${local.suffix}-ai"
+ registry_name = lower(replace("${var.name_prefix}${local.suffix}cosureg", "-", ""))
+ web_app_name = "${var.name_prefix}-${local.suffix}-app"
+ cosmos_connection_auth_type = var.enable_cosmos_local_auth ? "AccountKey" : "AAD"
}
resource "azurerm_cosmosdb_account" "cosmos" {
@@ -45,9 +46,9 @@ resource "azurerm_cosmosdb_account" "cosmos" {
location = var.location
failover_priority = 0
}
- free_tier_enabled = false
- analytical_storage_enabled = false
- local_authentication_disabled = !var.enable_cosmos_local_auth
+ free_tier_enabled = false
+ analytical_storage_enabled = false
+ local_authentication_disabled = !var.enable_cosmos_local_auth
}
resource "azurerm_cosmosdb_sql_database" "cosmosdb" {
@@ -57,28 +58,37 @@ resource "azurerm_cosmosdb_sql_database" "cosmosdb" {
throughput = 400
}
+resource "azurerm_cosmosdb_sql_container" "products" {
+ name = "product_catalog"
+ resource_group_name = azurerm_resource_group.rg.name
+ account_name = azurerm_cosmosdb_account.cosmos.name
+ database_name = azurerm_cosmosdb_sql_database.cosmosdb.name
+ partition_key_paths = ["/ProductID"]
+ throughput = 400
+}
+
# Storage account using AzAPI to bypass policy restrictions
resource "azapi_resource" "storage" {
type = "Microsoft.Storage/storageAccounts@2023-01-01"
name = local.storage_account
location = var.location
parent_id = azurerm_resource_group.rg.id
-
+
body = jsonencode({
sku = {
name = "Standard_LRS"
}
kind = "StorageV2"
properties = {
- accessTier = "Hot"
- allowSharedKeyAccess = true
+ accessTier = "Hot"
+ allowSharedKeyAccess = true
defaultToOAuthAuthentication = false
- allowBlobPublicAccess = false
- minimumTlsVersion = "TLS1_2"
- supportsHttpsTrafficOnly = true
+ allowBlobPublicAccess = false
+ minimumTlsVersion = "TLS1_2"
+ supportsHttpsTrafficOnly = true
}
})
-
+
identity {
type = "SystemAssigned"
}
@@ -86,10 +96,10 @@ resource "azapi_resource" "storage" {
# AI Foundry account (preview) using AzAPI provider.
resource "azapi_resource" "ai_foundry" {
- type = "Microsoft.CognitiveServices/accounts@2025-06-01"
- name = local.ai_foundry_name
- location = var.location
- parent_id = azurerm_resource_group.rg.id
+ type = "Microsoft.CognitiveServices/accounts@2025-06-01"
+ name = local.ai_foundry_name
+ location = var.location
+ parent_id = azurerm_resource_group.rg.id
schema_validation_enabled = false
identity { type = "SystemAssigned" }
body = jsonencode({
@@ -104,13 +114,13 @@ resource "azapi_resource" "ai_foundry" {
}
resource "azapi_resource" "ai_project" {
- type = "Microsoft.CognitiveServices/accounts/projects@2025-06-01"
- name = local.ai_project_name
- location = var.location
- parent_id = azapi_resource.ai_foundry.id
+ type = "Microsoft.CognitiveServices/accounts/projects@2025-06-01"
+ name = local.ai_project_name
+ location = var.location
+ parent_id = azapi_resource.ai_foundry.id
schema_validation_enabled = false
identity { type = "SystemAssigned" }
- body = jsonencode({ properties = {} })
+ body = jsonencode({ properties = {} })
depends_on = [azapi_resource.ai_foundry]
}
@@ -157,7 +167,7 @@ resource "azurerm_container_registry_webhook" "webhook" {
status = "enabled"
scope = "${local.suffix}/techworkshopl300/zava:latest"
actions = ["push"]
-
+
custom_headers = {
"Content-Type" = "application/json"
}
@@ -185,7 +195,7 @@ resource "azurerm_linux_web_app" "app" {
docker_image_name = "${local.registry_name}.azurecr.io/${local.suffix}/techworkshopl300/zava:latest"
docker_registry_url = "https://${local.registry_name}.azurecr.io"
}
- http2_enabled = true
+ http2_enabled = true
minimum_tls_version = "1.2"
}
@@ -297,7 +307,7 @@ resource "azurerm_role_assignment" "storage_blob_data_contributor_project" {
# Azure AI model deployments automation
resource "null_resource" "ai_model_deployments" {
count = var.enable_ai_automation ? 1 : 0
-
+
depends_on = [
azapi_resource.ai_project,
azapi_resource.ai_foundry,
@@ -305,7 +315,7 @@ resource "null_resource" "ai_model_deployments" {
]
provisioner "local-exec" {
- command = <<-EOT
+ command = <<-EOT
# Create AI model deployments
Write-Host "Creating Azure AI model deployments..."
@@ -395,58 +405,221 @@ resource "null_resource" "ai_model_deployments" {
}
}
-# Connect resources to Azure AI Foundry project
-resource "null_resource" "ai_project_connections" {
+# Connection helper actions for Foundry resources
+data "azapi_resource_action" "storage_list_keys" {
+ count = var.enable_ai_automation ? 1 : 0
+ type = "Microsoft.Storage/storageAccounts@2023-01-01"
+ resource_id = azapi_resource.storage.id
+ action = "listKeys"
+ response_export_values = ["keys"]
+ body = jsonencode({})
+ depends_on = [azapi_resource.storage]
+}
+
+data "azapi_resource_action" "search_admin_keys" {
+ count = var.enable_ai_automation ? 1 : 0
+ type = "Microsoft.Search/searchServices@2025-02-01-preview"
+ resource_id = azurerm_search_service.search.id
+ action = "listAdminKeys"
+ response_export_values = ["primaryKey"]
+ body = jsonencode({})
+ depends_on = [azurerm_search_service.search]
+}
+
+data "azapi_resource_action" "cosmos_keys" {
+ count = (var.enable_ai_automation && var.enable_cosmos_local_auth) ? 1 : 0
+ type = "Microsoft.DocumentDB/databaseAccounts@2024-11-15"
+ resource_id = azurerm_cosmosdb_account.cosmos.id
+ action = "listKeys"
+ response_export_values = ["primaryMasterKey"]
+ body = jsonencode({})
+ depends_on = [azurerm_cosmosdb_account.cosmos]
+}
+
+# Connect resources to Azure AI Foundry project using ARM templates
+resource "azapi_resource" "storage_connection" {
count = var.enable_ai_automation ? 1 : 0
-
+
+ type = "Microsoft.CognitiveServices/accounts/connections@2025-04-01-preview"
+ name = "${local.ai_foundry_name}-storage"
+ parent_id = azapi_resource.ai_foundry.id
+ schema_validation_enabled = false
+
+ depends_on = [
+ azapi_resource.storage,
+ azapi_resource.ai_foundry
+ ]
+
+ body = jsonencode({
+ properties = {
+ category = "AzureStorageAccount"
+ target = "https://${local.storage_account}.blob.core.windows.net"
+ authType = "AccountKey"
+ isSharedToAll = true
+ credentials = {
+ key = jsondecode(data.azapi_resource_action.storage_list_keys[0].output).keys[0].value
+ }
+ metadata = {
+ ApiType = "Azure"
+ ResourceId = azapi_resource.storage.id
+ }
+ }
+ })
+}
+
+resource "azapi_resource" "app_insights_connection" {
+ count = var.enable_ai_automation ? 1 : 0
+
+ type = "Microsoft.CognitiveServices/accounts/connections@2025-04-01-preview"
+ name = "${local.ai_foundry_name}-appinsights"
+ parent_id = azapi_resource.ai_foundry.id
+ schema_validation_enabled = false
+
depends_on = [
- null_resource.ai_model_deployments,
azurerm_application_insights.appinsights,
- azapi_resource.storage
+ azapi_resource.ai_foundry
]
- provisioner "local-exec" {
- command = <<-EOT
- Write-Host "Verifying Azure AI Foundry project configuration..."
-
- # Check if Azure ML extension is installed
- $mlExtension = az extension list --query "[?name=='ml'].name" --output tsv
- if (-not $mlExtension) {
- Write-Host "Installing Azure ML extension..."
- az extension add --name ml
+ body = jsonencode({
+ properties = {
+ category = "AppInsights"
+ target = azurerm_application_insights.appinsights.id
+ authType = "ApiKey"
+ isSharedToAll = true
+ credentials = {
+ key = azurerm_application_insights.appinsights.connection_string
}
-
- # Set the AI project as the default workspace for future ML operations
- az config set defaults.workspace="${local.ai_project_name}"
- az config set defaults.group="${azurerm_resource_group.rg.name}"
-
- Write-Host "Azure AI project configuration completed successfully."
- Write-Host "Project Name: ${local.ai_project_name}"
+ metadata = {
+ ApiType = "Azure"
+ ResourceId = azurerm_application_insights.appinsights.id
+ }
+ }
+ })
+}
+
+resource "azapi_resource" "search_connection" {
+ count = var.enable_ai_automation ? 1 : 0
+
+ type = "Microsoft.CognitiveServices/accounts/connections@2025-04-01-preview"
+ name = "${local.ai_foundry_name}-aisearch"
+ parent_id = azapi_resource.ai_foundry.id
+ schema_validation_enabled = false
+
+ depends_on = [
+ azurerm_search_service.search,
+ azapi_resource.ai_foundry
+ ]
+
+ body = jsonencode({
+ properties = {
+ category = "CognitiveSearch"
+ target = "https://${local.search_service_name}.search.windows.net"
+ authType = "ApiKey"
+ isSharedToAll = true
+ credentials = {
+ key = jsondecode(data.azapi_resource_action.search_admin_keys[0].output).primaryKey
+ }
+ metadata = {
+ ApiType = "Azure"
+ ResourceId = azurerm_search_service.search.id
+ location = azurerm_search_service.search.location
+ }
+ }
+ })
+}
+
+resource "azapi_resource" "cosmos_connection" {
+ count = var.enable_ai_automation ? 1 : 0
+
+ type = "Microsoft.CognitiveServices/accounts/connections@2025-04-01-preview"
+ name = "${local.ai_foundry_name}-cosmosdb"
+ parent_id = azapi_resource.ai_foundry.id
+ schema_validation_enabled = false
+
+ depends_on = [
+ azurerm_cosmosdb_account.cosmos,
+ azapi_resource.ai_foundry
+ ]
+
+ body = jsonencode({
+ properties = merge({
+ category = "CosmosDb"
+ target = azurerm_cosmosdb_account.cosmos.endpoint
+ authType = local.cosmos_connection_auth_type
+ isSharedToAll = true
+ metadata = {
+ ApiType = "Azure"
+ ResourceId = azurerm_cosmosdb_account.cosmos.id
+ location = azurerm_cosmosdb_account.cosmos.location
+ }
+ }, var.enable_cosmos_local_auth ? {
+ credentials = {
+ key = jsondecode(data.azapi_resource_action.cosmos_keys[0].output).primaryMasterKey
+ }
+ } : {})
+ })
+}
+
+# Verification script for connections
+resource "null_resource" "verify_connections" {
+ count = var.enable_ai_automation ? 1 : 0
+
+ depends_on = [
+ azapi_resource.storage_connection,
+ azapi_resource.app_insights_connection,
+ azapi_resource.search_connection,
+ azapi_resource.cosmos_connection
+ ]
+
+ provisioner "local-exec" {
+ command = <<-EOT
+ Write-Host "=== Verifying Microsoft Foundry Project Connections ==="
+ Write-Host ""
+ Write-Host "Project: ${local.ai_project_name}"
Write-Host "AI Foundry: ${local.ai_foundry_name}"
Write-Host "Resource Group: ${azurerm_resource_group.rg.name}"
+ Write-Host ""
+
+ # List connections using Azure CLI
+ Write-Host "Checking connections via Azure CLI..."
+ az rest --method GET --url "https://management.azure.com/subscriptions/${data.azurerm_client_config.current.subscription_id}/resourceGroups/${azurerm_resource_group.rg.name}/providers/Microsoft.CognitiveServices/accounts/${local.ai_foundry_name}/connections?api-version=2025-06-01" --query "value[].{Name:name,Type:properties.connectionType,Target:properties.target}" --output table
+
+ Write-Host ""
+ Write-Host "✓ Microsoft Foundry project connections verification completed!"
+ Write-Host ""
+ Write-Host "Available connections:"
+ Write-Host " - Storage Account: ${local.storage_account}"
+ Write-Host " - Application Insights: ${local.app_insights_name}"
+ Write-Host " - Azure AI Search: ${local.search_service_name}"
+ Write-Host " - Cosmos DB: ${local.cosmos_account_name}"
+ Write-Host ""
+ Write-Host "View in Azure Portal:"
+ Write-Host " https://ai.azure.com/resource/overview/${local.ai_foundry_name}"
+ Write-Host " Navigate to Management center > Connected resources"
EOT
interpreter = ["PowerShell", "-Command"]
}
triggers = {
- storage_id = azapi_resource.storage.id
- app_insights_id = azurerm_application_insights.appinsights.id
- ai_project_id = azapi_resource.ai_project.id
+ storage_conn = var.enable_ai_automation ? azapi_resource.storage_connection[0].id : ""
+ app_insights_conn = var.enable_ai_automation ? azapi_resource.app_insights_connection[0].id : ""
+ search_conn = var.enable_ai_automation ? azapi_resource.search_connection[0].id : ""
+ cosmos_conn = var.enable_ai_automation ? azapi_resource.cosmos_connection[0].id : ""
}
}
# Create .env file with all necessary configuration
resource "null_resource" "create_env_file" {
count = var.enable_ai_automation ? 1 : 0
-
+
depends_on = [
- null_resource.ai_project_connections,
+ null_resource.verify_connections,
azurerm_cosmosdb_account.cosmos,
azurerm_search_service.search
]
provisioner "local-exec" {
- command = <<-EOT
+ command = <<-EOT
Write-Host "Creating .env file with Azure resource configuration..."
# Create src directory if it doesn't exist
@@ -509,7 +682,9 @@ AZURE_OPENAI_API_VERSION=2024-02-01
COSMOS_DB_ENDPOINT=${azurerm_cosmosdb_account.cosmos.endpoint}
COSMOS_DB_KEY=$cosmosKey
COSMOS_DB_NAME=${local.cosmos_db_name}
-COSMOS_DB_CONTAINER_NAME=products
+COSMOS_DB_CONTAINER_NAME=product_catalog
+COSMOS_SKIP_IF_EXISTS=true
+COSMOS_FORCE_INGEST=false
# Azure AI Search Configuration
SEARCH_SERVICE_ENDPOINT=https://${local.search_service_name}.search.windows.net
@@ -546,7 +721,9 @@ AZURE_OPENAI_API_VERSION=2024-02-01
COSMOS_DB_ENDPOINT=${azurerm_cosmosdb_account.cosmos.endpoint}
COSMOS_DB_KEY=$cosmosKey
COSMOS_DB_NAME=${local.cosmos_db_name}
-COSMOS_DB_CONTAINER_NAME=products
+COSMOS_DB_CONTAINER_NAME=product_catalog
+COSMOS_SKIP_IF_EXISTS=true
+COSMOS_FORCE_INGEST=false
# Azure AI Search Configuration
SEARCH_SERVICE_ENDPOINT=https://${local.search_service_name}.search.windows.net
@@ -589,11 +766,118 @@ AZURE_LOCATION=${var.location}
triggers = {
# Trigger recreation when any of these resources change
- ai_foundry_id = azapi_resource.ai_foundry.id
- ai_project_id = azapi_resource.ai_project.id
- cosmos_id = azurerm_cosmosdb_account.cosmos.id
- search_id = azurerm_search_service.search.id
- storage_id = azapi_resource.storage.id
+ ai_foundry_id = azapi_resource.ai_foundry.id
+ ai_project_id = azapi_resource.ai_project.id
+ cosmos_id = azurerm_cosmosdb_account.cosmos.id
+ search_id = azurerm_search_service.search.id
+ storage_id = azapi_resource.storage.id
app_insights_id = azurerm_application_insights.appinsights.id
}
}
+
+# Data pipeline automation - runs after .env file is created
+resource "null_resource" "data_pipeline" {
+ count = var.enable_data_pipeline ? 1 : 0
+
+ depends_on = [
+ null_resource.create_env_file,
+ azurerm_cosmosdb_sql_database.cosmosdb,
+ azurerm_cosmosdb_sql_container.products
+ ]
+
+ provisioner "local-exec" {
+ command = <<-EOT
+ Write-Host "Starting data pipeline automation..."
+
+ # Navigate to src directory
+ cd ../src
+
+ # Check if Python is available
+ try {
+ $pythonCmd = (Get-Command python -ErrorAction Stop).Source
+ Write-Host "Found Python at: $pythonCmd"
+ } catch {
+ Write-Host "ERROR: Python is not installed or not in PATH"
+ Write-Host "Please install Python 3.10+ from https://www.python.org/downloads/"
+ exit 1
+ }
+
+ # Create virtual environment
+ Write-Host "Creating Python virtual environment..."
+ if (Test-Path "venv") {
+ Write-Host "Virtual environment already exists, removing..."
+ Remove-Item -Recurse -Force venv
+ }
+ python -m venv venv
+
+ # Install dependencies directly to venv without activation
+ Write-Host "Installing Python dependencies (with retry)..."
+ $pythonExe = "venv\Scripts\python.exe"
+ $pipExe = "venv\Scripts\pip.exe"
+
+ if (Test-Path $pythonExe) {
+ & $pythonExe -m pip install --upgrade pip
+ $maxAttempts = 3
+ for ($i = 1; $i -le $maxAttempts; $i++) {
+ Write-Host "pip install attempt $i..."
+ & $pipExe install -r requirements.txt
+ if ($LASTEXITCODE -eq 0) {
+ Write-Host "Dependencies installed successfully on attempt $i"
+ break
+ } else {
+ Write-Host "pip install failed (exit $LASTEXITCODE)."
+ if ($i -lt $maxAttempts) {
+ Write-Host "Retrying after short backoff..."
+ Start-Sleep -Seconds 5
+ } else {
+ Write-Host "ERROR: Dependencies failed after $maxAttempts attempts"
+ exit 1
+ }
+ }
+ }
+
+ Write-Host "Python environment ready"
+ Write-Host ""
+
+ # Check if CSV data file exists
+ $csvFile = "data/updated_product_catalog(in).csv"
+ if (!(Test-Path $csvFile)) {
+ Write-Host "WARNING: CSV data file not found at $csvFile"
+ Write-Host "Please download the product catalog data or place it in the data directory"
+ Write-Host "Skipping data import for now"
+ } else {
+ Write-Host "Step 1: Importing data to Cosmos DB (skip logic flags: COSMOS_SKIP_IF_EXISTS / COSMOS_FORCE_INGEST)..."
+ & $pythonExe pipelines/ingest_to_cosmos.py
+
+ Write-Host ""
+ Write-Host "Step 2: Creating Azure AI Search index..."
+ & $pythonExe pipelines/create_search_index.py
+
+ Write-Host ""
+ Write-Host "Step 3: Uploading data from Cosmos DB to Azure AI Search..."
+ & $pythonExe pipelines/upload_to_search.py
+
+ Write-Host ""
+ Write-Host "Data pipeline completed successfully!"
+ Write-Host "- Cosmos DB container created and populated"
+ Write-Host "- Azure AI Search index created"
+ Write-Host "- Data imported to search index"
+ }
+ } else {
+ Write-Host "ERROR: Failed to create virtual environment"
+ exit 1
+ }
+
+ Write-Host ""
+ Write-Host "Data pipeline automation completed"
+ EOT
+ interpreter = ["PowerShell", "-Command"]
+ working_dir = path.module
+ }
+
+ triggers = {
+ cosmos_db_id = azurerm_cosmosdb_sql_database.cosmosdb.id
+ search_id = azurerm_search_service.search.id
+ env_file_id = null_resource.create_env_file[0].id
+ }
+}
diff --git a/terraform-infrastructure/terraform.tfvars b/terraform-infrastructure/terraform.tfvars
index 0953c4e..2cc89f4 100644
--- a/terraform-infrastructure/terraform.tfvars
+++ b/terraform-infrastructure/terraform.tfvars
@@ -1,4 +1,4 @@
-resource_group_name = "RG-AI-retailw3"
+resource_group_name = "RG-AI-retailbrw5"
location = "westus3"
name_prefix = "zava"
# user_principal_id is optional - defaults to current Azure CLI user (az login)
diff --git a/terraform-infrastructure/variables.tf b/terraform-infrastructure/variables.tf
index bda5d3d..f69a72e 100644
--- a/terraform-infrastructure/variables.tf
+++ b/terraform-infrastructure/variables.tf
@@ -32,3 +32,10 @@ variable "enable_ai_automation" {
description = "Whether to run Azure AI Foundry automation steps (model deployments, connections, .env creation)"
default = true
}
+
+variable "enable_data_pipeline" {
+ type = bool
+ description = "Whether to run data pipeline automation (requires Python and data files)"
+ default = true
+}
+