|
| 1 | +# Copyright 2025 Collate |
| 2 | +# Licensed under the Collate Community License, Version 1.0 (the "License"); |
| 3 | +# you may not use this file except in compliance with the License. |
| 4 | +# You may obtain a copy of the License at |
| 5 | +# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE |
| 6 | +# Unless required by applicable law or agreed to in writing, software |
| 7 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 8 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 9 | +# See the License for the specific language governing permissions and |
| 10 | +# limitations under the License. |
| 11 | + |
| 12 | +""" |
| 13 | +Google Drive connection and helpers |
| 14 | +""" |
| 15 | +import traceback |
| 16 | +from functools import partial |
| 17 | +from typing import Optional |
| 18 | + |
| 19 | +from google.auth import default |
| 20 | +from googleapiclient.discovery import Resource, build |
| 21 | + |
| 22 | +from metadata.generated.schema.entity.automations.workflow import ( |
| 23 | + Workflow as AutomationWorkflow, |
| 24 | +) |
| 25 | +from metadata.generated.schema.entity.services.connections.drive.googleDriveConnection import ( |
| 26 | + GoogleDriveConnection, |
| 27 | +) |
| 28 | +from metadata.generated.schema.entity.services.connections.testConnectionResult import ( |
| 29 | + TestConnectionResult, |
| 30 | +) |
| 31 | +from metadata.ingestion.connections.test_connections import ( |
| 32 | + SourceConnectionException, |
| 33 | + test_connection_steps, |
| 34 | +) |
| 35 | +from metadata.ingestion.ometa.ometa_api import OpenMetadata |
| 36 | +from metadata.utils.constants import THREE_MIN |
| 37 | +from metadata.utils.credentials import set_google_credentials |
| 38 | +from metadata.utils.logger import ingestion_logger |
| 39 | + |
| 40 | +logger = ingestion_logger() |
| 41 | + |
| 42 | + |
| 43 | +class GoogleDriveClient: |
| 44 | + """ |
| 45 | + Wrapper around Google Sheets and Drive API clients |
| 46 | + """ |
| 47 | + |
| 48 | + def __init__(self, sheets_service: Resource, drive_service: Resource): |
| 49 | + self.sheets_service = sheets_service |
| 50 | + self.drive_service = drive_service |
| 51 | + |
| 52 | + |
| 53 | +def get_connection(connection: GoogleDriveConnection) -> GoogleDriveClient: |
| 54 | + """ |
| 55 | + Create connection to Google Drive |
| 56 | + """ |
| 57 | + scopes = ( |
| 58 | + connection.scopes |
| 59 | + if hasattr(connection, "scopes") and connection.scopes |
| 60 | + else [ |
| 61 | + "https://www.googleapis.com/auth/spreadsheets.readonly", |
| 62 | + "https://www.googleapis.com/auth/drive.readonly", |
| 63 | + "https://www.googleapis.com/auth/drive.metadata.readonly", |
| 64 | + ] |
| 65 | + ) |
| 66 | + |
| 67 | + # Set Google credentials using the utility function |
| 68 | + set_google_credentials(gcp_credentials=connection.credentials) |
| 69 | + |
| 70 | + # Get default credentials - this will use the credentials set by set_google_credentials |
| 71 | + credentials, _ = default(scopes=scopes) |
| 72 | + |
| 73 | + # Handle impersonation if configured |
| 74 | + if ( |
| 75 | + connection.credentials.gcpImpersonateServiceAccount |
| 76 | + and connection.credentials.gcpImpersonateServiceAccount.impersonateServiceAccount |
| 77 | + ): |
| 78 | + from google.auth import ( # pylint: disable=import-outside-toplevel |
| 79 | + impersonated_credentials, |
| 80 | + ) |
| 81 | + |
| 82 | + credentials = impersonated_credentials.Credentials( |
| 83 | + source_credentials=credentials, |
| 84 | + target_principal=connection.credentials.gcpImpersonateServiceAccount.impersonateServiceAccount, |
| 85 | + target_scopes=scopes, |
| 86 | + lifetime=connection.credentials.gcpImpersonateServiceAccount.lifetime, |
| 87 | + ) |
| 88 | + |
| 89 | + # Build the services |
| 90 | + sheets_service = build("sheets", "v4", credentials=credentials) |
| 91 | + drive_service = build("drive", "v3", credentials=credentials) |
| 92 | + |
| 93 | + return GoogleDriveClient(sheets_service, drive_service) |
| 94 | + |
| 95 | + |
| 96 | +def test_connection( |
| 97 | + metadata: OpenMetadata, |
| 98 | + client: GoogleDriveClient, |
| 99 | + service_connection: GoogleDriveConnection, |
| 100 | + automation_workflow: Optional[AutomationWorkflow] = None, |
| 101 | + timeout_seconds: Optional[int] = THREE_MIN, |
| 102 | +) -> TestConnectionResult: |
| 103 | + """ |
| 104 | + Test connection to Google Drive |
| 105 | + """ |
| 106 | + logger.info("Starting Google Drive test connection") |
| 107 | + |
| 108 | + def check_access(): |
| 109 | + """ |
| 110 | + Check if we can access Google Drive API |
| 111 | + """ |
| 112 | + try: |
| 113 | + # Try to get user info - this will fail if credentials are invalid |
| 114 | + about = client.drive_service.about().get(fields="user").execute() |
| 115 | + user_email = about.get("user", {}).get("emailAddress", "Unknown") |
| 116 | + logger.info(f"Successfully authenticated as: {user_email}") |
| 117 | + except Exception as exc: |
| 118 | + logger.debug(f"Access check error traceback: {traceback.format_exc()}") |
| 119 | + raise SourceConnectionException(f"Failed to access Google Drive API: {exc}") |
| 120 | + |
| 121 | + def get_drive_files(): |
| 122 | + """ |
| 123 | + Test listing drive files |
| 124 | + """ |
| 125 | + try: |
| 126 | + logger.info("Testing Google Drive file listing") |
| 127 | + |
| 128 | + # Query for a small number of files to test access |
| 129 | + query = "trashed=false" |
| 130 | + |
| 131 | + results = ( |
| 132 | + client.drive_service.files() |
| 133 | + .list( |
| 134 | + q=query, |
| 135 | + pageSize=5, |
| 136 | + fields="files(id, name, mimeType)", |
| 137 | + supportsAllDrives=True, |
| 138 | + includeItemsFromAllDrives=True, |
| 139 | + ) |
| 140 | + .execute() |
| 141 | + ) |
| 142 | + |
| 143 | + files = results.get("files", []) |
| 144 | + logger.info(f"Found {len(files)} files in Drive (sample)") |
| 145 | + |
| 146 | + # Also test for shared drives |
| 147 | + logger.info("Testing shared drive access") |
| 148 | + try: |
| 149 | + shared_results = ( |
| 150 | + client.drive_service.drives() |
| 151 | + .list(pageSize=5, fields="drives(id, name)") |
| 152 | + .execute() |
| 153 | + ) |
| 154 | + shared_drives = shared_results.get("drives", []) |
| 155 | + logger.info(f"Found {len(shared_drives)} shared drives") |
| 156 | + for drive in shared_drives: |
| 157 | + logger.info( |
| 158 | + f"Shared drive: {drive.get('name')} (ID: {drive.get('id')})" |
| 159 | + ) |
| 160 | + except Exception as shared_exc: |
| 161 | + logger.warning(f"Could not access shared drives: {shared_exc}") |
| 162 | + |
| 163 | + except Exception as exc: |
| 164 | + logger.debug(f"Drive files test error traceback: {traceback.format_exc()}") |
| 165 | + raise SourceConnectionException(f"Failed to list drive files: {exc}") |
| 166 | + |
| 167 | + def get_spreadsheets(include_sheets: bool = False): |
| 168 | + """ |
| 169 | + Test listing spreadsheets if Google Sheets is included |
| 170 | + """ |
| 171 | + if not include_sheets: |
| 172 | + return |
| 173 | + |
| 174 | + try: |
| 175 | + logger.info("Testing Google Sheets spreadsheet listing") |
| 176 | + |
| 177 | + # Query for Google Sheets files |
| 178 | + query = ( |
| 179 | + "mimeType='application/vnd.google-apps.spreadsheet' and trashed=false" |
| 180 | + ) |
| 181 | + |
| 182 | + results = ( |
| 183 | + client.drive_service.files() |
| 184 | + .list(q=query, pageSize=5, fields="files(id, name)") |
| 185 | + .execute() |
| 186 | + ) |
| 187 | + |
| 188 | + files = results.get("files", []) |
| 189 | + logger.info(f"Found {len(files)} spreadsheets") |
| 190 | + |
| 191 | + except Exception as exc: |
| 192 | + logger.debug(f"Spreadsheet test error traceback: {traceback.format_exc()}") |
| 193 | + raise SourceConnectionException(f"Failed to list spreadsheets: {exc}") |
| 194 | + |
| 195 | + test_fn = { |
| 196 | + "CheckAccess": check_access, |
| 197 | + "GetDriveFiles": get_drive_files, |
| 198 | + "GetSpreadsheets": partial( |
| 199 | + get_spreadsheets, include_sheets=service_connection.includeGoogleSheets |
| 200 | + ), |
| 201 | + } |
| 202 | + |
| 203 | + return test_connection_steps( |
| 204 | + metadata=metadata, |
| 205 | + test_fn=test_fn, |
| 206 | + service_type=service_connection.type.value, |
| 207 | + automation_workflow=automation_workflow, |
| 208 | + timeout_seconds=timeout_seconds, |
| 209 | + ) |
0 commit comments