From 20f3440b5807379aba8ad528c44f3a0d672d6b35 Mon Sep 17 00:00:00 2001 From: Jacques Raphanel Date: Thu, 4 Jun 2026 18:36:35 +0000 Subject: [PATCH 1/5] fix: define dynamic default pg values --- .../infrastructure/emit/postgres/emitter.py | 85 ++++++++++++++++++- .../emit/test_postgres_emitter.py | 49 +++++++++++ 2 files changed, 133 insertions(+), 1 deletion(-) diff --git a/db2sql/infrastructure/emit/postgres/emitter.py b/db2sql/infrastructure/emit/postgres/emitter.py index b7d04f5..09c450d 100644 --- a/db2sql/infrastructure/emit/postgres/emitter.py +++ b/db2sql/infrastructure/emit/postgres/emitter.py @@ -2,12 +2,47 @@ from __future__ import annotations +import re from typing import Any, Dict, Iterable, Mapping, Optional from db2sql.application.ports import OutputSink from db2sql.domain.model import Column, Database, Schema, Table from db2sql.domain.policy import drop_order, normalize_identifier, topological_order +# Source-side scalar functions that have no PG equivalent under the same name. +# Keys are lowercased, parens stripped; values are PG expressions to substitute. +# Matched regardless of whether the source wrote them with empty parens +# (``getdate()``) or as bare keywords (Oracle ``SYSDATE``). 
+_DEFAULT_FUNCTION_MAP: Dict[str, str] = { + # MSSQL date/time + "getdate": "now()", + "sysdatetime": "LOCALTIMESTAMP", + "getutcdate": "(now() AT TIME ZONE 'utc')", + "sysutcdatetime": "(now() AT TIME ZONE 'utc')", + "sysdatetimeoffset": "now()", + # Oracle date/time (bare keywords, no parens) + "sysdate": "now()", + "systimestamp": "now()", + # MSSQL / Oracle / MySQL uuid generators + "newid": "gen_random_uuid()", + "newsequentialid": "gen_random_uuid()", + "sys_guid": "gen_random_uuid()", + "uuid": "gen_random_uuid()", + # Session info — MSSQL / Oracle + "suser_sname": "CURRENT_USER", + "system_user": "CURRENT_USER", + "user_name": "CURRENT_USER", + "user": "CURRENT_USER", + "db_name": "current_database()", +} + +_UNICODE_STRING_RE = re.compile(r"(?i)\bN'") +_FUNCTION_CALL_RE = re.compile( + r"^\s*([A-Za-z_][A-Za-z0-9_]*)(?:\s*\(\s*\))?\s*$" +) +# MySQL ``bit`` default literal — ``b'0'`` / ``b'1'``. +_MYSQL_BIT_RE = re.compile(r"^(?i:b)'([01]+)'$") + class PostgresSqlEmitter: """Produce PostgreSQL DDL+DML for a collected :class:`Database`.""" @@ -118,9 +153,57 @@ def column_definition(self, column: Column) -> str: if not column.nullable and not column.identity: parts.append("NOT NULL") if column.default is not None and not column.identity: - parts.append(f"DEFAULT {column.default}") + parts.append(f"DEFAULT {self._translate_default(column.default, target_type)}") return " ".join(parts) + @staticmethod + def _strip_wrapping_parens(expr: str) -> str: + # MSSQL wraps every default in at least one extra pair of parens + # (``((0))``, ``(getdate())``, ``(N'foo')``). Peel only when the outer + # pair encloses the whole expression — leave ``(1)+(2)`` alone. 
+ expr = expr.strip() + while expr.startswith("(") and expr.endswith(")"): + depth = 0 + balanced = True + for index, ch in enumerate(expr): + if ch == "(": + depth += 1 + elif ch == ")": + depth -= 1 + if depth == 0 and index != len(expr) - 1: + balanced = False + break + if not balanced: + break + expr = expr[1:-1].strip() + return expr + + def _translate_default(self, raw: str, target_type: str) -> str: + expr = self._strip_wrapping_parens(raw) + + # ``N'foo'`` → ``'foo'``. PG has no N-prefixed string literal; strings + # are already unicode-capable. + expr = _UNICODE_STRING_RE.sub("'", expr) + + # ``0`` / ``1`` → ``FALSE`` / ``TRUE`` when the column maps to boolean + # (MSSQL ``bit`` becomes PG ``boolean`` and PG won't coerce int→bool + # inside a DEFAULT clause). MySQL ``bit`` defaults arrive as ``b'1'``. + if target_type == "boolean": + if expr in ("0", "1"): + return "FALSE" if expr == "0" else "TRUE" + bit_match = _MYSQL_BIT_RE.match(expr) + if bit_match: + return "FALSE" if int(bit_match.group(1), 2) == 0 else "TRUE" + + match = _FUNCTION_CALL_RE.match(expr) + if match: + fn = match.group(1).lower() + replacement = _DEFAULT_FUNCTION_MAP.get(fn) + if replacement is not None: + return replacement + + return expr + # ---- emit ------------------------------------------------------------- def emit_prologue(self, sink: OutputSink) -> None: diff --git a/tests/unit/infrastructure/emit/test_postgres_emitter.py b/tests/unit/infrastructure/emit/test_postgres_emitter.py index ea5db65..b7db1f7 100644 --- a/tests/unit/infrastructure/emit/test_postgres_emitter.py +++ b/tests/unit/infrastructure/emit/test_postgres_emitter.py @@ -95,6 +95,55 @@ def test_not_null_default(self) -> None: col = Column(name="email", type="text", nullable=False, default="''") assert emitter.column_definition(col) == '"email" text NOT NULL DEFAULT \'\'' + @pytest.mark.parametrize( + "source_type, raw_default, expected", + [ + # MSSQL date/time functions translate to PG equivalents. 
+ ("datetime", "(getdate())", "now()"), + ("datetime2", "(GETDATE())", "now()"), + ("datetime2", "(sysdatetime())", "LOCALTIMESTAMP"), + ("datetime2", "(getutcdate())", "(now() AT TIME ZONE 'utc')"), + ("datetime2", "(sysutcdatetime())", "(now() AT TIME ZONE 'utc')"), + # uuid generators. + ("uniqueidentifier", "(newid())", "gen_random_uuid()"), + ("uniqueidentifier", "(newsequentialid())", "gen_random_uuid()"), + # session info. + ("varchar", "(suser_sname())", "CURRENT_USER"), + ("varchar", "(db_name())", "current_database()"), + # ``N'foo'`` unicode literal → plain literal. + ("varchar", "(N'foo')", "'foo'"), + # ``bit`` → boolean coercion of 0/1 literals. + ("bit", "((0))", "FALSE"), + ("bit", "((1))", "TRUE"), + # Double parens around integer literal — peel both. + ("int", "((42))", "42"), + # Already PG-friendly expression: must be left untouched. + ("int", "1 + 1", "1 + 1"), + # Unbalanced wrapping must not be peeled. + ("int", "(1)+(2)", "(1)+(2)"), + # Oracle bare keywords (no parens). + ("date", "SYSDATE", "now()"), + ("timestamp", "SYSTIMESTAMP", "now()"), + ("varchar", "USER", "CURRENT_USER"), + # Oracle SYS_GUID() and MySQL UUID(). + ("uniqueidentifier", "SYS_GUID()", "gen_random_uuid()"), + ("uniqueidentifier", "uuid()", "gen_random_uuid()"), + # MySQL bit default literal on a boolean-mapped column. + ("bit", "b'0'", "FALSE"), + ("bit", "b'1'", "TRUE"), + # PG-compatible keywords must survive untouched. 
+ ("timestamp", "CURRENT_TIMESTAMP", "CURRENT_TIMESTAMP"), + ("date", "CURRENT_DATE", "CURRENT_DATE"), + ], + ) + def test_default_translation( + self, source_type: str, raw_default: str, expected: str + ) -> None: + emitter = PostgresSqlEmitter(preserve_case=True) + col = Column(name="c", type=source_type, default=raw_default) + definition = emitter.column_definition(col) + assert definition.endswith(f"DEFAULT {expected}"), definition + class TestEmitSchemasAndTables: def _db(self) -> Database: From 52fd397ea5c3b93c19b091c551532fbea54eed95 Mon Sep 17 00:00:00 2001 From: Jacques Raphanel Date: Thu, 4 Jun 2026 18:37:28 +0000 Subject: [PATCH 2/5] fix: define dynamic default mssql values --- db2sql/infrastructure/emit/mssql/emitter.py | 104 +++++++++++++++++- .../infrastructure/emit/test_mssql_emitter.py | 45 ++++++++ 2 files changed, 148 insertions(+), 1 deletion(-) diff --git a/db2sql/infrastructure/emit/mssql/emitter.py b/db2sql/infrastructure/emit/mssql/emitter.py index c24c0f7..557fff3 100644 --- a/db2sql/infrastructure/emit/mssql/emitter.py +++ b/db2sql/infrastructure/emit/mssql/emitter.py @@ -2,6 +2,7 @@ from __future__ import annotations +import re import warnings from typing import Any, Dict, Iterable, Mapping, Optional @@ -9,6 +10,55 @@ from db2sql.domain.model import Column, Database, Schema, Table from db2sql.domain.policy import drop_order, normalize_identifier +# Source-side scalar functions to rewrite when targeting MSSQL. Keys lowercased, +# parens stripped; matched with or without empty parens so Oracle bare keywords +# (``SYSDATE``) and PG / MSSQL function calls are both handled. 
+_DEFAULT_FUNCTION_MAP: Dict[str, str] = { + # PG date/time + "now": "SYSDATETIME()", + "localtimestamp": "SYSDATETIME()", + "transaction_timestamp": "SYSDATETIME()", + "statement_timestamp": "SYSDATETIME()", + "clock_timestamp": "SYSDATETIME()", + "current_date": "CAST(SYSDATETIME() AS DATE)", + "current_time": "CAST(SYSDATETIME() AS TIME)", + # MySQL date/time (NOW already covered as ``now``); UTC variant. + "utc_timestamp": "SYSUTCDATETIME()", + # Oracle date/time bare keywords + "sysdate": "GETDATE()", + "systimestamp": "SYSDATETIME()", + # uuid generators + "gen_random_uuid": "NEWID()", + "uuid_generate_v4": "NEWID()", + "sys_guid": "NEWID()", # Oracle + "uuid": "NEWID()", # MySQL + # session info — leave CURRENT_USER / SESSION_USER / SYSTEM_USER and + # CURRENT_TIMESTAMP alone, they are ANSI-compatible in MSSQL. + "current_database": "DB_NAME()", + "current_catalog": "DB_NAME()", + "current_schema": "SCHEMA_NAME()", +} + +_FUNCTION_CALL_RE = re.compile( + r"^\s*([A-Za-z_][A-Za-z0-9_]*)(?:\s*\(\s*\))?\s*$" +) +# PG ``literal::type`` cast — only when the whole expression is a single +# literal followed by a single cast. Anything more complex is left as-is. +_PG_CAST_RE = re.compile( + r"""^\s* + (?P<value> + NULL + | TRUE | FALSE + | -?\d+(?:\.\d+)? + | '(?:[^']|'')*' + ) + \s*::\s*[A-Za-z_][A-Za-z_0-9 ]*(?:\([^)]*\))? + \s*$""", + re.IGNORECASE | re.VERBOSE, +) +# MySQL ``bit`` default literal — ``b'0'`` / ``b'1'``. 
+_MYSQL_BIT_RE = re.compile(r"^(?i:b)'([01]+)'$") + class MssqlSqlEmitter: """Produce Microsoft SQL Server DDL+DML for a collected :class:`Database`.""" @@ -132,9 +182,61 @@ def column_definition(self, column: Column) -> str: if not column.nullable and not column.identity: parts.append("NOT NULL") if column.default is not None and not column.identity: - parts.append(f"DEFAULT {column.default}") + parts.append(f"DEFAULT {self._translate_default(column.default, target_type)}") return " ".join(parts) + @staticmethod + def _strip_pg_cast(expr: str) -> str: + match = _PG_CAST_RE.match(expr) + if match: + return match.group("value") + return expr + + @staticmethod + def _strip_wrapping_parens(expr: str) -> str: + # MSSQL-sourced defaults arrive wrapped — peel only when the outer pair + # encloses the whole expression so we leave ``(1)+(2)`` alone. + expr = expr.strip() + while expr.startswith("(") and expr.endswith(")"): + depth = 0 + balanced = True + for index, ch in enumerate(expr): + if ch == "(": + depth += 1 + elif ch == ")": + depth -= 1 + if depth == 0 and index != len(expr) - 1: + balanced = False + break + if not balanced: + break + expr = expr[1:-1].strip() + return expr + + def _translate_default(self, raw: str, target_type: str) -> str: + expr = self._strip_wrapping_parens(raw) + expr = self._strip_pg_cast(expr) + + # PG / MySQL boolean literals → MSSQL bit literal when target is ``bit``. 
+ if target_type == "bit": + lower = expr.lower() + if lower == "true": + return "1" + if lower == "false": + return "0" + bit_match = _MYSQL_BIT_RE.match(expr) + if bit_match: + return "1" if int(bit_match.group(1), 2) else "0" + + match = _FUNCTION_CALL_RE.match(expr) + if match: + fn = match.group(1).lower() + replacement = _DEFAULT_FUNCTION_MAP.get(fn) + if replacement is not None: + return replacement + + return expr + # ---- emit ------------------------------------------------------------- def emit_prologue(self, sink: OutputSink) -> None: diff --git a/tests/unit/infrastructure/emit/test_mssql_emitter.py b/tests/unit/infrastructure/emit/test_mssql_emitter.py index acb131b..c374bf6 100644 --- a/tests/unit/infrastructure/emit/test_mssql_emitter.py +++ b/tests/unit/infrastructure/emit/test_mssql_emitter.py @@ -109,6 +109,51 @@ def test_not_null_default(self) -> None: col = Column(name="email", type="varchar", char_length=255, nullable=False, default="''") assert emitter.column_definition(col) == "[email] nvarchar(255) NOT NULL DEFAULT ''" + @pytest.mark.parametrize( + "source_type, raw_default, expected", + [ + # PG date/time functions translate to MSSQL equivalents. + ("timestamp", "now()", "SYSDATETIME()"), + ("timestamp", "NOW()", "SYSDATETIME()"), + ("timestamp", "LOCALTIMESTAMP", "SYSDATETIME()"), + ("date", "CURRENT_DATE", "CAST(SYSDATETIME() AS DATE)"), + # Oracle bare keywords. + ("date", "SYSDATE", "GETDATE()"), + ("timestamp", "SYSTIMESTAMP", "SYSDATETIME()"), + # uuid generators from any source. + ("uuid", "gen_random_uuid()", "NEWID()"), + ("uuid", "uuid_generate_v4()", "NEWID()"), + ("uuid", "SYS_GUID()", "NEWID()"), + ("uuid", "uuid()", "NEWID()"), + # session info: current_database → DB_NAME; CURRENT_USER stays. + ("varchar", "current_database()", "DB_NAME()"), + ("varchar", "CURRENT_USER", "CURRENT_USER"), + ("timestamp", "CURRENT_TIMESTAMP", "CURRENT_TIMESTAMP"), + # PG ``literal::type`` cast — strip. 
+ ("varchar", "'foo'::text", "'foo'"), + ("varchar", "'foo'::character varying(20)", "'foo'"), + ("int", "0::integer", "0"), + ("int", "NULL::integer", "NULL"), + # PG / MySQL boolean literals on bit column. + ("boolean", "true", "1"), + ("boolean", "false", "0"), + ("boolean", "FALSE", "0"), + ("bit", "b'1'", "1"), + ("bit", "b'0'", "0"), + # Already MSSQL-friendly: keep as-is. + ("int", "((42))", "42"), + ("int", "1 + 1", "1 + 1"), + ("int", "(1)+(2)", "(1)+(2)"), + ], + ) + def test_default_translation( + self, source_type: str, raw_default: str, expected: str + ) -> None: + emitter = MssqlSqlEmitter(preserve_case=True) + col = Column(name="c", type=source_type, default=raw_default) + definition = emitter.column_definition(col) + assert definition.endswith(f"DEFAULT {expected}"), definition + class TestTransactionAndSchemas: def test_prologue_uses_begin_transaction(self) -> None: From ea89ad788e62a18db12b5e3df0d67eb16e7a9f89 Mon Sep 17 00:00:00 2001 From: Jacques Raphanel Date: Thu, 4 Jun 2026 18:40:15 +0000 Subject: [PATCH 3/5] style: run black --- db2sql/infrastructure/emit/mssql/emitter.py | 4 +--- db2sql/infrastructure/emit/postgres/emitter.py | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/db2sql/infrastructure/emit/mssql/emitter.py b/db2sql/infrastructure/emit/mssql/emitter.py index 557fff3..bae243a 100644 --- a/db2sql/infrastructure/emit/mssql/emitter.py +++ b/db2sql/infrastructure/emit/mssql/emitter.py @@ -39,9 +39,7 @@ "current_schema": "SCHEMA_NAME()", } -_FUNCTION_CALL_RE = re.compile( - r"^\s*([A-Za-z_][A-Za-z0-9_]*)(?:\s*\(\s*\))?\s*$" -) +_FUNCTION_CALL_RE = re.compile(r"^\s*([A-Za-z_][A-Za-z0-9_]*)(?:\s*\(\s*\))?\s*$") # PG ``literal::type`` cast — only when the whole expression is a single # literal followed by a single cast. Anything more complex is left as-is. 
_PG_CAST_RE = re.compile( diff --git a/db2sql/infrastructure/emit/postgres/emitter.py b/db2sql/infrastructure/emit/postgres/emitter.py index 09c450d..404b8c9 100644 --- a/db2sql/infrastructure/emit/postgres/emitter.py +++ b/db2sql/infrastructure/emit/postgres/emitter.py @@ -37,9 +37,7 @@ } _UNICODE_STRING_RE = re.compile(r"(?i)\bN'") -_FUNCTION_CALL_RE = re.compile( - r"^\s*([A-Za-z_][A-Za-z0-9_]*)(?:\s*\(\s*\))?\s*$" -) +_FUNCTION_CALL_RE = re.compile(r"^\s*([A-Za-z_][A-Za-z0-9_]*)(?:\s*\(\s*\))?\s*$") # MySQL ``bit`` default literal — ``b'0'`` / ``b'1'``. _MYSQL_BIT_RE = re.compile(r"^(?i:b)'([01]+)'$") From 93645f2989d09fc415459fd1b12f45b06e48e51e Mon Sep 17 00:00:00 2001 From: Jacques Raphanel Date: Thu, 4 Jun 2026 19:04:31 +0000 Subject: [PATCH 4/5] style: apply linter --- db2sql/infrastructure/emit/mssql/emitter.py | 3 ++- .../infrastructure/emit/postgres/emitter.py | 3 ++- db2sql/infrastructure/logging/colors.py | 2 +- .../infrastructure/logging/console_logger.py | 2 +- .../output/rotating_file_sink.py | 4 ++- .../persistence/mssql/reader.py | 12 ++++----- .../persistence/mysql/reader.py | 12 ++++----- .../persistence/oracle/reader.py | 6 ++--- .../persistence/postgres/reader.py | 12 ++++----- .../persistence/query_introspection.py | 2 +- .../persistence/sqlite/reader.py | 4 ++- db2sql/infrastructure/plugins/registry.py | 2 +- db2sql/infrastructure/writer/mssql/writer.py | 25 ++++++++--------- .../infrastructure/writer/postgres/writer.py | 25 ++++++++--------- db2sql/interface/cli/runner.py | 27 ++++++------------- db2sql/interface/cli/validate_command.py | 5 ++-- installer/build.py | 2 +- pyproject.toml | 3 +++ 18 files changed, 69 insertions(+), 82 deletions(-) diff --git a/db2sql/infrastructure/emit/mssql/emitter.py b/db2sql/infrastructure/emit/mssql/emitter.py index bae243a..ae46210 100644 --- a/db2sql/infrastructure/emit/mssql/emitter.py +++ b/db2sql/infrastructure/emit/mssql/emitter.py @@ -147,7 +147,8 @@ def _normalize(self, name: str) -> str: def 
quote_identifier(self, name: str) -> str: normalized = self._normalize(name) - return "[{}]".format(normalized.replace("]", "]]")) + escaped = normalized.replace("]", "]]") + return f"[{escaped}]" def schema_name(self, schema: Schema) -> str: mapped = self._schema_mapping.get(schema.name, schema.name) diff --git a/db2sql/infrastructure/emit/postgres/emitter.py b/db2sql/infrastructure/emit/postgres/emitter.py index 404b8c9..bb66f9a 100644 --- a/db2sql/infrastructure/emit/postgres/emitter.py +++ b/db2sql/infrastructure/emit/postgres/emitter.py @@ -123,7 +123,8 @@ def _normalize(self, name: str) -> str: def quote_identifier(self, name: str) -> str: normalized = self._normalize(name) - return '"{}"'.format(normalized.replace('"', '""')) + escaped = normalized.replace('"', '""') + return f'"{escaped}"' def schema_name(self, schema: Schema) -> str: mapped = self._schema_mapping.get(schema.name, schema.name) diff --git a/db2sql/infrastructure/logging/colors.py b/db2sql/infrastructure/logging/colors.py index d87f1c6..a89bd63 100644 --- a/db2sql/infrastructure/logging/colors.py +++ b/db2sql/infrastructure/logging/colors.py @@ -32,7 +32,7 @@ def init_colorama(stream: object) -> None: colorama.init() -class Palette: +class Palette: # pylint: disable=too-few-public-methods """Wrapper around colorama colors.""" RED = Fore.RED diff --git a/db2sql/infrastructure/logging/console_logger.py b/db2sql/infrastructure/logging/console_logger.py index 97859ce..def452f 100644 --- a/db2sql/infrastructure/logging/console_logger.py +++ b/db2sql/infrastructure/logging/console_logger.py @@ -66,7 +66,7 @@ def from_verbosity( raise InvalidLogLevel(level_name) from exc stream: IO[str] if log_file: - stream = open( + stream = open( # pylint: disable=consider-using-with log_file, "wt", encoding="utf-8" ) # noqa: SIM115 — owned for process lifetime else: diff --git a/db2sql/infrastructure/output/rotating_file_sink.py b/db2sql/infrastructure/output/rotating_file_sink.py index 42accde..c84d939 100644 
--- a/db2sql/infrastructure/output/rotating_file_sink.py +++ b/db2sql/infrastructure/output/rotating_file_sink.py @@ -92,7 +92,9 @@ def _open_next_part(self) -> None: parent = self._current_path.parent if str(parent) and not parent.exists(): parent.mkdir(parents=True, exist_ok=True) - self._current_stream = open(self._current_path, "w", encoding="utf-8") + self._current_stream = open( # pylint: disable=consider-using-with + self._current_path, "w", encoding="utf-8" + ) self._current_size = 0 def _part_path(self, index: int) -> Path: diff --git a/db2sql/infrastructure/persistence/mssql/reader.py b/db2sql/infrastructure/persistence/mssql/reader.py index b43056a..a4659cb 100644 --- a/db2sql/infrastructure/persistence/mssql/reader.py +++ b/db2sql/infrastructure/persistence/mssql/reader.py @@ -34,13 +34,11 @@ def _ensure_session(self) -> Session: def _connection_string(self) -> str: server = self._config.server port = f":{server.port}" if server.port else "" - return "mssql+pymssql://{}:{}@{}{}/{}".format( - server.username or "", - server.password or "", - server.hostname or "", - port, - server.dbname or "", - ) + username = server.username or "" + password = server.password or "" + hostname = server.hostname or "" + dbname = server.dbname or "" + return f"mssql+pymssql://{username}:{password}@{hostname}{port}/{dbname}" def collect_metadata(self) -> Database: database = Database(str(self._config.server.dbname or "")) diff --git a/db2sql/infrastructure/persistence/mysql/reader.py b/db2sql/infrastructure/persistence/mysql/reader.py index 015ffc2..04a85c6 100644 --- a/db2sql/infrastructure/persistence/mysql/reader.py +++ b/db2sql/infrastructure/persistence/mysql/reader.py @@ -34,13 +34,11 @@ def _ensure_session(self) -> Session: def _connection_string(self) -> str: server = self._config.server port = f":{server.port}" if server.port else "" - return "mysql+pymysql://{}:{}@{}{}/{}".format( - server.username or "", - server.password or "", - server.hostname or "", - port, 
- server.dbname or "", - ) + username = server.username or "" + password = server.password or "" + hostname = server.hostname or "" + dbname = server.dbname or "" + return f"mysql+pymysql://{username}:{password}@{hostname}{port}/{dbname}" @property def _database_name(self) -> str: diff --git a/db2sql/infrastructure/persistence/oracle/reader.py b/db2sql/infrastructure/persistence/oracle/reader.py index 0c45736..aa063bd 100644 --- a/db2sql/infrastructure/persistence/oracle/reader.py +++ b/db2sql/infrastructure/persistence/oracle/reader.py @@ -102,7 +102,7 @@ def _connection_string(self) -> str: options = server.options or {} driver = options.get("driver", "oracledb") port = f":{server.port}" if server.port else "" - userinfo = "{}:{}".format(server.username or "", server.password or "") + userinfo = f"{server.username or ''}:{server.password or ''}" host = server.hostname or "" service_name = options.get("service_name") sid = options.get("sid") @@ -150,7 +150,7 @@ def _read_schemas(self, database: Database) -> None: owner = self._schema_filter params: Dict[str, Any] = {} if owner: - query = "SELECT DISTINCT OWNER FROM ALL_TABLES " "WHERE OWNER = :owner ORDER BY OWNER" + query = "SELECT DISTINCT OWNER FROM ALL_TABLES WHERE OWNER = :owner ORDER BY OWNER" params["owner"] = owner else: query = ( @@ -342,7 +342,7 @@ def _read_identity_columns(self, database: Database) -> None: ), params, ) - except Exception: + except Exception: # pylint: disable=broad-exception-caught return for row in rows: table = database.get_table(row.owner, row.table_name) diff --git a/db2sql/infrastructure/persistence/postgres/reader.py b/db2sql/infrastructure/persistence/postgres/reader.py index 3672c23..0e63d31 100644 --- a/db2sql/infrastructure/persistence/postgres/reader.py +++ b/db2sql/infrastructure/persistence/postgres/reader.py @@ -36,13 +36,11 @@ def _ensure_session(self) -> Session: def _connection_string(self) -> str: server = self._config.server port = f":{server.port}" if server.port 
else "" - return "postgresql+psycopg2://{}:{}@{}{}/{}".format( - server.username or "", - server.password or "", - server.hostname or "", - port, - server.dbname or "", - ) + username = server.username or "" + password = server.password or "" + hostname = server.hostname or "" + dbname = server.dbname or "" + return f"postgresql+psycopg2://{username}:{password}@{hostname}{port}/{dbname}" def collect_metadata(self) -> Database: database = Database(str(self._config.server.dbname or "")) diff --git a/db2sql/infrastructure/persistence/query_introspection.py b/db2sql/infrastructure/persistence/query_introspection.py index 673a49a..c880006 100644 --- a/db2sql/infrastructure/persistence/query_introspection.py +++ b/db2sql/infrastructure/persistence/query_introspection.py @@ -68,7 +68,7 @@ def iter_query_rows(session: Session, query: str, limit: int = -1) -> Iterator[T result: Result[Any] = session.execute(text(query)) try: for index, row in enumerate(result): - if limit and limit > 0 and index >= limit: + if 0 < limit <= index: break yield tuple(row) finally: diff --git a/db2sql/infrastructure/persistence/sqlite/reader.py b/db2sql/infrastructure/persistence/sqlite/reader.py index 3a19fe8..bca9dc1 100644 --- a/db2sql/infrastructure/persistence/sqlite/reader.py +++ b/db2sql/infrastructure/persistence/sqlite/reader.py @@ -124,7 +124,9 @@ def _read_foreign_keys(self, database: Database, table_name: str) -> None: continue column.foreign_key = ForeignKey(self._schema, ref_table, ref_col) - def iter_rows(self, schema: str, table: Table, limit: int = -1) -> Iterator[Tuple[Any, ...]]: + def iter_rows( # pylint: disable=unused-argument + self, schema: str, table: Table, limit: int = -1 + ) -> Iterator[Tuple[Any, ...]]: session = self._ensure_session() columns = ", ".join(f'"{name}"' for name in table.columns) suffix = f" LIMIT {limit}" if limit and limit > 0 else "" diff --git a/db2sql/infrastructure/plugins/registry.py b/db2sql/infrastructure/plugins/registry.py index 
256597d..1e04ba6 100644 --- a/db2sql/infrastructure/plugins/registry.py +++ b/db2sql/infrastructure/plugins/registry.py @@ -74,7 +74,7 @@ def _load_entry_points(group: str) -> Dict[str, Any]: eps = metadata.entry_points(group=group) except TypeError: legacy = metadata.entry_points() - eps = legacy.get(group, []) # type: ignore[attr-defined] + eps = legacy.get(group, []) # type: ignore[attr-defined] # pylint: disable=no-member for entry in eps: loaded[entry.name] = entry.load() return loaded diff --git a/db2sql/infrastructure/writer/mssql/writer.py b/db2sql/infrastructure/writer/mssql/writer.py index 3def634..655a48d 100644 --- a/db2sql/infrastructure/writer/mssql/writer.py +++ b/db2sql/infrastructure/writer/mssql/writer.py @@ -130,25 +130,22 @@ def _raw_pymssql_cursor(self) -> Any: def _connection_string(self) -> str: server = self._config.target_server port = f":{server.port}" if server.port else "" - return "mssql+pymssql://{}:{}@{}{}/{}".format( - server.username or "", - server.password or "", - server.hostname or "", - port, - server.dbname or "", - ) + username = server.username or "" + password = server.password or "" + hostname = server.hostname or "" + dbname = server.dbname or "" + return f"mssql+pymssql://{username}:{password}@{hostname}{port}/{dbname}" @property def _connection_string_redacted(self) -> str: server = self._config.target_server port = f":{server.port}" if server.port else "" - return "mssql+pymssql://{}@{}{}/{}".format( - server.username or "", - server.hostname or "", - port, - server.dbname or "", - ) + username = server.username or "" + hostname = server.hostname or "" + dbname = server.dbname or "" + return f"mssql+pymssql://{username}@{hostname}{port}/{dbname}" @staticmethod def _quote_ident(name: str) -> str: - return "[{}]".format(name.replace("]", "]]")) + escaped = name.replace("]", "]]") + return f"[{escaped}]" diff --git a/db2sql/infrastructure/writer/postgres/writer.py b/db2sql/infrastructure/writer/postgres/writer.py index 
e1364a7..257a7c9 100644 --- a/db2sql/infrastructure/writer/postgres/writer.py +++ b/db2sql/infrastructure/writer/postgres/writer.py @@ -128,28 +128,25 @@ def _raw_psycopg2_cursor(self) -> Any: def _connection_string(self) -> str: server = self._config.target_server port = f":{server.port}" if server.port else "" - return "postgresql+psycopg2://{}:{}@{}{}/{}".format( - server.username or "", - server.password or "", - server.hostname or "", - port, - server.dbname or "", - ) + username = server.username or "" + password = server.password or "" + hostname = server.hostname or "" + dbname = server.dbname or "" + return f"postgresql+psycopg2://{username}:{password}@{hostname}{port}/{dbname}" @property def _connection_string_redacted(self) -> str: server = self._config.target_server port = f":{server.port}" if server.port else "" - return "postgresql+psycopg2://{}@{}{}/{}".format( - server.username or "", - server.hostname or "", - port, - server.dbname or "", - ) + username = server.username or "" + hostname = server.hostname or "" + dbname = server.dbname or "" + return f"postgresql+psycopg2://{username}@{hostname}{port}/{dbname}" @staticmethod def _quote_ident(name: str) -> str: - return '"{}"'.format(name.replace('"', '""')) + escaped = name.replace('"', '""') + return f'"{escaped}"' # Mirrors PostgresSqlEmitter._format_copy_value so the bytes hitting the # server through COPY FROM STDIN are byte-identical to the file dump. 
diff --git a/db2sql/interface/cli/runner.py b/db2sql/interface/cli/runner.py index c698cab..051d26e 100644 --- a/db2sql/interface/cli/runner.py +++ b/db2sql/interface/cli/runner.py @@ -3,6 +3,7 @@ from __future__ import annotations import getpass +import signal import sys import traceback from typing import Any, List, Optional, Union @@ -147,7 +148,9 @@ def _execute_migrate(self, config: AppConfig, target_driver: Optional[str]) -> N ) use_case.execute() - def exit_code_from(self, exception: Optional[BaseException]) -> ExitCode: + def exit_code_from( # pylint: disable=too-many-return-statements + self, exception: Optional[BaseException] + ) -> ExitCode: logger = self._logger if exception is None: return SUCCESS @@ -159,22 +162,10 @@ def exit_code_from(self, exception: Optional[BaseException]) -> ExitCode: if isinstance(exception, ConfigError): logger.error(exception.message) return ERROR_INVALID_CONFIGURATION - if isinstance(exception, UnknownReaderError): + if isinstance(exception, (UnknownReaderError, UnknownEmitterError, UnknownWriterError)): logger.error(str(exception)) return ERROR_INVALID_CONFIGURATION - if isinstance(exception, UnknownEmitterError): - logger.error(str(exception)) - return ERROR_INVALID_CONFIGURATION - if isinstance(exception, UnknownWriterError): - logger.error(str(exception)) - return ERROR_INVALID_CONFIGURATION - if isinstance(exception, SourceReaderError): - logger.error(exception.message) - return ERROR_GENERAL - if isinstance(exception, TargetWriterError): - logger.error(exception.message) - return ERROR_GENERAL - if isinstance(exception, DomainError): + if isinstance(exception, (SourceReaderError, TargetWriterError, DomainError)): logger.error(exception.message) return ERROR_GENERAL if isinstance(exception, SystemExit): @@ -185,14 +176,12 @@ def exit_code_from(self, exception: Optional[BaseException]) -> ExitCode: logger.error(traceback.format_exc()) try: logger.error(str(exception)) - except Exception: + except Exception: # pylint: 
disable=broad-exception-caught logger.error(repr(exception)) return ERROR_UNEXPECTED def main(args: Optional[List[str]] = None) -> ExitCode: - import signal - def ctrl_c_handler(_signo: Any, _frame: Any) -> None: print("You pressed Ctrl+C!") sys.exit(2) @@ -216,6 +205,6 @@ def ctrl_break_handler(_signo: Any, _frame: Any) -> None: result = cli.run(args if args is not None else sys.argv[1:]) if result is not None and result != SUCCESS: error = result - except BaseException as exc: + except BaseException as exc: # pylint: disable=broad-exception-caught error = cli.exit_code_from(exc) return error diff --git a/db2sql/interface/cli/validate_command.py b/db2sql/interface/cli/validate_command.py index 8d5f638..ffd242d 100644 --- a/db2sql/interface/cli/validate_command.py +++ b/db2sql/interface/cli/validate_command.py @@ -185,7 +185,8 @@ def _print_plan( limit_str = "all rows" if limit < 0 else f"≤ {limit} rows" count_str = "" if with_counts: - count_str = f", count={_count_rows(reader, schema_name, schema.tables[table_name], limit, logger)}" + count = _count_rows(reader, schema_name, schema.tables[table_name], limit, logger) + count_str = f", count={count}" logger.info(f" - {table_name} (format={fmt}, {limit_str}{count_str})") @@ -206,7 +207,7 @@ def _count_rows( for total, _ in enumerate(reader.iter_rows(schema, table, limit=limit), start=1): pass return str(total) - except Exception as exc: # pragma: no cover - reader-specific failure + except Exception as exc: # pylint: disable=broad-exception-caught # pragma: no cover logger.warning(f"could not count rows for {schema}.{getattr(table, 'name', '?')}: {exc}") return "?" 
diff --git a/installer/build.py b/installer/build.py index 8267f0c..4e0eaf8 100644 --- a/installer/build.py +++ b/installer/build.py @@ -77,7 +77,7 @@ def _resolve_dist_name(module_name: str) -> Optional[str]: pass try: mapping = importlib.metadata.packages_distributions() - except Exception: # noqa: BLE001 — best effort + except Exception: # noqa: BLE001 # pylint: disable=broad-exception-caught return None dists = mapping.get(module_name) return dists[0] if dists else None diff --git a/pyproject.toml b/pyproject.toml index 6449d5e..c867eff 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -164,10 +164,13 @@ max-return = 10 jobs = 0 # Use auto-detected number of multiple processes to speed up Pylint. disable = [ "missing-module-docstring", + "missing-class-docstring", + "missing-function-docstring", "too-many-branches", "too-many-statements", "duplicate-code", ] +fail-under = 10.0 max-args=15 # Maximum number of arguments for function / method. max-attributes=20 # Maximum number of attributes for a class (see R0902). 
max-local=25 # Maximum number of local variables From 1560a58eee74c4008e09d4bb52ad4b16bd696137 Mon Sep 17 00:00:00 2001 From: Jacques Raphanel Date: Thu, 4 Jun 2026 19:29:40 +0000 Subject: [PATCH 5/5] feat: setup functional tests for direct migration --- .docker/mssql/init/01-schema.sql | 25 ++++++++++ db2sql/infrastructure/plugins/registry.py | 7 ++- tests/functional/test_mssql_functional.py | 56 +++++++++++++++++++++++ 3 files changed, 86 insertions(+), 2 deletions(-) diff --git a/.docker/mssql/init/01-schema.sql b/.docker/mssql/init/01-schema.sql index 570bf38..84d6040 100644 --- a/.docker/mssql/init/01-schema.sql +++ b/.docker/mssql/init/01-schema.sql @@ -19,6 +19,7 @@ GO IF OBJECT_ID('apptest.book', 'U') IS NOT NULL DROP TABLE apptest.book; IF OBJECT_ID('apptest.author', 'U') IS NOT NULL DROP TABLE apptest.author; IF OBJECT_ID('apptest.type_matrix', 'U') IS NOT NULL DROP TABLE apptest.type_matrix; +IF OBJECT_ID('apptest.default_matrix', 'U') IS NOT NULL DROP TABLE apptest.default_matrix; GO -- Type-coverage table ------------------------------------------------------- @@ -56,6 +57,30 @@ CREATE TABLE apptest.type_matrix ( ); GO +-- Default-value coverage table ------------------------------------------- +-- Every column exercises a DEFAULT expression that the postgres emitter is +-- expected to translate (functions, bare keywords, literals, booleans). 
+CREATE TABLE apptest.default_matrix ( + id INT IDENTITY(1,1) PRIMARY KEY, + d_getdate DATETIME NOT NULL DEFAULT GETDATE(), + d_sysdatetime DATETIME2 NOT NULL DEFAULT SYSDATETIME(), + d_getutcdate DATETIME NOT NULL DEFAULT GETUTCDATE(), + d_sysutcdatetime DATETIME2 NOT NULL DEFAULT SYSUTCDATETIME(), + d_sysdatetimeoffset DATETIMEOFFSET NOT NULL DEFAULT SYSDATETIMEOFFSET(), + d_current_timestamp DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP, + d_newid UNIQUEIDENTIFIER NOT NULL DEFAULT NEWID(), + d_newsequentialid UNIQUEIDENTIFIER NOT NULL DEFAULT NEWSEQUENTIALID(), + d_suser_sname NVARCHAR(128) NOT NULL DEFAULT SUSER_SNAME(), + d_system_user NVARCHAR(128) NOT NULL DEFAULT SYSTEM_USER, + d_user_name NVARCHAR(128) NOT NULL DEFAULT USER_NAME(), + d_db_name NVARCHAR(128) NOT NULL DEFAULT DB_NAME(), + d_bit_true BIT NOT NULL DEFAULT 1, + d_bit_false BIT NOT NULL DEFAULT 0, + d_int_literal INT NOT NULL DEFAULT 42, + d_string_literal NVARCHAR(32) NOT NULL DEFAULT N'hello' +); +GO + -- Relational mini-fixture (parallels the sqlite fixture in tests/conftest.py) CREATE TABLE apptest.author ( id INT IDENTITY(1,1) PRIMARY KEY, diff --git a/db2sql/infrastructure/plugins/registry.py b/db2sql/infrastructure/plugins/registry.py index 1e04ba6..5cc8241 100644 --- a/db2sql/infrastructure/plugins/registry.py +++ b/db2sql/infrastructure/plugins/registry.py @@ -73,8 +73,11 @@ def _load_entry_points(group: str) -> Dict[str, Any]: try: eps = metadata.entry_points(group=group) except TypeError: - legacy = metadata.entry_points() - eps = legacy.get(group, []) # type: ignore[attr-defined] # pylint: disable=no-member + # Python <3.10 fallback: ``entry_points()`` returned a dict-like keyed + # by group. Stubs differ across versions, so cast to Any to keep mypy + # quiet without a type: ignore that would itself become "unused". 
+ legacy = cast(Any, metadata.entry_points()) + eps = legacy.get(group, []) # pylint: disable=no-member for entry in eps: loaded[entry.name] = entry.load() return loaded diff --git a/tests/functional/test_mssql_functional.py b/tests/functional/test_mssql_functional.py index bf7c5cc..734dfec 100644 --- a/tests/functional/test_mssql_functional.py +++ b/tests/functional/test_mssql_functional.py @@ -12,6 +12,9 @@ target type from ``PostgresSqlEmitter.DEFAULT_TYPE_MAP``. 4. The author/book relationship (PK identity + FK + secondary index) is surfaced correctly. +5. MSSQL DEFAULT expressions on ``default_matrix`` (GETDATE, NEWID, SYSTEM_USER, + bit literals, ...) are translated to the expected PG equivalents + (``now()``, ``gen_random_uuid()``, ``CURRENT_USER``, ``TRUE/FALSE``, ...). """ from __future__ import annotations @@ -120,3 +123,56 @@ def test_mssql_foreign_key_and_index(mssql_metadata) -> None: fk = book.columns["author_id"].foreign_key assert (fk.schema, fk.table, fk.column) == ("apptest", "author", "id") assert any("title" in cols for cols in book.indexes.values()) + + +# --------------------------------------------------------------------------- # +# mssql → pg: DEFAULT-value translation through PostgresSqlEmitter # +# --------------------------------------------------------------------------- # + +# (column_name, expected DEFAULT clause in the rendered PG column definition). +# Order matches .docker/mssql/init/01-schema.sql:default_matrix. +EXPECTED_DEFAULTS = [ + ("d_getdate", "DEFAULT now()"), + ("d_sysdatetime", "DEFAULT LOCALTIMESTAMP"), + ("d_getutcdate", "DEFAULT (now() AT TIME ZONE 'utc')"), + ("d_sysutcdatetime", "DEFAULT (now() AT TIME ZONE 'utc')"), + ("d_sysdatetimeoffset", "DEFAULT now()"), + # MSSQL stores CURRENT_TIMESTAMP as ``(getdate())`` in INFORMATION_SCHEMA, + # so the emitter cannot distinguish it from a real GETDATE() default. 
+ ("d_current_timestamp", "DEFAULT now()"), + ("d_newid", "DEFAULT gen_random_uuid()"), + ("d_newsequentialid", "DEFAULT gen_random_uuid()"), + ("d_suser_sname", "DEFAULT CURRENT_USER"), + ("d_system_user", "DEFAULT CURRENT_USER"), + ("d_user_name", "DEFAULT CURRENT_USER"), + ("d_db_name", "DEFAULT current_database()"), + ("d_bit_true", "DEFAULT TRUE"), + ("d_bit_false", "DEFAULT FALSE"), + ("d_int_literal", "DEFAULT 42"), + ("d_string_literal", "DEFAULT 'hello'"), +] + + +def test_mssql_default_matrix_columns_present(mssql_metadata) -> None: + table = mssql_metadata.schemas["apptest"].get_table("default_matrix") + assert table is not None + for column_name, _ in EXPECTED_DEFAULTS: + column = table.columns.get(column_name) + assert column is not None, f"column {column_name} missing" + assert column.default, f"{column_name}: reader returned empty default" + + +@pytest.mark.parametrize("column_name, expected_clause", EXPECTED_DEFAULTS) +def test_mssql_to_pg_default_value_mapping( + mssql_metadata, column_name: str, expected_clause: str +) -> None: + """MSSQL DEFAULT expressions translate to the expected PG clause.""" + table = mssql_metadata.schemas["apptest"].get_table("default_matrix") + assert table is not None + emitter = PostgresSqlEmitter() + column = table.columns[column_name] + rendered = emitter.column_definition(column) + assert expected_clause in rendered, ( + f"{column_name}: rendered={rendered!r}, expected to contain {expected_clause!r} " + f"(raw default from reader: {column.default!r})" + )