diff --git a/firestore-bigquery-export/CHANGELOG.md b/firestore-bigquery-export/CHANGELOG.md index 6f6e48a63..38465ee0a 100644 --- a/firestore-bigquery-export/CHANGELOG.md +++ b/firestore-bigquery-export/CHANGELOG.md @@ -1,3 +1,7 @@ +## Version 0.3.2 + +fix: restore acceptance of ISO 8601 date/datetime strings as partition field values, regression introduced in 0.3.0 (#2803) + ## Version 0.3.1 chore: bump dependencies diff --git a/firestore-bigquery-export/extension.yaml b/firestore-bigquery-export/extension.yaml index 0da2359f0..506e4caf5 100644 --- a/firestore-bigquery-export/extension.yaml +++ b/firestore-bigquery-export/extension.yaml @@ -13,7 +13,7 @@ # limitations under the License. name: firestore-bigquery-export -version: 0.3.1 +version: 0.3.2 specVersion: v1beta displayName: Stream Firestore to BigQuery diff --git a/firestore-bigquery-export/firestore-bigquery-change-tracker/src/__tests__/bigquery/partitioning/converter.test.ts b/firestore-bigquery-export/firestore-bigquery-change-tracker/src/__tests__/bigquery/partitioning/converter.test.ts index 19fe6604a..4a77a537a 100644 --- a/firestore-bigquery-export/firestore-bigquery-change-tracker/src/__tests__/bigquery/partitioning/converter.test.ts +++ b/firestore-bigquery-export/firestore-bigquery-change-tracker/src/__tests__/bigquery/partitioning/converter.test.ts @@ -42,9 +42,105 @@ describe("PartitionValueConverter", () => { expect(result).toBeNull(); }); - test("returns null for string", () => { + test("converts ISO 8601 datetime string to BigQuery timestamp string", () => { + const result = converter.convert("2024-01-15T10:30:00Z"); + expect(result).toContain("2024-01-15"); + }); + + test("converts ISO 8601 date-only string to BigQuery timestamp string", () => { const result = converter.convert("2024-01-15"); - expect(result).toBeNull(); + expect(result).toContain("2024-01-15"); + }); + + test("returns null for unparseable string", () => { + expect(converter.convert("not-a-date")).toBeNull(); + }); + + 
test("returns null for empty string", () => { + expect(converter.convert("")).toBeNull(); + }); + + test("returns null for partial date (year-month only)", () => { + expect(converter.convert("2024-01")).toBeNull(); + }); + + test("returns null for partial date (year only)", () => { + expect(converter.convert("2024")).toBeNull(); + }); + + test("returns null for bare numeric string", () => { + expect(converter.convert("1")).toBeNull(); + }); + + test("returns null for calendar-invalid date (Feb 30)", () => { + expect(converter.convert("2024-02-30")).toBeNull(); + }); + + test("returns null for non-leap-year Feb 29", () => { + expect(converter.convert("2023-02-29")).toBeNull(); + }); + + test("accepts leap-year Feb 29", () => { + const result = converter.convert("2024-02-29"); + expect(result).toContain("2024-02-29"); + }); + + test("returns null for out-of-range month", () => { + expect(converter.convert("2024-13-01")).toBeNull(); + }); + + test("returns null for out-of-range day", () => { + expect(converter.convert("2024-01-32")).toBeNull(); + }); + + test("returns null for year 0 (outside BigQuery DATE range)", () => { + expect(converter.convert("0000-01-01")).toBeNull(); + }); + + test("accepts year 0001 (BigQuery DATE minimum)", () => { + const result = converter.convert("0001-01-01"); + expect(result).toContain("0001-01-01"); + }); + + test("accepts year 9999 (BigQuery DATE maximum)", () => { + const result = converter.convert("9999-12-31"); + expect(result).toContain("9999-12-31"); + }); + + test("returns null for datetime without timezone", () => { + expect(converter.convert("2024-01-15T10:30:00")).toBeNull(); + }); + + test("returns null for out-of-range hour", () => { + expect(converter.convert("2024-01-15T25:00:00Z")).toBeNull(); + }); + + test("returns null for hour 24 (avoids silent next-day shift)", () => { + // ISO 8601 allows 24:00:00 as end-of-day, equivalent to next day 00:00. 
+ // JS Date parses it as such and rolls forward, which would silently + // misfile the row into the next-day partition. 0.2.x passed the raw + // string to BigQuery, which rejected hour=24 outright. Reject here to + // match the loud-failure behavior rather than silent misfiling. + expect(converter.convert("2024-01-15T24:00:00Z")).toBeNull(); + }); + + test("returns null for out-of-range minute", () => { + expect(converter.convert("2024-01-15T23:60:00Z")).toBeNull(); + }); + + test("accepts timezone offset without colon", () => { + const result = converter.convert("2024-01-15T10:30:00+0800"); + expect(result).toContain("2024-01-15"); + }); + + test("accepts fractional seconds beyond millisecond precision", () => { + const result = converter.convert("2024-01-15T10:30:00.123456Z"); + expect(result).toContain("2024-01-15"); + }); + + test("accepts space separator between date and time (RFC 3339 alt form)", () => { + const result = converter.convert("2024-01-15 10:30:00Z"); + expect(result).toContain("2024-01-15"); }); test("returns null for null", () => { @@ -104,6 +200,32 @@ describe("PartitionValueConverter", () => { const result = converter.convert(date); expect(result).toBe("2024-01-15"); }); + + test("converts ISO 8601 date-only string to BigQuery date string", () => { + const result = converter.convert("2024-01-15"); + expect(result).toBe("2024-01-15"); + }); + + test("converts ISO 8601 datetime string to BigQuery date string", () => { + const result = converter.convert("2024-01-15T10:30:00Z"); + expect(result).toBe("2024-01-15"); + }); + + test("uses UTC date component for timezone-suffixed datetime string", () => { + // 2024-01-15T22:00:00-08:00 == 2024-01-16T06:00:00Z. The DATE column + // takes the UTC date component, matching how Firestore Timestamps are + // handled. Pinned so future changes to this contract are explicit. 
+ const result = converter.convert("2024-01-15T22:00:00-08:00"); + expect(result).toBe("2024-01-16"); + }); + + test("returns null for unparseable string", () => { + expect(converter.convert("not-a-date")).toBeNull(); + }); + + test("returns null for empty string", () => { + expect(converter.convert("")).toBeNull(); + }); }); describe("convert with DATETIME type", () => { @@ -134,5 +256,36 @@ describe("PartitionValueConverter", () => { expect(result).toBeDefined(); expect(result).toContain("2024-01-15"); }); + + test("converts ISO 8601 datetime string to BigQuery datetime string", () => { + const result = converter.convert("2024-01-15T10:30:00Z"); + expect(result).toBeDefined(); + expect(result).toContain("2024-01-15"); + }); + + test("converts ISO 8601 date-only string to BigQuery datetime string", () => { + const result = converter.convert("2024-01-15"); + expect(result).toBeDefined(); + expect(result).toContain("2024-01-15"); + }); + + test("DATETIME output uses BigQuery canonical form (no Z, space separator)", () => { + // BigQuery DATETIME columns reject the 'Z' timezone suffix and require + // a space (not 'T') between date and time. @google-cloud/bigquery's + // BigQuery.datetime() helper already normalises ISO 8601 input to this + // canonical form, so feeding it date.toISOString() (which always ends + // in 'Z') is safe. Pinned so the contract does not silently regress. 
+ expect(converter.convert("2024-01-15T10:30:00Z")).toBe( + "2024-01-15 10:30:00.000" + ); + }); + + test("returns null for unparseable string", () => { + expect(converter.convert("not-a-date")).toBeNull(); + }); + + test("returns null for empty string", () => { + expect(converter.convert("")).toBeNull(); + }); }); }); diff --git a/firestore-bigquery-export/firestore-bigquery-change-tracker/src/bigquery/partitioning/converter.ts b/firestore-bigquery-export/firestore-bigquery-change-tracker/src/bigquery/partitioning/converter.ts index c1a6a4849..8d008d635 100644 --- a/firestore-bigquery-export/firestore-bigquery-change-tracker/src/bigquery/partitioning/converter.ts +++ b/firestore-bigquery-export/firestore-bigquery-change-tracker/src/bigquery/partitioning/converter.ts @@ -27,17 +27,71 @@ export class PartitionValueConverter { ).toDate(); } else if (value instanceof Date && !isNaN(value.getTime())) { date = value; + } else if (typeof value === "string") { + // Strict ISO 8601 / RFC 3339: YYYY-MM-DD, optionally followed by T or + // space-separated HH:MM[:SS[.ffffff]] and a required timezone designator + // when the time component is present. JS Date parsing alone is too + // permissive — it silently normalizes invalid inputs (e.g. "2024-02-30" + // → "2024-03-01"), accepts partial dates ("2024-01"), and reads bare + // numerics as years ("1" → "2001-01-01"). Reject all of those. + const m = value.match( + /^(\d{4})-(\d{2})-(\d{2})(?:[T ](\d{2}):(\d{2})(?::(\d{2})(?:\.\d+)?)?(Z|[+-]\d{2}:?\d{2}))?$/ + ); + if (!m) { + return null; + } + const yearN = Number(m[1]); + const monthN = Number(m[2]); + const dayN = Number(m[3]); + // BigQuery DATE / DATETIME / TIMESTAMP all reject year 0 — the supported + // range is 0001-01-01 to 9999-12-31. Reject client-side so the row gets + // a clear warning instead of a server-side insert error. + if (yearN < 1) { + return null; + } + // Reject calendar-invalid components (Feb 30, non-leap Feb 29, etc.). 
+ // setUTCFullYear avoids the legacy 2-digit-year quirk of Date.UTC(). + const validator = new Date(0); + validator.setUTCFullYear(yearN, monthN - 1, dayN); + if ( + validator.getUTCFullYear() !== yearN || + validator.getUTCMonth() + 1 !== monthN || + validator.getUTCDate() !== dayN + ) { + return null; + } + // Reject hour 24 (ISO 8601 allows it as end-of-day, but JS Date and the + // pre-0.3.0 string passthrough both treat it differently: JS rolls to + // next day, BigQuery DATETIME rejects the row outright). Rather than + // silently misfile the row into the next-day partition, reject here so + // the caller logs firestoreTimePartitionFieldError and the row lands in + // __NULL__. Minute and second out-of-range values are caught by + // new Date() returning Invalid Date below. + if (m[4] !== undefined && Number(m[4]) > 23) { + return null; + } + const parsed = new Date(value); + if (isNaN(parsed.getTime())) { + return null; + } + date = parsed; } else { return null; } - switch (this.fieldType) { - case "DATETIME": - return BigQuery.datetime(date.toISOString()).value; - case "DATE": - return BigQuery.date(date.toISOString().substring(0, 10)).value; - case "TIMESTAMP": - return BigQuery.timestamp(date).value; + try { + switch (this.fieldType) { + case "DATETIME": + return BigQuery.datetime(date.toISOString()).value; + case "DATE": + return BigQuery.date(date.toISOString().substring(0, 10)).value; + case "TIMESTAMP": + return BigQuery.timestamp(date).value; + default: + return null; + } + } catch { + return null; + } } } }