diff --git a/schema.go b/schema.go index aa0689841..80b0d8f28 100644 --- a/schema.go +++ b/schema.go @@ -567,6 +567,8 @@ type SchemaVisitorPerPrimitiveType[T any] interface { VisitBinary() T VisitUUID() T VisitUnknown() T + VisitGeometry(GeometryType) T + VisitGeography(GeographyType) T } // Visit accepts a visitor and performs a post-order traversal of the given schema. diff --git a/schema_test.go b/schema_test.go index 0ae676b32..813216d7b 100644 --- a/schema_test.go +++ b/schema_test.go @@ -1035,3 +1035,177 @@ func TestSanitizeColumnNamesEmptyFieldName(t *testing.T) { assert.ErrorIs(t, err, iceberg.ErrInvalidSchema) assert.ErrorContains(t, err, "field name cannot be empty") } + +func TestSchemaWithGeometryGeographyTypes(t *testing.T) { + geom, err := iceberg.GeometryTypeOf("srid:4326") + require.NoError(t, err) + geog, err := iceberg.GeographyTypeOf("srid:4269", iceberg.EdgeAlgorithmKarney) + require.NoError(t, err) + + schema := iceberg.NewSchema(1, + iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int64, Required: true}, + iceberg.NestedField{ID: 2, Name: "simple_point", Type: iceberg.GeometryType{}, Required: false}, + iceberg.NestedField{ID: 3, Name: "location", Type: geom, Required: false}, + iceberg.NestedField{ID: 4, Name: "service_area", Type: geog, Required: false}, + ) + + data, err := json.Marshal(schema) + require.NoError(t, err) + + assert.JSONEq(t, `{ + "type": "struct", + "schema-id": 1, + "identifier-field-ids": [], + "fields": [ + {"id": 1, "name": "id", "type": "long", "required": true}, + {"id": 2, "name": "simple_point", "type": "geometry", "required": false}, + {"id": 3, "name": "location", "type": "geometry(srid:4326)", "required": false}, + {"id": 4, "name": "service_area", "type": "geography(srid:4269, karney)", "required": false} + ] + }`, string(data)) + + var unmarshaledSchema iceberg.Schema + require.NoError(t, json.Unmarshal(data, &unmarshaledSchema)) + assert.True(t, schema.Equals(&unmarshaledSchema)) +} + +func TestNestedFieldToStringGeographyGeometry(t *testing.T) { + geom, err := iceberg.GeometryTypeOf("srid:3857") + require.NoError(t, err) + geog, err := iceberg.GeographyTypeOf("srid:4269", iceberg.EdgeAlgorithmKarney) + require.NoError(t, err) + + tests := []struct { + field iceberg.NestedField + expected string + }{ + { + iceberg.NestedField{ID: 1, Name: "point", Type: iceberg.GeometryType{}, Required: false}, + "1: point: optional geometry", + }, + { + iceberg.NestedField{ID: 2, Name: "location", Type: geom, Required: true}, + "2: location: required geometry(srid:3857)", + }, + { + iceberg.NestedField{ID: 3, Name: "area", Type: iceberg.GeographyType{}, Required: false}, + "3: area: optional geography", + }, + { + iceberg.NestedField{ID: 4, Name: "region", Type: geog, Required: false}, + "4: region: optional geography(srid:4269, karney)", + }, + } + + for _, tt := range tests { + t.Run(tt.field.Name, func(t *testing.T) { + assert.Equal(t, tt.expected, tt.field.String()) + }) + } +} + +func TestSchemaWithGeometryInNestedStructures(t *testing.T) { + geom, err := iceberg.GeometryTypeOf("srid:4326") + require.NoError(t, err) + geog, err := iceberg.GeographyTypeOf("srid:4269", iceberg.EdgeAlgorithmSpherical) + require.NoError(t, err) + + schema := iceberg.NewSchema(1, + iceberg.NestedField{ + ID: 1, + Name: "locations", + Type: &iceberg.ListType{ + ElementID: 2, + Element: geom, + ElementRequired: true, + }, + Required: true, + }, + iceberg.NestedField{ + ID: 3, + Name: "region_data", + Type: &iceberg.MapType{ + KeyID: 4, + KeyType: iceberg.PrimitiveTypes.String, + ValueID: 5, + ValueType: geog, + ValueRequired: false, + }, + Required: false, + }, + iceberg.NestedField{ + ID: 6, + Name: "place", + Type: &iceberg.StructType{ + FieldList: []iceberg.NestedField{ + {ID: 7, Name: "name", Type: iceberg.PrimitiveTypes.String, Required: true}, + {ID: 8, Name: "coords", Type: iceberg.GeometryType{}, Required: false}, + }, + }, + Required: false, + }, + ) + + data, err := json.Marshal(schema) + require.NoError(t, err) + + var unmarshaledSchema iceberg.Schema + require.NoError(t, json.Unmarshal(data, &unmarshaledSchema)) + assert.True(t, schema.Equals(&unmarshaledSchema)) + + assert.Equal(t, "1: locations: required list", schema.Field(0).String()) + assert.Equal(t, "3: region_data: optional map", schema.Field(1).String()) +} + +func TestPruneColumnsWithGeometry(t *testing.T) { + geom, err := iceberg.GeometryTypeOf("srid:4326") + require.NoError(t, err) + + schema := iceberg.NewSchema(1, + iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int32, Required: true}, + iceberg.NestedField{ID: 2, Name: "name", Type: iceberg.PrimitiveTypes.String, Required: false}, + iceberg.NestedField{ID: 3, Name: "location", Type: geom, Required: false}, + ) + + pruned, err := iceberg.PruneColumns(schema, map[int]iceberg.Void{1: {}, 3: {}}, false) + require.NoError(t, err) + + expected := iceberg.NewSchema(1, + iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int32, Required: true}, + iceberg.NestedField{ID: 3, Name: "location", Type: geom, Required: false}, + ) + + assert.True(t, pruned.Equals(expected)) +} + +func TestSchemaIndexByIDWithGeography(t *testing.T) { + geog, err := iceberg.GeographyTypeOf("srid:4269", iceberg.EdgeAlgorithmKarney) + require.NoError(t, err) + + schema := iceberg.NewSchema(1, + iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int32, Required: true}, + iceberg.NestedField{ID: 2, Name: "area", Type: geog, Required: false}, + ) + + index, err := iceberg.IndexByID(schema) + require.NoError(t, err) + + assert.Len(t, index, 2) + assert.Equal(t, geog, index[2].Type) + assert.Equal(t, "area", index[2].Name) +} + +func TestSchemaFindColumnNameWithGeometryGeography(t *testing.T) { + schema := iceberg.NewSchema(1, + iceberg.NestedField{ID: 1, Name: "point", Type: iceberg.GeometryType{}, Required: false}, + iceberg.NestedField{ID: 2, Name: "region", Type: iceberg.GeographyType{}, Required: false}, + ) + + name, ok := schema.FindColumnName(1) + assert.True(t, ok) + assert.Equal(t, "point", name) + + name, ok = schema.FindColumnName(2) + assert.True(t, ok) + assert.Equal(t, "region", name) +} diff --git a/table/arrow_utils.go b/table/arrow_utils.go index ad7c3631e..db832dc18 100644 --- a/table/arrow_utils.go +++ b/table/arrow_utils.go @@ -630,6 +630,22 @@ func (c convertToArrow) VisitUnknown() arrow.Field { } } +func (c convertToArrow) VisitGeometry(iceberg.GeometryType) arrow.Field { + // Passthrough binary for now, adding geoarrow-go support later + if c.useLargeTypes { + return arrow.Field{Type: arrow.BinaryTypes.LargeBinary} + } + return arrow.Field{Type: arrow.BinaryTypes.Binary} +} + +func (c convertToArrow) VisitGeography(iceberg.GeographyType) arrow.Field { + // Passthrough binary for now, adding geoarrow-go support later + if c.useLargeTypes { + return arrow.Field{Type: arrow.BinaryTypes.LargeBinary} + } + return arrow.Field{Type: arrow.BinaryTypes.Binary} +} + var _ iceberg.SchemaVisitorPerPrimitiveType[arrow.Field] = convertToArrow{} // SchemaToArrowSchema converts an Iceberg schema to an Arrow schema. If the metadata parameter diff --git a/table/metadata_builder_internal_test.go b/table/metadata_builder_internal_test.go index 4f8011275..1ce775e9c 100644 --- a/table/metadata_builder_internal_test.go +++ b/table/metadata_builder_internal_test.go @@ -1410,6 +1410,8 @@ func TestUnsupportedTypes(t *testing.T) { TestTypes := []iceberg.Type{ iceberg.TimestampNsType{}, iceberg.TimestampTzNsType{}, + iceberg.GeometryType{}, + iceberg.GeographyType{}, } for _, typ := range TestTypes { for unsupportedVersion := 1; unsupportedVersion < minFormatVersionForType(typ); unsupportedVersion++ { @@ -1837,3 +1839,83 @@ func TestUnknownTypeValidation(t *testing.T) { require.ErrorContains(t, err, "must be optional") }) } + +func TestGeometryGeographyNullOnlyDefaults(t *testing.T) { + testTypes := []struct { + name string + typ iceberg.Type + }{ + {"geometry", iceberg.GeometryType{}}, + {"geography", iceberg.GeographyType{}}, + } + + for _, tt := range testTypes { + t.Run(tt.name+" with non-null initial default", func(t *testing.T) { + defaultValue := "POINT(0 0)" + sc := iceberg.NewSchema(0, + iceberg.NestedField{ + Type: tt.typ, + ID: 1, + Name: "location", + Required: false, + InitialDefault: &defaultValue, + }, + ) + + err := checkSchemaCompatibility(sc, 3) + require.Error(t, err) + require.ErrorContains(t, err, "columns must default to null") + require.ErrorIs(t, err, iceberg.ErrInvalidSchema) + }) + + t.Run(tt.name+" with non-null write default", func(t *testing.T) { + defaultValue := "POINT(0 0)" + sc := iceberg.NewSchema(0, + iceberg.NestedField{ + Type: tt.typ, + ID: 1, + Name: "location", + Required: false, + WriteDefault: &defaultValue, + }, + ) + + err := checkSchemaCompatibility(sc, 3) + require.Error(t, err) + require.ErrorContains(t, err, "columns must default to null") + require.ErrorIs(t, err, iceberg.ErrInvalidSchema) + }) + + t.Run(tt.name+" with null defaults", func(t *testing.T) { + sc := iceberg.NewSchema(0, + iceberg.NestedField{ + Type: tt.typ, + ID: 1, + Name: "location", + Required: false, + }, + ) + + err := checkSchemaCompatibility(sc, 3) + require.NoError(t, err) + }) + + t.Run(tt.name+" in v2 with non-null initial default", func(t *testing.T) { + defaultValue := "POINT(0 0)" + sc := iceberg.NewSchema(0, + iceberg.NestedField{ + Type: tt.typ, + ID: 1, + Name: "location", + Required: false, + InitialDefault: &defaultValue, + }, + ) + + err := checkSchemaCompatibility(sc, 2) + require.Error(t, err) + require.ErrorContains(t, err, "is not supported until v3") + require.ErrorIs(t, err, iceberg.ErrInvalidSchema) + }) + } +} diff --git a/table/metadata_schema_compatibility.go b/table/metadata_schema_compatibility.go index 91fdb0cf0..b278442b1 100644 --- a/table/metadata_schema_compatibility.go +++ b/table/metadata_schema_compatibility.go @@ -39,7 +39,17 @@ func (e ErrIncompatibleSchema) Error() string { fmt.Fprintf(&problems, "\n- invalid type for %s: %s is not supported until v%d", f.ColName, f.Field.Type, f.UnsupportedType.MinFormatVersion) } if f.InvalidDefault != nil { - fmt.Fprintf(&problems, "\n- invalid initial default for %s: non-null default (%v) is not supported until v%d", f.ColName, f.Field.InitialDefault, f.InvalidDefault.MinFormatVersion) + switch f.Field.Type.(type) { + case iceberg.GeometryType, iceberg.GeographyType: + if f.Field.InitialDefault != nil { + fmt.Fprintf(&problems, "\n- invalid initial default for %s: %s columns must default to null", f.ColName, f.Field.Type) + } + if f.Field.WriteDefault != nil { + fmt.Fprintf(&problems, "\n- invalid write default for %s: %s columns must default to null", f.ColName, f.Field.Type) + } + default: + fmt.Fprintf(&problems, "\n- invalid initial default for %s: non-null default (%v) is not supported until v%d", f.ColName, f.Field.InitialDefault, f.InvalidDefault.MinFormatVersion) + } } } @@ -109,12 +119,30 @@ func checkSchemaCompatibility(sc *iceberg.Schema, formatVersion int) error { }) } - if field.InitialDefault != nil && formatVersion < defaultValuesMinFormatVersion { - problems = append(problems, IncompatibleField{ - Field: field, - ColName: colName, - InvalidDefault: &InvalidDefault{MinFormatVersion: defaultValuesMinFormatVersion, WriteDefault: field.InitialDefault}, - }) + switch field.Type.(type) { + case iceberg.GeometryType, iceberg.GeographyType: + if field.InitialDefault != nil { + problems = append(problems, IncompatibleField{ + Field: field, + ColName: colName, + InvalidDefault: &InvalidDefault{MinFormatVersion: formatVersion, WriteDefault: field.InitialDefault}, + }) + } + if field.WriteDefault != nil { + problems = append(problems, IncompatibleField{ + Field: field, + ColName: colName, + InvalidDefault: &InvalidDefault{MinFormatVersion: formatVersion, WriteDefault: field.WriteDefault}, + }) + } + default: + if field.InitialDefault != nil && formatVersion < defaultValuesMinFormatVersion { + problems = append(problems, IncompatibleField{ + Field: field, + ColName: colName, + InvalidDefault: &InvalidDefault{MinFormatVersion: defaultValuesMinFormatVersion, WriteDefault: field.InitialDefault}, + }) + } } } @@ -130,7 +158,7 @@ func checkSchemaCompatibility(sc *iceberg.Schema, formatVersion int) error { // version number for types that require newer format versions. func minFormatVersionForType(t iceberg.Type) int { switch t.(type) { - case iceberg.TimestampNsType, iceberg.TimestampTzNsType, iceberg.UnknownType: + case iceberg.TimestampNsType, iceberg.TimestampTzNsType, iceberg.UnknownType, iceberg.GeometryType, iceberg.GeographyType: return 3 default: // All other types supported in v1+ diff --git a/table/substrait/substrait.go b/table/substrait/substrait.go index 8bcc80c3f..d08a61b59 100644 --- a/table/substrait/substrait.go +++ b/table/substrait/substrait.go @@ -169,6 +169,10 @@ func (convertToSubstrait) VisitUnknown() types.Type { // Returning nil indicates this type cannot be converted to Substrait return nil } +func (convertToSubstrait) VisitGeometry(iceberg.GeometryType) types.Type { return &types.BinaryType{} } +func (convertToSubstrait) VisitGeography(iceberg.GeographyType) types.Type { + return &types.BinaryType{} +} var _ iceberg.SchemaVisitorPerPrimitiveType[types.Type] = (*convertToSubstrait)(nil) diff --git a/table/update_schema_test.go b/table/update_schema_test.go index 16245c51e..3e504c677 100644 --- a/table/update_schema_test.go +++ b/table/update_schema_test.go @@ -344,6 +344,31 @@ func TestAddColumn(t *testing.T) { }}, }, newSchema.Fields()) }) + + t.Run("test update schema with add geometry and geography columns", func(t *testing.T) { + table := New([]string{"id"}, testMetadata, "", nil, nil) + txn := table.NewTransaction() + + geog, err := iceberg.GeographyTypeOf("srid:4269", iceberg.EdgeAlgorithmKarney) + assert.NoError(t, err) + + newSchema, err := NewUpdateSchema(txn, true, true). + AddColumn([]string{"geom"}, iceberg.GeometryType{}, "", false, nil). + AddColumn([]string{"geog"}, geog, "", false, nil). + Apply() + assert.NoError(t, err) + assert.NotNil(t, newSchema) + + geomField, ok := newSchema.FindFieldByName("geom") + assert.True(t, ok) + assert.Equal(t, 12, geomField.ID) + assert.Equal(t, iceberg.GeometryType{}, geomField.Type) + + geogField, ok := newSchema.FindFieldByName("geog") + assert.True(t, ok) + assert.Equal(t, 13, geogField.ID) + assert.True(t, geogField.Type.Equals(geog)) + }) } func TestApplyChanges(t *testing.T) { @@ -861,6 +886,31 @@ func TestErrorHandling(t *testing.T) { assert.Contains(t, err.Error(), "cannot change column nullability from optional to required") }) + t.Run("test update geography CRS and edge algorithm without allowIncompatibleChanges", func(t *testing.T) { + currentGeog, err := iceberg.GeographyTypeOf("srid:4269", iceberg.EdgeAlgorithmKarney) + assert.NoError(t, err) + targetGeog, err := iceberg.GeographyTypeOf("srid:4326", iceberg.EdgeAlgorithmSpherical) + assert.NoError(t, err) + + geoSchema := iceberg.NewSchema(1, + iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int32, Required: true}, + iceberg.NestedField{ID: 2, Name: "geog", Type: currentGeog, Required: false}, + ) + geoMeta, err := NewMetadata(geoSchema, nil, UnsortedSortOrder, "", iceberg.Properties{ + PropertyFormatVersion: "3", + }) + assert.NoError(t, err) + + table := New([]string{"geo"}, geoMeta, "", nil, nil) + txn := table.NewTransaction() + + _, err = NewUpdateSchema(txn, true, false).UpdateColumn([]string{"geog"}, ColumnUpdate{ + FieldType: iceberg.Optional[iceberg.Type]{Valid: true, Val: targetGeog}, + }).Apply() + assert.Error(t, err) + assert.Contains(t, err.Error(), "cannot promote geography(srid:4269, karney) to geography(srid:4326, spherical)") + }) + t.Run("test add required field without default value", func(t *testing.T) { table := New([]string{"id"}, testMetadata, "", nil, nil) txn := table.NewTransaction() diff --git a/table/update_spec_test.go b/table/update_spec_test.go index 0359490bc..ffca67057 100644 --- a/table/update_spec_test.go +++ b/table/update_spec_test.go @@ -204,6 +204,49 @@ func TestUpdateSpecAddField(t *testing.T) { assert.NotNil(t, newSpec) assert.Equal(t, "street_void_1001", newSpec.FieldsBySourceID(5)[0].Name) }) + + t.Run("reject geometry source for identity partition transform", func(t *testing.T) { + geoSchema := iceberg.NewSchema(1, + iceberg.NestedField{ID: 1, Name: "id", Required: true, Type: iceberg.PrimitiveTypes.Int64}, + iceberg.NestedField{ID: 2, Name: "geom", Required: false, Type: iceberg.GeometryType{}}, + ) + metadata, err := table.NewMetadata(geoSchema, iceberg.UnpartitionedSpec, table.UnsortedSortOrder, "", nil) + assert.NoError(t, err) + + tbl := table.New([]string{"geo_geometry"}, metadata, "", nil, nil) + specUpdate := table.NewUpdateSpec(tbl.NewTransaction(), true) + + updates, reqs, err := specUpdate. + AddField("geom", iceberg.IdentityTransform{}, "geom_identity"). + BuildUpdates() + assert.Error(t, err) + assert.Nil(t, updates) + assert.Nil(t, reqs) + }) + + t.Run("reject geography source for identity partition transform", func(t *testing.T) { + geog, err := iceberg.GeographyTypeOf("srid:4269", iceberg.EdgeAlgorithmKarney) + assert.NoError(t, err) + + geoSchema := iceberg.NewSchema(1, + iceberg.NestedField{ID: 1, Name: "id", Required: true, Type: iceberg.PrimitiveTypes.Int64}, + iceberg.NestedField{ID: 2, Name: "geog", Required: false, Type: geog}, + ) + metadata, err := table.NewMetadata(geoSchema, iceberg.UnpartitionedSpec, table.UnsortedSortOrder, "", nil) + assert.NoError(t, err) + + tbl := table.New([]string{"geo_geography"}, metadata, "", nil, nil) + specUpdate := table.NewUpdateSpec(tbl.NewTransaction(), true) + + updates, reqs, err := specUpdate. + AddField("geog", iceberg.IdentityTransform{}, "geog_identity"). + BuildUpdates() + assert.Error(t, err) + assert.ErrorContains(t, err, "cannot transform") + assert.ErrorContains(t, err, "geog") + assert.Nil(t, updates) + assert.Nil(t, reqs) + }) } func TestUpdateSpecAddIdentityField(t *testing.T) { diff --git a/transforms.go b/transforms.go index 1f0c7e31b..6f245280d 100644 --- a/transforms.go +++ b/transforms.go @@ -107,9 +107,14 @@ func (t IdentityTransform) MarshalText() ([]byte, error) { func (IdentityTransform) String() string { return "identity" } func (IdentityTransform) CanTransform(t Type) bool { - _, ok := t.(PrimitiveType) + switch t.(type) { + case GeometryType, GeographyType: + return false + default: + _, ok := t.(PrimitiveType) - return ok + return ok + } } func (IdentityTransform) ResultType(t Type) Type { return t } func (IdentityTransform) PreservesOrder() bool { return true } diff --git a/transforms_test.go b/transforms_test.go index d06ef262c..339e29501 100644 --- a/transforms_test.go +++ b/transforms_test.go @@ -258,6 +258,8 @@ func TestCanTransform(t *testing.T) { }, notAllowed: []iceberg.Type{ &iceberg.StructType{}, &iceberg.ListType{}, &iceberg.MapType{}, + iceberg.GeometryType{}, + iceberg.GeographyType{}, }, }, { @@ -284,6 +286,8 @@ func TestCanTransform(t *testing.T) { notAllowed: []iceberg.Type{ iceberg.PrimitiveTypes.Bool, iceberg.PrimitiveTypes.Float32, iceberg.PrimitiveTypes.Float64, &iceberg.StructType{}, &iceberg.ListType{}, &iceberg.MapType{}, + iceberg.GeometryType{}, + iceberg.GeographyType{}, }, }, { diff --git a/types.go b/types.go index a4c0a8221..60a33190c 100644 --- a/types.go +++ b/types.go @@ -19,6 +19,7 @@ package iceberg import ( "encoding/json" + "errors" "fmt" "regexp" "slices" @@ -32,6 +33,8 @@ import ( var ( regexFromBrackets = regexp.MustCompile(`^\w+\[(\d+)\]$`) decimalRegex = regexp.MustCompile(`decimal\(\s*(\d+)\s*,\s*(\d+)\s*\)`) + geometryRegex = regexp.MustCompile(`(?i)^geometry\s*(?:\(\s*([^)]+?)\s*\))?$`) + geographyRegex = regexp.MustCompile(`(?i)^geography\s*(?:\(\s*([^,]+?)\s*(?:,\s*(\w+)\s*)?\))?$`) ) type Properties map[string]string @@ -153,6 +156,10 @@ func (t *typeIFace) UnmarshalJSON(b []byte) error { t.Type = BinaryType{} case "unknown": t.Type = UnknownType{} + case "geometry": + t.Type = GeometryType{} + case "geography": + t.Type = GeographyType{} default: switch { case strings.HasPrefix(typename, "fixed"): @@ -172,6 +179,46 @@ func (t *typeIFace) UnmarshalJSON(b []byte) error { prec, _ := strconv.Atoi(matches[1]) scale, _ := strconv.Atoi(matches[2]) t.Type = DecimalType{precision: prec, scale: scale} + case strings.HasPrefix(strings.ToLower(typename), "geometry"): + matches := geometryRegex.FindStringSubmatch(typename) + if len(matches) != 2 { + return fmt.Errorf("%w: %s", ErrInvalidTypeString, typename) + } + + if matches[1] != "" { + geom, err := GeometryTypeOf(strings.TrimSpace(matches[1])) + if err != nil { + return err + } + t.Type = geom + } else { + t.Type = GeometryType{} + } + case strings.HasPrefix(strings.ToLower(typename), "geography"): + matches := geographyRegex.FindStringSubmatch(typename) + if len(matches) != 3 { + return fmt.Errorf("%w: %s", ErrInvalidTypeString, typename) + } + + crs := defaultGeoCRS + if matches[1] != "" { + crs = strings.TrimSpace(matches[1]) + } + + var algorithm EdgeAlgorithm + if matches[2] != "" { + algo, err := ParseEdgeAlgorithm(strings.TrimSpace(matches[2])) + if err != nil { + return err + } + algorithm = algo + } + + geog, err := GeographyTypeOf(crs, algorithm) + if err != nil { + return err + } + t.Type = geog default: return fmt.Errorf("%w: unrecognized field type", ErrInvalidSchema) } @@ -777,6 +824,145 @@ func (UnknownType) primitive() {} func (UnknownType) Type() string { return "unknown" } func (UnknownType) String() string { return "unknown" } +type EdgeAlgorithm string + +const ( + EdgeAlgorithmSpherical EdgeAlgorithm = "spherical" + EdgeAlgorithmVincenty EdgeAlgorithm = "vincenty" + EdgeAlgorithmThomas EdgeAlgorithm = "thomas" + EdgeAlgorithmAndoyer EdgeAlgorithm = "andoyer" + EdgeAlgorithmKarney EdgeAlgorithm = "karney" +) + +func ParseEdgeAlgorithm(s string) (EdgeAlgorithm, error) { + switch strings.ToLower(s) { + case "spherical": + return EdgeAlgorithmSpherical, nil + case "vincenty": + return EdgeAlgorithmVincenty, nil + case "thomas": + return EdgeAlgorithmThomas, nil + case "andoyer": + return EdgeAlgorithmAndoyer, nil + case "karney": + return EdgeAlgorithmKarney, nil + default: + return "", fmt.Errorf("invalid edge interpolation algorithm: %s", s) + } +} + +func (e EdgeAlgorithm) String() string { + return string(e) +} + +const defaultGeoCRS = "OGC:CRS84" + +type GeometryType struct { + crs string +} + +func GeometryTypeOf(crs string) (GeometryType, error) { + if crs == "" { + return GeometryType{}, errors.New("invalid CRS: (empty string)") + } + if strings.EqualFold(crs, defaultGeoCRS) { + return GeometryType{}, nil + } + + return GeometryType{crs: crs}, nil +} + +func (g GeometryType) CRS() string { + if g.crs == "" { + return defaultGeoCRS + } + + return g.crs +} + +func (g GeometryType) Equals(other Type) bool { + rhs, ok := other.(GeometryType) + if !ok { + return false + } + + return g.crs == rhs.crs +} + +func (GeometryType) primitive() {} +func (g GeometryType) Type() string { + if g.crs == "" { + return "geometry" + } + + return fmt.Sprintf("geometry(%s)", g.crs) +} + +func (g GeometryType) String() string { + return g.Type() +} + +type GeographyType struct { + crs string + algorithm EdgeAlgorithm +} + +func GeographyTypeOf(crs string, algorithm EdgeAlgorithm) (GeographyType, error) { + if crs == "" { + return GeographyType{}, errors.New("invalid CRS: (empty string)") + } + + normalizedCRS := crs + if strings.EqualFold(crs, defaultGeoCRS) { + normalizedCRS = "" + } + + return GeographyType{crs: normalizedCRS, algorithm: algorithm}, nil +} + +func (g GeographyType) CRS() string { + if g.crs == "" { + return defaultGeoCRS + } + + return g.crs +} + +func (g GeographyType) Algorithm() EdgeAlgorithm { + return g.algorithm +} + +func (g GeographyType) Equals(other Type) bool { + rhs, ok := other.(GeographyType) + if !ok { + return false + } + + return g.crs == rhs.crs && g.algorithm == rhs.algorithm +} + +func (GeographyType) primitive() {} +func (g GeographyType) Type() string { + hasCRS := g.crs != "" + hasAlgo := g.algorithm != "" + + if !hasCRS && !hasAlgo { + return "geography" + } + if hasCRS && !hasAlgo { + return fmt.Sprintf("geography(%s)", g.crs) + } + if !hasCRS && hasAlgo { + return fmt.Sprintf("geography(%s, %s)", defaultGeoCRS, g.algorithm) + } + + return fmt.Sprintf("geography(%s, %s)", g.crs, g.algorithm) +} + +func (g GeographyType) String() string { + return g.Type() +} + var PrimitiveTypes = struct { Bool PrimitiveType Int32 PrimitiveType diff --git a/types_test.go b/types_test.go index f3735343a..f791e3c3e 100644 --- a/types_test.go +++ b/types_test.go @@ -479,3 +479,214 @@ func TestPropUInt(t *testing.T) { assert.Equal(t, uint(77), iceberg.PropUInt(props, "garbage", 77), "falls back on parse error") assert.Equal(t, uint(5), iceberg.PropUInt(props, "missing", 5), "falls back on missing key") } + +func TestGeometryType(t *testing.T) { + t.Run("default CRS", func(t *testing.T) { + geom := iceberg.GeometryType{} + assert.Equal(t, "OGC:CRS84", geom.CRS()) + assert.Equal(t, "geometry", geom.String()) + assert.True(t, geom.Equals(iceberg.GeometryType{})) + }) + + t.Run("custom CRS", func(t *testing.T) { + geom, err := iceberg.GeometryTypeOf("srid:3857") + require.NoError(t, err) + assert.Equal(t, "srid:3857", geom.CRS()) + assert.Equal(t, "geometry(srid:3857)", geom.String()) + }) + + t.Run("CRS normalization", func(t *testing.T) { + geom1, err := iceberg.GeometryTypeOf("OGC:CRS84") + require.NoError(t, err) + geom2 := iceberg.GeometryType{} + assert.True(t, geom1.Equals(geom2)) + assert.Equal(t, "geometry", geom1.String()) + }) + + t.Run("empty CRS error", func(t *testing.T) { + _, err := iceberg.GeometryTypeOf("") + assert.ErrorContains(t, err, "invalid CRS: (empty string)") + }) + + t.Run("JSON parsing - default", func(t *testing.T) { + data := `{"id": 1, "name": "location", "type": "geometry", "required": false}` + var n iceberg.NestedField + require.NoError(t, json.Unmarshal([]byte(data), &n)) + geom, ok := n.Type.(iceberg.GeometryType) + require.True(t, ok) + assert.Equal(t, "OGC:CRS84", geom.CRS()) + + out, err := json.Marshal(n) + require.NoError(t, err) + assert.JSONEq(t, data, string(out)) + }) + + t.Run("JSON parsing - custom CRS", func(t *testing.T) { + data := `{"id": 1, "name": "location", "type": "geometry(srid:4326)", "required": false}` + var n iceberg.NestedField + require.NoError(t, json.Unmarshal([]byte(data), &n)) + geom, ok := n.Type.(iceberg.GeometryType) + require.True(t, ok) + assert.Equal(t, "srid:4326", geom.CRS()) + + out, err := json.Marshal(n) + require.NoError(t, err) + assert.JSONEq(t, data, string(out)) + }) + + t.Run("JSON parsing - case insensitive", func(t *testing.T) { + data := `{"id": 1, "name": "location", "type": "Geometry(SRID:4326)", "required": false}` + var n iceberg.NestedField + require.NoError(t, json.Unmarshal([]byte(data), &n)) + geom, ok := n.Type.(iceberg.GeometryType) + require.True(t, ok) + assert.Equal(t, "SRID:4326", geom.CRS()) + }) + + t.Run("JSON parsing - whitespace tolerance", func(t *testing.T) { + data := `{"id": 1, "name": "location", "type": "geometry( srid:4326 )", "required": false}` + var n iceberg.NestedField + require.NoError(t, json.Unmarshal([]byte(data), &n)) + geom, ok := n.Type.(iceberg.GeometryType) + require.True(t, ok) + assert.Equal(t, "srid:4326", geom.CRS()) + }) +} + +func TestGeographyType(t *testing.T) { + t.Run("default CRS and algorithm", func(t *testing.T) { + geog := iceberg.GeographyType{} + assert.Equal(t, "OGC:CRS84", geog.CRS()) + assert.Equal(t, iceberg.EdgeAlgorithm(""), geog.Algorithm()) + assert.Equal(t, "geography", geog.String()) + assert.True(t, geog.Equals(iceberg.GeographyType{})) + }) + + t.Run("custom CRS only", func(t *testing.T) { + geog, err := iceberg.GeographyTypeOf("srid:4269", "") + require.NoError(t, err) + assert.Equal(t, "srid:4269", geog.CRS()) + assert.Equal(t, iceberg.EdgeAlgorithm(""), geog.Algorithm()) + assert.Equal(t, "geography(srid:4269)", geog.String()) + }) + + t.Run("default CRS with algorithm", func(t *testing.T) { + geog, err := iceberg.GeographyTypeOf("OGC:CRS84", iceberg.EdgeAlgorithmKarney) + require.NoError(t, err) + assert.Equal(t, "OGC:CRS84", geog.CRS()) + assert.Equal(t, iceberg.EdgeAlgorithmKarney, geog.Algorithm()) + assert.Equal(t, "geography(OGC:CRS84, karney)", geog.String()) + }) + + t.Run("custom CRS with algorithm", func(t *testing.T) { + geog, err := iceberg.GeographyTypeOf("srid:4269", iceberg.EdgeAlgorithmKarney) + require.NoError(t, err) + assert.Equal(t, "srid:4269", geog.CRS()) + assert.Equal(t, iceberg.EdgeAlgorithmKarney, geog.Algorithm()) + assert.Equal(t, "geography(srid:4269, karney)", geog.String()) + }) + + t.Run("empty CRS error", func(t *testing.T) { + _, err := iceberg.GeographyTypeOf("", "") + assert.ErrorContains(t, err, "invalid CRS: (empty string)") + }) + + t.Run("JSON parsing - default", func(t *testing.T) { + data := `{"id": 1, "name": "area", "type": "geography", "required": false}` + var n iceberg.NestedField + require.NoError(t, json.Unmarshal([]byte(data), &n)) + geog, ok := n.Type.(iceberg.GeographyType) + require.True(t, ok) + assert.Equal(t, "OGC:CRS84", geog.CRS()) + assert.Equal(t, iceberg.EdgeAlgorithm(""), geog.Algorithm()) + + out, err := json.Marshal(n) + require.NoError(t, err) + assert.JSONEq(t, data, string(out)) + }) + + t.Run("JSON parsing - custom CRS", func(t *testing.T) { + data := `{"id": 1, "name": "area", "type": "geography(srid:4269)", "required": false}` + var n iceberg.NestedField + require.NoError(t, json.Unmarshal([]byte(data), &n)) + geog, ok := n.Type.(iceberg.GeographyType) + require.True(t, ok) + assert.Equal(t, "srid:4269", geog.CRS()) + assert.Equal(t, iceberg.EdgeAlgorithm(""), geog.Algorithm()) + + out, err := json.Marshal(n) + require.NoError(t, err) + assert.JSONEq(t, data, string(out)) + }) + + t.Run("JSON parsing - custom CRS with algorithm", func(t *testing.T) { + data := `{"id": 1, "name": "area", "type": "geography(srid:4269, karney)", "required": false}` + var n iceberg.NestedField + require.NoError(t, json.Unmarshal([]byte(data), &n)) + geog, ok := n.Type.(iceberg.GeographyType) + require.True(t, ok) + assert.Equal(t, "srid:4269", geog.CRS()) + assert.Equal(t, iceberg.EdgeAlgorithmKarney, geog.Algorithm()) + + out, err := json.Marshal(n) + require.NoError(t, err) + assert.JSONEq(t, data, string(out)) + }) + + t.Run("JSON parsing - case insensitive", func(t *testing.T) { + data := `{"id": 1, "name": "area", "type": "Geography(SRID:4269, KARNEY)", "required": false}` + var n iceberg.NestedField + require.NoError(t, json.Unmarshal([]byte(data), &n)) + geog, ok := n.Type.(iceberg.GeographyType) + require.True(t, ok) + assert.Equal(t, "SRID:4269", geog.CRS()) + assert.Equal(t, iceberg.EdgeAlgorithmKarney, geog.Algorithm()) + }) + + t.Run("JSON parsing - whitespace tolerance", func(t *testing.T) { + data := `{"id": 1, "name": "area", "type": "geography( srid:4269 , karney )", "required": false}` + var n iceberg.NestedField + require.NoError(t, json.Unmarshal([]byte(data), &n)) + geog, ok := n.Type.(iceberg.GeographyType) + require.True(t, ok) + assert.Equal(t, "srid:4269", geog.CRS()) + assert.Equal(t, iceberg.EdgeAlgorithmKarney, geog.Algorithm()) + }) + + t.Run("JSON parsing - invalid algorithm", func(t *testing.T) { + data := `{"id": 1, "name": "area", "type": "geography(srid:4269, invalid)", "required": false}` + var n iceberg.NestedField + err := json.Unmarshal([]byte(data), &n) + assert.ErrorContains(t, err, "invalid edge interpolation algorithm") + }) +} + +func TestEdgeAlgorithm(t *testing.T) { + tests := []struct { + input string + expected iceberg.EdgeAlgorithm + }{ + {"spherical", iceberg.EdgeAlgorithmSpherical}, + {"vincenty", iceberg.EdgeAlgorithmVincenty}, + {"thomas", iceberg.EdgeAlgorithmThomas}, + {"andoyer", iceberg.EdgeAlgorithmAndoyer}, + {"karney", iceberg.EdgeAlgorithmKarney}, + {"SPHERICAL", iceberg.EdgeAlgorithmSpherical}, + {"Vincenty", iceberg.EdgeAlgorithmVincenty}, + {"KARNEY", iceberg.EdgeAlgorithmKarney}, + } + + for _, tt := range tests { + t.Run(tt.input, func(t *testing.T) { + algo, err := iceberg.ParseEdgeAlgorithm(tt.input) + require.NoError(t, err) + assert.Equal(t, tt.expected, algo) + assert.Equal(t, string(tt.expected), algo.String()) + }) + } + + t.Run("invalid algorithm", func(t *testing.T) { + _, err := iceberg.ParseEdgeAlgorithm("invalid") + assert.ErrorContains(t, err, "invalid edge interpolation algorithm") + }) +}