From d85b5e35f4f24ecdd8e3412b8cb66baf25a9e2dc Mon Sep 17 00:00:00 2001 From: Moshe Immerman Date: Wed, 22 Apr 2026 08:08:54 +0300 Subject: [PATCH] refactor(hash): extract jsonMD5Raw helper and fix DeterministicUUID to use raw bytes Extract jsonMD5Raw as an internal helper function to compute MD5 digest without hex encoding. This allows DeterministicUUID to use the raw 16-byte digest directly instead of incorrectly parsing hex-encoded strings as UUID bytes, fixing a regression where UUID bytes were ASCII codes of hex digits. Also add comprehensive test coverage for JSONMD5Hash and DeterministicUUID, including passthrough behavior for UUID values, pointers, byte arrays, and strings. Update CI workflow to use gavel for testing and linting with PR comment support. Fixes incorrect UUID generation from non-UUID seeds. --- .github/workflows/test.yml | 17 +++- .gitignore | 2 +- har/pretty.go | 2 +- hash/hash.go | 60 +++++++++-- hash/hash_test.go | 198 +++++++++++++++++++++++++++++++++++++ 5 files changed, 268 insertions(+), 11 deletions(-) create mode 100644 hash/hash_test.go diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index aea3caa..212e3c5 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -13,6 +13,9 @@ jobs: go-version: - 1.25.x runs-on: ${{ matrix.platform }} + permissions: + contents: read + pull-requests: write # required when comment=true so the action can post/update the sticky PR comment steps: - name: Harden the runner (Audit all outbound calls) uses: step-security/harden-runner@f4a75cfd619ee5ce8d5b864b0d183aff3c69b55a # v2.13.1 @@ -25,5 +28,15 @@ jobs: go-version: ${{ matrix.go-version }} - name: Checkout code uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - - name: Test - run: go test ./... + + - uses: flanksource/gavel@main + with: + args: test --lint + version: latest + json-file: gavel-results.json + html-file: gavel-results.html + summary-file: gavel-summary.md + artifact-name: gavel-results + comment: "true" + comment-header: gavel + fail-on-error: "true" diff --git a/.gitignore b/.gitignore index f01860a..6314f11 100644 --- a/.gitignore +++ b/.gitignore @@ -20,4 +20,4 @@ y cliff.toml .todos/ cmd/hx/fixtures/hx -.ginkgo +.ginkgo/ diff --git a/har/pretty.go b/har/pretty.go index 34743b0..a8b7f36 100644 --- a/har/pretty.go +++ b/har/pretty.go @@ -59,7 +59,7 @@ func headersToDescriptionList(headers []Header) api.DescriptionList { func (e Entry) Columns() []api.ColumnDef { return []api.ColumnDef{ api.Column("method").Label("Method").Style("font-bold text-green-500 uppercase").Build(), - api.Column("url").Label("URL").MaxWidth(80).Build(), + api.Column("url").Label("URL").MaxWidth(100).Build(), api.Column("status").Label("Status").Build(), api.Column("duration").Label("Duration").Build(), api.Column("size").Label("Size").Build(), diff --git a/hash/hash.go b/hash/hash.go index 84df28a..851d146 100644 --- a/hash/hash.go +++ b/hash/hash.go @@ -64,17 +64,22 @@ import ( // hash, err := JSONMD5Hash(config) // // hash will be consistent for the same config values func JSONMD5Hash(obj any) (string, error) { - data, err := json.Marshal(obj) + raw, err := jsonMD5Raw(obj) if err != nil { return "", err } + return hex.EncodeToString(raw[:]), nil +} - hash := md5.Sum(data) +// jsonMD5Raw marshals the object into JSON and returns the raw 16-byte MD5 +// digest. Internal helper shared by JSONMD5Hash (which hex-encodes it) and +// DeterministicUUID (which uses the raw bytes as UUID bytes). +func jsonMD5Raw(obj any) ([16]byte, error) { + data, err := json.Marshal(obj) if err != nil { - return "", err + return [16]byte{}, err } - - return hex.EncodeToString(hash[:]), nil + return md5.Sum(data), nil } // Sha256Hex computes the SHA256 hash of the input string and returns it @@ -113,15 +118,56 @@ func Sha256Hex(in string) string { // // Generate UUID from string // id2, err := DeterministicUUID("unique-resource-name") func DeterministicUUID(seed any) (uuid.UUID, error) { - byteHash, err := JSONMD5Hash(seed) + // If the seed is already a UUID (value, pointer, 16 bytes, or parseable + // string — uuid.Nil included), return it verbatim. Re-hashing a UUID would + // produce a different UUID, defeating the caller's intent. + if id, ok := asUUID(seed); ok { + return id, nil + } + + raw, err := jsonMD5Raw(seed) if err != nil { return uuid.Nil, err } - id, err := uuid.FromBytes([]byte(byteHash[0:16])) + // md5.Sum returns exactly 16 bytes, which is the size of a UUID. Use the + // raw digest directly — NOT the hex-encoded representation. + id, err := uuid.FromBytes(raw[:]) if err != nil { return uuid.Nil, err } return id, nil } + +// asUUID reports whether seed is already a UUID in one of its common forms: +// uuid.UUID, *uuid.UUID, [16]byte, []byte of length 16, or a string that +// parses as a UUID. uuid.Nil counts as a valid UUID. +// +// Composite seeds ([]string, pq.StringArray, structs, etc.) are not unwrapped +// — a single-element slice containing a UUID is still a composite and should +// be hashed. +func asUUID(seed any) (uuid.UUID, bool) { + switch v := seed.(type) { + case uuid.UUID: + return v, true + case *uuid.UUID: + if v == nil { + return uuid.Nil, false + } + return *v, true + case [16]byte: + return uuid.UUID(v), true + case []byte: + if len(v) == 16 { + var id uuid.UUID + copy(id[:], v) + return id, true + } + case string: + if id, err := uuid.Parse(v); err == nil { + return id, true + } + } + return uuid.Nil, false +} diff --git a/hash/hash_test.go b/hash/hash_test.go new file mode 100644 index 0000000..ee84e7c --- /dev/null +++ b/hash/hash_test.go @@ -0,0 +1,198 @@ +package hash + +import ( + "bytes" + "crypto/md5" + "encoding/hex" + "encoding/json" + "testing" + + "github.com/google/uuid" +) + +func TestJSONMD5Hash_ReturnsHexEncoded(t *testing.T) { + // Hex encoding of an MD5 digest is always 32 characters. + h, err := JSONMD5Hash("hello") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(h) != 32 { + t.Fatalf("expected 32-char hex hash, got %d chars: %q", len(h), h) + } + if _, err := hex.DecodeString(h); err != nil { + t.Fatalf("hash is not valid hex: %v", err) + } +} + +func TestJSONMD5Hash_Deterministic(t *testing.T) { + a, _ := JSONMD5Hash(map[string]string{"k": "v"}) + b, _ := JSONMD5Hash(map[string]string{"k": "v"}) + if a != b { + t.Fatalf("expected deterministic hash, got %q vs %q", a, b) + } +} + +func TestDeterministicUUID_UsesRawBytes(t *testing.T) { + // Regression for a bug where DeterministicUUID fed the *hex-encoded* + // JSONMD5Hash string into uuid.FromBytes, producing UUIDs whose bytes + // were the ASCII codes of hex digits (e.g. 30663964-3638-3061-... where + // 0x30='0', 0x66='f', 0x39='9', 0x64='d'). The correct behavior is to + // use the raw 16-byte md5 digest as the UUID bytes. + + seed := "test-seed" + got, err := DeterministicUUID(seed) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + data, _ := json.Marshal(seed) + sum := md5.Sum(data) + // The UUID bytes must equal the raw md5 bytes. + for i := 0; i < 16; i++ { + if got[i] != sum[i] { + t.Fatalf("UUID byte %d: want %#x, got %#x", i, sum[i], got[i]) + } + } + + // Also assert the UUID is NOT the ASCII-hex-encoded variant of the hex + // representation of the md5, which is what the old buggy code produced. + hexStr := hex.EncodeToString(sum[:]) + var bogus [16]byte + copy(bogus[:], hexStr[:16]) + if got == bogus { + t.Fatal("DeterministicUUID regressed: still uses ASCII hex bytes as UUID bytes") + } +} + +func TestDeterministicUUID_Deterministic(t *testing.T) { + a, _ := DeterministicUUID("same-seed") + b, _ := DeterministicUUID("same-seed") + if a != b { + t.Fatalf("expected deterministic UUID, got %q vs %q", a, b) + } +} + +func TestDeterministicUUID_DifferentSeedsProduceDifferentUUIDs(t *testing.T) { + a, _ := DeterministicUUID("seed-one") + b, _ := DeterministicUUID("seed-two") + if a == b { + t.Fatalf("expected distinct UUIDs for distinct seeds, got %q twice", a) + } +} + +const passthroughUUIDStr = "550e8400-e29b-41d4-a716-446655440000" + +func TestDeterministicUUID_PassesThroughValidUUIDString(t *testing.T) { + got, err := DeterministicUUID(passthroughUUIDStr) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if got.String() != passthroughUUIDStr { + t.Fatalf("expected passthrough %q, got %q", passthroughUUIDStr, got.String()) + } +} + +func TestDeterministicUUID_PassesThroughUUIDValue(t *testing.T) { + in := uuid.MustParse(passthroughUUIDStr) + got, err := DeterministicUUID(in) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if got != in { + t.Fatalf("expected passthrough %q, got %q", in, got) + } +} + +func TestDeterministicUUID_PassesThroughUUIDPointer(t *testing.T) { + in := uuid.MustParse(passthroughUUIDStr) + got, err := DeterministicUUID(&in) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if got != in { + t.Fatalf("expected passthrough %q, got %q", in, got) + } +} + +func TestDeterministicUUID_PassesThroughUUIDBytes(t *testing.T) { + in := uuid.MustParse(passthroughUUIDStr) + + gotArr, err := DeterministicUUID([16]byte(in)) + if err != nil { + t.Fatalf("unexpected error ([16]byte): %v", err) + } + if gotArr != in { + t.Fatalf("[16]byte passthrough: want %q, got %q", in, gotArr) + } + + gotSlice, err := DeterministicUUID(in[:]) + if err != nil { + t.Fatalf("unexpected error ([]byte): %v", err) + } + if gotSlice != in { + t.Fatalf("[]byte passthrough: want %q, got %q", in, gotSlice) + } +} + +func TestDeterministicUUID_PassesThroughNilUUID(t *testing.T) { + const nilStr = "00000000-0000-0000-0000-000000000000" + + cases := []struct { + name string + in any + }{ + {"uuid.Nil value", uuid.Nil}, + {"nil string", nilStr}, + {"zero [16]byte", [16]byte{}}, + } + for _, tc := range cases { + got, err := DeterministicUUID(tc.in) + if err != nil { + t.Fatalf("%s: unexpected error: %v", tc.name, err) + } + if got != uuid.Nil { + t.Fatalf("%s: expected uuid.Nil passthrough, got %q", tc.name, got) + } + } +} + +func TestDeterministicUUID_SingleElementSliceIsHashed(t *testing.T) { + in := []string{passthroughUUIDStr} + got, err := DeterministicUUID(in) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if got.String() == passthroughUUIDStr { + t.Fatal("single-element slice must be treated as a composite and hashed, not unwrapped") + } + if got == uuid.Nil { + t.Fatal("hashed composite should not be uuid.Nil") + } +} + +func TestDeterministicUUID_NonUUIDStringStillHashes(t *testing.T) { + got, err := DeterministicUUID("test-seed") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + data, _ := json.Marshal("test-seed") + sum := md5.Sum(data) + if !bytes.Equal(got[:], sum[:]) { + t.Fatalf("non-UUID strings must still hash; want %x, got %x", sum, got[:]) + } +} + +func TestDeterministicUUID_ShortByteSliceStillHashes(t *testing.T) { + in := []byte{1, 2, 3} + got, err := DeterministicUUID(in) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + data, _ := json.Marshal(in) + sum := md5.Sum(data) + if !bytes.Equal(got[:], sum[:]) { + t.Fatalf("len != 16 []byte must hash via JSON-MD5; want %x, got %x", sum, got[:]) + } +}