diff --git a/internal/strdist/strdist.go b/internal/strdist/strdist.go index 76e50d6d..8bcd8ac3 100644 --- a/internal/strdist/strdist.go +++ b/internal/strdist/strdist.go @@ -28,20 +28,28 @@ func StandardCost(ar, br rune) Cost { return Cost{SwapAB: 1, DeleteA: 1, InsertB: 1} } +// Distance returns the edit distance between two strings. The cost per edit is +// given by the costFunc argument. +// +// There is an optional cut argument that when set will finish the computation +// as early as possible once the final cost is certain to be >= cut. There is +// no guarantee about the exact cost returned when this is the case other than +// being >= cut. In particular, when cut is used, the function is not symmetric +// on a and b. func Distance(a, b string, f CostFunc, cut int64) int64 { if a == b { return 0 } lst := make([]CostInt, len(b)+1) bl := 0 - for bi, br := range b { - bl++ + for _, br := range b { cost := f(-1, br) - if cost.InsertB == Inhibit || lst[bi] == Inhibit { - lst[bi+1] = Inhibit + if cost.InsertB == Inhibit || lst[bl] == Inhibit { + lst[bl+1] = Inhibit } else { - lst[bi+1] = lst[bi] + cost.InsertB + lst[bl+1] = lst[bl] + cost.InsertB } + bl++ } lst = lst[:bl+1] // Not required, but caching means preventing the fast path @@ -59,6 +67,9 @@ func Distance(a, b string, f CostFunc, cut int64) int64 { lst[0] = last + cost.DeleteA } stop := true + if lst[0] < CostInt(cut) { + stop = false + } i := 0 for _, br := range b { i++ @@ -87,7 +98,6 @@ func Distance(a, b string, f CostFunc, cut int64) int64 { if debug { debugf("... %v", lst) } - _ = stop if cut != 0 && stop { break } diff --git a/internal/strdist/strdist_test.go b/internal/strdist/strdist_test.go index 9ea7e463..aab85310 100644 --- a/internal/strdist/strdist_test.go +++ b/internal/strdist/strdist_test.go @@ -36,6 +36,7 @@ var distanceTests = []distanceTest{ {f: uniqueCost, r: 6, a: "abc", b: "b"}, {f: uniqueCost, r: 6, a: "abc", b: "c"}, {f: uniqueCost, r: 9, a: "abc", b: ""}, + {f: uniqueCost, r: 6, cut: 6, a: "abc", b: ""}, {f: uniqueCost, r: 5, a: "abc", b: "abcd"}, {f: uniqueCost, r: 5, a: "abc", b: "dabc"}, {f: uniqueCost, r: 10, a: "abc", b: "adbdc"}, @@ -58,7 +59,13 @@ var distanceTests = []distanceTest{ {f: strdist.GlobCost, r: 1, a: "a**f/hij", b: "abc/def/hik"}, {f: strdist.GlobCost, r: 2, a: "a**fg", b: "abc/def/hik"}, {f: strdist.GlobCost, r: 0, a: "a**f/hij/klm", b: "abc/d**m"}, + {f: strdist.GlobCost, r: 1, a: "**a", b: ""}, {f: strdist.GlobCost, r: 0, a: "/*a/", b: "/a/"}, + {f: strdist.GlobCost, r: 3, a: "abc", b: ""}, + {f: strdist.GlobCost, r: 1, cut: 1, a: "abc", b: ""}, + // Not symmetric. + {f: strdist.GlobCost, r: 2, cut: 3, a: "ab", b: ""}, + {f: strdist.GlobCost, r: 2, cut: 1, a: "", b: "ab"}, } func (s *S) TestDistance(c *C) {