Skip to content

Commit d2f8663

Browse files
committed
feat(search): strip accents (diacritics) for fuzzy matching
Introduced a string preprocessing step in FuzzyMatch that strips diacritical marks (Unicode non-spacing marks) from both the query and the string being compared. This improves the search experience by letting users find results regardless of accents or other combining marks, e.g. typing "cafe" now matches "café".
1 parent c9dbc33 commit d2f8663

1 file changed

Lines changed: 35 additions & 7 deletions

File tree

Flow.Launcher.Infrastructure/StringMatcher.cs

Lines changed: 35 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22
using Flow.Launcher.Plugin.SharedModels;
33
using System;
44
using System.Collections.Generic;
5+
using System.Globalization;
56
using System.Linq;
7+
using System.Text;
68
using Flow.Launcher.Infrastructure.UserSettings;
79

810
namespace Flow.Launcher.Infrastructure
@@ -67,6 +69,8 @@ public MatchResult FuzzyMatch(string query, string stringToCompare, MatchOption
6769
return new MatchResult(false, UserSettingSearchPrecision);
6870

6971
query = query.Trim();
72+
query = RemoveAccents(query);
73+
stringToCompare = RemoveAccents(stringToCompare);
7074
TranslationMapping translationMapping = null;
7175
if (_alphabet is not null && _alphabet.ShouldTranslate(query))
7276
{
@@ -98,7 +102,9 @@ public MatchResult FuzzyMatch(string query, string stringToCompare, MatchOption
98102
var indexList = new List<int>();
99103
List<int> spaceIndices = new List<int>();
100104

101-
for (var compareStringIndex = 0; compareStringIndex < fullStringToCompareWithoutCase.Length; compareStringIndex++)
105+
for (var compareStringIndex = 0;
106+
compareStringIndex < fullStringToCompareWithoutCase.Length;
107+
compareStringIndex++)
102108
{
103109
// If acronyms matching successfully finished, this gets the remaining not matched acronyms for score calculation
104110
if (currentAcronymQueryIndex >= query.Length && acronymsMatched == query.Length)
@@ -160,7 +166,7 @@ public MatchResult FuzzyMatch(string query, string stringToCompare, MatchOption
160166
var startIndexToVerify = compareStringIndex - currentQuerySubstringCharacterIndex;
161167

162168
if (AllPreviousCharsMatched(startIndexToVerify, currentQuerySubstringCharacterIndex,
163-
fullStringToCompareWithoutCase, currentQuerySubstring))
169+
fullStringToCompareWithoutCase, currentQuerySubstring))
164170
{
165171
matchFoundInPreviousLoop = true;
166172

@@ -205,7 +211,8 @@ public MatchResult FuzzyMatch(string query, string stringToCompare, MatchOption
205211

206212
if (acronymScore >= (int)UserSettingSearchPrecision)
207213
{
208-
acronymMatchData = acronymMatchData.Select(x => translationMapping?.MapToOriginalIndex(x) ?? x).Distinct().ToList();
214+
acronymMatchData = acronymMatchData.Select(x => translationMapping?.MapToOriginalIndex(x) ?? x)
215+
.Distinct().ToList();
209216
return new MatchResult(true, UserSettingSearchPrecision, acronymMatchData, acronymScore);
210217
}
211218
}
@@ -218,19 +225,39 @@ public MatchResult FuzzyMatch(string query, string stringToCompare, MatchOption
218225
// firstMatchIndex - nearestSpaceIndex - 1 is to set the firstIndex as the index of the first matched char
219226
// preceded by a space e.g. 'world' matching 'hello world' firstIndex would be 0 not 6
220227
// giving more weight than 'we or donald' by allowing the distance calculation to treat the starting position at after the space.
221-
var score = CalculateSearchScore(query, stringToCompare, firstMatchIndex - nearestSpaceIndex - 1, spaceIndices,
228+
var score = CalculateSearchScore(query, stringToCompare, firstMatchIndex - nearestSpaceIndex - 1,
229+
spaceIndices,
222230
lastMatchIndex - firstMatchIndex, allSubstringsContainedInCompareString);
223231

224-
var resultList = indexList.Select(x => translationMapping?.MapToOriginalIndex(x) ?? x).Distinct().ToList();
232+
var resultList = indexList.Select(x => translationMapping?.MapToOriginalIndex(x) ?? x).Distinct()
233+
.ToList();
225234
return new MatchResult(true, UserSettingSearchPrecision, resultList, score);
226235
}
227236

228237
return new MatchResult(false, UserSettingSearchPrecision);
229238
}
230239

240+
/// <summary>
/// Strips diacritical marks from <paramref name="value"/> so that accented and unaccented
/// spellings compare equal (for example "café" becomes "cafe").
/// </summary>
/// <param name="value">Text to process; may be null or empty.</param>
/// <returns>
/// <paramref name="value"/> itself when it is null, empty or consists only of ASCII characters;
/// otherwise a new string in normalization form C with every character whose category is
/// <see cref="UnicodeCategory.NonSpacingMark"/> removed.
/// </returns>
private static string RemoveAccents(string value)
{
    if (string.IsNullOrEmpty(value))
    {
        return value;
    }

    // Fast path: ASCII characters have no canonical decomposition and none of them is a
    // non-spacing mark, so the decompose/filter/recompose round-trip below would return the
    // same text. FuzzyMatch calls this for both the query and the compared string, so skipping
    // the two normalizations and the builder allocation for plain ASCII input is worthwhile.
    var containsNonAscii = false;
    foreach (char c in value)
    {
        if (c > '\u007F')
        {
            containsNonAscii = true;
            break;
        }
    }

    if (!containsNonAscii)
    {
        return value;
    }

    // Form D splits precomposed characters into a base character followed by combining marks,
    // which lets the marks be dropped individually.
    string decomposed = value.Normalize(NormalizationForm.FormD);

    // The result can never be longer than the decomposed text, so presize to avoid regrowth.
    var builder = new StringBuilder(decomposed.Length);

    foreach (char c in decomposed)
    {
        if (char.GetUnicodeCategory(c) != UnicodeCategory.NonSpacingMark)
        {
            builder.Append(c);
        }
    }

    // Recompose whatever is left (e.g. sequences that decomposed but carried no accent).
    return builder.ToString().Normalize(NormalizationForm.FormC);
}
256+
231257
private static bool IsAcronym(string stringToCompare, int compareStringIndex)
232258
{
233-
if (IsAcronymChar(stringToCompare, compareStringIndex) || IsAcronymNumber(stringToCompare, compareStringIndex))
259+
if (IsAcronymChar(stringToCompare, compareStringIndex) ||
260+
IsAcronymNumber(stringToCompare, compareStringIndex))
234261
return true;
235262

236263
return false;
@@ -312,7 +339,8 @@ private static bool AllQuerySubstringsMatched(int currentQuerySubstringIndex, in
312339
return currentQuerySubstringIndex >= querySubstringsLength;
313340
}
314341

315-
private static int CalculateSearchScore(string query, string stringToCompare, int firstIndex, List<int> spaceIndices, int matchLen,
342+
private static int CalculateSearchScore(string query, string stringToCompare, int firstIndex,
343+
List<int> spaceIndices, int matchLen,
316344
bool allSubstringsContainedInCompareString)
317345
{
318346
// A match found near the beginning of a string is scored more than a match found near the end

0 commit comments

Comments
 (0)