diff --git a/ift/common/bazel_data_file_resolver.cc b/ift/common/bazel_data_file_resolver.cc index 06f2cbdd..ac320f58 100644 --- a/ift/common/bazel_data_file_resolver.cc +++ b/ift/common/bazel_data_file_resolver.cc @@ -1,6 +1,7 @@ #include "ift/common/bazel_data_file_resolver.h" #include + #include "absl/status/status.h" #include "absl/strings/str_cat.h" @@ -14,18 +15,23 @@ StatusOr> BazelDataFileResolver::Create( std::string error; auto runfiles = std::unique_ptr(Runfiles::Create(argv0, &error)); if (!runfiles) { - return absl::InternalError(absl::StrCat("Failed to create runfiles: ", error)); + return absl::InternalError( + absl::StrCat("Failed to create runfiles: ", error)); } - return std::shared_ptr(new BazelDataFileResolver(std::move(runfiles))); + return std::shared_ptr( + new BazelDataFileResolver(std::move(runfiles))); } -StatusOr> BazelDataFileResolver::CreateForTest() { +StatusOr> +BazelDataFileResolver::CreateForTest() { std::string error; auto runfiles = std::unique_ptr(Runfiles::CreateForTest(&error)); if (!runfiles) { - return absl::InternalError(absl::StrCat("Failed to create runfiles for test: ", error)); + return absl::InternalError( + absl::StrCat("Failed to create runfiles for test: ", error)); } - return std::shared_ptr(new BazelDataFileResolver(std::move(runfiles))); + return std::shared_ptr( + new BazelDataFileResolver(std::move(runfiles))); } BazelDataFileResolver::BazelDataFileResolver(std::unique_ptr runfiles) @@ -34,15 +40,18 @@ BazelDataFileResolver::BazelDataFileResolver(std::unique_ptr runfiles) StatusOr BazelDataFileResolver::GetUnicodeDataPath() const { std::string path = runfiles_->Rlocation(UNICODE_DATA_PATH); if (path.empty() || !std::filesystem::exists(path)) { - return absl::NotFoundError(absl::StrCat("Failed to find UnicodeData.txt via runfiles: ", path)); + return absl::NotFoundError( + absl::StrCat("Failed to find UnicodeData.txt via runfiles: ", path)); } return path; } -StatusOr BazelDataFileResolver::GetDerivedNormalizationPropsPath() const { +StatusOr BazelDataFileResolver::GetDerivedNormalizationPropsPath() + const { std::string path = runfiles_->Rlocation(DERIVED_PROPS_PATH); if (path.empty() || !std::filesystem::exists(path)) { - return absl::NotFoundError(absl::StrCat("Failed to find DerivedNormalizationProps.txt via runfiles: ", path)); + return absl::NotFoundError(absl::StrCat( + "Failed to find DerivedNormalizationProps.txt via runfiles: ", path)); } return path; } @@ -50,7 +59,8 @@ StatusOr BazelDataFileResolver::GetDerivedNormalizationPropsPath() StatusOr BazelDataFileResolver::GetFrequencyDataDirectory() const { std::string metadata_path = runfiles_->Rlocation(FREQ_DATA_METADATA); if (metadata_path.empty() || !std::filesystem::exists(metadata_path)) { - return absl::NotFoundError("Failed to find frequency data directory via runfiles"); + return absl::NotFoundError( + "Failed to find frequency data directory via runfiles"); } return std::filesystem::path(metadata_path).parent_path().string(); } diff --git a/ift/common/bit_buffer_test.cc b/ift/common/bit_buffer_test.cc index 6931d4f8..b18f717f 100644 --- a/ift/common/bit_buffer_test.cc +++ b/ift/common/bit_buffer_test.cc @@ -11,7 +11,7 @@ using std::vector; class BitBufferTest : public ::testing::Test { protected: - static void check_transcode(const vector &input, BranchFactor bf, + static void check_transcode(const vector& input, BranchFactor bf, unsigned int depth) { BitOutputBuffer bout(bf, depth); for (uint32_t value : input) { diff --git a/ift/common/bit_input_buffer.cc b/ift/common/bit_input_buffer.cc index a3b9f591..04aa46fe 100644 --- a/ift/common/bit_input_buffer.cc +++ b/ift/common/bit_input_buffer.cc @@ -50,7 +50,7 @@ absl::string_view BitInputBuffer::Remaining() const { } } -bool BitInputBuffer::read(uint32_t *out) { +bool BitInputBuffer::read(uint32_t* out) { if (!out) { return false; } diff --git a/ift/common/bit_input_buffer.h b/ift/common/bit_input_buffer.h index 0434cc18..16b527ea 100644 --- a/ift/common/bit_input_buffer.h +++ b/ift/common/bit_input_buffer.h @@ -24,7 +24,7 @@ class BitInputBuffer { // The lowest/rightmost bits of the value bits are set, the remaining are // cleared. The number of bits set depends on the BranchFactor this // BitInputBuffer was constructed with. - bool read(uint32_t *out); + bool read(uint32_t* out); private: const BranchFactor branch_factor; diff --git a/ift/common/bit_output_buffer_test.cc b/ift/common/bit_output_buffer_test.cc index 9eb27d65..8845b3b7 100644 --- a/ift/common/bit_output_buffer_test.cc +++ b/ift/common/bit_output_buffer_test.cc @@ -13,7 +13,7 @@ using std::string; class BitOutputBufferTest : public ::testing::Test { protected: - static string Bits(const string &s) { + static string Bits(const string& s) { if (s.empty()) { return ""; } diff --git a/ift/common/data_file_resolver.h b/ift/common/data_file_resolver.h index 9201eb21..77f620d7 100644 --- a/ift/common/data_file_resolver.h +++ b/ift/common/data_file_resolver.h @@ -2,6 +2,7 @@ #define IFT_COMMON_DATA_FILE_RESOLVER_H_ #include + #include "absl/status/statusor.h" namespace ift::common { @@ -14,7 +15,8 @@ class DataFileResolver { virtual absl::StatusOr GetUnicodeDataPath() const = 0; // Returns the path to DerivedNormalizationProps.txt - virtual absl::StatusOr GetDerivedNormalizationPropsPath() const = 0; + virtual absl::StatusOr GetDerivedNormalizationPropsPath() + const = 0; // Returns the path to the directory containing frequency data files. virtual absl::StatusOr GetFrequencyDataDirectory() const = 0; diff --git a/ift/common/fixed_data_file_resolver.h b/ift/common/fixed_data_file_resolver.h index 1f66c65b..171bd031 100644 --- a/ift/common/fixed_data_file_resolver.h +++ b/ift/common/fixed_data_file_resolver.h @@ -21,7 +21,8 @@ class FixedDataFileResolver : public DataFileResolver { return unicode_data_path_; } - absl::StatusOr GetDerivedNormalizationPropsPath() const override { + absl::StatusOr GetDerivedNormalizationPropsPath() + const override { return derived_props_path_; } diff --git a/ift/common/font_helper_test.cc b/ift/common/font_helper_test.cc index 0792037f..72536126 100644 --- a/ift/common/font_helper_test.cc +++ b/ift/common/font_helper_test.cc @@ -575,8 +575,10 @@ TEST_F(FontHelperTest, GlyfData_ShortOverflowSynthetic) { hb_face_unique_ptr face = make_hb_face(hb_face_builder_create()); std::vector loca = { - 0xC3, 0x50, // 50,000 (100,000 actual) - 0xC3, 0x52, // 50,002 (100,005 actual) + 0xC3, + 0x50, // 50,000 (100,000 actual) + 0xC3, + 0x52, // 50,002 (100,005 actual) }; { auto blob = diff --git a/ift/common/int_set.h b/ift/common/int_set.h index 3bc1f2ea..528c3b3c 100644 --- a/ift/common/int_set.h +++ b/ift/common/int_set.h @@ -367,7 +367,7 @@ class IntSet { // Typed variants class GlyphSet : public IntSet { public: - GlyphSet() : IntSet(){}; + GlyphSet() : IntSet() {}; GlyphSet(std::initializer_list values) : IntSet(values) {} explicit GlyphSet(const hb_set_t* set) : IntSet(set) {} explicit GlyphSet(const hb_set_unique_ptr& set) : IntSet(set) {} @@ -381,7 +381,7 @@ class GlyphSet : public IntSet { class CodepointSet : public IntSet { public: - CodepointSet() : IntSet(){}; + CodepointSet() : IntSet() {}; CodepointSet(std::initializer_list values) : IntSet(values) {} explicit CodepointSet(const hb_set_t* set) : IntSet(set) {} explicit CodepointSet(const hb_set_unique_ptr& set) : IntSet(set) {} @@ -395,7 +395,7 @@ class CodepointSet : public IntSet { class SegmentSet : public IntSet { public: - SegmentSet() : IntSet(){}; + SegmentSet() : IntSet() {}; SegmentSet(std::initializer_list values) : IntSet(values) {} explicit SegmentSet(const hb_set_t* set) : IntSet(set) {} explicit SegmentSet(const hb_set_unique_ptr& set) : IntSet(set) {} diff --git a/ift/config/auto_segmenter_config.cc b/ift/config/auto_segmenter_config.cc index 2086ad36..d296b0e1 100644 --- a/ift/config/auto_segmenter_config.cc +++ b/ift/config/auto_segmenter_config.cc @@ -88,7 +88,8 @@ enum Quality { // Would be reasonable to always have this set to at least the minimum group // size. // -// - condition_analysis_mode: use DEP_GRAPH_ONLY if dependency API is available, otherwise CLOSURE_ONLY. +// - condition_analysis_mode: use DEP_GRAPH_ONLY if dependency API is available, +// otherwise CLOSURE_ONLY. // // Merge group settings: // @@ -648,7 +649,8 @@ static void ApplyQualityLevelTo(Quality quality, SegmenterConfig& config) { // Based on measured network overhead cost in practice from the // ift demo. - config.mutable_base_cost_config()->set_network_overhead_cost(DEFAULT_NETWORK_COST); + config.mutable_base_cost_config()->set_network_overhead_cost( + DEFAULT_NETWORK_COST); for (auto& merge_group : *config.mutable_merge_groups()) { ApplyQualityLevelTo(quality, merge_group); @@ -656,8 +658,7 @@ static void ApplyQualityLevelTo(Quality quality, SegmenterConfig& config) { } StatusOr AutoSegmenterConfig::GenerateConfig( - hb_face_t* face, - const ift::common::DataFileResolver& resolver, + hb_face_t* face, const ift::common::DataFileResolver& resolver, std::optional primary_script, std::optional quality_level) { SegmenterConfig config; @@ -718,7 +719,8 @@ StatusOr AutoSegmenterConfig::GenerateConfig( cost->set_built_in_freq_data_name(script); if (script == primary_script_file) { - cost->set_initial_font_merge_threshold(-(double) DEFAULT_NETWORK_COST * (0.8)); + cost->set_initial_font_merge_threshold(-(double)DEFAULT_NETWORK_COST * + (0.8)); } } diff --git a/ift/config/auto_segmenter_config.h b/ift/config/auto_segmenter_config.h index 3c470712..d5f9bbfe 100644 --- a/ift/config/auto_segmenter_config.h +++ b/ift/config/auto_segmenter_config.h @@ -3,11 +3,11 @@ #include #include -#include "ift/common/data_file_resolver.h" #include "absl/status/statusor.h" #include "absl/strings/string_view.h" #include "hb.h" +#include "ift/common/data_file_resolver.h" #include "ift/config/segmenter_config.pb.h" namespace ift::config { @@ -26,8 +26,7 @@ class AutoSegmenterConfig { // times, high values have longer segmenting times but // typically results in better segmentation quality. static absl::StatusOr GenerateConfig( - hb_face_t* face, - const ift::common::DataFileResolver& resolver, + hb_face_t* face, const ift::common::DataFileResolver& resolver, std::optional primary_script = std::nullopt, std::optional quality_level = std::nullopt); diff --git a/ift/config/auto_segmenter_config_test.cc b/ift/config/auto_segmenter_config_test.cc index e302cd07..fd223081 100644 --- a/ift/config/auto_segmenter_config_test.cc +++ b/ift/config/auto_segmenter_config_test.cc @@ -1,5 +1,4 @@ #include "ift/config/auto_segmenter_config.h" -#include "ift/common/bazel_data_file_resolver.h" #include @@ -11,6 +10,7 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" #include "hb.h" +#include "ift/common/bazel_data_file_resolver.h" #include "ift/common/font_data.h" #include "ift/config/load_codepoints.h" @@ -157,16 +157,16 @@ base_segmentation_plan { generate_feature_segments: true )" #ifdef HB_DEPEND_API -"condition_analysis_mode: DEP_GRAPH_ONLY\n" + "condition_analysis_mode: DEP_GRAPH_ONLY\n" #else -"condition_analysis_mode: CLOSURE_ONLY\n" + "condition_analysis_mode: CLOSURE_ONLY\n" #endif -); + ); } TEST_F(AutoSegmenterConfigTest, Roboto_ScriptCyrillic) { - auto config_or = - AutoSegmenterConfig::GenerateConfig(face_.get(), *resolver, "Script_cyrillic"); + auto config_or = AutoSegmenterConfig::GenerateConfig(face_.get(), *resolver, + "Script_cyrillic"); ASSERT_TRUE(config_or.ok()) << config_or.status(); EXPECT_THAT( GetScripts(*config_or), @@ -176,8 +176,8 @@ TEST_F(AutoSegmenterConfigTest, Roboto_ScriptCyrillic) { } TEST_F(AutoSegmenterConfigTest, Roboto_LanguageFr) { - auto config_or = - AutoSegmenterConfig::GenerateConfig(face_.get(), *resolver, "Language_fr"); + auto config_or = AutoSegmenterConfig::GenerateConfig(face_.get(), *resolver, + "Language_fr"); ASSERT_TRUE(config_or.ok()) << config_or.status(); EXPECT_THAT(GetScripts(*config_or), UnorderedElementsAre(Pair("Language_fr", "Language_fr.riegeli"), @@ -188,7 +188,8 @@ TEST_F(AutoSegmenterConfigTest, Roboto_LanguageFr) { TEST_F(AutoSegmenterConfigTest, NotoSansJP_UnspecifiedPrimary) { if (!cjk_face_) GTEST_SKIP() << "NotoSansJP-Regular.ttf not found"; - auto config_or = AutoSegmenterConfig::GenerateConfig(cjk_face_.get(), *resolver); + auto config_or = + AutoSegmenterConfig::GenerateConfig(cjk_face_.get(), *resolver); ASSERT_TRUE(config_or.ok()) << config_or.status(); EXPECT_THAT(GetScripts(*config_or), UnorderedElementsAre(kLatin, kGreek, kCyrillic, kCJK, kSymbols, @@ -199,8 +200,8 @@ TEST_F(AutoSegmenterConfigTest, NotoSansJP_UnspecifiedPrimary) { TEST_F(AutoSegmenterConfigTest, NotoSansJP_ScriptCJK) { if (!cjk_face_) GTEST_SKIP() << "NotoSansJP-Regular.ttf not found"; - auto config_or = - AutoSegmenterConfig::GenerateConfig(cjk_face_.get(), *resolver, "Script_CJK"); + auto config_or = AutoSegmenterConfig::GenerateConfig(cjk_face_.get(), + *resolver, "Script_CJK"); ASSERT_TRUE(config_or.ok()) << config_or.status(); EXPECT_THAT(GetScripts(*config_or), UnorderedElementsAre(kLatin, kGreek, kCyrillic, kCJK, kSymbols, @@ -211,8 +212,8 @@ TEST_F(AutoSegmenterConfigTest, NotoSansJP_ScriptCJK) { TEST_F(AutoSegmenterConfigTest, NotoSansJP_ScriptJapanese) { if (!cjk_face_) GTEST_SKIP() << "NotoSansJP-Regular.ttf not found"; - auto config_or = - AutoSegmenterConfig::GenerateConfig(cjk_face_.get(), *resolver, "Script_japanese"); + auto config_or = AutoSegmenterConfig::GenerateConfig( + cjk_face_.get(), *resolver, "Script_japanese"); ASSERT_TRUE(config_or.ok()) << config_or.status(); EXPECT_THAT( GetScripts(*config_or), @@ -225,8 +226,8 @@ TEST_F(AutoSegmenterConfigTest, NotoSansJP_ScriptJapanese) { TEST_F(AutoSegmenterConfigTest, NotoSansJP_LanguageZhHans) { if (!cjk_face_) GTEST_SKIP() << "NotoSansJP-Regular.ttf not found"; - auto config_or = - AutoSegmenterConfig::GenerateConfig(cjk_face_.get(), *resolver, "Language_zh-Hans"); + auto config_or = AutoSegmenterConfig::GenerateConfig( + cjk_face_.get(), *resolver, "Language_zh-Hans"); ASSERT_TRUE(config_or.ok()) << config_or.status(); EXPECT_THAT(GetScripts(*config_or), UnorderedElementsAre( @@ -238,14 +239,14 @@ TEST_F(AutoSegmenterConfigTest, NotoSansJP_LanguageZhHans) { } TEST_F(AutoSegmenterConfigTest, Roboto_ScriptNotFound) { - auto config_or = - AutoSegmenterConfig::GenerateConfig(face_.get(), *resolver, "Script_foobar"); + auto config_or = AutoSegmenterConfig::GenerateConfig(face_.get(), *resolver, + "Script_foobar"); EXPECT_EQ(config_or.status().code(), absl::StatusCode::kNotFound); } TEST_F(AutoSegmenterConfigTest, Roboto_LanguageNotFound) { - auto config_or = - AutoSegmenterConfig::GenerateConfig(face_.get(), *resolver, "Language_foobar"); + auto config_or = AutoSegmenterConfig::GenerateConfig(face_.get(), *resolver, + "Language_foobar"); EXPECT_EQ(config_or.status().code(), absl::StatusCode::kNotFound); } @@ -267,8 +268,8 @@ TEST_F(AutoSegmenterConfigTest, Roboto_FullFileName_Script) { } TEST_F(AutoSegmenterConfigTest, Roboto_FullFileName_Language) { - auto config_or = - AutoSegmenterConfig::GenerateConfig(face_.get(), *resolver, "Language_fr.riegeli"); + auto config_or = AutoSegmenterConfig::GenerateConfig(face_.get(), *resolver, + "Language_fr.riegeli"); EXPECT_THAT(GetScripts(*config_or), UnorderedElementsAre(Pair("Language_fr", "Language_fr.riegeli"), kCyrillic, kGreek, kSymbols, kFallback)); @@ -291,8 +292,8 @@ TEST_F(AutoSegmenterConfigTest, LanguageMappingsExist) { } TEST_F(AutoSegmenterConfigTest, QualityLevelForcing) { - auto config_or = - AutoSegmenterConfig::GenerateConfig(face_.get(), *resolver, std::nullopt, 1); + auto config_or = AutoSegmenterConfig::GenerateConfig(face_.get(), *resolver, + std::nullopt, 1); ASSERT_TRUE(config_or.ok()) << config_or.status(); EXPECT_EQ(config_or->brotli_quality(), 0); EXPECT_EQ(config_or->unmapped_glyph_handling(), MOVE_TO_INIT_FONT); @@ -300,8 +301,8 @@ TEST_F(AutoSegmenterConfigTest, QualityLevelForcing) { EXPECT_EQ(config_or->brotli_quality_for_initial_font_merging(), 0); EXPECT_EQ(config_or->base_cost_config().optimization_cutoff_fraction(), 0.05); - auto config_or_8 = - AutoSegmenterConfig::GenerateConfig(face_.get(), *resolver, std::nullopt, 8); + auto config_or_8 = AutoSegmenterConfig::GenerateConfig(face_.get(), *resolver, + std::nullopt, 8); ASSERT_TRUE(config_or_8.ok()) << config_or_8.status(); EXPECT_EQ(config_or_8->brotli_quality(), 11); EXPECT_EQ(config_or_8->unmapped_glyph_handling(), MOVE_TO_INIT_FONT); diff --git a/ift/config/load_codepoints.cc b/ift/config/load_codepoints.cc index 97dda8cb..92546d38 100644 --- a/ift/config/load_codepoints.cc +++ b/ift/config/load_codepoints.cc @@ -205,8 +205,7 @@ static Status LoadFrequenciesFromRiegeliIndividual( } StatusOr LoadFrequenciesFromRiegeli( - const char* path, - std::optional filter) { + const char* path, std::optional filter) { auto paths = TRY(ExpandShardedPath(path)); UnicodeFrequenciesBuilder builder(filter); for (const auto& path : paths) { @@ -216,8 +215,7 @@ StatusOr LoadFrequenciesFromRiegeli( } StatusOr LoadBuiltInFrequencies( - const char* name, - const DataFileResolver& resolver, + const char* name, const DataFileResolver& resolver, std::optional filter) { std::string data_dir = TRY(resolver.GetFrequencyDataDirectory()); std::string path = StrCat(data_dir, "/", name); diff --git a/ift/config/load_codepoints.h b/ift/config/load_codepoints.h index 21bd8e65..f0fc9ee7 100644 --- a/ift/config/load_codepoints.h +++ b/ift/config/load_codepoints.h @@ -7,10 +7,10 @@ #include "absl/container/btree_set.h" #include "absl/container/flat_hash_map.h" #include "absl/status/statusor.h" +#include "ift/common/data_file_resolver.h" #include "ift/common/font_data.h" #include "ift/common/font_helper.h" #include "ift/common/int_set.h" -#include "ift/common/data_file_resolver.h" #include "ift/freq/unicode_frequencies.h" namespace ift::config { @@ -51,8 +51,7 @@ absl::StatusOr LoadFrequenciesFromRiegeli( // name is the file name to load. // Append "@*" to the name to load all sharded files for a name. absl::StatusOr LoadBuiltInFrequencies( - const char* name, - const ift::common::DataFileResolver& resolver, + const char* name, const ift::common::DataFileResolver& resolver, std::optional filter = std::nullopt); // Returns a list of all built-in frequency data sets and the codepoints diff --git a/ift/config/load_codepoints_test.cc b/ift/config/load_codepoints_test.cc index 8ad4a64c..3f7f901e 100644 --- a/ift/config/load_codepoints_test.cc +++ b/ift/config/load_codepoints_test.cc @@ -1,9 +1,9 @@ #include "ift/config/load_codepoints.h" -#include "ift/common/bazel_data_file_resolver.h" #include #include "gtest/gtest.h" +#include "ift/common/bazel_data_file_resolver.h" namespace ift::config { @@ -161,7 +161,8 @@ TEST_F(LoadCodepointsTest, ExpandShardedPath) { } TEST_F(LoadCodepointsTest, LoadBuiltInFrequencies) { - auto result = ift::config::LoadBuiltInFrequencies("Script_latin.riegeli", *resolver); + auto result = + ift::config::LoadBuiltInFrequencies("Script_latin.riegeli", *resolver); ASSERT_TRUE(result.ok()) << result.status(); EXPECT_EQ(result->ProbabilityFor(0x20, 0x20), 1.0); diff --git a/ift/config/segmenter_config_util.cc b/ift/config/segmenter_config_util.cc index 62d11ffa..eb835b89 100644 --- a/ift/config/segmenter_config_util.cc +++ b/ift/config/segmenter_config_util.cc @@ -2,6 +2,7 @@ #include +#include "absl/strings/str_cat.h" #include "ift/common/font_helper.h" #include "ift/common/int_set.h" #include "ift/common/try.h" @@ -12,8 +13,6 @@ #include "ift/encoder/subset_definition.h" #include "ift/feature_registry/feature_registry.h" #include "ift/freq/unicode_frequencies.h" -#include "absl/strings/str_cat.h" - using absl::btree_map; using absl::btree_set; @@ -120,8 +119,7 @@ std::vector SegmenterConfigUtil::ConfigToSegments( StatusOr SegmenterConfigUtil::ProtoToCostStrategy( const CostConfiguration& base, const CostConfiguration& config, - CodepointSet& covered_codepoints, - const CodepointSet& font_codepoints) { + CodepointSet& covered_codepoints, const CodepointSet& font_codepoints) { CostConfiguration merged = base; merged.MergeFrom(config); @@ -133,8 +131,10 @@ StatusOr SegmenterConfigUtil::ProtoToCostStrategy( UnicodeFrequencies freq = config.has_built_in_freq_data_name() - ? TRY(GetFrequencyData(merged.built_in_freq_data_name(), true, font_codepoints)) - : TRY(GetFrequencyData(merged.path_to_frequency_data(), false, font_codepoints)); + ? TRY(GetFrequencyData(merged.built_in_freq_data_name(), true, + font_codepoints)) + : TRY(GetFrequencyData(merged.path_to_frequency_data(), false, + font_codepoints)); covered_codepoints = freq.CoveredCodepoints(); diff --git a/ift/config/segmenter_config_util.h b/ift/config/segmenter_config_util.h index 13c491d8..ca4ebcc6 100644 --- a/ift/config/segmenter_config_util.h +++ b/ift/config/segmenter_config_util.h @@ -1,8 +1,9 @@ #ifndef IFT_CONFIG_SEGMENTER_CONFIG_UTIL_H_ #define IFT_CONFIG_SEGMENTER_CONFIG_UTIL_H_ -#include #include +#include + #include "absl/container/btree_map.h" #include "absl/status/statusor.h" #include "hb.h" @@ -29,7 +30,8 @@ struct SegmentationResult { */ class SegmenterConfigUtil { public: - SegmenterConfigUtil(std::string config_file_path, std::shared_ptr resolver) + SegmenterConfigUtil(std::string config_file_path, + std::shared_ptr resolver) : config_file_path_(config_file_path), resolver_(std::move(resolver)) {} /* @@ -97,8 +99,7 @@ class SegmenterConfigUtil { ProtoToMergeGroup(const std::vector& segments, const absl::flat_hash_map& id_to_index, const HeuristicConfiguration& base_heuristic, - const CostConfiguration& base_cost, - const MergeGroup& group, + const CostConfiguration& base_cost, const MergeGroup& group, const ift::common::CodepointSet& font_codepoints); static ift::common::SegmentSet MapToIndices( diff --git a/ift/config/segmenter_config_util_test.cc b/ift/config/segmenter_config_util_test.cc index 13d6162c..6ba97c77 100644 --- a/ift/config/segmenter_config_util_test.cc +++ b/ift/config/segmenter_config_util_test.cc @@ -1,11 +1,11 @@ #include "ift/config/segmenter_config_util.h" -#include "ift/common/bazel_data_file_resolver.h" #include #include #include "absl/container/btree_map.h" #include "gtest/gtest.h" +#include "ift/common/bazel_data_file_resolver.h" #include "ift/common/data_file_resolver.h" #include "ift/common/int_set.h" #include "ift/encoder/merge_strategy.h" @@ -17,15 +17,15 @@ using ift::config::SegmenterConfig; using absl::btree_map; using absl::btree_set; +using ift::common::BazelDataFileResolver; using ift::common::CodepointSet; +using ift::common::DataFileResolver; using ift::common::SegmentSet; using ift::config::SegmenterConfigUtil; using ift::encoder::MergeStrategy; using ift::encoder::SubsetDefinition; using ift::freq::UnicodeFrequencies; using ift::freq::UnicodeFrequenciesBuilder; -using ift::common::DataFileResolver; -using ift::common::BazelDataFileResolver; class SegmenterConfigUtilTest : public ::testing::Test { protected: @@ -47,7 +47,8 @@ MergeStrategy ExpectedCostStrategy( UnicodeFrequenciesBuilder freq_builder; freq_builder.Add(1, 1, 1); - MergeStrategy s = *MergeStrategy::CostBased(freq_builder.Build(), net_overhead, 1); + MergeStrategy s = + *MergeStrategy::CostBased(freq_builder.Build(), net_overhead, 1); s.SetOptimizationCutoffFraction(0.001); s.SetInitFontMergeThreshold(init_font_threshold); diff --git a/ift/dep_graph/dependency_graph.cc b/ift/dep_graph/dependency_graph.cc index 743d9e96..15911ec3 100644 --- a/ift/dep_graph/dependency_graph.cc +++ b/ift/dep_graph/dependency_graph.cc @@ -1,6 +1,4 @@ #include "ift/dep_graph/dependency_graph.h" -#include "ift/common/data_file_resolver.h" -#include "ift/dep_graph/unicode_edges.h" #include #include @@ -9,6 +7,7 @@ #include #include "absl/log/log.h" +#include "ift/common/data_file_resolver.h" #include "ift/common/font_data.h" #include "ift/common/font_helper.h" #include "ift/common/hb_set_unique_ptr.h" @@ -16,6 +15,7 @@ #include "ift/dep_graph/node.h" #include "ift/dep_graph/pending_edge.h" #include "ift/dep_graph/traversal.h" +#include "ift/dep_graph/unicode_edges.h" #include "ift/encoder/requested_segmentation_information.h" #include "ift/encoder/segment.h" #include "ift/encoder/types.h" @@ -28,6 +28,7 @@ using absl::Status; using absl::StatusOr; using bazel::tools::cpp::runfiles::Runfiles; using ift::common::CodepointSet; +using ift::common::DataFileResolver; using ift::common::FontHelper; using ift::common::GlyphSet; using ift::common::hb_font_unique_ptr; @@ -36,7 +37,6 @@ using ift::common::IntSet; using ift::common::make_hb_font; using ift::common::make_hb_set; using ift::common::SegmentSet; -using ift::common::DataFileResolver; using ift::encoder::glyph_id_t; using ift::encoder::RequestedSegmentationInformation; using ift::encoder::Segment; @@ -46,27 +46,26 @@ using ift::encoder::SubsetDefinition; namespace ift::dep_graph { StatusOr DependencyGraph::Create( - const RequestedSegmentationInformation* segmentation_info, - hb_face_t* face, + const RequestedSegmentationInformation* segmentation_info, hb_face_t* face, const DataFileResolver& resolver) { auto full_feature_set = TRY(FullFeatureSet(segmentation_info, face)); auto init_font_feature_set = TRY(InitFeatureSet(segmentation_info, face)); hb_depend_t* depend = hb_depend_from_face_or_fail(face); if (!depend) { - return absl::InternalError( - "Call to hb_depend_from_face_or_fail() failed."); + return absl::InternalError("Call to hb_depend_from_face_or_fail() failed."); } - auto unicode_edges = TRY(UnicodeEdges::ComputeUnicodeDependencyEdges(face, resolver)); + auto unicode_edges = + TRY(UnicodeEdges::ComputeUnicodeDependencyEdges(face, resolver)); - return DependencyGraph(segmentation_info, depend, face, full_feature_set, std::move(unicode_edges)); + return DependencyGraph(segmentation_info, depend, face, full_feature_set, + std::move(unicode_edges)); } DependencyGraph::DependencyGraph( const RequestedSegmentationInformation* segmentation_info, hb_depend_t* depend, hb_face_t* face, - flat_hash_set full_feature_set, - UnicodeEdges unicode_edges) + flat_hash_set full_feature_set, UnicodeEdges unicode_edges) : segmentation_info_(segmentation_info), original_face_(ift::common::make_hb_face(hb_face_reference(face))), full_feature_set_(full_feature_set), @@ -260,7 +259,8 @@ class TraversalContext { return TraverseEdgeTo(dest, edge, FontHelper::kCmap); } - Status TraverseCompositionEdge(hb_codepoint_t a, hb_codepoint_t b, hb_codepoint_t dest_unicode) { + Status TraverseCompositionEdge(hb_codepoint_t a, hb_codepoint_t b, + hb_codepoint_t dest_unicode) { bool can_reach_a = !unicode_filter || reached_unicodes_.contains(a) || unicode_filter->contains(a); bool can_reach_b = !unicode_filter || reached_unicodes_.contains(b) || @@ -823,8 +823,9 @@ StatusOr DependencyGraph::ClosureTraversal( TRY(InitFontFeatureSet())); } - // TraverseGraph(...) only records things reached via edges and not the starting nodes. - // So explicitly mark starting nodes as reached (if they are not filtered). + // TraverseGraph(...) only records things reached via edges and not the + // starting nodes. So explicitly mark starting nodes as reached (if they are + // not filtered). btree_set filtered_nodes; for (const auto& node : nodes) { if (node.IsGlyph() && !base_context.glyph_filter->contains(node.Id())) { @@ -910,14 +911,16 @@ Status DependencyGraph::HandleUnicodeOutgoingEdges( auto comp_edges = unicode_edges_.composition.find(unicode); if (comp_edges != unicode_edges_.composition.end()) { for (const auto& edge : comp_edges->second) { - TRYV(context->TraverseCompositionEdge(unicode, edge.other_source, edge.dest)); + TRYV(context->TraverseCompositionEdge(unicode, edge.other_source, + edge.dest)); } } auto decomp_edges = unicode_edges_.decomposition.find(unicode); if (decomp_edges != unicode_edges_.decomposition.end()) { for (hb_codepoint_t dest : decomp_edges->second) { - context->TraverseEdgeTo(Node::Unicode(unicode), Node::Unicode(dest), FontHelper::kCmap); + context->TraverseEdgeTo(Node::Unicode(unicode), Node::Unicode(dest), + FontHelper::kCmap); } } @@ -1048,8 +1051,6 @@ void DependencyGraph::HandleSubsetDefinitionOutgoingEdges( } } - - StatusOr> DependencyGraph::FullFeatureSet( const RequestedSegmentationInformation* segmentation_info, hb_face_t* face) { @@ -1230,9 +1231,6 @@ DependencyGraph::ComputeContextGlyphEdges() const { return out; } - - - flat_hash_map> DependencyGraph::ComputeFeatureEdges() const { flat_hash_map> edges; diff --git a/ift/dep_graph/dependency_graph.h b/ift/dep_graph/dependency_graph.h index d8f71f85..fa653410 100644 --- a/ift/dep_graph/dependency_graph.h +++ b/ift/dep_graph/dependency_graph.h @@ -8,9 +8,9 @@ #include "absl/container/flat_hash_map.h" #include "absl/status/statusor.h" #include "hb.h" +#include "ift/common/data_file_resolver.h" #include "ift/common/font_data.h" #include "ift/common/font_helper.h" -#include "ift/common/data_file_resolver.h" #include "ift/common/hb_set_unique_ptr.h" #include "ift/common/int_set.h" #include "ift/common/try.h" @@ -40,7 +40,6 @@ class TraversalContext; */ class DependencyGraph { private: - public: static constexpr uint32_t kNumberOfClosurePhases = 6; @@ -75,8 +74,7 @@ class DependencyGraph { static absl::StatusOr Create( const ift::encoder::RequestedSegmentationInformation* segmentation_info, - hb_face_t* face, - const ift::common::DataFileResolver& resolver); + hb_face_t* face, const ift::common::DataFileResolver& resolver); // Traverse the full dependency graph (segments, unicodes, and gids), starting // at one or more specific starting nodes. Attempts to mimic hb glyph closure @@ -206,17 +204,12 @@ class DependencyGraph { const ift::encoder::RequestedSegmentationInformation* segmentation_info, hb_face_t* face); - - const ift::encoder::RequestedSegmentationInformation* segmentation_info_; ift::common::hb_face_unique_ptr original_face_; absl::flat_hash_set full_feature_set_; - std::unique_ptr dependency_graph_; - - struct LayoutFeatureEdge { hb_tag_t layout_tag; hb_codepoint_t source_gid; @@ -247,20 +240,10 @@ class DependencyGraph { } }; - absl::flat_hash_map> ComputeFeatureEdges() const; absl::flat_hash_map> ComputeContextGlyphEdges() const; - static absl::Status ParseDerivedNormalizationProps( - const std::string& path, - common::CodepointSet& full_composition_exclusions); - - static absl::Status ParseUnicodeData( - const std::string& path, - const common::CodepointSet& unicodes, - const common::CodepointSet& full_composition_exclusions, - UnicodeEdges& result); absl::flat_hash_map> layout_feature_implied_edges_; diff --git a/ift/dep_graph/dependency_graph_test.cc b/ift/dep_graph/dependency_graph_test.cc index 0024d624..f39785e8 100644 --- a/ift/dep_graph/dependency_graph_test.cc +++ b/ift/dep_graph/dependency_graph_test.cc @@ -1,10 +1,10 @@ #include "ift/dep_graph/dependency_graph.h" -#include "ift/common/bazel_data_file_resolver.h" #include #include "gmock/gmock.h" #include "gtest/gtest.h" +#include "ift/common/bazel_data_file_resolver.h" #include "ift/common/data_file_resolver.h" #include "ift/common/font_data.h" #include "ift/common/font_helper.h" @@ -21,13 +21,13 @@ using ift::config::PATCH; using absl::btree_set; using absl::flat_hash_map; +using ift::common::BazelDataFileResolver; using ift::common::CodepointSet; +using ift::common::DataFileResolver; using ift::common::FontData; using ift::common::FontHelper; using ift::common::GlyphSet; using ift::common::hb_face_unique_ptr; -using ift::common::BazelDataFileResolver; -using ift::common::DataFileResolver; using ift::encoder::glyph_id_t; using ift::encoder::GlyphClosureCache; using ift::encoder::RequestedSegmentationInformation; @@ -44,14 +44,17 @@ class DependencyGraphTest : public ::testing::Test { DependencyGraphTest() : face(from_file("ift/common/testdata/Roboto-Regular.ttf")), resolver(*BazelDataFileResolver::CreateForTest()), - closure_cache(std::move(*GlyphClosureCache::Create(face.get(), *resolver))), + closure_cache( + std::move(*GlyphClosureCache::Create(face.get(), *resolver))), noto_sans_jp(from_file("ift/common/testdata/NotoSansJP-Regular.ttf")), noto_sans_jp_vf( from_file("ift/common/testdata/NotoSansJP-VF.cmap14.ttf")), - noto_sans_kr(from_file("ift/common/testdata/NotoSansKR[wght].subset.ttf")), + noto_sans_kr( + from_file("ift/common/testdata/NotoSansKR[wght].subset.ttf")), segmentation_info(*RequestedSegmentationInformation::Create( segments, WithDefaultFeatures({}), *closure_cache, PATCH)), - graph(*DependencyGraph::Create(segmentation_info.get(), face.get(), *resolver)) {} + graph(*DependencyGraph::Create(segmentation_info.get(), face.get(), + *resolver)) {} static hb_face_unique_ptr from_file(const char* filename) { hb_blob_t* blob = hb_blob_create_from_file_or_fail(filename); @@ -73,7 +76,8 @@ class DependencyGraphTest : public ::testing::Test { std::vector new_segments) { segmentation_info = *RequestedSegmentationInformation::Create( new_segments, new_init, *closure_cache, PATCH); - graph = *DependencyGraph::Create(segmentation_info.get(), face.get(), *resolver); + graph = *DependencyGraph::Create(segmentation_info.get(), face.get(), + *resolver); } void Reconfigure(hb_face_t* new_face, SubsetDefinition new_init, @@ -81,7 +85,8 @@ class DependencyGraphTest : public ::testing::Test { closure_cache = std::move(*GlyphClosureCache::Create(new_face, *resolver)); segmentation_info = *RequestedSegmentationInformation::Create( new_segments, new_init, *closure_cache, PATCH); - graph = *DependencyGraph::Create(segmentation_info.get(), new_face, *resolver); + graph = + *DependencyGraph::Create(segmentation_info.get(), new_face, *resolver); } private: @@ -139,27 +144,30 @@ TEST_F(DependencyGraphTest, ClosureTraversal_RecordsInputNodes) { GlyphSet all_g = GlyphSet::all(); CodepointSet all_u = CodepointSet::all(); - Node glyph_node = Node::Glyph(74); // 'f' + Node glyph_node = Node::Glyph(74); // 'f' Node unicode_node = Node::Unicode('a'); Node feature_node = Node::Feature(HB_TAG('l', 'i', 'g', 'a')); - auto r = graph.ClosureTraversal({glyph_node, unicode_node, feature_node}, &all_g, &all_u); + auto r = graph.ClosureTraversal({glyph_node, unicode_node, feature_node}, + &all_g, &all_u); ASSERT_TRUE(r.ok()) << r.status(); const auto& traversal = *r; EXPECT_TRUE(traversal.ReachedGlyphs().contains(74)); EXPECT_TRUE(traversal.ReachedCodepoints().contains('a')); - EXPECT_TRUE(traversal.ReachedLayoutFeatures().contains(HB_TAG('l', 'i', 'g', 'a'))); + EXPECT_TRUE( + traversal.ReachedLayoutFeatures().contains(HB_TAG('l', 'i', 'g', 'a'))); } TEST_F(DependencyGraphTest, ClosureTraversal_FiltersInputNodes) { GlyphSet empty_g; CodepointSet empty_u; - Node glyph_node = Node::Glyph(74); // 'f' + Node glyph_node = Node::Glyph(74); // 'f' Node unicode_node = Node::Unicode('a'); - auto r = graph.ClosureTraversal({glyph_node, unicode_node}, &empty_g, &empty_u); + auto r = + graph.ClosureTraversal({glyph_node, unicode_node}, &empty_g, &empty_u); ASSERT_TRUE(r.ok()) << r.status(); const auto& traversal = *r; @@ -170,76 +178,76 @@ TEST_F(DependencyGraphTest, ClosureTraversal_FiltersInputNodes) { TEST_F(DependencyGraphTest, ClosureTraversal_FiltersNotInFont) { Reconfigure(WithDefaultFeatures({}), { - {{0xD4DB}, ProbabilityBound::Zero()}, // korean codepoint not in Roboto + {{0xD4DB}, + ProbabilityBound::Zero()}, // korean codepoint not in Roboto }); auto r = graph.ClosureTraversal({Node::Unicode(0xD4DB)}); ASSERT_TRUE(r.ok()) << r.status(); // Decompositions should not be present - EXPECT_EQ(r->ReachedCodepoints(), (CodepointSet {})); + EXPECT_EQ(r->ReachedCodepoints(), (CodepointSet{})); } - TEST_F(DependencyGraphTest, UnicodeCompDecomp_Traversal) { Reconfigure(WithDefaultFeatures({}), { - {{0xe1}, ProbabilityBound::Zero()}, // á - {{0x61}, ProbabilityBound::Zero()}, // a - {{0x301}, ProbabilityBound::Zero()}, // combining acute + {{0xe1}, ProbabilityBound::Zero()}, // á + {{0x61}, ProbabilityBound::Zero()}, // a + {{0x301}, ProbabilityBound::Zero()}, // combining acute }); auto r = graph.ClosureTraversal({Node::Unicode(0xe1)}); ASSERT_TRUE(r.ok()) << r.status(); - EXPECT_EQ(r->ReachedCodepoints(), (CodepointSet {0xe1, 0x61, 0x301})); + EXPECT_EQ(r->ReachedCodepoints(), (CodepointSet{0xe1, 0x61, 0x301})); r = graph.ClosureTraversal({Node::Unicode(0x61), Node::Unicode(0x301)}); ASSERT_TRUE(r.ok()) << r.status(); - EXPECT_EQ(r->ReachedCodepoints(), (CodepointSet {0xe1, 0x61, 0x301})); + EXPECT_EQ(r->ReachedCodepoints(), (CodepointSet{0xe1, 0x61, 0x301})); } TEST_F(DependencyGraphTest, UnicodeCompDecomp_IncludesNonSegmentCodepoints) { Reconfigure(WithDefaultFeatures({}), { - {{0xe1}, ProbabilityBound::Zero()}, // á - {{0x61}, ProbabilityBound::Zero()}, // a + {{0xe1}, ProbabilityBound::Zero()}, // á + {{0x61}, ProbabilityBound::Zero()}, // a }); - - // Even though 0x301 is not in a segment's definition closure traversal will still - // pass through it. + // Even though 0x301 is not in a segment's definition closure traversal will + // still pass through it. auto r = graph.ClosureTraversal({Node::Unicode(0xe1)}); ASSERT_TRUE(r.ok()) << r.status(); - EXPECT_EQ(r->ReachedCodepoints(), (CodepointSet {0xe1, 0x61, 0x301})); + EXPECT_EQ(r->ReachedCodepoints(), (CodepointSet{0xe1, 0x61, 0x301})); - // Because 0x301 isn't in scope, the composition and decomposition edges can't be traversed. + // Because 0x301 isn't in scope, the composition and decomposition edges can't + // be traversed. r = graph.ClosureTraversal({Node::Unicode(0x61)}); ASSERT_TRUE(r.ok()) << r.status(); - EXPECT_EQ(r->ReachedCodepoints(), (CodepointSet {0x61})); + EXPECT_EQ(r->ReachedCodepoints(), (CodepointSet{0x61})); } TEST_F(DependencyGraphTest, UnicodeCompDecomp_CompositionExclusion) { // U+2126 (OHM SIGN) decomposes to U+03A9 (GREEK CAPITAL LETTER OMEGA) // and is in the Full_Composition_Exclusion list. - Reconfigure(WithDefaultFeatures({}), - { - {{0x2126}, ProbabilityBound::Zero()}, - {{0x3a9}, ProbabilityBound::Zero()}, - }); + Reconfigure(WithDefaultFeatures({}), { + {{0x2126}, ProbabilityBound::Zero()}, + {{0x3a9}, ProbabilityBound::Zero()}, + }); // Only composition is excluded so decomposition should still work. auto r = graph.ClosureTraversal({Node::Unicode(0x2126)}); ASSERT_TRUE(r.ok()) << r.status(); - EXPECT_EQ(r->ReachedCodepoints(), (CodepointSet {0x3a9, 0x2126})); + EXPECT_EQ(r->ReachedCodepoints(), (CodepointSet{0x3a9, 0x2126})); // The reverse should not work r = graph.ClosureTraversal({Node::Unicode(0x03A9)}); ASSERT_TRUE(r.ok()) << r.status(); - EXPECT_EQ(r->ReachedCodepoints(), (CodepointSet {0x3a9})); + EXPECT_EQ(r->ReachedCodepoints(), (CodepointSet{0x3a9})); } TEST_F(DependencyGraphTest, UnicodeCompDecomp_HangulExclusionTraversal) { // U+0xD4DB decomposes to U+1111, U+1171, U+11B6 - // (from: https://www.unicode.org/versions/Unicode17.0.0/core-spec/chapter-3/#G24646) + // (from: + // https://www.unicode.org/versions/Unicode17.0.0/core-spec/chapter-3/#G24646) Reconfigure(noto_sans_kr.get(), WithDefaultFeatures({}), { {{0xD4DB}, ProbabilityBound::Zero()}, @@ -251,12 +259,13 @@ TEST_F(DependencyGraphTest, UnicodeCompDecomp_HangulExclusionTraversal) { auto r = graph.ClosureTraversal({Node::Unicode(0xD4DB)}); ASSERT_TRUE(r.ok()) << r.status(); // Decompositions should not be present - EXPECT_EQ(r->ReachedCodepoints(), (CodepointSet {0xD4DB})); + EXPECT_EQ(r->ReachedCodepoints(), (CodepointSet{0xD4DB})); - r = graph.ClosureTraversal({Node::Unicode(0x1111), Node::Unicode(0x1171), Node::Unicode(0x11B6)}); + r = graph.ClosureTraversal( + {Node::Unicode(0x1111), Node::Unicode(0x1171), Node::Unicode(0x11B6)}); ASSERT_TRUE(r.ok()) << r.status(); // Composition should not be present - EXPECT_EQ(r->ReachedCodepoints(), (CodepointSet {0x1111, 0x1171, 0x11B6})); + EXPECT_EQ(r->ReachedCodepoints(), (CodepointSet{0x1111, 0x1171, 0x11B6})); } TEST_F(DependencyGraphTest, ContextGlyphs) { @@ -616,8 +625,7 @@ TEST_F(DependencyGraphTest, StronglyConnectedComponents_TopologicalSorting) { Node::Segment(3), Node::Unicode('a'), Node::Unicode('f'), Node::Unicode('i'), Node::Glyph(gid_a), Node::Glyph(gid_f), Node::Glyph(gid_i), Node::Glyph(gid_fi), Node::Glyph(gid_ffi), - Node::Unicode(0xfb01), Node::Unicode(0xfb03), - liga_node)); + Node::Unicode(0xfb01), Node::Unicode(0xfb03), liga_node)); // Now check relative ordering of the elements int s0 = GetIndex(topo_order, Node::Segment(0)); @@ -835,10 +843,11 @@ TEST_F(DependencyGraphTest, CollectIncomingEdges) { {Node::Glyph(gid_f)}, {Node::Glyph(gid_i)}, {Node::Feature(HB_TAG('l', 'i', 'g', 'a'))}, - }; - EXPECT_EQ(fi_edges, (btree_set{ - expected_fi_edge, - EdgeConditionsCnf {{Node::Unicode(0xfb01)},}})); + }; + EXPECT_EQ(fi_edges, (btree_set{expected_fi_edge, + EdgeConditionsCnf{ + {Node::Unicode(0xfb01)}, + }})); // 'f' requires 'f' (Unicode) auto f_edges_it = edges.find(Node::Glyph(gid_f)); diff --git a/ift/dep_graph/unicode_edges.cc b/ift/dep_graph/unicode_edges.cc index 1eaf539e..b445b3b2 100644 --- a/ift/dep_graph/unicode_edges.cc +++ b/ift/dep_graph/unicode_edges.cc @@ -1,32 +1,32 @@ #include "ift/dep_graph/unicode_edges.h" #include -#include "absl/strings/str_cat.h" + #include "absl/strings/numbers.h" +#include "absl/strings/str_cat.h" #include "absl/strings/string_view.h" #include "ift/common/data_file_resolver.h" +#include "ift/common/font_data.h" #include "ift/common/font_helper.h" +#include "ift/common/hb_set_unique_ptr.h" #include "ift/common/int_set.h" #include "ift/common/try.h" -#include "ift/common/font_data.h" -#include "ift/common/hb_set_unique_ptr.h" using absl::flat_hash_map; using absl::Status; using absl::StatusOr; using ift::common::CodepointSet; +using ift::common::DataFileResolver; using ift::common::FontHelper; -using ift::common::make_hb_set; -using ift::common::make_hb_font; -using ift::common::hb_set_unique_ptr; using ift::common::hb_font_unique_ptr; -using ift::common::DataFileResolver; +using ift::common::hb_set_unique_ptr; +using ift::common::make_hb_font; +using ift::common::make_hb_set; namespace ift::dep_graph { absl::Status ParseDerivedNormalizationProps( - const std::string& path, - CodepointSet& full_composition_exclusions) { + const std::string& path, CodepointSet& full_composition_exclusions) { std::ifstream props_file(path); if (!props_file.is_open()) { return absl::NotFoundError(absl::StrCat("Failed to open ", path)); @@ -67,15 +67,17 @@ absl::Status ParseDerivedNormalizationProps( hb_codepoint_t start = 0, end = 0; if (!absl::SimpleHexAtoi(range.substr(0, dotdot), &start) || !absl::SimpleHexAtoi(range.substr(dotdot + 2), &end)) { - return absl::InternalError( - absl::StrCat( - "Failed to parse unicode range in DerivedNormalizationProps.txt: ", range)); + return absl::InternalError(absl::StrCat( + "Failed to parse unicode range in DerivedNormalizationProps.txt: ", + range)); } full_composition_exclusions.insert_range(start, end); } else { hb_codepoint_t u = 0; if (!absl::SimpleHexAtoi(range, &u)) { - return absl::InternalError(absl::StrCat("Failed to parse unicode value in DerivedNormalizationProps.txt: ", range)); + return absl::InternalError(absl::StrCat( + "Failed to parse unicode value in DerivedNormalizationProps.txt: ", + range)); } full_composition_exclusions.insert(u); } @@ -83,12 +85,9 @@ absl::Status ParseDerivedNormalizationProps( return absl::OkStatus(); } -Status ParseUnicodeData( - const std::string& path, - const CodepointSet& unicodes, - const CodepointSet& full_composition_exclusions, - UnicodeEdges& result) { - +Status ParseUnicodeData(const std::string& path, const CodepointSet& unicodes, + const CodepointSet& full_composition_exclusions, + UnicodeEdges& result) { std::ifstream data_file(path); if (!data_file.is_open()) { return absl::NotFoundError(absl::StrCat("Failed to open ", path)); @@ -119,7 +118,8 @@ Status ParseUnicodeData( hb_codepoint_t base = 0; if (!absl::SimpleHexAtoi(fields[0], &base)) { - return absl::InternalError(absl::StrCat("Failed to parse unicode value in UnicodeData.txt: ", fields[0])); + return absl::InternalError(absl::StrCat( + "Failed to parse unicode value in UnicodeData.txt: ", fields[0])); } if (!unicodes.contains(base)) { continue; @@ -142,7 +142,8 @@ Status ParseUnicodeData( } hb_codepoint_t u = 0; if (!absl::SimpleHexAtoi(next, &u)) { - return absl::InternalError(absl::StrCat("Failed to parse unicode value in UnicodeData.txt: ", next)); + return absl::InternalError(absl::StrCat( + "Failed to parse unicode value in UnicodeData.txt: ", next)); } if (!unicodes.contains(u)) { keep = false; @@ -161,7 +162,8 @@ Status ParseUnicodeData( result.decomposition[base].union_set(decomp_chars); - if (decomp_chars.size() == 2 && !full_composition_exclusions.contains(base)) { + if (decomp_chars.size() == 2 && + !full_composition_exclusions.contains(base)) { hb_codepoint_t d0 = *decomp_chars.begin(); hb_codepoint_t d1 = *(++decomp_chars.begin()); result.composition[d0].push_back(UnicodeConjunctiveEdge{d1, base}); @@ -186,7 +188,10 @@ flat_hash_map UnicodeEdges::UnicodeToGid( return out; } -void UnicodeEdges::ComputeUVSEdges(hb_face_t* face, const flat_hash_map& unicode_to_gid, UnicodeEdges& result) { +void UnicodeEdges::ComputeUVSEdges( + hb_face_t* face, + const flat_hash_map& unicode_to_gid, + UnicodeEdges& result) { hb_set_unique_ptr vs_unicodes_hb = make_hb_set(); hb_face_collect_variation_selectors(face, vs_unicodes_hb.get()); CodepointSet vs_unicodes(vs_unicodes_hb.get()); @@ -219,17 +224,18 @@ void UnicodeEdges::ComputeUVSEdges(hb_face_t* face, const flat_hash_map UnicodeEdges::ComputeUnicodeDependencyEdges( - hb_face_t* face, - const DataFileResolver& resolver) { - + hb_face_t* face, const DataFileResolver& resolver) { std::string unicode_data_path = TRY(resolver.GetUnicodeDataPath()); - std::string derived_props_path = TRY(resolver.GetDerivedNormalizationPropsPath()); + std::string derived_props_path = + TRY(resolver.GetDerivedNormalizationPropsPath()); CodepointSet unicodes = FontHelper::ToCodepointsSet(face); UnicodeEdges result; CodepointSet full_composition_exclusions; - TRYV(ParseDerivedNormalizationProps(derived_props_path, full_composition_exclusions)); - TRYV(ParseUnicodeData(unicode_data_path, unicodes, full_composition_exclusions, result)); + TRYV(ParseDerivedNormalizationProps(derived_props_path, + full_composition_exclusions)); + TRYV(ParseUnicodeData(unicode_data_path, unicodes, + full_composition_exclusions, result)); // Compute UVS edges result.unicode_to_gid = UnicodeToGid(face); diff --git a/ift/dep_graph/unicode_edges.h b/ift/dep_graph/unicode_edges.h index d836183a..04c41137 100644 --- a/ift/dep_graph/unicode_edges.h +++ b/ift/dep_graph/unicode_edges.h @@ -2,6 +2,7 @@ #define IFT_DEP_GRAPH_UNICODE_EDGES_H_ #include + #include "absl/container/flat_hash_map.h" #include "absl/status/statusor.h" #include "hb.h" @@ -26,17 +27,23 @@ struct VariationSelectorEdge { }; struct UnicodeEdges { - absl::flat_hash_map> composition; + absl::flat_hash_map> + composition; absl::flat_hash_map decomposition; - absl::flat_hash_map> variation_selector; + absl::flat_hash_map> + variation_selector; absl::flat_hash_map unicode_to_gid; absl::flat_hash_map gid_to_vs; static absl::StatusOr ComputeUnicodeDependencyEdges( - hb_face_t* face, const ift::common::DataFileResolver& resolver); + hb_face_t* face, const ift::common::DataFileResolver& resolver); private: - static void ComputeUVSEdges(hb_face_t* face, const absl::flat_hash_map& unicode_to_gid, UnicodeEdges& result); + static void ComputeUVSEdges( + hb_face_t* face, + const absl::flat_hash_map& + unicode_to_gid, + UnicodeEdges& result); static absl::flat_hash_map UnicodeToGid( hb_face_t* face); diff --git a/ift/dep_graph/unicode_edges_test.cc b/ift/dep_graph/unicode_edges_test.cc index 52c81ff9..18e9c36d 100644 --- a/ift/dep_graph/unicode_edges_test.cc +++ b/ift/dep_graph/unicode_edges_test.cc @@ -1,19 +1,19 @@ #include "ift/dep_graph/unicode_edges.h" -#include "ift/common/bazel_data_file_resolver.h" #include -#include "gtest/gtest.h" #include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "ift/common/bazel_data_file_resolver.h" #include "ift/common/font_data.h" #include "ift/common/font_helper.h" namespace ift::dep_graph { -using ift::common::FontHelper; +using ift::common::BazelDataFileResolver; using ift::common::FontData; +using ift::common::FontHelper; using ift::common::hb_face_unique_ptr; -using ift::common::BazelDataFileResolver; using ::testing::Contains; using ::testing::Not; @@ -30,7 +30,8 @@ TEST(UnicodeEdgesTest, ComputeUnicodeDependencyEdges_Roboto) { ASSERT_TRUE(face); auto resolver = *BazelDataFileResolver::CreateForTest(); - auto edges = UnicodeEdges::ComputeUnicodeDependencyEdges(face.get(), *resolver); + auto edges = + UnicodeEdges::ComputeUnicodeDependencyEdges(face.get(), *resolver); ASSERT_TRUE(edges.ok()) << edges.status(); // U+00C1 (Á) decomposes to U+0041 (A) and U+0301 (◌́) @@ -50,18 +51,18 @@ TEST(UnicodeEdgesTest, ComputeUnicodeDependencyEdges_Roboto) { // Check composition auto comp_it = edges->composition.find(A); ASSERT_NE(comp_it, edges->composition.end()); - EXPECT_THAT(comp_it->second, Contains(UnicodeConjunctiveEdge { - .other_source = acute, - .dest = A_acute, - })); + EXPECT_THAT(comp_it->second, Contains(UnicodeConjunctiveEdge{ + .other_source = acute, + .dest = A_acute, + })); // There should also be an edge from acute comp_it = edges->composition.find(acute); ASSERT_NE(comp_it, edges->composition.end()); - EXPECT_THAT(comp_it->second, Contains(UnicodeConjunctiveEdge { - .other_source = A, - .dest = A_acute, - })); + EXPECT_THAT(comp_it->second, Contains(UnicodeConjunctiveEdge{ + .other_source = A, + .dest = A_acute, + })); } TEST(UnicodeEdgesTest, ComputeUnicodeDependencyEdges_UVS) { @@ -69,7 +70,8 @@ TEST(UnicodeEdgesTest, ComputeUnicodeDependencyEdges_UVS) { ASSERT_TRUE(face); auto resolver = *BazelDataFileResolver::CreateForTest(); - auto edges = UnicodeEdges::ComputeUnicodeDependencyEdges(face.get(), *resolver); + auto edges = + UnicodeEdges::ComputeUnicodeDependencyEdges(face.get(), *resolver); ASSERT_TRUE(edges.ok()) << edges.status(); // Check a specific known mapping: U+4FAE with U+FE00 -> U+FA30 @@ -80,11 +82,12 @@ TEST(UnicodeEdgesTest, ComputeUnicodeDependencyEdges_UVS) { ASSERT_NE(dest_gid, edges->unicode_to_gid.end()); auto it = edges->variation_selector.find(base_u); - ASSERT_NE(it, edges->variation_selector.end()) << "U+4FAE not found in variation_selector"; - EXPECT_THAT(it->second, Contains(VariationSelectorEdge { - .unicode = vs_u, - .gid = dest_gid->second, - })); + ASSERT_NE(it, edges->variation_selector.end()) + << "U+4FAE not found in variation_selector"; + EXPECT_THAT(it->second, Contains(VariationSelectorEdge{ + .unicode = vs_u, + .gid = dest_gid->second, + })); // Check the reverse mapping auto rev_it = edges->gid_to_vs.find(dest_gid->second); @@ -98,7 +101,8 @@ TEST(UnicodeEdgesTest, ComputeUnicodeDependencyEdges_CompositionExclusion) { ASSERT_TRUE(face); auto resolver = *BazelDataFileResolver::CreateForTest(); - auto edges = UnicodeEdges::ComputeUnicodeDependencyEdges(face.get(), *resolver); + auto edges = + UnicodeEdges::ComputeUnicodeDependencyEdges(face.get(), *resolver); ASSERT_TRUE(edges.ok()) << edges.status(); // U+2126 (OHM SIGN) decomposes to U+03A9 (GREEK CAPITAL LETTER OMEGA) @@ -107,7 +111,8 @@ TEST(UnicodeEdgesTest, ComputeUnicodeDependencyEdges_CompositionExclusion) { hb_codepoint_t omega = 0x03A9; auto unicodes = ift::common::FontHelper::ToCodepointsSet(face.get()); - ASSERT_TRUE(unicodes.contains(ohm_sign)) << "U+2126 not in Roboto-Regular.ttf"; + ASSERT_TRUE(unicodes.contains(ohm_sign)) + << "U+2126 not in Roboto-Regular.ttf"; // Check decomposition still works auto decomp_it = edges->decomposition.find(ohm_sign); @@ -118,7 +123,8 @@ TEST(UnicodeEdgesTest, ComputeUnicodeDependencyEdges_CompositionExclusion) { auto comp_it = edges->composition.find(omega); if (comp_it != edges->composition.end()) { for (const auto& edge : comp_it->second) { - EXPECT_NE(edge.dest, ohm_sign) << "Composition edge to U+2126 should be excluded"; + EXPECT_NE(edge.dest, ohm_sign) + << "Composition edge to U+2126 should be excluded"; } } } diff --git a/ift/encoder/activation_condition.cc b/ift/encoder/activation_condition.cc index 72b2a347..94e4aa25 100644 --- a/ift/encoder/activation_condition.cc +++ b/ift/encoder/activation_condition.cc @@ -28,11 +28,11 @@ using absl::StatusOr; using absl::StrCat; using ift::common::IntSet; using ift::common::SegmentSet; +using ift::freq::ProbabilityBound; using ift::freq::ProbabilityCalculator; using ift::proto::GLYPH_KEYED; using ift::proto::PatchEncoding; using ift::proto::PatchMap; -using ift::freq::ProbabilityBound; namespace ift::encoder { @@ -93,7 +93,8 @@ ActivationCondition ActivationCondition::composite_condition( return conditions; } -ActivationCondition ActivationCondition::clear_exclusive(ActivationCondition&& condition) { +ActivationCondition ActivationCondition::clear_exclusive( + ActivationCondition&& condition) { ActivationCondition updated(std::move(condition)); updated.is_exclusive_ = false; return updated; @@ -194,22 +195,22 @@ ActivationCondition ActivationCondition::ReplaceSegments( return new_condition; } -ActivationCondition ActivationCondition::RemoveIntersectingSubgroups(const SegmentSet& segments) const { +ActivationCondition ActivationCondition::RemoveIntersectingSubgroups( + const SegmentSet& segments) const { ActivationCondition new_condition = ActivationCondition::True(0); new_condition.activated_ = activated_; new_condition.encoding_ = encoding_; new_condition.is_fallback_ = is_fallback_; new_condition.is_exclusive_ = is_exclusive_; - for (const auto& condition_group : conditions_) { if (!condition_group.intersects(segments)) { new_condition.conditions_.push_back(condition_group); } } - // Simplify is not needed as the existing condition will already be simplified and removing - // sub-groups does not affect simplification. + // Simplify is not needed as the existing condition will already be simplified + // and removing sub-groups does not affect simplification. return new_condition; } @@ -377,20 +378,18 @@ StatusOr ActivationCondition::ProbabilityBound( } StatusOr ActivationCondition::MergedProbability( - Span segments, - segment_index_t merged_segment_index, + Span segments, segment_index_t merged_segment_index, const Segment& merged_segment, const ProbabilityCalculator& calculator) const { - return TRY(MergedProbabilityBound(segments, merged_segment_index, merged_segment, calculator)).Average(); + return TRY(MergedProbabilityBound(segments, merged_segment_index, + merged_segment, calculator)) + .Average(); } - StatusOr ActivationCondition::MergedProbabilityBound( - Span segments, - segment_index_t merged_segment_index, + Span segments, segment_index_t merged_segment_index, const Segment& merged_segment, const ProbabilityCalculator& calculator) const { - if (conditions_.empty()) { return freq::ProbabilityBound(1.0, 1.0); } @@ -412,7 +411,7 @@ StatusOr ActivationCondition::MergedProbabilityBound( } } set_bound = calculator.ComputeMergedProbability(union_segments); - } else if (*segment_set.min() == merged_segment_index) { + } else if (*segment_set.min() == merged_segment_index) { set_bound = merged_segment.ProbabilityBound(); } else { set_bound = segments[*segment_set.min()].ProbabilityBound(); diff --git a/ift/encoder/activation_condition.h b/ift/encoder/activation_condition.h index f387bd03..6f301f1c 100644 --- a/ift/encoder/activation_condition.h +++ b/ift/encoder/activation_condition.h @@ -59,8 +59,8 @@ class ActivationCondition { bool is_fallback = false); /* - * Constructs a new condition equal to 'condition' except with the is exclusive flag - * cleared. + * Constructs a new condition equal to 'condition' except with the is + * exclusive flag cleared. */ static ActivationCondition clear_exclusive(ActivationCondition&& condition); @@ -126,7 +126,8 @@ class ActivationCondition { // Generate a new condition which is a copy of this one but with all // sub grups that contain segments removed. - ActivationCondition RemoveIntersectingSubgroups(const common::SegmentSet& segments) const; + ActivationCondition RemoveIntersectingSubgroups( + const common::SegmentSet& segments) const; /* * Returns a human readable string representation of this condition. @@ -186,14 +187,12 @@ class ActivationCondition { // it is modified to merge all segments in "merged_segments" into a single // segment with "merged_probability". absl::StatusOr MergedProbabilityBound( - absl::Span segments, - segment_index_t merged_segment_index, + absl::Span segments, segment_index_t merged_segment_index, const Segment& merged_segment, const ift::freq::ProbabilityCalculator& calculator) const; absl::StatusOr MergedProbability( - absl::Span segments, - segment_index_t merged_segment_index, + absl::Span segments, segment_index_t merged_segment_index, const Segment& merged_segment, const ift::freq::ProbabilityCalculator& calculator) const; diff --git a/ift/encoder/activation_condition_test.cc b/ift/encoder/activation_condition_test.cc index 954cbb87..cf37e71e 100644 --- a/ift/encoder/activation_condition_test.cc +++ b/ift/encoder/activation_condition_test.cc @@ -323,47 +323,47 @@ TEST(ActivationConditionTest, MergedProbability) { // {0} get's replaced with {0 U 1} EXPECT_NEAR(*ActivationCondition::and_segments({0, 2}, 1) - .ReplaceSegments(0, {0, 1}) - .MergedProbability( - segments, 0, merged_segment, probability_calculator), + .ReplaceSegments(0, {0, 1}) + .MergedProbability(segments, 0, merged_segment, + probability_calculator), 0.85 * 0.25, 1e-9); // Disjunctive with merge intersection // {0} get's replaced with {0 U 1} EXPECT_NEAR(*ActivationCondition::or_segments({0}, 1) - .ReplaceSegments(0, {0, 1}) - .MergedProbability( - segments, 0, merged_segment, probability_calculator), + .ReplaceSegments(0, {0, 1}) + .MergedProbability(segments, 0, merged_segment, + probability_calculator), 0.85, 1e-9); EXPECT_NEAR(*ActivationCondition::or_segments({1}, 1) - .ReplaceSegments(0, {0, 1}) - .MergedProbability( - segments, 0, merged_segment, probability_calculator), + .ReplaceSegments(0, {0, 1}) + .MergedProbability(segments, 0, merged_segment, + probability_calculator), 0.85, 1e-9); EXPECT_NEAR(*ActivationCondition::or_segments({0, 1}, 1) - .ReplaceSegments(0, {0, 1}) - .MergedProbability( - segments, 0, merged_segment, probability_calculator), + .ReplaceSegments(0, {0, 1}) + .MergedProbability(segments, 0, merged_segment, + probability_calculator), 0.85, 1e-9); // Disjunctive with partial merge intersection EXPECT_NEAR(*ActivationCondition::or_segments({0, 2}, 1) - .ReplaceSegments(0, {0, 1}) - .MergedProbability( - segments, 0, merged_segment, probability_calculator), + .ReplaceSegments(0, {0, 1}) + .MergedProbability(segments, 0, merged_segment, + probability_calculator), 0.95, 1e-9); // Conjunctive with partial merge intersection EXPECT_NEAR(*ActivationCondition::and_segments({0, 1, 2}, 1) - .ReplaceSegments(2, {1, 2}) + .ReplaceSegments(2, {1, 2}) .MergedProbability(segments, 2, merged_segment, probability_calculator), - 0.75 * 0.85, 1e-9); // .. AND 1 AND 2 becomes .. AND 1 + 0.75 * 0.85, 1e-9); // .. AND 1 AND 2 becomes .. AND 1 // Composite condition // (0 or 1) AND 2 =merge {1, 2}=> (0 or 1) EXPECT_NEAR(*ActivationCondition::composite_condition({{0, 1}, {2}}, 1) - .ReplaceSegments(1, {1, 2}) + .ReplaceSegments(1, {1, 2}) .MergedProbability(segments, 1, merged_segment, probability_calculator), 0.85, 1e-9); diff --git a/ift/encoder/candidate_merge.cc b/ift/encoder/candidate_merge.cc index 90e6527b..6a5167f2 100644 --- a/ift/encoder/candidate_merge.cc +++ b/ift/encoder/candidate_merge.cc @@ -27,8 +27,8 @@ namespace ift::encoder { -using absl::flat_hash_set; using absl::flat_hash_map; +using absl::flat_hash_set; using absl::Status; using absl::StatusOr; using ift::GlyphKeyedDiff; @@ -187,7 +187,8 @@ static void MergeSegments(const Merger& merger, const SegmentSet& segments, merged_segments.push_back(&s); } - // Compute probability before modifying base since it's in the merged segments array. + // Compute probability before modifying base since it's in the merged segments + // array. const auto* calculator = merger.Strategy().ProbabilityCalculator(); const auto& bound = calculator->ComputeMergedProbability(merged_segments); base.Definition() = std::move(union_def); @@ -246,7 +247,8 @@ StatusOr CandidateMerge::Woff2SizeOf(hb_face_t* original_face, // Finds the set of patches which intersect either gids or segments. static flat_hash_map PatchesWithGlyphsOrSegments( - const SegmentationContext& context, const GlyphSet& gids, const SegmentSet& segments) { + const SegmentationContext& context, const GlyphSet& gids, + const SegmentSet& segments) { // To more efficiently target our search we can use the glyph_condition_set to // locate conditions that intersect with gids. GlyphSet fallback_glyphs = context.glyph_groupings.UnmappedGlyphs(); @@ -268,7 +270,8 @@ static flat_hash_map PatchesWithGlyphsOrSegments( } for (segment_index_t s : segments) { - const auto& conditions = context.glyph_groupings.TriggeringSegmentToConditions(s); + const auto& conditions = + context.glyph_groupings.TriggeringSegmentToConditions(s); conditions_of_interest.insert(conditions.begin(), conditions.end()); } @@ -310,19 +313,21 @@ Status CandidateMerge::ComputeInitFontGlyphDelta( return absl::OkStatus(); } -static std::optional> FindExistingCondition( - const flat_hash_map& condition_and_glyphs, - const ActivationCondition& condition) { - +static std::optional> +FindExistingCondition( + const flat_hash_map& condition_and_glyphs, + const ActivationCondition& condition) { auto it = condition_and_glyphs.find(condition); if (it != condition_and_glyphs.end()) { return *it; } if (condition.IsUnitary()) { - // For unitary conditions, the existing one might exist under a different is_exclusive value + // For unitary conditions, the existing one might exist under a different + // is_exclusive value segment_index_t s = *condition.TriggeringSegments().min(); - ActivationCondition secondary = ActivationCondition::exclusive_segment(s, 0); + ActivationCondition secondary = + ActivationCondition::exclusive_segment(s, 0); if (condition.IsExclusive()) { secondary = ActivationCondition::or_segments({s}, 0); } @@ -335,33 +340,37 @@ static std::optional> FindExistingCondi return std::nullopt; } -static StatusOr CostFor(Merger& merger, const ActivationCondition& condition, const GlyphSet& glyphs) { +static StatusOr CostFor(Merger& merger, + const ActivationCondition& condition, + const GlyphSet& glyphs) { double probability = 1.0; if (!condition.IsFallback()) { probability = TRY( - condition.Probability(merger.Context().SegmentationInfo().Segments(), - *merger.Strategy().ProbabilityCalculator())); + condition.Probability(merger.Context().SegmentationInfo().Segments(), + *merger.Strategy().ProbabilityCalculator())); } double patch_size = TRY(merger.Context().patch_size_cache_for_init_font->GetPatchSize( - glyphs)) + merger.Strategy().NetworkOverheadCost(); + glyphs)) + + merger.Strategy().NetworkOverheadCost(); double cost = probability * patch_size; - VLOG(1) << " - (" << probability << " * " << patch_size - << ") -> " << cost << " [modified patch]"; + VLOG(1) << " - (" << probability << " * " << patch_size << ") -> " << cost + << " [modified patch]"; return cost; } StatusOr> CandidateMerge::ComputeInitFontCostDelta( Merger& merger, uint32_t existing_init_font_size, const GlyphSet& moved_glyphs, - absl::flat_hash_map& smallest_size_increases - ) { - // Brotli compression results can be a bit noisy and it's common to see very different - // size increase (in both directions) for adding the same glyphs as the base changes. - // So to help control some of this noise we use smallest_size_increases to track the smallest - // size increase we've seen for moving a specific set of glyphs. + absl::flat_hash_map& + smallest_size_increases) { + // Brotli compression results can be a bit noisy and it's common to see very + // different size increase (in both directions) for adding the same glyphs as + // the base changes. So to help control some of this noise we use + // smallest_size_increases to track the smallest size increase we've seen for + // moving a specific set of glyphs. VLOG(1) << "cost_delta for move of glyphs " << moved_glyphs.ToString() << " to the initial font ="; @@ -374,25 +383,30 @@ StatusOr> CandidateMerge::ComputeInitFontCostDelta( // in the initial font. // Moving glyphs to the initial font has the following affects: - // 1. Based on the input moved_glyphs an expanded set of glyphs and codepoints are found + // 1. Based on the input moved_glyphs an expanded set of glyphs and codepoints + // are found // and moved to the initial font. - // 2. This affects condition -> patch costs by changing the contained glyphs and in - // some cases modifying the activation conditions. Conditions are changed by either - // segments being fully moved to init font (becoming always true), or more rarely - // by segment definitions changing which in turn affects the probability. + // 2. This affects condition -> patch costs by changing the contained glyphs + // and in + // some cases modifying the activation conditions. Conditions are changed + // by either segments being fully moved to init font (becoming always + // true), or more rarely by segment definitions changing which in turn + // affects the probability. GlyphSet new_glyph_closure, glyph_closure_delta; CodepointSet codepoint_closure_delta; TRYV(ComputeInitFontGlyphDelta(merger, moved_glyphs, new_glyph_closure, glyph_closure_delta, codepoint_closure_delta)); - uint32_t after = TRY(merger.Context().patch_size_cache_for_init_font->GetPatchSize( + uint32_t after = + TRY(merger.Context().patch_size_cache_for_init_font->GetPatchSize( new_glyph_closure)); uint32_t init_increase = 0; if (after >= existing_init_font_size) { init_increase = after - existing_init_font_size; - auto [it, _] = smallest_size_increases.emplace(glyph_closure_delta, UINT32_MAX); + auto [it, _] = + smallest_size_increases.emplace(glyph_closure_delta, UINT32_MAX); if (init_increase < it->second) { it->second = init_increase; } else { @@ -404,12 +418,17 @@ StatusOr> CandidateMerge::ComputeInitFontCostDelta( VLOG(1) << " + " << total_delta << " [init font increase]"; - SegmentSet modified_segments = merger.Context().SegmentationInfo().SegmentsForCodepoints(codepoint_closure_delta); + SegmentSet modified_segments = + merger.Context().SegmentationInfo().SegmentsForCodepoints( + codepoint_closure_delta); SegmentSet removed_segments; for (segment_index_t s : modified_segments) { - const auto& segment = merger.Context().SegmentationInfo().Segments().at(s).Definition(); - if (segment.feature_tags.empty() && segment.codepoints.is_subset_of(codepoint_closure_delta)) { - // all codepoints are being moved to init font, so this segment is going to be removed. + const auto& segment = + merger.Context().SegmentationInfo().Segments().at(s).Definition(); + if (segment.feature_tags.empty() && + segment.codepoints.is_subset_of(codepoint_closure_delta)) { + // all codepoints are being moved to init font, so this segment is going + // to be removed. removed_segments.insert(s); } } @@ -417,9 +436,9 @@ StatusOr> CandidateMerge::ComputeInitFontCostDelta( flat_hash_map new_conditions; const uint32_t per_request_overhead = merger.Strategy().NetworkOverheadCost(); - auto affected_conditions = PatchesWithGlyphsOrSegments(merger.Context(), glyph_closure_delta, modified_segments); + auto affected_conditions = PatchesWithGlyphsOrSegments( + merger.Context(), glyph_closure_delta, modified_segments); for (const auto& [condition, glyphs] : affected_conditions) { - GlyphSet glyphs_after = glyphs; glyphs_after.subtract(glyph_closure_delta); @@ -429,19 +448,23 @@ StatusOr> CandidateMerge::ComputeInitFontCostDelta( continue; } - // Remove segments effectively become always true, so update the condition by removing - // sub-groups that contain those segments as the whole subgroup will also become "true". - ActivationCondition updated = condition.RemoveIntersectingSubgroups(removed_segments); - auto [it, did_insert] = new_conditions.emplace(updated, GlyphSet {}); + // Remove segments effectively become always true, so update the condition + // by removing sub-groups that contain those segments as the whole subgroup + // will also become "true". + ActivationCondition updated = + condition.RemoveIntersectingSubgroups(removed_segments); + auto [it, did_insert] = new_conditions.emplace(updated, GlyphSet{}); it->second.union_set(glyphs_after); if (!did_insert) { continue; } - auto existing = FindExistingCondition(merger.Context().glyph_groupings.ConditionsAndGlyphs(), updated); - if (existing.has_value() && !affected_conditions.contains(existing->first)) { - // Existing isn't in the affected list so we need to account for it's removal, and transfer - // it's glyphs to new_conditions. + auto existing = FindExistingCondition( + merger.Context().glyph_groupings.ConditionsAndGlyphs(), updated); + if (existing.has_value() && + !affected_conditions.contains(existing->first)) { + // Existing isn't in the affected list so we need to account for it's + // removal, and transfer it's glyphs to new_conditions. it->second.union_set(existing->second); total_delta -= TRY(CostFor(merger, existing->first, existing->second)); } @@ -450,17 +473,17 @@ StatusOr> CandidateMerge::ComputeInitFontCostDelta( for (const auto& [condition, glyphs] : new_conditions) { double patch_probability_after = 1.0; if (!condition.IsFallback()) { - // TODO(garretrieger): XXXX also include the effect of modified segments in this calc. - // Start with finding a test case. + // TODO(garretrieger): XXXX also include the effect of modified segments + // in this calc. Start with finding a test case. patch_probability_after = TRY( condition.Probability(merger.Context().SegmentationInfo().Segments(), *merger.Strategy().ProbabilityCalculator())); } double patch_size_after = - TRY(merger.Context().patch_size_cache_for_init_font->GetPatchSize( - glyphs)) + - per_request_overhead; + TRY(merger.Context().patch_size_cache_for_init_font->GetPatchSize( + glyphs)) + + per_request_overhead; double cost_after = patch_probability_after * patch_size_after; VLOG(1) << " + (" << patch_probability_after << " * " << patch_size_after @@ -476,9 +499,8 @@ StatusOr> CandidateMerge::ComputeInitFontCostDelta( StatusOr CandidateMerge::ComputeBestCaseInitFontCostDelta( Merger& merger, uint32_t existing_init_font_size, const GlyphSet& moved_glyphs) { - - // TODO(garretrieger): consider reworking this to avoid running a glyph closure, by - // working only with the explicitly moved glyphs. + // TODO(garretrieger): consider reworking this to avoid running a glyph + // closure, by working only with the explicitly moved glyphs. GlyphSet new_glyph_closure, glyph_closure_delta; CodepointSet codepoint_closure_delta; TRYV(ComputeInitFontGlyphDelta(merger, moved_glyphs, new_glyph_closure, @@ -488,10 +510,11 @@ StatusOr CandidateMerge::ComputeBestCaseInitFontCostDelta( double best_case_reduction = merger.Strategy().BestCaseSizeReductionFraction(); double per_request_overhead = merger.Strategy().NetworkOverheadCost(); - // This best case computation is a simplified version that ignores the impact of changing - // segments on the cost computation and focuses only on the glyphs that are being moved. - for (const auto& [condition, glyphs] : - PatchesWithGlyphsOrSegments(merger.Context(), glyph_closure_delta, SegmentSet {})) { + // This best case computation is a simplified version that ignores the impact + // of changing segments on the cost computation and focuses only on the glyphs + // that are being moved. + for (const auto& [condition, glyphs] : PatchesWithGlyphsOrSegments( + merger.Context(), glyph_closure_delta, SegmentSet{})) { double patch_probability = 1.0; if (!condition.IsFallback()) { patch_probability = TRY( @@ -552,20 +575,16 @@ StatusOr CandidateMerge::ComputeBestCaseInitFontCostDelta( return cost_delta; } -template +template StatusOr CandidateMerge::ComputeCostDelta( - Merger& merger, - const SegmentSet& merged_segments, - const Segment& merged_segment, - std::optional exclusive_gids - ) { - + Merger& merger, const SegmentSet& merged_segments, + const Segment& merged_segment, std::optional exclusive_gids) { if (merged_segments.size() <= 1) { return 0; } - // These are conditions which will be modified by applying the merge. These are - // conditions that intersect merged_segments. + // These are conditions which will be modified by applying the merge. These + // are conditions that intersect merged_segments. // // Map value is the glyphs associated with condition. flat_hash_map modified_conditions; @@ -573,10 +592,11 @@ StatusOr CandidateMerge::ComputeCostDelta( TRYV(FindModifiedConditions(merger, merged_segments, modified_conditions)); // new_conditions is modified conditions updated to apply the segment merging. - // May have less conditions than modified as multiple conditions may merge together - // as a result of updating their segments. + // May have less conditions than modified as multiple conditions may merge + // together as a result of updating their segments. // - // Map value is the set of glyphs and probability associated with the new condition. + // Map value is the set of glyphs and probability associated with the new + // condition. struct Info { GlyphSet glyphs; double probability = 0.0; @@ -594,8 +614,7 @@ StatusOr CandidateMerge::ComputeCostDelta( double cost_delta = 0.0; const uint32_t per_request_overhead = merger.Strategy().NetworkOverheadCost(); for (const auto& [condition, glyphs] : modified_conditions) { - uint32_t patch_size = - TRY(patch_size_cache->GetPatchSize(*glyphs)); + uint32_t patch_size = TRY(patch_size_cache->GetPatchSize(*glyphs)); double p = TRY(condition.Probability(segments, *calculator)); double d = p * (patch_size + per_request_overhead); cost_delta -= d; @@ -630,7 +649,8 @@ StatusOr CandidateMerge::ComputeCostDelta( // Add in cost associated within the new conditions patch pairs. bool fallback_changed = false; - double best_case_reduction_fraction = merger.Strategy().BestCaseSizeReductionFraction(); + double best_case_reduction_fraction = + merger.Strategy().BestCaseSizeReductionFraction(); for (auto& [c, info] : new_conditions) { // For modified conditions we assume the associated patch size does not // change, only the probability associated with the condition changes. @@ -639,9 +659,8 @@ StatusOr CandidateMerge::ComputeCostDelta( uint32_t size = 0; if (best_case) { uint32_t extra = info.combined_patch_size - info.largest_patch_size; - extra = std::max( - (uint32_t)(extra * best_case_reduction_fraction), - Merger::BEST_CASE_MERGE_SIZE_DELTA); + extra = std::max((uint32_t)(extra * best_case_reduction_fraction), + Merger::BEST_CASE_MERGE_SIZE_DELTA); size = info.largest_patch_size + extra; } else { if (exclusive_gids.has_value()) { @@ -653,7 +672,8 @@ StatusOr CandidateMerge::ComputeCostDelta( info.glyphs.subtract(*exclusive_gids); if (c.IsUnitary()) { info.glyphs.union_set(*exclusive_gids); - if (context.glyph_groupings.UnmappedGlyphs().intersects(*exclusive_gids)) { + if (context.glyph_groupings.UnmappedGlyphs().intersects( + *exclusive_gids)) { fallback_changed = true; } } @@ -666,16 +686,17 @@ StatusOr CandidateMerge::ComputeCostDelta( if (merger.ShouldRecordMergedSizeReductions()) { int32_t extra_raw = info.combined_patch_size - info.largest_patch_size; int32_t extra_actual = ((int32_t)size) - info.largest_patch_size; - if (extra_raw != 0.0) { - merger.RecordMergedSizeReduction((double)extra_actual / (double)extra_raw); + if (extra_raw != 0.0) { + merger.RecordMergedSizeReduction((double)extra_actual / + (double)extra_raw); } } } double s = (size + per_request_overhead); double d = p * s; - VLOG(1) << " + (" << p << " * " << s << ") -> " << d - << " [new patch " << c.ToString() << "]"; + VLOG(1) << " + (" << p << " * " << s << ") -> " << d << " [new patch " + << c.ToString() << "]"; cost_delta += d; } @@ -683,11 +704,12 @@ StatusOr CandidateMerge::ComputeCostDelta( GlyphSet new_fallback = context.glyph_groupings.UnmappedGlyphs(); new_fallback.subtract(*exclusive_gids); - // Fallback is always needed with 100% probability so delta is just the size difference (new - old) - double diff = ((double) TRY(patch_size_cache->GetPatchSize(new_fallback))) - - ((double) TRY(patch_size_cache->GetPatchSize(context.glyph_groupings.UnmappedGlyphs()))); - VLOG(1) << " + " << diff - << " [fallback delta]"; + // Fallback is always needed with 100% probability so delta is just the size + // difference (new - old) + double diff = ((double)TRY(patch_size_cache->GetPatchSize(new_fallback))) - + ((double)TRY(patch_size_cache->GetPatchSize( + context.glyph_groupings.UnmappedGlyphs()))); + VLOG(1) << " + " << diff << " [fallback delta]"; cost_delta += diff; } @@ -760,7 +782,6 @@ StatusOr> CandidateMerge::AssessSegmentMerge( Merger& merger, segment_index_t base_segment_index, const SegmentSet& segments_to_merge_, const std::optional& best_merge_candidate) { - if (WouldMixFeaturesAndCodepoints(merger.Context().SegmentationInfo(), base_segment_index, segments_to_merge_)) { // With the heuristic merger if it doesn't find a previous merge candidate @@ -825,15 +846,17 @@ StatusOr> CandidateMerge::AssessSegmentMerge( uint32_t new_patch_size = 0; std::optional exclusive_gids; - if (!merger.Strategy().UseCosts() || merger.Context().GetConditionAnalysisMode() != config::DEP_GRAPH_ONLY) { + if (!merger.Strategy().UseCosts() || + merger.Context().GetConditionAnalysisMode() != config::DEP_GRAPH_ONLY) { if (!segments_to_merge_are_inert) { - // When we're not in pure depgraph mode then glyph conditions are incomplete. - // To help improve accuracy run a closure to find the new set of exclusive - // glyphs associated with the merge and incorporate that into the cost calculations. + // When we're not in pure depgraph mode then glyph conditions are + // incomplete. To help improve accuracy run a closure to find the new set + // of exclusive glyphs associated with the merge and incorporate that into + // the cost calculations. GlyphSet and_gids, or_gids; - exclusive_gids = GlyphSet {}; - TRYV(merger.Context().AnalyzeSegment(segments_to_merge_with_base, and_gids, - or_gids, *exclusive_gids)); + exclusive_gids = GlyphSet{}; + TRYV(merger.Context().AnalyzeSegment(segments_to_merge_with_base, + and_gids, or_gids, *exclusive_gids)); } else { // When the segments being added to base are all inert then we can assume // the merged patch is just a combination of the glyphs from the base and @@ -845,7 +868,7 @@ StatusOr> CandidateMerge::AssessSegmentMerge( merger.Context().glyph_groupings.ExclusiveGlyphs(base_segment_index)); } new_patch_size = - TRY(merger.Context().patch_size_cache->GetPatchSize(*exclusive_gids)); + TRY(merger.Context().patch_size_cache->GetPatchSize(*exclusive_gids)); } if (!merger.Strategy().UseCosts() && @@ -856,8 +879,8 @@ StatusOr> CandidateMerge::AssessSegmentMerge( double cost_delta = 0.0; if (merger.Strategy().UseCosts()) { // Cost delta values are only needed when using cost based merge strategy. - cost_delta = TRY(ComputeCostDelta(merger, segments_to_merge_with_base, - merged_segment, exclusive_gids)); + cost_delta = TRY(ComputeCostDelta( + merger, segments_to_merge_with_base, merged_segment, exclusive_gids)); } if (best_merge_candidate.has_value() && diff --git a/ift/encoder/candidate_merge.h b/ift/encoder/candidate_merge.h index c22d8ec3..8f1f986e 100644 --- a/ift/encoder/candidate_merge.h +++ b/ift/encoder/candidate_merge.h @@ -141,14 +141,16 @@ struct CandidateMerge { // are joined together into a new segment, merged_segment. // // exclusive_gids is an optional hint that specifies which glyphs - // will end up in the exclusive patch after merged_segments are merged together. + // will end up in the exclusive patch after merged_segments are merged + // together. // // If best_case is true then this will compute an estimated best possible cost // delta from the merge (computationally cheap) instead of the real delta. - template + template static absl::StatusOr ComputeCostDelta( Merger& merger, const ift::common::SegmentSet& merged_segments, - const Segment& merged_segment, std::optional exclusive_gids); + const Segment& merged_segment, + std::optional exclusive_gids); // Computes the predicted change to the toal cost if moved_glyphs are // moved from patches into the initial font. @@ -158,13 +160,12 @@ struct CandidateMerge { static absl::StatusOr> ComputeInitFontCostDelta(Merger& merger, uint32_t existing_init_font_size, const ift::common::GlyphSet& moved_glyphs, - absl::flat_hash_map& smallest_size_increases - ); + absl::flat_hash_map& + smallest_size_increases); static absl::StatusOr ComputeBestCaseInitFontCostDelta( Merger& merger, uint32_t existing_init_font_size, - const ift::common::GlyphSet& moved_glyphs - ); + const ift::common::GlyphSet& moved_glyphs); static absl::StatusOr ComputePatchMergeCostDelta( const Merger& context, segment_index_t base_segment, @@ -182,8 +183,7 @@ struct CandidateMerge { Merger& merger, const ift::common::GlyphSet& moved_glyphs, ift::common::GlyphSet& glyph_closure_delta, ift::common::GlyphSet& new_glyph_closure, - ift::common::CodepointSet& codepoint_closure_delta - ); + ift::common::CodepointSet& codepoint_closure_delta); }; } // namespace ift::encoder diff --git a/ift/encoder/candidate_merge_test.cc b/ift/encoder/candidate_merge_test.cc index 682ceef0..61c187c2 100644 --- a/ift/encoder/candidate_merge_test.cc +++ b/ift/encoder/candidate_merge_test.cc @@ -1,10 +1,10 @@ #include "ift/encoder/candidate_merge.h" -#include "ift/common/bazel_data_file_resolver.h" #include #include #include "gtest/gtest.h" +#include "ift/common/bazel_data_file_resolver.h" #include "ift/common/font_data.h" #include "ift/common/int_set.h" #include "ift/encoder/closure_glyph_segmenter.h" @@ -23,15 +23,15 @@ using ift::config::CLOSURE_ONLY; using ift::config::DEP_GRAPH_ONLY; using ift::config::PATCH; +using ift::common::BazelDataFileResolver; using ift::common::CodepointSet; -using ift::common::GlyphSet; +using ift::common::DataFileResolver; using ift::common::FontData; +using ift::common::GlyphSet; using ift::common::hb_face_unique_ptr; using ift::common::IntSet; using ift::common::make_hb_face; using ift::common::SegmentSet; -using ift::common::DataFileResolver; -using ift::common::BazelDataFileResolver; using ift::freq::MockProbabilityCalculator; using ift::freq::ProbabilityBound; using ift::freq::UnicodeFrequencies; @@ -520,93 +520,113 @@ TEST_F(CandidateMergeTest, ComputeInitFontCostDelta) { glyph_id_t g_fi = 444; glyph_id_t g_ffi = 446; - uint32_t init_size = *context->patch_size_cache_for_init_font->GetPatchSize({0, g_a}); - uint32_t plus_b_size = *context->patch_size_cache_for_init_font->GetPatchSize({0, g_a, g_b}); - uint32_t plus_f_size = *context->patch_size_cache_for_init_font->GetPatchSize({0, g_a, g_f}); - uint32_t plus_f_i_fi_ffi_size = *context->patch_size_cache_for_init_font->GetPatchSize({0, g_a, g_f, g_i, g_fi, g_ffi}); - uint32_t plus_fi_ffi_size = *context->patch_size_cache_for_init_font->GetPatchSize({0, g_a, g_fi, g_ffi}); - uint32_t plus_fi_size = *context->patch_size_cache_for_init_font->GetPatchSize({0, g_a, g_fi}); - - uint32_t b_size = *context->patch_size_cache_for_init_font->GetPatchSize({g_b}); - uint32_t f_size = *context->patch_size_cache_for_init_font->GetPatchSize({g_f}); - uint32_t i_size = *context->patch_size_cache_for_init_font->GetPatchSize({g_i}); - uint32_t fi_ffi_size = *context->patch_size_cache_for_init_font->GetPatchSize({g_fi, g_ffi}); - uint32_t ffi_size = *context->patch_size_cache_for_init_font->GetPatchSize({g_ffi}); - uint32_t i_fi_ffi_size = *context->patch_size_cache_for_init_font->GetPatchSize({g_i, g_fi, g_ffi}); + uint32_t init_size = + *context->patch_size_cache_for_init_font->GetPatchSize({0, g_a}); + uint32_t plus_b_size = + *context->patch_size_cache_for_init_font->GetPatchSize({0, g_a, g_b}); + uint32_t plus_f_size = + *context->patch_size_cache_for_init_font->GetPatchSize({0, g_a, g_f}); + uint32_t plus_f_i_fi_ffi_size = + *context->patch_size_cache_for_init_font->GetPatchSize( + {0, g_a, g_f, g_i, g_fi, g_ffi}); + uint32_t plus_fi_ffi_size = + *context->patch_size_cache_for_init_font->GetPatchSize( + {0, g_a, g_fi, g_ffi}); + uint32_t plus_fi_size = + *context->patch_size_cache_for_init_font->GetPatchSize({0, g_a, g_fi}); + + uint32_t b_size = + *context->patch_size_cache_for_init_font->GetPatchSize({g_b}); + uint32_t f_size = + *context->patch_size_cache_for_init_font->GetPatchSize({g_f}); + uint32_t i_size = + *context->patch_size_cache_for_init_font->GetPatchSize({g_i}); + uint32_t fi_ffi_size = + *context->patch_size_cache_for_init_font->GetPatchSize({g_fi, g_ffi}); + uint32_t ffi_size = + *context->patch_size_cache_for_init_font->GetPatchSize({g_ffi}); + uint32_t i_fi_ffi_size = + *context->patch_size_cache_for_init_font->GetPatchSize( + {g_i, g_fi, g_ffi}); /* Case 1: simple move */ flat_hash_map smallest; - auto r = CandidateMerge::ComputeInitFontCostDelta(merger, init_size, {g_b}, smallest); + auto r = CandidateMerge::ComputeInitFontCostDelta(merger, init_size, {g_b}, + smallest); ASSERT_TRUE(r.ok()) << r.status(); double delta = r->first; GlyphSet moved_glyphs = r->second; EXPECT_EQ(delta, - // patch with b removed and moved to the init font. - (plus_b_size - init_size) - 0.95 * (b_size + 75)); - EXPECT_EQ(moved_glyphs, (GlyphSet {g_b})); + // patch with b removed and moved to the init font. + (plus_b_size - init_size) - 0.95 * (b_size + 75)); + EXPECT_EQ(moved_glyphs, (GlyphSet{g_b})); /* Case 2: move + modified conditions (one half of (f AND i) moved) */ smallest.clear(); - r = CandidateMerge::ComputeInitFontCostDelta(merger, init_size, {g_f}, smallest); + r = CandidateMerge::ComputeInitFontCostDelta(merger, init_size, {g_f}, + smallest); ASSERT_TRUE(r.ok()) << r.status(); delta = r->first; moved_glyphs = r->second; EXPECT_NEAR(delta, - (plus_f_size - init_size) /* init font increase */ - - 0.85 * (f_size + 75) /* if (f) removed */ - - 0.75 * (i_size + 75) /* if (i) removed */ - - (0.75 * 0.85) * (fi_ffi_size + 75) /* if (f and i) removed */ - + 0.75 * (i_fi_ffi_size + 75) /* if (i) with ligatures */, - 1e-9 - ); - EXPECT_EQ(moved_glyphs, (GlyphSet {g_f})); + (plus_f_size - init_size) /* init font increase */ + - 0.85 * (f_size + 75) /* if (f) removed */ + - 0.75 * (i_size + 75) /* if (i) removed */ + - + (0.75 * 0.85) * (fi_ffi_size + 75) /* if (f and i) removed */ + + 0.75 * (i_fi_ffi_size + 75) /* if (i) with ligatures */, + 1e-9); + EXPECT_EQ(moved_glyphs, (GlyphSet{g_f})); /* Case 3: move non exclusive */ smallest.clear(); - r = CandidateMerge::ComputeInitFontCostDelta(merger, init_size, {g_fi, g_ffi}, smallest); + r = CandidateMerge::ComputeInitFontCostDelta(merger, init_size, {g_fi, g_ffi}, + smallest); ASSERT_TRUE(r.ok()) << r.status(); delta = r->first; moved_glyphs = r->second; EXPECT_NEAR(delta, - (plus_fi_ffi_size - init_size) /* init font increase */ - - (0.85 * 0.75) * (fi_ffi_size + 75), /* if (f and i) removed */ - 1e-9 - ); - EXPECT_EQ(moved_glyphs, (GlyphSet {g_fi, g_ffi})); + (plus_fi_ffi_size - init_size) /* init font increase */ + - + (0.85 * 0.75) * (fi_ffi_size + 75), /* if (f and i) removed */ + 1e-9); + EXPECT_EQ(moved_glyphs, (GlyphSet{g_fi, g_ffi})); /* Case 4: move part of a patch */ smallest.clear(); - r = CandidateMerge::ComputeInitFontCostDelta(merger, init_size, {g_fi}, smallest); + r = CandidateMerge::ComputeInitFontCostDelta(merger, init_size, {g_fi}, + smallest); ASSERT_TRUE(r.ok()) << r.status(); delta = r->first; moved_glyphs = r->second; EXPECT_NEAR(delta, - (plus_fi_size - init_size) /* init font increase */ - - (0.85 * 0.75) * (fi_ffi_size + 75) /* if (f and i) removed */ - + (0.85 * 0.75) * (ffi_size + 75), /* if (f and i) added */ - 1e-9 - ); - EXPECT_EQ(moved_glyphs, (GlyphSet {g_fi})); + (plus_fi_size - init_size) /* init font increase */ + - + (0.85 * 0.75) * (fi_ffi_size + 75) /* if (f and i) removed */ + + (0.85 * 0.75) * (ffi_size + 75), /* if (f and i) added */ + 1e-9); + EXPECT_EQ(moved_glyphs, (GlyphSet{g_fi})); /* Case 5: move multiple */ smallest.clear(); - r = CandidateMerge::ComputeInitFontCostDelta(merger, init_size, {g_f, g_i}, smallest); + r = CandidateMerge::ComputeInitFontCostDelta(merger, init_size, {g_f, g_i}, + smallest); ASSERT_TRUE(r.ok()) << r.status(); delta = r->first; moved_glyphs = r->second; EXPECT_NEAR(delta, - (plus_f_i_fi_ffi_size - init_size) /* init font increase */ - - 0.85 * (f_size + 75) /* if (f) removed */ - - 0.75 * (i_size + 75) /* if (i) removed */ - - (0.75 * 0.85) * (fi_ffi_size + 75), /* if (f and i) removed */ - 1e-9 - ); - EXPECT_EQ(moved_glyphs, (GlyphSet {g_f, g_i, g_fi, g_ffi})); + (plus_f_i_fi_ffi_size - init_size) /* init font increase */ + - 0.85 * (f_size + 75) /* if (f) removed */ + - 0.75 * (i_size + 75) /* if (i) removed */ + - + (0.75 * 0.85) * (fi_ffi_size + 75), /* if (f and i) removed */ + 1e-9); + EXPECT_EQ(moved_glyphs, (GlyphSet{g_f, g_i, g_fi, g_ffi})); } TEST_F(CandidateMergeTest, ComputeInitFontCostDelta_TracksSmallestDelta) { @@ -644,7 +664,8 @@ TEST_F(CandidateMergeTest, ComputeInitFontCostDelta_TracksSmallestDelta) { flat_hash_map smallest_size_increases; // First call: size increase is 100. - auto r = CandidateMerge::ComputeInitFontCostDelta(merger, init_size, {g_b}, smallest_size_increases); + auto r = CandidateMerge::ComputeInitFontCostDelta(merger, init_size, {g_b}, + smallest_size_increases); ASSERT_TRUE(r.ok()) << r.status(); double first_delta = r->first; EXPECT_EQ(smallest_size_increases.at({g_b}), 100); @@ -652,7 +673,8 @@ TEST_F(CandidateMergeTest, ComputeInitFontCostDelta_TracksSmallestDelta) { // Second call: simulated size increase is 200 (1200 - 1000). // But it should use the stored smallest increase (100). mock_cache->SetPatchSize({0, g_a, g_b}, 1200); - r = CandidateMerge::ComputeInitFontCostDelta(merger, init_size, {g_b}, smallest_size_increases); + r = CandidateMerge::ComputeInitFontCostDelta(merger, init_size, {g_b}, + smallest_size_increases); ASSERT_TRUE(r.ok()) << r.status(); double second_delta = r->first; @@ -660,9 +682,11 @@ TEST_F(CandidateMergeTest, ComputeInitFontCostDelta_TracksSmallestDelta) { EXPECT_EQ(first_delta, second_delta); // Third call: simulated size increase is 50 (1050 - 1000). - // It should update the stored smallest increase to 50 and return a smaller delta. + // It should update the stored smallest increase to 50 and return a smaller + // delta. mock_cache->SetPatchSize({0, g_a, g_b}, 1050); - r = CandidateMerge::ComputeInitFontCostDelta(merger, init_size, {g_b}, smallest_size_increases); + r = CandidateMerge::ComputeInitFontCostDelta(merger, init_size, {g_b}, + smallest_size_increases); ASSERT_TRUE(r.ok()) << r.status(); double third_delta = r->first; diff --git a/ift/encoder/closure_glyph_segmenter.cc b/ift/encoder/closure_glyph_segmenter.cc index 725204cc..99300520 100644 --- a/ift/encoder/closure_glyph_segmenter.cc +++ b/ift/encoder/closure_glyph_segmenter.cc @@ -41,12 +41,12 @@ using ift::config::SegmentationPlan; using ift::config::SegmentsProto; using ift::config::UnmappedGlyphHandling; -using absl::Span; using absl::btree_map; using absl::btree_set; using absl::flat_hash_map; using absl::flat_hash_set; using absl::node_hash_map; +using absl::Span; using absl::Status; using absl::StatusOr; using absl::StrCat; @@ -87,19 +87,19 @@ Status CheckForDisjointCodepoints( return absl::OkStatus(); } -static void PrintCondition( - const GlyphGroupings& groupings, const ActivationCondition& condition, bool added) { +static void PrintCondition(const GlyphGroupings& groupings, + const ActivationCondition& condition, bool added) { const auto& glyphs = groupings.ConditionsAndGlyphs().at(condition); VLOG(0) << (added ? "++ " : "-- ") << condition.ToString() << " => " << glyphs.ToString(); } -static void PrintDiff(const GlyphGroupings& a, - const GlyphGroupings& b) { +static void PrintDiff(const GlyphGroupings& a, const GlyphGroupings& b) { auto it_a = a.OrderedConditions().begin(); auto it_b = b.OrderedConditions().begin(); - while (it_a != a.OrderedConditions().end() || it_b != b.OrderedConditions().end()) { + while (it_a != a.OrderedConditions().end() || + it_b != b.OrderedConditions().end()) { if (it_a == a.OrderedConditions().end()) { PrintCondition(b, *it_b, true); it_b++; @@ -107,7 +107,8 @@ static void PrintDiff(const GlyphGroupings& a, PrintCondition(a, *it_a, false); it_a++; } else if (*it_a == *it_b) { - if (a.ConditionsAndGlyphs().at(*it_a) != b.ConditionsAndGlyphs().at(*it_b)) { + if (a.ConditionsAndGlyphs().at(*it_a) != + b.ConditionsAndGlyphs().at(*it_b)) { PrintCondition(a, *it_a, false); PrintCondition(b, *it_b, true); } @@ -194,7 +195,6 @@ static StatusOr GlyphGroupingsAreEquivalent( */ Status ValidateIncrementalGroupings(hb_face_t* face, const SegmentationContext& context) { - SegmentationContext non_incremental_context = TRY(context.WithSameSettings()); // Compute the glyph groupings/conditions from scratch to compare against the @@ -334,11 +334,9 @@ static std::vector PreGroupSegments( segment_index_map[o.original_index] = i; - if (strategy != nullptr && - !is_feature_segment && + if (strategy != nullptr && !is_feature_segment && strategy->PreClosureGroupSize() > 1 && o.probability.Max() <= strategy->PreClosureProbabilityThreshold()) { - uint32_t remaining = strategy->PreClosureGroupSize() - 1; while (remaining > 0) { if (ordering_it == ordering.end() || @@ -346,9 +344,10 @@ static std::vector PreGroupSegments( break; } - // Don't pregroup feature segments, these generally have broad interactions - // and pre-grouping them blindly can cause poor outcomes. - if (!subset_definitions[ordering_it->original_index].feature_tags.empty()) { + // Don't pregroup feature segments, these generally have broad + // interactions and pre-grouping them blindly can cause poor outcomes. + if (!subset_definitions[ordering_it->original_index] + .feature_tags.empty()) { break; } @@ -663,8 +662,8 @@ StatusOr> ClosureGlyphSegmenter::TotalCosts( PatchSizeCacheImpl patch_sizer(original_face, 11); std::vector out; - for (const ProbabilityCalculator* probability_calculator : probability_calculators) { - + for (const ProbabilityCalculator* probability_calculator : + probability_calculators) { std::vector segments; for (const auto& def : segmentation.Segments()) { auto P = probability_calculator->ComputeProbability(def); @@ -689,7 +688,8 @@ StatusOr> ClosureGlyphSegmenter::TotalCosts( double ideal_cost = 0.0; double incremental_size = non_ift_font_size / (double)non_ift.codepoints.size(); - double init_font_ideal_size = incremental_size * segmentation.InitialFontSegment().codepoints.size(); + double init_font_ideal_size = + incremental_size * segmentation.InitialFontSegment().codepoints.size(); for (unsigned cp : non_ift.codepoints) { if (segmentation.InitialFontSegment().codepoints.contains(cp)) { continue; @@ -699,11 +699,11 @@ StatusOr> ClosureGlyphSegmenter::TotalCosts( } out.push_back(SegmentationCost{ - .ift_init_cost = init_font_size, - .ift_patch_cost = total_cost, - .non_ift_total_cost = non_ift_font_size, - .ideal_init_cost = init_font_ideal_size, - .ideal_patch_cost = ideal_cost, + .ift_init_cost = init_font_size, + .ift_patch_cost = total_cost, + .non_ift_total_cost = non_ift_font_size, + .ideal_init_cost = init_font_ideal_size, + .ideal_patch_cost = ideal_cost, }); } return out; @@ -731,7 +731,6 @@ void ClosureGlyphSegmenter::AddTableKeyedSegments( const btree_map& merge_groups, const std::vector& segments, const SubsetDefinition& init_segment) { - SegmentSet uncovered_segments; if (!segments.empty()) { uncovered_segments.insert_range(0, segments.size() - 1); diff --git a/ift/encoder/closure_glyph_segmenter.h b/ift/encoder/closure_glyph_segmenter.h index 596cbae1..b02d457f 100644 --- a/ift/encoder/closure_glyph_segmenter.h +++ b/ift/encoder/closure_glyph_segmenter.h @@ -1,21 +1,21 @@ #ifndef IFT_ENCODER_CLOSURE_GLYPH_SEGMENTER_H_ #define IFT_ENCODER_CLOSURE_GLYPH_SEGMENTER_H_ -#include #include +#include #include #include "absl/container/btree_map.h" #include "absl/status/statusor.h" +#include "ift/common/data_file_resolver.h" #include "ift/config/common.pb.h" #include "ift/config/segmentation_plan.pb.h" #include "ift/config/segmenter_config.pb.h" #include "ift/encoder/glyph_segmentation.h" #include "ift/encoder/merge_strategy.h" +#include "ift/encoder/patch_size_cache.h" #include "ift/encoder/subset_definition.h" #include "ift/freq/probability_calculator.h" -#include "ift/encoder/patch_size_cache.h" -#include "ift/common/data_file_resolver.h" namespace ift::encoder { @@ -27,7 +27,6 @@ struct SegmentationCost { double ideal_init_cost; double ideal_patch_cost; - }; /* @@ -80,7 +79,8 @@ class ClosureGlyphSegmenter { */ absl::StatusOr> TotalCosts( hb_face_t* original_face, const GlyphSegmentation& segmentation, - absl::Span probability_calculators) const; + absl::Span + probability_calculators) const; /* * Computes the total cost of the fallback patch (expected number of bytes diff --git a/ift/encoder/closure_glyph_segmenter_test.cc b/ift/encoder/closure_glyph_segmenter_test.cc index 4b6ecd74..ada541d1 100644 --- a/ift/encoder/closure_glyph_segmenter_test.cc +++ b/ift/encoder/closure_glyph_segmenter_test.cc @@ -1,9 +1,9 @@ #include "ift/encoder/closure_glyph_segmenter.h" -#include "ift/common/bazel_data_file_resolver.h" #include #include "gtest/gtest.h" +#include "ift/common/bazel_data_file_resolver.h" #include "ift/common/font_data.h" #include "ift/common/font_helper.h" #include "ift/common/int_set.h" @@ -24,12 +24,12 @@ using ift::config::PATCH; using absl::btree_map; using absl::StatusOr; +using ift::common::BazelDataFileResolver; using ift::common::CodepointSet; +using ift::common::DataFileResolver; using ift::common::FontData; using ift::common::FontHelper; using ift::common::hb_face_unique_ptr; -using ift::common::DataFileResolver; -using ift::common::BazelDataFileResolver; using ift::common::IntSet; using ift::common::make_hb_face; using ift::common::SegmentSet; @@ -47,20 +47,23 @@ class ClosureGlyphSegmenterTest : public ::testing::Test { resolver(*BazelDataFileResolver::CreateForTest()), segmenter(8, 8, PATCH, CLOSURE_ONLY, resolver), - segmenter_find_conditions(8, 8, FIND_CONDITIONS, CLOSURE_ONLY, resolver), - segmenter_move_to_init_font(8, 8, MOVE_TO_INIT_FONT, CLOSURE_ONLY, resolver), + segmenter_find_conditions(8, 8, FIND_CONDITIONS, CLOSURE_ONLY, + resolver), + segmenter_move_to_init_font(8, 8, MOVE_TO_INIT_FONT, CLOSURE_ONLY, + resolver), #ifdef HB_DEPEND_API - segmenter_dep_graph(8, 8, PATCH, CLOSURE_AND_VALIDATE_DEP_GRAPH, resolver), + segmenter_dep_graph(8, 8, PATCH, CLOSURE_AND_VALIDATE_DEP_GRAPH, + resolver), segmenter_dep_graph_only(8, 8, PATCH, DEP_GRAPH_ONLY, resolver), - segmenter_find_conditions_dep_graph(8, 8, FIND_CONDITIONS, - CLOSURE_AND_VALIDATE_DEP_GRAPH, resolver), - segmenter_move_to_init_font_dep_graph(8, 8, MOVE_TO_INIT_FONT, - CLOSURE_AND_VALIDATE_DEP_GRAPH, resolver) + segmenter_find_conditions_dep_graph( + 8, 8, FIND_CONDITIONS, CLOSURE_AND_VALIDATE_DEP_GRAPH, resolver), + segmenter_move_to_init_font_dep_graph( + 8, 8, MOVE_TO_INIT_FONT, CLOSURE_AND_VALIDATE_DEP_GRAPH, resolver) #else segmenter_dep_graph(8, 8, PATCH, CLOSURE_ONLY, resolver), segmenter_dep_graph_only(8, 8, PATCH, CLOSURE_ONLY, resolver), - segmenter_find_conditions_dep_graph(8, 8, FIND_CONDITIONS, - CLOSURE_ONLY, resolver), + segmenter_find_conditions_dep_graph(8, 8, FIND_CONDITIONS, CLOSURE_ONLY, + resolver), segmenter_move_to_init_font_dep_graph(8, 8, MOVE_TO_INIT_FONT, CLOSURE_ONLY, resolver) #endif @@ -772,7 +775,8 @@ TEST_F(ClosureGlyphSegmenterTest, FullRoboto_WithFeaturesAndDepGraph) { roboto.get(), {}, segments, MergeStrategy::Heuristic(4000, 12000)); ASSERT_TRUE(segmentation.ok()) << segmentation.status(); ASSERT_TRUE(dep_graph_segmentation.ok()) << dep_graph_segmentation.status(); - ASSERT_TRUE(dep_graph_only_segmentation.ok()) << dep_graph_only_segmentation.status(); + ASSERT_TRUE(dep_graph_only_segmentation.ok()) + << dep_graph_only_segmentation.status(); ASSERT_EQ(segmentation, dep_graph_segmentation); } @@ -1133,7 +1137,8 @@ TEST_F(ClosureGlyphSegmenterTest, TotalCost) { std::vector with_patches_cost = *segmenter.TotalCosts(roboto.get(), segmentation2, {&calculator}); ASSERT_GT(with_patches_cost[0].ift_patch_cost, base_cost[0].ift_patch_cost); - ASSERT_LT(with_patches_cost[0].ideal_patch_cost, with_patches_cost[0].ift_patch_cost); + ASSERT_LT(with_patches_cost[0].ideal_patch_cost, + with_patches_cost[0].ift_patch_cost); } TEST_F(ClosureGlyphSegmenterTest, NoGlyphSegments_CostMerging) { @@ -1143,7 +1148,9 @@ TEST_F(ClosureGlyphSegmenterTest, NoGlyphSegments_CostMerging) { // analysis and final output. UnicodeFrequencies frequencies{ - {{'A', 'A'}, 1000}, {{'C', 'C'}, 1000}, {{0x106, 0x106}, 1}, /* Cacute */ + {{'A', 'A'}, 1000}, + {{'C', 'C'}, 1000}, + {{0x106, 0x106}, 1}, /* Cacute */ }; MergeStrategy strategy = @@ -1151,12 +1158,13 @@ TEST_F(ClosureGlyphSegmenterTest, NoGlyphSegments_CostMerging) { strategy.SetOptimizationCutoffFraction(0.0); strategy.SetUsePatchMerges(true); - auto segmentation = - CodepointToGlyphSegments(roboto.get(), {}, - { - {'A'}, {'C'}, {0x106}, /* Cacute */ - }, - strategy); + auto segmentation = CodepointToGlyphSegments(roboto.get(), {}, + { + {'A'}, + {'C'}, + {0x106}, /* Cacute */ + }, + strategy); ASSERT_TRUE(segmentation.ok()) << segmentation.status(); // The initial conditions are: @@ -1890,7 +1898,9 @@ if (s3) then p0 TEST_F(ClosureGlyphSegmenterTest, FeatureSegments_NoPreGrouping_MidGroup) { UnicodeFrequencies freq{ - {{' ', ' '}, 100}, {{'a', 'a'}, 30}, {{'b', 'b'}, 29}, + {{' ', ' '}, 100}, + {{'a', 'a'}, 30}, + {{'b', 'b'}, 29}, }; MergeStrategy costs = *MergeStrategy::CostBased(std::move(freq), 0, 1); @@ -1916,22 +1926,21 @@ TEST_F(ClosureGlyphSegmenterTest, FeatureSegments_NoPreGrouping_MidGroup) { }; ASSERT_EQ(segmentation->Segments(), expected_segments); - // Use heuristic so that segments don't get re-ordered by probability. MergeStrategy heuristic = MergeStrategy::Heuristic(0); heuristic.SetPreClosureProbabilityThreshold(0.55); heuristic.SetPreClosureGroupSize(3); segmentation = CodepointToGlyphSegments(roboto.get(), {}, - { - {'a'}, - feat_seg, - {'b'}, - }, - heuristic); + { + {'a'}, + feat_seg, + {'b'}, + }, + heuristic); ASSERT_TRUE(segmentation.ok()) << segmentation.status(); - // The feature segment comes up mid group so it breaks up the group and segments - // all stay separate. + // The feature segment comes up mid group so it breaks up the group and + // segments all stay separate. expected_segments = { {'a'}, feat_seg, @@ -1942,7 +1951,10 @@ TEST_F(ClosureGlyphSegmenterTest, FeatureSegments_NoPreGrouping_MidGroup) { TEST_F(ClosureGlyphSegmenterTest, FeatureSegments_NoPreGrouping_AsFirst) { UnicodeFrequencies freq{ - {{' ', ' '}, 100}, {{'a', 'a'}, 30}, {{'b', 'b'}, 29}, {{'c', 'c'}, 28}, + {{' ', ' '}, 100}, + {{'a', 'a'}, 30}, + {{'b', 'b'}, 29}, + {{'c', 'c'}, 28}, }; MergeStrategy costs = *MergeStrategy::CostBased(std::move(freq), 0, 1); @@ -1970,14 +1982,17 @@ TEST_F(ClosureGlyphSegmenterTest, FeatureSegments_NoPreGrouping_AsFirst) { } TEST_F(ClosureGlyphSegmenterTest, PreGrouping_RemappedIndexBug) { - // Regression test for a bug in assigned updated segment indices after pre-grouping - // The first segment in a group was always being remapped to segment 0. This uses - // two separate merge groups to ensure the second one does not end up with segment 0. + // Regression test for a bug in assigned updated segment indices after + // pre-grouping The first segment in a group was always being remapped to + // segment 0. This uses two separate merge groups to ensure the second one + // does not end up with segment 0. UnicodeFrequencies freq_high{ - {{' ', ' '}, 100}, {{'a', 'a'}, 100}, + {{' ', ' '}, 100}, + {{'a', 'a'}, 100}, }; - MergeStrategy strategy_0 = *MergeStrategy::CostBased(std::move(freq_high), 0, 1); + MergeStrategy strategy_0 = + *MergeStrategy::CostBased(std::move(freq_high), 0, 1); // Use Heuristic for Group 1 to force merging of whatever ends up in it. MergeStrategy strategy_1 = MergeStrategy::Heuristic(1000); @@ -1998,8 +2013,6 @@ TEST_F(ClosureGlyphSegmenterTest, PreGrouping_RemappedIndexBug) { merge_groups); ASSERT_TRUE(segmentation.ok()) << segmentation.status(); - - // We should not see 'a' show up in the second group std::vector expected_segments = { {'a'}, @@ -2015,7 +2028,8 @@ TEST_F(ClosureGlyphSegmenterTest, PreGrouping_RemappedIndexBug) { // TODO(garretrieger): add test where or_set glyphs are moved back to unmapped // due to found "additional conditions". -TEST_F(ClosureGlyphSegmenterTest, AddTableKeyedSegments_FeatureOnlyAndUncovered) { +TEST_F(ClosureGlyphSegmenterTest, + AddTableKeyedSegments_FeatureOnlyAndUncovered) { ift::config::SegmentationPlan plan; btree_map merge_groups; std::vector segments(4); @@ -2032,7 +2046,8 @@ TEST_F(ClosureGlyphSegmenterTest, AddTableKeyedSegments_FeatureOnlyAndUncovered) SubsetDefinition init_segment; - ClosureGlyphSegmenter::AddTableKeyedSegments(plan, merge_groups, segments, init_segment); + ClosureGlyphSegmenter::AddTableKeyedSegments(plan, merge_groups, segments, + init_segment); ASSERT_EQ(plan.non_glyph_segments_size(), 3); @@ -2076,7 +2091,8 @@ TEST_F(ClosureGlyphSegmenterTest, AddTableKeyedSegments_SubtractInitSegment) { SubsetDefinition init_segment; init_segment.codepoints.insert('a'); - ClosureGlyphSegmenter::AddTableKeyedSegments(plan, merge_groups, segments, init_segment); + ClosureGlyphSegmenter::AddTableKeyedSegments(plan, merge_groups, segments, + init_segment); ASSERT_EQ(plan.non_glyph_segments_size(), 1); uint32_t id = plan.non_glyph_segments(0).values(0); diff --git a/ift/encoder/complex_condition_finder_test.cc b/ift/encoder/complex_condition_finder_test.cc index 2f02ca49..2e940281 100644 --- a/ift/encoder/complex_condition_finder_test.cc +++ b/ift/encoder/complex_condition_finder_test.cc @@ -1,8 +1,8 @@ #include "ift/encoder/complex_condition_finder.h" -#include "ift/common/bazel_data_file_resolver.h" #include "absl/container/btree_map.h" #include "gtest/gtest.h" +#include "ift/common/bazel_data_file_resolver.h" #include "ift/common/font_data.h" #include "ift/common/int_set.h" #include "ift/encoder/closure_glyph_segmenter.h" @@ -16,14 +16,14 @@ using ift::config::PATCH; using absl::btree_map; using absl::StatusOr; +using ift::common::BazelDataFileResolver; +using ift::common::DataFileResolver; using ift::common::FontData; using ift::common::GlyphSet; using ift::common::hb_face_unique_ptr; using ift::common::make_hb_face; using ift::common::SegmentSet; using ift::freq::ProbabilityBound; -using ift::common::DataFileResolver; -using ift::common::BazelDataFileResolver; namespace ift::encoder { @@ -295,7 +295,9 @@ TEST_F(ComplexConditionFinderTest, FindConditions_WithBadInscope) { 756, }, { - 2, 3, 4, // missing 1, 6 + 2, + 3, + 4, // missing 1, 6 }); ASSERT_TRUE(absl::IsInternal(r.status())); } diff --git a/ift/encoder/condition_to_glyphs_index.h b/ift/encoder/condition_to_glyphs_index.h index b843ecc2..73ea512e 100644 --- a/ift/encoder/condition_to_glyphs_index.h +++ b/ift/encoder/condition_to_glyphs_index.h @@ -13,7 +13,7 @@ namespace ift::encoder { // Stores a mapping from ActivationCondition to a set of glyphs. // // Also maintains the reverse mapping indices. -template +template class ConditionToGlyphsIndex { public: std::optional Invalidate(glyph_id_t gid) { @@ -49,8 +49,9 @@ class ConditionToGlyphsIndex { } private: - template - void Remove(const ActivationCondition& condition, It conditions_and_glyphs_it) { + template + void Remove(const ActivationCondition& condition, + It conditions_and_glyphs_it) { conditions_and_glyphs_.erase(conditions_and_glyphs_it); if (keep_ordered) { ordered_conditions_.erase(condition); @@ -66,8 +67,8 @@ class ConditionToGlyphsIndex { } } } - public: + public: absl::Status Union(ActivationCondition condition, common::GlyphSet glyphs) { conditions_and_glyphs_[condition].union_set(glyphs); if (keep_ordered) { @@ -153,14 +154,16 @@ class ConditionToGlyphsIndex { bool operator==(const ConditionToGlyphsIndex& other) const { return conditions_and_glyphs_ == other.conditions_and_glyphs_ && - /* don't need to check ordered_conditions_ it's equivalent to conditions_and_glyphs_ */ + /* don't need to check ordered_conditions_ it's equivalent to + conditions_and_glyphs_ */ glyph_to_condition_ == other.glyph_to_condition_ && triggering_segment_to_conditions_ == other.triggering_segment_to_conditions_; } private: - absl::flat_hash_map conditions_and_glyphs_; + absl::flat_hash_map + conditions_and_glyphs_; absl::btree_set ordered_conditions_; absl::flat_hash_map glyph_to_condition_; diff --git a/ift/encoder/dependency_closure.cc b/ift/encoder/dependency_closure.cc index ba1ced3c..b489ebba 100644 --- a/ift/encoder/dependency_closure.cc +++ b/ift/encoder/dependency_closure.cc @@ -554,7 +554,9 @@ DependencyClosure::ExtractAllNodeConditions() const { return conditions; } -SegmentSet DependencyClosure::ComputeInertSegments(const absl::flat_hash_map& conditions) const { +SegmentSet DependencyClosure::ComputeInertSegments( + const absl::flat_hash_map& conditions) + const { SegmentSet candidates = segmentation_info_->NonEmptySegments(); for (const auto& [_, condition] : conditions) { if (!condition.IsUnitary()) { diff --git a/ift/encoder/dependency_closure.h b/ift/encoder/dependency_closure.h index 04770be4..fab60b16 100644 --- a/ift/encoder/dependency_closure.h +++ b/ift/encoder/dependency_closure.h @@ -7,8 +7,8 @@ #include "absl/container/flat_hash_map.h" #include "absl/status/statusor.h" #include "hb.h" -#include "ift/common/font_data.h" #include "ift/common/data_file_resolver.h" +#include "ift/common/font_data.h" #include "ift/common/int_set.h" #include "ift/common/try.h" #include "ift/encoder/activation_condition.h" @@ -34,13 +34,12 @@ class DependencyClosure { public: static absl::StatusOr> Create( const RequestedSegmentationInformation* segmentation_info, - hb_face_t* face, - const ift::common::DataFileResolver& resolver) { + hb_face_t* face, const ift::common::DataFileResolver& resolver) { #ifndef HB_DEPEND_API return std::unique_ptr(new DependencyClosure()); #else - dep_graph::DependencyGraph graph = - TRY(dep_graph::DependencyGraph::Create(segmentation_info, face, resolver)); + dep_graph::DependencyGraph graph = TRY( + dep_graph::DependencyGraph::Create(segmentation_info, face, resolver)); auto result = std::unique_ptr( new DependencyClosure(std::move(graph), segmentation_info, face)); TRYV(result->InitFontChanged(ift::common::SegmentSet::all())); @@ -145,7 +144,9 @@ class DependencyClosure { absl::StatusOr> ExtractAllNodeConditions() const; - ift::common::SegmentSet ComputeInertSegments(const absl::flat_hash_map& conditions) const; + ift::common::SegmentSet ComputeInertSegments( + const absl::flat_hash_map& conditions) + const; absl::flat_hash_map InitializeConditions() const; diff --git a/ift/encoder/dependency_closure_test.cc b/ift/encoder/dependency_closure_test.cc index 78978654..85c8cd71 100644 --- a/ift/encoder/dependency_closure_test.cc +++ b/ift/encoder/dependency_closure_test.cc @@ -1,10 +1,10 @@ #include "ift/encoder/dependency_closure.h" -#include "ift/common/bazel_data_file_resolver.h" #include #include #include "gtest/gtest.h" +#include "ift/common/bazel_data_file_resolver.h" #include "ift/common/font_data.h" #include "ift/common/font_helper.h" #include "ift/common/int_set.h" @@ -21,15 +21,15 @@ using ift::config::PATCH; -using bazel::tools::cpp::runfiles::Runfiles; using absl::btree_set; using absl::flat_hash_map; using absl::flat_hash_set; using absl::Status; +using bazel::tools::cpp::runfiles::Runfiles; +using ift::common::BazelDataFileResolver; using ift::common::CodepointSet; -using ift::common::FontData; using ift::common::DataFileResolver; -using ift::common::BazelDataFileResolver; +using ift::common::FontData; using ift::common::FontHelper; using ift::common::GlyphSet; using ift::common::hb_face_unique_ptr; @@ -44,9 +44,10 @@ class DependencyClosureTest : public ::testing::Test { protected: DependencyClosureTest() : face(from_file("_main/ift/common/testdata/Roboto-Regular.ttf")), - double_nested_face( - from_file("_main/ift/common/testdata/double-nested-components.ttf")), - noto_sans_jp(from_file("_main/ift/common/testdata/NotoSansJP-Regular.ttf")), + double_nested_face(from_file( + "_main/ift/common/testdata/double-nested-components.ttf")), + noto_sans_jp( + from_file("_main/ift/common/testdata/NotoSansJP-Regular.ttf")), noto_sans_jp_vf( from_file("_main/ift/common/testdata/NotoSansJP-VF.cmap14.ttf")), roboto_vf(from_file("_main/ift/common/testdata/Roboto[wdth,wght].ttf")), @@ -54,8 +55,8 @@ class DependencyClosureTest : public ::testing::Test { closure_cache(*GlyphClosureCache::Create(face.get(), *resolver)), segmentation_info(*RequestedSegmentationInformation::Create( segments, WithDefaultFeatures(), *closure_cache, PATCH)), - dependency_closure( - *DependencyClosure::Create(segmentation_info.get(), face.get(), *resolver)) {} + dependency_closure(*DependencyClosure::Create(segmentation_info.get(), + face.get(), *resolver)) {} static SubsetDefinition WithDefaultFeatures() { SubsetDefinition def; @@ -98,8 +99,8 @@ class DependencyClosureTest : public ::testing::Test { std::vector new_segments) { segmentation_info = *RequestedSegmentationInformation::Create( new_segments, new_init, *closure_cache, PATCH); - dependency_closure = - *DependencyClosure::Create(segmentation_info.get(), face.get(), *resolver); + dependency_closure = *DependencyClosure::Create(segmentation_info.get(), + face.get(), *resolver); } void Reconfigure(hb_face_t* new_face, SubsetDefinition new_init, @@ -107,8 +108,8 @@ class DependencyClosureTest : public ::testing::Test { closure_cache = *GlyphClosureCache::Create(new_face, *resolver); segmentation_info = *RequestedSegmentationInformation::Create( new_segments, new_init, *closure_cache, PATCH); - dependency_closure = - *DependencyClosure::Create(segmentation_info.get(), new_face, *resolver); + dependency_closure = *DependencyClosure::Create(segmentation_info.get(), + new_face, *resolver); } Status RejectedAnalysis(segment_index_t segment) { @@ -141,8 +142,8 @@ class DependencyClosureTest : public ::testing::Test { GlyphSet expected_or_gids; GlyphSet expected_exclusive_gids; TRYV(closure_cache->AnalyzeSegment(*segmentation_info, segments, - expected_and_gids, expected_or_gids, - expected_exclusive_gids)); + expected_and_gids, expected_or_gids, + expected_exclusive_gids)); std::string message; bool success = true; @@ -256,7 +257,7 @@ TEST_F(DependencyClosureTest, InertSegments) { /* 3 */ {{'i'}, ProbabilityBound::Zero()}, }); - ASSERT_EQ(dependency_closure->InertSegments(), (SegmentSet {0, 1})); + ASSERT_EQ(dependency_closure->InertSegments(), (SegmentSet{0, 1})); Reconfigure(face.get(), WithDefaultFeatures({}), { @@ -265,7 +266,7 @@ TEST_F(DependencyClosureTest, InertSegments) { /* 2 */ {{'f', 'i'}, ProbabilityBound::Zero()}, }); - ASSERT_EQ(dependency_closure->InertSegments(), (SegmentSet {0, 1, 2})); + ASSERT_EQ(dependency_closure->InertSegments(), (SegmentSet{0, 1, 2})); } TEST_F(DependencyClosureTest, InertSegments_InitFont) { @@ -276,7 +277,7 @@ TEST_F(DependencyClosureTest, InertSegments_InitFont) { /* 2 */ {{'i'}, ProbabilityBound::Zero()}, }); - ASSERT_EQ(dependency_closure->InertSegments(), (SegmentSet {0, 1, 2})); + ASSERT_EQ(dependency_closure->InertSegments(), (SegmentSet{0, 1, 2})); } TEST_F(DependencyClosureTest, ExtractAllGlyphConditions_Composite) { @@ -822,12 +823,12 @@ TEST_F(DependencyClosureTest, SegmentsMerged_InertSegments) { /* 2 */ {{'i'}, ProbabilityBound::Zero()}, }); - ASSERT_EQ(dependency_closure->InertSegments(), (SegmentSet {0})); + ASSERT_EQ(dependency_closure->InertSegments(), (SegmentSet{0})); Status s = dependency_closure->SegmentsMerged(2, {1, 2}); ASSERT_TRUE(s.ok()) << s; - ASSERT_EQ(dependency_closure->InertSegments(), (SegmentSet {0, 2})); + ASSERT_EQ(dependency_closure->InertSegments(), (SegmentSet{0, 2})); } TEST_F(DependencyClosureTest, InitFontChanged_GlyphConditionsUpdate) { @@ -863,7 +864,7 @@ TEST_F(DependencyClosureTest, InitFontChanged_InertSegments) { /* 2 */ {{'i'}, ProbabilityBound::Zero()}, }); - ASSERT_EQ(dependency_closure->InertSegments(), (SegmentSet {0})); + ASSERT_EQ(dependency_closure->InertSegments(), (SegmentSet{0})); Status s = segmentation_info->ReassignInitSubset(*closure_cache, WithDefaultFeatures({'f'})); @@ -872,7 +873,7 @@ TEST_F(DependencyClosureTest, InitFontChanged_InertSegments) { s = dependency_closure->InitFontChanged(SegmentSet::all()); ASSERT_TRUE(s.ok()) << s; - ASSERT_EQ(dependency_closure->InertSegments(), (SegmentSet {0, 2})); + ASSERT_EQ(dependency_closure->InertSegments(), (SegmentSet{0, 2})); } TEST_F(DependencyClosureTest, SegmentsThatInteractWith_Nodes) { @@ -1112,19 +1113,20 @@ TEST_F(DependencyClosureTest, SegmentsToAffected) { EXPECT_EQ(dependency_closure->SegmentsToAffectedGlyphs({1}), (GlyphSet{74 /* f */, 444 /* fi */, 446 /* ffi */})); EXPECT_EQ(dependency_closure->SegmentsToAffectedGlyphs({2}), - (GlyphSet{77 /* i */, 141 /* dotlessi */, 444 /* fi */, 446 /* ffi */, 609 /* dotlessi */, 679 /* iacute */})); + (GlyphSet{77 /* i */, 141 /* dotlessi */, 444 /* fi */, + 446 /* ffi */, 609 /* dotlessi */, 679 /* iacute */})); EXPECT_EQ(dependency_closure->SegmentsToAffectedGlyphs({4}), (GlyphSet{37 /* A */})); EXPECT_EQ(dependency_closure->SegmentsToAffectedGlyphs({5}), (GlyphSet{ - 37 /* A */, - 117 /* acute */, - 141, /* dotlessi */ - 169, /* acutecomb */ - 609, /* dotlessi */ - 640 /* Aacute */, - 667, /* aacute */ - 679, /* iacute */ + 37 /* A */, + 117 /* acute */, + 141, /* dotlessi */ + 169, /* acutecomb */ + 609, /* dotlessi */ + 640 /* Aacute */, + 667, /* aacute */ + 679, /* iacute */ })); EXPECT_EQ(dependency_closure->SegmentsToAffectedGlyphs({1, 2}), diff --git a/ift/encoder/entry_graph_test.cc b/ift/encoder/entry_graph_test.cc index 812a779b..f5a626dc 100644 --- a/ift/encoder/entry_graph_test.cc +++ b/ift/encoder/entry_graph_test.cc @@ -312,8 +312,8 @@ TEST(EntryGraphTest, CalculateSubsumptionCostDelta_Disjunctive) { } TEST(EntryGraphTest, CalculateSubsumptionCostDelta_Disjunctive_PositiveDelta) { - // Children are shared and large. Subsuming them into the parent will duplicate - // them. Cost of duplication outweights entry cost savings. + // Children are shared and large. Subsuming them into the parent will + // duplicate them. Cost of duplication outweights entry cost savings. SubsetDefinition s0, s1; // Insert non-continous codepoints to ensure the sparse bit sets take up diff --git a/ift/encoder/estimated_patch_size_cache.h b/ift/encoder/estimated_patch_size_cache.h index 7cf18cc2..52d68f1b 100644 --- a/ift/encoder/estimated_patch_size_cache.h +++ b/ift/encoder/estimated_patch_size_cache.h @@ -23,7 +23,8 @@ class EstimatedPatchSizeCache : public PatchSizeCache { new EstimatedPatchSizeCache(face, compression_ratio)); } - static std::unique_ptr New(hb_face_t* face, double compression_ratio) { + static std::unique_ptr New(hb_face_t* face, + double compression_ratio) { return std::unique_ptr( new EstimatedPatchSizeCache(face, compression_ratio)); } diff --git a/ift/encoder/glyph_closure_cache.cc b/ift/encoder/glyph_closure_cache.cc index 40ef60d8..885f68a3 100644 --- a/ift/encoder/glyph_closure_cache.cc +++ b/ift/encoder/glyph_closure_cache.cc @@ -15,33 +15,33 @@ using ift::config::Glyphs; using absl::Status; using absl::StatusOr; using ift::common::CodepointSet; +using ift::common::DataFileResolver; using ift::common::FontHelper; using ift::common::GlyphSet; +using ift::common::hb_face_unique_ptr; using ift::common::hb_set_unique_ptr; -using ift::common::DataFileResolver; -using ift::common::make_hb_set; using ift::common::make_hb_face; +using ift::common::make_hb_set; using ift::common::SegmentSet; -using ift::common::FontHelper; -using ift::common::hb_face_unique_ptr; using ift::dep_graph::UnicodeEdges; namespace ift::encoder { -StatusOr> GlyphClosureCache::Create(hb_face_t* face, const DataFileResolver& resolver) { +StatusOr> GlyphClosureCache::Create( + hb_face_t* face, const DataFileResolver& resolver) { auto preprocessed_face = make_hb_face(hb_subset_preprocess(face)); - auto unicode_edges = TRY(UnicodeEdges::ComputeUnicodeDependencyEdges(preprocessed_face.get(), resolver)); - return std::unique_ptr(new GlyphClosureCache(face, std::move(preprocessed_face), std::move(unicode_edges))); + auto unicode_edges = TRY(UnicodeEdges::ComputeUnicodeDependencyEdges( + preprocessed_face.get(), resolver)); + return std::unique_ptr(new GlyphClosureCache( + face, std::move(preprocessed_face), std::move(unicode_edges))); } -GlyphClosureCache::GlyphClosureCache( - hb_face_t* original_face, - hb_face_unique_ptr preprocessed_face, - dep_graph::UnicodeEdges unicode_edges) +GlyphClosureCache::GlyphClosureCache(hb_face_t* original_face, + hb_face_unique_ptr preprocessed_face, + dep_graph::UnicodeEdges unicode_edges) : original_face_(make_hb_face(hb_face_reference(original_face))), preprocessed_face_(std::move(preprocessed_face)), - gid_to_unicode_( - FontHelper::GidToUnicodeMap(preprocessed_face_.get())), + gid_to_unicode_(FontHelper::GidToUnicodeMap(preprocessed_face_.get())), unicode_edges_(std::move(unicode_edges)) {} StatusOr GlyphClosureCache::SegmentClosure( @@ -86,26 +86,25 @@ SubsetDefinition ComputeExceptSegment( StatusOr GlyphClosureCache::HasAdditionalConditions( const RequestedSegmentationInformation* segmentation_info, const SegmentSet& segments, const GlyphSet& glyphs) { - SubsetDefinition combined; for (segment_index_t s : segments) { combined.Union(segmentation_info->Segments().at(s).Definition()); } - SubsetDefinition except = ComputeExceptSegment(*segmentation_info, segments, combined); + SubsetDefinition except = + ComputeExceptSegment(*segmentation_info, segments, combined); GlyphSet closure_glyphs = TRY(GlyphClosure(except)); return closure_glyphs.intersects(glyphs); } -CodepointSet GlyphClosureCache::UnicodeClosure(const CodepointSet& unicodes) const { - +CodepointSet GlyphClosureCache::UnicodeClosure( + const CodepointSet& unicodes) const { CodepointSet queue = unicodes; - CodepointSet visited; + CodepointSet visited = unicodes; while (!queue.empty()) { hb_codepoint_t u = *queue.min(); queue.erase(u); - visited.insert(u); // outgoing decomp edges auto decomp = unicode_edges_.decomposition.find(u); @@ -120,21 +119,19 @@ CodepointSet GlyphClosureCache::UnicodeClosure(const CodepointSet& unicodes) con auto comp = unicode_edges_.composition.find(u); if (comp != unicode_edges_.composition.end()) { for (const auto& edge : comp->second) { - if (!visited.contains(edge.dest) && visited.contains(edge.other_source)) { + if (!visited.contains(edge.dest) && + visited.contains(edge.other_source)) { // edge requirements are satisfied, we can follow it. queue.insert(edge.dest); visited.insert(edge.dest); } - // Note: if edge requirements are not satisfied, then this edge will be rechecked - // later on if edge.other_source is visited, since edges for a dest are always - // present from both sources. + // Note: if edge requirements are not satisfied, then this edge will be + // rechecked later on if edge.other_source is visited, since edges for a + // dest are always present from both sources. } } } - CodepointSet delta = visited; - delta.subtract(unicodes); - return visited; } @@ -250,9 +247,10 @@ Status GlyphClosureCache::AnalyzeSegment( return absl::OkStatus(); } -CodepointSet GlyphClosureCache::CodepointsForGlyphs(const GlyphSet& glyphs) const { - // Codepoints can map to glyphs via either standard cmap mappings, or via variation selectors - // this method captures both. +CodepointSet GlyphClosureCache::CodepointsForGlyphs( + const GlyphSet& glyphs) const { + // Codepoints can map to glyphs via either standard cmap mappings, or via + // variation selectors this method captures both. CodepointSet unicodes; for (glyph_id_t gid : glyphs) { auto unicode = gid_to_unicode_.find(gid); diff --git a/ift/encoder/glyph_closure_cache.h b/ift/encoder/glyph_closure_cache.h index cfcff34a..a7ef1cdb 100644 --- a/ift/encoder/glyph_closure_cache.h +++ b/ift/encoder/glyph_closure_cache.h @@ -1,13 +1,14 @@ #ifndef IFT_ENCODER_GLYPH_CLOSURE_CACHE_H_ #define IFT_ENCODER_GLYPH_CLOSURE_CACHE_H_ +#include + #include "absl/status/status.h" +#include "ift/common/data_file_resolver.h" #include "ift/common/font_data.h" #include "ift/common/int_set.h" -#include "ift/common/data_file_resolver.h" -#include -#include "ift/encoder/subset_definition.h" #include "ift/dep_graph/unicode_edges.h" +#include "ift/encoder/subset_definition.h" namespace ift::encoder { @@ -18,13 +19,13 @@ class RequestedSegmentationInformation; */ class GlyphClosureCache { public: - static absl::StatusOr> Create(hb_face_t* face, const ift::common::DataFileResolver& resolver); + static absl::StatusOr> Create( + hb_face_t* face, const ift::common::DataFileResolver& resolver); private: - GlyphClosureCache( - hb_face_t* original_face, - common::hb_face_unique_ptr preprocessed_face, - dep_graph::UnicodeEdges unicode_edges); + GlyphClosureCache(hb_face_t* original_face, + common::hb_face_unique_ptr preprocessed_face, + dep_graph::UnicodeEdges unicode_edges); public: absl::StatusOr GlyphClosure( @@ -34,7 +35,8 @@ class GlyphClosureCache { const RequestedSegmentationInformation* segmentation_info, const ift::common::SegmentSet& segments); - common::CodepointSet CodepointsForGlyphs(const common::GlyphSet& glyphs) const; + common::CodepointSet CodepointsForGlyphs( + const common::GlyphSet& glyphs) const; // Checks if a disjunction accross segments satisifies the closure require for // glyphs, returns true if there are potential additional conditions beyond @@ -65,7 +67,8 @@ class GlyphClosureCache { hb_face_t* Face() { return preprocessed_face_.get(); } private: - common::CodepointSet UnicodeClosure(const common::CodepointSet& unicodes) const; + common::CodepointSet UnicodeClosure( + const common::CodepointSet& unicodes) const; ift::common::hb_face_unique_ptr original_face_; ift::common::hb_face_unique_ptr preprocessed_face_; diff --git a/ift/encoder/glyph_closure_cache_test.cc b/ift/encoder/glyph_closure_cache_test.cc index 0a862286..80630ba6 100644 --- a/ift/encoder/glyph_closure_cache_test.cc +++ b/ift/encoder/glyph_closure_cache_test.cc @@ -1,9 +1,9 @@ #include "ift/encoder/glyph_closure_cache.h" -#include "ift/common/bazel_data_file_resolver.h" #include #include "gtest/gtest.h" +#include "ift/common/bazel_data_file_resolver.h" #include "ift/common/font_data.h" #include "ift/common/font_helper.h" #include "ift/common/int_set.h" @@ -14,16 +14,16 @@ using ift::config::PATCH; using absl::StatusOr; +using ift::common::BazelDataFileResolver; using ift::common::CodepointSet; +using ift::common::DataFileResolver; using ift::common::FontData; +using ift::common::FontHelper; using ift::common::GlyphSet; using ift::common::hb_face_unique_ptr; using ift::common::make_hb_face; using ift::common::SegmentSet; using ift::freq::ProbabilityBound; -using ift::common::FontHelper; -using ift::common::DataFileResolver; -using ift::common::BazelDataFileResolver; namespace ift::encoder { @@ -98,8 +98,10 @@ TEST_F(GlyphClosureCacheTest, HasAdditionalConditions) { RequestedSegmentationInformation::Create(segments, {}, **cache, PATCH); ASSERT_TRUE(info.ok()); - ASSERT_TRUE(*(*cache)->HasAdditionalConditions(info->get(), {0}, {37 /* A */})); - ASSERT_TRUE(*(*cache)->HasAdditionalConditions(info->get(), {1}, {37 /* A */})); + ASSERT_TRUE( + *(*cache)->HasAdditionalConditions(info->get(), {0}, {37 /* A */})); + ASSERT_TRUE( + *(*cache)->HasAdditionalConditions(info->get(), {1}, {37 /* A */})); ASSERT_FALSE( *(*cache)->HasAdditionalConditions(info->get(), {0, 1}, {37 /* A */})); @@ -125,8 +127,10 @@ TEST_F(GlyphClosureCacheTest, HasAdditionalConditions_IncludesInitFont) { ASSERT_TRUE(info.ok()); // s0 or s1 -> g563 (smcap A) - EXPECT_EQ(*(*cache)->CodepointsToOrGids(*info->get(), {0}), (GlyphSet{37, 563})); - EXPECT_EQ(*(*cache)->CodepointsToOrGids(*info->get(), {1}), (GlyphSet{37, 563})); + EXPECT_EQ(*(*cache)->CodepointsToOrGids(*info->get(), {0}), + (GlyphSet{37, 563})); + EXPECT_EQ(*(*cache)->CodepointsToOrGids(*info->get(), {1}), + (GlyphSet{37, 563})); EXPECT_TRUE(*(*cache)->HasAdditionalConditions(info->get(), {0}, {563})); EXPECT_TRUE(*(*cache)->HasAdditionalConditions(info->get(), {1}, {563})); @@ -149,7 +153,7 @@ TEST_F(GlyphClosureCacheTest, AnalyzeSegment) { GlyphSet and_gids, or_gids, exclusive_gids; auto status = (*cache)->AnalyzeSegment(*info->get(), {0}, and_gids, or_gids, - exclusive_gids); + exclusive_gids); ASSERT_TRUE(status.ok()); EXPECT_EQ(exclusive_gids, (GlyphSet{74 /* f */})); @@ -200,13 +204,13 @@ TEST_F(GlyphClosureCacheTest, UnicodeCompDecomp_ExpandClosure) { ASSERT_TRUE(cache.ok()) << cache.status(); SubsetDefinition def; - def.codepoints = {0xe1}; // á + def.codepoints = {0xe1}; // á auto expanded = (*cache)->ExpandClosure(def); ASSERT_TRUE(expanded.ok()); - EXPECT_TRUE(expanded->codepoints.contains(0x61)); // a - EXPECT_TRUE(expanded->codepoints.contains(0x301)); // combining acute + EXPECT_TRUE(expanded->codepoints.contains(0x61)); // a + EXPECT_TRUE(expanded->codepoints.contains(0x301)); // combining acute } TEST_F(GlyphClosureCacheTest, UnicodeCompDecomp_Composition) { @@ -214,12 +218,12 @@ TEST_F(GlyphClosureCacheTest, UnicodeCompDecomp_Composition) { ASSERT_TRUE(cache.ok()) << cache.status(); SubsetDefinition def; - def.codepoints = {0x61, 0x301}; // a, combining acute + def.codepoints = {0x61, 0x301}; // a, combining acute auto expanded = (*cache)->ExpandClosure(def); ASSERT_TRUE(expanded.ok()); - EXPECT_TRUE(expanded->codepoints.contains(0xe1)); // á + EXPECT_TRUE(expanded->codepoints.contains(0xe1)); // á } TEST_F(GlyphClosureCacheTest, CodepointsForGlyphs) { @@ -234,12 +238,12 @@ TEST_F(GlyphClosureCacheTest, CodepointsForGlyphs) { // 2. Single nominal glyph auto a_gid = *FontHelper::GetNominalGlyph(roboto.get(), 'a'); auto cps = (*cache)->CodepointsForGlyphs({a_gid}); - EXPECT_EQ(cps, (CodepointSet {'a'})); + EXPECT_EQ(cps, (CodepointSet{'a'})); // 3. Multiple nominal glyphs auto b_gid = *FontHelper::GetNominalGlyph(roboto.get(), 'b'); cps = (*cache)->CodepointsForGlyphs({a_gid, b_gid}); - EXPECT_EQ(cps, (CodepointSet {'a', 'b'})); + EXPECT_EQ(cps, (CodepointSet{'a', 'b'})); } // Test with NotoSansJP for UVS mappings. diff --git a/ift/encoder/glyph_groupings.cc b/ift/encoder/glyph_groupings.cc index 43412cc3..7a144a50 100644 --- a/ift/encoder/glyph_groupings.cc +++ b/ift/encoder/glyph_groupings.cc @@ -145,7 +145,8 @@ StatusOr GlyphGroupings::ToGlyphSegmentation( // manually. SegmentSet fallback_segments; btree_map conditions_with_fallback; - conditions_with_fallback.insert(ConditionsAndGlyphs().begin(), ConditionsAndGlyphs().end()); + conditions_with_fallback.insert(ConditionsAndGlyphs().begin(), + ConditionsAndGlyphs().end()); if (!unmapped_glyphs_.empty()) { fallback_segments = segmentation_info.NonEmptySegments(); diff --git a/ift/encoder/glyph_groupings_test.cc b/ift/encoder/glyph_groupings_test.cc index d7ca6cdb..20b07a0f 100644 --- a/ift/encoder/glyph_groupings_test.cc +++ b/ift/encoder/glyph_groupings_test.cc @@ -1,10 +1,10 @@ #include "ift/encoder/glyph_groupings.h" -#include "ift/common/bazel_data_file_resolver.h" #include #include "absl/container/flat_hash_map.h" #include "gtest/gtest.h" +#include "ift/common/bazel_data_file_resolver.h" #include "ift/common/font_data.h" #include "ift/common/int_set.h" #include "ift/encoder/activation_condition.h" @@ -27,9 +27,9 @@ namespace ift::encoder { using absl::btree_map; using absl::flat_hash_map; using freq::ProbabilityBound; +using ift::common::BazelDataFileResolver; using ift::common::CodepointSet; using ift::common::DataFileResolver; -using ift::common::BazelDataFileResolver; using ift::common::FontData; using ift::common::GlyphSet; using ift::common::hb_face_unique_ptr; @@ -75,7 +75,8 @@ class GlyphGroupingsTest : public ::testing::Test { AddInitSubsetDefaults(init_font_segment); resolver = *BazelDataFileResolver::CreateForTest(); - closure_cache_ = std::move(*GlyphClosureCache::Create(roboto_.get(), *resolver)); + closure_cache_ = + std::move(*GlyphClosureCache::Create(roboto_.get(), *resolver)); requested_segmentation_info_ = *RequestedSegmentationInformation::Create( segments_, init_font_segment, *closure_cache_, PATCH); diff --git a/ift/encoder/glyph_segmentation_test.cc b/ift/encoder/glyph_segmentation_test.cc index 9f746b4c..69fbbb7f 100644 --- a/ift/encoder/glyph_segmentation_test.cc +++ b/ift/encoder/glyph_segmentation_test.cc @@ -1,9 +1,9 @@ #include "ift/encoder/glyph_segmentation.h" -#include "ift/common/bazel_data_file_resolver.h" #include #include "gtest/gtest.h" +#include "ift/common/bazel_data_file_resolver.h" #include "ift/common/font_data.h" #include "ift/common/int_set.h" #include "ift/encoder/activation_condition.h" @@ -17,7 +17,9 @@ using ift::config::CLOSURE_ONLY; using ift::config::PATCH; using google::protobuf::TextFormat; +using ift::common::BazelDataFileResolver; using ift::common::CodepointSet; +using ift::common::DataFileResolver; using ift::common::FontData; using ift::common::hb_face_unique_ptr; using ift::common::IntSet; @@ -26,8 +28,6 @@ using ift::common::SegmentSet; using ift::freq::UnicodeFrequencies; using ift::proto::PatchEncoding; using ift::proto::PatchMap; -using ift::common::DataFileResolver; -using ift::common::BazelDataFileResolver; namespace ift::encoder { diff --git a/ift/encoder/merge_strategy.h b/ift/encoder/merge_strategy.h index f8a2dd61..858a1c8d 100644 --- a/ift/encoder/merge_strategy.h +++ b/ift/encoder/merge_strategy.h @@ -137,8 +137,6 @@ class MergeStrategy { return probability_calculator_.get(); } - - // Configures the threshold for when to stop optimizing segments. // // For the set of segments which account for less than this fraction of the diff --git a/ift/encoder/merger.cc b/ift/encoder/merger.cc index b97e61e5..b0f4425d 100644 --- a/ift/encoder/merger.cc +++ b/ift/encoder/merger.cc @@ -651,7 +651,9 @@ StatusOr> Merger::TryMergingACompositeCondition( std::vector sorted_conditions; sorted_conditions.reserve(candidate_conditions.size()); - sorted_conditions.insert(sorted_conditions.end(), candidate_conditions.begin(), candidate_conditions.end()); + sorted_conditions.insert(sorted_conditions.end(), + candidate_conditions.begin(), + candidate_conditions.end()); std::sort(sorted_conditions.begin(), sorted_conditions.end()); for (ActivationCondition next_condition : sorted_conditions) { diff --git a/ift/encoder/requested_segmentation_information.cc b/ift/encoder/requested_segmentation_information.cc index 889a2bb9..b4489483 100644 --- a/ift/encoder/requested_segmentation_information.cc +++ b/ift/encoder/requested_segmentation_information.cc @@ -9,9 +9,9 @@ using ift::config::UnmappedGlyphHandling; +using absl::StatusOr; using ift::common::CodepointSet; using ift::common::SegmentSet; -using absl::StatusOr; namespace ift::encoder { diff --git a/ift/encoder/requested_segmentation_information.h b/ift/encoder/requested_segmentation_information.h index 6a0d5818..1a7fcfae 100644 --- a/ift/encoder/requested_segmentation_information.h +++ b/ift/encoder/requested_segmentation_information.h @@ -52,7 +52,6 @@ class RequestedSegmentationInformation { absl::Status ReassignInitSubset(GlyphClosureCache& closure_cache, const SubsetDefinition& new_def) { - // TODO XXXXX Can we avoid aggressively expanding the init closure? // ie. run the init subsetting/closure operation purely in glyph form. init_font_segment_ = TRY(closure_cache.ExpandClosure(new_def)); @@ -62,7 +61,8 @@ class RequestedSegmentationInformation { auto closure = closure_cache.GlyphClosure(full_definition_); if (closure.ok()) { full_closure_ = std::move(*closure); - full_codepoint_closure_ = closure_cache.CodepointsForGlyphs(full_closure_); + full_codepoint_closure_ = + closure_cache.CodepointsForGlyphs(full_closure_); } } @@ -110,7 +110,9 @@ class RequestedSegmentationInformation { } const ift::common::GlyphSet& FullClosure() const { return full_closure_; } - const ift::common::CodepointSet& FullCodepointClosure() const { return full_codepoint_closure_; } + const ift::common::CodepointSet& FullCodepointClosure() const { + return full_codepoint_closure_; + } const SubsetDefinition& FullDefinition() const { return full_definition_; } diff --git a/ift/encoder/requested_segmentation_information_test.cc b/ift/encoder/requested_segmentation_information_test.cc index 50f9487e..501863f5 100644 --- a/ift/encoder/requested_segmentation_information_test.cc +++ b/ift/encoder/requested_segmentation_information_test.cc @@ -1,22 +1,22 @@ #include "ift/encoder/requested_segmentation_information.h" -#include "ift/common/bazel_data_file_resolver.h" #include #include "gtest/gtest.h" +#include "ift/common/bazel_data_file_resolver.h" #include "ift/common/font_data.h" #include "ift/encoder/glyph_closure_cache.h" #include "ift/encoder/subset_definition.h" #include "ift/freq/probability_bound.h" -using ift::config::PATCH; +using ift::common::BazelDataFileResolver; using ift::common::CodepointSet; -using ift::common::SegmentSet; -using ift::common::FontData; using ift::common::DataFileResolver; -using ift::common::BazelDataFileResolver; +using ift::common::FontData; using ift::common::hb_face_unique_ptr; using ift::common::make_hb_face; +using ift::common::SegmentSet; +using ift::config::PATCH; using ift::freq::ProbabilityBound; namespace ift::encoder { @@ -44,7 +44,8 @@ class RequestedSegmentationInformationTest : public ::testing::Test { }; TEST_F(RequestedSegmentationInformationTest, SegmentsForCodepoints) { - std::unique_ptr cache = *GlyphClosureCache::Create(roboto.get(), *resolver); + std::unique_ptr cache = + *GlyphClosureCache::Create(roboto.get(), *resolver); std::vector segments{ {{'a', 'b'}, ProbabilityBound::Zero()}, {{'b', 'c'}, ProbabilityBound::Zero()}, @@ -66,7 +67,8 @@ TEST_F(RequestedSegmentationInformationTest, SegmentsForCodepoints) { } TEST_F(RequestedSegmentationInformationTest, IndexUpdatesOnMerge) { - std::unique_ptr cache = *GlyphClosureCache::Create(roboto.get(), *resolver); + std::unique_ptr cache = + *GlyphClosureCache::Create(roboto.get(), *resolver); std::vector segments{ {{'a'}, ProbabilityBound::Zero()}, {{'b'}, ProbabilityBound::Zero()}, @@ -92,7 +94,8 @@ TEST_F(RequestedSegmentationInformationTest, IndexUpdatesOnMerge) { } TEST_F(RequestedSegmentationInformationTest, IndexUpdatesOnReassignInit) { - std::unique_ptr cache = *GlyphClosureCache::Create(roboto.get(), *resolver); + std::unique_ptr cache = + *GlyphClosureCache::Create(roboto.get(), *resolver); std::vector segments{ {{'a', 'b'}, ProbabilityBound::Zero()}, {{'b', 'c'}, ProbabilityBound::Zero()}, @@ -107,7 +110,8 @@ TEST_F(RequestedSegmentationInformationTest, IndexUpdatesOnReassignInit) { EXPECT_EQ(info->SegmentsForCodepoints({'b'}), (SegmentSet{0, 1})); EXPECT_EQ(info->SegmentsForCodepoints({'c'}), (SegmentSet{1})); - // Reassign init subset to include 'b'. This should remove 'b' from all segments. + // Reassign init subset to include 'b'. This should remove 'b' from all + // segments. SubsetDefinition new_init; new_init.codepoints = {'b'}; ASSERT_TRUE(info->ReassignInitSubset(*cache, new_init).ok()); diff --git a/ift/encoder/segmentation_context.cc b/ift/encoder/segmentation_context.cc index e2a30801..16b87706 100644 --- a/ift/encoder/segmentation_context.cc +++ b/ift/encoder/segmentation_context.cc @@ -21,10 +21,10 @@ using ift::config::UnmappedGlyphHandling; using absl::Status; using absl::StatusOr; +using ift::common::DataFileResolver; using ift::common::GlyphSet; using ift::common::IntSet; using ift::common::SegmentSet; -using ift::common::DataFileResolver; namespace ift::encoder { @@ -93,7 +93,8 @@ Status SegmentationContext::ReprocessChanged(InvalidationSet modified) { } } else { #ifndef HB_DEPEND_API - return absl::InternalError("DEP_GRAPH_ONLY mode requires dependency graph support."); + return absl::InternalError( + "DEP_GRAPH_ONLY mode requires dependency graph support."); #else modified.glyphs.union_set( (*dependency_closure_) @@ -114,7 +115,8 @@ Status SegmentationContext::ReprocessAll() { } } else { #ifndef HB_DEPEND_API - return absl::InternalError("DEP_GRAPH_ONLY mode requires dependency graph support."); + return absl::InternalError( + "DEP_GRAPH_ONLY mode requires dependency graph support."); #else // Pull conditions directly out of the dep graph instead of running closure // processing. @@ -229,7 +231,8 @@ Status SegmentationContext::ReassignInitSubset(SubsetDefinition new_def) { } } else { #ifndef HB_DEPEND_API - return absl::InternalError("DEP_GRAPH_ONLY mode requires dependency graph support."); + return absl::InternalError( + "DEP_GRAPH_ONLY mode requires dependency graph support."); #else GlyphSet gids; for (segment_index_t s : segments_to_reprocess) { @@ -346,7 +349,8 @@ SegmentationContext::InitializeSegmentationContext( // No merging is done during init. SegmentationContext context = TRY(SegmentationContext::Create( face, initial_segment, segments, unmapped_glyph_handling, - condition_analysis_mode, brotli_quality, init_font_brotli_quality, std::move(resolver))); + condition_analysis_mode, brotli_quality, init_font_brotli_quality, + std::move(resolver))); // ### Generate the initial conditions and groupings by processing all // segments and glyphs. ### diff --git a/ift/encoder/segmentation_context.h b/ift/encoder/segmentation_context.h index 3e204269..94ca15b8 100644 --- a/ift/encoder/segmentation_context.h +++ b/ift/encoder/segmentation_context.h @@ -77,14 +77,15 @@ class SegmentationContext { TRY(GlyphClosureCache::Create(original_face.get(), *resolver_)); std::unique_ptr segmentation_info = - TRY(RequestedSegmentationInformation::Create(SegmentationInfo().Segments(), SegmentationInfo().InitFontSegment(), - *closure_cache, - SegmentationInfo().GetUnmappedGlyphHandling())); + TRY(RequestedSegmentationInformation::Create( + SegmentationInfo().Segments(), SegmentationInfo().InitFontSegment(), + *closure_cache, SegmentationInfo().GetUnmappedGlyphHandling())); SegmentationContext context( - original_face.get(), SegmentationInfo().GetUnmappedGlyphHandling(), condition_analysis_mode_, brotli_quality_, - init_font_brotli_quality_, std::move(closure_cache), - std::move(segmentation_info), resolver_, estimated_compression_ratio_); + original_face.get(), SegmentationInfo().GetUnmappedGlyphHandling(), + condition_analysis_mode_, brotli_quality_, init_font_brotli_quality_, + std::move(closure_cache), std::move(segmentation_info), resolver_, + estimated_compression_ratio_); TRYV(context.InitDependencyClosure()); return std::move(context); @@ -113,8 +114,7 @@ class SegmentationContext { std::unique_ptr closure_cache, std::unique_ptr segmentation_info, std::shared_ptr resolver, - std::optional estimated_compression_ratio = std::nullopt - ) + std::optional estimated_compression_ratio = std::nullopt) : estimated_compression_ratio_(estimated_compression_ratio), patch_size_cache(NewPatchSizeCache(face, brotli_quality)), patch_size_cache_for_init_font( @@ -300,8 +300,8 @@ class SegmentationContext { absl::StatusOr ComputeSegmentCutoff() const; // TODO XXXX make this return StatusOr<...> - std::unique_ptr NewPatchSizeCache( - hb_face_t* face, uint32_t brotli_quality) { + std::unique_ptr NewPatchSizeCache(hb_face_t* face, + uint32_t brotli_quality) { if (brotli_quality == 0) { if (!estimated_compression_ratio_.has_value()) { auto r = EstimatedPatchSizeCache::EstimateCompressionRatio(face); @@ -311,7 +311,8 @@ class SegmentationContext { } if (estimated_compression_ratio_.has_value()) { - return EstimatedPatchSizeCache::New(face, *estimated_compression_ratio_); + return EstimatedPatchSizeCache::New(face, + *estimated_compression_ratio_); } } return std::unique_ptr( diff --git a/ift/freq/bigram_probability_calculator.cc b/ift/freq/bigram_probability_calculator.cc index 2e65a5e6..b47607d7 100644 --- a/ift/freq/bigram_probability_calculator.cc +++ b/ift/freq/bigram_probability_calculator.cc @@ -87,15 +87,14 @@ ProbabilityBound BigramProbabilityCalculator::BigramProbabilityBound( // - Either the largest individual codepoint frequency. // - max(Pi + Pj - Pij) // - Or: sum(Pi) - sum(Pj Format2PatchMap::EstimateEncodingCost( // estimates by using fixed values. std::string out; TRYV(EncodeEntry(dummy_entry, /*last_entry_index=*/0, - /*default_encoding=*/TABLE_KEYED_FULL, out)); + /*default_encoding=*/TABLE_KEYED_FULL, out)); return out.size(); } diff --git a/ift/proto/format_2_patch_map_test.cc b/ift/proto/format_2_patch_map_test.cc index f620a4cc..a4bea086 100644 --- a/ift/proto/format_2_patch_map_test.cc +++ b/ift/proto/format_2_patch_map_test.cc @@ -384,8 +384,9 @@ TEST_F(Format2PatchMapTest, NonDefaultPatchFormat) { }; std::string entry_0 = { - 0x10, // Codepoints - 0x05, 0x0e, // codepoints = {1, 2, 3} + 0x10, // Codepoints + 0x05, + 0x0e, // codepoints = {1, 2, 3} }; std::string entry_1 = { diff --git a/util/font2ift.cc b/util/font2ift.cc index 6cffb892..3bbf910a 100644 --- a/util/font2ift.cc +++ b/util/font2ift.cc @@ -16,10 +16,10 @@ #include "absl/strings/str_cat.h" #include "hb.h" #include "ift/common/axis_range.h" +#include "ift/common/bazel_data_file_resolver.h" +#include "ift/common/data_file_resolver.h" #include "ift/common/font_data.h" #include "ift/common/font_helper.h" -#include "ift/common/data_file_resolver.h" -#include "ift/common/bazel_data_file_resolver.h" #include "ift/common/int_set.h" #include "ift/common/try.h" #include "ift/config/auto_segmenter_config.h" @@ -34,12 +34,12 @@ #include "ift/encoder/subset_definition.h" #include "util/auto_config_flags.h" +using ift::common::BazelDataFileResolver; +using ift::common::DataFileResolver; using ift::config::ActivationConditionProto; using ift::config::ConfigCompiler; using ift::config::DesignSpace; using ift::config::SegmentationPlan; -using ift::common::DataFileResolver; -using ift::common::BazelDataFileResolver; /* * Utility that converts a standard font file into an IFT font file optionally @@ -153,7 +153,8 @@ int write_output(const Compiler::Encoding& encoding) { return 0; } -StatusOr CreateSegmentationPlan(hb_face_t* font, std::shared_ptr resolver) { +StatusOr CreateSegmentationPlan( + hb_face_t* font, std::shared_ptr resolver) { SegmentationPlan plan; if (absl::GetFlag(FLAGS_plan).empty() || absl::GetFlag(FLAGS_plan) == "auto") { @@ -163,7 +164,8 @@ StatusOr CreateSegmentationPlan(hb_face_t* font, std::shared_p quality_level = absl::GetFlag(FLAGS_auto_config_quality); } auto config = AutoSegmenterConfig::GenerateConfig( - font, *resolver, absl::GetFlag(FLAGS_auto_config_primary_script), quality_level); + font, *resolver, absl::GetFlag(FLAGS_auto_config_primary_script), + quality_level); if (!config.ok()) { return absl::InternalError( StrCat("Failed to generate config: ", config.status().message())); @@ -212,7 +214,8 @@ int main(int argc, char** argv) { auto resolver_status = BazelDataFileResolver::Create(argv[0]); if (!resolver_status.ok()) { - std::cerr << "Failed to create data file resolver: " << resolver_status.status() << std::endl; + std::cerr << "Failed to create data file resolver: " + << resolver_status.status() << std::endl; return -1; } auto resolver = std::move(*resolver_status); diff --git a/util/gen_ift_segmentation_plan.cc b/util/gen_ift_segmentation_plan.cc index 33738182..7fefd5f1 100644 --- a/util/gen_ift_segmentation_plan.cc +++ b/util/gen_ift_segmentation_plan.cc @@ -15,11 +15,11 @@ #include "absl/status/statusor.h" #include "absl/strings/str_cat.h" #include "hb.h" +#include "ift/common/bazel_data_file_resolver.h" +#include "ift/common/data_file_resolver.h" #include "ift/common/font_data.h" #include "ift/common/font_helper.h" #include "ift/common/int_set.h" -#include "ift/common/data_file_resolver.h" -#include "ift/common/bazel_data_file_resolver.h" #include "ift/common/try.h" #include "ift/config/auto_segmenter_config.h" #include "ift/config/load_codepoints.h" @@ -35,13 +35,13 @@ #include "ift/freq/unigram_probability_calculator.h" #include "util/auto_config_flags.h" +using ift::common::BazelDataFileResolver; +using ift::common::DataFileResolver; using ift::config::CLOSURE_ONLY; using ift::config::PATCH; using ift::config::SegmentationPlan; using ift::config::SegmenterConfig; using ift::freq::ProbabilityCalculator; -using ift::common::DataFileResolver; -using ift::common::BazelDataFileResolver; /* * Given a code point based segmentation creates an appropriate glyph based @@ -100,18 +100,20 @@ using ift::encoder::MergeStrategy; using ift::encoder::Segment; using ift::encoder::SegmentationCost; using ift::encoder::SubsetDefinition; +using ift::freq::BigramProbabilityCalculator; using ift::freq::UnicodeFrequencies; using ift::freq::UnigramProbabilityCalculator; -using ift::freq::BigramProbabilityCalculator; -static StatusOr LoadConfig(hb_face_t* font, const DataFileResolver& resolver) { +static StatusOr LoadConfig(hb_face_t* font, + const DataFileResolver& resolver) { if (absl::GetFlag(FLAGS_config) == "auto") { std::optional quality_level = std::nullopt; if (absl::GetFlag(FLAGS_auto_config_quality) > 0) { quality_level = absl::GetFlag(FLAGS_auto_config_quality); } return AutoSegmenterConfig::GenerateConfig( - font, resolver, absl::GetFlag(FLAGS_auto_config_primary_script), quality_level); + font, resolver, absl::GetFlag(FLAGS_auto_config_primary_script), + quality_level); } FontData config_text = @@ -133,7 +135,6 @@ static Status Analysis(hb_face_t* font, btree_map&& merge_groups, const GlyphSegmentation& segmentation, std::shared_ptr resolver) { - std::vector calculator_storage; calculator_storage.reserve(merge_groups.size()); @@ -148,24 +149,29 @@ static Status Analysis(hb_face_t* font, continue; } - // Depending on the configuration the encoding may have used unigram probability - // calculations. For the purpose of analysis we want to be consistent and so - // unigram probability calculator should be upgraded to a bigram + // Depending on the configuration the encoding may have used unigram + // probability calculations. For the purpose of analysis we want to be + // consistent and so unigram probability calculator should be upgraded to a + // bigram if (UnigramProbabilityCalculator* unigram = - dynamic_cast(strategy.ProbabilityCalculator())) { + dynamic_cast( + strategy.ProbabilityCalculator())) { calculator_storage.push_back(std::move(*unigram).ToBigramCalculator()); strategy_probability_calculators.push_back(&calculator_storage.back()); } else { - strategy_probability_calculators.push_back(strategy.ProbabilityCalculator()); + strategy_probability_calculators.push_back( + strategy.ProbabilityCalculator()); } strategy_group_index.push_back(i - 1); } ClosureGlyphSegmenter segmenter(11, 11, PATCH, CLOSURE_ONLY, resolver); - auto costs = TRY(segmenter.TotalCosts(font, segmentation, strategy_probability_calculators)); + auto costs = TRY(segmenter.TotalCosts(font, segmentation, + strategy_probability_calculators)); - std::cerr << "number_of_codepoints = " << FontHelper::ToCodepointsSet(font).size() << std::endl; + std::cerr << "number_of_codepoints = " + << FontHelper::ToCodepointsSet(font).size() << std::endl; double ift_init_cost = 0.0; double non_ift_total_cost = 0.0; @@ -186,7 +192,6 @@ static Status Analysis(hb_face_t* font, ift_patch_cost += cost.ift_patch_cost; ideal_patch_cost += cost.ideal_patch_cost; - std::cerr << "ift_cost_bytes[" << group_index << "] = " << (uint64_t)cost.ift_patch_cost << std::endl; std::cerr << "ideal_cost_bytes[" << group_index @@ -196,9 +201,12 @@ static Status Analysis(hb_face_t* font, group_index++; } - std::cerr << "non_ift_total_cost = " << (uint64_t) non_ift_total_cost << std::endl; - std::cerr << "ift_total_cost = " << (uint64_t) (ift_init_cost + ift_patch_cost) << std::endl; - std::cerr << "ideal_total_cost = " << (uint64_t) (ideal_init_cost + ideal_patch_cost) << std::endl; + std::cerr << "non_ift_total_cost = " << (uint64_t)non_ift_total_cost + << std::endl; + std::cerr << "ift_total_cost = " << (uint64_t)(ift_init_cost + ift_patch_cost) + << std::endl; + std::cerr << "ideal_total_cost = " + << (uint64_t)(ideal_init_cost + ideal_patch_cost) << std::endl; return absl::OkStatus(); } @@ -280,7 +288,8 @@ static Status Main(const std::vector args) { } std::cerr << ">> Analysis" << std::endl; - return Analysis(font.get(), std::move(result.merge_groups), segmentation, resolver); + return Analysis(font.get(), std::move(result.merge_groups), segmentation, + resolver); } int main(int argc, char** argv) { diff --git a/util/gen_ift_segmenter_config.cc b/util/gen_ift_segmenter_config.cc index 8accac43..8e8a97fa 100644 --- a/util/gen_ift_segmenter_config.cc +++ b/util/gen_ift_segmenter_config.cc @@ -1,5 +1,4 @@ #include -#include "ift/common/bazel_data_file_resolver.h" #include #include @@ -11,6 +10,7 @@ #include "absl/log/globals.h" #include "absl/log/initialize.h" #include "absl/status/status.h" +#include "ift/common/bazel_data_file_resolver.h" #include "ift/common/font_data.h" #include "ift/common/try.h" #include "ift/config/auto_segmenter_config.h" @@ -33,9 +33,9 @@ ABSL_FLAG( "Log verbosity level from. 0 is least verbose, higher values are more."); using absl::Status; +using ift::common::BazelDataFileResolver; using ift::common::hb_face_unique_ptr; using ift::config::AutoSegmenterConfig; -using ift::common::BazelDataFileResolver; static Status Main(const std::vector args) { auto resolver = TRY(BazelDataFileResolver::Create(args[0])); @@ -50,7 +50,8 @@ static Status Main(const std::vector args) { } auto config = TRY(AutoSegmenterConfig::GenerateConfig( - font.get(), *resolver, absl::GetFlag(FLAGS_primary_script), quality_level)); + font.get(), *resolver, absl::GetFlag(FLAGS_primary_script), + quality_level)); std::string output; if (!google::protobuf::TextFormat::PrintToString(config, &output)) {