Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 19 additions & 9 deletions ift/common/bazel_data_file_resolver.cc
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "ift/common/bazel_data_file_resolver.h"

#include <filesystem>

#include "absl/status/status.h"
#include "absl/strings/str_cat.h"

Expand All @@ -14,18 +15,23 @@ StatusOr<std::shared_ptr<DataFileResolver>> BazelDataFileResolver::Create(
std::string error;
auto runfiles = std::unique_ptr<Runfiles>(Runfiles::Create(argv0, &error));
if (!runfiles) {
return absl::InternalError(absl::StrCat("Failed to create runfiles: ", error));
return absl::InternalError(
absl::StrCat("Failed to create runfiles: ", error));
}
return std::shared_ptr<BazelDataFileResolver>(new BazelDataFileResolver(std::move(runfiles)));
return std::shared_ptr<BazelDataFileResolver>(
new BazelDataFileResolver(std::move(runfiles)));
}

StatusOr<std::shared_ptr<DataFileResolver>> BazelDataFileResolver::CreateForTest() {
StatusOr<std::shared_ptr<DataFileResolver>>
BazelDataFileResolver::CreateForTest() {
std::string error;
auto runfiles = std::unique_ptr<Runfiles>(Runfiles::CreateForTest(&error));
if (!runfiles) {
return absl::InternalError(absl::StrCat("Failed to create runfiles for test: ", error));
return absl::InternalError(
absl::StrCat("Failed to create runfiles for test: ", error));
}
return std::shared_ptr<BazelDataFileResolver>(new BazelDataFileResolver(std::move(runfiles)));
return std::shared_ptr<BazelDataFileResolver>(
new BazelDataFileResolver(std::move(runfiles)));
}

BazelDataFileResolver::BazelDataFileResolver(std::unique_ptr<Runfiles> runfiles)
Expand All @@ -34,23 +40,27 @@ BazelDataFileResolver::BazelDataFileResolver(std::unique_ptr<Runfiles> runfiles)
StatusOr<std::string> BazelDataFileResolver::GetUnicodeDataPath() const {
std::string path = runfiles_->Rlocation(UNICODE_DATA_PATH);
if (path.empty() || !std::filesystem::exists(path)) {
return absl::NotFoundError(absl::StrCat("Failed to find UnicodeData.txt via runfiles: ", path));
return absl::NotFoundError(
absl::StrCat("Failed to find UnicodeData.txt via runfiles: ", path));
}
return path;
}

StatusOr<std::string> BazelDataFileResolver::GetDerivedNormalizationPropsPath() const {
StatusOr<std::string> BazelDataFileResolver::GetDerivedNormalizationPropsPath()
const {
std::string path = runfiles_->Rlocation(DERIVED_PROPS_PATH);
if (path.empty() || !std::filesystem::exists(path)) {
return absl::NotFoundError(absl::StrCat("Failed to find DerivedNormalizationProps.txt via runfiles: ", path));
return absl::NotFoundError(absl::StrCat(
"Failed to find DerivedNormalizationProps.txt via runfiles: ", path));
}
return path;
}

StatusOr<std::string> BazelDataFileResolver::GetFrequencyDataDirectory() const {
std::string metadata_path = runfiles_->Rlocation(FREQ_DATA_METADATA);
if (metadata_path.empty() || !std::filesystem::exists(metadata_path)) {
return absl::NotFoundError("Failed to find frequency data directory via runfiles");
return absl::NotFoundError(
"Failed to find frequency data directory via runfiles");
}
return std::filesystem::path(metadata_path).parent_path().string();
}
Expand Down
2 changes: 1 addition & 1 deletion ift/common/bit_buffer_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ using std::vector;

class BitBufferTest : public ::testing::Test {
protected:
static void check_transcode(const vector<uint32_t> &input, BranchFactor bf,
static void check_transcode(const vector<uint32_t>& input, BranchFactor bf,
unsigned int depth) {
BitOutputBuffer bout(bf, depth);
for (uint32_t value : input) {
Expand Down
2 changes: 1 addition & 1 deletion ift/common/bit_input_buffer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ absl::string_view BitInputBuffer::Remaining() const {
}
}

bool BitInputBuffer::read(uint32_t *out) {
bool BitInputBuffer::read(uint32_t* out) {
if (!out) {
return false;
}
Expand Down
2 changes: 1 addition & 1 deletion ift/common/bit_input_buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class BitInputBuffer {
// The lowest/rightmost bits of the value bits are set, the remaining are
// cleared. The number of bits set depends on the BranchFactor this
// BitInputBuffer was constructed with.
bool read(uint32_t *out);
bool read(uint32_t* out);

private:
const BranchFactor branch_factor;
Expand Down
2 changes: 1 addition & 1 deletion ift/common/bit_output_buffer_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ using std::string;

class BitOutputBufferTest : public ::testing::Test {
protected:
static string Bits(const string &s) {
static string Bits(const string& s) {
if (s.empty()) {
return "";
}
Expand Down
4 changes: 3 additions & 1 deletion ift/common/data_file_resolver.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#define IFT_COMMON_DATA_FILE_RESOLVER_H_

#include <string>

#include "absl/status/statusor.h"

namespace ift::common {
Expand All @@ -14,7 +15,8 @@ class DataFileResolver {
virtual absl::StatusOr<std::string> GetUnicodeDataPath() const = 0;

// Returns the path to DerivedNormalizationProps.txt
virtual absl::StatusOr<std::string> GetDerivedNormalizationPropsPath() const = 0;
virtual absl::StatusOr<std::string> GetDerivedNormalizationPropsPath()
const = 0;

// Returns the path to the directory containing frequency data files.
virtual absl::StatusOr<std::string> GetFrequencyDataDirectory() const = 0;
Expand Down
3 changes: 2 additions & 1 deletion ift/common/fixed_data_file_resolver.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ class FixedDataFileResolver : public DataFileResolver {
return unicode_data_path_;
}

absl::StatusOr<std::string> GetDerivedNormalizationPropsPath() const override {
absl::StatusOr<std::string> GetDerivedNormalizationPropsPath()
const override {
return derived_props_path_;
}

Expand Down
6 changes: 4 additions & 2 deletions ift/common/font_helper_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -575,8 +575,10 @@ TEST_F(FontHelperTest, GlyfData_ShortOverflowSynthetic) {
hb_face_unique_ptr face = make_hb_face(hb_face_builder_create());

std::vector<uint8_t> loca = {
0xC3, 0x50, // 50,000 (100,000 actual)
0xC3, 0x52, // 50,002 (100,005 actual)
0xC3,
0x50, // 50,000 (100,000 actual)
0xC3,
0x52, // 50,002 (100,005 actual)
};
{
auto blob =
Expand Down
6 changes: 3 additions & 3 deletions ift/common/int_set.h
Original file line number Diff line number Diff line change
Expand Up @@ -367,7 +367,7 @@ class IntSet {
// Typed variants
class GlyphSet : public IntSet {
public:
GlyphSet() : IntSet(){};
GlyphSet() : IntSet() {};
GlyphSet(std::initializer_list<hb_codepoint_t> values) : IntSet(values) {}
explicit GlyphSet(const hb_set_t* set) : IntSet(set) {}
explicit GlyphSet(const hb_set_unique_ptr& set) : IntSet(set) {}
Expand All @@ -381,7 +381,7 @@ class GlyphSet : public IntSet {

class CodepointSet : public IntSet {
public:
CodepointSet() : IntSet(){};
CodepointSet() : IntSet() {};
CodepointSet(std::initializer_list<hb_codepoint_t> values) : IntSet(values) {}
explicit CodepointSet(const hb_set_t* set) : IntSet(set) {}
explicit CodepointSet(const hb_set_unique_ptr& set) : IntSet(set) {}
Expand All @@ -395,7 +395,7 @@ class CodepointSet : public IntSet {

class SegmentSet : public IntSet {
public:
SegmentSet() : IntSet(){};
SegmentSet() : IntSet() {};
SegmentSet(std::initializer_list<hb_codepoint_t> values) : IntSet(values) {}
explicit SegmentSet(const hb_set_t* set) : IntSet(set) {}
explicit SegmentSet(const hb_set_unique_ptr& set) : IntSet(set) {}
Expand Down
12 changes: 7 additions & 5 deletions ift/config/auto_segmenter_config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,8 @@ enum Quality {
// Would be reasonable to always have this set to at least the minimum group
// size.
//
// - condition_analysis_mode: use DEP_GRAPH_ONLY if dependency API is available, otherwise CLOSURE_ONLY.
// - condition_analysis_mode: use DEP_GRAPH_ONLY if dependency API is available,
// otherwise CLOSURE_ONLY.
//
// Merge group settings:
//
Expand Down Expand Up @@ -648,16 +649,16 @@ static void ApplyQualityLevelTo(Quality quality, SegmenterConfig& config) {

// Based on measured network overhead cost in practice from the
// ift demo.
config.mutable_base_cost_config()->set_network_overhead_cost(DEFAULT_NETWORK_COST);
config.mutable_base_cost_config()->set_network_overhead_cost(
DEFAULT_NETWORK_COST);

for (auto& merge_group : *config.mutable_merge_groups()) {
ApplyQualityLevelTo(quality, merge_group);
}
}

StatusOr<SegmenterConfig> AutoSegmenterConfig::GenerateConfig(
hb_face_t* face,
const ift::common::DataFileResolver& resolver,
hb_face_t* face, const ift::common::DataFileResolver& resolver,
std::optional<std::string> primary_script,
std::optional<int> quality_level) {
SegmenterConfig config;
Expand Down Expand Up @@ -718,7 +719,8 @@ StatusOr<SegmenterConfig> AutoSegmenterConfig::GenerateConfig(

cost->set_built_in_freq_data_name(script);
if (script == primary_script_file) {
cost->set_initial_font_merge_threshold(-(double) DEFAULT_NETWORK_COST * (0.8));
cost->set_initial_font_merge_threshold(-(double)DEFAULT_NETWORK_COST *
(0.8));
}
}

Expand Down
5 changes: 2 additions & 3 deletions ift/config/auto_segmenter_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@

#include <optional>
#include <string>
#include "ift/common/data_file_resolver.h"

#include "absl/status/statusor.h"
#include "absl/strings/string_view.h"
#include "hb.h"
#include "ift/common/data_file_resolver.h"
#include "ift/config/segmenter_config.pb.h"

namespace ift::config {
Expand All @@ -26,8 +26,7 @@ class AutoSegmenterConfig {
// times, high values have longer segmenting times but
// typically results in better segmentation quality.
static absl::StatusOr<SegmenterConfig> GenerateConfig(
hb_face_t* face,
const ift::common::DataFileResolver& resolver,
hb_face_t* face, const ift::common::DataFileResolver& resolver,
std::optional<std::string> primary_script = std::nullopt,
std::optional<int> quality_level = std::nullopt);

Expand Down
51 changes: 26 additions & 25 deletions ift/config/auto_segmenter_config_test.cc
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#include "ift/config/auto_segmenter_config.h"
#include "ift/common/bazel_data_file_resolver.h"

#include <google/protobuf/text_format.h>

Expand All @@ -11,6 +10,7 @@
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "hb.h"
#include "ift/common/bazel_data_file_resolver.h"
#include "ift/common/font_data.h"
#include "ift/config/load_codepoints.h"

Expand Down Expand Up @@ -157,16 +157,16 @@ base_segmentation_plan {
generate_feature_segments: true
)"
#ifdef HB_DEPEND_API
"condition_analysis_mode: DEP_GRAPH_ONLY\n"
"condition_analysis_mode: DEP_GRAPH_ONLY\n"
#else
"condition_analysis_mode: CLOSURE_ONLY\n"
"condition_analysis_mode: CLOSURE_ONLY\n"
#endif
);
);
}

TEST_F(AutoSegmenterConfigTest, Roboto_ScriptCyrillic) {
auto config_or =
AutoSegmenterConfig::GenerateConfig(face_.get(), *resolver, "Script_cyrillic");
auto config_or = AutoSegmenterConfig::GenerateConfig(face_.get(), *resolver,
"Script_cyrillic");
ASSERT_TRUE(config_or.ok()) << config_or.status();
EXPECT_THAT(
GetScripts(*config_or),
Expand All @@ -176,8 +176,8 @@ TEST_F(AutoSegmenterConfigTest, Roboto_ScriptCyrillic) {
}

TEST_F(AutoSegmenterConfigTest, Roboto_LanguageFr) {
auto config_or =
AutoSegmenterConfig::GenerateConfig(face_.get(), *resolver, "Language_fr");
auto config_or = AutoSegmenterConfig::GenerateConfig(face_.get(), *resolver,
"Language_fr");
ASSERT_TRUE(config_or.ok()) << config_or.status();
EXPECT_THAT(GetScripts(*config_or),
UnorderedElementsAre(Pair("Language_fr", "Language_fr.riegeli"),
Expand All @@ -188,7 +188,8 @@ TEST_F(AutoSegmenterConfigTest, Roboto_LanguageFr) {

TEST_F(AutoSegmenterConfigTest, NotoSansJP_UnspecifiedPrimary) {
if (!cjk_face_) GTEST_SKIP() << "NotoSansJP-Regular.ttf not found";
auto config_or = AutoSegmenterConfig::GenerateConfig(cjk_face_.get(), *resolver);
auto config_or =
AutoSegmenterConfig::GenerateConfig(cjk_face_.get(), *resolver);
ASSERT_TRUE(config_or.ok()) << config_or.status();
EXPECT_THAT(GetScripts(*config_or),
UnorderedElementsAre(kLatin, kGreek, kCyrillic, kCJK, kSymbols,
Expand All @@ -199,8 +200,8 @@ TEST_F(AutoSegmenterConfigTest, NotoSansJP_UnspecifiedPrimary) {

TEST_F(AutoSegmenterConfigTest, NotoSansJP_ScriptCJK) {
if (!cjk_face_) GTEST_SKIP() << "NotoSansJP-Regular.ttf not found";
auto config_or =
AutoSegmenterConfig::GenerateConfig(cjk_face_.get(), *resolver, "Script_CJK");
auto config_or = AutoSegmenterConfig::GenerateConfig(cjk_face_.get(),
*resolver, "Script_CJK");
ASSERT_TRUE(config_or.ok()) << config_or.status();
EXPECT_THAT(GetScripts(*config_or),
UnorderedElementsAre(kLatin, kGreek, kCyrillic, kCJK, kSymbols,
Expand All @@ -211,8 +212,8 @@ TEST_F(AutoSegmenterConfigTest, NotoSansJP_ScriptCJK) {

TEST_F(AutoSegmenterConfigTest, NotoSansJP_ScriptJapanese) {
if (!cjk_face_) GTEST_SKIP() << "NotoSansJP-Regular.ttf not found";
auto config_or =
AutoSegmenterConfig::GenerateConfig(cjk_face_.get(), *resolver, "Script_japanese");
auto config_or = AutoSegmenterConfig::GenerateConfig(
cjk_face_.get(), *resolver, "Script_japanese");
ASSERT_TRUE(config_or.ok()) << config_or.status();
EXPECT_THAT(
GetScripts(*config_or),
Expand All @@ -225,8 +226,8 @@ TEST_F(AutoSegmenterConfigTest, NotoSansJP_ScriptJapanese) {

TEST_F(AutoSegmenterConfigTest, NotoSansJP_LanguageZhHans) {
if (!cjk_face_) GTEST_SKIP() << "NotoSansJP-Regular.ttf not found";
auto config_or =
AutoSegmenterConfig::GenerateConfig(cjk_face_.get(), *resolver, "Language_zh-Hans");
auto config_or = AutoSegmenterConfig::GenerateConfig(
cjk_face_.get(), *resolver, "Language_zh-Hans");
ASSERT_TRUE(config_or.ok()) << config_or.status();
EXPECT_THAT(GetScripts(*config_or),
UnorderedElementsAre(
Expand All @@ -238,14 +239,14 @@ TEST_F(AutoSegmenterConfigTest, NotoSansJP_LanguageZhHans) {
}

TEST_F(AutoSegmenterConfigTest, Roboto_ScriptNotFound) {
auto config_or =
AutoSegmenterConfig::GenerateConfig(face_.get(), *resolver, "Script_foobar");
auto config_or = AutoSegmenterConfig::GenerateConfig(face_.get(), *resolver,
"Script_foobar");
EXPECT_EQ(config_or.status().code(), absl::StatusCode::kNotFound);
}

TEST_F(AutoSegmenterConfigTest, Roboto_LanguageNotFound) {
auto config_or =
AutoSegmenterConfig::GenerateConfig(face_.get(), *resolver, "Language_foobar");
auto config_or = AutoSegmenterConfig::GenerateConfig(face_.get(), *resolver,
"Language_foobar");
EXPECT_EQ(config_or.status().code(), absl::StatusCode::kNotFound);
}

Expand All @@ -267,8 +268,8 @@ TEST_F(AutoSegmenterConfigTest, Roboto_FullFileName_Script) {
}

TEST_F(AutoSegmenterConfigTest, Roboto_FullFileName_Language) {
auto config_or =
AutoSegmenterConfig::GenerateConfig(face_.get(), *resolver, "Language_fr.riegeli");
auto config_or = AutoSegmenterConfig::GenerateConfig(face_.get(), *resolver,
"Language_fr.riegeli");
EXPECT_THAT(GetScripts(*config_or),
UnorderedElementsAre(Pair("Language_fr", "Language_fr.riegeli"),
kCyrillic, kGreek, kSymbols, kFallback));
Expand All @@ -291,17 +292,17 @@ TEST_F(AutoSegmenterConfigTest, LanguageMappingsExist) {
}

TEST_F(AutoSegmenterConfigTest, QualityLevelForcing) {
auto config_or =
AutoSegmenterConfig::GenerateConfig(face_.get(), *resolver, std::nullopt, 1);
auto config_or = AutoSegmenterConfig::GenerateConfig(face_.get(), *resolver,
std::nullopt, 1);
ASSERT_TRUE(config_or.ok()) << config_or.status();
EXPECT_EQ(config_or->brotli_quality(), 0);
EXPECT_EQ(config_or->unmapped_glyph_handling(), MOVE_TO_INIT_FONT);
EXPECT_EQ(config_or->base_cost_config().use_bigrams(), false);
EXPECT_EQ(config_or->brotli_quality_for_initial_font_merging(), 0);
EXPECT_EQ(config_or->base_cost_config().optimization_cutoff_fraction(), 0.05);

auto config_or_8 =
AutoSegmenterConfig::GenerateConfig(face_.get(), *resolver, std::nullopt, 8);
auto config_or_8 = AutoSegmenterConfig::GenerateConfig(face_.get(), *resolver,
std::nullopt, 8);
ASSERT_TRUE(config_or_8.ok()) << config_or_8.status();
EXPECT_EQ(config_or_8->brotli_quality(), 11);
EXPECT_EQ(config_or_8->unmapped_glyph_handling(), MOVE_TO_INIT_FONT);
Expand Down
6 changes: 2 additions & 4 deletions ift/config/load_codepoints.cc
Original file line number Diff line number Diff line change
Expand Up @@ -205,8 +205,7 @@ static Status LoadFrequenciesFromRiegeliIndividual(
}

StatusOr<UnicodeFrequencies> LoadFrequenciesFromRiegeli(
const char* path,
std::optional<CodepointSet> filter) {
const char* path, std::optional<CodepointSet> filter) {
auto paths = TRY(ExpandShardedPath(path));
UnicodeFrequenciesBuilder builder(filter);
for (const auto& path : paths) {
Expand All @@ -216,8 +215,7 @@ StatusOr<UnicodeFrequencies> LoadFrequenciesFromRiegeli(
}

StatusOr<UnicodeFrequencies> LoadBuiltInFrequencies(
const char* name,
const DataFileResolver& resolver,
const char* name, const DataFileResolver& resolver,
std::optional<CodepointSet> filter) {
std::string data_dir = TRY(resolver.GetFrequencyDataDirectory());
std::string path = StrCat(data_dir, "/", name);
Expand Down
Loading
Loading