From a989761c71b9e7b029fd20475e3deef5a53ae724 Mon Sep 17 00:00:00 2001 From: Aleksandr Kovalko Date: Thu, 11 Jun 2026 02:57:10 +0200 Subject: [PATCH] Add encode/decode for the Google Encoded Polyline format Port of PolyUtil.encode/decode from android-maps-utils, in a new header so stays free of /. The zig-zag/varint arithmetic is done in unsigned types: the Java original relies on wrapping int arithmetic, and left-shifting a negative signed value is UB in C++17. decode is bounds-checked: a string truncated mid-point yields the points decoded so far and drops the incomplete trailing point (the Java original throws IndexOutOfBounds there). Tests cover the reference strings from the Encoded Polyline Algorithm Format docs byte-for-byte, truncation, and quantized round-trips across the poles and the antimeridian. --- README.md | 4 +- docs/api.md | 55 ++++++++++++++++-- include/geo/encoding.hpp | 118 ++++++++++++++++++++++++++++++++++++++ include/geo/geo.hpp | 1 + tests/encoding/decode.hpp | 45 +++++++++++++++ tests/encoding/encode.hpp | 29 ++++++++++ tests/tests.cpp | 3 + 7 files changed, 249 insertions(+), 6 deletions(-) create mode 100644 include/geo/encoding.hpp create mode 100644 tests/encoding/decode.hpp create mode 100644 tests/encoding/encode.hpp diff --git a/README.md b/README.md index 3f7a468..998f64a 100644 --- a/README.md +++ b/README.md @@ -51,10 +51,12 @@ Earth approximation model. - **Lat/lng-native API** — pass latitude/longitude coordinates directly, no framework-specific point types to convert through. -- **Header-only, dependency-free** — about 36 KB across 4 headers; nothing +- **Header-only, dependency-free** — about 40 KB across 5 headers; nothing to build or link. - **Spherical math** — distance, heading, offset, interpolation, area. - **Polygon utilities** — point-in-polygon and path proximity checks. +- **Polyline encoding** — `encode`/`decode` for the Google Encoded Polyline + format. 
- **Fast** — matches hand-written haversine on `distance`; especially strong on polygon `area` (see [benchmarks](docs/benchmarks.md)). - **Focused scope** — intentionally small API for GPS, navigation, tracking, diff --git a/docs/api.md b/docs/api.md index cfa67f8..ef88486 100644 --- a/docs/api.md +++ b/docs/api.md @@ -23,11 +23,13 @@ are internal and not part of the supported API. (`area`, `path_length`, `contains`, `on_edge`, `on_path`) are not marked `noexcept` because the generic `Path` contract doesn't constrain `operator[]` / `size()` to be `noexcept`; they don't throw - themselves. + themselves. `encode` and `decode` return owning containers and can + throw `std::bad_alloc` on allocation failure. - **Include strategy.** Each subsystem has its own header: `<geo/latlng.hpp>` (types), `<geo/spherical.hpp>` (distance, heading, - area), `<geo/poly.hpp>` (point-in-polygon, on-path). The umbrella - `<geo/geo.hpp>` pulls all three in for convenience. + area), `<geo/poly.hpp>` (point-in-polygon, on-path), `<geo/encoding.hpp>` + (encoded polylines). The umbrella `<geo/geo.hpp>` pulls all four in for + convenience. ## LatLng @@ -71,8 +73,8 @@ a.approx_equal(b, 1e-5); // true (1e-5° ≈ 1 m on equator) A series of connected coordinates in an ordered sequence. `Path` is a template parameter accepted by `path_length`, `area`, `signed_area`, -`contains`, `on_edge`, and `on_path`. It must be a random-access container of -`geo::LatLng` — specifically, it must support: +`contains`, `on_edge`, `on_path`, and `encode`. It must be a random-access +container of `geo::LatLng` — specifically, it must support: - `path.size()` returning a size in elements - `path[i]` returning a `LatLng` (or something convertible) for `0 ≤ i < size` @@ -369,6 +371,49 @@ std::cout << geo::distance_to_segment(point, start, end); --- +## Polyline encoding + +Encoder and decoder for the [Encoded Polyline Algorithm Format](https://developers.google.com/maps/documentation/utilities/polylinealgorithm) +used by the Google Maps APIs. 
+ +```cpp +#include <geo/encoding.hpp> +``` + +### encode + +**`geo::encode(const Path& path)`** — Encodes a sequence of LatLngs into an encoded path string. Coordinates are quantized to `1e-5` degrees (about one meter), so an encode/decode round-trip is lossy beyond that precision. + +Returns: `std::string` — the encoded polyline; empty for an empty path. + +```cpp +std::vector<geo::LatLng> path = { {38.5, -120.2}, {40.7, -120.95}, {43.252, -126.453} }; + +std::cout << geo::encode(path); // "_p~iF~ps|U_ulLnnqC_mqNvxq`@" +``` + +--- + +### decode + +**`geo::decode(std::string_view encoded)`** — Decodes an encoded path string into a sequence of LatLngs on the `1e-5`-degree grid. + +Returns: `std::vector<geo::LatLng>` — the decoded points; empty for an empty string. + +> **Note.** The input is assumed to be a well-formed encoded polyline. +> Decoding any string is memory-safe, but malformed input yields +> unspecified coordinates; a string truncated mid-point yields the points +> decoded so far and drops the incomplete trailing point. + +```cpp +auto path = geo::decode("_p~iF~ps|U_ulLnnqC_mqNvxq`@"); + +std::cout << path.size(); // 3 +std::cout << path[0]; // LatLng(38.5, -120.2) +``` + +--- + ## Constants | Symbol | Value | Description | diff --git a/include/geo/encoding.hpp b/include/geo/encoding.hpp new file mode 100644 index 0000000..7816168 --- /dev/null +++ b/include/geo/encoding.hpp @@ -0,0 +1,118 @@ +// Copyright 2026 Aleksandr Kovalko +// Licensed under the Apache License, Version 2.0 +// +// Portions of this file are based on Google Maps Android Utils: +// https://github.com/googlemaps/android-maps-utils +// +// Original work: +// Copyright 2013 Google Inc. +// Licensed under the Apache License, Version 2.0 +// +// This file has been modified from the original work, +// including a port from Java to C++. 
+
+#pragma once
+
+#include <cmath>
+#include <cstddef>
+#include <cstdint>
+#include <string>
+#include <string_view>
+#include <vector>
+
+#include "latlng.hpp"
+
+namespace geo {
+
+namespace detail {
+
+// Appends v to out as a zig-zag varint (5-bit groups, low first, each + 63).
+// Unsigned arithmetic: left-shifting a negative signed value is UB in C++17.
+inline void encode_value(std::int64_t v, std::string& out) {
+    std::uint64_t value = static_cast<std::uint64_t>(v) << 1;
+    if (v < 0) {
+        value = ~value;
+    }
+    while (value >= 0x20) {
+        out += static_cast<char>((0x20 | (value & 0x1f)) + 63);
+        value >>= 5;
+    }
+    out += static_cast<char>(value + 63);
+}
+
+}  // namespace detail
+
+/**
+ * Encodes a sequence of LatLngs into a string using the Encoded Polyline
+ * Algorithm Format. Coordinates are quantized to 1e-5 degrees (about one
+ * meter), so an encode/decode round-trip is lossy beyond that precision.
+ * Path only needs size() and operator[], per the documented Path contract.
+ */
+template <typename Path>
+[[nodiscard]] std::string encode(const Path& path) {
+    std::int64_t last_lat = 0;
+    std::int64_t last_lng = 0;
+    std::string result;
+    const auto count = static_cast<std::size_t>(path.size());
+    for (std::size_t i = 0; i < count; ++i) {
+        const LatLng point = path[i];  // elements may merely convert to LatLng
+        const std::int64_t lat = std::llround(point.lat * 1e5);
+        const std::int64_t lng = std::llround(point.lng * 1e5);
+        detail::encode_value(lat - last_lat, result);
+        detail::encode_value(lng - last_lng, result);
+        last_lat = lat;
+        last_lng = lng;
+    }
+    return result;
+}
+
+/**
+ * Decodes an Encoded Polyline Algorithm Format string into a sequence of
+ * LatLngs on the 1e-5-degree grid the format uses.
+ *
+ * The input is assumed to be a well-formed encoded polyline. Decoding any
+ * string is memory-safe, but malformed input yields unspecified coordinates;
+ * a string truncated mid-point yields the points decoded so far and drops
+ * the incomplete trailing point.
+ */
+[[nodiscard]] inline std::vector<LatLng> decode(std::string_view encoded) {
+    std::vector<LatLng> path;
+    std::size_t index = 0;
+    std::uint32_t lat = 0;
+    std::uint32_t lng = 0;
+
+    // Reads one zig-zag/varint-encoded delta and adds it to coord; returns
+    // false when the string ends mid-chunk. coord is uint32 so that hostile
+    // deltas wrap instead of overflowing a signed int (UB); well-formed
+    // input decodes exactly as in the Java original.
+    const auto decode_delta = [&encoded, &index](std::uint32_t& coord) {
+        std::uint32_t result = 1;
+        unsigned shift = 0;
+        std::int32_t b = 0;
+        do {
+            if (index >= encoded.size()) {
+                return false;
+            }
+            b = static_cast<std::int32_t>(encoded[index++]) - 64;
+            if (shift < 32) {  // only malformed input reaches shift 35+
+                result += static_cast<std::uint32_t>(b) << shift;
+            }
+            shift += 5;
+        } while (b >= 0x1f);
+        // Arithmetic (sign-extending) >> 1, spelled portably in unsigned ops.
+        const std::uint32_t half = (result >> 1) | (result & 0x80000000U);
+        coord += (result & 1U) != 0 ? ~half : half;
+        return true;
+    };
+
+    while (index < encoded.size()) {
+        if (!decode_delta(lat) || !decode_delta(lng)) {
+            break;
+        }
+        path.push_back(LatLng(static_cast<std::int32_t>(lat) * 1e-5,
+                              static_cast<std::int32_t>(lng) * 1e-5));
+    }
+    return path;
+}
+
+}  // namespace geo
diff --git a/include/geo/geo.hpp b/include/geo/geo.hpp
index d4154f7..dbec9a0 100644
--- a/include/geo/geo.hpp
+++ b/include/geo/geo.hpp
@@ -3,6 +3,7 @@
 
 #pragma once
 
+#include "encoding.hpp"
 #include "latlng.hpp"
 #include "poly.hpp"
 #include "spherical.hpp"
diff --git a/tests/encoding/decode.hpp b/tests/encoding/decode.hpp
new file mode 100644
index 0000000..df6fd21
--- /dev/null
+++ b/tests/encoding/decode.hpp
@@ -0,0 +1,45 @@
+#include
+#include
+#include
+#include
+
+#include
+
+using geo::LatLng;
+using geo::decode;
+using geo::encode;
+
+TEST(Encoding, decode) {
+    // Empty.
+    EXPECT_TRUE(decode("").empty());
+
+    // Reference example from the Encoded Polyline Algorithm Format docs.
+    auto path = decode("_p~iF~ps|U_ulLnnqC_mqNvxq`@");
+    ASSERT_EQ(path.size(), 3U);
+    EXPECT_TRUE(path[0].approx_equal(LatLng(38.5, -120.2), 1e-9));
+    EXPECT_TRUE(path[1].approx_equal(LatLng(40.7, -120.95), 1e-9));
+    EXPECT_TRUE(path[2].approx_equal(LatLng(43.252, -126.453), 1e-9));
+
+    // Truncated mid-chunk: the incomplete trailing point is dropped.
+ auto cut_in_lat = decode("_p~iF~ps|U_ul"); + ASSERT_EQ(cut_in_lat.size(), 1U); + EXPECT_TRUE(cut_in_lat[0].approx_equal(LatLng(38.5, -120.2), 1e-9)); + + // Truncated between chunks: lat decoded but lng missing — point dropped. + auto cut_after_lat = decode("_p~iF~ps|U_ulL"); + ASSERT_EQ(cut_after_lat.size(), 1U); + EXPECT_TRUE(cut_after_lat[0].approx_equal(LatLng(38.5, -120.2), 1e-9)); +} + +TEST(Encoding, encode_decode_roundtrip) { + std::vector path = { + {0, 0}, {90, 180}, {-90, -180}, {1.00001, -1.00001}, + {59.93863, 30.31413}, {-33.86882, 151.20929}, + }; + auto decoded = decode(encode(path)); + ASSERT_EQ(decoded.size(), path.size()); + for (std::size_t i = 0; i < path.size(); ++i) { + // Quantization error is at most 5e-6; 1e-5 leaves rounding headroom. + EXPECT_TRUE(decoded[i].approx_equal(path[i], 1e-5)) << decoded[i]; + } +} diff --git a/tests/encoding/encode.hpp b/tests/encoding/encode.hpp new file mode 100644 index 0000000..19a8f50 --- /dev/null +++ b/tests/encoding/encode.hpp @@ -0,0 +1,29 @@ +#include +#include +#include + +#include + +using geo::LatLng; +using geo::encode; + +TEST(Encoding, encode) { + // Empty. + EXPECT_EQ(encode(std::vector{}), ""); + + // Single zero point: zig-zag 0 encodes as '?' for both coordinates. + EXPECT_EQ(encode(std::vector{ {0, 0} }), "??"); + + // Reference example from the Encoded Polyline Algorithm Format docs. + std::vector path = { {38.5, -120.2}, {40.7, -120.95}, {43.252, -126.453} }; + EXPECT_EQ(encode(path), "_p~iF~ps|U_ulLnnqC_mqNvxq`@"); + + // Reference example for a single negative value (-179.9832104). + std::vector negative = { {0, -179.9832104} }; + EXPECT_EQ(encode(negative), "?`~oia@"); + + // Quantization: inputs rounding to the same 1e-5 cell encode identically. 
+ std::vector a = { {38.5, -120.2} }; + std::vector b = { {38.500000004, -120.199999996} }; + EXPECT_EQ(encode(a), encode(b)); +} diff --git a/tests/tests.cpp b/tests/tests.cpp index b42c957..5cef75f 100644 --- a/tests/tests.cpp +++ b/tests/tests.cpp @@ -18,6 +18,9 @@ #include "poly/on_edge.hpp" #include "poly/on_path.hpp" +#include "encoding/decode.hpp" +#include "encoding/encode.hpp" + int main(int argc, char** argv) { testing::InitGoogleTest(&argc, argv);