diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 00000000..4f351115 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,345 @@ +# sonic-cpp Agent Guide + +This file is for AI coding tools and human contributors working in this +repository. It follows the common AGENTS.md shape used by public projects: +project overview, commands, architecture, style, testing, security, and +agent-specific rules. Keep it practical: prefer facts that can be verified from +source, build files, and tests. + +If a subdirectory gets its own `AGENTS.md` later, treat that more specific file +as overriding this root guide for files under that subtree. + +## Project Shape + +`sonic-cpp` is a header-only, SIMD-accelerated C++ JSON parser/serializer. +The public API is under namespace `sonic_json` and is normally consumed through: + +- `include/sonic/sonic.h` +- `sonic_json::Document` +- `sonic_json::Node` + +The README says C++11 or above, but the active CMake and Bazel builds use +C++17. Treat build files and tests as the practical source of truth. + +Primary source directories: + +- `include/sonic/dom/`: DOM tree, document, parser, handlers, JSON pointer. +- `include/sonic/internal/`: low-level stack, SIMD helpers, arch dispatch. +- `include/sonic/internal/arch/`: x86/Arm/SVE2 implementation details. +- `include/sonic/jsonpath/`: JSONPath query and dump helpers. +- `include/sonic/experiment/`: experimental helpers such as lazy update. +- `tests/`: GoogleTest unit tests. +- `benchmark/`: benchmark binary sources. +- `fuzz/`: CMake-only fuzz target. + +Important public plumbing: + +- Errors: `include/sonic/error.h` +- Allocator: `include/sonic/allocator.h` +- Internal stack: `include/sonic/internal/stack.h` +- Write buffer: `include/sonic/writebuffer.h` +- Parse flags: `include/sonic/dom/flags.h` + +## Agent Workflow + +Before changing behavior: + +1. Read the relevant public header and its tests. +2. Search with `rg`; avoid broad filesystem scans. +3. Preserve existing API style unless the task explicitly asks for a breaking + API change. +4. Add focused tests before or with behavior changes. +5. Run the smallest relevant test first, then the broader suite if the change + touches shared parsing, DOM, allocator, SIMD, or serialization code. + +When reviewing changes: + +- Lead with correctness bugs, memory-safety risks, API compatibility problems, + and missing tests. +- Pay special attention to allocation failure paths. Silent failure is usually + not acceptable in new code. +- Do not simplify arch dispatch or SIMD code without checking both build flags + and tests. + +## Change Checklist + +Use this checklist before handing work back: + +1. The change is scoped to the requested behavior. +2. Public API compatibility has been considered and documented if affected. +3. Allocation failures either propagate `SonicError` or preserve the old object + state for legacy APIs. +4. Relevant unit tests were added or updated. +5. The smallest relevant test was run. +6. Broader tests were run when shared parser, DOM, allocator, SIMD, or + serialization behavior changed. +7. No build outputs, dependency caches, or benchmark artifacts were edited. + +## Build And Test Commands + +Quick command recap: + +| Task | Command | +| --- | --- | +| Configure CMake build | `cmake -S . -B build` | +| Build CMake unit test | `cmake --build build --target unittest -j` | +| Run CMake unit test | `./build/tests/unittest` | +| Run Bazel unit test | `bazel run :unittest --//:sonic_arch=haswell --//:sonic_dispatch=static` | +| Run full Bazel helper | `bash scripts/unittest.sh -g --arch=haswell --dispatch=static` | +| Run benchmark with Bazel | `bazel run :benchmark --compilation_mode=opt` | + +### CMake + +Common local flow: + +```bash +cmake -S . -B build +cmake --build build --target unittest -j +./build/tests/unittest +``` + +Useful CMake options: + +- `BUILD_UNITTEST=ON` by default. +- `BUILD_FUZZ=OFF` by default. +- `BUILD_BENCH=OFF` by default. +- `ENABLE_SVE2_128=OFF` by default. + +Sanitizers in CMake tests: + +- `tests/CMakeLists.txt` enables ASAN by default through `ENABLE_ASAN=ON`. +- UBSAN can be enabled with `-DENABLE_UBSAN=ON`. + +### Bazel + +Bazel uses Bzlmod. `.bazelversion` pins the expected version. + +Useful commands: + +```bash +bazel run :unittest --//:sonic_arch=haswell --//:sonic_dispatch=static +bazel run :benchmark --compilation_mode=opt +bash scripts/unittest.sh -g --arch=haswell --dispatch=static +``` + +Bazel flags: + +- `--//:sonic_arch={default|arm|sve2|westmere|haswell}` +- `--//:sonic_dispatch={static|dynamic}` +- `--//:sonic_sanitizer={no|gcc|clang}` + +Note: `scripts/unittest.sh` accepts `--arch=aarch64` and `--arch=arm64`, then +maps them to Bazel's `arm` setting. When invoking Bazel directly, use +`--//:sonic_arch=arm`. + +## Error Handling And Allocation Rules + +Code in this repository often runs in parser, serializer, allocator, and SIMD +hot paths. Allocation failure handling must be explicit enough that callers can +distinguish resource failure from valid empty/null JSON values. + +Preferred patterns: + +- Parser/handler failures should return a concrete `SonicError`, especially + `kErrorNoMem` for allocation failure. +- APIs that can fail should expose failure through the existing project style: + `ParseResult`, `SonicError`, boolean success, or allocator error state. +- Legacy public APIs may need to keep source-compatible return types. In that + case, preserve the previous object state on allocation failure whenever + practical and provide or use a checked path for callers that need diagnostics. +- Do not ignore return values from low-level buffer, stack, allocator, parser, + or handler methods that can fail. +- Check integer overflow before size arithmetic for allocations, padding, + capacity growth, and SIMD lookahead buffers. + +Key places to inspect for allocation-sensitive changes: + +- `include/sonic/allocator.h` +- `include/sonic/internal/stack.h` +- `include/sonic/writebuffer.h` +- `include/sonic/dom/parser.h` +- `include/sonic/dom/handler.h` +- `include/sonic/dom/schema_handler.h` +- `include/sonic/dom/dynamicnode.h` +- `tests/allocator_test.cpp` +- parser/DOM tests under `tests/` + +## DOM And Memory Model + +`GenericDocument` owns or references an allocator and is also the root JSON +node. Re-parsing a document discards the previous tree; any raw pointer, +iterator, or node reference from the old tree must be reacquired after parse. + +`DNode` stores arrays and objects in compact contiguous buffers. Object member +keys are part of the container's lookup invariants; avoid APIs or internal +changes that let callers mutate keys in a way that invalidates cached lookup +structures. + +String storage has two modes: + +- Non-owning string views for parsed/raw/const strings. +- Allocator-backed copies for APIs that copy strings. + +When copying or mutating DOM nodes: + +- Prefer commit-after-success updates for operations that can fail halfway. +- Keep source compatibility for existing public APIs where possible. +- Avoid `memmove`/raw byte copies for non-trivial node/member objects unless + the type is explicitly safe for that operation. + +## ParseOnDemand And JSONPath + +`ParseOnDemand` is optimized to find a target subtree without fully materializing +the document. + +Rule of thumb: + +- Default behavior should preserve the fast short-circuit path. +- Full-document validation in on-demand paths is a semantic and performance + choice; do not add it by default unless the API or caller explicitly asks for + it. +- Skipped branches still must not swallow local parse errors such as malformed + strings, invalid numbers, missing separators, or impossible object/array + syntax. + +Relevant files: + +- `include/sonic/dom/parser.h` +- `include/sonic/internal/arch/simd_skip.h` +- `include/sonic/jsonpath/*.h` +- `tests/document_test.cpp` +- `tests/jsonpath_test.cpp` +- `tests/json_tuple_test.cpp` + +## SIMD And Architecture Dispatch + +The SIMD layer has static and dynamic dispatch modes. Do not assume only AVX2 +exists even though x86 AVX2 is the primary documented target. + +Important files: + +- `include/sonic/internal/arch/sonic_cpu_feature.h` +- `include/sonic/internal/arch/simd_dispatch.h` +- `include/sonic/internal/arch/avx2/` +- `include/sonic/internal/arch/common/` +- `include/sonic/internal/arch/neon/` +- `include/sonic/internal/arch/sve2-128/` +- `include/sonic/internal/arch/x86_ifuncs/` + +When touching shared SIMD helpers, validate both parser and skip/on-demand +tests. If possible, also test `--//:sonic_dispatch=dynamic` on x86. + +## Coding Style + +- Follow root `.clang-format` (`BasedOnStyle: Google`). +- Keep public headers self-contained. +- Do not add heavyweight dependencies to the header-only library. +- Prefer simple, explicit control flow in parser and allocator code. +- Keep comments sparse and useful; explain invariants and failure handling, + not obvious assignments. +- Default to ASCII in source and docs unless a file already uses non-ASCII. + +## Testing Guidance + +Choose tests based on the touched area: + +- Allocator/stack/write buffer: `tests/allocator_test.cpp`, + `tests/writebuffer_test.cpp`, `tests/parser_oom_test.cpp`. +- DOM mutation/copy/member map: `tests/node_test.cpp`, + `tests/document_test.cpp`, `tests/parser_oom_test.cpp`. +- Full parser and lazy parser: `tests/parser_oom_test.cpp`, + `tests/document_test.cpp`. +- Parse schema: `tests/parse_schema_test.cpp`. +- Parse on demand / JSON pointer: `tests/document_test.cpp`, + `tests/json_pointer_test.cpp`. +- JSONPath / tuple extraction: `tests/jsonpath_test.cpp`, + `tests/json_tuple_test.cpp`. +- SIMD skip scanner: `tests/skip_test.cpp`. + +For broad validation, run: + +```bash +cmake --build build --target unittest -j +./build/tests/unittest +``` + +or: + +```bash +bash scripts/unittest.sh -g --arch=haswell --dispatch=static +``` + +Testing philosophy: + +- Test public behavior first; avoid overfitting tests to private helper details. +- For bug fixes, add a regression test that fails on the old behavior. +- For allocation-failure fixes, assert both the reported error and the + preserved state when preservation is part of the contract. +- For parser fixes, include malformed input around the exact branch being + changed, not only a happy-path JSON sample. +- For performance-sensitive parser/SIMD changes, keep tests deterministic and + put speed measurements in benchmarks, not unit tests. + +## Benchmarking + +Use benchmarks for changes that affect: + +- object lookup or member insertion/removal, +- parser hot loops, +- SIMD skip/string paths, +- serialization, +- allocator growth behavior. + +Commands: + +```bash +cmake -S . -B build-bench -DBUILD_BENCH=ON +cmake --build build-bench --target bench -j +./build-bench/benchmark/bench +``` + +or: + +```bash +bazel run :benchmark --compilation_mode=opt +``` + +## Common Pitfalls + +- Do not treat `operator[]` missing-member behavior as mutable storage; prefer + `FindMember`. +- Do not mutate object member keys through iterators or internal aliases unless + every affected lookup structure is rebuilt or updated. +- Do not add parse-on-demand full validation by default unless the task accepts + a performance/semantic change. +- Do not ignore trailing characters in full parse paths. +- Do not use throwing allocation in low-level parser/SAX paths when the + surrounding code expects explicit error propagation. +- Do not change public type layout casually; this is a header-only library and + downstream code may depend on source-level details. +- Do not modify generated build outputs, `build/`, `bazel-*`, or benchmark + result artifacts unless explicitly asked. + +## PR / Handoff Notes + +When summarizing work for another tool or reviewer: + +- List behavior changes first, then files touched. +- State which tests were run and which were not run. +- Call out API compatibility, memory-safety, and performance tradeoffs. +- Mention any remaining risk if only a narrow test was run. +- For large parser or SIMD changes, include the exact architecture/dispatch mode + used for validation. + +## Security Notes + +Inputs should be treated as untrusted JSON. The usage docs state that UTF-8 is +assumed and not verified by default. Always check parse results with: + +- `HasParseError()` +- `GetParseError()` +- `GetErrorOffset()` +- `ErrorMsg(...)` + +Security issues should not be disclosed through public issues. Follow +`CONTRIBUTING.md` for the reporting contact. diff --git a/include/sonic/allocator.h b/include/sonic/allocator.h index 6885502d..5f2ee6bb 100644 --- a/include/sonic/allocator.h +++ b/include/sonic/allocator.h @@ -23,9 +23,12 @@ #include #include +#include #include #include +#include #include +#include #include #include "sonic/macro.h" @@ -103,7 +106,7 @@ class SpinLock { }; #ifdef SONIC_LOCKED_ALLOCATOR -#define LOCK_GUARD std::lock_guard guard(lock_); +#define LOCK_GUARD std::lock_guard guard(shared_->lock) #else #define LOCK_GUARD #endif @@ -155,9 +158,7 @@ class AdaptiveChunkPolicy { inline size_t ChunkSize(size_t need_alloc_size) { if (min_chunk_size_ < need_alloc_size && min_chunk_size_ < SONIC_ALLOCATOR_MAX_CHUNK_CAPACITY) { - size_t p = - 1ULL << (64 - __builtin_clzll( - need_alloc_size)); // size > 0 && never shift 64 + size_t p = NextPowerOfTwoSaturated(need_alloc_size); min_chunk_size_ = p < SONIC_ALLOCATOR_MAX_CHUNK_CAPACITY ? p : SONIC_ALLOCATOR_MAX_CHUNK_CAPACITY; @@ -167,6 +168,20 @@ class AdaptiveChunkPolicy { } private: + static inline size_t NextPowerOfTwoSaturated(size_t size) { + if (size <= 1) { + return 1; + } + --size; + for (size_t shift = 1; shift < sizeof(size_t) * 8; shift <<= 1) { + size |= size >> shift; + } + if (size == std::numeric_limits::max()) { + return std::numeric_limits::max(); + } + return size + 1; + } + size_t min_chunk_size_; }; @@ -188,11 +203,12 @@ class MemoryPoolAllocator { //!< chunk serves allocation. BaseAllocator* ownBaseAllocator; //!< base allocator created by this object. - size_t refcount; + std::atomic refcount; bool ownBuffer; //!< Sticky OOM flag shared across refcounted copies. Atomic because //!< the per-instance SpinLock does not synchronize different copies. std::atomic hadOom; + SpinLock lock; }; static const size_t SIZEOF_SHARED_DATA = SONIC_ALIGN(sizeof(SharedData)); @@ -222,25 +238,21 @@ class MemoryPoolAllocator { size_t chunkSize = SONIC_ALLOCATOR_MIN_CHUNK_CAPACITY, BaseAllocator* baseAllocator = 0) : cp_(chunkSize), - baseAllocator_(baseAllocator ? baseAllocator : new BaseAllocator()), - shared_(static_cast( - baseAllocator_ ? baseAllocator_->Malloc(SIZEOF_SHARED_DATA + - SIZEOF_CHUNK_HEADER) - : 0)) { - sonic_assert(baseAllocator_ != 0); - sonic_assert(shared_ != 0); - new (&shared_->hadOom) std::atomic(false); - if (baseAllocator) { - shared_->ownBaseAllocator = 0; - } else { - shared_->ownBaseAllocator = baseAllocator_; + baseAllocator_(baseAllocator ? baseAllocator + : new (std::nothrow) BaseAllocator()), + shared_(0), + ownBaseAllocatorWhenInvalid_(baseAllocator == 0) { + if (!baseAllocator_) { + ownBaseAllocatorWhenInvalid_ = false; + return; } - shared_->chunkHead = GetChunkHead(shared_); - shared_->chunkHead->capacity = 0; - shared_->chunkHead->size = 0; - shared_->chunkHead->next = 0; - shared_->ownBuffer = true; - shared_->refcount = 1; + shared_ = static_cast( + baseAllocator_->Malloc(SIZEOF_SHARED_DATA + SIZEOF_CHUNK_HEADER)); + if (!shared_) { + return; + } + InitializeShared(baseAllocator ? 0 : baseAllocator_, true, 0); + ownBaseAllocatorWhenInvalid_ = false; } //! Constructor with user-supplied buffer. @@ -259,75 +271,126 @@ class MemoryPoolAllocator { size_t chunkSize = SONIC_ALLOCATOR_MIN_CHUNK_CAPACITY, BaseAllocator* baseAllocator = 0) : cp_(chunkSize), - baseAllocator_(baseAllocator ? baseAllocator : new BaseAllocator()), - shared_(static_cast(AlignBuffer(buffer, size))) { - sonic_assert(size >= SIZEOF_SHARED_DATA + SIZEOF_CHUNK_HEADER); - new (&shared_->hadOom) std::atomic(false); - shared_->chunkHead = GetChunkHead(shared_); + baseAllocator_(baseAllocator ? baseAllocator + : new (std::nothrow) BaseAllocator()), + shared_(nullptr), + ownBaseAllocatorWhenInvalid_(baseAllocator == 0) { + if (sonic_unlikely(buffer == nullptr)) { + return; + } + shared_ = static_cast(AlignBuffer(buffer, size)); + if (sonic_unlikely(shared_ == nullptr || + size < SIZEOF_SHARED_DATA + SIZEOF_CHUNK_HEADER)) { + shared_ = nullptr; + return; + } + InitializeShared(baseAllocator ? 0 : baseAllocator_, false, + size - SIZEOF_SHARED_DATA - SIZEOF_CHUNK_HEADER); shared_->chunkHead->capacity = size - SIZEOF_SHARED_DATA - SIZEOF_CHUNK_HEADER; - shared_->chunkHead->size = 0; - shared_->chunkHead->next = 0; - shared_->ownBaseAllocator = baseAllocator ? 0 : baseAllocator_; - shared_->ownBuffer = false; - shared_->refcount = 1; + ownBaseAllocatorWhenInvalid_ = false; } MemoryPoolAllocator(const MemoryPoolAllocator& rhs) noexcept - : cp_(rhs.cp_), baseAllocator_(rhs.baseAllocator_), shared_(rhs.shared_) { - sonic_assert(shared_->refcount > 0); - ++shared_->refcount; + : cp_(rhs.cp_), + baseAllocator_(rhs.shared_ ? rhs.baseAllocator_ : 0), + shared_(rhs.shared_), + ownBaseAllocatorWhenInvalid_(false) { + if (shared_) { + sonic_assert(shared_->refcount.load(std::memory_order_acquire) > 0); + shared_->refcount.fetch_add(1, std::memory_order_acq_rel); + } } MemoryPoolAllocator& operator=(const MemoryPoolAllocator& rhs) noexcept { - sonic_assert(rhs.shared_->refcount > 0); - ++rhs.shared_->refcount; - this->~MemoryPoolAllocator(); - baseAllocator_ = rhs.baseAllocator_; + if (this == &rhs) { + return *this; + } + if (rhs.shared_) { + sonic_assert(rhs.shared_->refcount.load(std::memory_order_acquire) > 0); + rhs.shared_->refcount.fetch_add(1, std::memory_order_acq_rel); + } + Release(); + baseAllocator_ = rhs.shared_ ? rhs.baseAllocator_ : 0; cp_ = rhs.cp_; shared_ = rhs.shared_; + ownBaseAllocatorWhenInvalid_ = false; return *this; } MemoryPoolAllocator(MemoryPoolAllocator&& rhs) noexcept - : cp_(rhs.cp_), baseAllocator_(rhs.baseAllocator_), shared_(rhs.shared_) { - sonic_assert(rhs.shared_->refcount > 0); + : cp_(rhs.cp_), + baseAllocator_(rhs.baseAllocator_), + shared_(rhs.shared_), + ownBaseAllocatorWhenInvalid_(rhs.ownBaseAllocatorWhenInvalid_) { + sonic_assert(!rhs.shared_ || + rhs.shared_->refcount.load(std::memory_order_acquire) > 0); rhs.shared_ = 0; + rhs.baseAllocator_ = 0; + rhs.ownBaseAllocatorWhenInvalid_ = false; } MemoryPoolAllocator& operator=(MemoryPoolAllocator&& rhs) noexcept { - sonic_assert(rhs.shared_->refcount > 0); - this->~MemoryPoolAllocator(); + if (this == &rhs) { + return *this; + } + sonic_assert(!rhs.shared_ || + rhs.shared_->refcount.load(std::memory_order_acquire) > 0); + Release(); baseAllocator_ = rhs.baseAllocator_; cp_ = rhs.cp_; shared_ = rhs.shared_; + ownBaseAllocatorWhenInvalid_ = rhs.ownBaseAllocatorWhenInvalid_; rhs.shared_ = 0; + rhs.baseAllocator_ = 0; + rhs.ownBaseAllocatorWhenInvalid_ = false; return *this; } //! Destructor. /*! This deallocates all memory chunks, excluding the user-supplied buffer. */ - ~MemoryPoolAllocator() noexcept { + ~MemoryPoolAllocator() noexcept { Release(); } + + private: + void Release() noexcept { if (!shared_) { - // do nothing if moved + if (ownBaseAllocatorWhenInvalid_) { + delete baseAllocator_; + } + baseAllocator_ = nullptr; + ownBaseAllocatorWhenInvalid_ = false; return; } - if (shared_->refcount > 1) { - --shared_->refcount; + if (shared_->refcount.load(std::memory_order_acquire) > 1 && + shared_->refcount.fetch_sub(1, std::memory_order_acq_rel) > 1) { + shared_ = nullptr; + baseAllocator_ = nullptr; + ownBaseAllocatorWhenInvalid_ = false; return; } Clear(); BaseAllocator* a = shared_->ownBaseAllocator; using AtomicBool = std::atomic; + using AtomicSize = std::atomic; + shared_->lock.~SpinLock(); shared_->hadOom.~AtomicBool(); + shared_->refcount.~AtomicSize(); if (shared_->ownBuffer) { baseAllocator_->Free(shared_); } delete a; + shared_ = nullptr; + baseAllocator_ = nullptr; + ownBaseAllocatorWhenInvalid_ = false; } + public: //! Deallocates all memory chunks, excluding the first/user one. void Clear() noexcept { - sonic_assert(shared_->refcount > 0); + if (!shared_) { + return; + } + sonic_assert(shared_->refcount.load(std::memory_order_acquire) > 0); + LOCK_GUARD; for (;;) { ChunkHeader* c = shared_->chunkHead; if (!c->next) { @@ -343,7 +406,11 @@ class MemoryPoolAllocator { /*! \return total capacity in bytes. */ size_t Capacity() const noexcept { - sonic_assert(shared_->refcount > 0); + if (!shared_) { + return 0; + } + sonic_assert(shared_->refcount.load(std::memory_order_acquire) > 0); + LOCK_GUARD; size_t capacity = 0; for (ChunkHeader* c = shared_->chunkHead; c != 0; c = c->next) capacity += c->capacity; @@ -354,7 +421,11 @@ class MemoryPoolAllocator { /*! \return total used bytes. */ size_t Size() const noexcept { - sonic_assert(shared_->refcount > 0); + if (!shared_) { + return 0; + } + sonic_assert(shared_->refcount.load(std::memory_order_acquire) > 0); + LOCK_GUARD; size_t size = 0; for (ChunkHeader* c = shared_->chunkHead; c != 0; c = c->next) size += c->size; @@ -365,21 +436,30 @@ class MemoryPoolAllocator { /*! \return true or false. */ bool Shared() const noexcept { - sonic_assert(shared_->refcount > 0); - return shared_->refcount > 1; + if (!shared_) { + return false; + } + sonic_assert(shared_->refcount.load(std::memory_order_acquire) > 0); + return shared_->refcount.load(std::memory_order_acquire) > 1; } //! Allocates a memory block. (concept Allocator) void* Malloc(size_t size) { - sonic_assert(shared_->refcount > 0); + if (!shared_) { + return NULL; + } + sonic_assert(shared_->refcount.load(std::memory_order_acquire) > 0); if (!size) return NULL; - size = SONIC_ALIGN(size); + if (sonic_unlikely(!AlignSize(size, &size))) { + SetOom(); + return NULL; + } LOCK_GUARD; - if (sonic_unlikely(shared_->chunkHead->size + size > - shared_->chunkHead->capacity)) { + if (sonic_unlikely(size > shared_->chunkHead->capacity - + shared_->chunkHead->size)) { if (!AddChunk(cp_.ChunkSize(size))) { - shared_->hadOom.store(true, std::memory_order_release); + SetOom(); return NULL; } } @@ -393,11 +473,17 @@ class MemoryPoolAllocator { void* Realloc(void* originalPtr, size_t originalSize, size_t newSize) { if (originalPtr == 0) return Malloc(newSize); - sonic_assert(shared_->refcount > 0); + if (!shared_) { + return nullptr; + } + sonic_assert(shared_->refcount.load(std::memory_order_acquire) > 0); if (newSize == 0) return nullptr; - originalSize = SONIC_ALIGN(originalSize); - newSize = SONIC_ALIGN(newSize); + if (sonic_unlikely(!AlignSize(originalSize, &originalSize) || + !AlignSize(newSize, &newSize))) { + SetOom(); + return nullptr; + } // Do not shrink if new size is smaller than original if (originalSize >= newSize) return originalPtr; @@ -409,8 +495,8 @@ class MemoryPoolAllocator { if (originalPtr == GetChunkBuffer(shared_) + shared_->chunkHead->size - originalSize) { size_t increment = static_cast(newSize - originalSize); - if (shared_->chunkHead->size + increment <= - shared_->chunkHead->capacity) { + if (increment <= + shared_->chunkHead->capacity - shared_->chunkHead->size) { shared_->chunkHead->size += increment; return originalPtr; } @@ -424,27 +510,37 @@ class MemoryPoolAllocator { } // Mark OOM even on the Malloc-copy fallback so the flag is set // regardless of which internal path actually failed. - shared_->hadOom.store(true, std::memory_order_release); + SetOom(); return nullptr; } // Lets callers distinguish an OOM from a logical null (e.g. Malloc(0)). bool HadOom() const { - sonic_assert(shared_->refcount > 0); + if (!shared_) { + return true; + } + sonic_assert(shared_->refcount.load(std::memory_order_acquire) > 0); return shared_->hadOom.load(std::memory_order_acquire); } void ClearOom() { - sonic_assert(shared_->refcount > 0); + if (!shared_) { + return; + } + sonic_assert(shared_->refcount.load(std::memory_order_acquire) > 0); shared_->hadOom.store(false, std::memory_order_release); } + void MarkOom() { SetOom(); } + //! Frees a memory block (concept Allocator) static void Free(void* ptr) noexcept { (void)ptr; } // Do nothing // ! Compare (equality) with another MemoryPoolAllocator bool operator==(const MemoryPoolAllocator& rhs) const noexcept { - sonic_assert(shared_->refcount > 0); - sonic_assert(rhs.shared_->refcount > 0); + sonic_assert(!shared_ || + shared_->refcount.load(std::memory_order_acquire) > 0); + sonic_assert(!rhs.shared_ || + rhs.shared_->refcount.load(std::memory_order_acquire) > 0); return shared_ == rhs.shared_; } // ! Compare (inequality) with another MemoryPoolAllocator @@ -458,8 +554,20 @@ class MemoryPoolAllocator { \return true if success. */ bool AddChunk(size_t capacity) { + if (!shared_) { + return false; + } + if (capacity > std::numeric_limits::max() - SIZEOF_CHUNK_HEADER) { + SetOom(); + return false; + } if (!baseAllocator_) { - shared_->ownBaseAllocator = baseAllocator_ = new BaseAllocator(); + baseAllocator_ = new (std::nothrow) BaseAllocator(); + if (!baseAllocator_) { + SetOom(); + return false; + } + shared_->ownBaseAllocator = baseAllocator_; } if (ChunkHeader* chunk = static_cast( baseAllocator_->Malloc(SIZEOF_CHUNK_HEADER + capacity))) { @@ -469,18 +577,50 @@ class MemoryPoolAllocator { shared_->chunkHead = chunk; return true; } + SetOom(); return false; } + void InitializeShared(BaseAllocator* ownBaseAllocator, bool ownBuffer, + size_t capacity) { + new (&shared_->hadOom) std::atomic(false); + new (&shared_->refcount) std::atomic(1); + new (&shared_->lock) SpinLock(); + shared_->ownBaseAllocator = ownBaseAllocator; + shared_->chunkHead = GetChunkHead(shared_); + shared_->chunkHead->capacity = capacity; + shared_->chunkHead->size = 0; + shared_->chunkHead->next = 0; + shared_->ownBuffer = ownBuffer; + } + + static inline bool AlignSize(size_t size, size_t* aligned) { + if (size > std::numeric_limits::max() - 7) { + return false; + } + *aligned = SONIC_ALIGN(size); + return true; + } + + void SetOom() { + if (shared_) { + shared_->hadOom.store(true, std::memory_order_release); + } + } + static inline void* AlignBuffer(void* buf, size_t& size) { sonic_assert(buf != 0); const uintptr_t mask = sizeof(void*) - 1; const uintptr_t ubuf = reinterpret_cast(buf); if (sonic_unlikely(ubuf & mask)) { const uintptr_t abuf = (ubuf + mask) & ~mask; - sonic_assert(size >= abuf - ubuf); + const size_t delta = static_cast(abuf - ubuf); + if (sonic_unlikely(delta > size)) { + size = 0; + return nullptr; + } buf = reinterpret_cast(abuf); - size -= abuf - ubuf; + size -= delta; } return buf; } @@ -491,7 +631,7 @@ class MemoryPoolAllocator { BaseAllocator* baseAllocator_; //!< base allocator for allocating memory chunks. SharedData* shared_; //!< The shared data of the allocator - SpinLock lock_; + bool ownBaseAllocatorWhenInvalid_; }; template @@ -510,6 +650,9 @@ class MapAllocator { pointer allocate(size_type n, const void* = nullptr) { if (alloc_ == nullptr || n == 0) return nullptr; + if (n > std::numeric_limits::max() / sizeof(T)) { + return nullptr; + } return static_cast(alloc_->Malloc(n * sizeof(T))); } diff --git a/include/sonic/dom/dynamicnode.h b/include/sonic/dom/dynamicnode.h index 26786567..0fbb11e9 100644 --- a/include/sonic/dom/dynamicnode.h +++ b/include/sonic/dom/dynamicnode.h @@ -16,7 +16,12 @@ #pragma once -#include +#include +#include +#include +#include +#include +#include #include #include @@ -32,9 +37,9 @@ namespace sonic_json { -// OOM invariant: mutating operations (Reserve, AddMember, PushBack, ...) -// leave the node unchanged on allocation failure rather than propagating an -// error. Callers that need to detect OOM should use Allocator::HadOom(). +// Legacy mutating operations (Reserve, AddMember, PushBack, ...) keep their +// source-compatible return types and leave the node unchanged on allocation +// failure. Prefer Try* APIs when callers need full-chain status propagation. template class DNode : public GenericNode> { public: @@ -50,6 +55,8 @@ class DNode : public GenericNode> { friend class SAXHandler; friend class LazySAXHandler; friend class SchemaHandler; + template + friend class GenericDocument; friend BaseNode; template @@ -78,88 +85,7 @@ class DNode : public GenericNode> { DNode(const DNode& rhs, Allocator& alloc, bool copyString = false) : BaseNode() { - using rhsNodeType = DNode; - switch (rhs.getBasicType()) { - case kObject: { - size_t count = rhs.Size(); - this->o.len = rhs.getTypeAndLen(); // Copy size and type. - if (count > 0) { - void* mem = containerMalloc(count, alloc); - if (sonic_unlikely(mem == nullptr)) { - this->setLength(0, kObject); - setChildren(nullptr); - break; - } - rhsNodeType* rn = rhs.getObjChildrenFirst(); - DNode* ln = (DNode*)((char*)mem + sizeof(MetaNode)); - for (size_t i = 0; i < count * 2; i += 2) { - new (ln + i) DNode(*(rn + i), alloc, copyString); - new (ln + i + 1) DNode(*(rn + i + 1), alloc, copyString); - } - setChildren(mem); - } else { - setChildren(nullptr); - } - break; - } - case kArray: { - size_t a_size = rhs.Size(); - this->a.len = rhs.getTypeAndLen(); // Copy size and type. - if (a_size > 0) { - void* mem = containerMalloc(a_size, alloc); - if (sonic_unlikely(mem == nullptr)) { - this->setLength(0, kArray); - setChildren(nullptr); - break; - } - rhsNodeType* rn = rhs.getArrChildrenFirst(); - DNode* ln = (DNode*)((char*)mem + sizeof(MetaNode)); - for (size_t i = 0; i < a_size; ++i) { - new (ln + i) DNode(*(rn + i), alloc, copyString); - } - setChildren(mem); - setCapacity(a_size); - } else { - setChildren(nullptr); - } - break; - } - case kString: { - this->sv.len = rhs.getTypeAndLen(); // Copy size and type. - if (rhs.GetType() != kStringConst || copyString) { - this->StringCopy(rhs.GetStringView().data(), rhs.Size(), alloc); - } else { - this->sv.p = rhs.GetStringView().data(); - } - break; - } - case kNumber: { - if (rhs.GetType() != kNumStr) { - std::memcpy(&(this->data), &rhs, sizeof(this->data)); - break; - } - [[fallthrough]]; - } - case kRaw: { - size_t len = rhs.Size(); - char* p = static_cast(alloc.Malloc(len + 1)); - if (p) { - // Mark buffer as owned so destroy() will free it for kNeedFree alloc. - this->sv.len = rhs.getTypeAndLen() | kOwnedStringMask; - this->sv.p = p; - std::memcpy(const_cast(this->sv.p), rhs.GetStringView().data(), - len); - const_cast(this->sv.p)[len] = '\0'; - } else { - this->sv.p = ""; - this->setLength(0, rhs.GetType()); - } - break; - } - default: - std::memcpy(&(this->data), &rhs, sizeof(this->data)); - break; - } + (void)initCopyFrom(rhs, alloc, copyString); } /** @@ -325,10 +251,12 @@ class DNode : public GenericNode> { if (getMapUnsafe()) return true; map_type* map = static_cast(alloc.Malloc(sizeof(map_type))); if (nullptr == map) return false; - new (map) map_type(MAType(&alloc)); + new (map) map_type(&alloc); MemberNode* m = (MemberNode*)getObjChildrenFirstUnsafe(); - for (size_t i = 0; i < this->Size(); ++i) { - map->emplace(std::make_pair((m + i)->name.GetStringView(), i)); + if (!map->BuildFromMembers(m, this->Size())) { + map->~map_type(); + alloc.Free(map); + return false; } setMap(map); return true; @@ -372,19 +300,167 @@ class DNode : public GenericNode> { template DNode& CopyFrom(const DNode& rhs, Allocator& alloc, bool copyString = false) { - this->destroy(); - new (this) DNode(rhs, alloc, copyString); + (void)TryCopyFrom(rhs, alloc, copyString); return *this; } + template + bool TryCopyFrom(const DNode& rhs, Allocator& alloc, + bool copyString = false) { + if (sonic_unlikely(reinterpret_cast(this) == + reinterpret_cast(&rhs))) { + return true; + } + DNode temp; + if (sonic_unlikely(!temp.initCopyFrom(rhs, alloc, copyString))) { + return false; + } + this->destroy(); + rawAssign(temp); + return true; + } + /** * @brief move another node to this. * @param rhs source node */ private: + static void destroyMembers(MemberNode* members, size_t count) { + for (size_t i = 0; i < count; ++i) { + members[i].~MemberNode(); + } + } + + static void destroyElements(DNode* elements, size_t count) { + for (size_t i = 0; i < count; ++i) { + elements[i].~DNode(); + } + } + + template + bool initCopyFrom(const DNode& rhs, Allocator& alloc, + bool copyString) { + switch (rhs.getBasicType()) { + case kObject: { + size_t count = rhs.Size(); + this->o.len = rhs.getTypeAndLen(); + if (count == 0) { + setChildren(nullptr); + return true; + } + void* mem = containerMalloc(count, alloc); + if (sonic_unlikely(mem == nullptr)) { + this->setLength(0, kObject); + setChildren(nullptr); + return false; + } + MemberNode* members = reinterpret_cast( + static_cast(mem) + sizeof(MetaNode)); + auto rhs_member = rhs.MemberBegin(); + size_t constructed = 0; + for (size_t i = 0; i < count; ++i, ++rhs_member) { + DNode copied_name; + if (sonic_unlikely(!copied_name.initCopyFrom(rhs_member->name, alloc, + copyString))) { + destroyMembers(members, constructed); + Allocator::Free(mem); + this->setLength(0, kObject); + setChildren(nullptr); + return false; + } + DNode copied_value; + if (sonic_unlikely(!copied_value.initCopyFrom(rhs_member->value, + alloc, copyString))) { + destroyMembers(members, constructed); + Allocator::Free(mem); + this->setLength(0, kObject); + setChildren(nullptr); + return false; + } + new (&members[i]) + MemberNode(std::move(copied_name), std::move(copied_value)); + ++constructed; + } + setChildren(mem); + return true; + } + case kArray: { + size_t count = rhs.Size(); + this->a.len = rhs.getTypeAndLen(); + if (count == 0) { + setChildren(nullptr); + return true; + } + void* mem = containerMalloc(count, alloc); + if (sonic_unlikely(mem == nullptr)) { + this->setLength(0, kArray); + setChildren(nullptr); + return false; + } + DNode* elements = reinterpret_cast(static_cast(mem) + + sizeof(MetaNode)); + auto rhs_it = rhs.Begin(); + size_t constructed = 0; + for (size_t i = 0; i < count; ++i, ++rhs_it) { + new (&elements[i]) DNode(); + if (sonic_unlikely( + !elements[i].initCopyFrom(*rhs_it, alloc, copyString))) { + elements[i].~DNode(); + destroyElements(elements, constructed); + Allocator::Free(mem); + this->setLength(0, kArray); + setChildren(nullptr); + return false; + } + ++constructed; + } + setChildren(mem); + setCapacity(count); + return true; + } + case kString: { + this->sv.len = rhs.getTypeAndLen(); + if (rhs.GetType() != kStringConst || copyString) { + return this->StringCopy(rhs.GetStringView().data(), rhs.Size(), + alloc); + } + this->sv.p = rhs.GetStringView().data(); + return true; + } + case kNumber: { + if (rhs.GetType() != kNumStr) { + std::memcpy(&(this->data), &rhs, sizeof(this->data)); + return true; + } + [[fallthrough]]; + } + case kRaw: { + size_t len = rhs.Size(); + if (sonic_unlikely(len == std::numeric_limits::max())) { + this->sv.p = ""; + this->setLength(0, rhs.GetType()); + return false; + } + char* p = static_cast(alloc.Malloc(len + 1)); + if (sonic_unlikely(p == nullptr)) { + this->sv.p = ""; + this->setLength(0, rhs.GetType()); + return false; + } + this->sv.len = rhs.getTypeAndLen() | kOwnedStringMask; + this->sv.p = p; + std::memcpy(p, rhs.GetStringView().data(), len); + p[len] = '\0'; + return true; + } + default: + std::memcpy(&(this->data), &rhs, sizeof(this->data)); + return true; + } + } + using MSType = StringView; - using MAType = MapAllocator, Allocator>; #if defined(SONIC_STATIC_DISPATCH) struct Less { bool operator()(MSType s1, MSType s2) const { @@ -394,11 +470,190 @@ class DNode : public GenericNode> { return cmp < 0 || (cmp == 0 && n1 < n2); } }; - using map_type = std::multimap; #else - using map_type = std::multimap, MAType>; + using Less = std::less; #endif + struct map_type { + struct Entry { + MSType key; + size_t index; + }; + + explicit map_type(Allocator* alloc) : alloc_{alloc} {} + ~map_type() { Allocator::Free(entries_); } + + bool Reserve(size_t new_cap) { + if (new_cap <= cap_) return true; + if (sonic_unlikely(new_cap > + std::numeric_limits::max() / sizeof(Entry))) { + MarkOom(alloc_); + return false; + } + void* mem = alloc_->Realloc(entries_, cap_ * sizeof(Entry), + new_cap * sizeof(Entry)); + if (sonic_unlikely(mem == nullptr)) return false; + entries_ = static_cast(mem); + cap_ = new_cap; + return true; + } + + bool Insert(MSType key, size_t index) { + if (sonic_unlikely(!ReserveForInsert())) return false; + InsertNoGrow(key, index); + return true; + } + + bool ReserveForInsert() { + if (size_ < cap_) return true; + if (cap_ == 0) return Reserve(16); + size_t inc = cap_ / 2 + (cap_ & 1); + if (sonic_unlikely(cap_ > std::numeric_limits::max() - inc)) { + MarkOom(alloc_); + return false; + } + return Reserve(cap_ + inc); + } + + void InsertAfterReserve(MSType key, size_t index) { + sonic_assert(size_ < cap_); + InsertNoGrow(key, index); + } + + bool BuildFromMembers(MemberNode* members, size_t count) { + if (sonic_unlikely(!Reserve(count))) return false; + size_ = 0; + if (count == 0) return true; + for (size_t i = 0; i < count; ++i) { + entries_[i] = Entry{(members + i)->name.GetStringView(), i}; + } + size_ = count; + std::sort(entries_, entries_ + size_, + [this](const Entry& lhs, const Entry& rhs) { + if (KeyLess(lhs.key, rhs.key)) return true; + if (KeyLess(rhs.key, lhs.key)) return false; + return lhs.index < rhs.index; + }); + return true; + } + + Entry* Find(MSType key) const { + size_t pos = LowerBound(key); + if (pos < size_ && KeyEqual(entries_[pos].key, key)) { + return entries_ + pos; + } + return nullptr; + } + + void Erase(Entry* entry) { + size_t pos = static_cast(entry - entries_); + sonic_assert(pos < size_); + if (pos + 1 < size_) { + std::memmove(entries_ + pos, entries_ + pos + 1, + (size_ - pos - 1) * sizeof(Entry)); + } + --size_; + } + + bool ReplaceIndex(size_t old_index, MSType key, size_t new_index) { + Entry* entry = FindByIndex(old_index); + if (sonic_unlikely(entry == nullptr)) return false; + Erase(entry); + sonic_assert(size_ < cap_); + InsertNoGrow(key, new_index); + return true; + } + + private: + bool KeyLess(MSType lhs, MSType rhs) const { return Less{}(lhs, rhs); } + bool KeyEqual(MSType lhs, MSType rhs) const { + return !KeyLess(lhs, rhs) && !KeyLess(rhs, lhs); + } + + size_t LowerBound(MSType key) const { + size_t first = 0; + size_t count = size_; + while (count > 0) { + size_t step = count / 2; + size_t mid = first + step; + if (KeyLess(entries_[mid].key, key)) { + first = mid + 1; + count -= step + 1; + } else { + count = step; + } + } + return first; + } + + size_t UpperBound(MSType key) const { + size_t first = 0; + size_t count = size_; + while (count > 0) { + size_t step = count / 2; + size_t mid = first + step; + if (!KeyLess(key, entries_[mid].key)) { + first = mid + 1; + count -= step + 1; + } else { + count = step; + } + } + return first; + } + + Entry* FindByIndex(size_t index) const { + for (size_t i = 0; i < size_; ++i) { + if (entries_[i].index == index) return entries_ + i; + } + return nullptr; + } + + void InsertNoGrow(MSType key, size_t index) { + size_t pos = LowerBound(Entry{key, index}); + if (pos < size_) { + std::memmove(entries_ + pos + 1, entries_ + pos, + (size_ - pos) * sizeof(Entry)); + } + entries_[pos] = Entry{key, index}; + ++size_; + } + + bool EntryLess(const Entry& lhs, const Entry& rhs) const { + if (KeyLess(lhs.key, rhs.key)) return true; + if (KeyLess(rhs.key, lhs.key)) return false; + return lhs.index < rhs.index; + } + + size_t LowerBound(const Entry& entry) const { + size_t first = 0; + size_t count = size_; + while (count > 0) { + size_t step = count / 2; + size_t mid = first + step; + if (EntryLess(entries_[mid], entry)) { + first = mid + 1; + count -= step + 1; + } else { + count = step; + } + } + return first; + } + + template + static auto MarkOom(A* alloc, int = 0) + -> decltype(alloc->MarkOom(), void()) { + if (alloc) alloc->MarkOom(); + } + static void MarkOom(...) {} + + Allocator* alloc_{nullptr}; + Entry* entries_{nullptr}; + size_t size_{0}; + size_t cap_{0}; + }; + struct MetaNode { size_t cap; map_type* map; @@ -417,72 +672,92 @@ class DNode : public GenericNode> { // Set APIs DNode& setNullImpl() { this->destroy(); - new (this) BaseNode(kNull); + this->setType(kNull); return *this; } DNode& setBoolImpl(bool b) { this->destroy(); - new (this) BaseNode(b); + this->setType(b ? kTrue : kFalse); return *this; } DNode& setObjectImpl() { this->destroy(); - new (this) BaseNode(kObject); + this->setType(kObject); setChildren(nullptr); return *this; } DNode& setArrayImpl() { this->destroy(); - new (this) BaseNode(kArray); + this->setType(kArray); setChildren(nullptr); return *this; } DNode& setIntImpl(int i) { this->destroy(); - new (this) BaseNode(i); + this->setType(i >= 0 ? kUint : kSint); + this->n.i64 = i; return *this; } DNode& setUintImpl(unsigned int i) { this->destroy(); - new (this) BaseNode(i); + this->setType(kUint); + this->n.u64 = i; return *this; } DNode& setInt64Impl(int64_t i) { this->destroy(); - new (this) BaseNode(i); + this->setType(i >= 0 ? kUint : kSint); + this->n.i64 = i; return *this; } DNode& setUint64Impl(uint64_t i) { this->destroy(); - new (this) BaseNode(i); + this->setType(kUint); + this->n.u64 = i; return *this; } DNode& setDoubleImpl(double d) { this->destroy(); - new (this) BaseNode(d); + this->setType(kReal); + this->n.f64 = d; return *this; } DNode& setStringImpl(const char* s, size_t len) { this->destroy(); - new (this) BaseNode(s, len); + if (sonic_likely(this->setLengthChecked(len, kStringConst))) { + this->sv.p = s; + } else { + this->sv.p = ""; + } return *this; } DNode& setStringImpl(const char* s, size_t len, Allocator& alloc) { - this->destroy(); - new (this) BaseNode(s, len, alloc); + (void)trySetStringImpl(s, len, alloc); return *this; } + bool trySetStringImpl(const char* s, size_t len, Allocator& alloc) { + if (sonic_unlikely(len > BaseNode::kMaxStoredLength)) return false; + char* p = static_cast(alloc.Malloc(len + 1)); + if (sonic_unlikely(p == nullptr)) return false; + std::memcpy(p, s, len); + p[len] = '\0'; + this->destroy(); + this->sv.p = p; + this->setLength(len, kStringFree); + return true; + } + DNode& setRawImpl(StringView s) { return setRawLikeImpl(s, kRaw); } DNode& setRawImpl(StringView s, Allocator& alloc) { @@ -497,29 +772,37 @@ class DNode : public GenericNode> { return setRawLikeImpl(s, kNumStr, alloc); } + bool trySetStringNumberImpl(StringView s, Allocator& alloc) { + return trySetRawLikeImpl(s, kNumStr, alloc); + } + DNode& setRawLikeImpl(StringView s, TypeFlag typ) { this->destroy(); this->raw.p = s.data(); - this->setLength(s.size(), typ); + if (sonic_unlikely(!this->setLengthChecked(s.size(), typ))) { + this->raw.p = ""; + } return *this; } DNode& setRawLikeImpl(StringView s, TypeFlag typ, Allocator& alloc) { - this->destroy(); + (void)trySetRawLikeImpl(s, typ, alloc); + return *this; + } + + bool trySetRawLikeImpl(StringView s, TypeFlag typ, Allocator& alloc) { + if (sonic_unlikely(s.size() > BaseNode::kMaxStoredLength)) return false; size_t len = s.size(); char* p = static_cast(alloc.Malloc(len + 1)); - if (p) { - std::memcpy(p, s.data(), len); - p[len] = '\0'; - this->raw.p = p; - // Mark buffer as owned so destroy() will free it for kNeedFree alloc. - this->setLength(len, static_cast(static_cast(typ) | - kOwnedStringMask)); - } else { - this->raw.p = ""; - this->setLength(0, typ); - } - return *this; + if (sonic_unlikely(p == nullptr)) return false; + std::memcpy(p, s.data(), len); + p[len] = '\0'; + this->destroy(); + this->raw.p = p; + // Mark buffer as owned so destroy() will free it for kNeedFree alloc. + this->setLength(len, static_cast(static_cast(typ) | + kOwnedStringMask)); + return true; } DNode& popBackImpl() { @@ -530,8 +813,8 @@ class DNode : public GenericNode> { DNode& reserveImpl(size_t new_cap, Allocator& alloc) { if (new_cap > this->Capacity()) { - void* mem = - containerRealloc(children(), this->Capacity(), new_cap, alloc); + void* mem = containerRealloc(children(), this->Capacity(), new_cap, + this->Size(), alloc); if (sonic_likely(mem != nullptr)) setChildren(mem); } return *this; @@ -576,15 +859,18 @@ class DNode : public GenericNode> { if (!self->IsObject() && !self->IsArray()) { return true; } - using CurPtr = std::conditional_t< - std::is_const>::value, const DNode*, - DNode*>; - CurPtr n = reinterpret_cast(self->getChildrenFirstUnsafe()) + - (self->IsObject() ? 1 : 0); - size_t step = self->IsObject() ? 2 : 1; - for (size_t i = 0; i < self->Size(); ++i) { - CurPtr cur = (n + i * step); - atJsonPathImplCommon(cur, path, index + 1, res); + if (self->IsObject()) { + auto it = self->MemberBegin(); + for (size_t i = 0; i < self->Size(); ++i, ++it) { + auto* cur = reinterpret_cast(&it->value); + atJsonPathImplCommon(cur, path, index + 1, res); + } + } else { + auto it = self->Begin(); + for (size_t i = 0; i < self->Size(); ++i, ++it) { + auto* cur = reinterpret_cast(&*it); + atJsonPathImplCommon(cur, path, index + 1, res); + } } return true; } @@ -628,8 +914,8 @@ class DNode : public GenericNode> { if (new_cap > this->Capacity()) { void* old_ptr = children(); size_t old_cap = this->Capacity(); - void* mem = - containerRealloc(old_ptr, old_cap, new_cap, alloc); + void* mem = containerRealloc(old_ptr, old_cap, new_cap, + this->Size(), alloc); if (sonic_likely(mem != nullptr)) { setChildren(mem); if (old_cap == 0) { @@ -668,6 +954,12 @@ class DNode : public GenericNode> { template sonic_force_inline void* containerMalloc(size_t cap, Allocator& alloc) { + if (sonic_unlikely(cap > + (std::numeric_limits::max() - sizeof(MetaNode)) / + sizeof(T))) { + markAllocatorOom(alloc); + return nullptr; + } size_t alloc_size = cap * sizeof(T) + sizeof(MetaNode); void* mem = alloc.Malloc(alloc_size); if (sonic_likely(mem != nullptr)) { @@ -678,16 +970,52 @@ class DNode : public GenericNode> { template sonic_force_inline void* containerRealloc(void* old_ptr, size_t old_cap, - size_t new_cap, Allocator& alloc) { - size_t old_size = old_cap * sizeof(T) + sizeof(MetaNode); + size_t new_cap, size_t count, + Allocator& alloc) { + if (sonic_unlikely( + old_cap > (std::numeric_limits::max() - sizeof(MetaNode)) / + sizeof(T) || + new_cap > (std::numeric_limits::max() - sizeof(MetaNode)) / + sizeof(T))) { + markAllocatorOom(alloc); + return nullptr; + } size_t new_size = new_cap * sizeof(T) + sizeof(MetaNode); - void* mem = alloc.Realloc(old_ptr, old_size, new_size); - if (sonic_likely(mem != nullptr)) { - static_cast(mem)->SetMetaCap(new_cap); + void* mem = alloc.Malloc(new_size); + if (sonic_unlikely(mem == nullptr)) return nullptr; + auto* new_meta = static_cast(mem); + new (new_meta) MetaNode(new_cap); + if (old_ptr != nullptr) { + auto* old_meta = static_cast(old_ptr); + new_meta->map = old_meta->map; + old_meta->map = nullptr; + relocateContainer( + reinterpret_cast(reinterpret_cast(mem) + sizeof(MetaNode)), + reinterpret_cast(reinterpret_cast(old_ptr) + + sizeof(MetaNode)), + count); + old_meta->~MetaNode(); + Allocator::Free(old_ptr); } return mem; } + template + static void relocateContainer(T* dst, T* src, size_t count) { + if constexpr (std::is_same::value) { + for (size_t i = 0; i < count; ++i) { + new (&dst[i]) MemberNode(std::move(src[i].mutableName()), + std::move(src[i].value)); + src[i].~MemberNode(); + } + } else { + for (size_t i = 0; i < count; ++i) { + new (&dst[i]) DNode(std::move(src[i])); + src[i].~DNode(); + } + } + } + sonic_force_inline void* children() const { sonic_assert(this->IsContainer()); return this->a.next.children; @@ -718,19 +1046,33 @@ class DNode : public GenericNode> { sizeof(MetaNode) / sizeof(char)); } - sonic_force_inline DNode* getObjChildrenFirst() const { + sonic_force_inline bool pointsIntoChildren(const DNode* p) const { + if (sonic_unlikely(!this->IsContainer() || children() == nullptr)) { + return false; + } + const uintptr_t addr = reinterpret_cast(p); + const uintptr_t first = reinterpret_cast( + this->IsObject() ? static_cast(getObjChildrenFirstUnsafe()) + : static_cast(getArrChildrenFirstUnsafe())); + const size_t bytes = this->IsObject() ? this->Size() * sizeof(MemberNode) + : this->Size() * sizeof(DNode); + const uintptr_t last = first + bytes; + return addr >= first && addr < last; + } + + sonic_force_inline MemberNode* getObjChildrenFirst() const { sonic_assert(this->IsObject()); if (nullptr == children()) { return nullptr; } - return (DNode*)((char*)this->a.next.children + - sizeof(MetaNode) / sizeof(char)); + return reinterpret_cast( + reinterpret_cast(this->a.next.children) + sizeof(MetaNode)); } - sonic_force_inline DNode* getObjChildrenFirstUnsafe() const { + sonic_force_inline MemberNode* getObjChildrenFirstUnsafe() const { sonic_assert(this->IsObject()); - return (DNode*)((char*)this->a.next.children + - sizeof(MetaNode) / sizeof(char)); + return reinterpret_cast( + reinterpret_cast(this->a.next.children) + sizeof(MetaNode)); } sonic_force_inline void setChildren(void* new_child) { @@ -746,6 +1088,13 @@ class DNode : public GenericNode> { ((MetaNode*)(this->o.next.children))->cap = new_cap; } + template + static auto markAllocatorOom(A& alloc, int = 0) + -> decltype(alloc.MarkOom(), void()) { + alloc.MarkOom(); + } + static void markAllocatorOom(...) {} + sonic_force_inline void setMap(map_type* new_map) { sonic_assert(this->IsObject()); sonic_assert(this->o.next.children != nullptr); @@ -763,15 +1112,36 @@ class DNode : public GenericNode> { return ((MetaNode*)(this->o.next.children))->map; } - sonic_force_inline MemberIterator findFromMap(StringView key) const { - auto it = getMap()->find(MSType(key.data(), key.size())); - if (it != getMap()->end()) { - return memberBeginUnsafe() + it->second; + sonic_force_inline MemberIterator findFromMap(StringView key) { + auto* it = getMap()->Find(MSType(key.data(), key.size())); + if (it != nullptr) { + return memberBeginUnsafe() + it->index; } return memberEndUnsafe(); } - sonic_force_inline MemberIterator findMemberImpl(StringView key) const { + sonic_force_inline ConstMemberIterator findFromMap(StringView key) const { + auto* it = getMap()->Find(MSType(key.data(), key.size())); + if (it != nullptr) { + return cmemberBeginImpl() + it->index; + } + return cmemberEndImpl(); + } + + sonic_force_inline MemberIterator findMemberImpl(StringView key) { + if (nullptr != getMap()) { + return findFromMap(key); + } + auto it = this->MemberBegin(); + for (auto e = this->MemberEnd(); it != e; ++it) { + if (it->name.GetStringView() == key) { + break; + } + } + return it; + } + + sonic_force_inline ConstMemberIterator findMemberImpl(StringView key) const { if (nullptr != getMap()) { return findFromMap(key); } @@ -781,11 +1151,11 @@ class DNode : public GenericNode> { break; } } - return const_cast(it); + return it; } sonic_force_inline MemberIterator findMemberImpl(const char* key, - size_t len) const { + size_t len) { /************************************************** * Only calling internal memcmp when static dispatch. * Dynamic dispatch will have indirect call. @@ -802,22 +1172,51 @@ class DNode : public GenericNode> { break; } } - return const_cast(it); + return it; #else return findMemberImpl(StringView(key, len)); #endif } - sonic_force_inline DNode& findValueImpl(StringView key) const noexcept { + sonic_force_inline ConstMemberIterator findMemberImpl(const char* key, + size_t len) const { +#if defined(SONIC_STATIC_DISPATCH) + if (nullptr != getMap()) { + return findFromMap(StringView(key, len)); + } + auto it = this->MemberBegin(); + for (auto e = this->MemberEnd(); it != e; ++it) { + auto name_sv = it->name.GetStringView(); + if (name_sv.size() == len && + internal::InlinedMemcmpEq(name_sv.data(), key, len)) { + break; + } + } + return it; +#else + return findMemberImpl(StringView(key, len)); +#endif + } + + sonic_force_inline DNode& findValueImpl(StringView key) noexcept { auto m = findMemberImpl(key); if (m != this->MemberEnd()) { return m->value; } - static DNode tmp{}; + static thread_local DNode tmp{}; tmp.SetNull(); return tmp; } + sonic_force_inline const DNode& findValueImpl(StringView key) const noexcept { + auto m = findMemberImpl(key); + if (m != this->MemberEnd()) { + return m->value; + } + static const DNode tmp{}; + return tmp; + } + DNode& findValueImpl(size_t idx) const noexcept { return *(getArrChildrenFirst() + idx); } @@ -826,53 +1225,79 @@ class DNode : public GenericNode> { bool copyKey) { constexpr size_t k_default_obj_cap = 16; size_t count = this->Size(); + DNode name; + if (copyKey) { + if (sonic_unlikely(!name.StringCopy(key.data(), key.size(), alloc))) { + return this->MemberEnd(); + } + } else { + name.SetString(key); + if (sonic_unlikely(name.IsNull())) { + return this->MemberEnd(); + } + } + map_type* map = getMap(); + if (map && sonic_unlikely(!map->ReserveForInsert())) { + return this->MemberEnd(); + } + + DNode moved_value; + DNode* value_to_add = &value; + bool moved_alias = false; + const bool need_grow = count >= this->Capacity(); + if (need_grow && pointsIntoChildren(&value)) { + moved_alias = true; + moved_value.rawAssign(value); + value_to_add = &moved_value; + } if (count >= this->Capacity()) { if (this->Capacity() == 0) { void* mem = containerMalloc(k_default_obj_cap, alloc); - if (sonic_unlikely(mem == nullptr)) return this->MemberEnd(); + if (sonic_unlikely(mem == nullptr)) { + if (moved_alias) value.rawAssign(moved_value); + return this->MemberEnd(); + } setChildren(mem); } else { size_t cap = this->Capacity(); - cap += (cap + 1) / 2; // grow by factor 1.5 + size_t inc = cap / 2 + (cap & 1); + if (sonic_unlikely(cap > std::numeric_limits::max() - inc)) { + markAllocatorOom(alloc); + if (moved_alias) value.rawAssign(moved_value); + return this->MemberEnd(); + } + cap += inc; // grow by factor 1.5 void* old_ptr = children(); - void* mem = - containerRealloc(old_ptr, this->Capacity(), cap, alloc); - if (sonic_unlikely(mem == nullptr)) return this->MemberEnd(); + void* mem = containerRealloc(old_ptr, this->Capacity(), cap, + count, alloc); + if (sonic_unlikely(mem == nullptr)) { + if (moved_alias) value.rawAssign(moved_value); + return this->MemberEnd(); + } setChildren(mem); } } - - // add member to the last pos - DNode name; - if (copyKey) { - name.SetString(key, alloc); - if (sonic_unlikely(name.IsNull())) return this->MemberEnd(); - } else { - name.SetString(key); - } - DNode* last = this->getObjChildrenFirst() + count * 2; - last->rawAssign(name); // MemberEnd()->name - (last + 1)->rawAssign(value); // MemberEnd()->value + if (map) map->InsertAfterReserve(name.GetStringView(), count); + MemberNode* last = memberBeginUnsafe() + count; + DNode member_name; + member_name.rawAssign(name); + DNode member_value; + member_value.rawAssign(*value_to_add); + new (last) MemberNode(std::move(member_name), std::move(member_value)); this->addLength(1); - - // maintain map - if (nullptr != getMap()) { - // If key exists, it will be still keeped in vector but replaced in map. - getMap()->emplace(std::make_pair(last->GetStringView(), count)); - } - return (MemberIterator)last; + return last; } sonic_force_inline bool removeMemberImpl(StringView key) { MemberIterator m; + typename map_type::Entry* map_entry = nullptr; if (nullptr == children()) { goto not_find; } if (getMapUnsafe()) { - auto it = getMapUnsafe()->find(MSType(key.data(), key.size())); - if (it != getMapUnsafe()->end()) { - m = memberBeginUnsafe() + it->second; - getMapUnsafe()->erase(it); + map_entry = getMapUnsafe()->Find(MSType(key.data(), key.size())); + if (map_entry != nullptr) { + m = memberBeginUnsafe() + map_entry->index; goto find; } @@ -886,30 +1311,23 @@ class DNode : public GenericNode> { } find : { MemberIterator m_tail = memberBeginUnsafe() + (this->Size() - 1); - // TODO: destroy() then memcpy. if (m != m_tail) { - DNode* m_name = (DNode*)(&(m->name)); - DNode* tail_name = (DNode*)(&(m_tail->name)); - *m_name = std::move(*tail_name); - m->value = std::move(m_tail->value); // maintain map map_type* map = getMap(); if (map) { - size_t pos = m - memberBeginUnsafe(); - // erase tail - auto range = - map->equal_range(m->name.GetStringView()); // already moved. - for (auto i = range.first; i != range.second; ++i) { - if (i->second == this->Size() - 1) { - map->erase(i); - break; // only one erase. - } - } - map->emplace(std::make_pair(m->name.GetStringView(), pos)); + map->Erase(map_entry); } + m->~MemberNode(); + new (m) MemberNode(std::move(m_tail->mutableName()), + std::move(m_tail->value)); + if (map && !map->ReplaceIndex(this->Size() - 1, m->name.GetStringView(), + size_t(m - memberBeginUnsafe()))) { + DestroyMap(); + } + m_tail->~MemberNode(); } else { - m->name.~DNode(); - m->value.~DNode(); + if (map_entry != nullptr) getMapUnsafe()->Erase(map_entry); + m->~MemberNode(); } this->subLength(1); @@ -923,6 +1341,7 @@ class DNode : public GenericNode> { // Destroy map before removing members. DestroyMap(); size_t size = this->Size(); + if (first == last) return first; MemberIterator end = this->MemberEnd(); if (size_t(last - first) >= size) { destroy(); @@ -931,12 +1350,13 @@ class DNode : public GenericNode> { return this->MemberEnd(); } for (MemberIterator it = first; it != last; ++it) { - it->name.~DNode(); - it->value.~DNode(); + it->~MemberNode(); } - if (first != last || last != end) { - std::memmove(static_cast(&(*first)), static_cast(&(*last)), - sizeof(MemberNode) * (end - last)); + MemberIterator dst = first; + for (MemberIterator src = last; src != end; ++src, ++dst) { + new (dst) + MemberNode(std::move(src->mutableName()), std::move(src->value)); + src->~MemberNode(); } this->subLength(last - first); return first; @@ -947,21 +1367,42 @@ class DNode : public GenericNode> { sonic_assert(this->IsArray()); // reserve capacity size_t cap = this->Capacity(); + DNode moved_value; + DNode* value_to_add = &value; + if (this->Size() >= cap && pointsIntoChildren(&value)) { + moved_value.rawAssign(value); + value_to_add = &moved_value; + } if (this->Size() >= cap) { - size_t new_cap = cap ? cap + (cap + 1) / 2 : k_default_array_cap; + size_t new_cap = k_default_array_cap; + if (cap) { + size_t inc = cap / 2 + (cap & 1); + if (sonic_unlikely(cap > std::numeric_limits::max() - inc)) { + markAllocatorOom(alloc); + if (value_to_add == &moved_value) value.rawAssign(moved_value); + return *this; + } + new_cap = cap + inc; + } void* old_ptr = this->a.next.children; - void* new_ptr = containerRealloc(old_ptr, cap, new_cap, alloc); - if (sonic_unlikely(new_ptr == nullptr)) return *this; + void* new_ptr = + containerRealloc(old_ptr, cap, new_cap, this->Size(), alloc); + if (sonic_unlikely(new_ptr == nullptr)) { + if (value_to_add == &moved_value) value.rawAssign(moved_value); + return *this; + } this->a.next.children = new_ptr; } // add value to the last pos - DNode& last = *(this->End()); - last.rawAssign(value); + DNode* last = this->End(); + new (last) DNode(); + last->rawAssign(*value_to_add); this->addLength(1); return *this; } ValueIterator eraseImpl(ValueIterator start, ValueIterator end) { + if (start == end) return start; sonic_assert(this->IsArray()); sonic_assert(start <= end); sonic_assert(start >= this->Begin()); @@ -969,8 +1410,12 @@ class DNode : public GenericNode> { ValueIterator pos = this->Begin() + (start - this->Begin()); for (ValueIterator it = pos; it != end; ++it) it->~DNode(); - std::memmove(static_cast(pos), end, - (this->End() - end) * sizeof(DNode)); + ValueIterator dst = pos; + ValueIterator old_end = this->End(); + for (ValueIterator src = end; src != old_end; ++src, ++dst) { + new (dst) DNode(std::move(*src)); + src->~DNode(); + } this->subLength(end - start); return start; } @@ -1005,11 +1450,11 @@ class DNode : public GenericNode> { switch (this->GetType()) { case kObject: { if (children()) { - DNode* node = getObjChildrenFirstUnsafe(); - DNode* e = node + this->Size() * 2; - for (; node < e; node += 2) { - node->destroy(); - (node + 1)->destroy(); + MemberNode* member = + reinterpret_cast(getObjChildrenFirstUnsafe()); + MemberNode* e = member + this->Size(); + for (; member < e; ++member) { + member->~MemberNode(); } static_cast(children())->~MetaNode(); } @@ -1017,8 +1462,11 @@ class DNode : public GenericNode> { break; } case kArray: { - for (auto it = this->Begin(), e = this->End(); it != e; ++it) { - it->destroy(); + if (children()) { + for (auto it = this->Begin(), e = this->End(); it != e; ++it) { + it->~DNode(); + } + static_cast(children())->~MetaNode(); } Allocator::Free(children()); break; diff --git a/include/sonic/dom/flags.h b/include/sonic/dom/flags.h index a16a4272..66025850 100644 --- a/include/sonic/dom/flags.h +++ b/include/sonic/dom/flags.h @@ -29,6 +29,9 @@ enum class ParseFlags : uint32_t { // native numeric representation. In-range floating-point numbers stay // double; in-range integers stay int64/uint64. kParseOverflowNumAsNumStr = 1 << 3, + // ParseOnDemand keeps its default short-circuit behavior. Set this flag when + // callers require full-document validation before returning a target. + kParseValidateOnDemandFull = 1 << 4, }; // Compatibility layer for downstream users. @@ -46,6 +49,10 @@ using ParseFlag [[deprecated("Use ParseFlags instead")]] = ParseFlags; [[deprecated( "Use ParseFlags::kParseOverflowNumAsNumStr instead")]] constexpr ParseFlags kParseOverflowNumAsNumStr = ParseFlags::kParseOverflowNumAsNumStr; +[[deprecated( + "Use ParseFlags::kParseValidateOnDemandFull " + "instead")]] constexpr ParseFlags kParseValidateOnDemandFull = + ParseFlags::kParseValidateOnDemandFull; constexpr ParseFlags operator|(ParseFlags lhs, ParseFlags rhs) { return static_cast(static_cast(lhs) | diff --git a/include/sonic/dom/generic_document.h b/include/sonic/dom/generic_document.h index d3a0d22b..a76ca650 100644 --- a/include/sonic/dom/generic_document.h +++ b/include/sonic/dom/generic_document.h @@ -17,6 +17,7 @@ #pragma once #include +#include #include #include @@ -32,6 +33,11 @@ struct has_clear : std::false_type {}; template struct has_clear().Clear())>> : std::true_type {}; +template +struct has_had_oom : std::false_type {}; +template +struct has_had_oom().HadOom())>> + : std::true_type {}; } // namespace internal template @@ -75,6 +81,9 @@ class GenericDocument : public NodeType { * @brief Move assignment */ GenericDocument& operator=(GenericDocument&& rhs) { + if (this == &rhs) { + return *this; + } // Step1: clear self memory // free the dynamic nodes in assignment NodeType::operator=(std::forward(rhs)); @@ -216,8 +225,9 @@ class GenericDocument : public NodeType { } return; } - // NOTE: must free dynamic nodes at first - reinterpret_cast*>(this)->~DNode(); + // NOTE: must free dynamic nodes at first, but keep the base subobject alive + // because Parse() reuses this document after cleanup. + reinterpret_cast*>(this)->destroy(); Allocator::Free(str_); Allocator::Free(schema_str_); // Avoid Double Free @@ -240,7 +250,9 @@ class GenericDocument : public NodeType { } parse_result_ = p.Parse(str_, len, sax); if (sonic_unlikely(sax.oom_)) { - parse_result_ = kErrorNoMem; + if (parse_result_.Error() != kErrorNoMem) { + parse_result_ = ParseResult(kErrorNoMem, parse_result_.Offset()); + } return *this; } if (sonic_unlikely(HasParseError())) { @@ -252,24 +264,95 @@ class GenericDocument : public NodeType { template GenericDocument& parseSchemaImpl(const char* json, size_t len) { + if (own_alloc_) { + return parseSchemaImplWithOwnAllocator(json, len); + } Parser p; - SchemaHandler sax(this, *alloc_); - if (!sax.SetUp(StringView(json, len))) { + NodeType shadow; + if (sonic_unlikely(!shadow.TryCopyFrom(*this, *alloc_))) { parse_result_ = kErrorNoMem; return *this; } - parse_result_ = allocateSchemaStringBuffer(json, len); + char* new_schema_str = nullptr; + parse_result_ = allocateSchemaStringBuffer(json, len, new_schema_str); if (sonic_unlikely(HasParseError())) { return *this; } - parse_result_ = p.Parse(schema_str_, len, sax); + SchemaHandler sax(&shadow, *alloc_); + if (!sax.SetUp(StringView(json, len))) { + Allocator::Free(new_schema_str); + parse_result_ = kErrorNoMem; + return *this; + } + parse_result_ = p.Parse(new_schema_str, len, sax); if (sonic_unlikely(sax.oom_)) { + if (parse_result_.Error() != kErrorNoMem) { + parse_result_ = ParseResult(kErrorNoMem, parse_result_.Offset()); + } + Allocator::Free(new_schema_str); + return *this; + } + if (sonic_unlikely(HasParseError())) { + Allocator::Free(new_schema_str); + return *this; + } + char* old_schema_str = schema_str_; + schema_str_ = new_schema_str; + NodeType::operator=(std::move(shadow)); + Allocator::Free(old_schema_str); + return *this; + } + + template + GenericDocument& parseSchemaImplWithOwnAllocator(const char* json, + size_t len) { + Parser p; + Allocator temp_alloc; + if constexpr (internal::has_had_oom::value) { + if (sonic_unlikely(temp_alloc.HadOom())) { + parse_result_ = kErrorNoMem; + return *this; + } + } + + NodeType shadow; + if (sonic_unlikely(!shadow.TryCopyFrom(*this, temp_alloc, true))) { + parse_result_ = kErrorNoMem; + return *this; + } + + char* new_schema_str = nullptr; + parse_result_ = allocateStringBufferWithAllocator(json, len, temp_alloc, + new_schema_str); + if (sonic_unlikely(HasParseError())) { + return *this; + } + + SchemaHandler sax(&shadow, temp_alloc); + if (!sax.SetUp(StringView(json, len))) { + Allocator::Free(new_schema_str); parse_result_ = kErrorNoMem; return *this; } + parse_result_ = p.Parse(new_schema_str, len, sax); + if (sonic_unlikely(sax.oom_)) { + if (parse_result_.Error() != kErrorNoMem) { + parse_result_ = ParseResult(kErrorNoMem, parse_result_.Offset()); + } + Allocator::Free(new_schema_str); + return *this; + } if (sonic_unlikely(HasParseError())) { + Allocator::Free(new_schema_str); return *this; } + + destroyDom(); + *own_alloc_ = std::move(temp_alloc); + alloc_ = own_alloc_.get(); + NodeType::operator=(std::move(shadow)); + schema_str_ = new_schema_str; + str_ = nullptr; return *this; } @@ -279,7 +362,8 @@ class GenericDocument : public NodeType { const GenericJsonPointer& path) { // get the target json field StringView target; - parse_result_ = GetOnDemand(StringView(json, len), path, target); + parse_result_ = + GetOnDemand(StringView(json, len), path, target); if (sonic_unlikely(HasParseError())) { return *this; } @@ -288,6 +372,9 @@ class GenericDocument : public NodeType { } SonicError allocateStringBuffer(const char* json, size_t len) { + if (sonic_unlikely(len > std::numeric_limits::max() - 64)) { + return kErrorNoMem; + } size_t pad_len = len + 64; str_ = (char*)(alloc_->Malloc(pad_len)); if (str_ == nullptr) { @@ -301,17 +388,27 @@ class GenericDocument : public NodeType { return kErrorNone; } - SonicError allocateSchemaStringBuffer(const char* json, size_t len) { + SonicError allocateSchemaStringBuffer(const char* json, size_t len, + char*& new_schema_str) { + return allocateStringBufferWithAllocator(json, len, *alloc_, + new_schema_str); + } + + SonicError allocateStringBufferWithAllocator(const char* json, size_t len, + Allocator& alloc, char*& out) { + if (sonic_unlikely(len > std::numeric_limits::max() - 64)) { + return kErrorNoMem; + } size_t pad_len = len + 64; - schema_str_ = (char*)(alloc_->Malloc(pad_len)); - if (schema_str_ == nullptr) { + out = (char*)(alloc.Malloc(pad_len)); + if (out == nullptr) { return kErrorNoMem; } - std::memcpy(schema_str_, json, len); + std::memcpy(out, json, len); // Add ending mask to support parsing invalid json - schema_str_[len] = 'x'; - schema_str_[len + 1] = '"'; - schema_str_[len + 2] = 'x'; + out[len] = 'x'; + out[len + 1] = '"'; + out[len + 2] = 'x'; return kErrorNone; } template diff --git a/include/sonic/dom/genericnode.h b/include/sonic/dom/genericnode.h index 7b3c4e9d..1849d119 100644 --- a/include/sonic/dom/genericnode.h +++ b/include/sonic/dom/genericnode.h @@ -19,6 +19,8 @@ #include #include #include +#include +#include #include #include "sonic/dom/handler.h" @@ -36,8 +38,24 @@ namespace sonic_json { template class MemberNodeT { public: - const NodeType name; + MemberNodeT(NodeType&& key, NodeType&& member_value) noexcept + : name(std::move(key)), value(std::move(member_value)) {} + MemberNodeT(const MemberNodeT&) = delete; + MemberNodeT& operator=(const MemberNodeT&) = delete; + MemberNodeT(MemberNodeT&& rhs) noexcept + : name(std::move(rhs.name)), value(std::move(rhs.value)) {} + MemberNodeT& operator=(MemberNodeT&&) = delete; + ~MemberNodeT() = default; + + // Object lookup maps cache key StringViews. Mutating a key after insertion + // can make lookups inconsistent; update members through object APIs instead. + NodeType name; NodeType value; + + private: + friend NodeType; + + NodeType& mutableName() noexcept { return name; } }; // Forward Declaration. @@ -77,7 +95,7 @@ class GenericNode { /** * @brief Default constructor, which creates a null node. */ - GenericNode() noexcept {} + GenericNode() noexcept { setType(kNull); } /** * @brief Constructor for creating specific types. @@ -161,8 +179,11 @@ class GenericNode { * heap. This constructor function only copies the pointer. */ GenericNode(const char* s, size_t len) noexcept { - setLength(len, kStringConst); - sv.p = s; + if (sonic_likely(setLengthChecked(len, kStringConst))) { + sv.p = s; + } else { + sv.p = ""; + } } /** @@ -170,8 +191,11 @@ class GenericNode { * @param s string_view that contains string pointer and length. */ explicit GenericNode(StringView s) noexcept { - setLength(s.size(), kStringConst); - sv.p = s.data(); + if (sonic_likely(setLengthChecked(s.size(), kStringConst))) { + sv.p = s.data(); + } else { + sv.p = ""; + } } /** @@ -181,6 +205,7 @@ class GenericNode { * @param alloc Allocator */ GenericNode(const char* s, size_t len, alloc_type& alloc) { + setType(kNull); StringCopy(s, len, alloc); } @@ -190,6 +215,7 @@ class GenericNode { * @param alloc Allocator */ GenericNode(StringView s, alloc_type& alloc) { + setType(kNull); StringCopy(s.data(), s.size(), alloc); } @@ -202,14 +228,23 @@ class GenericNode { * @note: If failed when allocating memory, the node will be set as an empty * string. */ - void StringCopy(const char* s, size_t len, alloc_type& alloc) { - sv.p = (char*)(alloc.Malloc(len + 1)); - if (sv.p) { - std::memcpy(const_cast(sv.p), s, len); - const_cast(sv.p)[len] = '\0'; + bool StringCopy(const char* s, size_t len, alloc_type& alloc) { + if (sonic_unlikely(len > kMaxStoredLength)) { + downCast()->destroy(); + setEmptyString(); + return false; + } + char* p = (char*)(alloc.Malloc(len + 1)); + downCast()->destroy(); + if (p) { + std::memcpy(p, s, len); + p[len] = '\0'; + sv.p = p; setLength(len, kStringFree); + return true; } else { setEmptyString(); + return false; } } @@ -457,28 +492,44 @@ class GenericNode { NodeType& SetStringNumber(StringView s, alloc_type& alloc) { return downCast()->setStringNumberImpl(s, alloc); } + + bool TrySetStringNumber(StringView s, alloc_type& alloc) { + return downCast()->trySetStringNumberImpl(s, alloc); + } + + SonicError SetStringNumberWithError(StringView s, alloc_type& alloc) { + return TrySetStringNumber(s, alloc) ? kErrorNone : kErrorNoMem; + } /** * @brief Set this node as a copied string through the allocator alloc. * allocator. * @param s string_view that contains string pointer and size. * @param alloc Allocator which maintains the node's memory. * @return NodeType& Reference to this. - * @note this node will deconstruct firstly. + * @note On allocation failure this node is unchanged. Use TrySetString to + * detect failure. */ NodeType& SetString(StringView s, alloc_type& alloc) { return SetString(s.data(), s.size(), alloc); } + bool TrySetString(StringView s, alloc_type& alloc) { + return TrySetString(s.data(), s.size(), alloc); + } /** * @brief Set this node as a copied string through the allocator alloc. * @param s string pointer * @param len string length * @param alloc Allocator which manages the node's memory. * @return NodeType& Reference to this. - * @note this node will deconstruct firstly. + * @note On allocation failure this node is unchanged. Use TrySetString to + * detect failure. */ NodeType& SetString(const char* s, size_t len, alloc_type& alloc) { return downCast()->setStringImpl(s, len, alloc); } + bool TrySetString(const char* s, size_t len, alloc_type& alloc) { + return downCast()->trySetStringImpl(s, len, alloc); + } /** * @brief Set this node as string type. Only copy string pointer. @@ -745,20 +796,25 @@ class GenericNode { ret.error = kErrorNone; internal::JsonPath path; - // padding some buffers - std::string pathpadd = internal::paddingJsonPath(jsonpath); - // Only parse the logical jsonpath length; the extra '\0' bytes are for - // safe lookahead during unescaping. - if (!path.ParsePadded(StringView(pathpadd.data(), pathpadd.size()), - jsonpath.size())) { - ret.error = kUnsupportedJsonPath; - return ret; - } + try { + // padding some buffers + std::string pathpadd = internal::paddingJsonPath(jsonpath); + // Only parse the logical jsonpath length; the extra '\0' bytes are for + // safe lookahead during unescaping. + if (!path.ParsePadded(StringView(pathpadd.data(), pathpadd.size()), + jsonpath.size())) { + ret.error = kUnsupportedJsonPath; + return ret; + } - if (path[0].is_root() && path.size() == 1) { - ret.nodes.push_back(self); - } else if (!self->atJsonPathImpl(path, 1, ret.nodes)) { - ret.error = kNotFoundByJsonPath; + if (path[0].is_root() && path.size() == 1) { + ret.nodes.push_back(self); + } else if (!self->atJsonPathImpl(path, 1, ret.nodes)) { + ret.error = kNotFoundByJsonPath; + ret.nodes.clear(); + } + } catch (const std::bad_alloc&) { + ret.error = kErrorNoMem; ret.nodes.clear(); } return ret; @@ -854,6 +910,15 @@ class GenericNode { return downCast()->addMemberImpl(key, value, alloc, copyKey); } + SonicError AddMemberWithError(StringView key, NodeType&& value, + alloc_type& alloc, bool copyKey = true, + MemberIterator* inserted = nullptr) { + sonic_assert(this->IsObject()); + MemberIterator it = downCast()->addMemberImpl(key, value, alloc, copyKey); + if (inserted) *inserted = it; + return it == MemberEnd() ? kErrorNoMem : kErrorNone; + } + /** * @brief Reserve object capacity if NodeType support. Otherwise do nothing. * @param new_cap Expected object capacity, unit is member(key-value pair) @@ -865,6 +930,14 @@ class GenericNode { return downCast()->memberReserveImpl(new_cap, alloc); } + SonicError MemberReserveWithError(size_t new_cap, alloc_type& alloc) { + sonic_assert(this->IsObject()); + size_t old_cap = Capacity(); + downCast()->memberReserveImpl(new_cap, alloc); + return new_cap <= old_cap || Capacity() >= new_cap ? kErrorNone + : kErrorNoMem; + } + /** * @brief Remove a specific member in the object by key. * @param key string view that contains key's pointer and size @@ -1004,6 +1077,14 @@ class GenericNode { return downCast()->reserveImpl(new_cap, alloc); } + SonicError ReserveWithError(size_t new_cap, alloc_type& alloc) { + sonic_assert(this->IsArray()); + size_t old_cap = Capacity(); + downCast()->reserveImpl(new_cap, alloc); + return new_cap <= old_cap || Capacity() >= new_cap ? kErrorNone + : kErrorNoMem; + } + /** * @brief Push an element into an array. * @tparam ValueType push node type @@ -1017,6 +1098,17 @@ class GenericNode { return downCast()->pushBackImpl(value, alloc); } + bool TryPushBack(NodeType&& value, alloc_type& alloc) { + sonic_assert(this->IsArray()); + size_t old_size = Size(); + downCast()->pushBackImpl(value, alloc); + return Size() == old_size + 1; + } + + SonicError PushBackWithError(NodeType&& value, alloc_type& alloc) { + return TryPushBack(std::move(value), alloc) ? kErrorNone : kErrorNoMem; + } + /** * @brief pop out the last element in an array * @return NodeType& reference for this node to support streaming APIs @@ -1054,6 +1146,10 @@ class GenericNode { * @note erase in the range [first, last) */ ValueIterator Erase(size_t first, size_t last) noexcept { + if (first == last) { + auto b = Begin(); + return b == nullptr ? b : b + first; + } return Erase(Begin() + first, Begin() + last); } @@ -1086,6 +1182,9 @@ class GenericNode { } protected: + static constexpr size_t kMaxStoredLength = + static_cast(std::numeric_limits::max() >> kInfoBits); + sonic_force_inline NodeType* next() noexcept { return downCast()->nextImpl(); } @@ -1106,10 +1205,26 @@ class GenericNode { return static_cast(t.t & kBasicTypeMask); } sonic_force_inline void setLength(size_t len) noexcept { + if (sonic_unlikely(len > kMaxStoredLength)) { + setType(kNull); + return; + } sv.len = (len << kInfoBits) | static_cast(t.t); } sonic_force_inline void setLength(size_t len, TypeFlag flag) noexcept { + if (sonic_unlikely(len > kMaxStoredLength)) { + setType(kNull); + return; + } + sv.len = (len << kInfoBits) | static_cast(flag); + } + sonic_force_inline bool setLengthChecked(size_t len, TypeFlag flag) noexcept { + if (sonic_unlikely(len > kMaxStoredLength)) { + setType(kNull); + return false; + } sv.len = (len << kInfoBits) | static_cast(flag); + return true; } sonic_force_inline void setType(TypeFlag flag) noexcept { sv.len = static_cast(flag); @@ -1218,9 +1333,9 @@ class GenericNode { } return nullptr; } else { // Json Pointer node is number - if (re->IsArray()) { - int idx = node.GetNum(); - if (idx >= 0 && idx < static_cast(re->Size())) { + if (re->IsArray() && node.IsValidNum()) { + uint64_t idx = node.GetNum(); + if (idx < re->Size()) { re = &(re->operator[]((size_t)idx)); continue; } diff --git a/include/sonic/dom/handler.h b/include/sonic/dom/handler.h index d3d95544..236f0e38 100644 --- a/include/sonic/dom/handler.h +++ b/include/sonic/dom/handler.h @@ -16,9 +16,11 @@ #pragma once +#include #include #include "sonic/dom/type.h" +#include "sonic/error.h" #include "sonic/internal/arch/simd_base.h" #include "sonic/string_view.h" #include "sonic/writebuffer.h" @@ -37,12 +39,13 @@ class SAXHandler { bool oom_{false}; SAXHandler() = default; - SAXHandler(Allocator &alloc) : alloc_(&alloc) {} + SAXHandler(Allocator& alloc) : alloc_(&alloc) {} - SAXHandler(const SAXHandler &) = delete; - SAXHandler &operator=(const SAXHandler &rhs) = delete; - SAXHandler(SAXHandler &&rhs) + SAXHandler(const SAXHandler&) = delete; + SAXHandler& operator=(const SAXHandler& rhs) = delete; + SAXHandler(SAXHandler&& rhs) : oom_(rhs.oom_), + error_(rhs.error_), st_(rhs.st_), np_(rhs.np_), cap_(rhs.cap_), @@ -53,9 +56,10 @@ class SAXHandler { rhs.np_ = 0; rhs.alloc_ = 0; rhs.oom_ = false; + rhs.error_ = kErrorNone; } - SAXHandler &operator=(SAXHandler &&rhs) { + SAXHandler& operator=(SAXHandler&& rhs) { TearDown(); st_ = rhs.st_; np_ = rhs.np_; @@ -63,6 +67,7 @@ class SAXHandler { parent_ = rhs.parent_; alloc_ = rhs.alloc_; oom_ = rhs.oom_; + error_ = rhs.error_; rhs.st_ = nullptr; rhs.np_ = 0; @@ -70,32 +75,34 @@ class SAXHandler { rhs.parent_ = 0; rhs.alloc_ = 0; rhs.oom_ = false; + rhs.error_ = kErrorNone; return *this; } ~SAXHandler() { TearDown(); } + sonic_force_inline SonicError GetError() const noexcept { return error_; } + sonic_force_inline bool SetUp(StringView json) { + oom_ = false; + error_ = kErrorNone; size_t len = json.size(); size_t cap = len / 2 + 2; if (cap < 16) cap = 16; - if (!st_ || cap_ < cap) { - NodeType *new_st = static_cast( - std::realloc((void *)(st_), sizeof(NodeType) * cap)); - if (!new_st) return false; - st_ = new_st; - cap_ = cap; - } - return true; + return reserveStack(cap); } sonic_force_inline void TearDown() { - if (st_ == nullptr) return; - for (size_t i = 0; i < np_; i++) { - st_[i].~NodeType(); + if (st_ != nullptr) { + for (size_t i = 0; i < np_; i++) { + st_[i].~NodeType(); + } + std::free(st_); } - std::free(st_); st_ = nullptr; + np_ = 0; + cap_ = 0; + parent_ = 0; } #define SONIC_ADD_NODE() \ @@ -145,7 +152,7 @@ class SAXHandler { return true; } - sonic_force_inline bool Raw(const char *data, size_t len) { + sonic_force_inline bool Raw(const char* data, size_t len) { SONIC_ADD_NODE(); new (&st_[np_ - 1]) NodeType(); auto raw = StringView(data, len); @@ -156,7 +163,7 @@ class SAXHandler { sonic_force_inline bool StartObject() noexcept { SONIC_ADD_NODE(); new (&st_[np_ - 1]) NodeType(); - NodeType *cur = &st_[np_ - 1]; + NodeType* cur = &st_[np_ - 1]; cur->o.next.ofs = parent_; parent_ = np_ - 1; return true; @@ -165,60 +172,75 @@ class SAXHandler { sonic_force_inline bool StartArray() noexcept { SONIC_ADD_NODE(); new (&st_[np_ - 1]) NodeType(); - NodeType *cur = &st_[np_ - 1]; + NodeType* cur = &st_[np_ - 1]; cur->o.next.ofs = parent_; parent_ = np_ - 1; return true; } sonic_force_inline bool EndObject(uint32_t pairs) { - NodeType &obj = st_[parent_]; + NodeType& obj = st_[parent_]; size_t old = obj.o.next.ofs; obj.setLength(pairs, kObject); + bool ok = true; if (pairs) { - void *mem = obj.template containerMalloc(pairs, *alloc_); + void* mem = obj.template containerMalloc(pairs, *alloc_); if (sonic_unlikely(mem == nullptr)) { - NodeType *children = &obj + 1; + NodeType* children = &obj + 1; for (size_t i = 0; i < size_t(pairs) * 2; i++) children[i].~NodeType(); obj.setLength(0, kObject); obj.setChildren(nullptr); - oom_ = true; + setOom(); + ok = false; } else { obj.setChildren(mem); - internal::Xmemcpy( - (void *)obj.getObjChildrenFirstUnsafe(), (void *)(&obj + 1), pairs); + MemberType* dst = + reinterpret_cast(obj.getObjChildrenFirstUnsafe()); + NodeType* src = &obj + 1; + for (size_t i = 0; i < pairs; ++i) { + new (&dst[i]) + MemberType(std::move(src[i * 2]), std::move(src[i * 2 + 1])); + src[i * 2].~NodeType(); + src[i * 2 + 1].~NodeType(); + } } } else { obj.setChildren(nullptr); } np_ = parent_ + 1; parent_ = old; - return true; + return ok; } sonic_force_inline bool EndArray(uint32_t count) { - NodeType &arr = st_[parent_]; + NodeType& arr = st_[parent_]; size_t old = arr.o.next.ofs; arr.setLength(count, kArray); + bool ok = true; if (count) { - void *mem = arr.template containerMalloc(count, *alloc_); + void* mem = arr.template containerMalloc(count, *alloc_); if (sonic_unlikely(mem == nullptr)) { - NodeType *children = &arr + 1; + NodeType* children = &arr + 1; for (size_t i = 0; i < count; i++) children[i].~NodeType(); arr.setLength(0, kArray); arr.setChildren(nullptr); - oom_ = true; + setOom(); + ok = false; } else { arr.setChildren(mem); - internal::Xmemcpy( - (void *)arr.getArrChildrenFirstUnsafe(), (void *)(&arr + 1), count); + NodeType* dst = arr.getArrChildrenFirstUnsafe(); + NodeType* src = &arr + 1; + for (size_t i = 0; i < count; ++i) { + new (&dst[i]) NodeType(std::move(src[i])); + src[i].~NodeType(); + } } } else { arr.setChildren(nullptr); } np_ = parent_ + 1; parent_ = old; - return true; + return ok; } private: @@ -239,24 +261,55 @@ class SAXHandler { np_++; return true; } + if (sonic_unlikely(cap_ > std::numeric_limits::max() / 2)) { + setOom(); + return false; + } size_t new_cap = cap_ * 2; - NodeType *new_st = static_cast( - std::realloc((void *)(st_), sizeof(NodeType) * new_cap)); + if (sonic_unlikely(new_cap > + std::numeric_limits::max() / sizeof(NodeType))) { + setOom(); + return false; + } + if (sonic_unlikely(!reserveStack(new_cap))) return false; + np_++; + return true; + } + + sonic_force_inline bool reserveStack(size_t new_cap) { + if (new_cap <= cap_) return true; + if (sonic_unlikely(new_cap > + std::numeric_limits::max() / sizeof(NodeType))) { + setOom(); + return false; + } + NodeType* new_st = + static_cast(std::malloc(sizeof(NodeType) * new_cap)); if (!new_st) { - oom_ = true; + setOom(); return false; } + for (size_t i = 0; i < np_; ++i) { + new (&new_st[i]) NodeType(std::move(st_[i])); + st_[i].~NodeType(); + } + std::free(st_); st_ = new_st; cap_ = new_cap; - np_++; return true; } - NodeType *st_{nullptr}; + sonic_force_inline void setOom() noexcept { + oom_ = true; + error_ = kErrorNoMem; + } + + SonicError error_{kErrorNone}; + NodeType* st_{nullptr}; size_t np_{0}; size_t cap_{0}; size_t parent_{0}; - Allocator *alloc_{nullptr}; + Allocator* alloc_{nullptr}; }; template @@ -266,43 +319,95 @@ class LazySAXHandler { using MemberType = typename NodeType::MemberNode; LazySAXHandler() = delete; - LazySAXHandler(Allocator &alloc) : alloc_(&alloc) {} + LazySAXHandler(Allocator& alloc) : alloc_(&alloc) {} + LazySAXHandler(const LazySAXHandler&) = delete; + LazySAXHandler& operator=(const LazySAXHandler&) = delete; + LazySAXHandler(LazySAXHandler&& rhs) noexcept + : alloc_(rhs.alloc_), + st_(rhs.st_), + np_(rhs.np_), + cap_(rhs.cap_), + oom_(rhs.oom_), + error_(rhs.error_) { + rhs.alloc_ = nullptr; + rhs.st_ = nullptr; + rhs.np_ = 0; + rhs.cap_ = 0; + rhs.oom_ = false; + rhs.error_ = kErrorNone; + } + LazySAXHandler& operator=(LazySAXHandler&& rhs) noexcept { + if (this == &rhs) return *this; + TearDown(); + alloc_ = rhs.alloc_; + st_ = rhs.st_; + np_ = rhs.np_; + cap_ = rhs.cap_; + oom_ = rhs.oom_; + error_ = rhs.error_; + rhs.alloc_ = nullptr; + rhs.st_ = nullptr; + rhs.np_ = 0; + rhs.cap_ = 0; + rhs.oom_ = false; + rhs.error_ = kErrorNone; + return *this; + } - ~LazySAXHandler() { - NodeType *st_ = stack_.template Begin(); - // free allocated escaped buffers - for (size_t i = 0; i < stack_.Size() / sizeof(NodeType); i++) { - st_[i].~NodeType(); + ~LazySAXHandler() { TearDown(); } + + void TearDown() noexcept { + if (st_ != nullptr) { + for (size_t i = 0; i < np_; i++) st_[i].~NodeType(); + std::free(st_); } + st_ = nullptr; + np_ = 0; + cap_ = 0; } sonic_force_inline bool StartArray() { - new (stack_.PushSize(1)) NodeType(kArray); + NodeType* mem = pushNode(); + if (sonic_unlikely(mem == nullptr)) { + setOom(); + return false; + } + new (mem) NodeType(kArray); return true; } sonic_force_inline bool StartObject() { - new (stack_.PushSize(1)) NodeType(kObject); + NodeType* mem = pushNode(); + if (sonic_unlikely(mem == nullptr)) { + setOom(); + return false; + } + new (mem) NodeType(kObject); return true; } sonic_force_inline bool EndArray(size_t count) { - NodeType &arr = *stack_.template Begin(); + NodeType& arr = *st_; arr.setLength(count, kArray); if (count) { - void *mem = arr.template containerMalloc(count, *alloc_); + void* mem = arr.template containerMalloc(count, *alloc_); if (sonic_unlikely(mem == nullptr)) { - NodeType *children = &arr + 1; + NodeType* children = &arr + 1; for (size_t i = 0; i < count; i++) children[i].~NodeType(); - stack_.Pop(count); + popNodes(count); arr.setLength(0, kArray); arr.setChildren(nullptr); - oom_ = true; + setOom(); + return false; } else { arr.setChildren(mem); - internal::Xmemcpy( - (void *)arr.getArrChildrenFirstUnsafe(), (void *)(&arr + 1), count); - stack_.Pop(count); + NodeType* dst = arr.getArrChildrenFirstUnsafe(); + NodeType* src = &arr + 1; + for (size_t i = 0; i < count; ++i) { + new (&dst[i]) NodeType(std::move(src[i])); + src[i].~NodeType(); + } + popNodes(count); } } else { arr.setChildren(nullptr); @@ -311,22 +416,30 @@ class LazySAXHandler { } sonic_force_inline bool EndObject(size_t pairs) { - NodeType &obj = *stack_.template Begin(); + NodeType& obj = *st_; obj.setLength(pairs, kObject); if (pairs) { - void *mem = obj.template containerMalloc(pairs, *alloc_); + void* mem = obj.template containerMalloc(pairs, *alloc_); if (sonic_unlikely(mem == nullptr)) { - NodeType *children = &obj + 1; + NodeType* children = &obj + 1; for (size_t i = 0; i < size_t(pairs) * 2; i++) children[i].~NodeType(); - stack_.Pop(pairs); + popNodes(size_t(pairs) * 2); obj.setLength(0, kObject); obj.setChildren(nullptr); - oom_ = true; + setOom(); + return false; } else { obj.setChildren(mem); - internal::Xmemcpy( - (void *)obj.getObjChildrenFirstUnsafe(), (void *)(&obj + 1), pairs); - stack_.Pop(pairs); + MemberType* dst = + reinterpret_cast(obj.getObjChildrenFirstUnsafe()); + NodeType* src = &obj + 1; + for (size_t i = 0; i < pairs; ++i) { + new (&dst[i]) + MemberType(std::move(src[i * 2]), std::move(src[i * 2 + 1])); + src[i * 2].~NodeType(); + src[i * 2 + 1].~NodeType(); + } + popNodes(size_t(pairs) * 2); } } else { obj.setChildren(nullptr); @@ -334,27 +447,90 @@ class LazySAXHandler { return true; } - sonic_force_inline bool Key(const char *data, size_t len, size_t allocated) { - new (stack_.PushSize(1)) NodeType(); - NodeType *key = stack_.Top(); - key->setLength(len, allocated ? kStringFree : kStringCopy); - key->sv.p = data; + sonic_force_inline bool Key(const char* data, size_t len, size_t allocated) { + NodeType* mem = pushNode(); + if (sonic_unlikely(mem == nullptr)) { + setOom(); + return false; + } + new (mem) NodeType(); + mem->setLength(len, allocated ? kStringFree : kStringCopy); + mem->sv.p = data; return true; } - sonic_force_inline bool Raw(const char *data, size_t len) { - new (stack_.PushSize(1)) NodeType(); - stack_.Top()->setRaw(StringView(data, len)); + sonic_force_inline bool Raw(const char* data, size_t len) { + NodeType* mem = pushNode(); + if (sonic_unlikely(mem == nullptr)) { + setOom(); + return false; + } + new (mem) NodeType(); + mem->setRaw(StringView(data, len)); return true; } - sonic_force_inline Allocator &GetAllocator() { return *alloc_; } + sonic_force_inline Allocator& GetAllocator() { return *alloc_; } + sonic_force_inline SonicError GetError() const noexcept { return error_; } + sonic_force_inline NodeType* Root() noexcept { return st_; } + sonic_force_inline size_t StackSizeBytes() const noexcept { + return np_ * sizeof(NodeType); + } static constexpr size_t kDefaultNum = 16; + + private: + sonic_force_inline NodeType* pushNode() { + if (sonic_unlikely(np_ == cap_ && !reserveStack(nextCap()))) { + return nullptr; + } + return &st_[np_++]; + } + + sonic_force_inline void popNodes(size_t n) { np_ -= n; } + + sonic_force_inline size_t nextCap() const { + if (cap_ == 0) return kDefaultNum; + size_t max_count = std::numeric_limits::max() / sizeof(NodeType); + return cap_ > max_count / 2 ? max_count : cap_ * 2; + } + + sonic_force_inline bool reserveStack(size_t new_cap) { + if (new_cap <= cap_) return true; + if (sonic_unlikely(new_cap > + std::numeric_limits::max() / sizeof(NodeType))) { + setOom(); + return false; + } + NodeType* new_st = + static_cast(std::malloc(sizeof(NodeType) * new_cap)); + if (sonic_unlikely(new_st == nullptr)) { + setOom(); + return false; + } + for (size_t i = 0; i < np_; ++i) { + new (&new_st[i]) NodeType(std::move(st_[i])); + st_[i].~NodeType(); + } + std::free(st_); + st_ = new_st; + cap_ = new_cap; + return true; + } + + sonic_force_inline void setOom() noexcept { + oom_ = true; + error_ = kErrorNoMem; + } + + public: // allocator for node stack and string buffers - Allocator *alloc_{nullptr}; - internal::Stack stack_{}; + Allocator* alloc_{nullptr}; + NodeType* st_{nullptr}; + size_t np_{0}; + size_t cap_{0}; bool oom_{false}; + SonicError error_{kErrorNone}; }; } // namespace sonic_json diff --git a/include/sonic/dom/json_pointer.h b/include/sonic/dom/json_pointer.h index 8ee95b36..78c1f448 100644 --- a/include/sonic/dom/json_pointer.h +++ b/include/sonic/dom/json_pointer.h @@ -16,6 +16,7 @@ #pragma once +#include #include #include @@ -38,15 +39,17 @@ class GenericJsonPointerNode { GenericJsonPointerNode() = delete; GenericJsonPointerNode(std::nullptr_t) = delete; GenericJsonPointerNode(StringView str) - : str_(str), num_(0), is_number_(false) {} + : str_(str), num_(0), is_number_(false), number_valid_(true) {} GenericJsonPointerNode(const std::string& str) - : str_(str), num_(0), is_number_(false) {} + : str_(str), num_(0), is_number_(false), number_valid_(true) {} GenericJsonPointerNode(const char* str) - : str_(str), num_(0), is_number_(false) {} + : str_(str), num_(0), is_number_(false), number_valid_(true) {} template ::value, bool>::type = true> GenericJsonPointerNode(T i) - : str_(), num_(static_cast(i)), is_number_(true) {} + : str_(), num_(0), is_number_(true), number_valid_(true) { + setNumber(i); + } GenericJsonPointerNode(const GenericJsonPointerNode& rhs) = default; GenericJsonPointerNode(GenericJsonPointerNode&& rhs) = default; @@ -56,7 +59,9 @@ class GenericJsonPointerNode { ~GenericJsonPointerNode() = default; bool operator==(const GenericJsonPointerNode& rhs) const { - return IsStr() ? str_ == rhs.str_ : num_ == rhs.num_; + if (is_number_ != rhs.is_number_) return false; + return IsStr() ? str_ == rhs.str_ + : (number_valid_ == rhs.number_valid_ && num_ == rhs.num_); } bool operator!=(const GenericJsonPointerNode& rhs) const { return !(*this == rhs); @@ -64,15 +69,29 @@ class GenericJsonPointerNode { bool IsNum() const { return is_number_; } bool IsStr() const { return !is_number_; } - int GetNum() const { return num_; } + bool IsValidNum() const { return !is_number_ || number_valid_; } + uint64_t GetNum() const { return num_; } const StringType& GetStr() const { return str_; } size_t Size() const { return str_.size(); } const char* Data() const { return str_.data(); } private: + template + void setNumber(T i) { + if constexpr (std::is_signed::value) { + if (i < 0) { + number_valid_ = false; + num_ = 0; + return; + } + } + num_ = static_cast(i); + } + StringType str_{}; - int num_{}; + uint64_t num_{}; bool is_number_{}; + bool number_valid_{}; }; template diff --git a/include/sonic/dom/parser.h b/include/sonic/dom/parser.h index 3521e6c2..6c793684 100644 --- a/include/sonic/dom/parser.h +++ b/include/sonic/dom/parser.h @@ -17,9 +17,11 @@ #pragma once #include +#include #include +#include #include -#include +#include #include "sonic/dom/flags.h" #include "sonic/dom/handler.h" @@ -31,6 +33,7 @@ #include "sonic/internal/arch/simd_str2int.h" #include "sonic/internal/atof_native.h" #include "sonic/internal/parse_number_normal_fast.h" +#include "sonic/internal/stack.h" #include "sonic/internal/utils.h" #include "sonic/writebuffer.h" @@ -39,36 +42,51 @@ namespace sonic_json { // GetOnDemand get the target raw json fields of the json pointer. // The default JPStringType is // std::string(SONIC_JSON_POINTER_NODE_STRING_DEFAULT_TYPE). -template +template ParseResult GetOnDemand(StringView json, - const GenericJsonPointer &path, - StringView &target) { + const GenericJsonPointer& path, + StringView& target) { internal::SkipScanner scan; size_t pos = 0; - long start = scan.GetOnDemand(json, pos, path); + long start = scan.template GetOnDemand(json, pos, path); if (start < 0) { target = ""; // clear the exist target + if constexpr (parseFlags & ParseFlags::kParseValidateOnDemandFull) { + internal::SkipScanner validator; + ParseResult validation = + validator.template ValidateJson(json); + if (validation.Error()) return validation; + } return ParseResult(SonicError(-start), pos); } target = StringView(json.data() + start, pos - start); + if constexpr (parseFlags & ParseFlags::kParseValidateOnDemandFull) { + internal::SkipScanner validator; + ParseResult validation = validator.template ValidateJson(json); + if (validation.Error()) { + target = ""; + return validation; + } + } return ParseResult(kErrorNone, pos); } template class Parser { public: - explicit Parser() noexcept = default; + explicit Parser() noexcept {} // sonic_force_inline Parser(JsonInput& input) : input_(input) {} - Parser(Parser &&other) noexcept = default; - sonic_force_inline Parser(const Parser &other) = delete; - sonic_force_inline Parser &operator=(const Parser &other) = delete; - sonic_force_inline Parser &operator=(Parser &&other) noexcept = default; + Parser(Parser&& other) noexcept = default; + sonic_force_inline Parser(const Parser& other) = delete; + sonic_force_inline Parser& operator=(const Parser& other) = delete; + sonic_force_inline Parser& operator=(Parser&& other) noexcept = default; ~Parser() noexcept = default; template - sonic_force_inline ParseResult Parse(char *data, size_t len, SAX &sax) { + sonic_force_inline ParseResult Parse(char* data, size_t len, SAX& sax) { reset(); - json_buf_ = reinterpret_cast(data); + json_buf_ = reinterpret_cast(data); len_ = len; parseImpl(sax); if (!err_ && hasTrailingChars()) { @@ -80,12 +98,34 @@ class Parser { // parseLazyImpl only mark the json positions, and not parse any more, even // the keys. template - sonic_force_inline ParseResult ParseLazy(const uint8_t *data, size_t len, - LazySAX &sax) { + sonic_force_inline ParseResult ParseLazy(const uint8_t* data, size_t len, + LazySAX& sax) { return parseLazyImpl(data, len, sax); } private: + template + struct HasGetError : std::false_type {}; + + template + struct HasGetError< + SAX, std::void_t().GetError())>> + : std::true_type {}; + + template + sonic_force_inline SonicError saxError(const SAX& sax) const { + if constexpr (HasGetError::value) { + SonicError err = sax.GetError(); + if (err != kErrorNone) return err; + } + return kSaxTermination; + } + + template + sonic_force_inline void setSaxError(const SAX& sax) { + if (err_ == kErrorNone) err_ = saxError(sax); + } + sonic_force_inline bool hasTrailingChars() { while (pos_ < len_) { if (!internal::IsSpace(json_buf_[pos_])) return true; @@ -97,10 +137,8 @@ class Parser { sonic_force_inline void setParseError(SonicError err) { err_ = err; } template - sonic_force_inline bool parseNull(SAX &sax) { - const static uint32_t kNullBin = 0x6c6c756e; - if (internal::EqBytes4(json_buf_ + pos_ - 1, kNullBin)) { - pos_ += 3; + sonic_force_inline bool parseNull(SAX& sax) { + if (internal::SkipLiteral(json_buf_, pos_, len_, 'n')) { return sax.Null(); } setParseError(kParseErrorInvalidChar); @@ -108,11 +146,8 @@ class Parser { } template - sonic_force_inline bool parseFalse(SAX &sax) { - const static uint32_t kFalseBin = - 0x65736c61; // the binary of 'alse' in false - if (internal::EqBytes4(json_buf_ + pos_, kFalseBin)) { - pos_ += 4; + sonic_force_inline bool parseFalse(SAX& sax) { + if (internal::SkipLiteral(json_buf_, pos_, len_, 'f')) { return sax.Bool(false); } setParseError(kParseErrorInvalidChar); @@ -120,10 +155,8 @@ class Parser { } template - sonic_force_inline bool parseTrue(SAX &sax) { - constexpr static uint32_t kTrueBin = 0x65757274; - if (internal::EqBytes4(json_buf_ + pos_ - 1, kTrueBin)) { - pos_ += 3; + sonic_force_inline bool parseTrue(SAX& sax) { + if (internal::SkipLiteral(json_buf_, pos_, len_, 't')) { return sax.Bool(true); } setParseError(kParseErrorInvalidChar); @@ -131,28 +164,28 @@ class Parser { } sonic_force_inline StringView parseStringHelper() { - uint8_t *src = json_buf_ + pos_; - uint8_t *sdst = src; + uint8_t* src = json_buf_ + pos_; + uint8_t* sdst = src; size_t n = internal::parseStringInplace(src, err_); pos_ = src - json_buf_; - return StringView(reinterpret_cast(sdst), n); + return StringView(reinterpret_cast(sdst), n); } template - sonic_force_inline bool parseStrInPlace(SAX &sax) { + sonic_force_inline bool parseStrInPlace(SAX& sax) { StringView sv = parseStringHelper(); if (sonic_unlikely(err_ != kErrorNone)) return true; return sax.String(sv); } template - sonic_force_inline bool parseKeyInPlace(SAX &sax) { + sonic_force_inline bool parseKeyInPlace(SAX& sax) { StringView sv = parseStringHelper(); if (sonic_unlikely(err_ != kErrorNone)) return true; return sax.Key(sv); } - sonic_force_inline bool carry_one(char c, uint64_t &sum) const { + sonic_force_inline bool carry_one(char c, uint64_t& sum) const { uint8_t d = static_cast(c - '0'); if (d > 9) { return false; @@ -161,7 +194,7 @@ class Parser { return true; } - sonic_force_inline uint64_t str2int(const char *s, size_t &i) const { + sonic_force_inline uint64_t str2int(const char* s, size_t& i) const { uint64_t sum = 0; while (carry_one(s[i], sum)) { i++; @@ -169,7 +202,7 @@ class Parser { return sum; } - sonic_force_inline bool parseFloatingFast(double &d, int exp10, + sonic_force_inline bool parseFloatingFast(double& d, int exp10, uint64_t man) const { d = (double)man; // if man is small, but exp is large, also can parse exactly @@ -191,8 +224,8 @@ class Parser { } } - SonicError parseFloatEiselLemire64(double &dbl, int exp10, uint64_t man, - int sgn, bool trunc, const char *s) const { + SonicError parseFloatEiselLemire64(double& dbl, int exp10, uint64_t man, + int sgn, bool trunc, const char* s) const { union { double val = 0; uint64_t uval; @@ -221,7 +254,7 @@ class Parser { } template - sonic_force_inline bool parseNumber(SAX &sax) { + sonic_force_inline bool parseNumber(SAX& sax) { // These helper macros are used only within this function. // Define/undefine them locally to avoid leaking into includers. #undef FLOATING_LONGEST_DIGITS @@ -248,33 +281,33 @@ class Parser { } \ } while (0) -#define SET_INT_AND_RETURN(int_val) \ - do { \ - if (!sax.Int(int_val)) RETURN_SET_ERROR_CODE(kSaxTermination); \ - RETURN_SET_ERROR_CODE(kErrorNone); \ +#define SET_INT_AND_RETURN(int_val) \ + do { \ + if (!sax.Int(int_val)) RETURN_SET_ERROR_CODE(saxError(sax)); \ + RETURN_SET_ERROR_CODE(kErrorNone); \ } while (0) -#define SET_UINT_AND_RETURN(int_val) \ - do { \ - if (!sax.Uint(int_val)) RETURN_SET_ERROR_CODE(kSaxTermination); \ - RETURN_SET_ERROR_CODE(kErrorNone); \ - } while (0) - -#define SET_DOUBLE_AND_RETURN(dbl) \ +#define SET_UINT_AND_RETURN(int_val) \ do { \ - if (!sax.Double(dbl)) RETURN_SET_ERROR_CODE(kSaxTermination); \ + if (!sax.Uint(int_val)) RETURN_SET_ERROR_CODE(saxError(sax)); \ RETURN_SET_ERROR_CODE(kErrorNone); \ } while (0) -#define SET_U64_AS_DOUBLE_AND_RETURN(int_val) \ - do { \ - union { \ - double d; \ - uint64_t u; \ - } du; \ - du.u = int_val; \ - if (!sax.Double(du.d)) RETURN_SET_ERROR_CODE(kSaxTermination); \ - RETURN_SET_ERROR_CODE(kErrorNone); \ +#define SET_DOUBLE_AND_RETURN(dbl) \ + do { \ + if (!sax.Double(dbl)) RETURN_SET_ERROR_CODE(saxError(sax)); \ + RETURN_SET_ERROR_CODE(kErrorNone); \ + } while (0) + +#define SET_U64_AS_DOUBLE_AND_RETURN(int_val) \ + do { \ + union { \ + double d; \ + uint64_t u; \ + } du; \ + du.u = int_val; \ + if (!sax.Double(du.d)) RETURN_SET_ERROR_CODE(saxError(sax)); \ + RETURN_SET_ERROR_CODE(kErrorNone); \ } while (0) static constexpr uint64_t kUint64Max = 0xFFFFFFFFFFFFFFFF; @@ -286,7 +319,7 @@ class Parser { size_t i = pos_ - 1; size_t start_idx = pos_ - 1; size_t exp10_s = i; - const char *s = reinterpret_cast(json_buf_); + const char* s = reinterpret_cast(json_buf_); using internal::is_digit; /* check sign */ @@ -329,7 +362,7 @@ class Parser { // Zero Integer if constexpr (parseFlags & ParseFlags::kParseIntegerAsRaw) { if (!sax.Raw(s + start_idx, i - start_idx)) - RETURN_SET_ERROR_CODE(kSaxTermination); + RETURN_SET_ERROR_CODE(saxError(sax)); RETURN_SET_ERROR_CODE(kErrorNone); } SET_UINT_AND_RETURN(0); @@ -370,7 +403,7 @@ class Parser { // Integer if constexpr (parseFlags & ParseFlags::kParseIntegerAsRaw) { if (!sax.Raw(s + start_idx, i - start_idx)) - RETURN_SET_ERROR_CODE(kSaxTermination); + RETURN_SET_ERROR_CODE(saxError(sax)); RETURN_SET_ERROR_CODE(kErrorNone); } @@ -519,7 +552,7 @@ class Parser { return parseNumberAsString(sax); } } - if (!sax.Double(d)) RETURN_SET_ERROR_CODE(kSaxTermination); + if (!sax.Double(d)) RETURN_SET_ERROR_CODE(saxError(sax)); RETURN_SET_ERROR_CODE(error_code); } @@ -536,7 +569,7 @@ class Parser { } template - sonic_force_inline bool parseNumberAsString(SAX &sax) { + sonic_force_inline bool parseNumberAsString(SAX& sax) { // These helper macros are used only within this function. // Define/undefine them locally to avoid hidden coupling with parseNumber(). #undef RETURN_SET_ERROR_CODE @@ -559,29 +592,29 @@ class Parser { } \ } while (0) -#define SET_INT_AND_RETURN(int_val) \ - do { \ - if (!sax.Int(int_val)) RETURN_SET_ERROR_CODE(kSaxTermination); \ - RETURN_SET_ERROR_CODE(kErrorNone); \ +#define SET_INT_AND_RETURN(int_val) \ + do { \ + if (!sax.Int(int_val)) RETURN_SET_ERROR_CODE(saxError(sax)); \ + RETURN_SET_ERROR_CODE(kErrorNone); \ } while (0) -#define SET_UINT_AND_RETURN(int_val) \ - do { \ - if (!sax.Uint(int_val)) RETURN_SET_ERROR_CODE(kSaxTermination); \ - RETURN_SET_ERROR_CODE(kErrorNone); \ - } while (0) - -#define SET_DOUBLE_AND_RETURN(dbl) \ +#define SET_UINT_AND_RETURN(int_val) \ do { \ - if (!sax.Double(dbl)) RETURN_SET_ERROR_CODE(kSaxTermination); \ + if (!sax.Uint(int_val)) RETURN_SET_ERROR_CODE(saxError(sax)); \ RETURN_SET_ERROR_CODE(kErrorNone); \ } while (0) +#define SET_DOUBLE_AND_RETURN(dbl) \ + do { \ + if (!sax.Double(dbl)) RETURN_SET_ERROR_CODE(saxError(sax)); \ + RETURN_SET_ERROR_CODE(kErrorNone); \ + } while (0) + size_t i = pos_ - 1; size_t start = i; uint64_t man = 0; int man_nd = 0; - const char *s = reinterpret_cast(json_buf_); + const char* s = reinterpret_cast(json_buf_); size_t digit_start = 0; using internal::is_digit; static constexpr uint64_t kUint64Max = 0xFFFFFFFFFFFFFFFF; @@ -691,8 +724,8 @@ class Parser { double_string_fast: // parse floating number as json string value - if (!sax.NumStr(StringView(const_cast(s + start), i - start))) { - RETURN_SET_ERROR_CODE(kSaxTermination); + if (!sax.NumStr(StringView(const_cast(s + start), i - start))) { + RETURN_SET_ERROR_CODE(saxError(sax)); } RETURN_SET_ERROR_CODE(kErrorNone); @@ -704,7 +737,7 @@ class Parser { } template - void parsePrimitives(SAX &sax) { + void parsePrimitives(SAX& sax) { bool ok = true; switch (json_buf_[pos_ - 1]) { case '0': @@ -742,9 +775,7 @@ class Parser { setParseError(kParseErrorInvalidChar); return; } - if (sonic_unlikely(!ok) && err_ == kErrorNone) { - err_ = kSaxTermination; - } + if (sonic_unlikely(!ok)) setSaxError(sax); } template @@ -755,24 +786,33 @@ class Parser { : std::true_type {}; template - sonic_force_inline void parseImpl(SAX &sax) { + sonic_force_inline void parseImpl(SAX& sax) { #define sonic_check_err() \ do { \ if (err_ != kErrorNone) { \ goto err_invalid_char; \ } \ } while (0) -#define sonic_sax_check(expr) \ - do { \ - if (sonic_unlikely(!(expr))) { \ - if (err_ == kErrorNone) err_ = kSaxTermination; \ - return; \ - } \ +#define sonic_sax_check(expr) \ + do { \ + if (sonic_unlikely(!(expr))) { \ + setSaxError(sax); \ + return; \ + } \ } while (0) +#define sonic_depth_push(value) \ + do { \ + if (sonic_unlikely(!depth.Push(static_cast(value)))) { \ + err_ = kErrorNoMem; \ + return; \ + } \ + } while (0) +#define sonic_depth_top() (*depth.Top()) +#define sonic_depth_pop() depth.Pop(1) +#define sonic_depth_empty() depth.Empty() using namespace sonic_json::internal; - // TODO (liuq19): vector is a temporary choice, will optimize in future. - std::vector depth; + Stack depth; const uint32_t kArrMask = 1ull << 31; const uint32_t kObjMask = 0; bool found = true; @@ -781,7 +821,7 @@ class Parser { switch (c) { case '[': { sonic_sax_check(sax.StartArray()); - depth.push_back(kArrMask); + sonic_depth_push(kArrMask); c = scan.SkipSpace(json_buf_, pos_); if (c == ']') { sonic_sax_check(sax.EndArray(0)); @@ -791,7 +831,7 @@ class Parser { } case '{': { sonic_sax_check(sax.StartObject()); - depth.push_back(kObjMask); + sonic_depth_push(kObjMask); c = scan.SkipSpace(json_buf_, pos_); if (c == '}') { sonic_sax_check(sax.EndObject(0)); @@ -813,8 +853,17 @@ class Parser { if SONIC_IF_CONSTEXPR (CheckKeyReturn::value) { if (!found) { - if (!scan.SkipOne(json_buf_, pos_, len_)) { - goto err_invalid_char; + if constexpr (HasGetError::value) { + SonicError key_err = sax.GetError(); + if (key_err != kErrorNone) { + err_ = key_err; + return; + } + } + long skipped = scan.template SkipOne(json_buf_, pos_, len_); + if (skipped < 0) { + err_ = SonicError(-skipped); + return; } c = GetNextToken(json_buf_, pos_, len_, "\"}"); if (c == '"') { @@ -823,7 +872,7 @@ class Parser { } if (c == '}') { pos_++; - sonic_sax_check(sax.EndObject(depth.back())); + sonic_sax_check(sax.EndObject(sonic_depth_top())); goto scope_end; } goto err_invalid_char; @@ -831,14 +880,14 @@ class Parser { } else if (sonic_unlikely(!found)) { // Without CheckKeyReturn, `false` from Key() is a handler rejection // (e.g. OOM), not a skip signal. - if (err_ == kErrorNone) err_ = kSaxTermination; + setSaxError(sax); return; } c = scan.SkipSpace(json_buf_, pos_); switch (c) { case '{': { sonic_sax_check(sax.StartObject()); - depth.push_back(kObjMask); + sonic_depth_push(kObjMask); c = scan.SkipSpace(json_buf_, pos_); if (c == '}') { sonic_sax_check(sax.EndObject(0)); @@ -848,7 +897,7 @@ class Parser { } case '[': { sonic_sax_check(sax.StartArray()); - depth.push_back(kArrMask); + sonic_depth_push(kArrMask); c = scan.SkipSpace(json_buf_, pos_); if (c == ']') { sonic_sax_check(sax.EndArray(0)); @@ -874,7 +923,7 @@ class Parser { bool ok = parseTrue(sax); sonic_check_err(); if (sonic_unlikely(!ok)) { - err_ = kSaxTermination; + setSaxError(sax); return; } break; @@ -883,7 +932,7 @@ class Parser { bool ok = parseFalse(sax); sonic_check_err(); if (sonic_unlikely(!ok)) { - err_ = kSaxTermination; + setSaxError(sax); return; } break; @@ -892,7 +941,7 @@ class Parser { bool ok = parseNull(sax); sonic_check_err(); if (sonic_unlikely(!ok)) { - err_ = kSaxTermination; + setSaxError(sax); return; } break; @@ -907,7 +956,7 @@ class Parser { c = scan.SkipSpace(json_buf_, pos_); obj_cont: - depth.back()++; + sonic_depth_top()++; if (c == ',') { c = scan.SkipSpace(json_buf_, pos_); goto obj_key; @@ -915,16 +964,16 @@ class Parser { if (sonic_unlikely(c != '}')) { goto err_invalid_char; } - sonic_sax_check(sax.EndObject(depth.back())); + sonic_sax_check(sax.EndObject(sonic_depth_top())); scope_end: sonic_check_err(); - depth.pop_back(); - if (sonic_unlikely(depth.empty())) { + sonic_depth_pop(); + if (sonic_unlikely(sonic_depth_empty())) { goto doc_end; } c = scan.SkipSpace(json_buf_, pos_); - if (depth.back() & kArrMask) { + if (sonic_depth_top() & kArrMask) { goto arr_cont; } goto obj_cont; @@ -933,7 +982,7 @@ class Parser { switch (c) { case '{': { sonic_sax_check(sax.StartObject()); - depth.push_back(kObjMask); + sonic_depth_push(kObjMask); c = scan.SkipSpace(json_buf_, pos_); if (c == '}') { sonic_sax_check(sax.EndObject(0)); @@ -943,7 +992,7 @@ class Parser { } case '[': { sonic_sax_check(sax.StartArray()); - depth.push_back(kArrMask); + sonic_depth_push(kArrMask); c = scan.SkipSpace(json_buf_, pos_); if (c == ']') { sonic_sax_check(sax.EndArray(0)); @@ -969,7 +1018,7 @@ class Parser { bool ok = parseTrue(sax); sonic_check_err(); if (sonic_unlikely(!ok)) { - err_ = kSaxTermination; + setSaxError(sax); return; } break; @@ -978,7 +1027,7 @@ class Parser { bool ok = parseFalse(sax); sonic_check_err(); if (sonic_unlikely(!ok)) { - err_ = kSaxTermination; + setSaxError(sax); return; } break; @@ -987,7 +1036,7 @@ class Parser { bool ok = parseNull(sax); sonic_check_err(); if (sonic_unlikely(!ok)) { - err_ = kSaxTermination; + setSaxError(sax); return; } break; @@ -1002,13 +1051,13 @@ class Parser { c = scan.SkipSpace(json_buf_, pos_); arr_cont: - depth.back()++; + sonic_depth_top()++; if (c == ',') { c = scan.SkipSpace(json_buf_, pos_); goto arr_val; } if (sonic_likely(c == ']')) { - sonic_sax_check(sax.EndArray(depth.back() & (kArrMask - 1))); + sonic_sax_check(sax.EndArray(sonic_depth_top() & (kArrMask - 1))); goto scope_end; } goto err_invalid_char; @@ -1020,13 +1069,17 @@ class Parser { return; #undef sonic_sax_check #undef sonic_check_err +#undef sonic_depth_push +#undef sonic_depth_top +#undef sonic_depth_pop +#undef sonic_depth_empty } // parseLazyImpl only mark the json positions, and not parse any more, even // the keys. template - sonic_force_inline ParseResult parseLazyImpl(const uint8_t *data, size_t len, - LazySAX &sax) { + sonic_force_inline ParseResult parseLazyImpl(const uint8_t* data, size_t len, + LazySAX& sax) { using Allocator = typename LazySAX::Allocator; size_t pos = 0; size_t cnt = 0; @@ -1040,20 +1093,33 @@ class Parser { size_t sn = 0; const uint8_t *src, *sdst; -#define sonic_lazy_sax_check(expr) \ - do { \ - if (sonic_unlikely(!(expr))) { \ - return ParseResult(kSaxTermination, pos); \ - } \ +#define sonic_lazy_sax_check(expr) \ + do { \ + if (sonic_unlikely(!(expr))) { \ + return ParseResult(saxError(sax), pos); \ + } \ } while (0) +#define sonic_lazy_return_ok() \ + do { \ + while (pos < len && internal::IsSpace(data[pos])) { \ + ++pos; \ + } \ + if (sonic_unlikely(pos != len)) { \ + return ParseResult(kParseErrorInvalidChar, pos); \ + } \ + return ParseResult(kErrorNone, pos); \ + } while (0) + + auto error_offset = [&]() -> size_t { return pos == 0 ? 0 : pos - 1; }; + switch (c) { case '[': { sonic_lazy_sax_check(sax.StartArray()); c = scan.SkipSpaceSafe(data, pos, len); if (c == ']') { sonic_lazy_sax_check(sax.EndArray(0)); - return kErrorNone; + sonic_lazy_return_ok(); } pos--; goto arr_val; @@ -1063,18 +1129,21 @@ class Parser { c = scan.SkipSpaceSafe(data, pos, len); if (c == '}') { sonic_lazy_sax_check(sax.EndObject(0)); - return kErrorNone; + sonic_lazy_return_ok(); } goto obj_key; } default: { // TODO: fix the abstract. + if (sonic_unlikely(pos == 0)) { + return ParseResult(kParseErrorInvalidChar, 0); + } pos--; - start = scan.SkipOne(data, pos, len); + start = scan.template SkipOne(data, pos, len); if (start < 0) goto skip_error; sonic_lazy_sax_check( - sax.Raw(reinterpret_cast(data + start), pos - start)); - return kErrorNone; + sax.Raw(reinterpret_cast(data + start), pos - start)); + sonic_lazy_return_ok(); } } @@ -1086,41 +1155,46 @@ class Parser { src = data + pos; sdst = src; skips = internal::SkipString(data, pos, len); - sn = data + pos - 1 - src; allocated = false; if (!skips) { - return kParseErrorInvalidChar; + return ParseResult(kParseErrorInvalidChar, pos); } + sn = data + pos - 1 - src; if (skips == 2) { // parse escaped strings - uint8_t *dst = (uint8_t *)alloc.Malloc(sn + 32); + if (sonic_unlikely(sn > std::numeric_limits::max() - 32)) { + return ParseResult(kErrorNoMem, pos); + } + uint8_t* dst = (uint8_t*)alloc.Malloc(sn + 32); if (sonic_unlikely(dst == nullptr)) { return ParseResult(kErrorNoMem, pos); } sdst = dst; - std::memcpy(dst, src, sn); + // parseStringInplace scans until the closing quote, so keep the + // terminator from the original buffer in scratch space. + std::memcpy(dst, src, sn + 1); sn = internal::parseStringInplace(dst, err); if (err) { // update the error positions pos = (src - data) + (dst - sdst); - Allocator::Free((void *)(sdst)); - return err; + Allocator::Free((void*)(sdst)); + return ParseResult(err, pos); } allocated = true; } - key = StringView(reinterpret_cast(sdst), sn); + key = StringView(reinterpret_cast(sdst), sn); if (!sax.Key(key.data(), key.size(), allocated)) { - if (allocated) Allocator::Free((void *)(sdst)); - return ParseResult(kSaxTermination, pos); + if (allocated) Allocator::Free((void*)(sdst)); + return ParseResult(saxError(sax), pos); } c = scan.SkipSpaceSafe(data, pos, len); if (sonic_unlikely(c != ':')) { goto err_invalid_char; } - start = scan.SkipOne(data, pos, len); + start = scan.template SkipOne(data, pos, len); if (start < 0) goto skip_error; sonic_lazy_sax_check( - sax.Raw(reinterpret_cast(data + start), pos - start)); + sax.Raw(reinterpret_cast(data + start), pos - start)); cnt++; c = scan.SkipSpaceSafe(data, pos, len); if (c == ',') { @@ -1131,13 +1205,13 @@ class Parser { goto err_invalid_char; } sonic_lazy_sax_check(sax.EndObject(cnt)); - return kErrorNone; + sonic_lazy_return_ok(); arr_val: - start = scan.SkipOne(data, pos, len); + start = scan.template SkipOne(data, pos, len); if (start < 0) goto skip_error; sonic_lazy_sax_check( - sax.Raw(reinterpret_cast(data + start), pos - start)); + sax.Raw(reinterpret_cast(data + start), pos - start)); cnt++; c = scan.SkipSpaceSafe(data, pos, len); if (c == ',') { @@ -1147,13 +1221,14 @@ class Parser { goto err_invalid_char; } sonic_lazy_sax_check(sax.EndArray(cnt)); - return kErrorNone; + sonic_lazy_return_ok(); err_invalid_char: - return ParseResult(kParseErrorInvalidChar, pos - 1); + return ParseResult(kParseErrorInvalidChar, error_offset()); skip_error: - return ParseResult(SonicError(-start), pos - 1); + return ParseResult(SonicError(-start), error_offset()); #undef sonic_lazy_sax_check +#undef sonic_lazy_return_ok } private: @@ -1164,7 +1239,7 @@ class Parser { } constexpr static size_t kJsonPaddingSize = SONICJSON_PADDING; - uint8_t *json_buf_{nullptr}; + uint8_t* json_buf_{nullptr}; size_t len_{0}; size_t pos_{0}; SonicError err_{kErrorNone}; diff --git a/include/sonic/dom/schema_handler.h b/include/sonic/dom/schema_handler.h index 8ad0385c..f9d836b8 100644 --- a/include/sonic/dom/schema_handler.h +++ b/include/sonic/dom/schema_handler.h @@ -16,10 +16,13 @@ #pragma once +#include #include #include "sonic/dom/type.h" +#include "sonic/error.h" #include "sonic/internal/arch/simd_base.h" +#include "sonic/internal/stack.h" #include "sonic/string_view.h" #include "sonic/writebuffer.h" @@ -37,13 +40,14 @@ class SchemaHandler { bool oom_{false}; SchemaHandler() = default; - SchemaHandler(NodeType *root, Allocator &alloc) + SchemaHandler(NodeType* root, Allocator& alloc) : parent_node_(root), cur_node_(root), alloc_(&alloc) {} - SchemaHandler(const SchemaHandler &) = delete; - SchemaHandler &operator=(const SchemaHandler &rhs) = delete; - SchemaHandler(SchemaHandler &&rhs) + SchemaHandler(const SchemaHandler&) = delete; + SchemaHandler& operator=(const SchemaHandler& rhs) = delete; + SchemaHandler(SchemaHandler&& rhs) : oom_(rhs.oom_), + error_(rhs.error_), st_(rhs.st_), parent_node_(rhs.parent_node_), cur_node_(rhs.cur_node_), @@ -60,11 +64,12 @@ class SchemaHandler { rhs.alloc_ = nullptr; rhs.found_node_count_ = 0; rhs.oom_ = false; + rhs.error_ = kErrorNone; parent_st_ = std::move(rhs.parent_st_); found_count_st_ = std::move(rhs.found_count_st_); } - SchemaHandler &operator=(SchemaHandler &&rhs) { + SchemaHandler& operator=(SchemaHandler&& rhs) { TearDown(); st_ = rhs.st_; parent_node_ = rhs.parent_node_; @@ -75,6 +80,7 @@ class SchemaHandler { found_node_count_ = rhs.found_node_count_; alloc_ = rhs.alloc_; oom_ = rhs.oom_; + error_ = rhs.error_; rhs.st_ = nullptr; rhs.parent_node_ = nullptr; @@ -85,6 +91,7 @@ class SchemaHandler { rhs.alloc_ = nullptr; rhs.found_node_count_ = 0; rhs.oom_ = false; + rhs.error_ = kErrorNone; parent_st_ = std::move(rhs.parent_st_); found_count_st_ = std::move(rhs.found_count_st_); return *this; @@ -92,27 +99,34 @@ class SchemaHandler { ~SchemaHandler() { TearDown(); } + sonic_force_inline SonicError GetError() const noexcept { return error_; } + sonic_force_inline bool SetUp(StringView json) { + oom_ = false; + error_ = kErrorNone; size_t len = json.size(); size_t cap = len / 2 + 2; if (cap < 16) cap = 16; - if (!st_ || cap_ < cap) { - NodeType *new_st = static_cast( - std::realloc((void *)(st_), sizeof(NodeType) * cap)); - if (!new_st) return false; - st_ = new_st; - cap_ = cap; - } + if (sonic_unlikely(!reserveStack(cap))) return false; + parent_st_.Clear(); + parent_st_.ClearOom(); + found_count_st_.Clear(); + found_count_st_.ClearOom(); return true; } sonic_force_inline void TearDown() { - if (st_ == nullptr) return; - for (size_t i = 0; i < np_; i++) { - st_[i].~NodeType(); + if (st_ != nullptr) { + for (size_t i = 0; i < np_; i++) { + st_[i].~NodeType(); + } + std::free(st_); } - std::free(st_); st_ = nullptr; + np_ = 0; + cap_ = 0; + parent_ = 0; + found_node_count_ = 0; } #define SONIC_ADD_NODE() \ @@ -170,7 +184,7 @@ class SchemaHandler { return true; } - sonic_force_inline bool Raw(const char *data, size_t len) { + sonic_force_inline bool Raw(const char* data, size_t len) { if (cur_node_) { cur_node_->setRaw(StringView(data, len)); return true; @@ -205,7 +219,10 @@ class SchemaHandler { sonic_force_inline bool String(StringView s) { if (cur_node_) { - cur_node_->SetString(s, *alloc_); + if (sonic_unlikely(!cur_node_->TrySetString(s, *alloc_))) { + setOom(); + return false; + } } else { return stringImpl(s); } @@ -214,22 +231,22 @@ class SchemaHandler { sonic_force_inline bool StartObject() noexcept { if (cur_node_) { - parent_st_.emplace_back(parent_node_); + if (sonic_unlikely(!pushParent(parent_node_))) return false; parent_node_ = cur_node_; cur_node_ = nullptr; if (!parent_node_->IsObject() || parent_node_->Size() == 0) { - parent_st_.emplace_back(parent_node_); + if (sonic_unlikely(!pushParent(parent_node_))) return false; parent_node_ = nullptr; } - found_count_st_.emplace_back(found_node_count_); + if (sonic_unlikely(!pushFoundCount(found_node_count_))) return false; found_node_count_ = 0; return true; } SONIC_ADD_NODE(); new (&st_[np_ - 1]) NodeType(); - NodeType *cur = &st_[np_ - 1]; + NodeType* cur = &st_[np_ - 1]; cur->o.next.ofs = parent_; parent_ = np_ - 1; return true; @@ -237,13 +254,13 @@ class SchemaHandler { sonic_force_inline bool StartArray() noexcept { if (cur_node_) { - parent_st_.emplace_back(parent_node_); + if (sonic_unlikely(!pushParent(parent_node_))) return false; parent_node_ = cur_node_; cur_node_ = nullptr; } SONIC_ADD_NODE(); new (&st_[np_ - 1]) NodeType(); - NodeType *cur = &st_[np_ - 1]; + NodeType* cur = &st_[np_ - 1]; cur->o.next.ofs = parent_; parent_ = np_ - 1; return true; @@ -251,8 +268,7 @@ class SchemaHandler { sonic_force_inline bool NumStr(StringView s) { if (cur_node_) { - cur_node_->setLength(s.size(), kNumStr); - cur_node_->sv.p = s.data(); + cur_node_->SetStringNumber(s); return true; } SONIC_ADD_NODE(); @@ -264,24 +280,23 @@ class SchemaHandler { sonic_force_inline bool EndObject(uint32_t pairs) { if (parent_node_ && parent_node_->IsObject()) { - parent_node_ = parent_st_.back(); - parent_st_.pop_back(); + parent_node_ = popParent(); cur_node_ = nullptr; - found_node_count_ = found_count_st_.back(); - found_count_st_.pop_back(); + found_node_count_ = popFoundCount(); return true; } // all object is need create - NodeType *obj_ptr; - void *obj_member_ptr; + NodeType* obj_ptr; + void* obj_member_ptr; + bool replacing_existing = false; if (parent_ == 0) { - obj_ptr = parent_st_.back(); + replacing_existing = true; + obj_ptr = popParent(); obj_member_ptr = &st_[0]; - parent_st_.pop_back(); // restore parent node ptr - parent_node_ = parent_st_.back(); - parent_st_.pop_back(); + parent_node_ = popParent(); cur_node_ = nullptr; + found_node_count_ = popFoundCount(); np_ = 0; parent_ = 0; } else { @@ -290,37 +305,55 @@ class SchemaHandler { np_ = parent_ + 1; parent_ = obj_ptr->o.next.ofs; } - NodeType &obj = *obj_ptr; - obj.setLength(pairs, kObject); + NodeType& obj = *obj_ptr; + NodeType new_obj; + NodeType& dst = replacing_existing ? new_obj : obj; + dst.setLength(pairs, kObject); + dst.setChildren(nullptr); + bool ok = true; if (pairs) { - void *mem = obj.template containerMalloc(pairs, *alloc_); + void* mem = dst.template containerMalloc(pairs, *alloc_); if (sonic_unlikely(mem == nullptr)) { - NodeType *children = static_cast(obj_member_ptr); + NodeType* children = static_cast(obj_member_ptr); for (size_t i = 0; i < size_t(pairs) * 2; i++) children[i].~NodeType(); - obj.setLength(0, kObject); - obj.setChildren(nullptr); - oom_ = true; + dst.setLength(0, kObject); + dst.setChildren(nullptr); + setOom(); + ok = false; } else { - obj.setChildren(mem); - internal::Xmemcpy( - (void *)obj.getObjChildrenFirstUnsafe(), obj_member_ptr, pairs); + dst.setChildren(mem); + MemberType* dst_members = + reinterpret_cast(dst.getObjChildrenFirstUnsafe()); + NodeType* src = static_cast(obj_member_ptr); + for (size_t i = 0; i < pairs; ++i) { + new (&dst_members[i]) + MemberType(std::move(src[i * 2]), std::move(src[i * 2 + 1])); + src[i * 2].~NodeType(); + src[i * 2 + 1].~NodeType(); + } } } else { - obj.setChildren(nullptr); + dst.setChildren(nullptr); } - return true; + if (ok && replacing_existing) { + obj.destroy(); + obj.rawAssign(new_obj); + } + return ok; } sonic_force_inline bool EndArray(uint32_t count) { // Assert cur_node != nullptr!! - NodeType *arr_ptr; - void *arr_element_ptr; + NodeType* arr_ptr; + void* arr_element_ptr; + bool replacing_existing = false; if (parent_ == 0) { // + replacing_existing = true; arr_ptr = parent_node_; arr_element_ptr = &st_[1]; cur_node_ = parent_node_; - parent_node_ = parent_st_.back(); - parent_st_.pop_back(); + parent_node_ = popParent(); + st_[0].~NodeType(); np_ = 0; parent_ = 0; } else { @@ -329,25 +362,38 @@ class SchemaHandler { np_ = parent_ + 1; parent_ = arr_ptr->o.next.ofs; } - NodeType &arr = *arr_ptr; - arr.setLength(count, kArray); + NodeType& arr = *arr_ptr; + NodeType new_arr; + NodeType& dst = replacing_existing ? new_arr : arr; + dst.setLength(count, kArray); + dst.setChildren(nullptr); + bool ok = true; if (count) { - void *mem = arr.template containerMalloc(count, *alloc_); + void* mem = dst.template containerMalloc(count, *alloc_); if (sonic_unlikely(mem == nullptr)) { - NodeType *children = static_cast(arr_element_ptr); + NodeType* children = static_cast(arr_element_ptr); for (size_t i = 0; i < count; i++) children[i].~NodeType(); - arr.setLength(0, kArray); - arr.setChildren(nullptr); - oom_ = true; + dst.setLength(0, kArray); + dst.setChildren(nullptr); + setOom(); + ok = false; } else { - arr.setChildren(mem); - internal::Xmemcpy( - (void *)arr.getArrChildrenFirstUnsafe(), arr_element_ptr, count); + dst.setChildren(mem); + NodeType* dst_elements = dst.getArrChildrenFirstUnsafe(); + NodeType* src = static_cast(arr_element_ptr); + for (size_t i = 0; i < count; ++i) { + new (&dst_elements[i]) NodeType(std::move(src[i])); + src[i].~NodeType(); + } } } else { - arr.setChildren(nullptr); + dst.setChildren(nullptr); } - return true; + if (ok && replacing_existing) { + arr.destroy(); + arr.rawAssign(new_arr); + } + return ok; } static constexpr bool check_key_return = true; @@ -368,21 +414,78 @@ class SchemaHandler { if (sonic_likely(np_ < cap_)) { np_++; return true; - } else { + } + setOom(); + return false; + } + + sonic_force_inline bool reserveStack(size_t new_cap) { + if (new_cap <= cap_) return true; + if (sonic_unlikely(new_cap > + std::numeric_limits::max() / sizeof(NodeType))) { + setOom(); return false; } + NodeType* new_st = + static_cast(std::malloc(sizeof(NodeType) * new_cap)); + if (!new_st) { + setOom(); + return false; + } + for (size_t i = 0; i < np_; ++i) { + new (&new_st[i]) NodeType(std::move(st_[i])); + st_[i].~NodeType(); + } + std::free(st_); + st_ = new_st; + cap_ = new_cap; + return true; + } + + sonic_force_inline void setOom() noexcept { + oom_ = true; + error_ = kErrorNoMem; + } + + sonic_force_inline bool pushParent(NodeType* node) noexcept { + if (sonic_unlikely(!parent_st_.template Push(node))) { + setOom(); + return false; + } + return true; + } + + sonic_force_inline NodeType* popParent() noexcept { + NodeType* node = *parent_st_.template Top(); + parent_st_.template Pop(1); + return node; + } + + sonic_force_inline bool pushFoundCount(size_t count) noexcept { + if (sonic_unlikely(!found_count_st_.template Push(count))) { + setOom(); + return false; + } + return true; + } + + sonic_force_inline size_t popFoundCount() noexcept { + size_t count = *found_count_st_.template Top(); + found_count_st_.template Pop(1); + return count; } - NodeType *st_{nullptr}; - NodeType *parent_node_{nullptr}; - NodeType *cur_node_{nullptr}; + SonicError error_{kErrorNone}; + NodeType* st_{nullptr}; + NodeType* parent_node_{nullptr}; + NodeType* cur_node_{nullptr}; size_t np_{0}; size_t cap_{0}; size_t parent_{0}; size_t found_node_count_{0}; - Allocator *alloc_{nullptr}; - std::vector parent_st_{16}; - std::vector found_count_st_{16}; + Allocator* alloc_{nullptr}; + internal::Stack parent_st_{16 * sizeof(NodeType*)}; + internal::Stack found_count_st_{16 * sizeof(size_t)}; }; } // namespace sonic_json diff --git a/include/sonic/dom/serialize.h b/include/sonic/dom/serialize.h index d89327db..f277710c 100644 --- a/include/sonic/dom/serialize.h +++ b/include/sonic/dom/serialize.h @@ -19,6 +19,7 @@ #include #include +#include #include "sonic/dom/flags.h" #include "sonic/dom/type.h" @@ -26,6 +27,7 @@ #include "sonic/internal/arch/simd_quote.h" #include "sonic/internal/ftoa.h" #include "sonic/internal/itoa.h" +#include "sonic/internal/stack.h" #include "sonic/writebuffer.h" namespace sonic_json { @@ -35,8 +37,11 @@ namespace internal { template sonic_force_inline SonicError SerializeImpl(const NodeType* node, WriteBuffer& wb) { + using MemberNode = typename NodeType::MemberNode; + static_assert(sizeof(MemberNode) == sizeof(NodeType) * 2, + "SerializeImpl relies on compact object member layout"); struct ParentCtx { - uint64_t len; + size_t len; const NodeType* ptr; }; @@ -45,11 +50,15 @@ sonic_force_inline SonicError SerializeImpl(const NodeType* node, constexpr size_t kNumberSize = 33; size_t node_nums = node->IsContainer() ? node->Size() : 1; + if (sonic_unlikely(node_nums > (std::numeric_limits::max() - 64) / + kExpectMinifyRatio)) { + return kErrorNoMem; + } size_t estimate = node_nums * kExpectMinifyRatio + 64; bool is_obj = node->IsObject(); bool is_key, is_obj_nxt; - uint32_t member_cnt = 0; - uint32_t val_cnt, val_cnt_nxt; + size_t member_cnt = 0; + size_t val_cnt, val_cnt_nxt; size_t str_len; long inc_len; const char* str_ptr; @@ -59,9 +68,13 @@ sonic_force_inline SonicError SerializeImpl(const NodeType* node, if constexpr ((serializeFlags & SerializeFlags::kSerializeAppendBuffer) == 0) { wb.Clear(); - wb.Reserve(estimate); + if (sonic_unlikely(!wb.Reserve(estimate))) return kErrorNoMem; } else { - wb.Reserve(estimate + wb.Size()); + if (sonic_unlikely(estimate > + std::numeric_limits::max() - wb.Size())) { + return kErrorNoMem; + } + if (sonic_unlikely(!wb.Reserve(estimate + wb.Size()))) return kErrorNoMem; } bool is_single = (!node->IsContainer()) || node->Empty(); @@ -69,18 +82,29 @@ sonic_force_inline SonicError SerializeImpl(const NodeType* node, val_cnt = 1; goto val_begin; } + if (sonic_unlikely(is_obj && + node->Size() > std::numeric_limits::max() / 2)) { + return kErrorNoMem; + } val_cnt = node->Size() << is_obj; member_cnt = node->Size(); wb.PushUnsafe('[' | (uint8_t)(is_obj) << 5); - node = is_obj ? node->getObjChildrenFirstUnsafe() - : node->getArrChildrenFirstUnsafe(); + if (is_obj) { + node = &node->getObjChildrenFirstUnsafe()->name; + } else { + node = node->getArrChildrenFirstUnsafe(); + } val_begin: switch (node->getBasicType()) { case kString: { - is_key = ((size_t)(is_obj) & (~val_cnt)); + is_key = is_obj && ((val_cnt & 1) == 0); str_len = node->Size(); - inc_len = str_len * 6 + 32 + 3; - wb.Grow(inc_len); + if (sonic_unlikely(str_len > + (std::numeric_limits::max() - 35) / 6)) { + return kErrorNoMem; + } + inc_len = static_cast(str_len * 6 + 32 + 3); + if (sonic_unlikely(wb.Grow(inc_len) == nullptr)) return kErrorNoMem; str_ptr = node->GetStringView().data(); rn = internal::Quote(str_ptr, str_len, wb.End()) - wb.End(); @@ -91,7 +115,7 @@ sonic_force_inline SonicError SerializeImpl(const NodeType* node, } case kNumber: { - wb.Grow(kNumberSize); + if (sonic_unlikely(wb.Grow(kNumberSize) == nullptr)) return kErrorNoMem; switch (node->GetType()) { case kSint: rn = internal::I64toa(wb.End(), node->GetInt64()) - @@ -105,7 +129,6 @@ sonic_force_inline SonicError SerializeImpl(const NodeType* node, const double d = node->GetDouble(); rn = internal::F64toa(wb.End(), d); // support Infinity/-Infinity or NaN/-NaN - if (sonic_unlikely(rn <= 0)) { if (serializeFlags & SerializeFlags::kSerializeInfNan) { if (sonic_unlikely(std::isinf(d))) { @@ -130,7 +153,12 @@ sonic_force_inline SonicError SerializeImpl(const NodeType* node, case kNumStr: { rn = 0; str_len = node->Size(); - wb.Grow(str_len + 1); + if (sonic_unlikely(str_len == std::numeric_limits::max())) { + return kErrorNoMem; + } + if (sonic_unlikely(wb.Grow(str_len + 1) == nullptr)) { + return kErrorNoMem; + } wb.PushUnsafe(node->GetStringNumber().data(), str_len); break; } @@ -143,17 +171,19 @@ sonic_force_inline SonicError SerializeImpl(const NodeType* node, break; } case kBool: { - wb.Push5_8(node->IsFalse() ? "false, " : "true, ", - 5 + node->IsFalse()); + if (sonic_unlikely(!wb.Push5_8(node->IsFalse() ? "false, " : "true, ", + 5 + node->IsFalse()))) { + return kErrorNoMem; + } break; } case kNull: { - wb.Push5_8("null, ", 5); + if (sonic_unlikely(!wb.Push5_8("null, ", 5))) return kErrorNoMem; break; } case kObject: case kArray: { - wb.Grow(3); + if (sonic_unlikely(wb.Grow(3) == nullptr)) return kErrorNoMem; is_obj_nxt = node->IsObject(); val_cnt_nxt = node->Size(); if (sonic_unlikely(val_cnt_nxt == 0)) { @@ -162,6 +192,11 @@ sonic_force_inline SonicError SerializeImpl(const NodeType* node, wb.PushUnsafe(','); break; } else { + if (sonic_unlikely(is_obj && + member_cnt > + (std::numeric_limits::max() - 1) / 2)) { + return kErrorNoMem; + } // check the serialized member count // member_cnt is remained member counts, val_cnt is remained value // counts. If the object key is string type, "member_cnt * 2 + 1 = @@ -169,20 +204,33 @@ sonic_force_inline SonicError SerializeImpl(const NodeType* node, if (sonic_unlikely(is_obj && ((member_cnt << 1) + 1 != val_cnt))) { goto key_err; } - stk.Push(ParentCtx{val_cnt << 1 | is_obj, node}); + if (sonic_unlikely(!stk.Push(ParentCtx{val_cnt << 1 | is_obj, node}))) { + return kErrorNoMem; + } + if (sonic_unlikely(is_obj_nxt && + val_cnt_nxt > + std::numeric_limits::max() / 2)) { + return kErrorNoMem; + } val_cnt = val_cnt_nxt << is_obj_nxt; member_cnt = val_cnt_nxt; is_obj = is_obj_nxt; wb.PushUnsafe('[' | (uint8_t)(is_obj) << 5); - node = is_obj ? node->getObjChildrenFirstUnsafe() - : node->getArrChildrenFirstUnsafe(); + if (is_obj) { + node = &node->getObjChildrenFirstUnsafe()->name; + } else { + node = node->getArrChildrenFirstUnsafe(); + } goto val_begin; } break; } case kRaw: { str_len = node->Size(); - wb.Grow(str_len + 1); + if (sonic_unlikely(str_len == std::numeric_limits::max())) { + return kErrorNoMem; + } + if (sonic_unlikely(wb.Grow(str_len + 1) == nullptr)) return kErrorNoMem; wb.PushUnsafe(node->GetRaw().data(), str_len); wb.PushUnsafe(','); break; @@ -201,7 +249,7 @@ sonic_force_inline SonicError SerializeImpl(const NodeType* node, if (sonic_unlikely((member_cnt && is_obj) != 0)) { goto key_err; } - wb.Grow(2); + if (sonic_unlikely(wb.Grow(2) == nullptr)) return kErrorNoMem; wb.PushUnsafe(']' | (uint8_t)(is_obj) << 5); wb.PushUnsafe(','); if (sonic_unlikely(stk.Size() == 0)) goto doc_end; diff --git a/include/sonic/experiment/lazy_update.h b/include/sonic/experiment/lazy_update.h index 4f46f96b..8f7b9d0a 100644 --- a/include/sonic/experiment/lazy_update.h +++ b/include/sonic/experiment/lazy_update.h @@ -16,6 +16,9 @@ #pragma once +#include +#include + #include "sonic/allocator.h" #include "sonic/dom/dynamicnode.h" #include "sonic/dom/parser.h" @@ -26,11 +29,12 @@ namespace sonic_json { namespace internal { template -static inline ParseResult ParseLazy(NodeType &node, StringView json, - Allocator &alloc) { +static inline ParseResult ParseLazy(NodeType& node, StringView json, + Allocator& alloc, + bool copyBorrowedValues = false) { LazySAXHandler sax(alloc); Parser p; - ParseResult ret = p.ParseLazy(reinterpret_cast(json.data()), + ParseResult ret = p.ParseLazy(reinterpret_cast(json.data()), json.size(), sax); if (ret.Error()) { return ret; @@ -38,25 +42,34 @@ static inline ParseResult ParseLazy(NodeType &node, StringView json, if (sonic_unlikely(sax.oom_)) { return ParseResult(kErrorNoMem, json.size()); } - NodeType *root = sax.stack_.template Begin(); - node = std::move(*root); + NodeType* root = sax.Root(); + if (copyBorrowedValues) { + NodeType owned; + if (sonic_unlikely(!owned.TryCopyFrom(*root, alloc, true))) { + return ParseResult(kErrorNoMem, ret.Offset()); + } + node = std::move(owned); + } else { + node = std::move(*root); + } return ret; } template -static inline SonicError UpdateNodeLazy(NodeType &target, NodeType &source, - Allocator &alloc) { +static inline SonicError UpdateNodeLazyInPlace(NodeType& target, + NodeType& source, + Allocator& alloc) { SonicError err = kErrorNone; if (target.IsRaw() && !target.GetRaw().empty() && *target.GetRaw().data() == '{') { ParseResult ret = ParseLazy( - target, target.GetRaw(), alloc); + target, target.GetRaw(), alloc, true); if (ret.Error()) return ret.Error(); } if (source.IsRaw() && !source.GetRaw().empty() && *source.GetRaw().data() == '{') { ParseResult ret = ParseLazy( - source, source.GetRaw(), alloc); + source, source.GetRaw(), alloc, true); if (ret.Error()) return ret.Error(); } // update the object type @@ -64,22 +77,38 @@ static inline SonicError UpdateNodeLazy(NodeType &target, NodeType &source, target = std::move(source); return kErrorNone; } - target.CreateMap(alloc); + if (!target.CreateMap(alloc)) return kErrorNoMem; auto source_begin = source.MemberBegin(), source_end = source.MemberEnd(); for (auto iter = source_begin; iter != source_end; iter++) { StringView key = iter->name.GetStringView(); auto match = target.FindMember(key); if (match == target.MemberEnd()) { - target.AddMember(key, std::move(iter->value), alloc); + SonicError add_err = + target.AddMemberWithError(key, std::move(iter->value), alloc); + if (add_err) return add_err; } else { - err = UpdateNodeLazy(match->value, - iter->value, alloc); + err = UpdateNodeLazyInPlace( + match->value, iter->value, alloc); if (err) return err; } } return err; } +template +static inline SonicError UpdateNodeLazy(NodeType& target, NodeType& source, + Allocator& alloc) { + NodeType shadow; + if (sonic_unlikely(!shadow.TryCopyFrom(target, alloc))) { + return kErrorNoMem; + } + SonicError err = UpdateNodeLazyInPlace( + shadow, source, alloc); + if (err) return err; + target = std::move(shadow); + return kErrorNone; +} + } // namespace internal /** @@ -91,34 +120,49 @@ static inline SonicError UpdateNodeLazy(NodeType &target, NodeType &source, * @param source the source json */ template -static inline std::string UpdateLazy(StringView target, StringView source) { - using Allocator = Node::AllocatorType; - Allocator alloc; - WriteBuffer wb(target.size() + source.size()); - SonicError err = kErrorNone; - ParseResult ret1, ret2; +static inline std::tuple UpdateLazyWithError( + StringView target, StringView source) { + try { + using Allocator = Node::AllocatorType; + Allocator alloc; + WriteBuffer wb(target.size() + source.size()); + SonicError err = kErrorNone; + ParseResult ret1, ret2; - Node ntarget, nsource; - ret1 = - internal::ParseLazy(ntarget, target, alloc); - ret2 = - internal::ParseLazy(nsource, source, alloc); - if (ret2.Error()) { - return ret1.Error() ? "{}" : std::string(target.data(), target.size()); - } - if (ret1.Error()) { - return std::string(source.data(), source.size()); - } - err = internal::UpdateNodeLazy(ntarget, nsource, - alloc); - if (err) { - return "{}"; - } - err = ntarget.Serialize(wb); - if (err) { - return "{}"; + Node ntarget, nsource; + ret1 = internal::ParseLazy(ntarget, target, + alloc); + ret2 = internal::ParseLazy(nsource, source, + alloc); + if (ret2.Error()) { + return std::make_tuple( + ret1.Error() ? "{}" : std::string(target.data(), target.size()), + ret2.Error()); + } + if (ret1.Error()) { + return std::make_tuple(std::string(source.data(), source.size()), + ret1.Error()); + } + err = internal::UpdateNodeLazy(ntarget, + nsource, alloc); + if (err) { + return std::make_tuple("{}", err); + } + err = ntarget.Serialize(wb); + if (err) { + return std::make_tuple("{}", err); + } + auto sv = wb.ToStringView(); + return std::make_tuple(std::string(sv.data(), sv.size()), kErrorNone); + } catch (const std::bad_alloc&) { + return std::make_tuple("{}", kErrorNoMem); } - return std::string(wb.ToString(), wb.Size()); +} + +template +static inline std::string UpdateLazy(StringView target, StringView source) { + auto ret = UpdateLazyWithError(target, source); + return std::move(std::get<0>(ret)); } } // namespace sonic_json diff --git a/include/sonic/internal/arch/avx2/base.h b/include/sonic/internal/arch/avx2/base.h index aa867754..9521b7bd 100644 --- a/include/sonic/internal/arch/avx2/base.h +++ b/include/sonic/internal/arch/avx2/base.h @@ -217,11 +217,11 @@ static sonic_force_inline int cmp_lt_32(const void* _l, const void* _r, #pragma GCC diagnostic pop #endif __m256i ans = _mm256_cmpeq_epi8(vec_l, vec_r); - int mask = _mm256_movemask_epi8(ans) + 1; + uint32_t mask = static_cast(_mm256_movemask_epi8(ans)) + 1u; // mask = mask << (32 -s); __asm__("bzhil %1, %2, %[result]\n\t" : [result] "=r"(mask) - : "r"((int)s), "r"(mask)); + : "r"(static_cast(s)), "r"(mask)); if (mask) { int ne_idx = __builtin_ctz(mask); // if (lhs[ne_idx] < rhs[ne_idx]) return -1; @@ -276,11 +276,11 @@ static sonic_force_inline bool is_eq_lt_32(const void* _a, const void* _b, __m256i vec_a = _mm256_loadu_si256((__m256i const*)a); __m256i vec_b = _mm256_loadu_si256((__m256i const*)b); __m256i ans = _mm256_cmpeq_epi8(vec_a, vec_b); - int mask = _mm256_movemask_epi8(ans) + 1; + uint32_t mask = static_cast(_mm256_movemask_epi8(ans)) + 1u; // mask = mask << (32 -s); __asm__("bzhil %1, %2, %[result]\n\t" : [result] "=r"(mask) - : "r"((int)s), "r"(mask)); + : "r"(static_cast(s)), "r"(mask)); return mask == 0; } return is_eq_lt_32_cross_page(a, b, s); @@ -305,7 +305,7 @@ sonic_force_inline bool InlinedMemcmpEq(const void* _a, const void* _b, vec_a = _mm256_loadu_si256((__m256i const*)(a + i)); vec_b = _mm256_loadu_si256((__m256i const*)(b + i)); __m256i ans = _mm256_cmpeq_epi8(vec_a, vec_b); - unsigned int mask = _mm256_movemask_epi8(ans) + 1; + uint32_t mask = static_cast(_mm256_movemask_epi8(ans)) + 1u; if (mask) return false; } // no branch for s = x32 @@ -316,7 +316,7 @@ sonic_force_inline bool InlinedMemcmpEq(const void* _a, const void* _b, vec_b = _mm256_loadu_si256((__m256i const*)(b + s - 32)); __m256i ans = _mm256_cmpeq_epi8(vec_a, vec_b); ans = _mm256_and_si256(ans, ans_1); - unsigned int mask = _mm256_movemask_epi8(ans) + 1; + uint32_t mask = static_cast(_mm256_movemask_epi8(ans)) + 1u; if (mask) return false; } return true; @@ -361,7 +361,7 @@ sonic_force_inline int InlinedMemcmp(const void* _l, const void* _r, size_t s) { vec_r = _mm256_loadu_si256((__m256i const*)(rhs + offset)); __m256i ans = _mm256_cmpeq_epi8(vec_l, vec_r); // ans = _mm256_and_si256(ans, ans_1); - unsigned int mask = static_cast(_mm256_movemask_epi8(ans)) + 1; + uint32_t mask = static_cast(_mm256_movemask_epi8(ans)) + 1u; if (mask) { int ne_idx = __builtin_ctz(mask); return lhs[offset + ne_idx] - rhs[offset + ne_idx]; diff --git a/include/sonic/internal/arch/common/arm_common/skip.inc.h b/include/sonic/internal/arch/common/arm_common/skip.inc.h index 955e9b15..741de70d 100644 --- a/include/sonic/internal/arch/common/arm_common/skip.inc.h +++ b/include/sonic/internal/arch/common/arm_common/skip.inc.h @@ -19,9 +19,9 @@ #endif template -sonic_force_inline uint64_t GetStringBits(const uint8_t *data, - uint64_t &prev_instring, - uint64_t &prev_escaped) { +sonic_force_inline uint64_t GetStringBits(const uint8_t* data, + uint64_t& prev_instring, + uint64_t& prev_escaped) { const T v(data); uint64_t escaped = 0; uint64_t bs_bits = v.eq('\\'); @@ -40,7 +40,7 @@ sonic_force_inline uint64_t GetStringBits(const uint8_t *data, // GetNextToken find the next characters in tokens and update the position to // it. template -sonic_force_inline uint8_t GetNextToken(const uint8_t *data, size_t &pos, +sonic_force_inline uint8_t GetNextToken(const uint8_t* data, size_t& pos, size_t len, const char (&tokens)[N]) { while (pos + VEC_LEN <= len) { uint8x16_t v = vld1q_u8(data + pos); @@ -74,7 +74,7 @@ sonic_force_inline uint8_t GetNextToken(const uint8_t *data, size_t &pos, } // pos is the after the ending quote -sonic_force_inline int SkipString(const uint8_t *data, size_t &pos, +sonic_force_inline int SkipString(const uint8_t* data, size_t& pos, size_t len) { const static int kEscaped = 2; const static int kNormal = 1; @@ -123,12 +123,12 @@ sonic_force_inline int SkipString(const uint8_t *data, size_t &pos, // return true if container is closed. template -sonic_force_inline bool skip_container(const uint8_t *data, size_t &pos, +sonic_force_inline bool skip_container(const uint8_t* data, size_t& pos, size_t len, uint8_t left, uint8_t right) { uint64_t prev_instring = 0, prev_escaped = 0, instring; int rbrace_num = 0, lbrace_num = 0, last_lbrace_num; - const uint8_t *p; + const uint8_t* p; while (pos + 64 <= len) { p = data + pos; #define SKIP_LOOP() \ @@ -163,10 +163,12 @@ sonic_force_inline bool skip_container(const uint8_t *data, size_t &pos, return false; } -sonic_force_inline uint8_t skip_space_safe(const uint8_t *data, size_t &pos, - size_t len, size_t &, uint64_t &) { +sonic_force_inline uint8_t skip_space_safe(const uint8_t* data, size_t& pos, + size_t len, size_t&, uint64_t&) { + if (sonic_unlikely(pos >= len)) return 0; while (pos < len && IsSpace(data[pos++])) ; + if (sonic_unlikely(pos == 0)) return 0; // if not found, still return the space chars return data[pos - 1]; } diff --git a/include/sonic/internal/arch/common/x86_common/skip.inc.h b/include/sonic/internal/arch/common/x86_common/skip.inc.h index 1cab276a..aae097d1 100644 --- a/include/sonic/internal/arch/common/x86_common/skip.inc.h +++ b/include/sonic/internal/arch/common/x86_common/skip.inc.h @@ -14,9 +14,9 @@ * limitations under the License. */ -sonic_force_inline uint64_t GetStringBits(const uint8_t *data, - uint64_t &prev_instring, - uint64_t &prev_escaped) { +sonic_force_inline uint64_t GetStringBits(const uint8_t* data, + uint64_t& prev_instring, + uint64_t& prev_escaped) { const simd::simd8x64 v(data); uint64_t escaped = 0; uint64_t bs_bits = v.eq('\\'); @@ -35,7 +35,7 @@ sonic_force_inline uint64_t GetStringBits(const uint8_t *data, // GetNextToken find the next characters in tokens and update the position to // it. template -sonic_force_inline uint8_t GetNextToken(const uint8_t *data, size_t &pos, +sonic_force_inline uint8_t GetNextToken(const uint8_t* data, size_t& pos, size_t len, const char (&tokens)[N]) { while (pos + VEC_LEN <= len) { VecUint8Type v(data + pos); @@ -62,7 +62,7 @@ sonic_force_inline uint8_t GetNextToken(const uint8_t *data, size_t &pos, } // pos is the after the ending quote -sonic_force_inline int SkipString(const uint8_t *data, size_t &pos, +sonic_force_inline int SkipString(const uint8_t* data, size_t& pos, size_t len) { const static int kEscaped = 2; const static int kNormal = 1; @@ -119,11 +119,11 @@ sonic_force_inline int SkipString(const uint8_t *data, size_t &pos, // return true if container is closed. // the implementation is inspired from JSONSki // reference: https://dl.acm.org/doi/10.1145/3503222.3507719 -sonic_force_inline bool SkipContainer(const uint8_t *data, size_t &pos, +sonic_force_inline bool SkipContainer(const uint8_t* data, size_t& pos, size_t len, uint8_t left, uint8_t right) { uint64_t prev_instring = 0, prev_escaped = 0, instring; int rbrace_num = 0, lbrace_num = 0, last_lbrace_num; - const uint8_t *p; + const uint8_t* p; while (pos + 64 <= len) { p = data + pos; #define SKIP_LOOP() \ @@ -159,9 +159,9 @@ sonic_force_inline bool SkipContainer(const uint8_t *data, size_t &pos, } // TODO: optimize by removing bound checking. -sonic_force_inline uint8_t skip_space(const uint8_t *data, size_t &pos, - size_t &nonspace_bits_end, - uint64_t &nonspace_bits) { +sonic_force_inline uint8_t skip_space(const uint8_t* data, size_t& pos, + size_t& nonspace_bits_end, + uint64_t& nonspace_bits) { // fast path for single space if (!IsSpace(data[pos++])) return data[pos - 1]; if (!IsSpace(data[pos++])) return data[pos - 1]; @@ -199,10 +199,11 @@ sonic_force_inline uint8_t skip_space(const uint8_t *data, size_t &pos, return data[pos++]; } -sonic_force_inline uint8_t skip_space_safe(const uint8_t *data, size_t &pos, +sonic_force_inline uint8_t skip_space_safe(const uint8_t* data, size_t& pos, size_t len, - size_t &nonspace_bits_end, - uint64_t &nonspace_bits) { + size_t& nonspace_bits_end, + uint64_t& nonspace_bits) { + if (sonic_unlikely(pos >= len)) return 0; if (pos + 64 + 2 > len) { goto tail; } @@ -247,6 +248,7 @@ sonic_force_inline uint8_t skip_space_safe(const uint8_t *data, size_t &pos, tail: while (pos < len && IsSpace(data[pos++])) ; + if (sonic_unlikely(pos == 0)) return 0; // if not found, still return the space chars return data[pos - 1]; } diff --git a/include/sonic/internal/arch/simd_skip.h b/include/sonic/internal/arch/simd_skip.h index 1f4ec258..3b0e5cfd 100644 --- a/include/sonic/internal/arch/simd_skip.h +++ b/include/sonic/internal/arch/simd_skip.h @@ -16,20 +16,27 @@ #pragma once +#include +#include #include +#include +#include #include +#include +#include #include #include #include "simd_dispatch.h" #include "sonic/dom/flags.h" #include "sonic/error.h" +#include "sonic/internal/arch/simd_quote.h" +#include "sonic/internal/stack.h" #include "sonic/jsonpath/jsonpath.h" +#include "sonic/writebuffer.h" #include INCLUDE_ARCH_FILE(skip.h) -#include - namespace sonic_json { namespace internal { @@ -49,90 +56,187 @@ SONIC_USING_ARCH_FUNC(skip_space_safe); } \ } while (0) -static bool SkipArray(const uint8_t *data, size_t &pos, size_t len) { - return SkipContainer(data, pos, len, '[', ']'); +static bool IsDigit(uint8_t c) { return c >= '0' && c <= '9'; } + +static bool IsNonZeroDigit(uint8_t c) { return c >= '1' && c <= '9'; } + +static bool IsAllowedDelimiter(uint8_t c, const char* delimiters) { + for (const char* p = delimiters; *p != '\0'; ++p) { + if (c == static_cast(*p)) return true; + } + return false; } -static bool SkipObject(const uint8_t *data, size_t &pos, size_t len) { - return SkipContainer(data, pos, len, '{', '}'); +static bool SkipTrailingValueSpace(const uint8_t* data, size_t& pos, size_t len, + const char* delimiters = ",]}") { + while (pos < len && IsSpace(data[pos])) { + ++pos; + } + return pos == len || IsAllowedDelimiter(data[pos], delimiters); +} + +template +static bool ValidateSkippedNumber(const uint8_t* data, size_t start, size_t end, + Stack& scratch, SonicError& err) { + if constexpr (parseFlags & ParseFlags::kParseOverflowNumAsNumStr) { + return true; + } + const bool floating = + std::memchr(data + start, '.', end - start) != nullptr || + std::memchr(data + start, 'e', end - start) != nullptr || + std::memchr(data + start, 'E', end - start) != nullptr; + if (!floating) { + return true; + } + size_t n = end - start; + if (sonic_unlikely(n > std::numeric_limits::max() - 1)) { + err = kErrorNoMem; + return false; + } + scratch.Clear(); + char* buf = scratch.PushSize(n + 1); + if (sonic_unlikely(buf == nullptr)) { + err = kErrorNoMem; + return false; + } + std::memcpy(buf, data + start, n); + buf[n] = '\0'; + errno = 0; + char* endptr = nullptr; + double value = std::strtod(buf, &endptr); + (void)value; + if (endptr != buf + n) { + err = kParseErrorInvalidChar; + return false; + } + if (!std::isfinite(value)) { + err = kParseErrorInfinity; + return false; + } + return true; } -static uint8_t SkipNumber(const uint8_t *data, size_t &pos, size_t len) { - return GetNextToken(data, pos, len, "]},"); +template +static bool SkipNumberStrict(const uint8_t* data, size_t& pos, size_t len, + const char* delimiters, Stack& scratch, + SonicError& err) { + size_t i = pos - 1; + size_t start = i; + if (data[i] == '-') { + ++i; + if (i >= len) return false; + } + + if (data[i] == '0') { + ++i; + if (i < len && IsDigit(data[i])) return false; + } else if (IsNonZeroDigit(data[i])) { + do { + ++i; + } while (i < len && IsDigit(data[i])); + } else { + return false; + } + + if (i < len && data[i] == '.') { + ++i; + if (i >= len || !IsDigit(data[i])) return false; + do { + ++i; + } while (i < len && IsDigit(data[i])); + } + + if (i < len && (data[i] == 'e' || data[i] == 'E')) { + ++i; + if (i < len && (data[i] == '+' || data[i] == '-')) ++i; + if (i >= len || !IsDigit(data[i])) return false; + do { + ++i; + } while (i < len && IsDigit(data[i])); + } + + pos = i; + if (!ValidateSkippedNumber(data, start, pos, scratch, err)) { + return false; + } + return SkipTrailingValueSpace(data, pos, len, delimiters); } // SkipScanner is used to skip space and json values in json text. class SkipScanner { public: - sonic_force_inline uint8_t SkipSpace(const uint8_t *data, size_t &pos) { + sonic_force_inline uint8_t SkipSpace(const uint8_t* data, size_t& pos) { return skip_space(data, pos, nonspace_bits_end_, nonspace_bits_); } - sonic_force_inline uint8_t SkipSpaceSafe(const uint8_t *data, size_t &pos, + sonic_force_inline uint8_t SkipSpaceSafe(const uint8_t* data, size_t& pos, size_t len) { return skip_space_safe(data, pos, len, nonspace_bits_end_, nonspace_bits_); } - sonic_force_inline SonicError GetArrayElem(const uint8_t *data, size_t &pos, - size_t len, int index) { + template + sonic_force_inline SonicError GetArrayElem(const uint8_t* data, size_t& pos, + size_t len, uint64_t index) { + char c = SkipSpaceSafe(data, pos, len); + if (c == ']') { + return kParseErrorArrIndexOutOfRange; + } + pos--; while (index > 0 && pos < len) { index--; - char c = SkipSpaceSafe(data, pos, len); - switch (c) { - case '{': { - if (!SkipObject(data, pos, len)) { - return kParseErrorInvalidChar; - } - break; - } - case '[': { - if (!SkipArray(data, pos, len)) { - return kParseErrorInvalidChar; - } - break; - } - case '"': { - if (!SkipString(data, pos, len)) { - return kParseErrorInvalidChar; - } - break; - } + long start = SkipOneOnDemand(data, pos, len, ",]"); + if (start < 0) { + return SonicError(-start); } - // skip space and primitives - // TODO (liuq): fast path for compat json. - if (GetNextToken(data, pos, len, ",]") != ',') { + c = SkipSpaceSafe(data, pos, len); + if (c == ']') { + pos--; return kParseErrorArrIndexOutOfRange; } - pos++; + if (c != ',') return kParseErrorInvalidChar; } return index == 0 ? kErrorNone : kParseErrorInvalidChar; } // SkipOne skip one raw json value and return the start of value, return the // negative if errors. - sonic_force_inline long SkipOne(const uint8_t *data, size_t &pos, - size_t len) { + template + inline long SkipOne(const uint8_t* data, size_t& pos, size_t len, + const char* delimiters = ",]}") { + if (sonic_unlikely(pos >= len)) return -kParseErrorInvalidChar; uint8_t c = SkipSpaceSafe(data, pos, len); size_t start = pos - 1; - long err = -kParseErrorInvalidChar; + SonicError err = kParseErrorInvalidChar; switch (c) { case '"': { - if (!SkipString(data, pos, len)) return err; + if (!SkipStringStrict(data, pos, len, scratch_, err)) { + return -err; + } break; } case '{': { - if (!SkipObject(data, pos, len)) return err; + if (sonic_unlikely(depth_ >= kMaxSkipDepth)) + return -kParseErrorInvalidChar; + ++depth_; + bool ok = SkipObjectStrict(data, pos, len, err); + --depth_; + if (!ok) return -err; break; } case '[': { - if (!SkipArray(data, pos, len)) return err; + if (sonic_unlikely(depth_ >= kMaxSkipDepth)) + return -kParseErrorInvalidChar; + ++depth_; + bool ok = SkipArrayStrict(data, pos, len, err); + --depth_; + if (!ok) return -err; break; } case 't': case 'n': case 'f': { - if (!SkipLiteral(data, pos, len, c)) return err; + if (!SkipLiteral(data, pos, len, c)) return -err; break; } case '0': @@ -146,18 +250,91 @@ class SkipScanner { case '8': case '9': case '-': { - SkipNumber(data, pos, len); - break; + if (!SkipNumberStrict(data, pos, len, delimiters, scratch_, + err)) { + return -err; + } + return start; } default: - return err; + return -err; } + if (!SkipTrailingValueSpace(data, pos, len, delimiters)) return -err; return start; } - sonic_force_inline bool matchKey(const uint8_t *data, size_t &pos, size_t len, - StringView key, std::vector &kbuf, - SonicError &err) { + template + inline long SkipOneOnDemand(const uint8_t* data, size_t& pos, size_t len, + const char* delimiters = ",]}") { + if constexpr (parseFlags & ParseFlags::kParseValidateOnDemandFull) { + return SkipOne(data, pos, len, delimiters); + } else { + return SkipOneFast(data, pos, len, delimiters); + } + } + + template + sonic_force_inline ParseResult ValidateJson(StringView json) { + size_t pos = 0; + const uint8_t* data = reinterpret_cast(json.data()); + long start = SkipOne(data, pos, json.size(), ""); + if (start < 0) return ParseResult(SonicError(-start), pos); + if (pos != json.size()) return ParseResult(kParseErrorInvalidChar, pos); + return ParseResult(kErrorNone, pos); + } + + template + sonic_force_inline bool SkipStringStrict(const uint8_t* data, size_t& pos, + size_t len, Stack& scratch, + SonicError& err) { + auto start = data + pos; + auto status = SkipString(data, pos, len); + if (!status) { + err = SonicError::kParseErrorInvalidChar; + return false; + } + auto slen = data + pos - 1 - start; + if (status == 1) { + if constexpr (!(parseFlags & + ParseFlags::kParseAllowUnescapedControlChars)) { + for (const uint8_t* p = start; p < data + pos - 1; ++p) { + if (*p < 0x20) { + err = kParseErrorUnEscaped; + pos = p - data; + return false; + } + } + } + return true; + } + scratch.Clear(); + size_t scratch_len = static_cast(slen); + if (sonic_unlikely(scratch_len > std::numeric_limits::max() - 32)) { + err = kErrorNoMem; + return false; + } + uint8_t* nsrc = + reinterpret_cast(scratch.PushSize(scratch_len + 32)); + if (sonic_unlikely(nsrc == nullptr)) { + err = kErrorNoMem; + return false; + } + uint8_t* nsrc_begin = nsrc; + std::memcpy(nsrc, start, slen + 1); + SonicError parse_err = kErrorNone; + (void)parseStringInplace(nsrc, parse_err); + if (parse_err) { + err = parse_err; + pos = (start - data) + (nsrc - nsrc_begin); + return false; + } + return true; + } + + template + sonic_force_inline bool matchKey(const uint8_t* data, size_t& pos, size_t len, + StringView key, Stack& kbuf, + SonicError& err) { auto start = data + pos; auto status = SkipString(data, pos, len); // has errors @@ -170,27 +347,50 @@ class SkipScanner { // has escaped char if (status == 2) { // parse escaped key - kbuf.resize(slen + 32); - uint8_t *nsrc = &kbuf[0]; + kbuf.Clear(); + size_t scratch_len = static_cast(slen); + if (sonic_unlikely(scratch_len > + std::numeric_limits::max() - 32)) { + err = kErrorNoMem; + return false; + } + uint8_t* nsrc = + reinterpret_cast(kbuf.PushSize(scratch_len + 32)); + if (sonic_unlikely(nsrc == nullptr)) { + err = kErrorNoMem; + return false; + } + uint8_t* nsrc_begin = nsrc; // parseStringInplace need `"` as the end std::memcpy(nsrc, start, slen + 1); - slen = parseStringInplace(nsrc, err); - if (err) { - pos = (start - data) + (nsrc - &kbuf[0]); + SonicError parse_err = kErrorNone; + slen = parseStringInplace(nsrc, parse_err); + if (parse_err) { + err = parse_err; + pos = (start - data) + (nsrc - nsrc_begin); return false; } - start = &kbuf[0]; + start = nsrc_begin; + } else if constexpr (!(parseFlags & + ParseFlags::kParseAllowUnescapedControlChars)) { + for (const uint8_t* p = start; p < data + pos - 1; ++p) { + if (*p < 0x20) { + err = kParseErrorUnEscaped; + pos = p - data; + return false; + } + } } - // compare the key return slen == static_cast(key.size()) && std::memcmp(start, key.data(), slen) == 0; } - sonic_force_inline int matchKeys(const uint8_t *data, size_t &pos, size_t len, - const std::vector &keys, - std::vector &kbuf, - SonicError &err) { + + template + sonic_force_inline int matchKeys(const uint8_t* data, size_t& pos, size_t len, + const std::vector& keys, + Stack& kbuf, SonicError& err) { auto start = data + pos; auto status = SkipString(data, pos, len); // has errors @@ -203,21 +403,44 @@ class SkipScanner { // has escaped char if (status == 2) { // parse escaped key - kbuf.resize(slen + 32); - uint8_t *nsrc = &kbuf[0]; + kbuf.Clear(); + size_t scratch_len = static_cast(slen); + if (sonic_unlikely(scratch_len > + std::numeric_limits::max() - 32)) { + err = kErrorNoMem; + return -1; + } + uint8_t* nsrc = + reinterpret_cast(kbuf.PushSize(scratch_len + 32)); + if (sonic_unlikely(nsrc == nullptr)) { + err = kErrorNoMem; + return -1; + } + uint8_t* nsrc_begin = nsrc; // parseStringInplace need `"` as the end std::memcpy(nsrc, start, slen + 1); - slen = parseStringInplace(nsrc, err); - if (err) { - pos = (start - data) + (nsrc - &kbuf[0]); + SonicError parse_err = kErrorNone; + slen = parseStringInplace(nsrc, parse_err); + if (parse_err) { + err = parse_err; + pos = (start - data) + (nsrc - nsrc_begin); return -1; } - start = &kbuf[0]; + start = nsrc_begin; + } else if constexpr (!(parseFlags & + ParseFlags::kParseAllowUnescapedControlChars)) { + for (const uint8_t* p = start; p < data + pos - 1; ++p) { + if (*p < 0x20) { + err = kParseErrorUnEscaped; + pos = p - data; + return -1; + } + } } for (size_t i = 0; i < keys.size(); i++) { - const auto &key = keys[i]; + const auto& key = keys[i]; if (slen == static_cast(key.size()) && std::memcmp(start, key.data(), slen) == 0) { return i; @@ -229,43 +452,67 @@ class SkipScanner { // GetOnDemand get the target json field through the path, and update the // position. - template - long GetOnDemand(StringView json, size_t &pos, - const GenericJsonPointer &path) { + template + long GetOnDemand(StringView json, size_t& pos, + const GenericJsonPointer& path) { using namespace sonic_json::internal; size_t i = 0; uint8_t c; StringView key; // TODO: use stack smallvector here. - std::vector kbuf(32); // key buffer for parsed keys - const uint8_t *data = reinterpret_cast(json.data()); + Stack kbuf(32); // key buffer for parsed keys + const uint8_t* data = reinterpret_cast(json.data()); size_t len = json.size(); SonicError err = kErrorNone; bool matched = false; + Stack path_context(path.size()); // closing token for matched path parents + if (sonic_unlikely(path_context.HadOom())) return -kErrorNoMem; query: if (i++ != path.size()) { c = SkipSpaceSafe(data, pos, len); if (path[i - 1].IsStr()) { if (c != '{') goto err_mismatch_type; - c = GetNextToken(data, pos, len, "\"}"); - if (c != '"') goto err_unknown_key; + c = SkipSpaceSafe(data, pos, len); + if (c == '}') { + pos--; + goto err_unknown_key; + } + if (c != '"') return -kParseErrorInvalidChar; + pos--; key = StringView(path[i - 1].GetStr()); goto obj_key; } else { if (c != '[') goto err_mismatch_type; - err = GetArrayElem(data, pos, len, path[i - 1].GetNum()); + if (sonic_unlikely(!path[i - 1].IsValidNum())) { + return -kParseErrorArrIndexOutOfRange; + } + err = GetArrayElem(data, pos, len, path[i - 1].GetNum()); if (err) return -err; + if (sonic_unlikely(!path_context.Push(']'))) { + return -kErrorNoMem; + } goto query; } } - return SkipOne(data, pos, len); + { + long start = SkipOneOnDemand(data, pos, len, + valueDelimiters(path_context)); + if (start < 0) return start; + size_t err_pos = pos; + err = validateMatchedPathSuffix(data, pos, len, path_context, err_pos); + if (err) { + pos = err_pos; + return -err; + } + return start; + } obj_key: // advance quote pos++; - matched = matchKey(data, pos, len, key, kbuf, err); + matched = matchKey(data, pos, len, key, kbuf, err); if (err != kErrorNone) { return -err; } @@ -277,34 +524,21 @@ class SkipScanner { // match key and skip parsing unneeded fields if (matched) { + if (sonic_unlikely(!path_context.Push('}'))) { + return -kErrorNoMem; + } goto query; } else { + long start = SkipOneOnDemand(data, pos, len, ",}"); + if (start < 0) return start; c = SkipSpaceSafe(data, pos, len); - switch (c) { - case '{': { - if (!SkipObject(data, pos, len)) { - goto err_invalid_char; - } - break; - } - case '[': { - if (!SkipArray(data, pos, len)) { - goto err_invalid_char; - } - break; - } - case '"': { - if (!SkipString(data, pos, len)) { - goto err_invalid_char; - } - break; - } - } - // skip space and , find next " or } - c = GetNextToken(data, pos, len, "\"}"); - if (c != '"') { + if (c == '}') { goto err_unknown_key; } + if (c != ',') goto err_invalid_char; + c = SkipSpaceSafe(data, pos, len); + if (c != '"') goto err_invalid_char; + pos--; goto obj_key; } @@ -319,24 +553,205 @@ class SkipScanner { } private: + // Default OnDemand keeps short-circuit semantics: validate scalar values and + // value boundaries, but use SIMD container skipping instead of recursively + // validating every unvisited subtree. Full validation routes through SkipOne. + template + inline long SkipOneFast(const uint8_t* data, size_t& pos, size_t len, + const char* delimiters) { + if (sonic_unlikely(pos >= len)) return -kParseErrorInvalidChar; + uint8_t c = SkipSpaceSafe(data, pos, len); + size_t start = pos - 1; + SonicError err = kParseErrorInvalidChar; + + switch (c) { + case '"': { + if (!SkipStringStrict(data, pos, len, scratch_, err)) { + return -err; + } + break; + } + case '{': { + if (sonic_unlikely(StartsWithMismatchedClose(data, pos, len, ']'))) { + return -err; + } + if (!SkipContainer(data, pos, len, '{', '}')) return -err; + break; + } + case '[': { + if (sonic_unlikely(StartsWithMismatchedClose(data, pos, len, '}'))) { + return -err; + } + if (!SkipContainer(data, pos, len, '[', ']')) return -err; + break; + } + case 't': + case 'n': + case 'f': { + if (!SkipLiteral(data, pos, len, c)) return -err; + break; + } + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case '-': { + if (!SkipNumberStrict(data, pos, len, delimiters, scratch_, + err)) { + return -err; + } + return start; + } + default: + return -err; + } + if (!SkipTrailingValueSpace(data, pos, len, delimiters)) return -err; + return start; + } + + static sonic_force_inline bool isPotentialJsonValueStart(uint8_t c) { + switch (c) { + case '"': + case '{': + case '[': + case 't': + case 'f': + case 'n': + case '-': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + return true; + default: + return false; + } + } + + // SkipContainer tracks only the requested bracket type. Reject the obvious + // opposite-close case before entering that fast path. + sonic_force_inline bool StartsWithMismatchedClose(const uint8_t* data, + size_t pos, size_t len, + uint8_t close) { + uint8_t c = SkipSpaceSafe(data, pos, len); + return c == close; + } + + static sonic_force_inline const char* valueDelimiters( + const Stack& path_context) { + if (path_context.Empty()) return ""; + return *path_context.Top() == '}' ? ",}" : ",]"; + } + + sonic_force_inline SonicError + validateMatchedPathSuffix(const uint8_t* data, size_t value_end, size_t len, + const Stack& path_context, size_t& err_pos) { + size_t cursor = value_end; + for (const char* frame = path_context.End(); + frame != path_context.Begin();) { + const char closing = *--frame; + uint8_t c = SkipSpaceSafe(data, cursor, len); + if (c == static_cast(closing)) { + continue; + } + if (c == ',') { + c = SkipSpaceSafe(data, cursor, len); + if ((closing == '}' && c == '"') || + (closing == ']' && isPotentialJsonValueStart(c))) { + return kErrorNone; + } + } + err_pos = cursor == 0 ? 0 : cursor - 1; + return kParseErrorInvalidChar; + } + return kErrorNone; + } + + template + sonic_force_inline bool SkipObjectStrict(const uint8_t* data, size_t& pos, + size_t len, SonicError& err) { + uint8_t c = SkipSpaceSafe(data, pos, len); + if (c == '}') return true; + while (true) { + if (c != '"') return false; + if (!SkipStringStrict(data, pos, len, scratch_, err)) { + return false; + } + c = SkipSpaceSafe(data, pos, len); + if (c != ':') return false; + long start = SkipOne(data, pos, len, ",}"); + if (start < 0) { + err = SonicError(-start); + return false; + } + c = SkipSpaceSafe(data, pos, len); + if (c == '}') return true; + if (c != ',') return false; + c = SkipSpaceSafe(data, pos, len); + if (c == '}') return false; + } + } + + template + sonic_force_inline bool SkipArrayStrict(const uint8_t* data, size_t& pos, + size_t len, SonicError& err) { + uint8_t c = SkipSpaceSafe(data, pos, len); + if (c == ']') return true; + pos--; + while (true) { + long start = SkipOne(data, pos, len, ",]"); + if (start < 0) { + err = SonicError(-start); + return false; + } + c = SkipSpaceSafe(data, pos, len); + if (c == ']') return true; + if (c != ',') return false; + c = SkipSpaceSafe(data, pos, len); + if (c == ']') return false; + pos--; + } + } + size_t nonspace_bits_end_{0}; uint64_t nonspace_bits_{0}; + Stack scratch_{32}; + size_t depth_{0}; + static constexpr size_t kMaxSkipDepth = 1024; }; class SkipScanner2 { public: + static constexpr ParseFlags kJsonPathParseFlags = + ParseFlags::kParseAllowUnescapedControlChars | + ParseFlags::kParseIntegerAsRaw; + + template sonic_force_inline StringView getOne() { - long start = scanner_.SkipOne(data_, pos_, len_); + long start = scanner_.template SkipOne(data_, pos_, len_); if (start < 0) { setError(SonicError(-start)); return ""; } - return StringView(reinterpret_cast(data_) + start, + return StringView(reinterpret_cast(data_) + start, pos_ - start); } + template sonic_force_inline SonicError skipOne() { - long start = scanner_.SkipOne(data_, pos_, len_); + long start = scanner_.template SkipOne(data_, pos_, len_); if (start < 0) { setError(SonicError(-start)); return error_; @@ -367,6 +782,17 @@ class SkipScanner2 { return error_ != SonicError::kErrorNone; } + sonic_force_inline bool consumeOnlyTrailingSpaces() { + while (pos_ < len_ && IsSpace(data_[pos_])) { + ++pos_; + } + if (pos_ != len_) { + setError(kParseErrorInvalidChar); + return false; + } + return true; + } + sonic_force_inline void setIsFieldName() { this->isFieldName = true; } sonic_force_inline bool getAndClearIsFieldName() { auto ret = this->isFieldName; @@ -379,14 +805,32 @@ class SkipScanner2 { // - sonic_force_inline void skipIfPresent(const uint8_t c) { - if (sonic_unlikely(pos_ == len_)) { - setError(SonicError::kParseErrorEof); - return; + sonic_force_inline bool consumeValueSeparatorOrEnd(uint8_t end, + bool& has_next) { + uint8_t c = peek(); + if (sonic_unlikely(hasError())) { + return false; } - if (peek() == c) { - advance(); + if (c == end) { + has_next = false; + return true; + } + if (c != ',') { + setError(SonicError::kParseErrorInvalidChar); + return false; + } + + advance(); + c = peek(); + if (sonic_unlikely(hasError())) { + return false; + } + if (c == end) { + setError(SonicError::kParseErrorInvalidChar); + return false; } + has_next = true; + return true; } sonic_force_inline bool consume(uint8_t c) { @@ -413,17 +857,19 @@ class SkipScanner2 { // Precondition: calling advance takes input the " of the first fieldname // post condition: if found peek() returns first char of the found value // if not found, peek() returns } + template sonic_force_inline bool advanceKey(StringView key) { - auto c = advance(); bool matched = false; - while (c != '}') { + while (peek() != '}') { + auto c = advance(); if (c != '"') { setError(SonicError::kParseErrorInvalidChar); return false; } // match the key - matched = scanner_.matchKey(data_, pos_, len_, key, kbuf_, error_); + matched = scanner_.template matchKey(data_, pos_, len_, key, + kbuf_, error_); if (error_ != SonicError::kErrorNone) { return false; } @@ -437,24 +883,15 @@ class SkipScanner2 { break; } - RETURN_FALSE_IF_PARSE_ERROR(skipOne()); + RETURN_FALSE_IF_PARSE_ERROR(skipOne()); - // get the next key - c = advance(); - if (c == ',') { - c = advance(); - } else if (c != '}') { - setError(SonicError::kParseErrorInvalidChar); + bool has_next = false; + if (!consumeValueSeparatorOrEnd('}', has_next)) { + return false; + } + if (!has_next) { + break; } - } - - // When no key matches, the loop above would consume all members *and* the - // closing '}'. However, getJsonPath() (including the Java/Spark-compatible - // template variant with `serializeFlags = kSerializeJavaStyleFlag`) expects - // '}' to be left unconsumed and handled by the caller that processes the - // object. - if (!matched && c == '}') { - pos_--; } return matched; } @@ -462,17 +899,19 @@ class SkipScanner2 { // Precondition: calling advance takes input the " of the first fieldname // post condition: if found peek() returns first char of the found value // if not found, peek() returns } - sonic_force_inline int advanceKeys(const std::vector &keys) { - auto c = advance(); + template + sonic_force_inline int advanceKeys(const std::vector& keys) { int matched = -1; - while (c != '}') { + while (peek() != '}') { + auto c = advance(); if (c != '"') { setError(SonicError::kParseErrorInvalidChar); return -1; } // match the key - matched = scanner_.matchKeys(data_, pos_, len_, keys, kbuf_, error_); + matched = scanner_.template matchKeys(data_, pos_, len_, keys, + kbuf_, error_); if (error_ != SonicError::kErrorNone) { return -1; } @@ -486,41 +925,33 @@ class SkipScanner2 { break; } - RETURN_FALSE_IF_PARSE_ERROR(skipOne()); + RETURN_FALSE_IF_PARSE_ERROR(skipOne()); - // get the next key - c = advance(); - if (c == ',') { - c = advance(); - } else if (c != '}') { - setError(SonicError::kParseErrorInvalidChar); + bool has_next = false; + if (!consumeValueSeparatorOrEnd('}', has_next)) { + return -1; + } + if (!has_next) { + break; } - } - - // When no key matches, the loop above would consume all members *and* the - // closing '}'. However, getJsonPath() (including the Java/Spark-compatible - // template variant with `serializeFlags = kSerializeJavaStyleFlag`) expects - // '}' to be left unconsumed and handled by the caller that processes the - // object. - if (matched == -1 && c == '}') { - pos_--; } return matched; } - sonic_force_inline SonicError traverseObject(const JsonPath &path, + sonic_force_inline SonicError traverseObject(const JsonPath& path, size_t index, - std::vector &res) { - auto c = advance(); - while (c != '}') { + std::vector& res) { + while (peek() != '}') { + auto c = advance(); if (c != '"') { setError(SonicError::kParseErrorInvalidChar); return error_; } // skip the key - if (!SkipString(data_, pos_, len_)) { - setError(SonicError::kParseErrorInvalidChar); + if (!scanner_.template SkipStringStrict( + data_, pos_, len_, kbuf_, error_)) { + if (!hasError()) setError(SonicError::kParseErrorInvalidChar); return error_; } @@ -533,100 +964,86 @@ class SkipScanner2 { return error_; } - // get the next key - c = advance(); - if (c == ',') { - c = advance(); - } else if (c != '}') { - setError(SonicError::kParseErrorInvalidChar); + bool has_next = false; + if (!consumeValueSeparatorOrEnd('}', has_next)) { return error_; } + if (!has_next) break; } return kErrorNone; } - sonic_force_inline SonicError traverseArray(const JsonPath &path, + sonic_force_inline SonicError traverseArray(const JsonPath& path, size_t index, - std::vector &res) { - auto c = advance(); - pos_--; - while (c != ']') { + std::vector& res) { + while (peek() != ']') { // recursively parse the value if (getJsonPath(path, index + 1, res, true) != SonicError::kErrorNone) { return error_; } - // get the next elem - c = advance(); - if (c == ',') { - continue; - } else if (c != ']') { - setError(SonicError::kParseErrorInvalidChar); + bool has_next = false; + if (!consumeValueSeparatorOrEnd(']', has_next)) { return error_; } + if (!has_next) break; } return kErrorNone; } sonic_force_inline bool advanceIndex(size_t index) /* found */ { - auto c = advance(); - if (c == ']') { + if (peek() == ']') { return false; } - pos_--; // backwared for skip the first elem - while (c != ']' && index > 0) { - if (skipOne() != SonicError::kErrorNone) { + while (index > 0) { + if (skipOne() != SonicError::kErrorNone) { return false; } - // get the next key - c = advance(); - if (c == ',') { - index--; - } else if (c != ']') { - setError(SonicError::kParseErrorInvalidChar); + bool has_next = false; + if (!consumeValueSeparatorOrEnd(']', has_next)) { return false; } + if (!has_next) return false; + --index; } - return (index == 0); + return true; } sonic_force_inline SonicError skipArrayRemain() { - auto c = advance(); - while (c != ']') { - if (c != ',') { - setError(SonicError::kParseErrorInvalidChar); + bool has_next = false; + if (!consumeValueSeparatorOrEnd(']', has_next)) { + return error_; + } + while (has_next) { + if (skipOne() != SonicError::kErrorNone) { return error_; } - - if (skipOne() != SonicError::kErrorNone) { + if (!consumeValueSeparatorOrEnd(']', has_next)) { return error_; } - - c = advance(); } return kErrorNone; } sonic_force_inline SonicError skipObjectRemain() { - auto c = advance(); - while (c != '}') { - if (c != ',') { - setError(SonicError::kParseErrorInvalidChar); - return error_; - } - - c = advance(); + bool has_next = false; + if (!consumeValueSeparatorOrEnd('}', has_next)) { + return error_; + } + while (has_next) { + auto c = advance(); if (c != '"') { setError(SonicError::kParseErrorInvalidChar); return error_; } // skip the key - if (!SkipString(data_, pos_, len_)) { - setError(SonicError::kParseErrorInvalidChar); + if (!scanner_.template SkipStringStrict( + data_, pos_, len_, kbuf_, error_)) { + if (!hasError()) setError(SonicError::kParseErrorInvalidChar); return error_; } @@ -634,12 +1051,13 @@ class SkipScanner2 { return error_; } - if (skipOne() != SonicError::kErrorNone) { + if (skipOne() != SonicError::kErrorNone) { return error_; } - // get the next key - c = advance(); + if (!consumeValueSeparatorOrEnd('}', has_next)) { + return error_; + } } return kErrorNone; } @@ -655,7 +1073,7 @@ class SkipScanner2 { virtual bool copyCurrentStructureSingleResult(StringView sv) = 0; virtual bool copyCurrentStructureJsonTupleCodeGen( StringView raw, size_t index, - std::vector> &result, + std::vector>& result, JsonValueType type) = 0; virtual bool writeRawValue(StringView sv) = 0; virtual bool writeStartArray() = 0; @@ -663,19 +1081,28 @@ class SkipScanner2 { virtual bool writeComma() = 0; virtual bool isEmpty() = 0; virtual bool isBeginArray() = 0; + virtual SonicError getError() const { return kParseErrorUnexpect; } virtual ~JsonGeneratorInterface() {} }; template using JsonGeneratorFactory = std::function>( - WriteBuffer &)>; + WriteBuffer&)>; + + template + inline bool setJsonGeneratorError( + JsonGeneratorInterface* jsonGenerator) { + SonicError err = jsonGenerator->getError(); + setError(err == kErrorNone ? kParseErrorUnexpect : err); + return false; + } template inline bool getJsonPathArrayIndex( - const JsonPath &path, size_t index, - JsonGeneratorInterface *jsonGenerator, - const JsonGeneratorFactory &jsonGeneratorFactory, + const JsonPath& path, size_t index, + JsonGeneratorInterface* jsonGenerator, + const JsonGeneratorFactory& jsonGeneratorFactory, const int64_t idx) { RETURN_FALSE_IF_PARSE_ERROR(consume('[')); int64_t cur_idx = 0; @@ -684,18 +1111,30 @@ class SkipScanner2 { if (cur_idx == idx) { dirty = getJsonPath( path, index + 1, jsonGenerator, jsonGeneratorFactory); - while (peek() != ']') { - RETURN_FALSE_IF_PARSE_ERROR(skipIfPresent(',')); - if (peek() == ']') { - break; + if (error_ != kErrorNone) { + return false; + } + bool has_next = false; + if (!consumeValueSeparatorOrEnd(']', has_next)) { + return false; + } + while (has_next) { + RETURN_FALSE_IF_PARSE_ERROR(skipOne()); + if (!consumeValueSeparatorOrEnd(']', has_next)) { + return false; } - RETURN_FALSE_IF_PARSE_ERROR(skipOne()); } break; } else { - RETURN_FALSE_IF_PARSE_ERROR(skipOne()); + RETURN_FALSE_IF_PARSE_ERROR(skipOne()); + } + bool has_next = false; + if (!consumeValueSeparatorOrEnd(']', has_next)) { + return false; + } + if (!has_next) { + break; } - RETURN_FALSE_IF_PARSE_ERROR(skipIfPresent(',')); cur_idx++; } RETURN_FALSE_IF_PARSE_ERROR(consume(']')); @@ -703,24 +1142,31 @@ class SkipScanner2 { } template inline bool jsonTupleWithCodeGenImpl( - const std::vector &keys, - JsonGeneratorInterface *jsonGenerator, - std::vector> &result) { + const std::vector& keys, + JsonGeneratorInterface* jsonGenerator, + std::vector>& result) { RETURN_FALSE_IF_PARSE_ERROR(consume('{')); - int todo = keys.size(); + std::vector seen(keys.size(), 0); - while (peek() != '}' && todo > 0) { - int keyMatchIndex = advanceKeys(keys); + while (peek() != '}') { + int keyMatchIndex = advanceKeys(keys); + if (error_ != kErrorNone) { + return false; + } if (keyMatchIndex != -1) { - todo--; - JsonValueType type = - peek() == '"' ? JsonValueType::STRING : JsonValueType::OTHER; - if (peek() == 'n') { + size_t key_index = static_cast(keyMatchIndex); + if (seen[key_index]) { + RETURN_FALSE_IF_PARSE_ERROR(skipOne()); + } else if (peek() == 'n') { // do not do anything for null - RETURN_FALSE_IF_PARSE_ERROR(skipOne()); + seen[key_index] = 1; + RETURN_FALSE_IF_PARSE_ERROR(skipOne()); } else { - const auto sv = getOne(); + seen[key_index] = 1; + JsonValueType type = + peek() == '"' ? JsonValueType::STRING : JsonValueType::OTHER; + const auto sv = getOne(); if (error_ != kErrorNone) { return false; } @@ -728,22 +1174,31 @@ class SkipScanner2 { jsonGenerator->copyCurrentStructureJsonTupleCodeGen( sv, keyMatchIndex, result, type); if (!copy_success) { - error_ = kParseErrorUnexpect; - return false; + return setJsonGeneratorError(jsonGenerator); } } } - RETURN_FALSE_IF_PARSE_ERROR(skipIfPresent(',')); + bool has_next = false; + if (!consumeValueSeparatorOrEnd('}', has_next)) { + return false; + } + if (!has_next) { + break; + } } + RETURN_FALSE_IF_PARSE_ERROR(consume('}')); return true; } template inline std::vector> jsonTupleWithCodeGen( - const std::vector &keys, - JsonGeneratorInterface *jsonGenerator, bool legacy) { + const std::vector& keys, + JsonGeneratorInterface* jsonGenerator, bool legacy) { std::vector> result(keys.size(), std::nullopt); - const auto success = jsonTupleWithCodeGenImpl(keys, jsonGenerator, result); + bool success = jsonTupleWithCodeGenImpl(keys, jsonGenerator, result); + if (success) { + success = consumeOnlyTrailingSpaces(); + } if (!success && !legacy) { std::vector> all_nulls(keys.size(), @@ -756,9 +1211,9 @@ class SkipScanner2 { template inline bool getJsonPath( - const JsonPath &path, size_t index, - JsonGeneratorInterface *jsonGenerator, - const JsonGeneratorFactory &jsonGeneratorFactory) { + const JsonPath& path, size_t index, + JsonGeneratorInterface* jsonGenerator, + const JsonGeneratorFactory& jsonGeneratorFactory) { const bool path_is_nil = index >= path.size(); const auto c = peek(); const bool is_field_name = getAndClearIsFieldName(); @@ -766,29 +1221,29 @@ class SkipScanner2 { const bool field_name = c == '"' && is_field_name; if (is_field_name && !value_string && !field_name) { - setError(kParseErrorUnexpect); + setError(kParseErrorInvalidChar); return false; } - // superhack to guarantee advancement - if (c == 'n' && !path_is_nil) { - // null cannot evaluate - RETURN_FALSE_IF_PARSE_ERROR(skipOne()); + // Primitive values cannot satisfy a non-root path, but they still need to + // be consumed so callers can distinguish "no match" from malformed suffix. + if (!path_is_nil && (c == 'n' || c == 't' || c == 'f' || c == '-' || + (c >= '0' && c <= '9'))) { + RETURN_FALSE_IF_PARSE_ERROR(skipOne()); return false; } if (value_string && !path_is_nil) { - RETURN_FALSE_IF_PARSE_ERROR(skipOne()); + RETURN_FALSE_IF_PARSE_ERROR(skipOne()); return false; } if (value_string && path_is_nil) { if constexpr (style == RAW) { - const auto sv = getOne(); + const auto sv = getOne(); if (error_ != kErrorNone) { return false; } if (!jsonGenerator->writeRaw(sv)) { - setError(kParseErrorUnexpect); - return false; + return setJsonGeneratorError(jsonGenerator); } return true; } @@ -802,7 +1257,16 @@ class SkipScanner2 { while (peek() != ']') { dirty |= getJsonPath( path, index + 1, jsonGenerator, jsonGeneratorFactory); - RETURN_FALSE_IF_PARSE_ERROR(skipIfPresent(',')); + if (error_ != kErrorNone) { + return false; + } + bool has_next = false; + if (!consumeValueSeparatorOrEnd(']', has_next)) { + return false; + } + if (!has_next) { + break; + } } RETURN_FALSE_IF_PARSE_ERROR(consume(']')); return dirty; @@ -811,17 +1275,18 @@ class SkipScanner2 { if (path_is_nil) { if (!jsonGenerator->isBeginArray() && !jsonGenerator->isEmpty()) { - jsonGenerator->writeComma(); + if (!jsonGenerator->writeComma()) { + return setJsonGeneratorError(jsonGenerator); + } } - const auto sv = getOne(); + const auto sv = getOne(); if (error_ != kErrorNone) { return false; } const auto copy_success = jsonGenerator->copyCurrentStructure(sv); if (!copy_success) { - error_ = kParseErrorUnexpect; - return false; + return setJsonGeneratorError(jsonGenerator); } return true; @@ -834,18 +1299,38 @@ class SkipScanner2 { if (dirty) { // Skip children while (peek() != '}') { - RETURN_FALSE_IF_PARSE_ERROR(skipOne()); + uint8_t key = advance(); + if (key != '"' || + !scanner_.template SkipStringStrict( + data_, pos_, len_, kbuf_, error_)) { + if (!hasError()) setError(SonicError::kParseErrorInvalidChar); + return false; + } RETURN_FALSE_IF_PARSE_ERROR(consume(':')); - RETURN_FALSE_IF_PARSE_ERROR(skipOne()); - RETURN_FALSE_IF_PARSE_ERROR(skipIfPresent(',')); + RETURN_FALSE_IF_PARSE_ERROR(skipOne()); + bool has_next = false; + if (!consumeValueSeparatorOrEnd('}', has_next)) { + return false; + } + if (!has_next) { + break; + } } } else { // The next "string_value" is a key setIsFieldName(); dirty = getJsonPath(path, index, jsonGenerator, jsonGeneratorFactory); - - RETURN_FALSE_IF_PARSE_ERROR(skipIfPresent(',')); + if (error_ != kErrorNone) { + return false; + } + bool has_next = false; + if (!consumeValueSeparatorOrEnd('}', has_next)) { + return false; + } + if (!has_next) { + break; + } } } RETURN_FALSE_IF_PARSE_ERROR(consume('}')); @@ -857,16 +1342,31 @@ class SkipScanner2 { RETURN_FALSE_IF_PARSE_ERROR(consume('[')); bool dirty = false; if (!jsonGenerator->isBeginArray() && !jsonGenerator->isEmpty()) { - jsonGenerator->writeComma(); + if (!jsonGenerator->writeComma()) { + return setJsonGeneratorError(jsonGenerator); + } + } + if (!jsonGenerator->writeStartArray()) { + return setJsonGeneratorError(jsonGenerator); } - jsonGenerator->writeStartArray(); while (peek() != ']') { const auto index_plus_two = index + 2; dirty |= getJsonPath( path, index_plus_two, jsonGenerator, jsonGeneratorFactory); - RETURN_FALSE_IF_PARSE_ERROR(skipIfPresent(',')); + if (error_ != kErrorNone) { + return false; + } + bool has_next = false; + if (!consumeValueSeparatorOrEnd(']', has_next)) { + return false; + } + if (!has_next) { + break; + } + } + if (!jsonGenerator->writeEndArray()) { + return setJsonGeneratorError(jsonGenerator); } - jsonGenerator->writeEndArray(); RETURN_FALSE_IF_PARSE_ERROR(consume(']')); return dirty; } @@ -890,29 +1390,45 @@ class SkipScanner2 { // getJsonPath() must consume at least one value on success/failure. // If not, skip the current JSON value to avoid infinite loop and // prevent desync by blindly advancing one byte. - RETURN_FALSE_IF_PARSE_ERROR(skipOne()); + RETURN_FALSE_IF_PARSE_ERROR(skipOne()); + } + if (error_ != kErrorNone) { + return false; + } + bool has_next = false; + if (!consumeValueSeparatorOrEnd(']', has_next)) { + return false; + } + if (!has_next) { + break; } - RETURN_FALSE_IF_PARSE_ERROR(skipIfPresent(',')); } if (sonic_unlikely(pos_ == len_)) { - setError(SonicError::kParseErrorEof); + setError(SonicError::kParseErrorInvalidChar); return false; } RETURN_FALSE_IF_PARSE_ERROR(consume(']')); if (dirty > 1) { if (!jsonGenerator->isBeginArray() && !jsonGenerator->isEmpty()) { - jsonGenerator->writeComma(); + if (!jsonGenerator->writeComma()) { + return setJsonGeneratorError(jsonGenerator); + } + } + if (!jsonGenerator->writeStartArray()) { + return setJsonGeneratorError(jsonGenerator); } - jsonGenerator->writeStartArray(); // should always use explicit `Size`, because there maybe '\0' in the // wb - jsonGenerator->writeRawValue(wb.ToStringView()); - jsonGenerator->writeEndArray(); + if (!jsonGenerator->writeRawValue(wb.ToStringView())) { + return setJsonGeneratorError(jsonGenerator); + } + if (!jsonGenerator->writeEndArray()) { + return setJsonGeneratorError(jsonGenerator); + } } else if (dirty == 1) { if (!jsonGenerator->copyCurrentStructureSingleResult( wb.ToStringView())) { - setError(kParseErrorUnexpect); - return false; + return setJsonGeneratorError(jsonGenerator); } } @@ -923,19 +1439,34 @@ class SkipScanner2 { if (c == '[' && path[index].is_wildcard()) { bool dirty = false; if (!jsonGenerator->isBeginArray() && !jsonGenerator->isEmpty()) { - jsonGenerator->writeComma(); + if (!jsonGenerator->writeComma()) { + return setJsonGeneratorError(jsonGenerator); + } + } + if (!jsonGenerator->writeStartArray()) { + return setJsonGeneratorError(jsonGenerator); } - jsonGenerator->writeStartArray(); RETURN_FALSE_IF_PARSE_ERROR(consume('[')); while (peek() != ']') { const auto index_plus_one = index + 1; dirty |= getJsonPath( path, index_plus_one, jsonGenerator, jsonGeneratorFactory); - RETURN_FALSE_IF_PARSE_ERROR(skipIfPresent(',')); + if (error_ != kErrorNone) { + return false; + } + bool has_next = false; + if (!consumeValueSeparatorOrEnd(']', has_next)) { + return false; + } + if (!has_next) { + break; + } } RETURN_FALSE_IF_PARSE_ERROR(consume(']')); - jsonGenerator->writeEndArray(); + if (!jsonGenerator->writeEndArray()) { + return setJsonGeneratorError(jsonGenerator); + } return dirty; } @@ -953,7 +1484,7 @@ class SkipScanner2 { } if (field_name && path[index].is_key()) { - const bool found = advanceKey(path[index].key()); + const bool found = advanceKey(path[index].key()); if (error_ != kErrorNone) { return false; } @@ -965,7 +1496,7 @@ class SkipScanner2 { path, index + 1, jsonGenerator, jsonGeneratorFactory); } else { // skip null - RETURN_FALSE_IF_PARSE_ERROR(skipOne()); + RETURN_FALSE_IF_PARSE_ERROR(skipOne()); return false; } } @@ -973,7 +1504,7 @@ class SkipScanner2 { } if (field_name && path[index].is_wildcard()) { - RETURN_FALSE_IF_PARSE_ERROR(skipOne()); + RETURN_FALSE_IF_PARSE_ERROR(skipOne()); RETURN_FALSE_IF_PARSE_ERROR(consume(':')); return getJsonPath(path, index + 1, jsonGenerator, jsonGeneratorFactory); @@ -983,22 +1514,34 @@ class SkipScanner2 { if (c == '{') { RETURN_FALSE_IF_PARSE_ERROR(consume('{')); while (peek() != '}') { - // SkipString returns a status (0/1/2) but doesn't set error_. - // Don't use RETURN_FALSE_IF_PARSE_ERROR here. - if (!SkipString(data_, pos_, len_)) { - setError(SonicError::kParseErrorInvalidChar); + RETURN_FALSE_IF_PARSE_ERROR(consume('"')); + if (!scanner_.template SkipStringStrict( + data_, pos_, len_, kbuf_, error_)) { + if (!hasError()) setError(SonicError::kParseErrorInvalidChar); return false; } RETURN_FALSE_IF_PARSE_ERROR(consume(':')); - RETURN_FALSE_IF_PARSE_ERROR(skipOne()); - RETURN_FALSE_IF_PARSE_ERROR(skipIfPresent(',')); + RETURN_FALSE_IF_PARSE_ERROR(skipOne()); + bool has_next = false; + if (!consumeValueSeparatorOrEnd('}', has_next)) { + return false; + } + if (!has_next) { + break; + } } RETURN_FALSE_IF_PARSE_ERROR(consume('}')); } else if (c == '[') { RETURN_FALSE_IF_PARSE_ERROR(consume('[')); while (peek() != ']') { - RETURN_FALSE_IF_PARSE_ERROR(skipOne()); - RETURN_FALSE_IF_PARSE_ERROR(skipIfPresent(',')); + RETURN_FALSE_IF_PARSE_ERROR(skipOne()); + bool has_next = false; + if (!consumeValueSeparatorOrEnd(']', has_next)) { + return false; + } + if (!has_next) { + break; + } } RETURN_FALSE_IF_PARSE_ERROR(consume(']')); } @@ -1007,11 +1550,11 @@ class SkipScanner2 { } // SkipOne skip one raw json value and return the start of value, return the // negative if errors. - inline SonicError getJsonPath(const JsonPath &path, size_t index, - std::vector &res, + inline SonicError getJsonPath(const JsonPath& path, size_t index, + std::vector& res, bool complete = false) { if (index >= path.size()) { - res.push_back(getOne()); + res.push_back(getOne()); return error_; } @@ -1031,14 +1574,14 @@ class SkipScanner2 { if (c != '{') { if (complete) { pos_--; - skipOne(); + skipOne(); } else { setError(SonicError::kUnmatchedTypeInJsonPath); } return error_; } - bool found = advanceKey(path[index].key()); + bool found = advanceKey(path[index].key()); if (hasError()) { return error_; } @@ -1058,7 +1601,7 @@ class SkipScanner2 { if (c != '[') { if (complete) { pos_--; - skipOne(); + skipOne(); } else { setError(SonicError::kUnmatchedTypeInJsonPath); } @@ -1096,11 +1639,11 @@ class SkipScanner2 { public: SkipScanner scanner_; - const uint8_t *data_ = nullptr; + const uint8_t* data_ = nullptr; size_t pos_ = 0; size_t len_ = 0; SonicError error_ = SonicError::kErrorNone; - std::vector kbuf_ = {}; + Stack kbuf_{32}; bool isFieldName = false; }; } // namespace internal diff --git a/include/sonic/internal/stack.h b/include/sonic/internal/stack.h index fa483065..eddb656b 100644 --- a/include/sonic/internal/stack.h +++ b/include/sonic/internal/stack.h @@ -16,10 +16,14 @@ #pragma once +#include #include #include +#include +#include #include "sonic/allocator.h" +#include "sonic/error.h" #include "sonic/macro.h" namespace sonic_json { @@ -27,45 +31,62 @@ namespace internal { class Stack { public: - Stack(size_t cap = defaultCapcity()) : cap_(0) { + Stack(size_t cap = defaultCapcity()) noexcept : cap_(0) { buf_ = nullptr; top_ = nullptr; Reserve(cap); } Stack(const Stack&) = delete; - Stack(Stack&& rhs) : buf_(rhs.buf_), top_(rhs.top_), cap_(rhs.cap_) { + Stack(Stack&& rhs) noexcept + : buf_(rhs.buf_), top_(rhs.top_), cap_(rhs.cap_), error_(rhs.error_) { rhs.setZero(); } - ~Stack() { std::free(buf_); } + ~Stack() noexcept { std::free(buf_); } Stack& operator=(const Stack&) = delete; - Stack& operator=(Stack&& rhs) { + Stack& operator=(Stack&& rhs) noexcept { std::free(buf_); buf_ = rhs.buf_; top_ = rhs.top_; cap_ = rhs.cap_; + error_ = rhs.error_; rhs.setZero(); return *this; } - sonic_force_inline size_t Size() const { return top_ - buf_; } + sonic_force_inline size_t Size() const { + return buf_ == nullptr ? 0 : static_cast(top_ - buf_); + } sonic_force_inline size_t Capacity() const { return cap_; } sonic_force_inline bool Empty() const { return Size() == 0; } + sonic_force_inline bool HadOom() const { return error_ == kErrorNoMem; } + sonic_force_inline SonicError GetError() const { return error_; } + sonic_force_inline void ClearOom() { + if (error_ == kErrorNoMem) error_ = kErrorNone; + } /** * @brief Increase the capacity of buffer if new_cap is greater than the * current capacity(). Otherwise, do nothing. */ - sonic_force_inline void Reserve(size_t new_cap) { - if (new_cap < Capacity()) { - return; + sonic_force_inline bool Reserve(size_t new_cap) { + if (new_cap <= Capacity()) { + return true; + } + if (new_cap > std::numeric_limits::max() - 7) { + setOom(); + return false; } size_t align_cap = SONIC_ALIGN(new_cap); size_t old_size = Size(); char* tmp = static_cast(std::realloc(buf_, align_cap)); - if (sonic_unlikely(tmp == nullptr)) return; + if (sonic_unlikely(tmp == nullptr)) { + setOom(); + return false; + } top_ = tmp + old_size; buf_ = tmp; cap_ = new_cap; + return true; } /** @@ -78,8 +99,10 @@ class Stack { * @param v the pushed value, as char, int... */ template - sonic_force_inline void Push(T v) { - Grow(sizeof(T)); + sonic_force_inline bool Push(T v) { + static_assert(std::is_trivially_copyable::value, + "Stack only supports trivially copyable values"); + if (sonic_unlikely(GrowTyped(sizeof(T)) == nullptr)) return false; #if defined(__GNUC__) && __GNUC__ >= 8 #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wstringop-overflow" @@ -90,6 +113,7 @@ class Stack { #pragma GCC diagnostic pop #endif top_ += sizeof(T); + return true; } /** @@ -97,8 +121,12 @@ class Stack { * @param s the beginning of string * @param n the string size */ - sonic_force_inline void Push(const char* s, size_t n) { - Grow(n + 1); + sonic_force_inline bool Push(const char* s, size_t n) { + if (sonic_unlikely(n == std::numeric_limits::max())) { + setOom(); + return false; + } + if (sonic_unlikely(Grow(n + 1) == nullptr)) return false; #if defined(__GNUC__) && __GNUC__ >= 8 #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wstringop-overflow" @@ -109,6 +137,7 @@ class Stack { #pragma GCC diagnostic pop #endif top_ += n; + return true; } sonic_force_inline void PushUnsafe(const char* s, size_t cnt) { std::memcpy(top_, s, cnt); @@ -122,22 +151,34 @@ class Stack { template sonic_force_inline T* PushSize(size_t n) { - Grow(n * sizeof(T)); + static_assert(std::is_trivially_copyable::value, + "Stack only supports trivially copyable values"); + if (n == 0) { + return reinterpret_cast(top_); + } + if (n > std::numeric_limits::max() / sizeof(T)) { + setOom(); + return nullptr; + } + if (sonic_unlikely(GrowTyped(n * sizeof(T)) == nullptr)) return nullptr; return PushSizeUnsafe(n); } template sonic_force_inline T* PushSizeUnsafe(size_t n) { + static_assert(std::is_trivially_copyable::value, + "Stack only supports trivially copyable values"); T* ret = reinterpret_cast(top_); top_ += n * sizeof(T); return ret; } // faster api for push 5 ~ 8 bytes. - sonic_force_inline void Push5_8(const char* bytes8, size_t n) { - Grow(8); + sonic_force_inline bool Push5_8(const char* bytes8, size_t n) { + if (sonic_unlikely(Grow(8) == nullptr)) return false; std::memcpy(top_, bytes8, 8); top_ += n; + return true; } /** @@ -146,11 +187,15 @@ class Stack { */ template sonic_force_inline const T* Top() const { - return reinterpret_cast(top_ - sizeof(T)); + const char* p = top_ - sizeof(T); + sonic_assert(IsAligned(p, alignof(T))); + return reinterpret_cast(p); } template sonic_force_inline T* Top() { - return reinterpret_cast(top_ - sizeof(T)); + char* p = top_ - sizeof(T); + sonic_assert(IsAligned(p, alignof(T))); + return reinterpret_cast(p); } /** @@ -167,18 +212,52 @@ class Stack { * remained capacity in the buffer. Otherwise, do nothing. */ sonic_force_inline char* Grow(size_t cnt) { - if (sonic_unlikely(top_ + cnt >= buf_ + cap_)) { - if (sonic_unlikely((top_ + cnt) > buf_ + 2 * cap_)) { - size_t needed = (top_ - buf_) + cnt; - Reserve(needed + needed / 2); - } else { - Reserve(cap_ * 2); + size_t old_size = Size(); + if (cnt > std::numeric_limits::max() - old_size) { + setOom(); + return nullptr; + } + size_t needed = old_size + cnt; + if (sonic_unlikely(buf_ == nullptr || needed >= cap_)) { + size_t new_cap = defaultCapcity(); + if (cap_ != 0) { + new_cap = cap_ > std::numeric_limits::max() / 2 + ? std::numeric_limits::max() + : cap_ * 2; } + if (new_cap <= needed) { + if (needed > std::numeric_limits::max() - needed / 2) { + setOom(); + return nullptr; + } + new_cap = needed + needed / 2; + } + if (new_cap <= needed) { + if (needed == std::numeric_limits::max()) { + setOom(); + return nullptr; + } + new_cap = needed + 1; + } + if (sonic_unlikely(!Reserve(new_cap))) return nullptr; } sonic_assert(buf_ != NULL); return top_; } + template + sonic_force_inline char* GrowTyped(size_t cnt) { + size_t padding = AlignmentPadding(Size(), alignof(T)); + if (padding > std::numeric_limits::max() - cnt) { + setOom(); + return nullptr; + } + if (sonic_unlikely(Grow(padding + cnt) == nullptr)) return nullptr; + top_ += padding; + sonic_assert(IsAligned(top_, alignof(T))); + return top_; + } + /** * @brief Get the end of the buffer. * @return the value pointer into the ending. @@ -210,11 +289,22 @@ class Stack { buf_ = nullptr; top_ = nullptr; cap_ = 0; + error_ = kErrorNone; + } + sonic_force_inline void setOom() { error_ = kErrorNoMem; } + static sonic_force_inline size_t AlignmentPadding(size_t size, + size_t alignment) { + size_t remainder = size & (alignment - 1); + return remainder == 0 ? 0 : alignment - remainder; + } + static sonic_force_inline bool IsAligned(const void* ptr, size_t alignment) { + return (reinterpret_cast(ptr) & (alignment - 1)) == 0; } static constexpr size_t defaultCapcity() { return 256; } char* buf_{nullptr}; char* top_{nullptr}; size_t cap_{0}; + SonicError error_{kErrorNone}; }; } // namespace internal diff --git a/include/sonic/jsonpath/dom.h b/include/sonic/jsonpath/dom.h index 2eea47c0..677630ff 100644 --- a/include/sonic/jsonpath/dom.h +++ b/include/sonic/jsonpath/dom.h @@ -1,68 +1,80 @@ #pragma once +#include + #include "sonic/dom/generic_document.h" #include "sonic/jsonpath/dump.h" namespace sonic_json { +static constexpr ParseFlags kJsonPathParseFlags = + ParseFlags::kParseAllowUnescapedControlChars | + ParseFlags::kParseIntegerAsRaw; + sonic_force_inline std::tuple GetByJsonPathInternal( Document& dom, StringView jsonpath) { - // get the nodes - auto result = dom.AtJsonPath(jsonpath); - if (result.error != kErrorNone) { - return std::make_tuple("", result.error); - } + try { + // get the nodes + auto result = dom.AtJsonPath(jsonpath); + if (result.error != kErrorNone) { + return std::make_tuple("", result.error); + } - // filter the null nodes - result.nodes.erase( - std::remove_if(result.nodes.begin(), result.nodes.end(), - [](const auto& node) { return node->IsNull(); }), - result.nodes.end()); + // filter the null nodes + result.nodes.erase( + std::remove_if(result.nodes.begin(), result.nodes.end(), + [](const auto& node) { return node->IsNull(); }), + result.nodes.end()); - if (result.nodes.empty()) { - return std::make_tuple("null", result.error); - } + if (result.nodes.empty()) { + return std::make_tuple("null", result.error); + } - WriteBuffer wb; - if (result.nodes.size() == 1) { - // not serialize the single string - auto& root = result.nodes[0]; - if (root->IsString()) { - wb.Push(root->GetStringView().data(), root->Size()); + WriteBuffer wb; + if (result.nodes.size() == 1) { + // not serialize the single string + auto& root = result.nodes[0]; + if (root->IsString()) { + if (!wb.Push(root->GetStringView().data(), root->Size())) { + return std::make_tuple("", kErrorNoMem); + } + } else { + auto err = + result.nodes[0] + ->template Serialize(wb); + if (err != kErrorNone) { + return std::make_tuple("", err); + } + } } else { - auto err = - result.nodes[0] - ->template Serialize(wb); - if (err != kErrorNone) { - return std::make_tuple("", err); + if (!wb.Push('[')) return std::make_tuple("", kErrorNoMem); + for (const auto& node : result.nodes) { + auto err = + node->template Serialize(wb); + if (err != kErrorNone) { + return std::make_tuple("", err); + } + if (!wb.Push(',')) return std::make_tuple("", kErrorNoMem); } - } - } else { - wb.Push('['); - for (const auto& node : result.nodes) { - auto err = - node->template Serialize(wb); - if (err != kErrorNone) { - return std::make_tuple("", err); + if (*(wb.Top()) == ',') { + wb.Pop(1); } - wb.Push(','); - } - if (*(wb.Top()) == ',') { - wb.Pop(1); + if (!wb.Push(']')) return std::make_tuple("", kErrorNoMem); } - wb.Push(']'); + auto sv = wb.ToStringView(); + return std::make_tuple(std::string(sv.data(), sv.size()), kErrorNone); + } catch (const std::bad_alloc&) { + return std::make_tuple("", kErrorNoMem); } - auto sv = wb.ToStringView(); - return std::make_tuple(std::string(sv.data(), sv.size()), kErrorNone); } sonic_force_inline std::tuple GetByJsonPath( StringView json, StringView jsonpath) { // parse json into dom Document dom; - dom.Parse(json); + dom.Parse(json); if (dom.HasParseError()) { return std::make_tuple("", dom.GetParseError()); } @@ -74,16 +86,21 @@ sonic_force_inline GetByJsonPaths(StringView json, const std::vector& jsonpaths) { // parse json into dom Document dom; - dom.Parse(json); + dom.Parse(json); if (dom.HasParseError()) { return std::make_tuple(std::vector>(), dom.GetParseError()); } std::vector> results; - results.reserve(jsonpaths.size()); + try { + results.reserve(jsonpaths.size()); - for (const auto& jsonpath : jsonpaths) { - results.emplace_back(GetByJsonPathInternal(dom, jsonpath)); + for (const auto& jsonpath : jsonpaths) { + results.emplace_back(GetByJsonPathInternal(dom, jsonpath)); + } + } catch (const std::bad_alloc&) { + return std::make_tuple(std::vector>(), + kErrorNoMem); } return std::make_tuple(results, kErrorNone); } diff --git a/include/sonic/jsonpath/dump.h b/include/sonic/jsonpath/dump.h index 62d50e82..b0d6e0e1 100644 --- a/include/sonic/jsonpath/dump.h +++ b/include/sonic/jsonpath/dump.h @@ -2,6 +2,7 @@ #pragma once #include +#include #include #include @@ -13,49 +14,55 @@ namespace internal { template sonic_force_inline std::tuple Serialize( const JsonPathResult& result) { - auto local = result; - // filter the null nodes - local.nodes.erase( - std::remove_if(local.nodes.begin(), local.nodes.end(), - [](const auto& node) { return node->IsNull(); }), - local.nodes.end()); - - if (local.nodes.empty()) { - return std::make_tuple("null", kErrorNone); - } + try { + auto local = result; + // filter the null nodes + local.nodes.erase( + std::remove_if(local.nodes.begin(), local.nodes.end(), + [](const auto& node) { return node->IsNull(); }), + local.nodes.end()); + + if (local.nodes.empty()) { + return std::make_tuple("null", kErrorNone); + } - WriteBuffer wb; - if (local.nodes.size() == 1) { - // not serialize the single string - auto& root = local.nodes[0]; - if (root->IsString()) { - wb.Push(root->GetStringView().data(), root->Size()); + WriteBuffer wb; + if (local.nodes.size() == 1) { + // not serialize the single string + auto& root = local.nodes[0]; + if (root->IsString()) { + if (!wb.Push(root->GetStringView().data(), root->Size())) { + return std::make_tuple("", kErrorNoMem); + } + } else { + auto err = + local.nodes[0] + ->template Serialize(wb); + if (err != kErrorNone) { + return std::make_tuple("", err); + } + } } else { - auto err = - local.nodes[0] - ->template Serialize(wb); - if (err != kErrorNone) { - return std::make_tuple("", err); + if (!wb.Push('[')) return std::make_tuple("", kErrorNoMem); + for (const auto& node : local.nodes) { + auto err = + node->template Serialize(wb); + if (err != kErrorNone) { + return std::make_tuple("", err); + } + if (!wb.Push(',')) return std::make_tuple("", kErrorNoMem); } - } - } else { - wb.Push('['); - for (const auto& node : local.nodes) { - auto err = - node->template Serialize(wb); - if (err != kErrorNone) { - return std::make_tuple("", err); + if (*(wb.Top()) == ',') { + wb.Pop(1); } - wb.Push(','); - } - if (*(wb.Top()) == ',') { - wb.Pop(1); + if (!wb.Push(']')) return std::make_tuple("", kErrorNoMem); } - wb.Push(']'); + auto sv = wb.ToStringView(); + return std::make_tuple(std::string(sv.data(), sv.size()), kErrorNone); + } catch (const std::bad_alloc&) { + return std::make_tuple("", kErrorNoMem); } - auto sv = wb.ToStringView(); - return std::make_tuple(std::string(sv.data(), sv.size()), kErrorNone); } } // namespace internal diff --git a/include/sonic/jsonpath/jsonpath.h b/include/sonic/jsonpath/jsonpath.h index 8f0e9a0b..48424c98 100644 --- a/include/sonic/jsonpath/jsonpath.h +++ b/include/sonic/jsonpath/jsonpath.h @@ -157,7 +157,7 @@ class JsonPath : public std::vector { // 8 extra '\0' bytes after `logical_len`. // The caller must keep the buffer alive while the parsed JsonPath is used. sonic_force_inline bool ParsePaddedInternal(StringView padded, - size_t logical_len) noexcept { + size_t logical_len) { StringView p(padded.data(), logical_len); if (p.empty() || p[0] != '$') { @@ -350,14 +350,13 @@ class JsonPath : public std::vector { public: // Parse with a padded, writable buffer (avoids extra copy). // See ParsePaddedInternal() for lifetime and padding requirements. - sonic_force_inline bool ParsePadded(StringView padded, - size_t logical_len) noexcept { + sonic_force_inline bool ParsePadded(StringView padded, size_t logical_len) { this->clear(); padded_.clear(); return ParsePaddedInternal(padded, logical_len); } - sonic_force_inline bool Parse(StringView path) noexcept { + sonic_force_inline bool Parse(StringView path) { this->clear(); padded_ = paddingJsonPath(path); return ParsePaddedInternal(StringView(padded_.data(), padded_.size()), diff --git a/include/sonic/jsonpath/ondemand.h b/include/sonic/jsonpath/ondemand.h index 4f8a72f8..d8b6e1f5 100644 --- a/include/sonic/jsonpath/ondemand.h +++ b/include/sonic/jsonpath/ondemand.h @@ -17,6 +17,7 @@ #pragma once #include +#include #include #include #include @@ -40,33 +41,29 @@ class JsonGenerator auto n = &dom_doc_; // check parse error if (dom_doc_.HasParseError()) { + error_ = dom_doc_.GetParseError(); + return false; + } + if (sonic_unlikely(!wb_.PushStr(n->GetStringView()))) { + error_ = kErrorNoMem; return false; } - wb_.PushStr(n->GetStringView()); return true; } - bool writeComma() override { - wb_.Push(','); - return true; - } + bool writeComma() override { return writeChar(','); } bool isEmpty() override { return wb_.Empty(); } - bool writeStartArray() override { - wb_.Push('['); - return true; - } + bool writeStartArray() override { return writeChar('['); } bool isBeginArray() override { return !wb_.Empty() && *(wb_.Top()) == '['; } - bool writeEndArray() override { - wb_.Push(']'); - return true; - } + bool writeEndArray() override { return writeChar(']'); } bool copyCurrentStructure(StringView raw) override { dom_doc_.template Parse(raw); // check parse error if (dom_doc_.HasParseError()) { + error_ = dom_doc_.GetParseError(); return false; } auto n = &dom_doc_; @@ -74,6 +71,7 @@ class JsonGenerator SerializeFlags::kSerializeEscapeEmoji | serializeFlags>(wb_); if (sonic_unlikely(err != kErrorNone)) { + error_ = err; return false; } @@ -83,12 +81,16 @@ class JsonGenerator dom_doc_.template Parse(raw); if (dom_doc_.HasParseError()) { + error_ = dom_doc_.GetParseError(); return false; } auto n = &dom_doc_; if (n->IsString()) { - wb_.PushStr(n->GetStringView()); + if (sonic_unlikely(!wb_.PushStr(n->GetStringView()))) { + error_ = kErrorNoMem; + return false; + } return true; } @@ -96,6 +98,7 @@ class JsonGenerator SerializeFlags::kSerializeEscapeEmoji | serializeFlags>(wb_); if (sonic_unlikely(err != kErrorNone)) { + error_ = err; return false; } return true; @@ -109,13 +112,17 @@ class JsonGenerator ParseFlags::kParseIntegerAsRaw>(raw); // check parse error if (dom_doc_.HasParseError()) { + error_ = dom_doc_.GetParseError(); return false; } auto n = &dom_doc_; if (type == internal::SkipScanner2::JsonValueType::STRING) { // strip the quotes - wb_.PushStr(n->GetStringView()); + if (sonic_unlikely(!wb_.PushStr(n->GetStringView()))) { + error_ = kErrorNoMem; + return false; + } result[index] = std::string(wb_.ToStringView()); return true; } @@ -124,6 +131,7 @@ class JsonGenerator SerializeFlags::kSerializeEscapeEmoji | serializeFlags>(wb_); if (sonic_unlikely(err != kErrorNone)) { + error_ = err; return false; } @@ -131,84 +139,126 @@ class JsonGenerator return true; } bool writeRawValue(StringView sv) override { - this->wb_.PushStr(sv); + if (sonic_unlikely(!this->wb_.PushStr(sv))) { + error_ = kErrorNoMem; + return false; + } return true; } + SonicError getError() const override { return error_; } ~JsonGenerator() override = default; private: + bool writeChar(char c) { + if (sonic_unlikely(!wb_.Push(c))) { + error_ = kErrorNoMem; + return false; + } + return true; + } + Document& dom_doc_; WriteBuffer& wb_; + SonicError error_{kErrorNone}; }; template sonic_force_inline std::tuple GetByJsonPathOnDemand( StringView json, StringView jsonpath) { - internal::SkipScanner2 scan; - - scan.data_ = reinterpret_cast(json.data()); - scan.len_ = json.size(); - internal::JsonPath path; - - // padding some buffers - std::string pathpadd = internal::paddingJsonPath(jsonpath); - // Only parse the logical jsonpath length; the extra '\0' bytes are for safe - // lookahead during unescaping. - if (!path.ParsePadded(StringView(pathpadd.data(), pathpadd.size()), - jsonpath.size())) { - return std::make_tuple("", kUnsupportedJsonPath); - } + try { + auto publicError = [](SonicError err) { + return err == kParseErrorEof ? kParseErrorInvalidChar : err; + }; + internal::SkipScanner2 scan; - Document dom_doc; - WriteBuffer wb; - - const internal::SkipScanner2::JsonGeneratorFactory - jsonGeneratorFactory = [&](WriteBuffer& local_wb) { - std::shared_ptr< - internal::SkipScanner2::JsonGeneratorInterface> - local_ret = std::make_shared>( - dom_doc, local_wb); - return local_ret; - }; - - auto rootJsonGenerator = jsonGeneratorFactory(wb); - const bool matched = - scan.getJsonPath( - path, 1, rootJsonGenerator.get(), jsonGeneratorFactory); - if (matched) { - return std::make_tuple(std::string(wb.ToStringView()), kErrorNone); - } - // if no match, it could be because valid json, just no path. - if (!scan.hasError()) { - return std::make_tuple("", kErrorNoneNoMatch); + scan.data_ = reinterpret_cast(json.data()); + scan.len_ = json.size(); + internal::JsonPath path; + + // padding some buffers + std::string pathpadd = internal::paddingJsonPath(jsonpath); + // Only parse the logical jsonpath length; the extra '\0' bytes are for safe + // lookahead during unescaping. + if (!path.ParsePadded(StringView(pathpadd.data(), pathpadd.size()), + jsonpath.size())) { + return std::make_tuple("", kUnsupportedJsonPath); + } + Document dom_doc; + WriteBuffer wb; + + const internal::SkipScanner2::JsonGeneratorFactory + jsonGeneratorFactory = [&](WriteBuffer& local_wb) { + std::shared_ptr< + internal::SkipScanner2::JsonGeneratorInterface> + local_ret = std::make_shared>( + dom_doc, local_wb); + return local_ret; + }; + + auto rootJsonGenerator = jsonGeneratorFactory(wb); + const bool matched = + scan.getJsonPath(path, 1, rootJsonGenerator.get(), + jsonGeneratorFactory); + if (matched) { + if (!scan.consumeOnlyTrailingSpaces()) { + return std::make_tuple("", publicError(scan.error_)); + } + return std::make_tuple(std::string(wb.ToStringView()), kErrorNone); + } + // if no match, it could be because valid json, just no path. + if (!scan.hasError()) { + if (!scan.consumeOnlyTrailingSpaces()) { + return std::make_tuple("", publicError(scan.error_)); + } + return std::make_tuple("", kErrorNoneNoMatch); + } + // Or a parse error caused premature path termination. Do not return partial + // output with an error; callers should only consume data on kErrorNone. + return std::make_tuple("", publicError(scan.error_)); + } catch (const std::bad_alloc&) { + return std::make_tuple("", kErrorNoMem); } - // or parse error caused premature path match termination, hence no match. - // In this case, return whatever that's been written to buffer. - return std::make_tuple(std::string(wb.ToStringView()), scan.error_); } template -sonic_force_inline std::vector> JsonTupleWithCodeGen( - StringView json, const std::vector& keys, const bool legacy) { - internal::SkipScanner2 scan; +sonic_force_inline + std::tuple>, SonicError> + JsonTupleWithCodeGenWithError(StringView json, + const std::vector& keys, + const bool legacy) { + try { + internal::SkipScanner2 scan; + + scan.data_ = reinterpret_cast(json.data()); + scan.len_ = json.size(); - scan.data_ = reinterpret_cast(json.data()); - scan.len_ = json.size(); + Document dom_doc; + WriteBuffer wb; - Document dom_doc; - WriteBuffer wb; + const internal::SkipScanner2::JsonGeneratorFactory + jsonGeneratorFactory = [&](WriteBuffer& local_wb) { + std::shared_ptr< + internal::SkipScanner2::JsonGeneratorInterface> + local_ret = std::make_shared>( + dom_doc, local_wb); + return local_ret; + }; - const internal::SkipScanner2::JsonGeneratorFactory - jsonGeneratorFactory = [&](WriteBuffer& local_wb) { - std::shared_ptr< - internal::SkipScanner2::JsonGeneratorInterface> - local_ret = std::make_shared>( - dom_doc, local_wb); - return local_ret; - }; + auto result = + scan.jsonTupleWithCodeGen(keys, jsonGeneratorFactory(wb).get(), legacy); + return std::make_tuple(std::move(result), scan.error_); + } catch (const std::bad_alloc&) { + return std::make_tuple(std::vector>{}, + kErrorNoMem); + } +} - return scan.jsonTupleWithCodeGen(keys, jsonGeneratorFactory(wb).get(), - legacy); +template +sonic_force_inline std::vector> JsonTupleWithCodeGen( + StringView json, const std::vector& keys, const bool legacy) { + auto ret = JsonTupleWithCodeGenWithError(json, keys, legacy); + return std::move(std::get<0>(ret)); } } // namespace sonic_json diff --git a/include/sonic/writebuffer.h b/include/sonic/writebuffer.h index 95a8053b..822513b6 100644 --- a/include/sonic/writebuffer.h +++ b/include/sonic/writebuffer.h @@ -42,28 +42,33 @@ class WriteBuffer { * @note Not thread-safe. */ sonic_force_inline const char* ToString() const { - if (sonic_likely(stack_.Size() < stack_.Capacity())) { - *(stack_.template End()) = '\0'; - return stack_.template Begin(); - } - stack_.Grow(1); - *(stack_.template End()) = '\0'; + if (sonic_unlikely(!ensureNullTerminated())) return ""; return stack_.template Begin(); } sonic_force_inline StringView ToStringView() const { - return StringView(ToString(), Size()); + if (Empty()) return StringView("", 0); + return StringView(stack_.template Begin(), Size()); + } + + sonic_force_inline bool EnsureNullTerminated() const { + return ensureNullTerminated(); } sonic_force_inline size_t Size() const { return stack_.Size(); } sonic_force_inline size_t Capacity() const { return stack_.Capacity(); } sonic_force_inline bool Empty() const { return stack_.Empty(); } + sonic_force_inline bool HadOom() const { return stack_.HadOom(); } + sonic_force_inline SonicError GetError() const { return stack_.GetError(); } + sonic_force_inline void ClearOom() { stack_.ClearOom(); } /** * @brief Increase the capacity of buffer if new_cap is greater than the * current capacity(). Otherwise, do nothing. */ - sonic_force_inline void Reserve(size_t new_cap) { stack_.Reserve(new_cap); } + sonic_force_inline bool Reserve(size_t new_cap) { + return stack_.Reserve(new_cap); + } /** * @brief Erases all contexts in the buffer. @@ -75,8 +80,8 @@ class WriteBuffer { * @param v the pushed value, as char, int... */ template - sonic_force_inline void Push(T v) { - stack_.template Push(v); + sonic_force_inline bool Push(T v) { + return stack_.template Push(v); } /** @@ -84,12 +89,14 @@ class WriteBuffer { * @param s the beginning of string * @param n the string size */ - sonic_force_inline void Push(const char* s, size_t n) { stack_.Push(s, n); } + sonic_force_inline bool Push(const char* s, size_t n) { + return stack_.Push(s, n); + } sonic_force_inline void PushUnsafe(const char* s, size_t n) { stack_.PushUnsafe(s, n); } - sonic_force_inline void PushStr(StringView s) { - stack_.Push(s.data(), s.size()); + sonic_force_inline bool PushStr(StringView s) { + return stack_.Push(s.data(), s.size()); } template @@ -108,8 +115,8 @@ class WriteBuffer { } // faster api for push 5 ~ 8 bytes. - sonic_force_inline void Push5_8(const char* bytes8, size_t n) { - stack_.Push5_8(bytes8, n); + sonic_force_inline bool Push5_8(const char* bytes8, size_t n) { + return stack_.Push5_8(bytes8, n); } /** @@ -166,6 +173,16 @@ class WriteBuffer { } private: + sonic_force_inline bool ensureNullTerminated() const { + if (sonic_likely(stack_.Size() < stack_.Capacity())) { + *(stack_.template End()) = '\0'; + return true; + } + if (sonic_unlikely(stack_.Grow(1) == nullptr)) return false; + *(stack_.template End()) = '\0'; + return true; + } + mutable internal::Stack stack_; }; diff --git a/scripts/unittest.sh b/scripts/unittest.sh index fdbb6f42..83b40c47 100755 --- a/scripts/unittest.sh +++ b/scripts/unittest.sh @@ -10,7 +10,7 @@ Usage: -g, --gcc compiler is gcc -c, --clang compiler is clang -h, --help display this message - --arch={aarch64|haswell|westmere} target architecture, default is haswell + --arch={aarch64|arm64|haswell|westmere} target architecture, default is haswell --dispatch={dynamic|static} sonic dispatch mode, default is static example: bash unittest.sh -g --arch=westmere --dispatch=static @@ -45,7 +45,7 @@ while true; do --arch) case "$2" in "") shift 2 ;; - aarch64 | haswell | westmere) + aarch64 | arm64 | haswell | westmere) UNIT_TEST_ARCH="$2" shift 2 ;; @@ -87,10 +87,17 @@ BAZEL="$(sonic_pick_bazel "${TOP_DIR}")" cd "${TOP_DIR}" +BAZEL_UNIT_TEST_ARCH="${UNIT_TEST_ARCH}" +case "${BAZEL_UNIT_TEST_ARCH}" in + aarch64 | arm64) + BAZEL_UNIT_TEST_ARCH=arm + ;; +esac + # default target set -x -${BAZEL} run //:unittest --//:sonic_arch=$UNIT_TEST_ARCH \ +${BAZEL} run //:unittest --//:sonic_arch=${BAZEL_UNIT_TEST_ARCH} \ --//:sonic_sanitizer=${UNIT_TEST_SANITIZER} \ --//:sonic_dispatch=${UNIT_TEST_DISPATCH} \ --copt="-DSONIC_LOCKED_ALLOCATOR" -s diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index a4fd2725..3ef6cfe0 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -23,6 +23,7 @@ FetchContent_MakeAvailable(googletest) # for arm CI: ASAN is very annoying when dealing with qemu option(ENABLE_ASAN "Enable AddressSanitizer" ON) +option(ENABLE_UBSAN "Enable UndefinedBehaviorSanitizer" OFF) file(GLOB SONIC_TEST_FILES "${PROJECT_SOURCE_DIR}/tests/*.h" @@ -48,6 +49,11 @@ else() endif() endif() +if(ENABLE_UBSAN) + target_compile_options(unittest PRIVATE -fsanitize=undefined -fno-sanitize-recover=undefined) + target_link_options(unittest PRIVATE -fsanitize=undefined) +endif() + include("${PROJECT_SOURCE_DIR}/cmake/set_arch_flags.cmake") set_arch_flags(unittest ${CMAKE_SYSTEM_PROCESSOR}) add_test(NAME sonic-unittest COMMAND unittest) diff --git a/tests/allocator_test.cpp b/tests/allocator_test.cpp index 5a44e5f4..c207b039 100644 --- a/tests/allocator_test.cpp +++ b/tests/allocator_test.cpp @@ -17,14 +17,17 @@ #include "sonic/allocator.h" #include +#include #include #include "gtest/gtest.h" +#include "sonic/dom/dynamicnode.h" #include "sonic/internal/stack.h" +#include "sonic/writebuffer.h" // Let huge-allocation OOM tests return null under ASAN instead of aborting. // Dead code in non-ASAN builds; ASAN_OPTIONS still overrides it. -extern "C" __attribute__((used)) const char *__asan_default_options() { +extern "C" __attribute__((used)) const char* __asan_default_options() { return "allocator_may_return_null=1"; } @@ -43,7 +46,7 @@ using namespace sonic_json; TEST(Allocator, Free) { SimpleAllocator a; MEMSTAT_ISEMPTY(); - void *ptr = a.Malloc(24); + void* ptr = a.Malloc(24); MEMSTAT_NOTEMPTY(); ptr = a.Realloc(ptr, 24, 48); MEMSTAT_NOTEMPTY(); @@ -61,7 +64,7 @@ TEST(Allocator, SimpleAllocatorEdgeCases) { EXPECT_EQ(a.Malloc(0), nullptr); // Realloc(..., new_size=0) should free and return nullptr. - void *ptr = a.Malloc(8); + void* ptr = a.Malloc(8); ASSERT_NE(ptr, nullptr); EXPECT_EQ(a.Realloc(ptr, 8, 0), nullptr); } @@ -94,7 +97,7 @@ TEST(Allocator, MemoryPoolAllocatorMoveAndMapAllocator) { { MemoryPoolAllocator<> pool; MapAllocator> ma(&pool); - int *p = ma.allocate(1); + int* p = ma.allocate(1); ASSERT_NE(p, nullptr); ma.deallocate(p, 1); } @@ -130,28 +133,94 @@ TEST(Stack, ReservePreservesContents) { // "failed allocation sets hadOom" path without polluting test output. struct FailAfterFirstChunkAllocator { bool allow_ctor = true; - void *Malloc(size_t n) { + void* Malloc(size_t n) { if (allow_ctor) { allow_ctor = false; return std::malloc(n); } return nullptr; } - void *Realloc(void *, size_t, size_t) { return nullptr; } - static void Free(void *p) { std::free(p); } + void* Realloc(void*, size_t, size_t) { return nullptr; } + static void Free(void* p) { std::free(p); } +}; + +struct FailAllAllocator { + void* Malloc(size_t) { return nullptr; } + void* Realloc(void*, size_t, size_t) { return nullptr; } + static void Free(void* p) { std::free(p); } }; TEST(Allocator, MemoryPoolAllocatorHadOomSignalsFailedMalloc) { FailAfterFirstChunkAllocator base; MemoryPoolAllocator pool(8, &base); EXPECT_FALSE(pool.HadOom()); - void *p = pool.Malloc(16); + void* p = pool.Malloc(16); EXPECT_EQ(p, nullptr); EXPECT_TRUE(pool.HadOom()); pool.ClearOom(); EXPECT_FALSE(pool.HadOom()); } +TEST(Allocator, MemoryPoolAllocatorConstructorOomIsQueryableAndSafe) { + FailAllAllocator base; + MemoryPoolAllocator pool(8, &base); + + EXPECT_TRUE(pool.HadOom()); + EXPECT_EQ(nullptr, pool.Malloc(8)); + EXPECT_TRUE(pool.HadOom()); +} + +TEST(Allocator, MemoryPoolAllocatorUserBufferRejectsTooSmallAfterAlignment) { + alignas(void*) char storage[64]; + MemoryPoolAllocator<> pool(storage + 1, 56); + + EXPECT_TRUE(pool.HadOom()); + EXPECT_EQ(0u, pool.Capacity()); + EXPECT_EQ(nullptr, pool.Malloc(8)); + + MemoryPoolAllocator<> tiny(storage + 1, 1); + EXPECT_TRUE(tiny.HadOom()); + EXPECT_EQ(0u, tiny.Capacity()); +} + +TEST(Allocator, MemoryPoolAllocatorRejectsOversizedMallocWithoutWraparound) { + MemoryPoolAllocator<> pool(8); + + EXPECT_EQ(nullptr, pool.Malloc(std::numeric_limits::max())); + EXPECT_TRUE(pool.HadOom()); +} + +TEST(Allocator, MemoryPoolAllocatorRejectsChunkHeaderOverflow) { + MemoryPoolAllocator<> pool(8); + + EXPECT_EQ(nullptr, + pool.Malloc(std::numeric_limits::max() - sizeof(void*))); + EXPECT_TRUE(pool.HadOom()); +} + +TEST(Allocator, DNodeContainerOverflowMarksPoolOom) { + MemoryPoolAllocator<> alloc; + DNode> arr(kArray); + + alloc.ClearOom(); + arr.Reserve(std::numeric_limits::max(), alloc); + EXPECT_TRUE(alloc.HadOom()); + EXPECT_EQ(0u, arr.Capacity()); + + DNode> obj(kObject); + alloc.ClearOom(); + obj.MemberReserve(std::numeric_limits::max(), alloc); + EXPECT_TRUE(alloc.HadOom()); + EXPECT_EQ(0u, obj.Capacity()); +} + +TEST(Allocator, AdaptiveChunkPolicyHandlesHighBitNeedsWithoutShiftOverflow) { + AdaptiveChunkPolicy cp(1024); + + EXPECT_EQ(size_t{1} << 63, cp.ChunkSize(size_t{1} << 63)); + EXPECT_EQ((size_t{1} << 63) + 1, cp.ChunkSize((size_t{1} << 63) + 1)); +} + // Both MemoryPoolAllocator ctors place SharedData (incl. atomic // hadOom) into raw storage. Assert the flag reads false on a freshly // constructed allocator before any Malloc call — guards against a @@ -232,4 +301,68 @@ TEST(Stack, ConstructorOomLeavesConsistentState) { EXPECT_EQ('X', *s.Top()); } +TEST(Stack, PushSizeReportsOomAndDoesNotAdvanceTop) { + sonic_json::internal::Stack s(8); + ASSERT_FALSE(s.HadOom()); + ASSERT_NE(nullptr, s.Begin()); + ASSERT_EQ(0u, s.Size()); + + constexpr size_t kHuge = (size_t{1} << 62); + char* p = s.PushSize(kHuge); + EXPECT_EQ(nullptr, p); + EXPECT_TRUE(s.HadOom()); + EXPECT_EQ(0u, s.Size()); + EXPECT_EQ(8u, s.Capacity()); + + s.ClearOom(); + EXPECT_FALSE(s.HadOom()); + p = s.PushSize(1); + ASSERT_NE(nullptr, p); + *p = 'Y'; + EXPECT_EQ(1u, s.Size()); + EXPECT_EQ('Y', *s.Top()); +} + +TEST(Stack, PushStringOverflowReportsOomAndDoesNotAdvanceTop) { + sonic_json::internal::Stack s(8); + ASSERT_FALSE(s.HadOom()); + ASSERT_NE(nullptr, s.Begin()); + + EXPECT_FALSE(s.Push("x", std::numeric_limits::max())); + EXPECT_TRUE(s.HadOom()); + EXPECT_EQ(0u, s.Size()); +} + +TEST(Stack, TypedPushAlignsAfterBytePush) { + sonic_json::internal::Stack s(8); + ASSERT_TRUE(s.Push('x')); + + ASSERT_TRUE(s.Push(0x0102030405060708ULL)); + const auto* p = s.Top(); + EXPECT_EQ(0u, reinterpret_cast(p) % alignof(uint64_t)); + EXPECT_EQ(0x0102030405060708ULL, *p); +} + +TEST(Stack, PushSizeAlignsAfterBytePush) { + sonic_json::internal::Stack s(8); + ASSERT_TRUE(s.Push('x')); + + uint64_t* p = s.PushSize(1); + ASSERT_NE(nullptr, p); + EXPECT_EQ(0u, reinterpret_cast(p) % alignof(uint64_t)); + *p = 0x1112131415161718ULL; + EXPECT_EQ(p, s.Top()); + EXPECT_EQ(0x1112131415161718ULL, *s.Top()); +} + +TEST(WriteBuffer, ReserveOverflowReportsOom) { + WriteBuffer wb(8); + EXPECT_FALSE(wb.HadOom()); + + EXPECT_FALSE(wb.Reserve(std::numeric_limits::max())); + EXPECT_TRUE(wb.HadOom()); + EXPECT_EQ(kErrorNoMem, wb.GetError()); + EXPECT_EQ(0u, wb.Size()); +} + } // namespace diff --git a/tests/document_test.cpp b/tests/document_test.cpp index 5db2398b..93e1941d 100644 --- a/tests/document_test.cpp +++ b/tests/document_test.cpp @@ -35,6 +35,8 @@ using namespace sonic_json; using namespace sonic_json::internal; using Document = GenericDocument>; +Document&& MoveDocument(Document& doc) { return std::move(doc); } + TEST(Document, DNode) { // constructors { @@ -251,6 +253,20 @@ TEST(Document, ParseBasic) { } } +TEST(Document, MoveAssignSelfPreservesParsedDocument) { + Document doc; + doc.Parse(R"({"k":"v","n":1})"); + ASSERT_FALSE(doc.HasParseError()); + + doc = MoveDocument(doc); + + ASSERT_FALSE(doc.HasParseError()); + ASSERT_TRUE(doc.IsObject()); + ASSERT_TRUE(doc.HasMember("k")); + EXPECT_EQ(doc["k"].GetString(), "v"); + EXPECT_FALSE(doc.GetAllocator().HadOom()); +} + template class DocumentTest : public testing::Test { public: @@ -470,6 +486,7 @@ TYPED_TEST(DocumentTest, ParseOnDemand) { {json, {"titles", 1}, &Document::IsString, kNoError}, {json, {"hots", 2}, &Document::IsTrue, kNoError}, {json, {"authors", 2, 0}, &Document::IsArray, kNoError}, + {R"({"a":1} garbage)", {"a"}, &Document::IsNumber, kNoError}, // parse ondemand failed {json, {"unknown"}, &Document::IsNull, kError}, @@ -484,6 +501,14 @@ TYPED_TEST(DocumentTest, ParseOnDemand) { {R"({"a"})", {"a"}, &Document::IsNull, kError}, {R"({"x":[[[]])", {"a"}, &Document::IsNull, kError}, {R"({"x":{)", {"a"}, &Document::IsNull, kError}, + {R"({"x":1abc "a":2})", {"a"}, &Document::IsNull, kError}, + {R"([1abc,2])", {1}, &Document::IsNull, kError}, + {R"({"a":[1] 2})", {"a"}, &Document::IsNull, kError}, + {R"({"a":"x" 2})", {"a"}, &Document::IsNull, kError}, + {R"({"x":{]},"a":2})", {"a"}, &Document::IsNull, kError}, + {R"({"x":[}],"a":2})", {"a"}, &Document::IsNull, kError}, + {R"({"a":1]})", {"a"}, &Document::IsNull, kError}, + {"1]", {}, &Document::IsNull, kError}, {json, {"authors", 2, 1}, &Document::IsNull, kError}, {json, {"authors", 3}, &Document::IsNull, kError}, {json, {"hots", 5}, &Document::IsNull, kError}, @@ -500,6 +525,36 @@ TYPED_TEST(DocumentTest, ParseOnDemand) { } } +TYPED_TEST(DocumentTest, ParseOnDemandHonorsParseFlags) { + using Document = TypeParam; + + std::string control_char_json = std::string(R"({"a":")") + char(1) + R"("})"; + Document control_char_doc; + control_char_doc + .template ParseOnDemand( + control_char_json.data(), control_char_json.size(), JsonPointer{"a"}); + EXPECT_FALSE(control_char_doc.HasParseError()); + ASSERT_TRUE(control_char_doc.IsString()); + EXPECT_EQ(control_char_doc.GetStringView(), + StringView(control_char_json.data() + 6, 1)); + + std::string overflow_json = R"({"a":1e309})"; + Document overflow_doc; + overflow_doc.template ParseOnDemand( + overflow_json.data(), overflow_json.size(), JsonPointer{"a"}); + EXPECT_FALSE(overflow_doc.HasParseError()); + ASSERT_TRUE(overflow_doc.IsStringNumber()); + EXPECT_EQ(overflow_doc.GetStringView(), "1e309"); + + std::string trailing_json = R"({"a":1} garbage)"; + Document full_validation_doc; + full_validation_doc + .template ParseOnDemand( + trailing_json.data(), trailing_json.size(), JsonPointer{"a"}); + EXPECT_TRUE(full_validation_doc.HasParseError()); + EXPECT_EQ(full_validation_doc.GetParseError(), kParseErrorInvalidChar); +} + TYPED_TEST(DocumentTest, Move) { using Document = TypeParam; auto& alloc = this->doc_.GetAllocator(); diff --git a/tests/exp_update_test.cpp b/tests/exp_update_test.cpp index 76da60b7..051db027 100644 --- a/tests/exp_update_test.cpp +++ b/tests/exp_update_test.cpp @@ -16,8 +16,10 @@ #include +#include #include +#include "sonic/dom/generic_document.h" #include "sonic/experiment/lazy_update.h" namespace { @@ -112,7 +114,7 @@ TEST(UpdateLazy, Basic) { }, }; - for (const auto &t : tests) { + for (const auto& t : tests) { auto ret = sonic_json::UpdateLazy(t.target, t.source); EXPECT_STREQ(ret.c_str(), t.updated.c_str()); @@ -149,26 +151,107 @@ TEST(UpdateLazy, InvalidJson) { } TEST(UpdateLazy, NestedInvalidTargetPropagates) { - // Nested invalid target merged with nested valid source: - // the error from the target-side lazy parse must be propagated and - // the update must fail ("{}"); it must NOT be silently overwritten - // by the source's successful parse. + // Nested invalid target merged with nested valid source follows the same + // legacy fallback as an invalid top-level target, but WithError must still + // propagate the parse error instead of silently reporting success. { std::string target = R"({"a":{"foo":}})"; // nested {"foo":} is invalid std::string source = R"({"a":{"bar":5}})"; auto ret = sonic_json::UpdateLazy(target, source); - EXPECT_STREQ(ret.c_str(), "{}") - << "invalid nested target must propagate as update failure"; + EXPECT_STREQ(ret.c_str(), source.c_str()); + auto ret_with_error = + sonic_json::UpdateLazyWithError(target, + source); + EXPECT_EQ(std::get<0>(ret_with_error), source); + EXPECT_EQ(std::get<1>(ret_with_error), sonic_json::kParseErrorInvalidChar); } { std::string target = R"({"a":{"foo": @}})"; // invalid token inside nested std::string source = R"({"a":{"bar":5}})"; auto ret = sonic_json::UpdateLazy(target, source); - EXPECT_STREQ(ret.c_str(), "{}") - << "invalid nested target must propagate as update failure"; + EXPECT_STREQ(ret.c_str(), source.c_str()); + auto ret_with_error = + sonic_json::UpdateLazyWithError(target, + source); + EXPECT_EQ(std::get<0>(ret_with_error), source); + EXPECT_EQ(std::get<1>(ret_with_error), sonic_json::kParseErrorInvalidChar); + } +} + +TEST(UpdateLazy, RejectsInvalidRawNumbers) { + for (const auto* source : {R"({"a":01})", R"({"a":1abc})", R"({"a":-})"}) { + auto ret = + sonic_json::UpdateLazy(R"({"a":0})", source); + EXPECT_STREQ(ret.c_str(), R"({"a":0})") << source; + auto ret_with_error = + sonic_json::UpdateLazyWithError(R"({"a":0})", + source); + EXPECT_EQ(std::get<0>(ret_with_error), R"({"a":0})") << source; + EXPECT_EQ(std::get<1>(ret_with_error), sonic_json::kParseErrorInvalidChar) + << source; + + sonic_json::Document doc; + doc.Parse(ret); + EXPECT_FALSE(doc.HasParseError()) << source << " -> " << ret; + } + + for (const auto* source : {R"({"a":{"b":01}})", R"({"a":{"b":1abc}})", + R"({"a":{"b":-}})", R"({"b":{]}})"}) { + auto ret = + sonic_json::UpdateLazy(R"({"a":0})", source); + EXPECT_STREQ(ret.c_str(), R"({"a":0})") << source; + auto ret_with_error = + sonic_json::UpdateLazyWithError(R"({"a":0})", + source); + EXPECT_EQ(std::get<0>(ret_with_error), R"({"a":0})") << source; + EXPECT_EQ(std::get<1>(ret_with_error), sonic_json::kParseErrorInvalidChar) + << source; + + sonic_json::Document doc; + doc.Parse(ret); + EXPECT_FALSE(doc.HasParseError()) << source << " -> " << ret; } + + { + const char* source = R"({"a":{"b":1e309}})"; + auto ret = + sonic_json::UpdateLazy(R"({"a":0})", source); + EXPECT_STREQ(ret.c_str(), R"({"a":0})"); + auto ret_with_error = + sonic_json::UpdateLazyWithError(R"({"a":0})", + source); + EXPECT_EQ(std::get<0>(ret_with_error), R"({"a":0})"); + EXPECT_EQ(std::get<1>(ret_with_error), sonic_json::kParseErrorInfinity); + } +} + +TEST(UpdateLazy, RejectsInvalidEscapesInRawValues) { + const char* source = R"({"a":"\q"})"; + auto ret = + sonic_json::UpdateLazy(R"({"a":0})", source); + EXPECT_STREQ(ret.c_str(), R"({"a":0})"); + + auto ret_with_error = + sonic_json::UpdateLazyWithError(R"({"a":0})", + source); + EXPECT_EQ(std::get<0>(ret_with_error), R"({"a":0})"); + EXPECT_EQ(std::get<1>(ret_with_error), sonic_json::kParseErrorEscapedFormat); +} + +TEST(UpdateLazy, NestedRawMergeDoesNotBorrowFreedShadowBuffer) { + auto ret_with_error = + sonic_json::UpdateLazyWithError( + R"({"a":{"x":1}})", R"({"a":{"y":2}})"); + EXPECT_EQ(std::get<1>(ret_with_error), sonic_json::kErrorNone); + + sonic_json::Document doc; + doc.Parse(std::get<0>(ret_with_error)); + ASSERT_FALSE(doc.HasParseError()) << std::get<0>(ret_with_error); + ASSERT_TRUE(doc["a"].IsObject()); + EXPECT_TRUE(doc["a"].HasMember("x")); + EXPECT_TRUE(doc["a"].HasMember("y")); } } // namespace diff --git a/tests/json_pointer_test.cpp b/tests/json_pointer_test.cpp index 5c983004..87cd1da7 100644 --- a/tests/json_pointer_test.cpp +++ b/tests/json_pointer_test.cpp @@ -16,6 +16,7 @@ #include "sonic/dom/json_pointer.h" +#include #include #include "gtest/gtest.h" @@ -183,12 +184,27 @@ TYPED_TEST(JsonPointerTest, QueryNode) { { QueryNode n1(0); EXPECT_TRUE(n1.IsNum()); + EXPECT_TRUE(n1.IsValidNum()); EXPECT_FALSE(n1.IsStr()); EXPECT_EQ(0, n1.GetNum()); EXPECT_EQ(n1.GetStr(), ""); EXPECT_EQ(0, n1.GetStr().size()); } + { + QueryNode n1(-1); + EXPECT_TRUE(n1.IsNum()); + EXPECT_FALSE(n1.IsValidNum()); + EXPECT_EQ(0, n1.GetNum()); + } + + { + QueryNode n1(std::numeric_limits::max()); + EXPECT_TRUE(n1.IsNum()); + EXPECT_TRUE(n1.IsValidNum()); + EXPECT_EQ(std::numeric_limits::max(), n1.GetNum()); + } + { std::string str{"hello"}; QueryNode n1(str); diff --git a/tests/json_tuple_test.cpp b/tests/json_tuple_test.cpp index d5ac968b..41c23e47 100644 --- a/tests/json_tuple_test.cpp +++ b/tests/json_tuple_test.cpp @@ -164,9 +164,125 @@ TEST(JsonTuple, invalidValue) { auto result = JsonTupleWithCodeGen(json, paths, true); EXPECT_EQ(result, expected); + auto result_with_error = + JsonTupleWithCodeGenWithError(json, paths, true); + EXPECT_EQ(std::get<0>(result_with_error), expected); + EXPECT_EQ(std::get<1>(result_with_error), kParseErrorInvalidChar); + expected = {std::nullopt, std::nullopt}; result = JsonTupleWithCodeGen(json, paths, false); EXPECT_EQ(result, expected); + result_with_error = JsonTupleWithCodeGenWithError( + json, paths, false); + EXPECT_EQ(std::get<0>(result_with_error), expected); + EXPECT_EQ(std::get<1>(result_with_error), kParseErrorInvalidChar); +} + +TEST(JsonTuple, ReportsMalformedSuffixAfterAllRequestedKeys) { + std::string json = "{\"a\":1,\"b\":2, BAD}"; + std::vector paths{"a", "b"}; + std::vector> legacy_expected = {"1", "2"}; + std::vector> strict_expected = {std::nullopt, + std::nullopt}; + + auto legacy_result = + JsonTupleWithCodeGen(json, paths, true); + EXPECT_EQ(legacy_result, legacy_expected); + auto legacy_with_error = + JsonTupleWithCodeGenWithError(json, paths, true); + EXPECT_EQ(std::get<0>(legacy_with_error), legacy_expected); + EXPECT_EQ(std::get<1>(legacy_with_error), kParseErrorInvalidChar); + + auto strict_result = + JsonTupleWithCodeGen(json, paths, false); + EXPECT_EQ(strict_result, strict_expected); + auto strict_with_error = + JsonTupleWithCodeGenWithError(json, paths, + false); + EXPECT_EQ(std::get<0>(strict_with_error), strict_expected); + EXPECT_EQ(std::get<1>(strict_with_error), kParseErrorInvalidChar); +} + +TEST(JsonTuple, ReportsTrailingGarbageAndMissingComma) { + std::vector paths{"a", "b"}; + std::vector> legacy_expected = {"1", std::nullopt}; + auto trailing_with_error = + JsonTupleWithCodeGenWithError( + "{\"a\":1} garbage", paths, true); + EXPECT_EQ(std::get<0>(trailing_with_error), legacy_expected); + EXPECT_EQ(std::get<1>(trailing_with_error), kParseErrorInvalidChar); + + std::vector> strict_expected = {std::nullopt, + std::nullopt}; + auto trailing_strict = JsonTupleWithCodeGenWithError( + "{\"a\":1} garbage", paths, false); + EXPECT_EQ(std::get<0>(trailing_strict), strict_expected); + EXPECT_EQ(std::get<1>(trailing_strict), kParseErrorInvalidChar); + + auto missing_comma_with_error = + JsonTupleWithCodeGenWithError( + "{\"a\":1 \"b\":2}", paths, true); + EXPECT_EQ(std::get<0>(missing_comma_with_error), strict_expected); + EXPECT_EQ(std::get<1>(missing_comma_with_error), kParseErrorInvalidChar); +} + +TEST(JsonTuple, ReportsTrailingCommaInSkippedMembers) { + std::vector paths{"a"}; + std::vector> legacy_expected = {"1"}; + auto legacy_with_error = + JsonTupleWithCodeGenWithError( + R"({"a":1,"bad":2,})", paths, true); + EXPECT_EQ(std::get<0>(legacy_with_error), legacy_expected); + EXPECT_EQ(std::get<1>(legacy_with_error), kParseErrorInvalidChar); + + std::vector> strict_expected = {std::nullopt}; + auto strict_with_error = + JsonTupleWithCodeGenWithError( + R"({"a":1,"bad":2,})", paths, false); + EXPECT_EQ(std::get<0>(strict_with_error), strict_expected); + EXPECT_EQ(std::get<1>(strict_with_error), kParseErrorInvalidChar); + + auto all_skipped_legacy = + JsonTupleWithCodeGenWithError(R"({"bad":2,})", + paths, true); + EXPECT_EQ(std::get<0>(all_skipped_legacy), strict_expected); + EXPECT_EQ(std::get<1>(all_skipped_legacy), kParseErrorInvalidChar); + + auto all_skipped_strict = + JsonTupleWithCodeGenWithError(R"({"bad":2,})", + paths, false); + EXPECT_EQ(std::get<0>(all_skipped_strict), strict_expected); + EXPECT_EQ(std::get<1>(all_skipped_strict), kParseErrorInvalidChar); +} + +TEST(JsonTuple, ReportsInvalidSkippedValuesAfterRequestedKeys) { + std::vector paths{"a"}; + std::vector> legacy_expected = {"1"}; + std::vector> strict_expected = {std::nullopt}; + + auto invalid_escape_legacy = + JsonTupleWithCodeGenWithError( + R"({"a":1,"bad":"\q"})", paths, true); + EXPECT_EQ(std::get<0>(invalid_escape_legacy), legacy_expected); + EXPECT_EQ(std::get<1>(invalid_escape_legacy), kParseErrorEscapedFormat); + + auto invalid_escape_strict = + JsonTupleWithCodeGenWithError( + R"({"a":1,"bad":"\q"})", paths, false); + EXPECT_EQ(std::get<0>(invalid_escape_strict), strict_expected); + EXPECT_EQ(std::get<1>(invalid_escape_strict), kParseErrorEscapedFormat); + + auto invalid_number_legacy = + JsonTupleWithCodeGenWithError( + R"({"a":1,"bad":1e309})", paths, true); + EXPECT_EQ(std::get<0>(invalid_number_legacy), legacy_expected); + EXPECT_EQ(std::get<1>(invalid_number_legacy), kParseErrorInfinity); + + auto invalid_number_strict = + JsonTupleWithCodeGenWithError( + R"({"a":1,"bad":1e309})", paths, false); + EXPECT_EQ(std::get<0>(invalid_number_strict), strict_expected); + EXPECT_EQ(std::get<1>(invalid_number_strict), kParseErrorInfinity); } TEST(JsonTuple, NoMatchAllKeys) { diff --git a/tests/jsonpath_test.cpp b/tests/jsonpath_test.cpp index acf91540..61dfc2f3 100644 --- a/tests/jsonpath_test.cpp +++ b/tests/jsonpath_test.cpp @@ -31,6 +31,27 @@ namespace { using namespace sonic_json; +template +class FailingCommaGenerator + : public internal::SkipScanner2::JsonGeneratorInterface { + public: + bool writeRaw(StringView) override { return true; } + bool copyCurrentStructure(StringView) override { return true; } + bool copyCurrentStructureSingleResult(StringView) override { return true; } + bool copyCurrentStructureJsonTupleCodeGen( + StringView, size_t, std::vector>&, + internal::SkipScanner2::JsonValueType) override { + return true; + } + bool writeRawValue(StringView) override { return true; } + bool writeStartArray() override { return true; } + bool writeEndArray() override { return true; } + bool writeComma() override { return false; } + bool isEmpty() override { return false; } + bool isBeginArray() override { return false; } + SonicError getError() const override { return kErrorNoMem; } +}; + #define TestOk(json, path, expect) \ do { \ auto got = GetByJsonPathOnDemand(json, path); \ @@ -91,6 +112,25 @@ void ValidBatchOK(const std::string json, } } +TEST(JsonPath, GeneratorWriteFailurePropagatesNoMem) { + StringView json("1", 1); + internal::SkipScanner2 scan; + scan.data_ = reinterpret_cast(json.data()); + scan.len_ = json.size(); + internal::JsonPath path; + ASSERT_TRUE(path.Parse("$")); + FailingCommaGenerator generator; + auto factory = [&](WriteBuffer&) + -> std::shared_ptr> { return nullptr; }; + + const bool matched = + scan.getJsonPath(path, 1, &generator, factory); + EXPECT_FALSE(matched); + EXPECT_EQ(kErrorNoMem, scan.error_); +} + TEST(JsonPath, RootIdentifier) { TestOk("[\"[\\\",\"]", "$", "[\"[\\\",\"]"); TestOk(" null ", "$", "null"); @@ -216,6 +256,7 @@ TEST(JsonPathWildcard, Basic) { // ignore when not found TestOk(R"([{"a":123}, {}])", "$[*].a", "123"); TestOk(R"([[123, 456], []])", "$[*][1]", "456"); + TestOk(R"([[123,456],{"a":1},[0,99]])", "$[*][1]", "[456,99]"); // ignore when encounter the mismatched type TestOk(R"([{"a":123}, null])", "$[*].a", "123"); @@ -426,10 +467,11 @@ TEST(JsonPath, KeyIntoStringValue) { TEST(JsonPath, BeforeNan) { auto json = R"( {"name":"xiaoxiao", "gender": false, "height": Nan, "passed": true} )"; - TestOk(json, "$.name", "xiaoxiao"); - TestOk(json, "$.gender", "false"); - TestOk(json, "$.height", ""); - TestOk(json, "$.passed", ""); + for (const auto* path : {"$.name", "$.gender", "$.height", "$.passed"}) { + auto got = GetByJsonPathOnDemand(json, path); + EXPECT_EQ(std::get<0>(got), ""); + EXPECT_EQ(std::get<1>(got), kParseErrorInvalidChar); + } } TEST(JsonPath, BackslashZero) { @@ -444,13 +486,19 @@ TEST(JsonPath, BackslashZero) { TEST(JsonPath, sparkFeature) { auto json = R"( {"price":"129.99","suggested_price":"106.39","sku_name":"Shoe Model 4 825 Mint Green [High Quality Basketball Shoe )"; - TestOk(json, "$.price", "129.99"); + auto got = GetByJsonPathOnDemand(json, "$.price"); + EXPECT_EQ(std::get<0>(got), ""); + EXPECT_EQ(std::get<1>(got), kParseErrorInvalidChar); json = R"( {"key":"fakekey","labels":"ProductMgmt","labelsIterator":"ProductMgmt","labelsSize":1,"name":"Link","setExtra":false,"setKey":true,"setLabels":true,"setName":true,"setType":false,"setValues":true,"type":0,"values":"2mPs6","valuesIterator":"2mPs6","valuesSize":1 )"; - TestOk(json, "$.key", "fakekey"); - TestOk(json, "$.labels", "ProductMgmt"); + got = GetByJsonPathOnDemand(json, "$.key"); + EXPECT_EQ(std::get<0>(got), ""); + EXPECT_EQ(std::get<1>(got), kParseErrorInvalidChar); + got = GetByJsonPathOnDemand(json, "$.labels"); + EXPECT_EQ(std::get<0>(got), ""); + EXPECT_EQ(std::get<1>(got), kParseErrorInvalidChar); } TEST(JsonPath, illegalJson) { @@ -688,7 +736,7 @@ TEST(JsonPath, JsonInfiniteLoop2) { auto got = GetByJsonPathOnDemand( json, "$.motor_content_boost"); - EXPECT_EQ(std::get<1>(got), kParseErrorEof); + EXPECT_EQ(std::get<1>(got), kParseErrorInvalidChar); EXPECT_EQ(std::get<0>(got), ""); } @@ -698,7 +746,7 @@ TEST(JsonPath, JsonInfiniteLoop3) { auto got = GetByJsonPathOnDemand( json, path); - EXPECT_EQ(std::get<1>(got), kParseErrorEof); + EXPECT_EQ(std::get<1>(got), kParseErrorInvalidChar); EXPECT_EQ(std::get<0>(got), ""); } @@ -720,7 +768,15 @@ TEST(JsonPath, JsonTuple) { auto got = GetByJsonPathOnDemand( json, "$.b"); - EXPECT_EQ(std::get<1>(got), kParseErrorUnexpect); + EXPECT_EQ(std::get<1>(got), kParseErrorInvalidChar); + EXPECT_EQ(std::get<0>(got), ""); +} + +TEST(JsonPathOnDemand, RejectsInvalidSkippedStringBeforeMatchedPath) { + auto got = + GetByJsonPathOnDemand( + R"({"a":1,"bad":"\q"})", "$.a"); + EXPECT_EQ(std::get<1>(got), kParseErrorEscapedFormat); EXPECT_EQ(std::get<0>(got), ""); } @@ -757,6 +813,24 @@ TEST(JsonPathDom, MultiNodesSerializeAndFilterNulls) { EXPECT_EQ(std::get<0>(got), R"(["x",1])"); } +TEST(JsonPathDom, ParseFlagsMatchOnDemandCompatibilityMode) { + std::string control_json = std::string(R"({"a":")") + char(1) + R"("})"; + auto dom_control = GetByJsonPath(control_json, "$.a"); + auto ondemand_control = + GetByJsonPathOnDemand(control_json, "$.a"); + EXPECT_EQ(std::get<1>(dom_control), kErrorNone); + EXPECT_EQ(std::get<1>(ondemand_control), kErrorNone); + EXPECT_EQ(std::get<0>(dom_control), std::get<0>(ondemand_control)); + + std::string integer_json = R"({"a":5555555555555555555555555555})"; + auto dom_integer = GetByJsonPath(integer_json, "$.a"); + auto ondemand_integer = + GetByJsonPathOnDemand(integer_json, "$.a"); + EXPECT_EQ(std::get<1>(dom_integer), kErrorNone); + EXPECT_EQ(std::get<1>(ondemand_integer), kErrorNone); + EXPECT_EQ(std::get<0>(dom_integer), std::get<0>(ondemand_integer)); +} + TEST(JsonPathDom, NotFoundAndUnsupportedPath) { auto not_found = GetByJsonPath(R"({"a":1})", "$.b"); EXPECT_EQ(std::get<1>(not_found), kNotFoundByJsonPath); @@ -779,6 +853,65 @@ TEST(JsonPathDom, NegativeIndexSupportedButOnDemandUnsupported) { EXPECT_EQ(std::get<0>(ondemand_got), ""); } +TEST(JsonPathOnDemand, RejectsTrailingAndMalformedRootAfterMatchOrNoMatch) { + auto matched_trailing = GetByJsonPathOnDemand( + R"({"a":1} garbage)", "$.a"); + EXPECT_EQ(std::get<1>(matched_trailing), kParseErrorInvalidChar); + EXPECT_EQ(std::get<0>(matched_trailing), ""); + + auto root_trailing = + GetByJsonPathOnDemand("1]", "$"); + EXPECT_EQ(std::get<1>(root_trailing), kParseErrorInvalidChar); + EXPECT_EQ(std::get<0>(root_trailing), ""); + + auto invalid_sibling = GetByJsonPathOnDemand( + R"({"x":{]},"a":2})", "$.a"); + EXPECT_EQ(std::get<1>(invalid_sibling), kParseErrorInvalidChar); + EXPECT_EQ(std::get<0>(invalid_sibling), ""); + + auto no_match_trailing = GetByJsonPathOnDemand( + R"({"a":1} garbage)", "$.missing"); + EXPECT_EQ(std::get<1>(no_match_trailing), kParseErrorInvalidChar); + EXPECT_EQ(std::get<0>(no_match_trailing), ""); +} + +TEST(JsonPathOnDemand, RejectsTrailingCommaInSkippedContainers) { + auto array_match = + GetByJsonPathOnDemand(R"([1,])", "$[0]"); + EXPECT_EQ(std::get<1>(array_match), kParseErrorInvalidChar); + EXPECT_EQ(std::get<0>(array_match), ""); + + auto array_wildcard = + GetByJsonPathOnDemand(R"([1,])", "$[*]"); + EXPECT_EQ(std::get<1>(array_wildcard), kParseErrorInvalidChar); + EXPECT_EQ(std::get<0>(array_wildcard), ""); + + auto nested_array = GetByJsonPathOnDemand( + R"({"a":[1,],"b":2})", "$.a[0]"); + EXPECT_EQ(std::get<1>(nested_array), kParseErrorInvalidChar); + EXPECT_EQ(std::get<0>(nested_array), ""); + + auto object_simple_match = + GetByJsonPathOnDemand(R"({"a":1,})", "$.a"); + EXPECT_EQ(std::get<1>(object_simple_match), kParseErrorInvalidChar); + EXPECT_EQ(std::get<0>(object_simple_match), ""); + + auto object_match = GetByJsonPathOnDemand( + R"({"a":1,"b":2,})", "$.a"); + EXPECT_EQ(std::get<1>(object_match), kParseErrorInvalidChar); + EXPECT_EQ(std::get<0>(object_match), ""); + + auto object_no_match = GetByJsonPathOnDemand( + R"({"a":1,})", "$.missing"); + EXPECT_EQ(std::get<1>(object_no_match), kParseErrorInvalidChar); + EXPECT_EQ(std::get<0>(object_no_match), ""); + + auto nested_object = GetByJsonPathOnDemand( + R"({"a":{"b":1,},"c":2})", "$.a.b"); + EXPECT_EQ(std::get<1>(nested_object), kParseErrorInvalidChar); + EXPECT_EQ(std::get<0>(nested_object), ""); +} + TEST(JsonPathDump, SerializeCoversEmptySingleAndMulti) { Document doc; doc.Parse(R"({"a":null,"s":"abc","arr":[null,"x",1]})"); diff --git a/tests/node_test.cpp b/tests/node_test.cpp index 25f4a490..529f2324 100644 --- a/tests/node_test.cpp +++ b/tests/node_test.cpp @@ -15,9 +15,14 @@ * limitations under the License. */ +#include #include +#include +#include #include #include +#include +#include #include "gtest/gtest.h" #include "sonic/dom/dynamicnode.h" @@ -346,6 +351,23 @@ TYPED_TEST(NodeTest, FindMember) { auto& value1 = obj["Unknown"]; EXPECT_TRUE(value1.IsNull()); } + { + const NodeType& const_obj = obj; + const auto& missing = const_obj["Unknown"]; + EXPECT_TRUE(missing.IsNull()); + EXPECT_TRUE(const_obj["Unknown"].IsNull()); + } +} + +TYPED_TEST(NodeTest, MemberStorageIsCompactAndMovable) { + using MemberNode = typename TypeParam::MemberNode; + using NameRef = decltype((std::declval().name)); + static_assert(std::is_lvalue_reference::value, + "member key should remain reference-like through expressions"); + static_assert(std::is_move_constructible::value, + "object members must be movable during container maintenance"); + static_assert(sizeof(MemberNode) == sizeof(TypeParam) * 2, + "object members should stay compact for traversal performance"); } template @@ -382,6 +404,9 @@ TYPED_TEST(NodeTest, AtPointer) { {"Object", "Array", 1, "Double"})) == nullptr); EXPECT_TRUE(obj.AtPointer(JsonPointerType({"EArray", 0})) == nullptr); EXPECT_TRUE(obj.AtPointer(JsonPointerType({"EArray", -1})) == nullptr); + EXPECT_TRUE(obj.AtPointer(JsonPointerType( + {"EArray", std::numeric_limits::max()})) == + nullptr); EXPECT_TRUE(obj.AtPointer(JsonPointerType({"Object", 0})) == nullptr); AtPointerHelper(obj); @@ -527,12 +552,36 @@ TYPED_TEST(NodeTest, RemoveMemberWithDupKey) { EXPECT_TRUE(node_map.Empty()); } +TYPED_TEST(NodeTest, RemoveMemberWithMapKeepsDuplicateKeyOrder) { + using NodeType = TypeParam; + using Allocator = typename NodeType::alloc_type; + Allocator a; + NodeType obj(kObject); + obj.AddMember("x", NodeType(0), a); + obj.AddMember("dup", NodeType(1), a); + obj.AddMember("dup", NodeType(2), a); + ASSERT_TRUE(obj.CreateMap(a)); + + ASSERT_TRUE(obj.RemoveMember("x")); + auto it = obj.FindMember("dup"); + ASSERT_NE(it, obj.MemberEnd()); + EXPECT_EQ(2, it->value.GetInt64()); +} + TYPED_TEST(NodeTest, Erase) { using NodeType = TypeParam; using Allocator = typename NodeType::alloc_type; NodeType node1; Allocator a; + { + NodeType empty; + empty.SetArray(); + auto ret = empty.Erase(empty.Begin(), empty.End()); + EXPECT_EQ(ret, empty.Begin()); + EXPECT_TRUE(empty.Empty()); + } + TestFixture::Push100Nodes(node1, a); { NodeType node2; @@ -577,6 +626,20 @@ TYPED_TEST(NodeTest, Erase) { EXPECT_TRUE(node2.Size() == size_t((9 - i) * 10)); } } + { + NodeType node2; + node2.SetArray(); + node2.PushBack(NodeType("a", 1, a), a); + node2.PushBack(NodeType(kObject), a); + node2[1].AddMember("k", NodeType("v", 1, a), a); + node2.PushBack(NodeType("tail", 4, a), a); + node2.Erase(node2.Begin(), node2.Begin() + 1); + ASSERT_EQ(2u, node2.Size()); + ASSERT_TRUE(node2[0].IsObject()); + ASSERT_TRUE(node2[0].HasMember("k")); + EXPECT_EQ("v", node2[0]["k"].GetStringView()); + EXPECT_EQ("tail", node2[1].GetStringView()); + } } TYPED_TEST(NodeTest, Back) { @@ -911,6 +974,52 @@ TEST(DNodeTest, CopyRawOrNumStrWithNullAllocatorDoesNotCrash) { #endif } +struct MovingReallocAllocator { + void* Malloc(size_t n) { return std::malloc(n); } + void* Realloc(void* p, size_t old_size, size_t new_size) { + if (new_size == 0) { + std::free(p); + return nullptr; + } + void* q = std::malloc(new_size); + if (q && p) { + std::memcpy(q, p, std::min(old_size, new_size)); + } + std::free(p); + return q; + } + static void Free(void* p) { std::free(p); } + static constexpr bool kNeedFree = true; +}; + +TEST(DNodeTest, PushBackValueAliasedInsideArraySurvivesReallocation) { + using NodeType = DNode; + MovingReallocAllocator alloc; + NodeType arr(kArray); + arr.Reserve(1, alloc); + arr.PushBack(NodeType(7), alloc); + ASSERT_EQ(1u, arr.Size()); + ASSERT_EQ(1u, arr.Capacity()); + + arr.PushBack(std::move(arr[0]), alloc); + ASSERT_EQ(2u, arr.Size()); + EXPECT_TRUE(arr[0].IsNull()); + EXPECT_EQ(7, arr[1].GetInt64()); +} + +TEST(DNodeTest, CopyFromSelfLeavesNodeUnchanged) { + using NodeType = DNode; + SimpleAllocator alloc; + NodeType node(kObject); + node.AddMember("a", NodeType("value", alloc), alloc); + ASSERT_EQ(1u, node.Size()); + + node.CopyFrom(node, alloc, true); + ASSERT_TRUE(node.IsObject()); + ASSERT_EQ(1u, node.Size()); + EXPECT_EQ("value", node["a"].GetStringView()); +} + TYPED_TEST(NodeTest, SourceAllocator) { using NodeType = TypeParam; using Allocator = typename NodeType::alloc_type; diff --git a/tests/parse_schema_test.cpp b/tests/parse_schema_test.cpp index 2860d1c2..a621d5b4 100644 --- a/tests/parse_schema_test.cpp +++ b/tests/parse_schema_test.cpp @@ -102,6 +102,9 @@ TEST(ParseSchema, SuccessBasic) { "string": null, "object": null, "array": null})", R"([])", R"([])"); TestSuccess(R"({"obj":{}})", R"({"obj":{"a":1}})", R"({"obj":{"a":1}})"); + TestSuccess(R"({"obj":{}, "keep":0})", + R"({"obj":{"a":1}, "keep":2, "obj":3})", + R"({"obj":{"a":1}, "keep":2})"); TestSuccess(R"({"obj":{"a":2}})", R"({"obj":{"a":1, "b":1}})", R"({"obj":{"a":1}})"); TestSuccess( @@ -164,6 +167,19 @@ TEST(ParseSchema, FailedBasic) { TestFailed(R"(null)", R"([null,])"); } +TEST(ParseSchema, FailedUpdatePreservesOriginalDocument) { + Document doc; + doc.Parse(R"({"a":"old","b":"old"})"); + ASSERT_FALSE(doc.HasParseError()); + + doc.ParseSchema(R"({"a":"new","b":)"); + ASSERT_TRUE(doc.HasParseError()); + ASSERT_TRUE(doc["a"].IsString()); + ASSERT_TRUE(doc["b"].IsString()); + EXPECT_EQ("old", doc["a"].GetStringView()); + EXPECT_EQ("old", doc["b"].GetStringView()); +} + TEST(ParseSchema, ParseOverflowNumAsNumStr) { std::string schema = R"({"val": 1})"; std::string json = R"({"val": 18446744073709551616})"; @@ -175,4 +191,59 @@ TEST(ParseSchema, ParseOverflowNumAsNumStr) { EXPECT_EQ(doc["val"].GetStringView(), "18446744073709551616"); } +TEST(ParseSchema, SkippedUnknownFieldPropagatesParseError) { + Document doc; + doc.Parse(R"({"a":1})"); + ASSERT_FALSE(doc.HasParseError()); + + doc.ParseSchema(R"({"x":1abc,"a":2})"); + EXPECT_TRUE(doc.HasParseError()); + EXPECT_EQ(kParseErrorInvalidChar, doc.GetParseError()); + EXPECT_EQ(1, doc["a"].GetInt64()); +} + +TEST(ParseSchema, SkippedUnknownFieldHonorsParseFlags) { + Document doc; + doc.Parse(R"({"a":1})"); + ASSERT_FALSE(doc.HasParseError()); + + doc.ParseSchema( + R"({"x":1e309,"a":2})"); + EXPECT_FALSE(doc.HasParseError()); + EXPECT_EQ(2, doc["a"].GetInt64()); +} + +TEST(ParseSchema, FailedOwnAllocatorTransactionDoesNotGrowPool) { + Document doc; + doc.Parse( + R"({"a":"old","b":{"c":[1,2,3]},"pad":"abcdefghijklmnopqrstuvwxyz"})"); + ASSERT_FALSE(doc.HasParseError()); + const size_t size_before = doc.GetAllocator().Size(); + + for (int i = 0; i < 16; ++i) { + doc.ParseSchema(R"({"a":"new","b":)"); + ASSERT_TRUE(doc.HasParseError()); + EXPECT_EQ("old", doc["a"].GetStringView()); + EXPECT_EQ(size_before, doc.GetAllocator().Size()); + } +} + +TEST(ParseSchema, SuccessfulOwnAllocatorTransactionDoesNotGrowUnbounded) { + Document doc; + doc.Parse( + R"({"a":"old","b":{"c":[1,2,3]},"pad":"abcdefghijklmnopqrstuvwxyz"})"); + ASSERT_FALSE(doc.HasParseError()); + + doc.ParseSchema(R"({"a":"new","b":{"c":[4,5,6]}})"); + ASSERT_FALSE(doc.HasParseError()); + const size_t size_after_first_update = doc.GetAllocator().Size(); + + for (int i = 0; i < 16; ++i) { + doc.ParseSchema(R"({"a":"new","b":{"c":[4,5,6]}})"); + ASSERT_FALSE(doc.HasParseError()); + EXPECT_EQ("new", doc["a"].GetStringView()); + EXPECT_EQ(size_after_first_update, doc.GetAllocator().Size()); + } +} + } // namespace diff --git a/tests/parser_oom_test.cpp b/tests/parser_oom_test.cpp index 3b455eb8..ff7c9252 100644 --- a/tests/parser_oom_test.cpp +++ b/tests/parser_oom_test.cpp @@ -19,11 +19,14 @@ #include #include #include +#include #include +#include #include #include "sonic/dom/handler.h" #include "sonic/dom/parser.h" +#include "sonic/experiment/lazy_update.h" #include "sonic/sonic.h" namespace { @@ -87,15 +90,22 @@ struct AlwaysOomAllocator { static constexpr bool kNeedFree = false; }; +static constexpr size_t kJsonHeadroom = 64; + static std::vector pad_json_bytes(const char* json, size_t len) { - std::vector buf(len + 64, 0); - std::memcpy(buf.data(), json, len); - buf[len] = 'x'; - buf[len + 1] = '"'; - buf[len + 2] = 'x'; + std::vector buf(kJsonHeadroom + len + 64, 0); + uint8_t* data = buf.data() + kJsonHeadroom; + std::memcpy(data, json, len); + data[len] = 'x'; + data[len + 1] = '"'; + data[len + 2] = 'x'; return buf; } +static uint8_t* padded_json_data(std::vector& buf) { + return buf.data() + kJsonHeadroom; +} + TEST(Document, OomDoesNotCrashPushBack) { AlwaysOomAllocator alloc; DNode arr; @@ -106,6 +116,32 @@ TEST(Document, OomDoesNotCrashPushBack) { EXPECT_EQ(0u, arr.Size()); } +TEST(Document, MutatingStatusApisReportOom) { + AlwaysOomAllocator alloc; + + DNode arr; + arr.SetArray(); + EXPECT_EQ(kErrorNoMem, + arr.PushBackWithError(DNode(42), alloc)); + EXPECT_EQ(0u, arr.Size()); + EXPECT_EQ(kErrorNoMem, arr.ReserveWithError(16, alloc)); + + DNode obj; + obj.SetObject(); + DNode::MemberIterator inserted = nullptr; + EXPECT_EQ(kErrorNoMem, + obj.AddMemberWithError("k", DNode(1), alloc, + true, &inserted)); + EXPECT_EQ(obj.MemberEnd(), inserted); + EXPECT_EQ(0u, obj.Size()); + EXPECT_EQ(kErrorNoMem, obj.MemberReserveWithError(16, alloc)); + + DNode num; + EXPECT_EQ(kErrorNoMem, + num.SetStringNumberWithError(StringView("123", 3), alloc)); + EXPECT_TRUE(num.IsNull()); +} + TEST(Document, NoFreeAllocatorWithoutClearCompilesAndRuns) { GenericDocument> doc; doc.Parse("{}"); @@ -184,6 +220,23 @@ TEST(Document, OomDoesNotCrashParseArray) { EXPECT_TRUE(doc.HasParseError()); } +TEST(Document, ParseArrayOomReportsNoMemAtParserOffset) { + OomAfterNthAllocator alloc(1); + GenericDocument> doc(&alloc); + doc.Parse("[1,2,3]"); + EXPECT_TRUE(doc.HasParseError()); + EXPECT_EQ(kErrorNoMem, doc.GetParseError()); + EXPECT_GT(doc.GetErrorOffset(), 0u); +} + +TEST(Document, ParseRejectsLengthPaddingOverflowBeforeCopy) { + Document doc; + doc.Parse("", std::numeric_limits::max()); + EXPECT_TRUE(doc.HasParseError()); + EXPECT_EQ(kErrorNoMem, doc.GetParseError()); + EXPECT_EQ(0u, doc.GetErrorOffset()); +} + TEST(Document, ParseImplHandlesRepeatedOomCleanly) { OomAfterNthAllocator alloc(0); GenericDocument> doc(&alloc); @@ -244,7 +297,7 @@ TEST(Document, ParseLazyEscapedKeyOomReportsNoMem) { Parser p; const char* json = R"({"\n": 1})"; auto buf = pad_json_bytes(json, std::strlen(json)); - auto res = p.ParseLazy(buf.data(), std::strlen(json), sax); + auto res = p.ParseLazy(padded_json_data(buf), std::strlen(json), sax); EXPECT_EQ(kErrorNoMem, res.Error()); } @@ -277,6 +330,179 @@ TEST(Document, AddMemberWithoutMapOnOomLeavesObjectEmpty) { EXPECT_EQ(0u, obj.Size()); } +TEST(Document, AddMemberCopiedKeyOomLeavesObjectEmpty) { + OomAfterNthAllocator alloc(1); + DNode obj; + obj.SetObject(); + DNode val; + val.SetInt64(1); + + auto it = obj.AddMember("k", std::move(val), alloc, true); + EXPECT_EQ(obj.MemberEnd(), it); + EXPECT_EQ(0u, obj.Size()); +} + +TEST(Document, CreateMapOomForTreeNodeReturnsFalse) { + OomAfterNthAllocator alloc(2); + DNode obj; + obj.SetObject(); + obj.AddMember("k", DNode(1), alloc, false); + ASSERT_EQ(1u, obj.Size()); + + alloc.remaining = 1; + EXPECT_FALSE(obj.CreateMap(alloc)); +} + +TEST(Document, RemoveMemberWithMapDoesNotAllocateOrThrow) { + OomAfterNthAllocator alloc(16); + DNode obj; + obj.SetObject(); + obj.AddMember("a", DNode(1), alloc, false); + obj.AddMember("b", DNode(2), alloc, false); + ASSERT_TRUE(obj.CreateMap(alloc)); + + alloc.remaining = 0; + EXPECT_TRUE(obj.RemoveMember("a")); + ASSERT_EQ(1u, obj.Size()); + auto it = obj.FindMember("b"); + ASSERT_NE(obj.MemberEnd(), it); + EXPECT_EQ(2, it->value.GetInt64()); +} + +TEST(Document, TryCopyFromOomPreservesDestination) { + OomAfterNthAllocator alloc(64); + GenericDocument> src(&alloc); + src.Parse(R"({"a":{"b":"copied"},"c":2})"); + ASSERT_FALSE(src.HasParseError()); + + DNode dst; + dst.SetObject(); + dst.AddMember("old", DNode(1), alloc, false); + ASSERT_EQ(1u, dst.Size()); + + alloc.remaining = 1; + EXPECT_FALSE(dst.TryCopyFrom(src, alloc)); + ASSERT_TRUE(dst.IsObject()); + ASSERT_EQ(1u, dst.Size()); + auto old = dst.FindMember("old"); + ASSERT_NE(dst.MemberEnd(), old); + EXPECT_EQ(1, old->value.GetInt64()); + + alloc.remaining = 1; + dst.CopyFrom(src, alloc); + ASSERT_TRUE(dst.IsObject()); + ASSERT_EQ(1u, dst.Size()); + old = dst.FindMember("old"); + ASSERT_NE(dst.MemberEnd(), old); + EXPECT_EQ(1, old->value.GetInt64()); +} + +TEST(Document, ContainerReserveOverflowDoesNotChangeCapacity) { + OomAfterNthAllocator alloc(1); + DNode arr; + arr.SetArray(); + + arr.Reserve(std::numeric_limits::max(), alloc); + EXPECT_EQ(0u, arr.Capacity()); + EXPECT_EQ(0u, arr.Size()); +} + +TEST(Document, HandlerSetupRejectsCapacityOverflow) { + OomAfterNthAllocator alloc(1); + SAXHandler> sax(alloc); + EXPECT_FALSE( + sax.SetUp(StringView("", std::numeric_limits::max() - 1))); + EXPECT_TRUE(sax.oom_); + EXPECT_EQ(kErrorNoMem, sax.GetError()); +} + +TEST(Document, ParseSchemaStringUpdateOomReportsNoMem) { + TrackingNthOomAllocator::balance = 0; + TrackingNthOomAllocator::remaining = 32; + TrackingNthOomAllocator alloc; + GenericDocument> doc(&alloc); + doc.Parse(R"({"a":"old"})"); + ASSERT_FALSE(doc.HasParseError()); + ASSERT_EQ("old", doc["a"].GetStringView()); + + TrackingNthOomAllocator::remaining = 1; + doc.ParseSchema(R"({"a":"new"})"); + EXPECT_TRUE(doc.HasParseError()); + EXPECT_EQ(kErrorNoMem, doc.GetParseError()); + EXPECT_EQ("old", doc["a"].GetStringView()); +} + +TEST(Document, ParseSchemaNewObjectOomPreservesExistingValue) { + TrackingNthOomAllocator::balance = 0; + TrackingNthOomAllocator::remaining = 32; + TrackingNthOomAllocator alloc; + GenericDocument> doc(&alloc); + doc.Parse(R"({"a":"old"})"); + ASSERT_FALSE(doc.HasParseError()); + ASSERT_EQ("old", doc["a"].GetStringView()); + + TrackingNthOomAllocator::remaining = 1; + doc.ParseSchema(R"({"a":{"b":2}})"); + EXPECT_TRUE(doc.HasParseError()); + EXPECT_EQ(kErrorNoMem, doc.GetParseError()); + ASSERT_TRUE(doc["a"].IsString()); + EXPECT_EQ("old", doc["a"].GetStringView()); +} + +TEST(Document, ParseSchemaNewArrayOomPreservesExistingValue) { + TrackingNthOomAllocator::balance = 0; + TrackingNthOomAllocator::remaining = 32; + TrackingNthOomAllocator alloc; + GenericDocument> doc(&alloc); + doc.Parse(R"({"a":"old"})"); + ASSERT_FALSE(doc.HasParseError()); + ASSERT_EQ("old", doc["a"].GetStringView()); + + TrackingNthOomAllocator::remaining = 1; + doc.ParseSchema(R"({"a":[1]} )"); + EXPECT_TRUE(doc.HasParseError()); + EXPECT_EQ(kErrorNoMem, doc.GetParseError()); + ASSERT_TRUE(doc["a"].IsString()); + EXPECT_EQ("old", doc["a"].GetStringView()); +} + +TEST(Document, ParseSchemaStringNumberOverwriteFreesOldValue) { + TrackingNthOomAllocator::balance = 0; + TrackingNthOomAllocator::remaining = 64; + { + TrackingNthOomAllocator alloc; + GenericDocument> doc(&alloc); + doc.ParseSchema( + R"({"n":184467440737095516160})"); + ASSERT_FALSE(doc.HasParseError()); + doc.ParseSchema( + R"({"n":184467440737095516161})"); + ASSERT_FALSE(doc.HasParseError()); + ASSERT_TRUE(doc["n"].IsNumber()); + ASSERT_TRUE(doc["n"].IsStringNumber()); + EXPECT_EQ("184467440737095516161", doc["n"].GetStringNumber()); + } + EXPECT_EQ(0, TrackingNthOomAllocator::balance); +} + +TEST(Document, UpdateNodeLazyReportsAddMemberOom) { + OomAfterNthAllocator alloc(8); + DNode target(kObject); + target.AddMember("a", DNode(1), alloc, false); + DNode source(kObject); + source.AddMember("b", DNode(2), alloc, false); + ASSERT_EQ(1u, target.Size()); + ASSERT_EQ(1u, source.Size()); + + alloc.remaining = 0; + auto err = + internal::UpdateNodeLazy, + OomAfterNthAllocator, ParseFlags::kParseDefault>( + target, source, alloc); + EXPECT_EQ(kErrorNoMem, err); + EXPECT_EQ(1u, target.Size()); +} + TEST(LazySAXHandler, EndObjectOomLeavesStackMatchingSuccessArm) { TrackingNthOomAllocator::balance = 0; TrackingNthOomAllocator::remaining = 1; @@ -292,9 +518,10 @@ TEST(LazySAXHandler, EndObjectOomLeavesStackMatchingSuccessArm) { std::memcpy(buf, kKey, sizeof(kKey)); ASSERT_TRUE(sax.Key(static_cast(buf), sizeof(kKey) - 1, 1)); ASSERT_TRUE(sax.Raw("1", 1)); - ASSERT_TRUE(sax.EndObject(1)); + ASSERT_FALSE(sax.EndObject(1)); EXPECT_TRUE(sax.oom_); - EXPECT_EQ(sizeof(Node), sax.stack_.Size()); + EXPECT_EQ(kErrorNoMem, sax.GetError()); + EXPECT_EQ(sizeof(Node), sax.StackSizeBytes()); } TEST(Document, ParseLazyFreesEscapedKeyOnKeyFailure) { @@ -304,7 +531,7 @@ TEST(Document, ParseLazyFreesEscapedKeyOnKeyFailure) { Parser p; const char* json = R"({"\n": 1})"; auto buf = pad_json_bytes(json, std::strlen(json)); - p.ParseLazy(buf.data(), std::strlen(json), sax); + p.ParseLazy(padded_json_data(buf), std::strlen(json), sax); ASSERT_TRUE(sax.key_called); EXPECT_EQ(0, SentinelTrackingAllocator::balance); @@ -379,6 +606,18 @@ TEST(Parser, EndArrayFalseAbortsParse) { EXPECT_EQ(kSaxTermination, res.Error()); } +TEST(Parser, SaxHandlerArrayOomReportsNoMem) { + const char* json = "[1,2,3]"; + auto buf = pad_json_for_parser(json, std::strlen(json)); + OomAfterNthAllocator alloc(0); + SAXHandler> sax(alloc); + ASSERT_TRUE(sax.SetUp(StringView(json, std::strlen(json)))); + Parser p; + auto res = p.Parse(buf.data(), std::strlen(json), sax); + EXPECT_EQ(kErrorNoMem, res.Error()); + EXPECT_GT(res.Offset(), 0u); +} + TEST(Parser, EndObjectFalseAbortsParse) { const char* json = R"({"a":1})"; auto buf = pad_json_for_parser(json, std::strlen(json)); @@ -543,6 +782,32 @@ TEST(Parser, KeyFalsePreservesSkipSemanticsUnderCheckKeyReturn) { EXPECT_EQ(2, sax.keys_seen); } +struct OomKeyCheckReturnSAX { + static constexpr bool check_key_return = true; + bool Null() { return true; } + bool Bool(bool) { return true; } + bool Int(int64_t) { return true; } + bool Uint(uint64_t) { return true; } + bool Double(double) { return true; } + bool NumStr(StringView) { return true; } + bool Key(StringView) { return false; } + bool String(StringView) { return true; } + bool StartArray() { return true; } + bool EndArray(uint32_t) { return true; } + bool StartObject() { return true; } + bool EndObject(uint32_t) { return true; } + SonicError GetError() const { return kErrorNoMem; } +}; + +TEST(Parser, KeyFalseUnderCheckKeyReturnPropagatesHandlerError) { + const char* json = R"({"a":1})"; + auto buf = pad_json_for_parser(json, std::strlen(json)); + OomKeyCheckReturnSAX sax; + Parser p; + auto res = p.Parse(buf.data(), std::strlen(json), sax); + EXPECT_EQ(kErrorNoMem, res.Error()); +} + struct RejectAllLazySax { using Allocator = SONIC_DEFAULT_ALLOCATOR; Allocator alloc_; @@ -560,7 +825,7 @@ TEST(ParseLazy, RawRejectionReportsSaxTermination) { Parser p; const char* j = "42"; auto buf = pad_json_bytes(j, 2); - auto r = p.ParseLazy(buf.data(), 2, sax); + auto r = p.ParseLazy(padded_json_data(buf), 2, sax); EXPECT_EQ(r.Error(), kSaxTermination); } @@ -569,7 +834,7 @@ TEST(ParseLazy, StartArrayRejectionReportsSaxTermination) { Parser p; const char* j = "[1,2,3]"; auto buf = pad_json_bytes(j, 7); - auto r = p.ParseLazy(buf.data(), 7, sax); + auto r = p.ParseLazy(padded_json_data(buf), 7, sax); EXPECT_EQ(r.Error(), kSaxTermination); } @@ -578,7 +843,7 @@ TEST(ParseLazy, StartObjectRejectionReportsSaxTermination) { Parser p; const char* j = R"({"k":1})"; auto buf = pad_json_bytes(j, 7); - auto r = p.ParseLazy(buf.data(), 7, sax); + auto r = p.ParseLazy(padded_json_data(buf), 7, sax); EXPECT_EQ(r.Error(), kSaxTermination); } @@ -599,8 +864,74 @@ TEST(ParseLazy, AcceptAllStillCompletesCleanly) { Parser p; const char* j = R"({"a":1,"b":[2,3]})"; auto buf = pad_json_bytes(j, std::strlen(j)); - auto r = p.ParseLazy(buf.data(), std::strlen(j), sax); + auto r = p.ParseLazy(padded_json_data(buf), std::strlen(j), sax); + EXPECT_EQ(r.Error(), kErrorNone); +} + +struct CapturingLazySax { + using Allocator = SimpleAllocator; + Allocator alloc_; + std::vector keys; + + Allocator& GetAllocator() { return alloc_; } + bool StartArray() { return true; } + bool EndArray(size_t) { return true; } + bool StartObject() { return true; } + bool EndObject(size_t) { return true; } + bool Key(const char* data, size_t len, size_t allocated) { + keys.emplace_back(data, len); + if (allocated) Allocator::Free(const_cast(data)); + return true; + } + bool Raw(const char*, size_t) { return true; } +}; + +TEST(ParseLazy, EscapedObjectKeyScratchKeepsClosingQuote) { + CapturingLazySax sax; + Parser p; + const char* j = R"({"\n":1})"; + auto buf = pad_json_bytes(j, std::strlen(j)); + auto r = p.ParseLazy(padded_json_data(buf), std::strlen(j), sax); EXPECT_EQ(r.Error(), kErrorNone); + ASSERT_EQ(sax.keys.size(), 1); + EXPECT_EQ(sax.keys[0], "\n"); +} + +TEST(ParseLazy, EmptyInputReportsZeroOffset) { + AcceptAllLazySax sax; + Parser p; + auto r = p.ParseLazy(reinterpret_cast(""), 0, sax); + EXPECT_EQ(kParseErrorInvalidChar, r.Error()); + EXPECT_EQ(0u, r.Offset()); +} + +TEST(ParseLazy, WhitespaceOnlyInputDoesNotWrapOffset) { + AcceptAllLazySax sax; + Parser p; + const char* j = " "; + auto buf = pad_json_bytes(j, std::strlen(j)); + auto r = p.ParseLazy(padded_json_data(buf), std::strlen(j), sax); + EXPECT_EQ(kParseErrorInvalidChar, r.Error()); + EXPECT_LE(r.Offset(), std::strlen(j)); +} + +TEST(ParseLazy, UnterminatedObjectKeyReportsCurrentOffset) { + AcceptAllLazySax sax; + Parser p; + const char* j = R"({"abc)"; + auto buf = pad_json_bytes(j, std::strlen(j)); + auto r = p.ParseLazy(padded_json_data(buf), std::strlen(j), sax); + EXPECT_EQ(kParseErrorInvalidChar, r.Error()); + EXPECT_EQ(std::strlen(j), r.Offset()); +} + +TEST(LazySAXHandler, HandlerOwnsStackAndIsMoveOnly) { + using Handler = LazySAXHandler; + static_assert(!std::is_copy_constructible::value, + "LazySAXHandler owns a raw stack and must not be copyable"); + static_assert( + !std::is_copy_assignable::value, + "LazySAXHandler owns a raw stack and must not be copy assignable"); } struct StringKeyCountingSAX { @@ -647,4 +978,15 @@ TEST(Parser, InvalidSurrogateInKeyDoesNotInvokeKeyCallback) { EXPECT_EQ(0, sax.key_calls); } +TEST(Parser, PaddedShortLiteralReturnsInvalidChar) { + for (char c : {'t', 'f', 'n'}) { + char json[2] = {c, '\0'}; + auto buf = pad_json_for_parser(json, 1); + StringKeyCountingSAX sax; + Parser p; + auto res = p.Parse(buf.data(), 1, sax); + EXPECT_EQ(kParseErrorInvalidChar, res.Error()) << c; + } +} + } // namespace diff --git a/tests/skip_test.cpp b/tests/skip_test.cpp index 1f59b60f..676289c8 100644 --- a/tests/skip_test.cpp +++ b/tests/skip_test.cpp @@ -14,6 +14,7 @@ * limitations under the License. */ +#include #include #include "gtest/gtest.h" @@ -40,6 +41,14 @@ void TestGetOnDemandFailed(StringView json, const JsonPointer& path, EXPECT_EQ(target, ""); } +void TestGetOnDemandError(StringView json, const JsonPointer& path, + SonicError expect) { + StringView target; + auto result = GetOnDemand(json, path, target); + EXPECT_EQ(result.Error(), expect) << json << result.Offset(); + EXPECT_EQ(target, ""); +} + TEST(GetOnDemand, SuccessBasic) { TestGetOnDemand("{}", {}, "{}"); TestGetOnDemand("1", {}, "1"); @@ -85,7 +94,7 @@ TEST(GetOnDemand, SuccessEscapeCharacters) { "1234567890123456789012345678901\"123":"avx2_string", "obj\n\t\\":{"name\\\\\\\\":"string\\\\"}, "array\"\t\n\b\r":["\n\tHello,\nworld!\n", - "{\" / \b \f \n \r \t } [景] 测试中文 😀")], + "{\" / \b \f \n \r \t } [景] 测试中文 😀"], "\"a\"":"\n\tHello,\nworld!\n"})", {"\"a\""}, R"("\n\tHello,\nworld!\n")"); } @@ -94,12 +103,95 @@ TEST(GetOnDemand, Failed) { TestGetOnDemandFailed("{}", {1}, ParseResult(kParseErrorMismatchType, 0)); TestGetOnDemandFailed("{}", {"a"}, ParseResult(kParseErrorUnknownObjKey, 1)); - TestGetOnDemandFailed("{123}", {"a"}, - ParseResult(kParseErrorUnknownObjKey, 4)); + TestGetOnDemandFailed("{123}", {"a"}, ParseResult(kParseErrorInvalidChar, 2)); TestGetOnDemandFailed("[]", {1}, ParseResult(kParseErrorArrIndexOutOfRange, 2)); + TestGetOnDemandFailed("[]", {0}, + ParseResult(kParseErrorArrIndexOutOfRange, 2)); + + TestGetOnDemandError(R"("\")", {}, kParseErrorInvalidChar); +} + +TEST(GetOnDemand, EmptyInputDoesNotReadBeforeBuffer) { + TestGetOnDemandFailed("", {}, ParseResult(kParseErrorInvalidChar, 0)); +} + +TEST(GetOnDemand, LargeIntegralPointerDoesNotWrapToZero) { + JsonPointer path{JsonPointerNode(uint64_t{1} << 32)}; + TestGetOnDemandFailed("[10,20]", path, + ParseResult(kParseErrorArrIndexOutOfRange, 6)); +} + +TEST(GetOnDemand, HugeUnsignedPointerDoesNotWrapThroughSignedStorage) { + JsonPointer path{JsonPointerNode(std::numeric_limits::max())}; + TestGetOnDemandFailed("[10,20]", path, + ParseResult(kParseErrorArrIndexOutOfRange, 6)); +} + +TEST(GetOnDemand, NegativePointerIndexReportsOutOfRange) { + JsonPointer path{JsonPointerNode(-1)}; + TestGetOnDemandFailed("[10,20]", path, + ParseResult(kParseErrorArrIndexOutOfRange, 1)); +} + +TEST(GetOnDemand, RejectsInvalidSkippedValuesAndTrailingTargetGarbage) { + TestGetOnDemandFailed(R"({"x":1abc "a":2})", {"a"}, + ParseResult(kParseErrorInvalidChar, 6)); + TestGetOnDemandFailed(R"([1abc,2])", {1}, + ParseResult(kParseErrorInvalidChar, 2)); + TestGetOnDemandFailed(R"({"a":[1] 2})", {"a"}, + ParseResult(kParseErrorInvalidChar, 9)); + TestGetOnDemandFailed(R"({"a":"x" 2})", {"a"}, + ParseResult(kParseErrorInvalidChar, 9)); + TestGetOnDemandError(R"({"x":{]},"a":2})", {"a"}, kParseErrorInvalidChar); + TestGetOnDemandError(R"({"x":[}],"a":2})", {"a"}, kParseErrorInvalidChar); + TestGetOnDemandError("{]}", {}, kParseErrorInvalidChar); + TestGetOnDemandError("[}]", {}, kParseErrorInvalidChar); + TestGetOnDemandError(R"({"a":1]})", {"a"}, kParseErrorInvalidChar); + TestGetOnDemandError("1]", {}, kParseErrorInvalidChar); + TestGetOnDemandError(R"({"a":1,"bad":"\q"})", {"missing"}, + kParseErrorEscapedFormat); +} + +TEST(GetOnDemand, RejectsInvalidMatchedPathContainerSuffix) { + TestGetOnDemandError(R"({"a":[1] 2})", {"a", 0}, kParseErrorInvalidChar); + TestGetOnDemandError(R"({"a":{"b":1} 2})", {"a", "b"}, + kParseErrorInvalidChar); + TestGetOnDemandError(R"([[1] 2])", {0, 0}, kParseErrorInvalidChar); + TestGetOnDemandError(R"({"a":[{"b":1} 2]})", {"a", 0, "b"}, + kParseErrorInvalidChar); + TestGetOnDemandError(R"({"a":[1,]})", {"a", 0}, kParseErrorInvalidChar); + TestGetOnDemandError(R"({"a":{"b":1,}})", {"a", "b"}, kParseErrorInvalidChar); + TestGetOnDemandError(R"({"a":1,})", {"a"}, kParseErrorInvalidChar); +} + +TEST(GetOnDemand, FastModeDoesNotValidateUnvisitedSuffix) { + TestGetOnDemand(R"({"a":1,"bad":"\q"})", {"a"}, "1"); + TestGetOnDemand(R"({"a":1} garbage)", {"a"}, "1"); + TestGetOnDemand(R"({"a":[1,2] 3})", {"a", 0}, "1"); +} - TestGetOnDemandFailed(R"("\")", {}, ParseResult(kParseErrorInvalidChar, 3)); +TEST(GetOnDemand, FullValidationModeRejectsUnvisitedSuffix) { + StringView target; + auto escaped = GetOnDemand( + R"({"a":1,"bad":"\q"})", JsonPointer{"a"}, target); + EXPECT_EQ(escaped.Error(), kParseErrorEscapedFormat); + EXPECT_EQ(target, ""); + + auto infinity = GetOnDemand( + R"({"a":1,"bad":1e309})", JsonPointer{"a"}, target); + EXPECT_EQ(infinity.Error(), kParseErrorInfinity); + EXPECT_EQ(target, ""); + + auto trailing_match = GetOnDemand( + R"({"a":1} garbage)", JsonPointer{"a"}, target); + EXPECT_EQ(trailing_match.Error(), kParseErrorInvalidChar); + EXPECT_EQ(target, ""); + + auto trailing_no_match = GetOnDemand( + R"({"a":1} garbage)", JsonPointer{"missing"}, target); + EXPECT_EQ(trailing_no_match.Error(), kParseErrorInvalidChar); + EXPECT_EQ(target, ""); } } // namespace diff --git a/tests/writebuffer_test.cpp b/tests/writebuffer_test.cpp index 072ab5bb..d8a3c1cf 100644 --- a/tests/writebuffer_test.cpp +++ b/tests/writebuffer_test.cpp @@ -133,6 +133,16 @@ TEST(WriteBuffer, ToStringIsIdempotentWhenCapacityHasSlack) { EXPECT_EQ(p1, p2); } +TEST(WriteBuffer, ToStringViewDoesNotRequireNullTerminator) { + WriteBuffer wb(1); + wb.PushUnsafe('x'); + ASSERT_EQ(wb.Size(), wb.Capacity()); + + auto sv = wb.ToStringView(); + EXPECT_EQ(sv, "x"); + EXPECT_FALSE(wb.HadOom()); +} + TEST(WriteBuffer, StringSize) { { WriteBuffer wb;