11#include " compile_cache.h"
2+ #include < array>
3+ #include < memory>
24#include < string>
35#include " debug_utils-inl.h"
46#include " env-inl.h"
810#include " path.h"
911#include " util.h"
1012#include " zlib.h"
13+ #include " zstd.h"
1114
1215#ifdef NODE_IMPLEMENTS_POSIX_CREDENTIALS
1316#include < unistd.h> // getuid
@@ -75,18 +78,21 @@ inline void CompileCacheHandler::Debug(const char* format,
7578 }
7679}
7780
78- ScriptCompiler::CachedData* CompileCacheEntry::CopyCache () const {
81+ ScriptCompiler::CachedData* CompileCacheEntry::WrapCache () const {
7982 DCHECK_NOT_NULL (cache);
80- int cache_size = cache-> length ;
81- uint8_t * data = new uint8_t [cache_size];
82- memcpy (data, cache-> data , cache_size);
83+ // The returned CachedData does not own the buffer - it's a view into
84+ // the buffer owned by this entry, which outlives the synchronous
85+ // consumption of the cache during compilation, so no copy is necessary.
8386 return new ScriptCompiler::CachedData (
84- data, cache_size , ScriptCompiler::CachedData::BufferOwned );
87+ cache-> data , cache-> length , ScriptCompiler::CachedData::BufferNotOwned );
8588}
8689
// Magic number used to identify a file as a compile cache file and to verify
// it. See comments in CompileCacheHandler::Persist().
// Every change to the cache file format bumps the last byte of this value,
// so a file written in an older format fails the check, is treated as a
// cache miss, and is then overwritten with the new format.
constexpr uint32_t kCacheMagicNumber = 0x8adfdbb3;
9096
9197const char * CompileCacheEntry::type_name () const {
9298 switch (type) {
@@ -124,10 +130,21 @@ void CompileCacheHandler::ReadCacheFile(CompileCacheEntry* entry) {
124130 uv_fs_req_cleanup (&close_req);
125131 });
126132
133+ // Get the file size upfront so that the cache can be read with a single
134+ // exactly-sized read, and truncated or trailing data can be detected
135+ // without additional read attempts.
136+ int err = uv_fs_fstat (nullptr , &req, file, nullptr );
137+ if (err < 0 ) {
138+ Debug (" fstat failed, %s\n " , uv_strerror (err));
139+ return ;
140+ }
141+ uint64_t file_size = req.statbuf .st_size ;
142+ uv_fs_req_cleanup (&req);
143+
127144 // Read the headers.
128- std::vector <uint32_t > headers ( kHeaderCount ) ;
129- uv_buf_t headers_buf = uv_buf_init ( reinterpret_cast < char *>(headers. data ()),
130- kHeaderCount * sizeof ( uint32_t ) );
145+ std::array <uint32_t , kHeaderCount > headers;
146+ uv_buf_t headers_buf =
147+ uv_buf_init ( reinterpret_cast < char *>(headers. data ()), kHeaderSize );
131148 const int r = uv_fs_read (nullptr , &req, file, &headers_buf, 1 , 0 , nullptr );
132149 if (r != static_cast <int >(headers_buf.len )) {
133150 Debug (" reading header failed, bytes read %d" , r);
@@ -137,13 +154,15 @@ void CompileCacheHandler::ReadCacheFile(CompileCacheEntry* entry) {
137154 Debug (" \n " );
138155 return ;
139156 }
157+ uv_fs_req_cleanup (&req);
140158
141- Debug (" [%d %d %d %d %d]..." ,
159+ Debug (" [%d %d %d %d %d %d ]..." ,
142160 headers[kMagicNumberOffset ],
143161 headers[kCodeSizeOffset ],
144162 headers[kCacheSizeOffset ],
145163 headers[kCodeHashOffset ],
146- headers[kCacheHashOffset ]);
164+ headers[kCacheHashOffset ],
165+ headers[kCacheRawSizeOffset ]);
147166
148167 if (headers[kMagicNumberOffset ] != kCacheMagicNumber ) {
149168 Debug (" magic number mismatch: expected %d, actual %d\n " ,
@@ -166,60 +185,102 @@ void CompileCacheHandler::ReadCacheFile(CompileCacheEntry* entry) {
166185 return ;
167186 }
168187
169- // Read the cache, grow the buffer exponentially whenever it fills up.
170- size_t offset = headers_buf.len ;
171- size_t capacity = 4096 ; // Initial buffer capacity
172- size_t total_read = 0 ;
173- uint8_t * buffer = new uint8_t [capacity];
174-
175- while (true ) {
176- // If there is not enough space to read more data, do a simple
177- // realloc here (we don't actually realloc because V8 requires
178- // the underlying buffer to be delete[]-able).
179- if (total_read == capacity) {
180- size_t new_capacity = capacity * 2 ;
181- auto * new_buffer = new uint8_t [new_capacity];
182- memcpy (new_buffer, buffer, capacity);
183- delete[] buffer;
184- buffer = new_buffer;
185- capacity = new_capacity;
186- }
188+ uint32_t cache_size = headers[kCacheSizeOffset ];
189+ uint32_t raw_size = headers[kCacheRawSizeOffset ];
190+
191+ // Check the cache size. The headers were read successfully, so
192+ // file_size >= kHeaderSize here. The file must contain exactly the
193+ // headers followed by cache_size bytes of cache content.
194+ if (file_size - kHeaderSize != cache_size) {
195+ Debug (" cache size mismatch: expected %d, actual %d\n " ,
196+ cache_size,
197+ file_size - kHeaderSize );
198+ return ;
199+ }
187200
188- uv_buf_t iov = uv_buf_init (reinterpret_cast <char *>(buffer + total_read),
189- capacity - total_read);
190- int bytes_read =
191- uv_fs_read (nullptr , &req, file, &iov, 1 , offset + total_read, nullptr );
201+ // The cache content is stored uncompressed when cache_size == raw_size,
202+ // and zstd-compressed when cache_size < raw_size (see
203+ // CompileCacheHandler::Persist()). Anything else is invalid.
204+ if (cache_size > raw_size) {
205+ Debug (" invalid cache size %d > uncompressed size %d\n " ,
206+ cache_size,
207+ raw_size);
208+ return ;
209+ }
210+
211+ // Read the cache content in one go with an exactly-sized buffer,
212+ // looping only in case of short reads.
213+ std::unique_ptr<uint8_t []> disk_data (new uint8_t [cache_size]);
214+ size_t total_read = 0 ;
215+ while (total_read < cache_size) {
216+ uv_buf_t iov =
217+ uv_buf_init (reinterpret_cast <char *>(disk_data.get () + total_read),
218+ cache_size - total_read);
219+ int bytes_read = uv_fs_read (
220+ nullptr , &req, file, &iov, 1 , kHeaderSize + total_read, nullptr );
192221 if (req.result < 0 ) { // Error.
193222 // req will be cleaned up by scope leave.
194- delete[] buffer;
195223 Debug (" %s\n " , uv_strerror (req.result ));
196224 return ;
197225 }
198226 uv_fs_req_cleanup (&req);
199- if (bytes_read <= 0 ) {
200- break ;
227+ if (bytes_read == 0 ) { // Unexpected EOF - the file shrank under us.
228+ Debug (" cache size mismatch: expected %d, actual %d\n " ,
229+ cache_size,
230+ total_read);
231+ return ;
201232 }
202233 total_read += bytes_read;
203234 }
204235
205- // Check the cache size and hash.
206- if (headers[kCacheSizeOffset ] != total_read) {
207- Debug (" cache size mismatch: expected %d, actual %d\n " ,
208- headers[kCacheSizeOffset ],
209- total_read);
210- return ;
211- }
212- uint32_t cache_hash = GetHash (reinterpret_cast <char *>(buffer), total_read);
236+ // Check the cache hash of the on-disk content before decompressing.
237+ uint32_t cache_hash =
238+ GetHash (reinterpret_cast <char *>(disk_data.get ()), cache_size);
213239 if (headers[kCacheHashOffset ] != cache_hash) {
214240 Debug (" cache hash mismatch: expected %d, actual %d\n " ,
215241 headers[kCacheHashOffset ],
216242 cache_hash);
217243 return ;
218244 }
219245
220- entry->cache .reset (new ScriptCompiler::CachedData (
221- buffer, total_read, ScriptCompiler::CachedData::BufferOwned));
222- Debug (" success, size=%d\n " , total_read);
246+ if (cache_size == raw_size) {
247+ // Stored uncompressed - hand the buffer to V8 directly.
248+ entry->cache .reset (new ScriptCompiler::CachedData (
249+ disk_data.release (),
250+ raw_size,
251+ ScriptCompiler::CachedData::BufferOwned));
252+ } else {
253+ // Cross-check the content size embedded in the zstd frame before
254+ // allocating, in case the headers are corrupted.
255+ unsigned long long content_size = // NOLINT(runtime/int)
256+ ZSTD_getFrameContentSize (disk_data.get (), cache_size);
257+ if (content_size != raw_size) {
258+ Debug (" uncompressed size mismatch: expected %d, actual %d\n " ,
259+ raw_size,
260+ content_size);
261+ return ;
262+ }
263+ // Decompress directly into the buffer handed to V8.
264+ std::unique_ptr<uint8_t []> raw_data (new uint8_t [raw_size]);
265+ size_t decompressed_size =
266+ ZSTD_decompress (raw_data.get (), raw_size, disk_data.get (), cache_size);
267+ if (ZSTD_isError (decompressed_size)) {
268+ Debug (" decompression failed: %s\n " ,
269+ ZSTD_getErrorName (decompressed_size));
270+ return ;
271+ }
272+ if (decompressed_size != raw_size) {
273+ Debug (" decompressed size mismatch: expected %d, actual %d\n " ,
274+ raw_size,
275+ decompressed_size);
276+ return ;
277+ }
278+ entry->cache .reset (new ScriptCompiler::CachedData (
279+ raw_data.release (),
280+ raw_size,
281+ ScriptCompiler::CachedData::BufferOwned));
282+ }
283+ Debug (" success, size=%d\n " , raw_size);
223284}
224285
225286static std::string GetRelativePath (std::string_view path,
@@ -280,11 +341,18 @@ CompileCacheEntry* CompileCacheHandler::GetOrInsert(Local<String> code,
280341 return loaded->second .get ();
281342 }
282343
283- // If the code hash mismatches, the code has changed, discard the stale entry
284- // and create a new one.
285- auto emplaced =
286- compiler_cache_store_.emplace (key, std::make_unique<CompileCacheEntry>());
287- auto * result = emplaced.first ->second .get ();
344+ // If the code hash mismatches, the code has changed, reset the stale
345+ // entry in place. Otherwise insert a new one.
346+ CompileCacheEntry* result;
347+ if (loaded != compiler_cache_store_.end ()) {
348+ result = loaded->second .get ();
349+ result->refreshed = false ;
350+ result->persisted = false ;
351+ } else {
352+ result = compiler_cache_store_
353+ .emplace (key, std::make_unique<CompileCacheEntry>())
354+ .first ->second .get ();
355+ }
288356
289357 result->code_hash = code_hash;
290358 result->code_size = code_utf8.length ();
@@ -418,18 +486,41 @@ void CompileCacheHandler::Persist() {
418486
419487 DCHECK_EQ (entry->cache ->buffer_policy ,
420488 ScriptCompiler::CachedData::BufferOwned);
421- char * cache_ptr =
489+ char * raw_ptr =
422490 reinterpret_cast <char *>(const_cast <uint8_t *>(entry->cache ->data ));
423- uint32_t cache_size = static_cast <uint32_t >(entry->cache ->length );
491+ uint32_t raw_size = static_cast <uint32_t >(entry->cache ->length );
492+
493+ // Compress the cache with zstd to reduce the size on disk. Compression
494+ // level 1 prioritizes speed - persistence usually happens on process
495+ // shutdown and should add as little overhead as possible. If the data
496+ // is not compressible, store it uncompressed, which is indicated by
497+ // the cache size being equal to the uncompressed size in the headers.
498+ size_t compressed_bound = ZSTD_compressBound (raw_size);
499+ std::unique_ptr<uint8_t []> compressed (new uint8_t [compressed_bound]);
500+ size_t compressed_size = ZSTD_compress (
501+ compressed.get (), compressed_bound, raw_ptr, raw_size, 1 );
502+ char * cache_ptr = raw_ptr;
503+ uint32_t cache_size = raw_size;
504+ if (!ZSTD_isError (compressed_size) && compressed_size < raw_size) {
505+ cache_ptr = reinterpret_cast <char *>(compressed.get ());
506+ cache_size = static_cast <uint32_t >(compressed_size);
507+ }
508+ Debug (" [compile cache] compressed cache for %s %s: %d -> %d bytes\n " ,
509+ type_name,
510+ entry->source_filename ,
511+ raw_size,
512+ cache_size);
513+
424514 uint32_t cache_hash = GetHash (cache_ptr, cache_size);
425515
426516 // Generating headers.
427- std::vector <uint32_t > headers ( kHeaderCount ) ;
517+ std::array <uint32_t , kHeaderCount > headers;
428518 headers[kMagicNumberOffset ] = kCacheMagicNumber ;
429519 headers[kCodeSizeOffset ] = entry->code_size ;
430520 headers[kCacheSizeOffset ] = cache_size;
431521 headers[kCodeHashOffset ] = entry->code_hash ;
432522 headers[kCacheHashOffset ] = cache_hash;
523+ headers[kCacheRawSizeOffset ] = raw_size;
433524
434525 // Generate the temporary filename.
435526 // The temporary file should be placed in a location like:
@@ -459,20 +550,21 @@ void CompileCacheHandler::Persist() {
459550 Debug (" -> %s\n " , mkstemp_req.path );
460551 Debug (" [compile cache] writing cache for %s %s to temporary file %s [%d "
461552 " %d %d "
462- " %d %d]..." ,
553+ " %d %d %d ]..." ,
463554 type_name,
464555 entry->source_filename ,
465556 mkstemp_req.path ,
466557 headers[kMagicNumberOffset ],
467558 headers[kCodeSizeOffset ],
468559 headers[kCacheSizeOffset ],
469560 headers[kCodeHashOffset ],
470- headers[kCacheHashOffset ]);
561+ headers[kCacheHashOffset ],
562+ headers[kCacheRawSizeOffset ]);
471563
472564 // Write to the temporary file.
473- uv_buf_t headers_buf = uv_buf_init ( reinterpret_cast < char *>(headers. data ()),
474- headers.size () * sizeof ( uint32_t ) );
475- uv_buf_t data_buf = uv_buf_init (cache_ptr, entry-> cache -> length );
565+ uv_buf_t headers_buf =
566+ uv_buf_init ( reinterpret_cast < char *>( headers.data ()), kHeaderSize );
567+ uv_buf_t data_buf = uv_buf_init (cache_ptr, cache_size );
476568 uv_buf_t bufs[] = {headers_buf, data_buf};
477569
478570 uv_fs_t write_req;
0 commit comments