From ce0fa98cb84095751ced7a2ecd6d715f195154cb Mon Sep 17 00:00:00 2001 From: Caideyipi <87789683+Caideyipi@users.noreply.github.com> Date: Wed, 6 May 2026 14:06:29 +0800 Subject: [PATCH 01/23] fix --- ...sertionEventTableParserTabletIterator.java | 144 ++++++++++++------ .../plan/analyze/load/LoadTsFileAnalyzer.java | 99 +++++++++--- .../event/TsFileInsertionEventParserTest.java | 138 +++++++++++++++++ .../analyze/load/LoadTsFileAnalyzerTest.java | 110 +++++++++++++ 4 files changed, 420 insertions(+), 71 deletions(-) create mode 100644 iotdb-core/datanode/src/test/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzerTest.java diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/pipe/event/common/tsfile/parser/table/TsFileInsertionEventTableParserTabletIterator.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/pipe/event/common/tsfile/parser/table/TsFileInsertionEventTableParserTabletIterator.java index f05cf872c798b..36479a1c701dc 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/pipe/event/common/tsfile/parser/table/TsFileInsertionEventTableParserTabletIterator.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/pipe/event/common/tsfile/parser/table/TsFileInsertionEventTableParserTabletIterator.java @@ -101,6 +101,7 @@ public class TsFileInsertionEventTableParserTabletIterator implements Iterator columnTypes; private List measurementList; private List dataTypeList; + private List fieldSchemaList; private int deviceIdSize; private List modsInfoList; @@ -194,7 +195,7 @@ public boolean hasNext() { long size = 0; List iChunkMetadataList = - reader.getAlignedChunkMetadata(pair.left, true); + reader.getAlignedChunkMetadata(pair.left, false); Iterator chunkMetadataIterator = iChunkMetadataList.iterator(); @@ -213,27 +214,7 @@ public boolean hasNext() { continue; } - Iterator iChunkMetadataIterator = - alignedChunkMetadata.getValueChunkMetadataList().iterator(); - while (iChunkMetadataIterator.hasNext()) { - IChunkMetadata iChunkMetadata = iChunkMetadataIterator.next(); - if (iChunkMetadata == null) { - iChunkMetadataIterator.remove(); - continue; - } - - if (!modifications.isEmpty() - && ModsOperationUtil.isAllDeletedByMods( - pair.getLeft(), - iChunkMetadata.getMeasurementUid(), - alignedChunkMetadata.getStartTime(), - alignedChunkMetadata.getEndTime(), - modifications)) { - iChunkMetadataIterator.remove(); - } - } - - if (alignedChunkMetadata.getValueChunkMetadataList().isEmpty()) { + if (areAllFieldsDeletedByMods(pair.getLeft(), alignedChunkMetadata)) { chunkMetadataIterator.remove(); continue; } @@ -267,6 +248,7 @@ public boolean hasNext() { dataTypeList = new ArrayList<>(); columnTypes = new ArrayList<>(); measurementList = new ArrayList<>(); + fieldSchemaList = new ArrayList<>(); for (int i = 0; i < columnSchemaSize; i++) { final IMeasurementSchema schema = tableSchema.getColumnSchemas().get(i); @@ -280,6 +262,9 @@ public boolean hasNext() { measurementList.add(measurementName); dataTypeList.add(schema.getType()); } + if (ColumnCategory.FIELD.equals(columnCategory)) { + fieldSchemaList.add(schema); + } } } deviceIdSize = dataTypeList.size(); @@ -331,9 +316,9 @@ private Tablet buildNextTablet() { tablet = new Tablet( tableName, - measurementList, - dataTypeList, - columnTypes, + new ArrayList<>(measurementList), + new ArrayList<>(dataTypeList), + new ArrayList<>(columnTypes), rowCountAndMemorySize.getLeft()); tablet.initBitMaps(); isFirstRow = false; @@ -376,6 +361,20 @@ private void initChunkReader(final AbstractAlignedChunkMetadata alignedChunkMeta long size = timeChunkSize; final List valueChunkList = new ArrayList<>(); + final Map valueChunkMetadataMap = + alignedChunkMetadata.getValueChunkMetadataList().stream() + .filter(Objects::nonNull) + .filter( + metadata -> + !isFieldDeletedByMods( + metadata.getMeasurementUid(), + alignedChunkMetadata.getStartTime(), + alignedChunkMetadata.getEndTime())) + .collect( + Collectors.toMap( + IChunkMetadata::getMeasurementUid, + metadata -> metadata, + (left, right) -> left)); // To ensure that the Tablet has the same alignedChunk column as the current one, // you need to create a new Tablet to fill in the data. @@ -392,50 +391,99 @@ private void initChunkReader(final AbstractAlignedChunkMetadata alignedChunkMeta measurementList.subList(deviceIdSize, measurementList.size()).clear(); dataTypeList.subList(deviceIdSize, dataTypeList.size()).clear(); - for (; offset < alignedChunkMetadata.getValueChunkMetadataList().size(); ++offset) { - final IChunkMetadata metadata = alignedChunkMetadata.getValueChunkMetadataList().get(offset); + boolean hasSelectedField = fieldSchemaList.isEmpty(); + boolean hasSelectedNonNullChunk = false; + for (; offset < fieldSchemaList.size(); ++offset) { + final IMeasurementSchema schema = fieldSchemaList.get(offset); + if (isFieldDeletedByMods( + schema.getMeasurementName(), + alignedChunkMetadata.getStartTime(), + alignedChunkMetadata.getEndTime())) { + continue; + } + + final IChunkMetadata metadata = valueChunkMetadataMap.get(schema.getMeasurementName()); + Chunk chunk = null; if (metadata != null) { - final Chunk chunk = reader.readMemChunk((ChunkMetadata) metadata); - size += PipeMemoryWeightUtil.calculateChunkRamBytesUsed(chunk); - if (size > allocatedMemoryBlockForChunk.getMemoryUsageInBytes()) { - if (valueChunkList.isEmpty()) { + chunk = reader.readMemChunk((ChunkMetadata) metadata); + final long newSize = size + PipeMemoryWeightUtil.calculateChunkRamBytesUsed(chunk); + if (newSize > allocatedMemoryBlockForChunk.getMemoryUsageInBytes()) { + if (!hasSelectedNonNullChunk) { // If the first chunk exceeds the memory limit, we need to allocate more memory + size = newSize; PipeDataNodeResourceManager.memory().forceResize(allocatedMemoryBlockForChunk, size); - columnTypes.add(ColumnCategory.FIELD); - measurementList.add(metadata.getMeasurementUid()); - dataTypeList.add(metadata.getDataType()); - valueChunkList.add(chunk); - ++offset; + } else { + break; } - break; } else { - // Record the column information corresponding to Meta to fill in Tablet - columnTypes.add(ColumnCategory.FIELD); - measurementList.add(metadata.getMeasurementUid()); - dataTypeList.add(metadata.getDataType()); - valueChunkList.add(chunk); + size = newSize; } + hasSelectedNonNullChunk = true; } + + columnTypes.add(ColumnCategory.FIELD); + measurementList.add(schema.getMeasurementName()); + dataTypeList.add(schema.getType()); + valueChunkList.add(chunk); + hasSelectedField = true; } - if (offset >= alignedChunkMetadata.getValueChunkMetadataList().size()) { + if (offset >= fieldSchemaList.size()) { currentChunkMetadata = null; } + if (!hasSelectedField) { + this.chunkReader = null; + this.batchData = null; + return; + } + this.chunkReader = new TableChunkReader(timeChunk, valueChunkList, null); this.modsInfoList = ModsOperationUtil.initializeMeasurementMods(deviceID, measurementList, modifications); } + private boolean areAllFieldsDeletedByMods( + final IDeviceID currentDeviceID, final AbstractAlignedChunkMetadata alignedChunkMetadata) { + if (modifications.isEmpty() || fieldSchemaList.isEmpty()) { + return false; + } + + for (final IMeasurementSchema schema : fieldSchemaList) { + if (!ModsOperationUtil.isAllDeletedByMods( + currentDeviceID, + schema.getMeasurementName(), + alignedChunkMetadata.getStartTime(), + alignedChunkMetadata.getEndTime(), + modifications)) { + return false; + } + } + return true; + } + + private boolean isFieldDeletedByMods( + final String measurementID, final long startTime, final long endTime) { + return !modifications.isEmpty() + && ModsOperationUtil.isAllDeletedByMods( + deviceID, measurementID, startTime, endTime, modifications); + } + private boolean fillMeasurementValueColumns( final BatchData data, final Tablet tablet, final int rowIndex) { - final TsPrimitiveType[] primitiveTypes = data.getVector(); + final TsPrimitiveType[] primitiveTypes = + Objects.nonNull(data.getVector()) ? data.getVector() : new TsPrimitiveType[0]; boolean needFillTime = false; + boolean hasNonDeletedField = dataTypeList.size() == deviceIdSize; for (int i = deviceIdSize, size = dataTypeList.size(); i < size; i++) { - final TsPrimitiveType primitiveType = primitiveTypes[i - deviceIdSize]; - if (primitiveType == null - || ModsOperationUtil.isDelete(data.currentTime(), modsInfoList.get(i))) { + final TsPrimitiveType primitiveType = + i - deviceIdSize < primitiveTypes.length ? primitiveTypes[i - deviceIdSize] : null; + final boolean isDeleted = ModsOperationUtil.isDelete(data.currentTime(), modsInfoList.get(i)); + if (!isDeleted) { + hasNonDeletedField = true; + } + if (primitiveType == null || isDeleted) { switch (dataTypeList.get(i)) { case TEXT: case BLOB: @@ -480,7 +528,7 @@ private boolean fillMeasurementValueColumns( throw new UnSupportedDataTypeException("UnSupported" + primitiveType.getDataType()); } } - return needFillTime; + return needFillTime || hasNonDeletedField; } private void fillDeviceIdColumns( diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzer.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzer.java index e1e6d59719145..680add7ec5cd6 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzer.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzer.java @@ -54,11 +54,17 @@ import org.apache.tsfile.encrypt.EncryptParameter; import org.apache.tsfile.encrypt.EncryptUtils; import org.apache.tsfile.external.commons.io.FileUtils; +import org.apache.tsfile.file.metadata.AbstractAlignedChunkMetadata; +import org.apache.tsfile.file.metadata.IChunkMetadata; import org.apache.tsfile.file.metadata.IDeviceID; +import org.apache.tsfile.file.metadata.MetadataIndexNode; import org.apache.tsfile.file.metadata.TableSchema; import org.apache.tsfile.file.metadata.TimeseriesMetadata; import org.apache.tsfile.read.TsFileSequenceReader; import org.apache.tsfile.read.TsFileSequenceReaderTimeseriesMetadataIterator; +import org.apache.tsfile.read.controller.IMetadataQuerier; +import org.apache.tsfile.read.controller.MetadataQuerierByFileImpl; +import org.apache.tsfile.utils.Pair; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -69,6 +75,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Objects; @@ -381,7 +388,7 @@ private void analyzeSingleTsFile(final File tsFile, int i) throws Exception { .getLoadTsFileAnalyzeSchemaBatchReadTimeSeriesMetadataCount()); // check if the tsfile is empty - if (!timeseriesMetadataIterator.hasNext()) { + if (!isTableModelFile && !timeseriesMetadataIterator.hasNext()) { throw new LoadEmptyFileException(tsFile.getAbsolutePath()); } @@ -410,7 +417,7 @@ && handleSingleMiniFile(i)) { sessionInfo.getDatabaseName().orElse(null), SqlDialect.TABLE); context.setSession(newSessionInfo); - doAnalyzeSingleTableFile(tsFile, reader, timeseriesMetadataIterator, tableSchemaMap); + doAnalyzeSingleTableFile(tsFile, reader, tableSchemaMap); } else { final SessionInfo newSessionInfo = new SessionInfo( @@ -525,14 +532,11 @@ private void doAnalyzeSingleTreeFile( private void doAnalyzeSingleTableFile( final File tsFile, final TsFileSequenceReader reader, - final TsFileSequenceReaderTimeseriesMetadataIterator timeseriesMetadataIterator, final Map tableSchemaMap) - throws IOException, LoadAnalyzeException { + throws IOException, LoadAnalyzeException, LoadEmptyFileException { // construct tsfile resource final TsFileResource tsFileResource = constructTsFileResource(reader, tsFile); - long writePointCount = 0; - if (Objects.isNull(databaseForTableData)) { // If database is not specified, use the database from current session. // If still not specified, throw an exception. @@ -553,23 +557,9 @@ private void doAnalyzeSingleTableFile( getOrCreateTableSchemaCache().setTableSchemaMap(tableSchemaMap); getOrCreateTableSchemaCache().setCurrentModificationsAndTimeIndex(tsFileResource, reader); - while (timeseriesMetadataIterator.hasNext()) { - final Map> device2TimeseriesMetadata = - timeseriesMetadataIterator.next(); - - // Update time index no matter if resource file exists or not, because resource file may be - // untrusted - TsFileResourceUtils.updateTsFileResource( - device2TimeseriesMetadata, - tsFileResource, - IoTDBDescriptor.getInstance().getConfig().isCacheLastValuesForLoad()); - getOrCreateTableSchemaCache().setCurrentTimeIndex(tsFileResource.getTimeIndex()); - - for (IDeviceID deviceId : device2TimeseriesMetadata.keySet()) { - getOrCreateTableSchemaCache().autoCreateAndVerify(deviceId); - } - - writePointCount += getWritePointCount(device2TimeseriesMetadata); + final long writePointCount = updateTableTsFileResourceAndVerifySchema(reader, tsFileResource); + if (tsFileResource.getDevices().isEmpty()) { + throw new LoadEmptyFileException(tsFile.getAbsolutePath()); } getOrCreateTableSchemaCache().flush(); @@ -589,6 +579,50 @@ private void doAnalyzeSingleTableFile( addWritePointCount(writePointCount); } + private long updateTableTsFileResourceAndVerifySchema( + final TsFileSequenceReader reader, final TsFileResource tsFileResource) + throws IOException, LoadAnalyzeException { + long writePointCount = 0; + final IMetadataQuerier metadataQuerier = new MetadataQuerierByFileImpl(reader); + final List tableNames = + new ArrayList<>(metadataQuerier.getWholeFileMetadata().getTableSchemaMap().keySet()); + + for (final String tableName : tableNames) { + final MetadataIndexNode tableRoot = + metadataQuerier.getWholeFileMetadata().getTableMetadataIndexNode(tableName); + if (Objects.isNull(tableRoot)) { + continue; + } + + final Iterator> deviceIterator = + metadataQuerier.deviceIterator(tableRoot, null); + while (deviceIterator.hasNext()) { + final IDeviceID deviceId = deviceIterator.next().getLeft(); + boolean hasChunk = false; + + for (final AbstractAlignedChunkMetadata alignedChunkMetadata : + reader.getAlignedChunkMetadata(deviceId, false)) { + if (Objects.isNull(alignedChunkMetadata) + || Objects.isNull(alignedChunkMetadata.getTimeChunkMetadata())) { + continue; + } + + hasChunk = true; + tsFileResource.updateStartTime(deviceId, alignedChunkMetadata.getStartTime()); + tsFileResource.updateEndTime(deviceId, alignedChunkMetadata.getEndTime()); + writePointCount += getTableWritePointCount(alignedChunkMetadata); + } + + if (hasChunk) { + getOrCreateTableSchemaCache().setCurrentTimeIndex(tsFileResource.getTimeIndex()); + getOrCreateTableSchemaCache().autoCreateAndVerify(deviceId); + } + } + } + + return writePointCount; + } + private TsFileResource constructTsFileResource( final TsFileSequenceReader reader, final File tsFile) throws IOException { final TsFileResource tsFileResource = new TsFileResource(tsFile); @@ -636,6 +670,25 @@ private static long getWritePointCount( .sum(); } + private static long getTableWritePointCount( + final AbstractAlignedChunkMetadata alignedChunkMetadata) { + long writePointCount = 0; + boolean hasValueChunkMetadata = false; + for (final IChunkMetadata valueChunkMetadata : + alignedChunkMetadata.getValueChunkMetadataList()) { + if (Objects.nonNull(valueChunkMetadata) + && Objects.nonNull(valueChunkMetadata.getStatistics())) { + hasValueChunkMetadata = true; + writePointCount += valueChunkMetadata.getStatistics().getCount(); + } + } + return hasValueChunkMetadata + || Objects.isNull(alignedChunkMetadata.getTimeChunkMetadata()) + || Objects.isNull(alignedChunkMetadata.getTimeChunkMetadata().getStatistics()) + ? writePointCount + : alignedChunkMetadata.getTimeChunkMetadata().getStatistics().getCount(); + } + private void addWritePointCount(long writePointCount) { if (isTableModelStatement) { loadTsFileTableStatement.addWritePointCount(writePointCount); diff --git a/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/pipe/event/TsFileInsertionEventParserTest.java b/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/pipe/event/TsFileInsertionEventParserTest.java index a2e7c558ea0b5..9d1b476f73666 100644 --- a/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/pipe/event/TsFileInsertionEventParserTest.java +++ b/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/pipe/event/TsFileInsertionEventParserTest.java @@ -24,18 +24,30 @@ import org.apache.iotdb.commons.pipe.config.PipeConfig; import org.apache.iotdb.commons.pipe.datastructure.pattern.IoTDBTreePattern; import org.apache.iotdb.commons.pipe.datastructure.pattern.PrefixTreePattern; +import org.apache.iotdb.commons.pipe.datastructure.pattern.TablePattern; import org.apache.iotdb.commons.pipe.datastructure.pattern.TreePattern; +import org.apache.iotdb.db.pipe.event.common.tablet.PipeRawTabletInsertionEvent; import org.apache.iotdb.db.pipe.event.common.tsfile.PipeTsFileInsertionEvent; import org.apache.iotdb.db.pipe.event.common.tsfile.parser.TsFileInsertionEventParser; import org.apache.iotdb.db.pipe.event.common.tsfile.parser.query.TsFileInsertionEventQueryParser; import org.apache.iotdb.db.pipe.event.common.tsfile.parser.scan.TsFileInsertionEventScanParser; +import org.apache.iotdb.db.pipe.event.common.tsfile.parser.table.TsFileInsertionEventTableParser; import org.apache.iotdb.db.storageengine.dataregion.compaction.utils.CompactionTestFileWriter; +import org.apache.iotdb.db.storageengine.dataregion.modification.DeletionPredicate; +import org.apache.iotdb.db.storageengine.dataregion.modification.IDPredicate; +import org.apache.iotdb.db.storageengine.dataregion.modification.ModificationFile; +import org.apache.iotdb.db.storageengine.dataregion.modification.TableDeletionEntry; import org.apache.iotdb.db.storageengine.dataregion.tsfile.TsFileResource; import org.apache.iotdb.db.storageengine.dataregion.tsfile.TsFileResourceStatus; import org.apache.iotdb.pipe.api.access.Row; +import org.apache.iotdb.pipe.api.event.dml.insertion.TabletInsertionEvent; import org.apache.tsfile.common.conf.TSFileConfig; +import org.apache.tsfile.enums.ColumnCategory; import org.apache.tsfile.enums.TSDataType; +import org.apache.tsfile.file.metadata.IDeviceID; +import org.apache.tsfile.file.metadata.StringArrayDeviceID; +import org.apache.tsfile.file.metadata.TableSchema; import org.apache.tsfile.file.metadata.enums.CompressionType; import org.apache.tsfile.file.metadata.enums.TSEncoding; import org.apache.tsfile.read.TsFileSequenceReader; @@ -45,9 +57,12 @@ import org.apache.tsfile.utils.Pair; import org.apache.tsfile.utils.TsFileGeneratorUtils; import org.apache.tsfile.write.TsFileWriter; +import org.apache.tsfile.write.chunk.AlignedChunkWriterImpl; import org.apache.tsfile.write.record.Tablet; import org.apache.tsfile.write.schema.IMeasurementSchema; import org.apache.tsfile.write.schema.MeasurementSchema; +import org.apache.tsfile.write.schema.Schema; +import org.apache.tsfile.write.writer.TsFileIOWriter; import org.junit.After; import org.junit.Assert; import org.junit.Before; @@ -60,12 +75,14 @@ import java.time.LocalDate; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Objects; import java.util.Set; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; +import java.util.stream.Collectors; import static org.junit.Assert.fail; @@ -96,6 +113,7 @@ public void tearDown() throws Exception { .getConfig() .setPipeMemoryManagementEnabled(isPipeMemoryManagementEnabled); if (alignedTsFile != null) { + ModificationFile.getExclusiveMods(alignedTsFile).delete(); alignedTsFile.delete(); } if (nonalignedTsFile != null) { @@ -120,6 +138,126 @@ public void testScanParser() throws Exception { System.out.println(System.currentTimeMillis() - startTime); } + @Test + public void testTableParserWithAllNullFields() throws Exception { + alignedTsFile = new File("table-all-null.tsfile"); + writeTableTsFileWithNullableFields(true); + + assertParsedTablets(parseTableTablets(false), Arrays.asList("tag1", "s1", "s2"), 3, true); + } + + @Test + public void testTableParserWithMixedAllNullFields() throws Exception { + alignedTsFile = new File("table-mixed-all-null.tsfile"); + writeTableTsFileWithNullableFields(false); + + assertParsedTablets(parseTableTablets(false), Arrays.asList("tag1", "s1", "s2"), 3, false); + } + + @Test + public void testTableParserWithAllNullFieldsAndDeletedValueChunk() throws Exception { + alignedTsFile = new File("table-all-null-with-mod.tsfile"); + writeTableTsFileWithNullableFields(false); + try (final ModificationFile modificationFile = + new ModificationFile(ModificationFile.getExclusiveMods(alignedTsFile), false)) { + modificationFile.write( + new TableDeletionEntry( + new DeletionPredicate( + "table1", new IDPredicate.NOP(), Collections.singletonList("s2")), + new TimeRange(100, 102))); + } + + assertParsedTablets(parseTableTablets(true), Arrays.asList("tag1", "s1"), 3, true); + } + + private void writeTableTsFileWithNullableFields(final boolean allFieldsNull) throws Exception { + final List tableSchemaList = + Arrays.asList( + new MeasurementSchema("tag1", TSDataType.STRING), + new MeasurementSchema("s1", TSDataType.INT64), + new MeasurementSchema("s2", TSDataType.DOUBLE)); + final List columnCategoryList = + Arrays.asList(ColumnCategory.TAG, ColumnCategory.FIELD, ColumnCategory.FIELD); + + final Schema schema = new Schema(); + schema.registerTableSchema(new TableSchema("table1", tableSchemaList, columnCategoryList)); + try (final TsFileIOWriter writer = new TsFileIOWriter(alignedTsFile)) { + writer.setSchema(schema); + final IDeviceID deviceID = new StringArrayDeviceID(new String[] {"table1", "tagA"}); + writer.startChunkGroup(deviceID); + + final AlignedChunkWriterImpl chunkWriter = + new AlignedChunkWriterImpl(tableSchemaList.subList(1, tableSchemaList.size())); + for (int i = 0; i < 3; i++) { + final long time = 100 + i; + chunkWriter.getTimeChunkWriter().write(time); + chunkWriter.getValueChunkWriterByIndex(0).write(time, 0L, true); + chunkWriter.getValueChunkWriterByIndex(1).write(time, 1.0 + i, allFieldsNull); + } + chunkWriter.writeToFileWriter(writer); + writer.endChunkGroup(); + writer.endFile(); + } + } + + private List parseTableTablets(final boolean isWithMod) throws IOException { + final List parsedTablets = new ArrayList<>(); + try (final TsFileInsertionEventTableParser parser = + new TsFileInsertionEventTableParser( + alignedTsFile, + new TablePattern(true, null, null), + Long.MIN_VALUE, + Long.MAX_VALUE, + null, + null, + null, + isWithMod)) { + for (final TabletInsertionEvent event : parser.toTabletInsertionEvents()) { + Assert.assertTrue(event instanceof PipeRawTabletInsertionEvent); + parsedTablets.add(((PipeRawTabletInsertionEvent) event).convertToTablet()); + } + } + return parsedTablets; + } + + private void assertParsedTablets( + final List tablets, + final List expectedColumns, + final int expectedRowCount, + final boolean expectS2Null) { + Assert.assertFalse(tablets.isEmpty()); + int rowCount = 0; + for (final Tablet tablet : tablets) { + if (tablet.getRowSize() == 0) { + continue; + } + + Assert.assertEquals("table1", tablet.getTableName()); + Assert.assertEquals( + expectedColumns, + tablet.getSchemas().stream() + .map(IMeasurementSchema::getMeasurementName) + .collect(Collectors.toList())); + Assert.assertEquals(ColumnCategory.TAG, tablet.getColumnTypes().get(0)); + for (int i = 1; i < tablet.getColumnTypes().size(); i++) { + Assert.assertEquals(ColumnCategory.FIELD, tablet.getColumnTypes().get(i)); + } + + for (int i = 0; i < tablet.getRowSize(); i++, rowCount++) { + Assert.assertEquals(100 + rowCount, tablet.getTimestamp(i)); + Assert.assertFalse(tablet.isNull(i, 0)); + Assert.assertTrue(tablet.isNull(i, 1)); + if (expectedColumns.size() > 2) { + Assert.assertEquals(expectS2Null, tablet.isNull(i, 2)); + if (!expectS2Null) { + Assert.assertEquals(1.0 + rowCount, (double) tablet.getValue(i, 2), 0.0); + } + } + } + } + Assert.assertEquals(expectedRowCount, rowCount); + } + public void testToTabletInsertionEvents(final boolean isQuery) throws Exception { // Test empty chunk testMixedTsFileWithEmptyChunk(isQuery); diff --git a/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzerTest.java b/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzerTest.java new file mode 100644 index 0000000000000..9e86ca60728b0 --- /dev/null +++ b/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzerTest.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iotdb.db.queryengine.plan.analyze.load; + +import org.apache.tsfile.enums.ColumnCategory; +import org.apache.tsfile.enums.TSDataType; +import org.apache.tsfile.file.metadata.AbstractAlignedChunkMetadata; +import org.apache.tsfile.file.metadata.IDeviceID; +import org.apache.tsfile.file.metadata.StringArrayDeviceID; +import org.apache.tsfile.file.metadata.TableSchema; +import org.apache.tsfile.read.TsFileSequenceReader; +import org.apache.tsfile.write.chunk.AlignedChunkWriterImpl; +import org.apache.tsfile.write.schema.IMeasurementSchema; +import org.apache.tsfile.write.schema.MeasurementSchema; +import org.apache.tsfile.write.schema.Schema; +import org.apache.tsfile.write.writer.TsFileIOWriter; +import org.junit.Assert; +import org.junit.Test; + +import java.io.File; +import java.lang.reflect.Method; +import java.util.Arrays; +import java.util.List; +import java.util.Objects; + +public class LoadTsFileAnalyzerTest { + + @Test + public void testTableWritePointCountFallbackToTimeChunkWhenAllFieldsNull() throws Exception { + final File tsFile = new File("load-table-all-null.tsfile"); + writeTableTsFileWithAllNullFields(tsFile); + + try (final TsFileSequenceReader reader = new TsFileSequenceReader(tsFile.getAbsolutePath())) { + final IDeviceID deviceID = new StringArrayDeviceID(new String[] {"table1", "tagA"}); + final List alignedChunkMetadataList = + reader.getAlignedChunkMetadata(deviceID, false); + Assert.assertEquals(1, alignedChunkMetadataList.size()); + + final AbstractAlignedChunkMetadata alignedChunkMetadata = alignedChunkMetadataList.get(0); + Assert.assertNotNull(alignedChunkMetadata.getTimeChunkMetadata()); + Assert.assertEquals( + 3, alignedChunkMetadata.getTimeChunkMetadata().getStatistics().getCount()); + Assert.assertEquals( + 0, + alignedChunkMetadata.getValueChunkMetadataList().stream() + .filter(Objects::nonNull) + .count()); + + final Method method = + LoadTsFileAnalyzer.class.getDeclaredMethod( + "getTableWritePointCount", AbstractAlignedChunkMetadata.class); + method.setAccessible(true); + Assert.assertEquals(3L, method.invoke(null, alignedChunkMetadata)); + } finally { + if (tsFile.exists()) { + Assert.assertTrue(tsFile.delete()); + } + } + } + + private void writeTableTsFileWithAllNullFields(final File tsFile) throws Exception { + if (tsFile.exists()) { + Assert.assertTrue(tsFile.delete()); + } + + final List tableSchemaList = + Arrays.asList( + new MeasurementSchema("tag1", TSDataType.STRING), + new MeasurementSchema("s1", TSDataType.INT64), + new MeasurementSchema("s2", TSDataType.DOUBLE)); + final List columnCategoryList = + Arrays.asList(ColumnCategory.TAG, ColumnCategory.FIELD, ColumnCategory.FIELD); + + final Schema schema = new Schema(); + schema.registerTableSchema(new TableSchema("table1", tableSchemaList, columnCategoryList)); + try (final TsFileIOWriter writer = new TsFileIOWriter(tsFile)) { + writer.setSchema(schema); + writer.startChunkGroup(new StringArrayDeviceID(new String[] {"table1", "tagA"})); + + final AlignedChunkWriterImpl chunkWriter = + new AlignedChunkWriterImpl(tableSchemaList.subList(1, tableSchemaList.size())); + for (int i = 0; i < 3; i++) { + final long time = 100 + i; + chunkWriter.getTimeChunkWriter().write(time); + chunkWriter.getValueChunkWriterByIndex(0).write(time, 0L, true); + chunkWriter.getValueChunkWriterByIndex(1).write(time, 0.0, true); + } + chunkWriter.writeToFileWriter(writer); + writer.endChunkGroup(); + writer.endFile(); + } + } +} From e2cb0837669ff3b56829fd852b605b2ec29e70d8 Mon Sep 17 00:00:00 2001 From: Caideyipi <87789683+Caideyipi@users.noreply.github.com> Date: Wed, 6 May 2026 19:38:42 +0800 Subject: [PATCH 02/23] fix --- .../plan/analyze/load/LoadTsFileAnalyzer.java | 110 ++++++++++++++++++ 1 file changed, 110 insertions(+) diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzer.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzer.java index 680add7ec5cd6..8944975fc412b 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzer.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzer.java @@ -42,6 +42,7 @@ import org.apache.iotdb.db.queryengine.plan.statement.crud.LoadTsFileStatement; import org.apache.iotdb.db.storageengine.dataregion.tsfile.TsFileResource; import org.apache.iotdb.db.storageengine.dataregion.tsfile.TsFileResourceStatus; +import org.apache.iotdb.db.storageengine.dataregion.tsfile.timeindex.ITimeIndex; import org.apache.iotdb.db.storageengine.dataregion.utils.TsFileResourceUtils; import org.apache.iotdb.db.storageengine.load.active.ActiveLoadPathHelper; import org.apache.iotdb.db.storageengine.load.converter.LoadTsFileDataTypeConverter; @@ -53,6 +54,7 @@ import org.apache.tsfile.common.conf.TSFileDescriptor; import org.apache.tsfile.encrypt.EncryptParameter; import org.apache.tsfile.encrypt.EncryptUtils; +import org.apache.tsfile.enums.TSDataType; import org.apache.tsfile.external.commons.io.FileUtils; import org.apache.tsfile.file.metadata.AbstractAlignedChunkMetadata; import org.apache.tsfile.file.metadata.IChunkMetadata; @@ -60,11 +62,14 @@ import org.apache.tsfile.file.metadata.MetadataIndexNode; import org.apache.tsfile.file.metadata.TableSchema; import org.apache.tsfile.file.metadata.TimeseriesMetadata; +import org.apache.tsfile.read.TimeValuePair; import org.apache.tsfile.read.TsFileSequenceReader; import org.apache.tsfile.read.TsFileSequenceReaderTimeseriesMetadataIterator; import org.apache.tsfile.read.controller.IMetadataQuerier; import org.apache.tsfile.read.controller.MetadataQuerierByFileImpl; import org.apache.tsfile.utils.Pair; +import org.apache.tsfile.utils.RamUsageEstimator; +import org.apache.tsfile.utils.TsPrimitiveType; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -75,6 +80,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; @@ -583,6 +589,18 @@ private long updateTableTsFileResourceAndVerifySchema( final TsFileSequenceReader reader, final TsFileResource tsFileResource) throws IOException, LoadAnalyzeException { long writePointCount = 0; + if (tsFileResource.getTimeIndex().getTimeIndexType() == ITimeIndex.FILE_TIME_INDEX_TYPE) { + tsFileResource.setTimeIndex( + IoTDBDescriptor.getInstance().getConfig().getTimeIndexLevel().getTimeIndex()); + } + Map> deviceLastValues = + IoTDBDescriptor.getInstance().getConfig().isCacheLastValuesForLoad() + ? new HashMap<>() + : null; + long lastValuesMemCost = 0; + final long lastValuesMemoryBudget = + IoTDBDescriptor.getInstance().getConfig().getCacheLastValuesMemoryBudgetInByte(); + final IMetadataQuerier metadataQuerier = new MetadataQuerierByFileImpl(reader); final List tableNames = new ArrayList<>(metadataQuerier.getWholeFileMetadata().getTableSchemaMap().keySet()); @@ -611,6 +629,18 @@ private long updateTableTsFileResourceAndVerifySchema( tsFileResource.updateStartTime(deviceId, alignedChunkMetadata.getStartTime()); tsFileResource.updateEndTime(deviceId, alignedChunkMetadata.getEndTime()); writePointCount += getTableWritePointCount(alignedChunkMetadata); + if (deviceLastValues != null) { + lastValuesMemCost = + updateTableDeviceLastValues( + deviceLastValues, + deviceId, + alignedChunkMetadata, + lastValuesMemCost, + lastValuesMemoryBudget); + if (lastValuesMemCost > lastValuesMemoryBudget) { + deviceLastValues = null; + } + } } if (hasChunk) { @@ -620,9 +650,89 @@ private long updateTableTsFileResourceAndVerifySchema( } } + tsFileResource.setLastValues(convertTableDeviceLastValues(deviceLastValues)); return writePointCount; } + private static long updateTableDeviceLastValues( + final Map> deviceLastValues, + final IDeviceID deviceId, + final AbstractAlignedChunkMetadata alignedChunkMetadata, + long lastValuesMemCost, + final long lastValuesMemoryBudget) { + for (final IChunkMetadata chunkMetadata : alignedChunkMetadata.getValueChunkMetadataList()) { + if (Objects.isNull(chunkMetadata)) { + continue; + } + + Map deviceMap = deviceLastValues.get(deviceId); + if (Objects.isNull(deviceMap)) { + deviceMap = new HashMap<>(); + deviceLastValues.put(deviceId, deviceMap); + lastValuesMemCost += RamUsageEstimator.shallowSizeOf(deviceMap); + lastValuesMemCost += deviceId.ramBytesUsed(); + } + + final int previousSize = deviceMap.size(); + final String measurement = chunkMetadata.getMeasurementUid(); + final TimeValuePair oldPair = deviceMap.get(measurement); + if (Objects.nonNull(oldPair) && oldPair.getTimestamp() > chunkMetadata.getEndTime()) { + continue; + } + + final TimeValuePair newPair = buildLastValuePair(chunkMetadata); + if (Objects.nonNull(oldPair)) { + lastValuesMemCost -= oldPair.getSize(); + } + if (Objects.nonNull(newPair)) { + deviceMap.put(measurement, newPair); + lastValuesMemCost += newPair.getSize(); + } else { + deviceMap.remove(measurement); + } + lastValuesMemCost += + (long) (deviceMap.size() - previousSize) * RamUsageEstimator.HASHTABLE_RAM_BYTES_PER_ENTRY; + if (lastValuesMemCost > lastValuesMemoryBudget) { + return lastValuesMemCost; + } + } + return lastValuesMemCost; + } + + private static TimeValuePair buildLastValuePair(final IChunkMetadata chunkMetadata) { + if (Objects.isNull(chunkMetadata.getStatistics()) + || Objects.equals(chunkMetadata.getDataType(), TSDataType.BLOB)) { + return null; + } + + final TsPrimitiveType lastValue = + TsPrimitiveType.getByType( + Objects.equals(chunkMetadata.getDataType(), TSDataType.VECTOR) + ? TSDataType.INT64 + : chunkMetadata.getDataType(), + chunkMetadata.getStatistics().getLastValue()); + return new TimeValuePair(chunkMetadata.getEndTime(), lastValue); + } + + private static Map>> convertTableDeviceLastValues( + final Map> deviceLastValues) { + if (Objects.isNull(deviceLastValues)) { + return null; + } + + final Map>> finalDeviceLastValues = + new HashMap<>(deviceLastValues.size()); + for (final Map.Entry> entry : + deviceLastValues.entrySet()) { + final List> lastValues = new ArrayList<>(entry.getValue().size()); + for (final Map.Entry lastValueEntry : entry.getValue().entrySet()) { + lastValues.add(new Pair<>(lastValueEntry.getKey(), lastValueEntry.getValue())); + } + finalDeviceLastValues.put(entry.getKey(), lastValues); + } + return finalDeviceLastValues; + } + private TsFileResource constructTsFileResource( final TsFileSequenceReader reader, final File tsFile) throws IOException { final TsFileResource tsFileResource = new TsFileResource(tsFile); From 85a9b39183fb4eba3fe5bcab56ed33d8f93592f9 Mon Sep 17 00:00:00 2001 From: Caideyipi <87789683+Caideyipi@users.noreply.github.com> Date: Thu, 7 May 2026 09:31:35 +0800 Subject: [PATCH 03/23] fix --- .../db/queryengine/plan/analyze/load/LoadTsFileAnalyzer.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzer.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzer.java index 8944975fc412b..66340f5b0562e 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzer.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzer.java @@ -691,7 +691,8 @@ private static long updateTableDeviceLastValues( deviceMap.remove(measurement); } lastValuesMemCost += - (long) (deviceMap.size() - previousSize) * RamUsageEstimator.HASHTABLE_RAM_BYTES_PER_ENTRY; + (long) (deviceMap.size() - previousSize) + * RamUsageEstimator.HASHTABLE_RAM_BYTES_PER_ENTRY; if (lastValuesMemCost > lastValuesMemoryBudget) { return lastValuesMemCost; } From 17243ff20578347d3c7fae762a85d57b7bf5bef4 Mon Sep 17 00:00:00 2001 From: Caideyipi <87789683+Caideyipi@users.noreply.github.com> Date: Thu, 7 May 2026 11:57:32 +0800 Subject: [PATCH 04/23] Fix --- .../plan/analyze/load/LoadTsFileAnalyzer.java | 167 +++++++++++++++++- .../load/LoadTsFileTableSchemaCache.java | 24 ++- 2 files changed, 178 insertions(+), 13 deletions(-) diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzer.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzer.java index 66340f5b0562e..05ca66db885c7 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzer.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzer.java @@ -423,7 +423,7 @@ && handleSingleMiniFile(i)) { sessionInfo.getDatabaseName().orElse(null), SqlDialect.TABLE); context.setSession(newSessionInfo); - doAnalyzeSingleTableFile(tsFile, reader, tableSchemaMap); + doAnalyzeSingleTableFile(tsFile, reader, timeseriesMetadataIterator, tableSchemaMap); } else { final SessionInfo newSessionInfo = new SessionInfo( @@ -538,6 +538,7 @@ private void doAnalyzeSingleTreeFile( private void doAnalyzeSingleTableFile( final File tsFile, final TsFileSequenceReader reader, + final TsFileSequenceReaderTimeseriesMetadataIterator timeseriesMetadataIterator, final Map tableSchemaMap) throws IOException, LoadAnalyzeException, LoadEmptyFileException { // construct tsfile resource @@ -563,7 +564,9 @@ private void doAnalyzeSingleTableFile( getOrCreateTableSchemaCache().setTableSchemaMap(tableSchemaMap); getOrCreateTableSchemaCache().setCurrentModificationsAndTimeIndex(tsFileResource, reader); - final long writePointCount = updateTableTsFileResourceAndVerifySchema(reader, tsFileResource); + final long writePointCount = + updateTableTsFileResourceAndVerifySchema( + reader, timeseriesMetadataIterator, tsFileResource); if (tsFileResource.getDevices().isEmpty()) { throw new LoadEmptyFileException(tsFile.getAbsolutePath()); } @@ -586,13 +589,42 @@ private void doAnalyzeSingleTableFile( } private long updateTableTsFileResourceAndVerifySchema( - final TsFileSequenceReader reader, final TsFileResource tsFileResource) + final TsFileSequenceReader reader, + final TsFileSequenceReaderTimeseriesMetadataIterator timeseriesMetadataIterator, + final TsFileResource tsFileResource) throws IOException, LoadAnalyzeException { long writePointCount = 0; + + while (timeseriesMetadataIterator.hasNext()) { + final Map> device2TimeseriesMetadata = + timeseriesMetadataIterator.next(); + + // Update time index no matter if resource file exists or not, because resource file may be + // untrusted + TsFileResourceUtils.updateTsFileResource( + device2TimeseriesMetadata, + tsFileResource, + IoTDBDescriptor.getInstance().getConfig().isCacheLastValuesForLoad()); + getOrCreateTableSchemaCache().setCurrentTimeIndex(tsFileResource.getTimeIndex()); + + for (final IDeviceID deviceId : device2TimeseriesMetadata.keySet()) { + if (!getOrCreateTableSchemaCache().isDeviceDeletedByMods(deviceId)) { + getOrCreateTableSchemaCache().autoCreateAndVerify(deviceId); + } + } + + writePointCount += getWritePointCount(device2TimeseriesMetadata); + } + + if (!tsFileResource.getDevices().isEmpty()) { + return writePointCount; + } + if (tsFileResource.getTimeIndex().getTimeIndexType() == ITimeIndex.FILE_TIME_INDEX_TYPE) { tsFileResource.setTimeIndex( IoTDBDescriptor.getInstance().getConfig().getTimeIndexLevel().getTimeIndex()); } + Map> deviceLastValues = IoTDBDescriptor.getInstance().getConfig().isCacheLastValuesForLoad() ? new HashMap<>() @@ -600,6 +632,7 @@ private long updateTableTsFileResourceAndVerifySchema( long lastValuesMemCost = 0; final long lastValuesMemoryBudget = IoTDBDescriptor.getInstance().getConfig().getCacheLastValuesMemoryBudgetInByte(); + Map> device2TimeseriesMetadata = null; final IMetadataQuerier metadataQuerier = new MetadataQuerierByFileImpl(reader); final List tableNames = @@ -643,9 +676,50 @@ private long updateTableTsFileResourceAndVerifySchema( } } + if (!hasChunk) { + if (Objects.isNull(device2TimeseriesMetadata)) { + device2TimeseriesMetadata = reader.getAllTimeseriesMetadata(true); + } + + final List timeseriesMetadataList = + getTimeseriesMetadata(device2TimeseriesMetadata, deviceId); + if (Objects.nonNull(timeseriesMetadataList)) { + for (final TimeseriesMetadata timeseriesMetadata : timeseriesMetadataList) { + if (Objects.isNull(timeseriesMetadata) + || Objects.isNull(timeseriesMetadata.getStatistics())) { + continue; + } + + hasChunk = true; + tsFileResource.updateStartTime( + deviceId, timeseriesMetadata.getStatistics().getStartTime()); + tsFileResource.updateEndTime( + deviceId, timeseriesMetadata.getStatistics().getEndTime()); + } + + if (hasChunk) { + writePointCount += getTableWritePointCount(timeseriesMetadataList); + if (deviceLastValues != null) { + lastValuesMemCost = + updateTableDeviceLastValues( + deviceLastValues, + deviceId, + timeseriesMetadataList, + lastValuesMemCost, + lastValuesMemoryBudget); + if (lastValuesMemCost > lastValuesMemoryBudget) { + deviceLastValues = null; + } + } + } + } + } + if (hasChunk) { getOrCreateTableSchemaCache().setCurrentTimeIndex(tsFileResource.getTimeIndex()); - getOrCreateTableSchemaCache().autoCreateAndVerify(deviceId); + if (!getOrCreateTableSchemaCache().isDeviceDeletedByMods(deviceId)) { + getOrCreateTableSchemaCache().autoCreateAndVerify(deviceId); + } } } } @@ -700,6 +774,50 @@ private static long updateTableDeviceLastValues( return lastValuesMemCost; } + private static long updateTableDeviceLastValues( + final Map> deviceLastValues, + final IDeviceID deviceId, + final List timeseriesMetadataList, + long lastValuesMemCost, + final long lastValuesMemoryBudget) { + Map deviceMap = deviceLastValues.get(deviceId); + if (Objects.isNull(deviceMap)) { + deviceMap = new HashMap<>(); + deviceLastValues.put(deviceId, deviceMap); + lastValuesMemCost += RamUsageEstimator.shallowSizeOf(deviceMap); + lastValuesMemCost += deviceId.ramBytesUsed(); + } + + for (final TimeseriesMetadata timeseriesMetadata : timeseriesMetadataList) { + if (Objects.isNull(timeseriesMetadata) + || Objects.isNull(timeseriesMetadata.getStatistics())) { + continue; + } + + final int previousSize = deviceMap.size(); + final String measurement = timeseriesMetadata.getMeasurementId(); + final TimeValuePair oldPair = deviceMap.get(measurement); + final TimeValuePair newPair = buildLastValuePair(timeseriesMetadata); + if (Objects.nonNull(oldPair)) { + lastValuesMemCost -= oldPair.getSize(); + } + + if (Objects.nonNull(newPair)) { + deviceMap.put(measurement, newPair); + lastValuesMemCost += newPair.getSize(); + } else { + deviceMap.remove(measurement); + } + lastValuesMemCost += + (long) (deviceMap.size() - previousSize) + * RamUsageEstimator.HASHTABLE_RAM_BYTES_PER_ENTRY; + if (lastValuesMemCost > lastValuesMemoryBudget) { + return lastValuesMemCost; + } + } + return lastValuesMemCost; + } + private static TimeValuePair buildLastValuePair(final IChunkMetadata chunkMetadata) { if (Objects.isNull(chunkMetadata.getStatistics()) || Objects.equals(chunkMetadata.getDataType(), TSDataType.BLOB)) { @@ -715,6 +833,38 @@ private static TimeValuePair buildLastValuePair(final IChunkMetadata chunkMetada return new TimeValuePair(chunkMetadata.getEndTime(), lastValue); } + private static TimeValuePair buildLastValuePair(final TimeseriesMetadata timeseriesMetadata) { + if (Objects.isNull(timeseriesMetadata.getStatistics()) + || Objects.equals(timeseriesMetadata.getTsDataType(), TSDataType.BLOB)) { + return null; + } + + final TsPrimitiveType lastValue = + TsPrimitiveType.getByType( + Objects.equals(timeseriesMetadata.getTsDataType(), TSDataType.VECTOR) + ? TSDataType.INT64 + : timeseriesMetadata.getTsDataType(), + timeseriesMetadata.getStatistics().getLastValue()); + return new TimeValuePair(timeseriesMetadata.getStatistics().getEndTime(), lastValue); + } + + private static List getTimeseriesMetadata( + final Map> device2TimeseriesMetadata, + final IDeviceID deviceId) { + final List timeseriesMetadataList = device2TimeseriesMetadata.get(deviceId); + if (Objects.nonNull(timeseriesMetadataList)) { + return timeseriesMetadataList; + } + + for (final Map.Entry> entry : + device2TimeseriesMetadata.entrySet()) { + if (Arrays.equals(entry.getKey().getSegments(), deviceId.getSegments())) { + return entry.getValue(); + } + } + return null; + } + private static Map>> convertTableDeviceLastValues( final Map> deviceLastValues) { if (Objects.isNull(deviceLastValues)) { @@ -800,6 +950,15 @@ private static long getTableWritePointCount( : alignedChunkMetadata.getTimeChunkMetadata().getStatistics().getCount(); } + private static long getTableWritePointCount( + final List timeseriesMetadataList) { + return timeseriesMetadataList.stream() + .filter(Objects::nonNull) + .filter(timeseriesMetadata -> Objects.nonNull(timeseriesMetadata.getStatistics())) + .mapToLong(timeseriesMetadata -> timeseriesMetadata.getStatistics().getCount()) + .sum(); + } + private void addWritePointCount(long writePointCount) { if (isTableModelStatement) { loadTsFileTableStatement.addWritePointCount(writePointCount); diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileTableSchemaCache.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileTableSchemaCache.java index 237f5eea36378..9363a54db4b54 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileTableSchemaCache.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileTableSchemaCache.java @@ -137,15 +137,8 @@ public void setTableSchemaMap( } public void autoCreateAndVerify(final IDeviceID device) throws LoadAnalyzeException { - try { - if (ModificationUtils.isDeviceDeletedByMods(currentModifications, currentTimeIndex, device)) { - return; - } - } catch (final IllegalPathException e) { - LOGGER.warn( - "Failed to check if device {} is deleted by mods. Will see it as not deleted.", - device, - e); + if (isDeviceDeletedByMods(device)) { + return; } try { @@ -167,6 +160,19 @@ public void autoCreateAndVerify(final IDeviceID device) throws LoadAnalyzeExcept } } + public boolean isDeviceDeletedByMods(final IDeviceID device) { + try { + return ModificationUtils.isDeviceDeletedByMods( + currentModifications, currentTimeIndex, device); + } catch (final IllegalPathException e) { + LOGGER.warn( + "Failed to check if device {} is deleted by mods. Will see it as not deleted.", + device, + e); + return false; + } + } + private void addDevice(final IDeviceID device) { final String tableName = device.getTableName(); long memoryUsageSizeInBytes = 0; From bc5f9ed946da4d24dd678ec7d6a3ab9e7348b2c1 Mon Sep 17 00:00:00 2001 From: Caideyipi <87789683+Caideyipi@users.noreply.github.com> Date: Thu, 7 May 2026 12:33:18 +0800 Subject: [PATCH 05/23] spt --- .../plan/analyze/load/LoadTsFileAnalyzer.java | 93 +++++++++--- .../analyze/load/LoadTsFileAnalyzerTest.java | 139 ++++++++++++++++++ 2 files changed, 213 insertions(+), 19 deletions(-) diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzer.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzer.java index 05ca66db885c7..8dddb5e8178c4 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzer.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzer.java @@ -81,11 +81,13 @@ import java.util.Arrays; import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Optional; +import java.util.Set; import static org.apache.iotdb.db.queryengine.plan.execution.config.TableConfigTaskVisitor.DATABASE_NOT_SPECIFIED; import static org.apache.iotdb.db.storageengine.load.metrics.LoadTsFileCostMetricsSet.ANALYSIS; @@ -594,6 +596,7 @@ private long updateTableTsFileResourceAndVerifySchema( final TsFileResource tsFileResource) throws IOException, LoadAnalyzeException { long writePointCount = 0; + final Set> devicesHandledByTimeseriesMetadataIterator = new HashSet<>(); while (timeseriesMetadataIterator.hasNext()) { final Map> device2TimeseriesMetadata = @@ -608,6 +611,7 @@ private long updateTableTsFileResourceAndVerifySchema( getOrCreateTableSchemaCache().setCurrentTimeIndex(tsFileResource.getTimeIndex()); for (final IDeviceID deviceId : device2TimeseriesMetadata.keySet()) { + devicesHandledByTimeseriesMetadataIterator.add(getDeviceKey(deviceId)); if (!getOrCreateTableSchemaCache().isDeviceDeletedByMods(deviceId)) { getOrCreateTableSchemaCache().autoCreateAndVerify(deviceId); } @@ -615,24 +619,14 @@ private long updateTableTsFileResourceAndVerifySchema( writePointCount += getWritePointCount(device2TimeseriesMetadata); } - - if (!tsFileResource.getDevices().isEmpty()) { - return writePointCount; - } - - if (tsFileResource.getTimeIndex().getTimeIndexType() == ITimeIndex.FILE_TIME_INDEX_TYPE) { - tsFileResource.setTimeIndex( - IoTDBDescriptor.getInstance().getConfig().getTimeIndexLevel().getTimeIndex()); - } - - Map> deviceLastValues = - IoTDBDescriptor.getInstance().getConfig().isCacheLastValuesForLoad() - ? new HashMap<>() - : null; + Map> deviceLastValues = null; long lastValuesMemCost = 0; + boolean hasFallbackProcessedDevice = false; + final boolean isCacheLastValuesForLoad = + IoTDBDescriptor.getInstance().getConfig().isCacheLastValuesForLoad(); final long lastValuesMemoryBudget = IoTDBDescriptor.getInstance().getConfig().getCacheLastValuesMemoryBudgetInByte(); - Map> device2TimeseriesMetadata = null; + Map> allTimeseriesMetadataByDevice = null; final IMetadataQuerier metadataQuerier = new MetadataQuerierByFileImpl(reader); final List tableNames = @@ -649,6 +643,26 @@ private long updateTableTsFileResourceAndVerifySchema( metadataQuerier.deviceIterator(tableRoot, null); while (deviceIterator.hasNext()) { final IDeviceID deviceId = deviceIterator.next().getLeft(); + if (devicesHandledByTimeseriesMetadataIterator.contains(getDeviceKey(deviceId))) { + continue; + } + + if (!hasFallbackProcessedDevice) { + if (tsFileResource.getTimeIndex().getTimeIndexType() == ITimeIndex.FILE_TIME_INDEX_TYPE) { + tsFileResource.setTimeIndex( + IoTDBDescriptor.getInstance().getConfig().getTimeIndexLevel().getTimeIndex()); + } + if (isCacheLastValuesForLoad) { + if (devicesHandledByTimeseriesMetadataIterator.isEmpty()) { + deviceLastValues = new HashMap<>(); + } else if (Objects.nonNull(tsFileResource.getLastValues())) { + deviceLastValues = restoreTableDeviceLastValues(tsFileResource.getLastValues()); + lastValuesMemCost = calculateTableDeviceLastValuesMemoryCost(deviceLastValues); + } + } + hasFallbackProcessedDevice = true; + } + boolean hasChunk = false; for (final AbstractAlignedChunkMetadata alignedChunkMetadata : @@ -677,12 +691,12 @@ private long updateTableTsFileResourceAndVerifySchema( } if (!hasChunk) { - if (Objects.isNull(device2TimeseriesMetadata)) { - device2TimeseriesMetadata = reader.getAllTimeseriesMetadata(true); + if (Objects.isNull(allTimeseriesMetadataByDevice)) { + allTimeseriesMetadataByDevice = reader.getAllTimeseriesMetadata(true); } final List timeseriesMetadataList = - getTimeseriesMetadata(device2TimeseriesMetadata, deviceId); + getTimeseriesMetadata(allTimeseriesMetadataByDevice, deviceId); if (Objects.nonNull(timeseriesMetadataList)) { for (final TimeseriesMetadata timeseriesMetadata : timeseriesMetadataList) { if (Objects.isNull(timeseriesMetadata) @@ -724,7 +738,9 @@ private long updateTableTsFileResourceAndVerifySchema( } } - tsFileResource.setLastValues(convertTableDeviceLastValues(deviceLastValues)); + if (hasFallbackProcessedDevice) { + tsFileResource.setLastValues(convertTableDeviceLastValues(deviceLastValues)); + } return writePointCount; } @@ -865,6 +881,45 @@ private static List getTimeseriesMetadata( return null; } + private static List getDeviceKey(final IDeviceID deviceId) { + return Arrays.asList(deviceId.getSegments()); + } + + private static Map> restoreTableDeviceLastValues( + final Map>> deviceLastValues) { + final Map> restoredDeviceLastValues = + new HashMap<>(deviceLastValues.size()); + for (final Map.Entry>> entry : + deviceLastValues.entrySet()) { + final Map restoredLastValues = new HashMap<>(entry.getValue().size()); + for (final Pair lastValueEntry : entry.getValue()) { + restoredLastValues.put(lastValueEntry.getLeft(), lastValueEntry.getRight()); + } + restoredDeviceLastValues.put(entry.getKey(), restoredLastValues); + } + return restoredDeviceLastValues; + } + + private static long calculateTableDeviceLastValuesMemoryCost( + final Map> deviceLastValues) { + long lastValuesMemCost = 0; + for (final Map.Entry> entry : + deviceLastValues.entrySet()) { + lastValuesMemCost += entry.getKey().ramBytesUsed(); + lastValuesMemCost += RamUsageEstimator.shallowSizeOf(entry.getValue()); + lastValuesMemCost += RamUsageEstimator.HASHTABLE_RAM_BYTES_PER_ENTRY; + for (final Map.Entry lastValueEntry : entry.getValue().entrySet()) { + lastValuesMemCost += RamUsageEstimator.sizeOf(lastValueEntry.getKey()); + lastValuesMemCost += + Objects.nonNull(lastValueEntry.getValue()) + ? lastValueEntry.getValue().getSize() + : RamUsageEstimator.NUM_BYTES_OBJECT_REF; + lastValuesMemCost += RamUsageEstimator.HASHTABLE_RAM_BYTES_PER_ENTRY; + } + } + return lastValuesMemCost; + } + private static Map>> convertTableDeviceLastValues( final Map> deviceLastValues) { if (Objects.isNull(deviceLastValues)) { diff --git a/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzerTest.java b/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzerTest.java index 9e86ca60728b0..ca5a83a17a98b 100644 --- a/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzerTest.java +++ b/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzerTest.java @@ -19,6 +19,12 @@ package org.apache.iotdb.db.queryengine.plan.analyze.load; +import org.apache.iotdb.db.exception.load.LoadRuntimeOutOfMemoryException; +import org.apache.iotdb.db.queryengine.common.MPPQueryContext; +import org.apache.iotdb.db.queryengine.common.QueryId; +import org.apache.iotdb.db.queryengine.plan.relational.sql.ast.LoadTsFile; +import org.apache.iotdb.db.storageengine.dataregion.tsfile.TsFileResource; + import org.apache.tsfile.enums.ColumnCategory; import org.apache.tsfile.enums.TSDataType; import org.apache.tsfile.file.metadata.AbstractAlignedChunkMetadata; @@ -26,6 +32,7 @@ import org.apache.tsfile.file.metadata.StringArrayDeviceID; import org.apache.tsfile.file.metadata.TableSchema; import org.apache.tsfile.read.TsFileSequenceReader; +import org.apache.tsfile.read.TsFileSequenceReaderTimeseriesMetadataIterator; import org.apache.tsfile.write.chunk.AlignedChunkWriterImpl; import org.apache.tsfile.write.schema.IMeasurementSchema; import org.apache.tsfile.write.schema.MeasurementSchema; @@ -35,10 +42,14 @@ import org.junit.Test; import java.io.File; +import java.lang.reflect.Field; import java.lang.reflect.Method; import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; import java.util.List; import java.util.Objects; +import java.util.Set; public class LoadTsFileAnalyzerTest { @@ -75,6 +86,48 @@ public void testTableWritePointCountFallbackToTimeChunkWhenAllFieldsNull() throw } } + @Test + public void testAnalyzeSingleTableFileShouldNotMissAllNullDeviceInMixedFile() throws Exception { + final File tsFile = new File("load-table-mixed-null-device.tsfile"); + writeTableTsFileWithMixedDevices(tsFile); + + final LoadTsFile statement = + new LoadTsFile(null, tsFile.getAbsolutePath(), Collections.emptyMap()).setDatabase("db"); + final TrackingLoadTsFileTableSchemaCache schemaCache = new TrackingLoadTsFileTableSchemaCache(); + try (final LoadTsFileAnalyzer analyzer = + new LoadTsFileAnalyzer(statement, false, new MPPQueryContext(new QueryId("test"))); + final TsFileSequenceReader reader = new TsFileSequenceReader(tsFile.getAbsolutePath())) { + injectTableSchemaCache(analyzer, schemaCache); + + final Method method = + LoadTsFileAnalyzer.class.getDeclaredMethod( + "doAnalyzeSingleTableFile", + File.class, + TsFileSequenceReader.class, + TsFileSequenceReaderTimeseriesMetadataIterator.class, + java.util.Map.class); + method.setAccessible(true); + + final TsFileSequenceReaderTimeseriesMetadataIterator timeseriesMetadataIterator = + new TsFileSequenceReaderTimeseriesMetadataIterator(reader, false); + method.invoke( + analyzer, tsFile, reader, timeseriesMetadataIterator, reader.getTableSchemaMap()); + } finally { + if (tsFile.exists()) { + Assert.assertTrue(tsFile.delete()); + } + } + + Assert.assertEquals(1, statement.getResources().size()); + final TsFileResource resource = statement.getResources().get(0); + Assert.assertTrue(containsDevice(resource.getDevices(), "table1", "tagA")); + Assert.assertTrue(containsDevice(resource.getDevices(), "table1", "tagB")); + Assert.assertEquals(6L, statement.getWritePointCount(0)); + Assert.assertTrue(schemaCache.containsDevice("table1", "tagA")); + Assert.assertTrue(schemaCache.containsDevice("table1", "tagB")); + Assert.assertEquals(2, schemaCache.getVerifiedDeviceCount()); + } + private void writeTableTsFileWithAllNullFields(final File tsFile) throws Exception { if (tsFile.exists()) { Assert.assertTrue(tsFile.delete()); @@ -107,4 +160,90 @@ private void writeTableTsFileWithAllNullFields(final File tsFile) throws Excepti writer.endFile(); } } + + private void writeTableTsFileWithMixedDevices(final File tsFile) throws Exception { + if (tsFile.exists()) { + Assert.assertTrue(tsFile.delete()); + } + + final List tableSchemaList = + Arrays.asList( + new MeasurementSchema("tag1", TSDataType.STRING), + new MeasurementSchema("s1", TSDataType.INT64), + new MeasurementSchema("s2", TSDataType.DOUBLE)); + final List columnCategoryList = + Arrays.asList(ColumnCategory.TAG, ColumnCategory.FIELD, ColumnCategory.FIELD); + + final Schema schema = new Schema(); + schema.registerTableSchema(new TableSchema("table1", tableSchemaList, columnCategoryList)); + try (final TsFileIOWriter writer = new TsFileIOWriter(tsFile)) { + writer.setSchema(schema); + + writeDevice(writer, tableSchemaList, new String[] {"table1", "tagA"}, false); + writeDevice(writer, tableSchemaList, new String[] {"table1", "tagB"}, true); + + writer.endFile(); + } + } + + private void writeDevice( + final TsFileIOWriter writer, + final List tableSchemaList, + final String[] deviceSegments, + final boolean areAllFieldsNull) + throws Exception { + writer.startChunkGroup(new StringArrayDeviceID(deviceSegments)); + + final AlignedChunkWriterImpl chunkWriter = + new AlignedChunkWriterImpl(tableSchemaList.subList(1, tableSchemaList.size())); + for (int i = 0; i < 3; i++) { + final long time = 100 + i; + chunkWriter.getTimeChunkWriter().write(time); + chunkWriter.getValueChunkWriterByIndex(0).write(time, i, areAllFieldsNull); + chunkWriter.getValueChunkWriterByIndex(1).write(time, 0.5 + i, areAllFieldsNull); + } + chunkWriter.writeToFileWriter(writer); + writer.endChunkGroup(); + } + + private void injectTableSchemaCache( + final LoadTsFileAnalyzer analyzer, final TrackingLoadTsFileTableSchemaCache schemaCache) + throws Exception { + final Field tableSchemaCacheField = + LoadTsFileAnalyzer.class.getDeclaredField("tableSchemaCache"); + tableSchemaCacheField.setAccessible(true); + tableSchemaCacheField.set(analyzer, schemaCache); + } + + private boolean containsDevice(final Set devices, final String... expectedSegments) { + return devices.stream() + .anyMatch(device -> Arrays.equals(device.getSegments(), expectedSegments)); + } + + private static class TrackingLoadTsFileTableSchemaCache extends LoadTsFileTableSchemaCache { + + private final Set> verifiedDevices = new HashSet<>(); + + private TrackingLoadTsFileTableSchemaCache() throws LoadRuntimeOutOfMemoryException { + super(null, new MPPQueryContext(new QueryId("load_test")), false); + } + + @Override + public void autoCreateAndVerify(final IDeviceID device) { + verifiedDevices.add(Arrays.asList(device.getSegments())); + } + + @Override + public boolean isDeviceDeletedByMods(final IDeviceID device) { + return false; + } + + private boolean containsDevice(final String... expectedSegments) { + return verifiedDevices.contains(Arrays.asList((Object[]) expectedSegments)); + } + + private int getVerifiedDeviceCount() { + return verifiedDevices.size(); + } + } } From 3b13a70d2e688711c4f171297115ae5c85360fc0 Mon Sep 17 00:00:00 2001 From: Caideyipi <87789683+Caideyipi@users.noreply.github.com> Date: Thu, 7 May 2026 12:56:50 +0800 Subject: [PATCH 06/23] refactor --- .../plan/analyze/load/LoadTsFileAnalyzer.java | 224 +--------------- .../utils/TableDeviceLastValueCollector.java | 240 ++++++++++++++++++ .../storageengine/load/LoadTsFileManager.java | 83 +----- 3 files changed, 262 insertions(+), 285 deletions(-) create mode 100644 iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/utils/TableDeviceLastValueCollector.java diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzer.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzer.java index 8dddb5e8178c4..46a8815cbcc80 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzer.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzer.java @@ -43,6 +43,7 @@ import org.apache.iotdb.db.storageengine.dataregion.tsfile.TsFileResource; import org.apache.iotdb.db.storageengine.dataregion.tsfile.TsFileResourceStatus; import org.apache.iotdb.db.storageengine.dataregion.tsfile.timeindex.ITimeIndex; +import org.apache.iotdb.db.storageengine.dataregion.utils.TableDeviceLastValueCollector; import org.apache.iotdb.db.storageengine.dataregion.utils.TsFileResourceUtils; import org.apache.iotdb.db.storageengine.load.active.ActiveLoadPathHelper; import org.apache.iotdb.db.storageengine.load.converter.LoadTsFileDataTypeConverter; @@ -54,7 +55,6 @@ import org.apache.tsfile.common.conf.TSFileDescriptor; import org.apache.tsfile.encrypt.EncryptParameter; import org.apache.tsfile.encrypt.EncryptUtils; -import org.apache.tsfile.enums.TSDataType; import org.apache.tsfile.external.commons.io.FileUtils; import org.apache.tsfile.file.metadata.AbstractAlignedChunkMetadata; import org.apache.tsfile.file.metadata.IChunkMetadata; @@ -62,14 +62,11 @@ import org.apache.tsfile.file.metadata.MetadataIndexNode; import org.apache.tsfile.file.metadata.TableSchema; import org.apache.tsfile.file.metadata.TimeseriesMetadata; -import org.apache.tsfile.read.TimeValuePair; import org.apache.tsfile.read.TsFileSequenceReader; import org.apache.tsfile.read.TsFileSequenceReaderTimeseriesMetadataIterator; import org.apache.tsfile.read.controller.IMetadataQuerier; import org.apache.tsfile.read.controller.MetadataQuerierByFileImpl; import org.apache.tsfile.utils.Pair; -import org.apache.tsfile.utils.RamUsageEstimator; -import org.apache.tsfile.utils.TsPrimitiveType; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -80,7 +77,6 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; -import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; @@ -619,8 +615,7 @@ private long updateTableTsFileResourceAndVerifySchema( writePointCount += getWritePointCount(device2TimeseriesMetadata); } - Map> deviceLastValues = null; - long lastValuesMemCost = 0; + TableDeviceLastValueCollector lastValueCollector = null; boolean hasFallbackProcessedDevice = false; final boolean isCacheLastValuesForLoad = IoTDBDescriptor.getInstance().getConfig().isCacheLastValuesForLoad(); @@ -653,12 +648,11 @@ private long updateTableTsFileResourceAndVerifySchema( IoTDBDescriptor.getInstance().getConfig().getTimeIndexLevel().getTimeIndex()); } if (isCacheLastValuesForLoad) { - if (devicesHandledByTimeseriesMetadataIterator.isEmpty()) { - deviceLastValues = new HashMap<>(); - } else if (Objects.nonNull(tsFileResource.getLastValues())) { - deviceLastValues = restoreTableDeviceLastValues(tsFileResource.getLastValues()); - lastValuesMemCost = calculateTableDeviceLastValuesMemoryCost(deviceLastValues); - } + lastValueCollector = + devicesHandledByTimeseriesMetadataIterator.isEmpty() + ? TableDeviceLastValueCollector.create(lastValuesMemoryBudget) + : TableDeviceLastValueCollector.restore( + lastValuesMemoryBudget, tsFileResource.getLastValues()); } hasFallbackProcessedDevice = true; } @@ -676,17 +670,8 @@ private long updateTableTsFileResourceAndVerifySchema( tsFileResource.updateStartTime(deviceId, alignedChunkMetadata.getStartTime()); tsFileResource.updateEndTime(deviceId, alignedChunkMetadata.getEndTime()); writePointCount += getTableWritePointCount(alignedChunkMetadata); - if (deviceLastValues != null) { - lastValuesMemCost = - updateTableDeviceLastValues( - deviceLastValues, - deviceId, - alignedChunkMetadata, - lastValuesMemCost, - lastValuesMemoryBudget); - if (lastValuesMemCost > lastValuesMemoryBudget) { - deviceLastValues = null; - } + if (Objects.nonNull(lastValueCollector)) { + lastValueCollector.update(deviceId, alignedChunkMetadata.getValueChunkMetadataList()); } } @@ -713,17 +698,8 @@ private long updateTableTsFileResourceAndVerifySchema( if (hasChunk) { writePointCount += getTableWritePointCount(timeseriesMetadataList); - if (deviceLastValues != null) { - lastValuesMemCost = - updateTableDeviceLastValues( - deviceLastValues, - deviceId, - timeseriesMetadataList, - lastValuesMemCost, - lastValuesMemoryBudget); - if (lastValuesMemCost > lastValuesMemoryBudget) { - deviceLastValues = null; - } + if (Objects.nonNull(lastValueCollector)) { + lastValueCollector.update(deviceId, timeseriesMetadataList); } } } @@ -738,132 +714,12 @@ private long updateTableTsFileResourceAndVerifySchema( } } - if (hasFallbackProcessedDevice) { - tsFileResource.setLastValues(convertTableDeviceLastValues(deviceLastValues)); + if (hasFallbackProcessedDevice && Objects.nonNull(lastValueCollector)) { + tsFileResource.setLastValues(lastValueCollector.toTsFileResourceLastValues()); } return writePointCount; } - private static long updateTableDeviceLastValues( - final Map> deviceLastValues, - final IDeviceID deviceId, - final AbstractAlignedChunkMetadata alignedChunkMetadata, - long lastValuesMemCost, - final long lastValuesMemoryBudget) { - for (final IChunkMetadata chunkMetadata : alignedChunkMetadata.getValueChunkMetadataList()) { - if (Objects.isNull(chunkMetadata)) { - continue; - } - - Map deviceMap = deviceLastValues.get(deviceId); - if (Objects.isNull(deviceMap)) { - deviceMap = new HashMap<>(); - deviceLastValues.put(deviceId, deviceMap); - lastValuesMemCost += RamUsageEstimator.shallowSizeOf(deviceMap); - lastValuesMemCost += deviceId.ramBytesUsed(); - } - - final int previousSize = deviceMap.size(); - final String measurement = chunkMetadata.getMeasurementUid(); - final TimeValuePair oldPair = deviceMap.get(measurement); - if (Objects.nonNull(oldPair) && oldPair.getTimestamp() > chunkMetadata.getEndTime()) { - continue; - } - - final TimeValuePair newPair = buildLastValuePair(chunkMetadata); - if (Objects.nonNull(oldPair)) { - lastValuesMemCost -= oldPair.getSize(); - } - if (Objects.nonNull(newPair)) { - deviceMap.put(measurement, newPair); - lastValuesMemCost += newPair.getSize(); - } else { - deviceMap.remove(measurement); - } - lastValuesMemCost += - (long) (deviceMap.size() - previousSize) - * RamUsageEstimator.HASHTABLE_RAM_BYTES_PER_ENTRY; - if (lastValuesMemCost > lastValuesMemoryBudget) { - return lastValuesMemCost; - } - } - return lastValuesMemCost; - } - - private static long updateTableDeviceLastValues( - final Map> deviceLastValues, - final IDeviceID deviceId, - final List timeseriesMetadataList, - long lastValuesMemCost, - final long lastValuesMemoryBudget) { - Map deviceMap = deviceLastValues.get(deviceId); - if (Objects.isNull(deviceMap)) { - deviceMap = new HashMap<>(); - deviceLastValues.put(deviceId, deviceMap); - lastValuesMemCost += RamUsageEstimator.shallowSizeOf(deviceMap); - lastValuesMemCost += deviceId.ramBytesUsed(); - } - - for (final TimeseriesMetadata timeseriesMetadata : timeseriesMetadataList) { - if (Objects.isNull(timeseriesMetadata) - || Objects.isNull(timeseriesMetadata.getStatistics())) { - continue; - } - - final int previousSize = deviceMap.size(); - final String measurement = timeseriesMetadata.getMeasurementId(); - final TimeValuePair oldPair = deviceMap.get(measurement); - final TimeValuePair newPair = buildLastValuePair(timeseriesMetadata); - if (Objects.nonNull(oldPair)) { - lastValuesMemCost -= oldPair.getSize(); - } - - if (Objects.nonNull(newPair)) { - deviceMap.put(measurement, newPair); - lastValuesMemCost += newPair.getSize(); - } else { - deviceMap.remove(measurement); - } - lastValuesMemCost += - (long) (deviceMap.size() - previousSize) - * RamUsageEstimator.HASHTABLE_RAM_BYTES_PER_ENTRY; - if (lastValuesMemCost > lastValuesMemoryBudget) { - return lastValuesMemCost; - } - } - return lastValuesMemCost; - } - - private static TimeValuePair buildLastValuePair(final IChunkMetadata chunkMetadata) { - if (Objects.isNull(chunkMetadata.getStatistics()) - || Objects.equals(chunkMetadata.getDataType(), TSDataType.BLOB)) { - return null; - } - - final TsPrimitiveType lastValue = - TsPrimitiveType.getByType( - Objects.equals(chunkMetadata.getDataType(), TSDataType.VECTOR) - ? TSDataType.INT64 - : chunkMetadata.getDataType(), - chunkMetadata.getStatistics().getLastValue()); - return new TimeValuePair(chunkMetadata.getEndTime(), lastValue); - } - - private static TimeValuePair buildLastValuePair(final TimeseriesMetadata timeseriesMetadata) { - if (Objects.isNull(timeseriesMetadata.getStatistics()) - || Objects.equals(timeseriesMetadata.getTsDataType(), TSDataType.BLOB)) { - return null; - } - - final TsPrimitiveType lastValue = - TsPrimitiveType.getByType( - Objects.equals(timeseriesMetadata.getTsDataType(), TSDataType.VECTOR) - ? TSDataType.INT64 - : timeseriesMetadata.getTsDataType(), - timeseriesMetadata.getStatistics().getLastValue()); - return new TimeValuePair(timeseriesMetadata.getStatistics().getEndTime(), lastValue); - } - private static List getTimeseriesMetadata( final Map> device2TimeseriesMetadata, final IDeviceID deviceId) { @@ -885,60 +741,6 @@ private static List getDeviceKey(final IDeviceID deviceId) { return Arrays.asList(deviceId.getSegments()); } - private static Map> restoreTableDeviceLastValues( - final Map>> deviceLastValues) { - final Map> restoredDeviceLastValues = - new HashMap<>(deviceLastValues.size()); - for (final Map.Entry>> entry : - deviceLastValues.entrySet()) { - final Map restoredLastValues = new HashMap<>(entry.getValue().size()); - for (final Pair lastValueEntry : entry.getValue()) { - restoredLastValues.put(lastValueEntry.getLeft(), lastValueEntry.getRight()); - } - restoredDeviceLastValues.put(entry.getKey(), restoredLastValues); - } - return restoredDeviceLastValues; - } - - private static long calculateTableDeviceLastValuesMemoryCost( - final Map> deviceLastValues) { - long lastValuesMemCost = 0; - for (final Map.Entry> entry : - deviceLastValues.entrySet()) { - lastValuesMemCost += entry.getKey().ramBytesUsed(); - lastValuesMemCost += RamUsageEstimator.shallowSizeOf(entry.getValue()); - lastValuesMemCost += RamUsageEstimator.HASHTABLE_RAM_BYTES_PER_ENTRY; - for (final Map.Entry lastValueEntry : entry.getValue().entrySet()) { - lastValuesMemCost += RamUsageEstimator.sizeOf(lastValueEntry.getKey()); - lastValuesMemCost += - Objects.nonNull(lastValueEntry.getValue()) - ? lastValueEntry.getValue().getSize() - : RamUsageEstimator.NUM_BYTES_OBJECT_REF; - lastValuesMemCost += RamUsageEstimator.HASHTABLE_RAM_BYTES_PER_ENTRY; - } - } - return lastValuesMemCost; - } - - private static Map>> convertTableDeviceLastValues( - final Map> deviceLastValues) { - if (Objects.isNull(deviceLastValues)) { - return null; - } - - final Map>> finalDeviceLastValues = - new HashMap<>(deviceLastValues.size()); - for (final Map.Entry> entry : - deviceLastValues.entrySet()) { - final List> lastValues = new ArrayList<>(entry.getValue().size()); - for (final Map.Entry lastValueEntry : entry.getValue().entrySet()) { - lastValues.add(new Pair<>(lastValueEntry.getKey(), lastValueEntry.getValue())); - } - finalDeviceLastValues.put(entry.getKey(), lastValues); - } - return finalDeviceLastValues; - } - private TsFileResource constructTsFileResource( final TsFileSequenceReader reader, final File tsFile) throws IOException { final TsFileResource tsFileResource = new TsFileResource(tsFile); diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/utils/TableDeviceLastValueCollector.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/utils/TableDeviceLastValueCollector.java new file mode 100644 index 0000000000000..a8d2fe9115000 --- /dev/null +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/utils/TableDeviceLastValueCollector.java @@ -0,0 +1,240 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iotdb.db.storageengine.dataregion.utils; + +import org.apache.tsfile.enums.TSDataType; +import org.apache.tsfile.file.metadata.IChunkMetadata; +import org.apache.tsfile.file.metadata.IDeviceID; +import org.apache.tsfile.file.metadata.TimeseriesMetadata; +import org.apache.tsfile.read.TimeValuePair; +import org.apache.tsfile.utils.Pair; +import org.apache.tsfile.utils.RamUsageEstimator; +import org.apache.tsfile.utils.TsPrimitiveType; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +public final class TableDeviceLastValueCollector { + + private final long memoryBudgetInBytes; + private Map> deviceLastValues; + private long memoryCostInBytes; + + private TableDeviceLastValueCollector( + final long memoryBudgetInBytes, + final Map> deviceLastValues, + final long memoryCostInBytes) { + this.memoryBudgetInBytes = memoryBudgetInBytes; + this.deviceLastValues = deviceLastValues; + this.memoryCostInBytes = memoryCostInBytes; + } + + public static TableDeviceLastValueCollector create(final long memoryBudgetInBytes) { + return new TableDeviceLastValueCollector(memoryBudgetInBytes, new HashMap<>(), 0); + } + + public static TableDeviceLastValueCollector restore( + final long memoryBudgetInBytes, + final Map>> deviceLastValues) { + if (Objects.isNull(deviceLastValues)) { + return new TableDeviceLastValueCollector(memoryBudgetInBytes, null, 0); + } + + final Map> restoredDeviceLastValues = + restoreDeviceLastValues(deviceLastValues); + return new TableDeviceLastValueCollector( + memoryBudgetInBytes, + restoredDeviceLastValues, + calculateDeviceLastValuesMemoryCost(restoredDeviceLastValues)); + } + + public void update( + final IDeviceID deviceId, final Iterable chunkMetadataList) { + if (Objects.isNull(chunkMetadataList) || Objects.isNull(deviceLastValues)) { + return; + } + + for (final IChunkMetadata chunkMetadata : chunkMetadataList) { + update(deviceId, chunkMetadata); + if (Objects.isNull(deviceLastValues)) { + return; + } + } + } + + public void update(final IDeviceID deviceId, final IChunkMetadata chunkMetadata) { + if (Objects.isNull(chunkMetadata) || Objects.isNull(deviceLastValues)) { + return; + } + + updateMeasurementLastValue( + deviceId, + chunkMetadata.getMeasurementUid(), + chunkMetadata.getEndTime(), + buildLastValuePair(chunkMetadata)); + } + + public void update( + final IDeviceID deviceId, final List timeseriesMetadataList) { + if (Objects.isNull(timeseriesMetadataList) || Objects.isNull(deviceLastValues)) { + return; + } + + for (final TimeseriesMetadata timeseriesMetadata : timeseriesMetadataList) { + if (Objects.isNull(timeseriesMetadata) + || Objects.isNull(timeseriesMetadata.getStatistics())) { + continue; + } + + updateMeasurementLastValue( + deviceId, + timeseriesMetadata.getMeasurementId(), + timeseriesMetadata.getStatistics().getEndTime(), + buildLastValuePair(timeseriesMetadata)); + if (Objects.isNull(deviceLastValues)) { + return; + } + } + } + + public Map>> toTsFileResourceLastValues() { + if (Objects.isNull(deviceLastValues)) { + return null; + } + + final Map>> finalDeviceLastValues = + new HashMap<>(deviceLastValues.size()); + for (final Map.Entry> entry : + deviceLastValues.entrySet()) { + final List> lastValues = new ArrayList<>(entry.getValue().size()); + for (final Map.Entry lastValueEntry : entry.getValue().entrySet()) { + lastValues.add(new Pair<>(lastValueEntry.getKey(), lastValueEntry.getValue())); + } + finalDeviceLastValues.put(entry.getKey(), lastValues); + } + return finalDeviceLastValues; + } + + private void updateMeasurementLastValue( + final IDeviceID deviceId, + final String measurement, + final long endTime, + final TimeValuePair newPair) { + final Map deviceMap = getOrCreateDeviceMap(deviceId); + final int previousSize = deviceMap.size(); + final TimeValuePair oldPair = deviceMap.get(measurement); + if (Objects.nonNull(oldPair) && oldPair.getTimestamp() > endTime) { + return; + } + + if (Objects.nonNull(oldPair)) { + memoryCostInBytes -= oldPair.getSize(); + } + if (Objects.nonNull(newPair)) { + deviceMap.put(measurement, newPair); + memoryCostInBytes += newPair.getSize(); + } else { + deviceMap.remove(measurement); + } + memoryCostInBytes += + (long) (deviceMap.size() - previousSize) * RamUsageEstimator.HASHTABLE_RAM_BYTES_PER_ENTRY; + if (memoryCostInBytes > memoryBudgetInBytes) { + deviceLastValues = null; + memoryCostInBytes = 0; + } + } + + private Map getOrCreateDeviceMap(final IDeviceID deviceId) { + return deviceLastValues.computeIfAbsent( + deviceId, + id -> { + final Map deviceMap = new HashMap<>(); + memoryCostInBytes += RamUsageEstimator.shallowSizeOf(deviceMap); + memoryCostInBytes += id.ramBytesUsed(); + return deviceMap; + }); + } + + private static TimeValuePair buildLastValuePair(final IChunkMetadata chunkMetadata) { + return Objects.isNull(chunkMetadata.getStatistics()) + ? null + : buildLastValuePair( + chunkMetadata.getDataType(), + chunkMetadata.getStatistics().getLastValue(), + chunkMetadata.getEndTime()); + } + + private static TimeValuePair buildLastValuePair(final TimeseriesMetadata timeseriesMetadata) { + return buildLastValuePair( + timeseriesMetadata.getTsDataType(), + timeseriesMetadata.getStatistics().getLastValue(), + timeseriesMetadata.getStatistics().getEndTime()); + } + + private static TimeValuePair buildLastValuePair( + final TSDataType dataType, final Object value, final long endTime) { + if (dataType == TSDataType.BLOB) { + return null; + } + + final TsPrimitiveType lastValue = + TsPrimitiveType.getByType( + dataType == TSDataType.VECTOR ? TSDataType.INT64 : dataType, value); + return new TimeValuePair(endTime, lastValue); + } + + private static Map> restoreDeviceLastValues( + final Map>> deviceLastValues) { + final Map> restoredDeviceLastValues = + new HashMap<>(deviceLastValues.size()); + for (final Map.Entry>> entry : + deviceLastValues.entrySet()) { + final Map restoredLastValues = new HashMap<>(entry.getValue().size()); + for (final Pair lastValueEntry : entry.getValue()) { + restoredLastValues.put(lastValueEntry.getLeft(), lastValueEntry.getRight()); + } + restoredDeviceLastValues.put(entry.getKey(), restoredLastValues); + } + return restoredDeviceLastValues; + } + + private static long calculateDeviceLastValuesMemoryCost( + final Map> deviceLastValues) { + long lastValuesMemCost = 0; + for (final Map.Entry> entry : + deviceLastValues.entrySet()) { + lastValuesMemCost += entry.getKey().ramBytesUsed(); + lastValuesMemCost += RamUsageEstimator.shallowSizeOf(entry.getValue()); + lastValuesMemCost += RamUsageEstimator.HASHTABLE_RAM_BYTES_PER_ENTRY; + for (final Map.Entry lastValueEntry : entry.getValue().entrySet()) { + lastValuesMemCost += RamUsageEstimator.sizeOf(lastValueEntry.getKey()); + lastValuesMemCost += + Objects.nonNull(lastValueEntry.getValue()) + ? lastValueEntry.getValue().getSize() + : RamUsageEstimator.NUM_BYTES_OBJECT_REF; + lastValuesMemCost += RamUsageEstimator.HASHTABLE_RAM_BYTES_PER_ENTRY; + } + } + return lastValuesMemCost; + } +} diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/load/LoadTsFileManager.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/load/LoadTsFileManager.java index a39e212892d7f..0c72325fb0b0a 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/load/LoadTsFileManager.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/load/LoadTsFileManager.java @@ -49,6 +49,7 @@ import org.apache.iotdb.db.storageengine.dataregion.modification.v1.ModificationFileV1; import org.apache.iotdb.db.storageengine.dataregion.tsfile.TsFileResource; import org.apache.iotdb.db.storageengine.dataregion.tsfile.TsFileResourceStatus; +import org.apache.iotdb.db.storageengine.dataregion.utils.TableDeviceLastValueCollector; import org.apache.iotdb.db.storageengine.load.active.ActiveLoadAgent; import org.apache.iotdb.db.storageengine.load.splitter.ChunkData; import org.apache.iotdb.db.storageengine.load.splitter.DeletionData; @@ -60,15 +61,10 @@ import com.github.benmanes.caffeine.cache.Cache; import com.github.benmanes.caffeine.cache.Caffeine; import org.apache.tsfile.common.constant.TsFileConstant; -import org.apache.tsfile.enums.TSDataType; import org.apache.tsfile.exception.write.PageException; import org.apache.tsfile.file.metadata.ChunkGroupMetadata; import org.apache.tsfile.file.metadata.ChunkMetadata; import org.apache.tsfile.file.metadata.IDeviceID; -import org.apache.tsfile.read.TimeValuePair; -import org.apache.tsfile.utils.Pair; -import org.apache.tsfile.utils.RamUsageEstimator; -import org.apache.tsfile.utils.TsPrimitiveType; import org.apache.tsfile.write.writer.TsFileIOWriter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -81,16 +77,13 @@ import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; -import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Optional; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.PriorityBlockingQueue; -import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicReference; -import java.util.stream.Collectors; import java.util.stream.Stream; /** @@ -625,81 +618,23 @@ private void endTsFileResource( TsFileIOWriter writer, TsFileResource tsFileResource, ProgressIndex progressIndex) throws IOException { // Update time index by chunk groups still in memory - Map> deviceLastValues = null; - if (IoTDBDescriptor.getInstance().getConfig().isCacheLastValuesForLoad()) { - deviceLastValues = new HashMap<>(); - } - AtomicLong lastValuesMemCost = new AtomicLong(0); + final TableDeviceLastValueCollector lastValueCollector = + CONFIG.isCacheLastValuesForLoad() + ? TableDeviceLastValueCollector.create(CONFIG.getCacheLastValuesMemoryBudgetInByte()) + : null; for (final ChunkGroupMetadata chunkGroupMetadata : writer.getChunkGroupMetadataList()) { final IDeviceID device = chunkGroupMetadata.getDevice(); for (final ChunkMetadata chunkMetadata : chunkGroupMetadata.getChunkMetadataList()) { tsFileResource.updateStartTime(device, chunkMetadata.getStartTime()); tsFileResource.updateEndTime(device, chunkMetadata.getEndTime()); - if (deviceLastValues != null) { - Map deviceMap = - deviceLastValues.computeIfAbsent( - device, - d -> { - Map map = new HashMap<>(); - lastValuesMemCost.addAndGet(RamUsageEstimator.shallowSizeOf(map)); - lastValuesMemCost.addAndGet(device.ramBytesUsed()); - return map; - }); - int prevSize = deviceMap.size(); - deviceMap.compute( - chunkMetadata.getMeasurementUid(), - (m, oldPair) -> { - if (oldPair != null && oldPair.getTimestamp() > chunkMetadata.getEndTime()) { - return oldPair; - } - TsPrimitiveType lastValue = - chunkMetadata.getStatistics() != null - && chunkMetadata.getDataType() != TSDataType.BLOB - ? TsPrimitiveType.getByType( - chunkMetadata.getDataType() == TSDataType.VECTOR - ? TSDataType.INT64 - : chunkMetadata.getDataType(), - chunkMetadata.getStatistics().getLastValue()) - : null; - TimeValuePair timeValuePair = - lastValue != null - ? new TimeValuePair(chunkMetadata.getEndTime(), lastValue) - : null; - if (oldPair != null) { - lastValuesMemCost.addAndGet(-oldPair.getSize()); - } - if (timeValuePair != null) { - lastValuesMemCost.addAndGet(timeValuePair.getSize()); - } - return timeValuePair; - }); - int afterSize = deviceMap.size(); - lastValuesMemCost.addAndGet( - (afterSize - prevSize) * RamUsageEstimator.HASHTABLE_RAM_BYTES_PER_ENTRY); - if (lastValuesMemCost.get() - > IoTDBDescriptor.getInstance() - .getConfig() - .getCacheLastValuesMemoryBudgetInByte()) { - deviceLastValues = null; - } + if (Objects.nonNull(lastValueCollector)) { + lastValueCollector.update(device, chunkMetadata); } } } - if (deviceLastValues != null) { - Map>> finalDeviceLastValues; - finalDeviceLastValues = new HashMap<>(deviceLastValues.size()); - for (final Map.Entry> entry : - deviceLastValues.entrySet()) { - final IDeviceID device = entry.getKey(); - Map lastValues = entry.getValue(); - List> pairList = - lastValues.entrySet().stream() - .map(e -> new Pair<>(e.getKey(), e.getValue())) - .collect(Collectors.toList()); - finalDeviceLastValues.put(device, pairList); - } - tsFileResource.setLastValues(finalDeviceLastValues); + if (Objects.nonNull(lastValueCollector)) { + tsFileResource.setLastValues(lastValueCollector.toTsFileResourceLastValues()); } tsFileResource.setStatus(TsFileResourceStatus.NORMAL); tsFileResource.setProgressIndex(progressIndex); From 30057c0d145c3f7e1c84cf3d77d0694a53bef3cf Mon Sep 17 00:00:00 2001 From: Caideyipi <87789683+Caideyipi@users.noreply.github.com> Date: Thu, 7 May 2026 14:53:05 +0800 Subject: [PATCH 07/23] Update LoadTsFileAnalyzerTest.java --- .../queryengine/plan/analyze/load/LoadTsFileAnalyzerTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzerTest.java b/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzerTest.java index ca5a83a17a98b..8a928d86be06e 100644 --- a/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzerTest.java +++ b/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzerTest.java @@ -199,7 +199,7 @@ private void writeDevice( for (int i = 0; i < 3; i++) { final long time = 100 + i; chunkWriter.getTimeChunkWriter().write(time); - chunkWriter.getValueChunkWriterByIndex(0).write(time, i, areAllFieldsNull); + chunkWriter.getValueChunkWriterByIndex(0).write(time, (long) i, areAllFieldsNull); chunkWriter.getValueChunkWriterByIndex(1).write(time, 0.5 + i, areAllFieldsNull); } chunkWriter.writeToFileWriter(writer); From 7b59e5332b4b00e6e77490837c0e9bb72dd7f117 Mon Sep 17 00:00:00 2001 From: Caideyipi <87789683+Caideyipi@users.noreply.github.com> Date: Thu, 7 May 2026 15:35:37 +0800 Subject: [PATCH 08/23] Fix blob --- .../utils/TableDeviceLastValueCollector.java | 20 +++-- .../TableDeviceLastValueCollectorTest.java | 89 +++++++++++++++++++ 2 files changed, 103 insertions(+), 6 deletions(-) create mode 100644 iotdb-core/datanode/src/test/java/org/apache/iotdb/db/storageengine/dataregion/utils/TableDeviceLastValueCollectorTest.java diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/utils/TableDeviceLastValueCollector.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/utils/TableDeviceLastValueCollector.java index a8d2fe9115000..0663e9fe89143 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/utils/TableDeviceLastValueCollector.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/utils/TableDeviceLastValueCollector.java @@ -176,15 +176,23 @@ private Map getOrCreateDeviceMap(final IDeviceID deviceId } private static TimeValuePair buildLastValuePair(final IChunkMetadata chunkMetadata) { - return Objects.isNull(chunkMetadata.getStatistics()) - ? null - : buildLastValuePair( - chunkMetadata.getDataType(), - chunkMetadata.getStatistics().getLastValue(), - chunkMetadata.getEndTime()); + if (Objects.isNull(chunkMetadata.getStatistics()) + || chunkMetadata.getDataType() == TSDataType.BLOB) { + return null; + } + + return buildLastValuePair( + chunkMetadata.getDataType(), + chunkMetadata.getStatistics().getLastValue(), + chunkMetadata.getEndTime()); } private static TimeValuePair buildLastValuePair(final TimeseriesMetadata timeseriesMetadata) { + if (Objects.isNull(timeseriesMetadata.getStatistics()) + || timeseriesMetadata.getTsDataType() == TSDataType.BLOB) { + return null; + } + return buildLastValuePair( timeseriesMetadata.getTsDataType(), timeseriesMetadata.getStatistics().getLastValue(), diff --git a/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/storageengine/dataregion/utils/TableDeviceLastValueCollectorTest.java b/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/storageengine/dataregion/utils/TableDeviceLastValueCollectorTest.java new file mode 100644 index 0000000000000..92399d282f5e7 --- /dev/null +++ b/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/storageengine/dataregion/utils/TableDeviceLastValueCollectorTest.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iotdb.db.storageengine.dataregion.utils; + +import org.apache.tsfile.enums.TSDataType; +import org.apache.tsfile.file.metadata.IChunkMetadata; +import org.apache.tsfile.file.metadata.IDeviceID; +import org.apache.tsfile.file.metadata.TimeseriesMetadata; +import org.apache.tsfile.file.metadata.statistics.Statistics; +import org.apache.tsfile.read.TimeValuePair; +import org.apache.tsfile.utils.Pair; +import org.junit.Assert; +import org.junit.Test; +import org.mockito.Mockito; + +import java.io.Serializable; +import java.util.Collections; +import java.util.List; +import java.util.Map; + +public class TableDeviceLastValueCollectorTest { + + @Test + public void testIgnoreBlobChunkMetadataLastValue() { + final TableDeviceLastValueCollector collector = + TableDeviceLastValueCollector.create(Long.MAX_VALUE); + final IDeviceID deviceID = Mockito.mock(IDeviceID.class); + final IChunkMetadata chunkMetadata = Mockito.mock(IChunkMetadata.class); + final Statistics statistics = Mockito.mock(Statistics.class); + + Mockito.when(deviceID.ramBytesUsed()).thenReturn(0L); + Mockito.when(chunkMetadata.getDataType()).thenReturn(TSDataType.BLOB); + Mockito.when(chunkMetadata.getMeasurementUid()).thenReturn("s1"); + Mockito.when(chunkMetadata.getEndTime()).thenReturn(1L); + Mockito.when(chunkMetadata.getStatistics()).thenReturn(statistics); + Mockito.when(statistics.getLastValue()) + .thenThrow(new AssertionError("BLOB last value should not be read")); + + collector.update(deviceID, chunkMetadata); + + final Map>> lastValues = + collector.toTsFileResourceLastValues(); + Assert.assertNotNull(lastValues); + Assert.assertTrue(lastValues.containsKey(deviceID)); + Assert.assertEquals(Collections.emptyList(), lastValues.get(deviceID)); + } + + @Test + public void testIgnoreBlobTimeseriesMetadataLastValue() { + final TableDeviceLastValueCollector collector = + TableDeviceLastValueCollector.create(Long.MAX_VALUE); + final IDeviceID deviceID = Mockito.mock(IDeviceID.class); + final TimeseriesMetadata timeseriesMetadata = Mockito.mock(TimeseriesMetadata.class); + final Statistics statistics = Mockito.mock(Statistics.class); + + Mockito.when(deviceID.ramBytesUsed()).thenReturn(0L); + Mockito.when(timeseriesMetadata.getTsDataType()).thenReturn(TSDataType.BLOB); + Mockito.when(timeseriesMetadata.getMeasurementId()).thenReturn("s1"); + Mockito.when(timeseriesMetadata.getStatistics()).thenReturn(statistics); + Mockito.when(statistics.getEndTime()).thenReturn(1L); + Mockito.when(statistics.getLastValue()) + .thenThrow(new AssertionError("BLOB last value should not be read")); + + collector.update(deviceID, Collections.singletonList(timeseriesMetadata)); + + final Map>> lastValues = + collector.toTsFileResourceLastValues(); + Assert.assertNotNull(lastValues); + Assert.assertTrue(lastValues.containsKey(deviceID)); + Assert.assertEquals(Collections.emptyList(), lastValues.get(deviceID)); + } +} From be89ab3b060111d95017fab6d6511d105663409e Mon Sep 17 00:00:00 2001 From: Caideyipi <87789683+Caideyipi@users.noreply.github.com> Date: Thu, 7 May 2026 15:47:45 +0800 Subject: [PATCH 09/23] Update TableDeviceLastValueCollectorTest.java --- .../dataregion/utils/TableDeviceLastValueCollectorTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/storageengine/dataregion/utils/TableDeviceLastValueCollectorTest.java b/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/storageengine/dataregion/utils/TableDeviceLastValueCollectorTest.java index 92399d282f5e7..2e2f6eab6ab4f 100644 --- a/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/storageengine/dataregion/utils/TableDeviceLastValueCollectorTest.java +++ b/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/storageengine/dataregion/utils/TableDeviceLastValueCollectorTest.java @@ -49,7 +49,7 @@ public void testIgnoreBlobChunkMetadataLastValue() { Mockito.when(chunkMetadata.getDataType()).thenReturn(TSDataType.BLOB); Mockito.when(chunkMetadata.getMeasurementUid()).thenReturn("s1"); Mockito.when(chunkMetadata.getEndTime()).thenReturn(1L); - Mockito.when(chunkMetadata.getStatistics()).thenReturn(statistics); + Mockito.doReturn(statistics).when(chunkMetadata).getStatistics(); Mockito.when(statistics.getLastValue()) .thenThrow(new AssertionError("BLOB last value should not be read")); @@ -73,7 +73,7 @@ public void testIgnoreBlobTimeseriesMetadataLastValue() { Mockito.when(deviceID.ramBytesUsed()).thenReturn(0L); Mockito.when(timeseriesMetadata.getTsDataType()).thenReturn(TSDataType.BLOB); Mockito.when(timeseriesMetadata.getMeasurementId()).thenReturn("s1"); - Mockito.when(timeseriesMetadata.getStatistics()).thenReturn(statistics); + Mockito.doReturn(statistics).when(timeseriesMetadata).getStatistics(); Mockito.when(statistics.getEndTime()).thenReturn(1L); Mockito.when(statistics.getLastValue()) .thenThrow(new AssertionError("BLOB last value should not be read")); From 857558cc1027391f1e4c2e27215bcfaec4aec8e6 Mon Sep 17 00:00:00 2001 From: Caideyipi <87789683+Caideyipi@users.noreply.github.com> Date: Thu, 7 May 2026 16:08:42 +0800 Subject: [PATCH 10/23] refact --- .../iotdb/db/it/IoTDBLoadLastCacheIT.java | 1 - .../org/apache/iotdb/db/conf/IoTDBConfig.java | 4 +-- .../apache/iotdb/db/conf/IoTDBDescriptor.java | 13 +++++--- .../fetcher/cache/LastCacheLoadStrategy.java | 2 -- .../fetcher/cache/TableDeviceCacheEntry.java | 10 ++---- .../fetcher/cache/TableDeviceLastCache.java | 12 ------- .../fetcher/cache/TableDeviceSchemaCache.java | 31 +++++-------------- .../storageengine/dataregion/DataRegion.java | 10 ++---- .../conf/iotdb-system.properties.template | 5 ++- 9 files changed, 23 insertions(+), 65 deletions(-) diff --git a/integration-test/src/test/java/org/apache/iotdb/db/it/IoTDBLoadLastCacheIT.java b/integration-test/src/test/java/org/apache/iotdb/db/it/IoTDBLoadLastCacheIT.java index 755b3aef75864..4fd1f6fab9139 100644 --- a/integration-test/src/test/java/org/apache/iotdb/db/it/IoTDBLoadLastCacheIT.java +++ b/integration-test/src/test/java/org/apache/iotdb/db/it/IoTDBLoadLastCacheIT.java @@ -86,7 +86,6 @@ public static Collection data() { new Object[][] { {LastCacheLoadStrategy.CLEAN_ALL}, {LastCacheLoadStrategy.UPDATE}, - {LastCacheLoadStrategy.UPDATE_NO_BLOB}, {LastCacheLoadStrategy.CLEAN_DEVICE} }); } diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/conf/IoTDBConfig.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/conf/IoTDBConfig.java index c7fc72152e21e..0381f934a8857 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/conf/IoTDBConfig.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/conf/IoTDBConfig.java @@ -4313,9 +4313,7 @@ public void setLastCacheLoadStrategy(LastCacheLoadStrategy lastCacheLoadStrategy } public boolean isCacheLastValuesForLoad() { - return (lastCacheLoadStrategy == LastCacheLoadStrategy.UPDATE - || lastCacheLoadStrategy == LastCacheLoadStrategy.UPDATE_NO_BLOB) - && cacheLastValuesForLoad; + return lastCacheLoadStrategy == LastCacheLoadStrategy.UPDATE && cacheLastValuesForLoad; } public void setCacheLastValuesForLoad(boolean cacheLastValuesForLoad) { diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/conf/IoTDBDescriptor.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/conf/IoTDBDescriptor.java index 065583a383c96..4c10bd3891a5d 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/conf/IoTDBDescriptor.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/conf/IoTDBDescriptor.java @@ -2529,10 +2529,15 @@ private void loadLoadTsFileProps(TrimProperties properties) { "load_disk_select_strategy_for_pipe_and_iotv2", ILoadDiskSelector.LoadDiskSelectorType.INHERIT_LOAD.getValue())); - conf.setLastCacheLoadStrategy( - LastCacheLoadStrategy.valueOf( - properties.getProperty( - "last_cache_operation_on_load", LastCacheLoadStrategy.UPDATE.name()))); + final String lastCacheOperationOnLoad = + properties.getProperty("last_cache_operation_on_load", LastCacheLoadStrategy.UPDATE.name()); + if ("UPDATE_NO_BLOB".equals(lastCacheOperationOnLoad)) { + LOGGER.warn( + "last_cache_operation_on_load=UPDATE_NO_BLOB is deprecated and treated as UPDATE."); + conf.setLastCacheLoadStrategy(LastCacheLoadStrategy.UPDATE); + } else { + conf.setLastCacheLoadStrategy(LastCacheLoadStrategy.valueOf(lastCacheOperationOnLoad)); + } conf.setCacheLastValuesForLoad( Boolean.parseBoolean( diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/relational/metadata/fetcher/cache/LastCacheLoadStrategy.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/relational/metadata/fetcher/cache/LastCacheLoadStrategy.java index d9738e7d3044d..d91c4a56a864f 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/relational/metadata/fetcher/cache/LastCacheLoadStrategy.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/relational/metadata/fetcher/cache/LastCacheLoadStrategy.java @@ -21,8 +21,6 @@ public enum LastCacheLoadStrategy { // when a TsFile is loaded, read its data to update LastCache UPDATE, - // similar to UPDATE, but will invalidate cache of Blob series instead of updating them - UPDATE_NO_BLOB, // when a TsFile is loaded, clean its included device in LastCache CLEAN_DEVICE, // when a TsFile is loaded, clean all LastCache diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/relational/metadata/fetcher/cache/TableDeviceCacheEntry.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/relational/metadata/fetcher/cache/TableDeviceCacheEntry.java index 4f151d15eaeb6..3dadeb5366b27 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/relational/metadata/fetcher/cache/TableDeviceCacheEntry.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/relational/metadata/fetcher/cache/TableDeviceCacheEntry.java @@ -184,18 +184,12 @@ int initOrInvalidateLastCache( return Objects.nonNull(lastCache.get()) ? result : 0; } - int tryUpdateLastCache( - final String[] measurements, final TimeValuePair[] timeValuePairs, boolean invalidateNull) { + int tryUpdateLastCache(final String[] measurements, final TimeValuePair[] timeValuePairs) { final TableDeviceLastCache cache = lastCache.get(); - final int result = - Objects.nonNull(cache) ? cache.tryUpdate(measurements, timeValuePairs, invalidateNull) : 0; + final int result = Objects.nonNull(cache) ? cache.tryUpdate(measurements, timeValuePairs) : 0; return Objects.nonNull(lastCache.get()) ? result : 0; } - int tryUpdateLastCache(final String[] measurements, final TimeValuePair[] timeValuePairs) { - return tryUpdateLastCache(measurements, timeValuePairs, false); - } - int invalidateLastCache(final String measurement) { final TableDeviceLastCache cache = lastCache.get(); final int result = Objects.nonNull(cache) ? cache.invalidate(measurement) : 0; diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/relational/metadata/fetcher/cache/TableDeviceLastCache.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/relational/metadata/fetcher/cache/TableDeviceLastCache.java index 5a2ac3d1e5329..8fc23230b0687 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/relational/metadata/fetcher/cache/TableDeviceLastCache.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/relational/metadata/fetcher/cache/TableDeviceLastCache.java @@ -142,23 +142,11 @@ int initOrInvalidate( int tryUpdate( final @Nonnull String[] measurements, final @Nonnull TimeValuePair[] timeValuePairs) { - return tryUpdate(measurements, timeValuePairs, false); - } - - int tryUpdate( - final @Nonnull String[] measurements, - final @Nonnull TimeValuePair[] timeValuePairs, - final boolean invalidateNull) { final AtomicInteger diff = new AtomicInteger(0); long lastTime = Long.MIN_VALUE; for (int i = 0; i < measurements.length; ++i) { if (Objects.isNull(timeValuePairs[i])) { - if (invalidateNull) { - diff.addAndGet( - -((int) RamUsageEstimator.sizeOf(measurements[i]) - + getTvPairEntrySize(measurement2CachedLastMap.remove(measurements[i])))); - } continue; } diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/relational/metadata/fetcher/cache/TableDeviceSchemaCache.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/relational/metadata/fetcher/cache/TableDeviceSchemaCache.java index 3e599355098a9..d50d76ec11a61 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/relational/metadata/fetcher/cache/TableDeviceSchemaCache.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/relational/metadata/fetcher/cache/TableDeviceSchemaCache.java @@ -258,29 +258,7 @@ public void initOrInvalidateLastCache( * {@code null}. For correctness, this will put the cache lazily and only update the existing last * caches of measurements. * - * @param database the device's database, without "root" - * @param deviceId {@link IDeviceID} - * @param measurements the fetched measurements - * @param timeValuePairs the {@link TimeValuePair}s with indexes corresponding to the measurements - * @param invalidateNull when true invalidate cache entries where timeValuePairs[i] == null; when - * false ignore cache entries where timeValuePairs[i] == null - */ - public void updateLastCacheIfExists( - final String database, - final IDeviceID deviceId, - final String[] measurements, - final TimeValuePair[] timeValuePairs, - boolean invalidateNull) { - dualKeyCache.update( - new TableId(database, deviceId.getTableName()), - deviceId, - null, - entry -> entry.tryUpdateLastCache(measurements, timeValuePairs, invalidateNull), - false); - } - - /** - * Update the last cache in writing or the second push of last cache query. If a measurement is + *

Update the last cache in writing or the second push of last cache query. If a measurement is * with all {@code null}s or is a tag/attribute column, its {@link TimeValuePair}[] shall be * {@code null}. For correctness, this will put the cache lazily and only update the existing last * caches of measurements. @@ -295,7 +273,12 @@ public void updateLastCacheIfExists( final IDeviceID deviceId, final String[] measurements, final TimeValuePair[] timeValuePairs) { - updateLastCacheIfExists(database, deviceId, measurements, timeValuePairs, false); + dualKeyCache.update( + new TableId(database, deviceId.getTableName()), + deviceId, + null, + entry -> entry.tryUpdateLastCache(measurements, timeValuePairs), + false); } /** diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/DataRegion.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/DataRegion.java index e5d75b4a210b7..7c30a52acfad1 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/DataRegion.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/DataRegion.java @@ -3999,16 +3999,11 @@ public void loadNewTsFile( TsFileLastReader lastReader = null; LastCacheLoadStrategy lastCacheLoadStrategy = config.getLastCacheLoadStrategy(); if (!isFromConsensus - && (lastCacheLoadStrategy == LastCacheLoadStrategy.UPDATE - || lastCacheLoadStrategy == LastCacheLoadStrategy.UPDATE_NO_BLOB) + && lastCacheLoadStrategy == LastCacheLoadStrategy.UPDATE && newTsFileResource.getLastValues() == null) { try { // init reader outside of lock to boost performance - lastReader = - new TsFileLastReader( - newTsFileResource.getTsFilePath(), - true, - lastCacheLoadStrategy == LastCacheLoadStrategy.UPDATE_NO_BLOB); + lastReader = new TsFileLastReader(newTsFileResource.getTsFilePath(), true, false); } catch (IOException e) { throw new LoadFileException(e); } @@ -4109,7 +4104,6 @@ private void onTsFileLoaded( if (CommonDescriptor.getInstance().getConfig().isLastCacheEnable() && !isFromConsensus) { switch (config.getLastCacheLoadStrategy()) { case UPDATE: - case UPDATE_NO_BLOB: updateLastCache(newTsFileResource, lastReader); break; case CLEAN_ALL: diff --git a/iotdb-core/node-commons/src/assembly/resources/conf/iotdb-system.properties.template b/iotdb-core/node-commons/src/assembly/resources/conf/iotdb-system.properties.template index 87be30f4520e0..686460194541a 100644 --- a/iotdb-core/node-commons/src/assembly/resources/conf/iotdb-system.properties.template +++ b/iotdb-core/node-commons/src/assembly/resources/conf/iotdb-system.properties.template @@ -2198,14 +2198,13 @@ load_active_listening_check_interval_seconds=5 # The operation performed to LastCache when a TsFile is successfully loaded. # UPDATE: use the data in the TsFile to update LastCache; -# UPDATE_NO_BLOB: similar to UPDATE, but will invalidate LastCache for blob series; # CLEAN_DEVICE: invalidate LastCache of devices contained in the TsFile; # CLEAN_ALL: clean the whole LastCache. # effectiveMode: restart -last_cache_operation_on_load=UPDATE_NO_BLOB +last_cache_operation_on_load=UPDATE # Whether to cache last values before loading a TsFile. Only effective when -# last_cache_operation_on_load=UPDATE_NO_BLOB or last_cache_operation_on_load=UPDATE. +# last_cache_operation_on_load=UPDATE. # When set to true, blob series will be ignored even with last_cache_operation_on_load=UPDATE. # Enabling this will increase the memory footprint during loading TsFiles. # effectiveMode: restart From a5e0e903ba6e214cb6e80dc31dcbc71b27465264 Mon Sep 17 00:00:00 2001 From: Caideyipi <87789683+Caideyipi@users.noreply.github.com> Date: Thu, 7 May 2026 18:10:01 +0800 Subject: [PATCH 11/23] fix --- .../plan/analyze/load/LoadTsFileAnalyzer.java | 1 + .../analyze/load/LoadTsFileAnalyzerTest.java | 18 +++++++++++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzer.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzer.java index 46a8815cbcc80..08c0b7da0977d 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzer.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzer.java @@ -784,6 +784,7 @@ private static long getWritePointCount( Map> device2TimeseriesMetadata) { return device2TimeseriesMetadata.values().stream() .flatMap(List::stream) + .filter(timeseriesMetadata -> !timeseriesMetadata.getMeasurementId().isEmpty()) .mapToLong(t -> t.getStatistics().getCount()) .sum(); } diff --git a/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzerTest.java b/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzerTest.java index 8a928d86be06e..3599988a5fbc4 100644 --- a/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzerTest.java +++ b/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzerTest.java @@ -19,6 +19,7 @@ package org.apache.iotdb.db.queryengine.plan.analyze.load; +import org.apache.iotdb.db.conf.IoTDBDescriptor; import org.apache.iotdb.db.exception.load.LoadRuntimeOutOfMemoryException; import org.apache.iotdb.db.queryengine.common.MPPQueryContext; import org.apache.iotdb.db.queryengine.common.QueryId; @@ -38,7 +39,9 @@ import org.apache.tsfile.write.schema.MeasurementSchema; import org.apache.tsfile.write.schema.Schema; import org.apache.tsfile.write.writer.TsFileIOWriter; +import org.junit.After; import org.junit.Assert; +import org.junit.Before; import org.junit.Test; import java.io.File; @@ -53,13 +56,26 @@ public class LoadTsFileAnalyzerTest { + private int dataNodeId; + + @Before + public void setUp() { + dataNodeId = IoTDBDescriptor.getInstance().getConfig().getDataNodeId(); + IoTDBDescriptor.getInstance().getConfig().setDataNodeId(0); + } + + @After + public void tearDown() { + IoTDBDescriptor.getInstance().getConfig().setDataNodeId(dataNodeId); + } + @Test public void testTableWritePointCountFallbackToTimeChunkWhenAllFieldsNull() throws Exception { final File tsFile = new File("load-table-all-null.tsfile"); writeTableTsFileWithAllNullFields(tsFile); try (final TsFileSequenceReader reader = new TsFileSequenceReader(tsFile.getAbsolutePath())) { - final IDeviceID deviceID = new StringArrayDeviceID(new String[] {"table1", "tagA"}); + final IDeviceID deviceID = new StringArrayDeviceID("table1", "tagA"); final List alignedChunkMetadataList = reader.getAlignedChunkMetadata(deviceID, false); Assert.assertEquals(1, alignedChunkMetadataList.size()); From 1292537ae7f24cc7888e7941d577ae6f91ef0154 Mon Sep 17 00:00:00 2001 From: Caideyipi <87789683+Caideyipi@users.noreply.github.com> Date: Fri, 8 May 2026 10:13:04 +0800 Subject: [PATCH 12/23] no-def --- .../plan/analyze/load/LoadTsFileAnalyzer.java | 30 ------------------- 1 file changed, 30 deletions(-) diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzer.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzer.java index 08c0b7da0977d..8250f8be9adeb 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzer.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzer.java @@ -675,36 +675,6 @@ private long updateTableTsFileResourceAndVerifySchema( } } - if (!hasChunk) { - if (Objects.isNull(allTimeseriesMetadataByDevice)) { - allTimeseriesMetadataByDevice = reader.getAllTimeseriesMetadata(true); - } - - final List timeseriesMetadataList = - getTimeseriesMetadata(allTimeseriesMetadataByDevice, deviceId); - if (Objects.nonNull(timeseriesMetadataList)) { - for (final TimeseriesMetadata timeseriesMetadata : timeseriesMetadataList) { - if (Objects.isNull(timeseriesMetadata) - || Objects.isNull(timeseriesMetadata.getStatistics())) { - continue; - } - - hasChunk = true; - tsFileResource.updateStartTime( - deviceId, timeseriesMetadata.getStatistics().getStartTime()); - tsFileResource.updateEndTime( - deviceId, timeseriesMetadata.getStatistics().getEndTime()); - } - - if (hasChunk) { - writePointCount += getTableWritePointCount(timeseriesMetadataList); - if (Objects.nonNull(lastValueCollector)) { - lastValueCollector.update(deviceId, timeseriesMetadataList); - } - } - } - } - if (hasChunk) { getOrCreateTableSchemaCache().setCurrentTimeIndex(tsFileResource.getTimeIndex()); if (!getOrCreateTableSchemaCache().isDeviceDeletedByMods(deviceId)) { From 0c42078c46e3c802b8c32cdd650398e37449ffd5 Mon Sep 17 00:00:00 2001 From: Caideyipi <87789683+Caideyipi@users.noreply.github.com> Date: Fri, 8 May 2026 10:14:28 +0800 Subject: [PATCH 13/23] no-useless --- .../plan/analyze/load/LoadTsFileAnalyzer.java | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzer.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzer.java index 8250f8be9adeb..7aed4ad59eaad 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzer.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzer.java @@ -690,23 +690,6 @@ private long updateTableTsFileResourceAndVerifySchema( return writePointCount; } - private static List getTimeseriesMetadata( - final Map> device2TimeseriesMetadata, - final IDeviceID deviceId) { - final List timeseriesMetadataList = device2TimeseriesMetadata.get(deviceId); - if (Objects.nonNull(timeseriesMetadataList)) { - return timeseriesMetadataList; - } - - for (final Map.Entry> entry : - device2TimeseriesMetadata.entrySet()) { - if (Arrays.equals(entry.getKey().getSegments(), deviceId.getSegments())) { - return entry.getValue(); - } - } - return null; - } - private static List getDeviceKey(final IDeviceID deviceId) { return Arrays.asList(deviceId.getSegments()); } From d0090b299ec5d6dab34e7ab67ae18e1d4feff9ad Mon Sep 17 00:00:00 2001 From: Caideyipi <87789683+Caideyipi@users.noreply.github.com> Date: Fri, 8 May 2026 10:15:51 +0800 Subject: [PATCH 14/23] shit-b --- .../plan/analyze/load/LoadTsFileAnalyzer.java | 9 --------- 1 file changed, 9 deletions(-) diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzer.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzer.java index 7aed4ad59eaad..61f26bc99352c 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzer.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzer.java @@ -761,15 +761,6 @@ private static long getTableWritePointCount( : alignedChunkMetadata.getTimeChunkMetadata().getStatistics().getCount(); } - private static long getTableWritePointCount( - final List timeseriesMetadataList) { - return timeseriesMetadataList.stream() - .filter(Objects::nonNull) - .filter(timeseriesMetadata -> Objects.nonNull(timeseriesMetadata.getStatistics())) - .mapToLong(timeseriesMetadata -> timeseriesMetadata.getStatistics().getCount()) - .sum(); - } - private void addWritePointCount(long writePointCount) { if (isTableModelStatement) { loadTsFileTableStatement.addWritePointCount(writePointCount); From d04e36b466357a22f42708559ba4678d48678612 Mon Sep 17 00:00:00 2001 From: Caideyipi <87789683+Caideyipi@users.noreply.github.com> Date: Fri, 8 May 2026 12:05:22 +0800 Subject: [PATCH 15/23] fix --- .../manual/enhanced/IoTDBPipeClusterIT.java | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/integration-test/src/test/java/org/apache/iotdb/pipe/it/dual/tablemodel/manual/enhanced/IoTDBPipeClusterIT.java b/integration-test/src/test/java/org/apache/iotdb/pipe/it/dual/tablemodel/manual/enhanced/IoTDBPipeClusterIT.java index 4350d4072bb06..cbcfccfcfeb19 100644 --- a/integration-test/src/test/java/org/apache/iotdb/pipe/it/dual/tablemodel/manual/enhanced/IoTDBPipeClusterIT.java +++ b/integration-test/src/test/java/org/apache/iotdb/pipe/it/dual/tablemodel/manual/enhanced/IoTDBPipeClusterIT.java @@ -36,6 +36,7 @@ import org.apache.iotdb.it.env.cluster.node.DataNodeWrapper; import org.apache.iotdb.it.framework.IoTDBTestRunner; import org.apache.iotdb.itbase.category.MultiClusterIT2DualTableManualEnhanced; +import org.apache.iotdb.itbase.env.BaseEnv; import org.apache.iotdb.pipe.it.dual.tablemodel.TableModelUtils; import org.apache.iotdb.pipe.it.dual.tablemodel.manual.AbstractPipeTableModelDualManualIT; import org.apache.iotdb.rpc.TSStatusCode; @@ -49,7 +50,9 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.concurrent.TimeUnit; @@ -1001,4 +1004,31 @@ public void testNegativeTimestamp() throws Exception { TableModelUtils.assertData("test", "test", -200, 100, receiverEnv, handleFailure); } } + + @Test + public void testHistoryDataWithEmptyField() { + TestUtils.executeNonQueries( + senderEnv, + Arrays.asList( + "CREATE DATABASE iot_table_stream_attr", + "USE iot_table_stream_attr", + "CREATE TABLE table1 (region STRING TAG, device_id STRING TAG, model_id STRING ATTRIBUTE, maintenance STRING ATTRIBUTE COMMENT 'maintenance', temperature FLOAT FIELD COMMENT 'temperature', humidity STRING ATTRIBUTE COMMENT 'humidity', plant_id STRING TAG) COMMENT 'table1'", + "create pipe test with source ('inclusion'='all') with sink('node-urls'='127.0.0.1:6668')", + "select * from table1 order by time", + "INSERT INTO table1(region, plant_id, device_id, model_id, maintenance, time, temperature, humidity) VALUES ('north', null, 'd101', 'red', null, '2025-11-26 13:38:00', 91.0, null), (null, '1003', null, null, 'maint-a', '2025-11-26 13:39:00', null, '36.2'), (null, null, null, 'green', 'maint-b', '2025-11-26 13:40:00', 88.8, '34.9')", + "INSERT INTO table1(region, plant_id, device_id, model_id, maintenance, time, temperature, humidity) VALUES ('south', '1005', 'd105', null, null, '2025-11-26 13:41:00', 87.5, null)", + "INSERT INTO table1(region, plant_id, device_id, model_id, maintenance, time, temperature, humidity) VALUES ('west', '1006', 'd106', 'blue', 'maint-c', '2025-11-26 13:42:00', null, '36.8')"), + BaseEnv.TABLE_SQL_DIALECT); + TestUtils.assertDataEventuallyOnEnv( + receiverEnv, + "select * from iot_table_stream_attr.table1 order by time", + "time,region,device_id,model_id,maintenance,temperature,humidity,plant_id,", + new HashSet<>( + Arrays.asList( + "2025-11-26T13:38:00.000+08:00,north,d101,red,null,null,91.0,null,", + "2025-11-26T13:39:00.000+08:00,null,null,null,maint-a,null,36.2,1003,", + "2025-11-26T13:40:00.000+08:00,null,null,green,maint-b,88.8,34.9,null,", + "2025-11-26T13:41:00.000+08:00,south,d105,null,null,87.5,null,1005,", + "2025-11-26T13:42:00.000+08:00,west,d106,blue,maint-c,null,36.8,1006,"))); + } } From 00730b49b1dc179c9118829d23b36edcadfebd72 Mon Sep 17 00:00:00 2001 From: Caideyipi <87789683+Caideyipi@users.noreply.github.com> Date: Fri, 8 May 2026 12:09:07 +0800 Subject: [PATCH 16/23] Update IoTDBPipeClusterIT.java --- .../it/dual/tablemodel/manual/enhanced/IoTDBPipeClusterIT.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/integration-test/src/test/java/org/apache/iotdb/pipe/it/dual/tablemodel/manual/enhanced/IoTDBPipeClusterIT.java b/integration-test/src/test/java/org/apache/iotdb/pipe/it/dual/tablemodel/manual/enhanced/IoTDBPipeClusterIT.java index cbcfccfcfeb19..0d5396d6815c4 100644 --- a/integration-test/src/test/java/org/apache/iotdb/pipe/it/dual/tablemodel/manual/enhanced/IoTDBPipeClusterIT.java +++ b/integration-test/src/test/java/org/apache/iotdb/pipe/it/dual/tablemodel/manual/enhanced/IoTDBPipeClusterIT.java @@ -1029,6 +1029,7 @@ public void testHistoryDataWithEmptyField() { "2025-11-26T13:39:00.000+08:00,null,null,null,maint-a,null,36.2,1003,", "2025-11-26T13:40:00.000+08:00,null,null,green,maint-b,88.8,34.9,null,", "2025-11-26T13:41:00.000+08:00,south,d105,null,null,87.5,null,1005,", - "2025-11-26T13:42:00.000+08:00,west,d106,blue,maint-c,null,36.8,1006,"))); + "2025-11-26T13:42:00.000+08:00,west,d106,blue,maint-c,null,36.8,1006,")), + (String) null); } } From c7b14a0f9f7d996af0e15d2cf43e7c2dc185f979 Mon Sep 17 00:00:00 2001 From: Caideyipi <87789683+Caideyipi@users.noreply.github.com> Date: Fri, 8 May 2026 12:18:02 +0800 Subject: [PATCH 17/23] fix-it --- .../java/org/apache/iotdb/db/it/utils/TestUtils.java | 2 +- .../manual/enhanced/IoTDBPipeClusterIT.java | 12 +++++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/integration-test/src/test/java/org/apache/iotdb/db/it/utils/TestUtils.java b/integration-test/src/test/java/org/apache/iotdb/db/it/utils/TestUtils.java index 93f14dc572b55..5391286a41da8 100644 --- a/integration-test/src/test/java/org/apache/iotdb/db/it/utils/TestUtils.java +++ b/integration-test/src/test/java/org/apache/iotdb/db/it/utils/TestUtils.java @@ -1719,7 +1719,7 @@ public static void assertDataEventuallyOnEnv( final String expectedHeader, final Set expectedResSet, final String dataBaseName) { - assertDataEventuallyOnEnv(env, sql, expectedHeader, expectedResSet, 600, dataBaseName, null); + assertDataEventuallyOnEnv(env, sql, expectedHeader, expectedResSet, 10, dataBaseName, null); } public static void assertDataEventuallyOnEnv( diff --git a/integration-test/src/test/java/org/apache/iotdb/pipe/it/dual/tablemodel/manual/enhanced/IoTDBPipeClusterIT.java b/integration-test/src/test/java/org/apache/iotdb/pipe/it/dual/tablemodel/manual/enhanced/IoTDBPipeClusterIT.java index 0d5396d6815c4..e030293e53cf1 100644 --- a/integration-test/src/test/java/org/apache/iotdb/pipe/it/dual/tablemodel/manual/enhanced/IoTDBPipeClusterIT.java +++ b/integration-test/src/test/java/org/apache/iotdb/pipe/it/dual/tablemodel/manual/enhanced/IoTDBPipeClusterIT.java @@ -76,6 +76,8 @@ public void setUp() { .getConfig() .getCommonConfig() .setAutoCreateSchemaEnabled(true) + .setDataReplicationFactor(1) + .setSchemaReplicationFactor(1) .setConfigNodeConsensusProtocolClass(ConsensusFactory.RATIS_CONSENSUS) .setSchemaRegionConsensusProtocolClass(ConsensusFactory.RATIS_CONSENSUS) .setDataRegionConsensusProtocolClass(ConsensusFactory.IOT_CONSENSUS) @@ -87,8 +89,8 @@ public void setUp() { .getConfig() .getCommonConfig() .setAutoCreateSchemaEnabled(true) - .setDataReplicationFactor(2) - .setSchemaReplicationFactor(3) + .setDataReplicationFactor(1) + .setSchemaReplicationFactor(1) .setConfigNodeConsensusProtocolClass(ConsensusFactory.RATIS_CONSENSUS) .setSchemaRegionConsensusProtocolClass(ConsensusFactory.RATIS_CONSENSUS) .setDataRegionConsensusProtocolClass(ConsensusFactory.IOT_CONSENSUS) @@ -96,8 +98,8 @@ public void setUp() { .setPipeMemoryManagementEnabled(false) .setIsPipeEnableMemoryCheck(false); - senderEnv.initClusterEnvironment(3, 3, 180); - receiverEnv.initClusterEnvironment(3, 3, 180); + senderEnv.initClusterEnvironment(1, 1, 180); + receiverEnv.initClusterEnvironment(1, 1, 180); } @Test @@ -1013,7 +1015,7 @@ public void testHistoryDataWithEmptyField() { "CREATE DATABASE iot_table_stream_attr", "USE iot_table_stream_attr", "CREATE TABLE table1 (region STRING TAG, device_id STRING TAG, model_id STRING ATTRIBUTE, maintenance STRING ATTRIBUTE COMMENT 'maintenance', temperature FLOAT FIELD COMMENT 'temperature', humidity STRING ATTRIBUTE COMMENT 'humidity', plant_id STRING TAG) COMMENT 'table1'", - "create pipe test with source ('inclusion'='all') with sink('node-urls'='127.0.0.1:6668')", + String.format("create pipe test with source ('inclusion'='all') with sink('node-urls'='%s')", receiverEnv.getDataNodeWrapper(0).getIpAndPortString()), "select * from table1 order by time", "INSERT INTO table1(region, plant_id, device_id, model_id, maintenance, time, temperature, humidity) VALUES ('north', null, 'd101', 'red', null, '2025-11-26 13:38:00', 91.0, null), (null, '1003', null, null, 'maint-a', '2025-11-26 13:39:00', null, '36.2'), (null, null, null, 'green', 'maint-b', '2025-11-26 13:40:00', 88.8, '34.9')", "INSERT INTO table1(region, plant_id, device_id, model_id, maintenance, time, temperature, humidity) VALUES ('south', '1005', 'd105', null, null, '2025-11-26 13:41:00', 87.5, null)", From 71e91850ab782727dbdfe3d608c8a79c15f64787 Mon Sep 17 00:00:00 2001 From: Caideyipi <87789683+Caideyipi@users.noreply.github.com> Date: Fri, 8 May 2026 12:19:33 +0800 Subject: [PATCH 18/23] no --- .../dual/tablemodel/manual/enhanced/IoTDBPipeClusterIT.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/integration-test/src/test/java/org/apache/iotdb/pipe/it/dual/tablemodel/manual/enhanced/IoTDBPipeClusterIT.java b/integration-test/src/test/java/org/apache/iotdb/pipe/it/dual/tablemodel/manual/enhanced/IoTDBPipeClusterIT.java index e030293e53cf1..08383e48c2f53 100644 --- a/integration-test/src/test/java/org/apache/iotdb/pipe/it/dual/tablemodel/manual/enhanced/IoTDBPipeClusterIT.java +++ b/integration-test/src/test/java/org/apache/iotdb/pipe/it/dual/tablemodel/manual/enhanced/IoTDBPipeClusterIT.java @@ -1015,7 +1015,9 @@ public void testHistoryDataWithEmptyField() { "CREATE DATABASE iot_table_stream_attr", "USE iot_table_stream_attr", "CREATE TABLE table1 (region STRING TAG, device_id STRING TAG, model_id STRING ATTRIBUTE, maintenance STRING ATTRIBUTE COMMENT 'maintenance', temperature FLOAT FIELD COMMENT 'temperature', humidity STRING ATTRIBUTE COMMENT 'humidity', plant_id STRING TAG) COMMENT 'table1'", - String.format("create pipe test with source ('inclusion'='all') with sink('node-urls'='%s')", receiverEnv.getDataNodeWrapper(0).getIpAndPortString()), + String.format( + "create pipe test with source ('inclusion'='all') with sink('node-urls'='%s')", + receiverEnv.getDataNodeWrapper(0).getIpAndPortString()), "select * from table1 order by time", "INSERT INTO table1(region, plant_id, device_id, model_id, maintenance, time, temperature, humidity) VALUES ('north', null, 'd101', 'red', null, '2025-11-26 13:38:00', 91.0, null), (null, '1003', null, null, 'maint-a', '2025-11-26 13:39:00', null, '36.2'), (null, null, null, 'green', 'maint-b', '2025-11-26 13:40:00', 88.8, '34.9')", "INSERT INTO table1(region, plant_id, device_id, model_id, maintenance, time, temperature, humidity) VALUES ('south', '1005', 'd105', null, null, '2025-11-26 13:41:00', 87.5, null)", From a429de63cc5e09c37077433deafbccd56409889d Mon Sep 17 00:00:00 2001 From: Caideyipi <87789683+Caideyipi@users.noreply.github.com> Date: Fri, 8 May 2026 16:08:03 +0800 Subject: [PATCH 19/23] Fix --- .../manual/enhanced/IoTDBPipeClusterIT.java | 10 +- .../plan/analyze/load/LoadTsFileAnalyzer.java | 21 ++- .../load/splitter/TsFileSplitter.java | 22 ++- .../load/splitter/TsFileSplitterTest.java | 157 ++++++++++++++++++ 4 files changed, 200 insertions(+), 10 deletions(-) create mode 100644 iotdb-core/datanode/src/test/java/org/apache/iotdb/db/storageengine/load/splitter/TsFileSplitterTest.java diff --git a/integration-test/src/test/java/org/apache/iotdb/pipe/it/dual/tablemodel/manual/enhanced/IoTDBPipeClusterIT.java b/integration-test/src/test/java/org/apache/iotdb/pipe/it/dual/tablemodel/manual/enhanced/IoTDBPipeClusterIT.java index 08383e48c2f53..285966a5637cd 100644 --- a/integration-test/src/test/java/org/apache/iotdb/pipe/it/dual/tablemodel/manual/enhanced/IoTDBPipeClusterIT.java +++ b/integration-test/src/test/java/org/apache/iotdb/pipe/it/dual/tablemodel/manual/enhanced/IoTDBPipeClusterIT.java @@ -1029,11 +1029,11 @@ public void testHistoryDataWithEmptyField() { "time,region,device_id,model_id,maintenance,temperature,humidity,plant_id,", new HashSet<>( Arrays.asList( - "2025-11-26T13:38:00.000+08:00,north,d101,red,null,null,91.0,null,", - "2025-11-26T13:39:00.000+08:00,null,null,null,maint-a,null,36.2,1003,", - "2025-11-26T13:40:00.000+08:00,null,null,green,maint-b,88.8,34.9,null,", - "2025-11-26T13:41:00.000+08:00,south,d105,null,null,87.5,null,1005,", - "2025-11-26T13:42:00.000+08:00,west,d106,blue,maint-c,null,36.8,1006,")), + "2025-11-26T13:38:00.000Z,north,d101,red,null,91.0,null,null,", + "2025-11-26T13:39:00.000Z,null,null,null,maint-a,null,36.2,1003,", + "2025-11-26T13:40:00.000Z,null,null,green,maint-b,88.8,34.9,null,", + "2025-11-26T13:41:00.000Z,south,d105,null,null,87.5,null,1005,", + "2025-11-26T13:42:00.000Z,west,d106,blue,maint-c,null,36.8,1006,")), (String) null); } } diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzer.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzer.java index 61f26bc99352c..3290cb13d1d16 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzer.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/analyze/load/LoadTsFileAnalyzer.java @@ -606,8 +606,12 @@ private long updateTableTsFileResourceAndVerifySchema( IoTDBDescriptor.getInstance().getConfig().isCacheLastValuesForLoad()); getOrCreateTableSchemaCache().setCurrentTimeIndex(tsFileResource.getTimeIndex()); - for (final IDeviceID deviceId : device2TimeseriesMetadata.keySet()) { - devicesHandledByTimeseriesMetadataIterator.add(getDeviceKey(deviceId)); + for (final Map.Entry> entry : + device2TimeseriesMetadata.entrySet()) { + final IDeviceID deviceId = entry.getKey(); + if (canUpdateTsFileResource(entry.getValue())) { + devicesHandledByTimeseriesMetadataIterator.add(getDeviceKey(deviceId)); + } if (!getOrCreateTableSchemaCache().isDeviceDeletedByMods(deviceId)) { getOrCreateTableSchemaCache().autoCreateAndVerify(deviceId); } @@ -694,6 +698,17 @@ private static List getDeviceKey(final IDeviceID deviceId) { return Arrays.asList(deviceId.getSegments()); } + private static boolean canUpdateTsFileResource( + final List timeseriesMetadata) { + return Objects.nonNull(timeseriesMetadata) + && timeseriesMetadata.stream() + .anyMatch( + metadata -> + Objects.nonNull(metadata) + && Objects.nonNull(metadata.getStatistics()) + && metadata.getStatistics().getCount() > 0); + } + private TsFileResource constructTsFileResource( final TsFileSequenceReader reader, final File tsFile) throws IOException { final TsFileResource tsFileResource = new TsFileResource(tsFile); @@ -754,7 +769,7 @@ private static long getTableWritePointCount( writePointCount += valueChunkMetadata.getStatistics().getCount(); } } - return hasValueChunkMetadata + return hasValueChunkMetadata && writePointCount > 0 || Objects.isNull(alignedChunkMetadata.getTimeChunkMetadata()) || Objects.isNull(alignedChunkMetadata.getTimeChunkMetadata().getStatistics()) ? writePointCount diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/load/splitter/TsFileSplitter.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/load/splitter/TsFileSplitter.java index bbfd8f1bb30c4..c8c179db1110f 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/load/splitter/TsFileSplitter.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/load/splitter/TsFileSplitter.java @@ -127,6 +127,7 @@ public void splitTsFileByDataPartition() processValueChunk(reader, marker); break; case MetaMarker.CHUNK_GROUP_HEADER: + consumeAllPendingAlignedChunkData(reader.position()); ChunkGroupHeader chunkGroupHeader = reader.readChunkGroupHeader(); curDevice = chunkGroupHeader.getDeviceID(); pageIndex2ChunkDataList = new ArrayList<>(); @@ -143,7 +144,7 @@ public void splitTsFileByDataPartition() } } - consumeAllAlignedChunkData(reader.position(), pageIndex2ChunkData); + consumeAllPendingAlignedChunkData(reader.position()); handleModification(deletions); } } @@ -464,7 +465,24 @@ private void consumeAllAlignedChunkData( offset, chunkData)); } } - this.pageIndex2ChunkData = new HashMap<>(); + pageIndex2ChunkData.clear(); + if (this.pageIndex2ChunkData == pageIndex2ChunkData) { + this.pageIndex2ChunkData = new HashMap<>(); + } + } + + private void consumeAllPendingAlignedChunkData(final long offset) throws LoadFileException { + consumeAllAlignedChunkData(offset, pageIndex2ChunkData); + for (final Map> pendingPageIndex2ChunkData : + pageIndex2ChunkDataList) { + consumeAllAlignedChunkData(offset, pendingPageIndex2ChunkData); + } + pageIndex2ChunkDataList.clear(); + if (pageIndex2TimesList != null) { + pageIndex2TimesList.clear(); + } + isTimeChunkNeedDecodeList.clear(); + valueColumn2TimeChunkIndex.clear(); } private void consumeChunkData(String measurement, long offset, ChunkData chunkData) diff --git a/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/storageengine/load/splitter/TsFileSplitterTest.java b/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/storageengine/load/splitter/TsFileSplitterTest.java new file mode 100644 index 0000000000000..6610880567e90 --- /dev/null +++ b/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/storageengine/load/splitter/TsFileSplitterTest.java @@ -0,0 +1,157 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iotdb.db.storageengine.load.splitter; + +import org.apache.tsfile.enums.ColumnCategory; +import org.apache.tsfile.enums.TSDataType; +import org.apache.tsfile.file.metadata.AbstractAlignedChunkMetadata; +import org.apache.tsfile.file.metadata.IDeviceID; +import org.apache.tsfile.file.metadata.StringArrayDeviceID; +import org.apache.tsfile.file.metadata.TableSchema; +import org.apache.tsfile.read.TsFileSequenceReader; +import org.apache.tsfile.write.chunk.AlignedChunkWriterImpl; +import org.apache.tsfile.write.schema.IMeasurementSchema; +import org.apache.tsfile.write.schema.MeasurementSchema; +import org.apache.tsfile.write.schema.Schema; +import org.apache.tsfile.write.writer.TsFileIOWriter; +import org.junit.Assert; +import org.junit.Test; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataOutputStream; +import java.io.File; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Objects; + +public class TsFileSplitterTest { + + @Test + public void testSplitTableTimeOnlyAlignedChunk() throws Exception { + final File sourceTsFile = new File("split-table-time-only-source.tsfile"); + final File targetTsFile = new File("split-table-time-only-target.tsfile"); + final IDeviceID deviceID = new StringArrayDeviceID("table1", "tagA"); + + try { + writeTableTsFileWithTimeOnlyChunk(sourceTsFile, deviceID); + + final List emittedChunkDataList = new ArrayList<>(); + final TsFileSplitter splitter = + new TsFileSplitter( + sourceTsFile, + tsFileData -> { + if (tsFileData instanceof ChunkData) { + emittedChunkDataList.add((ChunkData) tsFileData); + } + return true; + }); + splitter.splitTsFileByDataPartition(); + + if (targetTsFile.exists()) { + Assert.assertTrue(targetTsFile.delete()); + } + try (final TsFileIOWriter writer = new TsFileIOWriter(targetTsFile)) { + writer.setSchema(createSchema()); + IDeviceID currentDeviceID = null; + for (final ChunkData chunkData : emittedChunkDataList) { + if (!Objects.equals(currentDeviceID, chunkData.getDevice())) { + if (Objects.nonNull(currentDeviceID)) { + writer.endChunkGroup(); + } + writer.startChunkGroup(chunkData.getDevice()); + currentDeviceID = chunkData.getDevice(); + } + + writeSerializedChunkDataToWriter(chunkData, writer); + } + if (Objects.nonNull(currentDeviceID)) { + writer.endChunkGroup(); + } + writer.endFile(); + } + + Assert.assertEquals(1, emittedChunkDataList.size()); + try (final TsFileSequenceReader reader = + new TsFileSequenceReader(targetTsFile.getAbsolutePath())) { + final List chunkMetadataList = + reader.getAlignedChunkMetadata(deviceID, false); + Assert.assertEquals(1, chunkMetadataList.size()); + Assert.assertEquals( + 2, chunkMetadataList.get(0).getTimeChunkMetadata().getStatistics().getCount()); + Assert.assertTrue(chunkMetadataList.get(0).getValueChunkMetadataList().isEmpty()); + } + } finally { + if (sourceTsFile.exists()) { + Assert.assertTrue(sourceTsFile.delete()); + } + if (targetTsFile.exists()) { + Assert.assertTrue(targetTsFile.delete()); + } + } + } + + private void writeTableTsFileWithTimeOnlyChunk(final File tsFile, final IDeviceID deviceID) + throws Exception { + if (tsFile.exists()) { + Assert.assertTrue(tsFile.delete()); + } + + try (final TsFileIOWriter writer = new TsFileIOWriter(tsFile)) { + writer.setSchema(createSchema()); + writer.startChunkGroup(deviceID); + + final AlignedChunkWriterImpl chunkWriter = + new AlignedChunkWriterImpl(Collections.emptyList()); + chunkWriter.write(100); + chunkWriter.write(101); + chunkWriter.writeToFileWriter(writer); + + writer.endChunkGroup(); + writer.endFile(); + } + } + + private Schema createSchema() { + final List tableSchemaList = + Arrays.asList( + new MeasurementSchema("tag1", TSDataType.STRING), + new MeasurementSchema("s1", TSDataType.INT64)); + final List columnCategoryList = + Arrays.asList(ColumnCategory.TAG, ColumnCategory.FIELD); + + final Schema schema = new Schema(); + schema.registerTableSchema(new TableSchema("table1", tableSchemaList, columnCategoryList)); + return schema; + } + + private void writeSerializedChunkDataToWriter( + final ChunkData chunkData, final TsFileIOWriter writer) throws Exception { + final ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); + try (final DataOutputStream dataOutputStream = new DataOutputStream(byteArrayOutputStream)) { + chunkData.serialize(dataOutputStream); + } + ((ChunkData) + TsFileData.deserialize(new ByteArrayInputStream(byteArrayOutputStream.toByteArray()))) + .writeToFileWriter(writer); + } +} From c141d33c37d46305cfe9bba85d77f43c614b6735 Mon Sep 17 00:00:00 2001 From: Caideyipi <87789683+Caideyipi@users.noreply.github.com> Date: Fri, 8 May 2026 16:16:53 +0800 Subject: [PATCH 20/23] fix --- .../java/org/apache/iotdb/db/it/utils/TestUtils.java | 2 +- .../tablemodel/manual/enhanced/IoTDBPipeClusterIT.java | 10 ++++------ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/integration-test/src/test/java/org/apache/iotdb/db/it/utils/TestUtils.java b/integration-test/src/test/java/org/apache/iotdb/db/it/utils/TestUtils.java index 5391286a41da8..93f14dc572b55 100644 --- a/integration-test/src/test/java/org/apache/iotdb/db/it/utils/TestUtils.java +++ b/integration-test/src/test/java/org/apache/iotdb/db/it/utils/TestUtils.java @@ -1719,7 +1719,7 @@ public static void assertDataEventuallyOnEnv( final String expectedHeader, final Set expectedResSet, final String dataBaseName) { - assertDataEventuallyOnEnv(env, sql, expectedHeader, expectedResSet, 10, dataBaseName, null); + assertDataEventuallyOnEnv(env, sql, expectedHeader, expectedResSet, 600, dataBaseName, null); } public static void assertDataEventuallyOnEnv( diff --git a/integration-test/src/test/java/org/apache/iotdb/pipe/it/dual/tablemodel/manual/enhanced/IoTDBPipeClusterIT.java b/integration-test/src/test/java/org/apache/iotdb/pipe/it/dual/tablemodel/manual/enhanced/IoTDBPipeClusterIT.java index 285966a5637cd..ad283d4a02ccd 100644 --- a/integration-test/src/test/java/org/apache/iotdb/pipe/it/dual/tablemodel/manual/enhanced/IoTDBPipeClusterIT.java +++ b/integration-test/src/test/java/org/apache/iotdb/pipe/it/dual/tablemodel/manual/enhanced/IoTDBPipeClusterIT.java @@ -76,8 +76,6 @@ public void setUp() { .getConfig() .getCommonConfig() .setAutoCreateSchemaEnabled(true) - .setDataReplicationFactor(1) - .setSchemaReplicationFactor(1) .setConfigNodeConsensusProtocolClass(ConsensusFactory.RATIS_CONSENSUS) .setSchemaRegionConsensusProtocolClass(ConsensusFactory.RATIS_CONSENSUS) .setDataRegionConsensusProtocolClass(ConsensusFactory.IOT_CONSENSUS) @@ -89,8 +87,8 @@ public void setUp() { .getConfig() .getCommonConfig() .setAutoCreateSchemaEnabled(true) - .setDataReplicationFactor(1) - .setSchemaReplicationFactor(1) + .setDataReplicationFactor(2) + .setSchemaReplicationFactor(3) .setConfigNodeConsensusProtocolClass(ConsensusFactory.RATIS_CONSENSUS) .setSchemaRegionConsensusProtocolClass(ConsensusFactory.RATIS_CONSENSUS) .setDataRegionConsensusProtocolClass(ConsensusFactory.IOT_CONSENSUS) @@ -98,8 +96,8 @@ public void setUp() { .setPipeMemoryManagementEnabled(false) .setIsPipeEnableMemoryCheck(false); - senderEnv.initClusterEnvironment(1, 1, 180); - receiverEnv.initClusterEnvironment(1, 1, 180); + senderEnv.initClusterEnvironment(3, 3, 180); + receiverEnv.initClusterEnvironment(3, 3, 180); } @Test From 28dbcbc8989580f2380671498530b62d8bbab966 Mon Sep 17 00:00:00 2001 From: Caideyipi <87789683+Caideyipi@users.noreply.github.com> Date: Fri, 8 May 2026 17:56:10 +0800 Subject: [PATCH 21/23] Update TsFileSplitter.java --- .../load/splitter/TsFileSplitter.java | 114 ++++++++++++++++-- 1 file changed, 106 insertions(+), 8 deletions(-) diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/load/splitter/TsFileSplitter.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/load/splitter/TsFileSplitter.java index c8c179db1110f..6a92eb2898188 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/load/splitter/TsFileSplitter.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/load/splitter/TsFileSplitter.java @@ -37,10 +37,12 @@ import org.apache.tsfile.file.header.ChunkGroupHeader; import org.apache.tsfile.file.header.ChunkHeader; import org.apache.tsfile.file.header.PageHeader; +import org.apache.tsfile.file.metadata.AbstractAlignedChunkMetadata; import org.apache.tsfile.file.metadata.IChunkMetadata; import org.apache.tsfile.file.metadata.IDeviceID; import org.apache.tsfile.file.metadata.TimeseriesMetadata; import org.apache.tsfile.file.metadata.enums.TSEncoding; +import org.apache.tsfile.read.TsFileDeviceIterator; import org.apache.tsfile.read.TsFileSequenceReader; import org.apache.tsfile.read.common.BatchData; import org.apache.tsfile.read.reader.page.PageReader; @@ -69,6 +71,8 @@ public class TsFileSplitter { private final File tsFile; private final TsFileDataConsumer consumer; private Map offset2ChunkMetadata = new HashMap<>(); + private Map valueChunkOffset2TimeChunkOffset = new HashMap<>(); + private Map timeChunkOffset2RemainingValueChunkCount = new HashMap<>(); private List deletions = new ArrayList<>(); private Map> pageIndex2ChunkData = new HashMap<>(); private Map pageIndex2Times = new HashMap<>(); @@ -86,6 +90,8 @@ public class TsFileSplitter { private List>> pageIndex2ChunkDataList = new ArrayList<>(); private List> pageIndex2TimesList = null; private List isTimeChunkNeedDecodeList = new ArrayList<>(); + private Map timeChunkOffset2ContextIndex = new HashMap<>(); + private Long currentTimeChunkOffset = null; public TsFileSplitter(File tsFile, TsFileDataConsumer consumer) { this.tsFile = tsFile; @@ -119,7 +125,9 @@ public void splitTsFileByDataPartition() case MetaMarker.ONLY_ONE_PAGE_TIME_CHUNK_HEADER: processTimeChunkOrNonAlignedChunk(reader, marker); if (isAligned) { + final Long timeChunkOffset = currentTimeChunkOffset; storeTimeChunkContext(); + consumeAlignedChunkDataIfComplete(reader.position(), timeChunkOffset); } break; case MetaMarker.VALUE_CHUNK_HEADER: @@ -135,6 +143,8 @@ public void splitTsFileByDataPartition() isTimeChunkNeedDecodeList = new ArrayList<>(); valueColumn2TimeChunkIndex = new HashMap<>(); timeChunkIndexOfCurrentValueColumn = 0; + timeChunkOffset2ContextIndex = new HashMap<>(); + currentTimeChunkOffset = null; break; case MetaMarker.OPERATION_INDEX_RANGE: reader.readPlanIndex(); @@ -152,8 +162,8 @@ public void splitTsFileByDataPartition() private void processTimeChunkOrNonAlignedChunk(TsFileSequenceReader reader, byte marker) throws IOException, LoadFileException { long chunkOffset = reader.position(); + currentTimeChunkOffset = null; timeChunkIndexOfCurrentValueColumn = pageIndex2TimesList.size(); - consumeAllAlignedChunkData(chunkOffset, pageIndex2ChunkData); ChunkHeader header = reader.readChunkHeader(marker); String measurementId = header.getMeasurementID(); @@ -174,6 +184,10 @@ private void processTimeChunkOrNonAlignedChunk(TsFileSequenceReader reader, byte reader.readChunk(-1, header.getDataSize()); return; } + if (isAligned) { + currentTimeChunkOffset = chunkMetadata.getOffsetOfChunkHeader(); + timeChunkOffset2ContextIndex.put(currentTimeChunkOffset, pageIndex2TimesList.size()); + } TTimePartitionSlot timePartitionSlot = TimePartitionUtils.getTimePartitionSlot(chunkMetadata.getStartTime()); ChunkData chunkData = @@ -309,9 +323,11 @@ private void processValueChunk(TsFileSequenceReader reader, byte marker) reader.readChunk(-1, header.getDataSize()); return; } - switchToTimeChunkContextOfCurrentMeasurement(reader, header.getMeasurementID()); + switchToTimeChunkContextOfCurrentValueChunk( + reader, header.getMeasurementID(), chunkMetadata.getOffsetOfChunkHeader()); if (header.getDataSize() == 0) { handleEmptyValueChunk(header, pageIndex2ChunkData, chunkMetadata, isTimeChunkNeedDecode); + consumeValueChunkAndAlignedChunkDataIfComplete(reader.position(), chunkMetadata); return; } @@ -319,6 +335,7 @@ private void processValueChunk(TsFileSequenceReader reader, byte marker) AlignedChunkData alignedChunkData = pageIndex2ChunkData.get(1).get(0); alignedChunkData.addValueChunk(header); alignedChunkData.writeEntireChunk(reader.readChunk(-1, header.getDataSize()), chunkMetadata); + consumeValueChunkAndAlignedChunkDataIfComplete(reader.position(), chunkMetadata); return; } @@ -354,6 +371,7 @@ private void processValueChunk(TsFileSequenceReader reader, byte marker) pageIndex += 1; dataSize -= pageDataSize; } + consumeValueChunkAndAlignedChunkDataIfComplete(reader.position(), chunkMetadata); } private void storeTimeChunkContext() { @@ -365,14 +383,30 @@ private void storeTimeChunkContext() { isTimeChunkNeedDecode = true; } - private void switchToTimeChunkContextOfCurrentMeasurement( - TsFileSequenceReader reader, String measurement) throws IOException, LoadFileException { - int index = valueColumn2TimeChunkIndex.getOrDefault(measurement, 0); - if (index != timeChunkIndexOfCurrentValueColumn) { - consumeAllAlignedChunkData(reader.position(), pageIndex2ChunkData); + private void switchToTimeChunkContextOfCurrentValueChunk( + TsFileSequenceReader reader, String measurement, long valueChunkOffset) throws IOException { + final Long timeChunkOffset = valueChunkOffset2TimeChunkOffset.get(valueChunkOffset); + int index; + if (timeChunkOffset == null) { + index = valueColumn2TimeChunkIndex.getOrDefault(measurement, 0); + valueColumn2TimeChunkIndex.put(measurement, index + 1); + } else { + final Integer contextIndex = timeChunkOffset2ContextIndex.get(timeChunkOffset); + if (contextIndex == null) { + throw new TsFileRuntimeException( + String.format( + "Cannot find aligned time chunk context for value chunk %s at offset %d in TsFile %s, reader position: %d.", + measurement, valueChunkOffset, tsFile.getPath(), reader.position())); + } + index = contextIndex; } timeChunkIndexOfCurrentValueColumn = index; - valueColumn2TimeChunkIndex.put(measurement, index + 1); + if (index < 0 || index >= pageIndex2ChunkDataList.size()) { + throw new TsFileRuntimeException( + String.format( + "Aligned time chunk context index %d is out of range for value chunk %s at offset %d in TsFile %s, reader position: %d.", + index, measurement, valueChunkOffset, tsFile.getPath(), reader.position())); + } pageIndex2Times = pageIndex2TimesList.get(index); pageIndex2ChunkData = pageIndex2ChunkDataList.get(index); @@ -424,6 +458,35 @@ private void getChunkMetadata( } } } + TsFileDeviceIterator deviceIterator = reader.getAllDevicesIteratorWithIsAligned(); + while (deviceIterator.hasNext()) { + Pair deviceInfo = deviceIterator.next(); + if (!Boolean.TRUE.equals(deviceInfo.getRight())) { + continue; + } + for (AbstractAlignedChunkMetadata alignedChunkMetadata : + reader.getAlignedChunkMetadata(deviceInfo.getLeft(), false)) { + if (alignedChunkMetadata == null || alignedChunkMetadata.getTimeChunkMetadata() == null) { + continue; + } + IChunkMetadata timeChunkMetadata = alignedChunkMetadata.getTimeChunkMetadata(); + long timeChunkOffset = timeChunkMetadata.getOffsetOfChunkHeader(); + offset2ChunkMetadata.put(timeChunkOffset, timeChunkMetadata); + + int valueChunkCount = 0; + for (IChunkMetadata valueChunkMetadata : alignedChunkMetadata.getValueChunkMetadataList()) { + if (valueChunkMetadata == null) { + continue; + } + long valueChunkOffset = valueChunkMetadata.getOffsetOfChunkHeader(); + offset2ChunkMetadata.put(valueChunkOffset, valueChunkMetadata); + valueChunkOffset2TimeChunkOffset.put(valueChunkOffset, timeChunkOffset); + valueChunkCount += 1; + } + timeChunkOffset2RemainingValueChunkCount.merge( + timeChunkOffset, valueChunkCount, Integer::sum); + } + } } private void handleModification(List deletions) throws LoadFileException { @@ -483,6 +546,41 @@ private void consumeAllPendingAlignedChunkData(final long offset) throws LoadFil } isTimeChunkNeedDecodeList.clear(); valueColumn2TimeChunkIndex.clear(); + timeChunkOffset2ContextIndex.clear(); + currentTimeChunkOffset = null; + } + + private void consumeValueChunkAndAlignedChunkDataIfComplete( + final long offset, final IChunkMetadata valueChunkMetadata) throws LoadFileException { + final Long timeChunkOffset = + valueChunkOffset2TimeChunkOffset.get(valueChunkMetadata.getOffsetOfChunkHeader()); + if (timeChunkOffset == null) { + return; + } + final Integer remainingValueChunkCount = + timeChunkOffset2RemainingValueChunkCount.get(timeChunkOffset); + if (remainingValueChunkCount == null) { + return; + } + if (remainingValueChunkCount > 0) { + timeChunkOffset2RemainingValueChunkCount.put(timeChunkOffset, remainingValueChunkCount - 1); + } + consumeAlignedChunkDataIfComplete(offset, timeChunkOffset); + } + + private void consumeAlignedChunkDataIfComplete(final long offset, final Long timeChunkOffset) + throws LoadFileException { + if (timeChunkOffset == null + || timeChunkOffset2RemainingValueChunkCount.getOrDefault(timeChunkOffset, 0) > 0) { + return; + } + final Integer contextIndex = timeChunkOffset2ContextIndex.remove(timeChunkOffset); + if (contextIndex == null + || contextIndex < 0 + || contextIndex >= pageIndex2ChunkDataList.size()) { + return; + } + consumeAllAlignedChunkData(offset, pageIndex2ChunkDataList.get(contextIndex)); } private void consumeChunkData(String measurement, long offset, ChunkData chunkData) From f27bd782486994a523d1234eba9ca90cefdd8640 Mon Sep 17 00:00:00 2001 From: Caideyipi <87789683+Caideyipi@users.noreply.github.com> Date: Fri, 8 May 2026 18:07:26 +0800 Subject: [PATCH 22/23] Update TsFileSplitter.java --- .../load/splitter/TsFileSplitter.java | 61 +++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/load/splitter/TsFileSplitter.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/load/splitter/TsFileSplitter.java index 6a92eb2898188..d71f32b36f988 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/load/splitter/TsFileSplitter.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/load/splitter/TsFileSplitter.java @@ -458,12 +458,14 @@ private void getChunkMetadata( } } } + Set alignedDevices = new HashSet<>(); TsFileDeviceIterator deviceIterator = reader.getAllDevicesIteratorWithIsAligned(); while (deviceIterator.hasNext()) { Pair deviceInfo = deviceIterator.next(); if (!Boolean.TRUE.equals(deviceInfo.getRight())) { continue; } + alignedDevices.add(deviceInfo.getLeft()); for (AbstractAlignedChunkMetadata alignedChunkMetadata : reader.getAlignedChunkMetadata(deviceInfo.getLeft(), false)) { if (alignedChunkMetadata == null || alignedChunkMetadata.getTimeChunkMetadata() == null) { @@ -487,6 +489,65 @@ private void getChunkMetadata( timeChunkOffset, valueChunkCount, Integer::sum); } } + mapUnmatchedValueChunksToTimeChunks(device2Metadata, alignedDevices); + } + + private void mapUnmatchedValueChunksToTimeChunks( + Map> device2Metadata, Set alignedDevices) { + for (Map.Entry> entry : device2Metadata.entrySet()) { + if (!alignedDevices.contains(entry.getKey())) { + continue; + } + List timeChunkMetadataList = new ArrayList<>(); + List valueChunkMetadataList = new ArrayList<>(); + for (TimeseriesMetadata timeseriesMetadata : entry.getValue()) { + for (IChunkMetadata chunkMetadata : timeseriesMetadata.getChunkMetadataList()) { + if (chunkMetadata.getMeasurementUid().isEmpty()) { + timeChunkMetadataList.add(chunkMetadata); + } else { + valueChunkMetadataList.add(chunkMetadata); + } + } + } + for (IChunkMetadata timeChunkMetadata : timeChunkMetadataList) { + timeChunkOffset2RemainingValueChunkCount.putIfAbsent( + timeChunkMetadata.getOffsetOfChunkHeader(), 0); + } + for (IChunkMetadata valueChunkMetadata : valueChunkMetadataList) { + if (valueChunkOffset2TimeChunkOffset.containsKey( + valueChunkMetadata.getOffsetOfChunkHeader())) { + continue; + } + IChunkMetadata timeChunkMetadata = + findCorrespondingTimeChunk(timeChunkMetadataList, valueChunkMetadata); + if (timeChunkMetadata == null) { + continue; + } + long timeChunkOffset = timeChunkMetadata.getOffsetOfChunkHeader(); + valueChunkOffset2TimeChunkOffset.put( + valueChunkMetadata.getOffsetOfChunkHeader(), timeChunkOffset); + timeChunkOffset2RemainingValueChunkCount.merge(timeChunkOffset, 1, Integer::sum); + } + } + } + + private IChunkMetadata findCorrespondingTimeChunk( + List timeChunkMetadataList, IChunkMetadata valueChunkMetadata) { + IChunkMetadata matchedTimeChunkMetadata = null; + for (IChunkMetadata timeChunkMetadata : timeChunkMetadataList) { + if (timeChunkMetadata.getStartTime() != valueChunkMetadata.getStartTime() + || timeChunkMetadata.getEndTime() != valueChunkMetadata.getEndTime()) { + continue; + } + if (matchedTimeChunkMetadata == null + || (timeChunkMetadata.getOffsetOfChunkHeader() + < valueChunkMetadata.getOffsetOfChunkHeader() + && timeChunkMetadata.getOffsetOfChunkHeader() + > matchedTimeChunkMetadata.getOffsetOfChunkHeader())) { + matchedTimeChunkMetadata = timeChunkMetadata; + } + } + return matchedTimeChunkMetadata; } private void handleModification(List deletions) throws LoadFileException { From ab81bcf270c13af1dc2455ec75c9edb26bf79346 Mon Sep 17 00:00:00 2001 From: Caideyipi <87789683+Caideyipi@users.noreply.github.com> Date: Fri, 8 May 2026 18:13:00 +0800 Subject: [PATCH 23/23] Update TsFileSplitter.java --- .../load/splitter/TsFileSplitter.java | 23 ++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/load/splitter/TsFileSplitter.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/load/splitter/TsFileSplitter.java index d71f32b36f988..78d8d0344c1ec 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/load/splitter/TsFileSplitter.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/load/splitter/TsFileSplitter.java @@ -332,7 +332,17 @@ private void processValueChunk(TsFileSequenceReader reader, byte marker) } if (!isTimeChunkNeedDecode) { - AlignedChunkData alignedChunkData = pageIndex2ChunkData.get(1).get(0); + List alignedChunkDataList = pageIndex2ChunkData.get(1); + if (alignedChunkDataList == null || alignedChunkDataList.isEmpty()) { + throw new TsFileRuntimeException( + String.format( + "Missing non-decoded aligned time chunk context for value chunk %s at offset %d, time chunk offset: %s, context page keys: %s.", + header.getMeasurementID(), + chunkMetadata.getOffsetOfChunkHeader(), + valueChunkOffset2TimeChunkOffset.get(chunkMetadata.getOffsetOfChunkHeader()), + pageIndex2ChunkData.keySet())); + } + AlignedChunkData alignedChunkData = alignedChunkDataList.get(0); alignedChunkData.addValueChunk(header); alignedChunkData.writeEntireChunk(reader.readChunk(-1, header.getDataSize()), chunkMetadata); consumeValueChunkAndAlignedChunkDataIfComplete(reader.position(), chunkMetadata); @@ -349,6 +359,17 @@ private void processValueChunk(TsFileSequenceReader reader, byte marker) reader.readPageHeader( header.getDataType(), (header.getChunkType() & 0x3F) == MetaMarker.CHUNK_HEADER); List alignedChunkDataList = pageIndex2ChunkData.get(pageIndex); + if (alignedChunkDataList == null) { + throw new TsFileRuntimeException( + String.format( + "Missing decoded aligned time page context for value chunk %s at offset %d, page index: %d, time chunk offset: %s, context page keys: %s, time page keys: %s.", + header.getMeasurementID(), + chunkMetadata.getOffsetOfChunkHeader(), + pageIndex, + valueChunkOffset2TimeChunkOffset.get(chunkMetadata.getOffsetOfChunkHeader()), + pageIndex2ChunkData.keySet(), + pageIndex2Times.keySet())); + } for (AlignedChunkData alignedChunkData : alignedChunkDataList) { if (!allChunkData.contains(alignedChunkData)) { alignedChunkData.addValueChunk(header);