From 9cefd2efbe0f9d34764f0169525ab2208e445afb Mon Sep 17 00:00:00 2001 From: Pramod Immaneni Date: Fri, 26 Sep 2025 16:51:19 -0700 Subject: [PATCH 01/29] Speed up searching of partitions within the in memory data source state --- .../timeline/VersionedIntervalTimeline.java | 25 +++++++++++++++---- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/timeline/VersionedIntervalTimeline.java b/processing/src/main/java/org/apache/druid/timeline/VersionedIntervalTimeline.java index a2f2699c41c9..e8290e455bf7 100644 --- a/processing/src/main/java/org/apache/druid/timeline/VersionedIntervalTimeline.java +++ b/processing/src/main/java/org/apache/druid/timeline/VersionedIntervalTimeline.java @@ -96,6 +96,8 @@ public class VersionedIntervalTimeline versionComparator) { this(versionComparator, false); @@ -289,11 +291,24 @@ public PartitionChunk findChunk(Interval interval, VersionType versi { lock.readLock().lock(); try { - for (Entry> entry : allTimelineEntries.entrySet()) { - if (entry.getKey().equals(interval) || entry.getKey().contains(interval)) { - TimelineEntry foundEntry = entry.getValue().get(version); - if (foundEntry != null) { - return foundEntry.getPartitionHolder().getChunk(partitionNum); + + // Speed up search with an exact interval match lookup first + TreeMap versionEntries = allTimelineEntries.get(interval); + if (versionEntries != null) { + TimelineEntry foundEntry = versionEntries.get(version); + if (foundEntry != null) { + return foundEntry.getPartitionHolder().getChunk(partitionNum); + } + } + + if (!exactIntervalMatch) { + // If an exact interval match is not found search for an encapsulating interval + for (Entry> entry : allTimelineEntries.entrySet()) { + if (entry.getKey().contains(interval)) { + TimelineEntry foundEntry = entry.getValue().get(version); + if (foundEntry != null) { + return foundEntry.getPartitionHolder().getChunk(partitionNum); + } } } } From 143f0955149289040387834d13a1ead9dd4e45a2 
Mon Sep 17 00:00:00 2001 From: Pramod Immaneni Date: Mon, 6 Oct 2025 12:26:34 -0700 Subject: [PATCH 02/29] Narrowing search space for an interval when an exact match is not found --- .../timeline/VersionedIntervalTimeline.java | 26 ++++++++++++------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/timeline/VersionedIntervalTimeline.java b/processing/src/main/java/org/apache/druid/timeline/VersionedIntervalTimeline.java index e8290e455bf7..ca021185efc1 100644 --- a/processing/src/main/java/org/apache/druid/timeline/VersionedIntervalTimeline.java +++ b/processing/src/main/java/org/apache/druid/timeline/VersionedIntervalTimeline.java @@ -23,6 +23,9 @@ import com.google.common.base.Preconditions; import com.google.common.collect.FluentIterable; import com.google.common.collect.Iterators; +import com.google.common.collect.Range; +import com.google.common.collect.RangeMap; +import com.google.common.collect.TreeRangeMap; import com.google.errorprone.annotations.concurrent.GuardedBy; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.UOE; @@ -30,6 +33,7 @@ import org.apache.druid.timeline.partition.PartitionChunk; import org.apache.druid.timeline.partition.PartitionHolder; import org.apache.druid.utils.CollectionUtils; +import org.joda.time.DateTime; import org.joda.time.Interval; import javax.annotation.Nullable; @@ -89,6 +93,7 @@ public class VersionedIntervalTimeline version -> timelineEntry private final Map> allTimelineEntries = new HashMap<>(); + private final RangeMap> allTimeRanges = TreeRangeMap.create(); private final AtomicInteger numObjects = new AtomicInteger(); private final Comparator versionComparator; @@ -96,8 +101,6 @@ public class VersionedIntervalTimeline versionComparator) { this(versionComparator, false); @@ -212,6 +215,7 @@ public void addAll(final Iterator> TreeMap versionEntry = new TreeMap<>(versionComparator); versionEntry.put(version, entry); 
allTimelineEntries.put(interval, versionEntry); + allTimeRanges.put(Range.closedOpen(interval.getStart(), interval.getEnd()), versionEntry); numObjects.incrementAndGet(); } else { entry = exists.get(version); @@ -271,6 +275,7 @@ public PartitionChunk remove(Interval interval, VersionType version, versionEntries.remove(version); if (versionEntries.isEmpty()) { allTimelineEntries.remove(interval); + allTimeRanges.remove(Range.closedOpen(interval.getStart(), interval.getEnd())); } remove(incompletePartitionsTimeline, interval, entry, true); @@ -301,14 +306,15 @@ public PartitionChunk findChunk(Interval interval, VersionType versi } } - if (!exactIntervalMatch) { - // If an exact interval match is not found search for an encapsulating interval - for (Entry> entry : allTimelineEntries.entrySet()) { - if (entry.getKey().contains(interval)) { - TimelineEntry foundEntry = entry.getValue().get(version); - if (foundEntry != null) { - return foundEntry.getPartitionHolder().getChunk(partitionNum); - } + // If an exact interval match is not found look for a matching range + RangeMap> possibleMatches = allTimeRanges.subRangeMap(Range.closedOpen(interval.getStart(), interval.getEnd())); + for (Entry, TreeMap> entry : possibleMatches.asMapOfRanges().entrySet()) { + Range range = entry.getKey(); + Interval eninterval = new Interval(range.lowerEndpoint(), range.upperEndpoint()); + if (eninterval.contains(interval)) { + TimelineEntry foundEntry = entry.getValue().get(version); + if (foundEntry != null) { + return foundEntry.getPartitionHolder().getChunk(partitionNum); } } } From d38750e7bf30d4afa030789e16e78026cfe3132c Mon Sep 17 00:00:00 2001 From: Pramod Immaneni Date: Wed, 8 Oct 2025 14:31:08 -0700 Subject: [PATCH 03/29] Implemented an optimized data structure to find intervals encompassing a given interval. Using it for finding segments loaded from cache. 
--- .../java/util/common/guava/Comparators.java | 39 +++ .../apache/druid/timeline/IntervalTree.java | 241 ++++++++++++++++++ .../timeline/VersionedIntervalTimeline.java | 17 +- .../druid/timeline/IntervalTreeTest.java | 189 ++++++++++++++ 4 files changed, 475 insertions(+), 11 deletions(-) create mode 100644 processing/src/main/java/org/apache/druid/timeline/IntervalTree.java create mode 100644 processing/src/test/java/org/apache/druid/timeline/IntervalTreeTest.java diff --git a/processing/src/main/java/org/apache/druid/java/util/common/guava/Comparators.java b/processing/src/main/java/org/apache/druid/java/util/common/guava/Comparators.java index 618698c4ac46..04e4f7f0fc05 100644 --- a/processing/src/main/java/org/apache/druid/java/util/common/guava/Comparators.java +++ b/processing/src/main/java/org/apache/druid/java/util/common/guava/Comparators.java @@ -119,6 +119,34 @@ public int compare(Interval lhs, Interval rhs) } }; + private static final Comparator INTERVAL_BY_START = new Comparator<>() + { + private final DateTimeComparator dateTimeComp = DateTimeComparator.getInstance(); + + @Override + public int compare(Interval lhs, Interval rhs) + { + if (lhs.getChronology().equals(rhs.getChronology())) { + return Long.compare(lhs.getStartMillis(), rhs.getStartMillis()); + } + return dateTimeComp.compare(lhs.getStart(), rhs.getStart()); + } + }; + + private static final Comparator INTERVAL_BY_END = new Comparator<>() + { + private final DateTimeComparator dateTimeComp = DateTimeComparator.getInstance(); + + @Override + public int compare(Interval lhs, Interval rhs) + { + if (lhs.getChronology().equals(rhs.getChronology())) { + return Long.compare(lhs.getEndMillis(), rhs.getEndMillis()); + } + return dateTimeComp.compare(lhs.getEnd(), rhs.getEnd()); + } + }; + @Deprecated public static Comparator intervals() { @@ -135,4 +163,15 @@ public static Comparator intervalsByEndThenStart() return INTERVAL_BY_END_THEN_START; } + public static Comparator intervalsByStart() + { 
+ return INTERVAL_BY_START; + } + + public static Comparator intervalsByEnd() + { + return INTERVAL_BY_END; + } + + } diff --git a/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java b/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java new file mode 100644 index 000000000000..2d051055c920 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java @@ -0,0 +1,241 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.timeline; + +import org.joda.time.Interval; + +import java.util.Comparator; +import java.util.HashMap; +import java.util.Map; + + +/** + * A variation of Interval Trees (https://en.wikipedia.org/wiki/Interval_tree) + * Custom implementation for faster search and specific joda Interval comparator arithmetic used in project + * + * + * Multiple intervals are added to the tree and a value can also be associated with each interval. These are stored in + * nodes in the tree along with additional state. The tree can then be searched for intervals matching a given interval. + * A match is any interval in the tree that fully encompasses the given interval. There can be multiple results. 
+ * + * The tree is a binary search tree sorted by interval start time. Additional state containing the minimum and maximum + * interval bounds of the entire subtree under a node, is stored on each node. This state helps speed up search for + * matching intervals by skipping unsuitable subtrees that won't have a match. + * + * TODO:- Add balancing + */ +public class IntervalTree +{ + Comparator comparator; + Comparator highComparator; + + Node root; + + public IntervalTree(Comparator comparator) + { + this(comparator, comparator); + } + + public IntervalTree(Comparator comparator, Comparator highComparator) + { + this.comparator = comparator; + this.highComparator = highComparator; + } + + /* + public static class Entry { + Interval interval; + T value; + + public Entry(Interval interval, T value) { + this.interval = interval; + this.value = value; + } + } + */ + + static class Node + { + Interval interval; + T value; + // The min and max of the range for the subtree + Interval min; + Interval max; + Node left; + Node right; + } + + public void add(Interval interval, T value) + { + root = insert(root, interval, value); + } + + private Node insert(Node node, Interval interval, T value) + { + + if (node == null) { + node = new Node<>(); + node.interval = interval; + node.min = interval; + node.max = interval; + node.value = value; + return node; + } + + if (comparator.compare(interval, node.interval) <= 0) { + node.left = insert(node.left, interval, value); + } else { + node.right = insert(node.right, interval, value); + } + + if (comparator.compare(interval, node.min) < 0) { + node.min = interval; + } + + if (highComparator.compare(node.max, interval) < 0) { + node.max = interval; + } + + return node; + } + + //public List> findEncompassing(Interval interval) + public Map findEncompassing(Interval interval) + { + //List> result = new ArrayList<>(); + Map result = new HashMap<>(); + findEncompassing(root, interval, result); + return result; + } + + //private void 
findEncompassing(Node node, Interval interval, List> result) + private void findEncompassing(Node node, Interval interval, Map result) + { + + if (node == null) { + return; + } + + /* + // If interval falls outside the min to max range of the subtree don't follow the subtree + if ((comparator.compare(interval, node.min) < 0) + || (highComparator.compare(node.max, interval) < 0)) { + return; + } + */ + + if (node.interval.contains(interval)) { + //result.add(new Entry<>(node.interval, node.value)); + result.put(node.interval, node.value); + } + + // Matches can be found on both left and right side as the given interval start needs to be just greater + // than a node start and end less than the node end + + // If there is a potential candidate on left search left + if ((node.left != null) && isIntervalInBounds(node.left, interval)) { + findEncompassing(node.left, interval, result); + } + + // If there is a potential candidate on right search right + if (node.right != null && isIntervalInBounds(node.right, interval)) { + findEncompassing(node.right, interval, result); + } + + /* + int cmp = comparator.compare(interval, node.interval); + if (cmp <= 0) { + findEncompassing(node.left, interval, result); + } else { + findEncompassing(node.right, interval, result); + } + */ + } + + private boolean isIntervalInBounds(Node node, Interval interval) + { + return (comparator.compare(node.min, interval) <= 0) + && (highComparator.compare(node.max, interval) >= 0); + } + + + public void remove(Interval interval) + { + root = removeNode(root, interval); + } + + private Node removeNode(Node node, Interval interval) + { + /* + if ((node == null) || comparator.compare(node.interval, interval) == 0) { + return null; + } + */ + if (node == null) { + return null; + } + + if (node.interval.equals(interval)) { + if ((node.left != null) && (node.right != null)) { + makeLeftChild(node.right, node.left); + return node.right; + } else if (node.left != null) { + return node.left; + } + return 
null; + } + + if (comparator.compare(interval, node.interval) <= 0) { + node.left = removeNode(node.left, interval); + } else { + node.right = removeNode(node.right, interval); + } + + return node; + } + + private void makeLeftChild(Node node, Node childNode) + { + if (node.left == null) { + node.left = childNode; + } else { + makeLeftChild(node.left, childNode); + } + } + + public void clear() + { + root = null; + } + + public int size() + { + return size(root); + } + + private int size(Node node) + { + if (node == null) { + return 0; + } + return 1 + size(node.left) + size(node.right); + } + +} diff --git a/processing/src/main/java/org/apache/druid/timeline/VersionedIntervalTimeline.java b/processing/src/main/java/org/apache/druid/timeline/VersionedIntervalTimeline.java index ca021185efc1..f94a4043a7f7 100644 --- a/processing/src/main/java/org/apache/druid/timeline/VersionedIntervalTimeline.java +++ b/processing/src/main/java/org/apache/druid/timeline/VersionedIntervalTimeline.java @@ -23,9 +23,6 @@ import com.google.common.base.Preconditions; import com.google.common.collect.FluentIterable; import com.google.common.collect.Iterators; -import com.google.common.collect.Range; -import com.google.common.collect.RangeMap; -import com.google.common.collect.TreeRangeMap; import com.google.errorprone.annotations.concurrent.GuardedBy; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.UOE; @@ -33,7 +30,6 @@ import org.apache.druid.timeline.partition.PartitionChunk; import org.apache.druid.timeline.partition.PartitionHolder; import org.apache.druid.utils.CollectionUtils; -import org.joda.time.DateTime; import org.joda.time.Interval; import javax.annotation.Nullable; @@ -93,7 +89,7 @@ public class VersionedIntervalTimeline version -> timelineEntry private final Map> allTimelineEntries = new HashMap<>(); - private final RangeMap> allTimeRanges = TreeRangeMap.create(); + private final IntervalTree> allTimeIntervals = new 
IntervalTree<>(Comparators.intervalsByStart(), Comparators.intervalsByEnd()); private final AtomicInteger numObjects = new AtomicInteger(); private final Comparator versionComparator; @@ -215,7 +211,7 @@ public void addAll(final Iterator> TreeMap versionEntry = new TreeMap<>(versionComparator); versionEntry.put(version, entry); allTimelineEntries.put(interval, versionEntry); - allTimeRanges.put(Range.closedOpen(interval.getStart(), interval.getEnd()), versionEntry); + allTimeIntervals.add(interval, versionEntry); numObjects.incrementAndGet(); } else { entry = exists.get(version); @@ -275,7 +271,7 @@ public PartitionChunk remove(Interval interval, VersionType version, versionEntries.remove(version); if (versionEntries.isEmpty()) { allTimelineEntries.remove(interval); - allTimeRanges.remove(Range.closedOpen(interval.getStart(), interval.getEnd())); + allTimeIntervals.remove(interval); } remove(incompletePartitionsTimeline, interval, entry, true); @@ -307,10 +303,9 @@ public PartitionChunk findChunk(Interval interval, VersionType versi } // If an exact interval match is not found look for a matching range - RangeMap> possibleMatches = allTimeRanges.subRangeMap(Range.closedOpen(interval.getStart(), interval.getEnd())); - for (Entry, TreeMap> entry : possibleMatches.asMapOfRanges().entrySet()) { - Range range = entry.getKey(); - Interval eninterval = new Interval(range.lowerEndpoint(), range.upperEndpoint()); + Map> possibleMatches = allTimeIntervals.findEncompassing(interval); + for (Entry> entry : possibleMatches.entrySet()) { + Interval eninterval = entry.getKey(); if (eninterval.contains(interval)) { TimelineEntry foundEntry = entry.getValue().get(version); if (foundEntry != null) { diff --git a/processing/src/test/java/org/apache/druid/timeline/IntervalTreeTest.java b/processing/src/test/java/org/apache/druid/timeline/IntervalTreeTest.java new file mode 100644 index 000000000000..e29cf9be19a1 --- /dev/null +++ 
b/processing/src/test/java/org/apache/druid/timeline/IntervalTreeTest.java @@ -0,0 +1,189 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.timeline; + +import org.apache.druid.java.util.common.Intervals; +import org.apache.druid.java.util.common.guava.Comparators; +import org.joda.time.Interval; +import org.junit.Assert; +import org.junit.Test; + +import java.util.HashMap; +import java.util.Map; +import java.util.Random; + +public class IntervalTreeTest +{ + + @Test + public void testSize() + { + IntervalTree tree = setupBaseIntervalTree(); + Assert.assertEquals( "Size", 4, tree.size()); + } + + @Test + public void testMatch() + { + IntervalTree tree = setupBaseIntervalTree(); + Map entries = tree.findEncompassing(Intervals.of("2025-01-04T00:00:00/P1D")); + + Assert.assertEquals(1, entries.size()); + Assert.assertEquals("Match", "v4", entries.get(Intervals.of("2025-01-04T00:00:00/P1D"))); + } + + @Test + public void testNoMatch() + { + IntervalTree tree = setupBaseIntervalTree(); + Map entries = tree.findEncompassing(Intervals.of("2025-01-05T00:00:00/P1D")); + + Assert.assertEquals(0, entries.size()); + } + + + @Test + public void testOverlap() + { + IntervalTree tree = 
setupOverlapIntervalTree(); + Map entries = tree.findEncompassing(Intervals.of("2025-01-02T09:00:00/PT1H")); + + Assert.assertEquals(2, entries.size()); + Assert.assertEquals("Day match", "v2", entries.get(Intervals.of("2025-01-02T00:00:00/P1D"))); + Assert.assertEquals("Year match", "v5", entries.get(Intervals.of("2025-01-01T00:00:00/P1Y"))); + } + + @Test + public void testSparseOverlap() + { + IntervalTree tree = setupSparseOverlapTree(); + Map entries = tree.findEncompassing(Intervals.of("2025-06-03T00:00:00/P1D")); + + Assert.assertEquals(3, entries.size()); + Assert.assertEquals("Match 1", "v5", entries.get(Intervals.of("2025-06-03T00:00:00/P1D"))); + Assert.assertEquals("Match 2", "v9", entries.get(Intervals.of("2025-05-10T00:00:00/P1M"))); + Assert.assertEquals("Match 3", "v11", entries.get(Intervals.of("2025-06-01T00:00:00/P1M"))); + } + + @Test + public void testRemove() { + IntervalTree tree = setupBaseIntervalTree(); + tree.remove(Intervals.of("2025-01-02T00:00:00/P1D")); + Assert.assertEquals("Size", 3, tree.size()); + } + + @Test + public void testRemoveRootAndMatch() + { + IntervalTree tree = setupBaseIntervalTree(); + tree.remove(Intervals.of("2025-01-03T00:00:00/P1D")); + Assert.assertEquals("Size", 3, tree.size()); + Map entries = tree.findEncompassing(Intervals.of("2025-01-04T00:00:00/P1D")); + Assert.assertEquals(1, entries.size()); + Assert.assertEquals("Match", "v4", entries.get(Intervals.of("2025-01-04T00:00:00/P1D"))); + } + + @Test + public void testRemoveMultiple() + { + IntervalTree tree = setupSparseOverlapTree(); + int isize = tree.size(); + tree.remove(Intervals.of("2025-01-12T00:00:00/P1D")); + tree.remove(Intervals.of("2025-06-03T00:00:00/P1D")); + tree.remove(Intervals.of("2025-06-01T00:00:00/P1M")); + int csize = tree.size(); + Assert.assertEquals("Size", 3, isize - csize); + } + + @Test + public void testClear() + { + IntervalTree tree = setupBaseIntervalTree(); + tree.clear(); + Assert.assertEquals("Size", 0, tree.size()); + } + 
+ @Test + public void testTree() + { + IntervalTree tree = new IntervalTree<>(Comparators.intervalsByStartThenEnd(), Comparators.intervalsByEndThenStart()); + Map data = new HashMap<>(); + Random random = new Random(); + for (int i = 0; i < 10; i++) { + //int month = random.nextInt(12) + 1; + //int day = random.nextInt(28) + 1; + int month = 1; + int day = i + 1; + String timestr = "2024-" + month + "-" + day + "T00:00:00/P" + (i+1) + "D"; + Interval interval = Intervals.of(timestr); + String value = "v" + i; + tree.add(interval, value); + data.put(interval, value); + } + for (Map.Entry entry : data.entrySet()) { + System.out.println(entry.getKey() + ": " + entry.getValue()); + } + /* + List> intervals = tree.findEncompassing(Intervals.of("2024-01-04T10:00:00/PT1H")); + System.out.println("Matched intervals"); + for (IntervalTree.Entry entry : intervals) { + System.out.println(entry.interval + ": " + entry.value); + } + */ + } + + private IntervalTree setupBaseIntervalTree() + { + IntervalTree tree = new IntervalTree<>(Comparators.intervalsByStart(), Comparators.intervalsByEnd()); + tree.add(Intervals.of("2025-01-03T00:00:00/P1D"), "v3"); + tree.add(Intervals.of("2025-01-01T00:00:00/P1D"), "v1"); + tree.add(Intervals.of("2025-01-02T00:00:00/P1D"), "v2"); + tree.add(Intervals.of("2025-01-04T00:00:00/P1D"), "v4"); + + return tree; + } + + private IntervalTree setupOverlapIntervalTree() + { + IntervalTree tree = setupBaseIntervalTree(); + tree.add(Intervals.of("2025-01-01T00:00:00/P1Y"), "v5"); + + return tree; + } + + private IntervalTree setupSparseOverlapTree() + { + IntervalTree tree = new IntervalTree<>(Comparators.intervalsByStart(), Comparators.intervalsByEnd()); + tree.add(Intervals.of("2025-01-01T00:00:00/P1D"), "v1"); + tree.add(Intervals.of("2025-02-01T00:00:00/P1D"), "v2"); + tree.add(Intervals.of("2025-01-12T00:00:00/P1D"), "v3"); + tree.add(Intervals.of("2025-07-12T00:00:00/P1D"), "v4"); + tree.add(Intervals.of("2025-06-03T00:00:00/P1D"), "v5"); + 
tree.add(Intervals.of("2025-08-09T00:00:00/P1D"), "v6"); + tree.add(Intervals.of("2025-09-04T00:00:00/P1D"), "v7"); + tree.add(Intervals.of("2025-04-02T00:00:00/P1D"), "v8"); + tree.add(Intervals.of("2025-05-10T00:00:00/P1M"), "v9"); + tree.add(Intervals.of("2025-10-06T00:00:00/P1M"), "v10"); + tree.add(Intervals.of("2025-06-01T00:00:00/P1M"), "v11"); + + return tree; + } + +} From ecb9f82be89cdb06fe6a71396995c9fefbb3597c Mon Sep 17 00:00:00 2001 From: Pramod Immaneni Date: Thu, 9 Oct 2025 15:45:49 -0700 Subject: [PATCH 04/29] Added rebalancing --- .../apache/druid/timeline/IntervalTree.java | 356 +++++++++++------- .../druid/timeline/IntervalTreeTest.java | 309 ++++++++------- 2 files changed, 390 insertions(+), 275 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java b/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java index 2d051055c920..42f8993abe78 100644 --- a/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java +++ b/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java @@ -19,45 +19,49 @@ package org.apache.druid.timeline; +import org.jetbrains.annotations.VisibleForTesting; import org.joda.time.Interval; +import java.util.ArrayList; import java.util.Comparator; import java.util.HashMap; +import java.util.List; import java.util.Map; /** * A variation of Interval Trees (https://en.wikipedia.org/wiki/Interval_tree) * Custom implementation for faster search and specific joda Interval comparator arithmetic used in project - * - * + *

+ *

* Multiple intervals are added to the tree and a value can also be associated with each interval. These are stored in * nodes in the tree along with additional state. The tree can then be searched for intervals matching a given interval. * A match is any interval in the tree that fully encompasses the given interval. There can be multiple results. - * + *

* The tree is a binary search tree sorted by interval start time. Additional state containing the minimum and maximum * interval bounds of the entire subtree under a node, is stored on each node. This state helps speed up search for * matching intervals by skipping unsuitable subtrees that won't have a match. - * + *

* TODO:- Add balancing - */ + */ public class IntervalTree { - Comparator comparator; - Comparator highComparator; + Comparator comparator; + Comparator highComparator; - Node root; + @VisibleForTesting + Node root; - public IntervalTree(Comparator comparator) - { - this(comparator, comparator); - } + public IntervalTree(Comparator comparator) + { + this(comparator, comparator); + } - public IntervalTree(Comparator comparator, Comparator highComparator) - { - this.comparator = comparator; - this.highComparator = highComparator; - } + public IntervalTree(Comparator comparator, Comparator highComparator) + { + this.comparator = comparator; + this.highComparator = highComparator; + } /* public static class Entry { @@ -71,67 +75,96 @@ public Entry(Interval interval, T value) { } */ - static class Node + static class Node + { + Interval interval; + T value; + // The min and max of the range for the subtree + Interval min; + Interval max; + Node left; + Node right; + + + @Override + public String toString() { - Interval interval; - T value; - // The min and max of the range for the subtree - Interval min; - Interval max; - Node left; - Node right; + return "Node{" + + "interval=" + interval + + ", value=" + value + + ", min=" + min + + ", max=" + max + + ", left=" + left + + ", right=" + right + + '}'; } - public void add(Interval interval, T value) - { - root = insert(root, interval, value); + public String print(int level) { + StringBuilder sb = new StringBuilder(); + String prefix = "\t".repeat(level); + sb.append(prefix).append("{").append("\n"); + sb.append(prefix).append("interval = ").append(interval).append("\n"); + sb.append(prefix).append("value = ").append(value).append("\n"); + sb.append(prefix).append("min = ").append(min).append("\n"); + sb.append(prefix).append("max = ").append(max).append("\n"); + sb.append(prefix).append("left = ").append((left != null) ? 
left.print(level + 1) : null).append("\n"); + sb.append(prefix).append("right = ").append((right != null) ? right.print(level + 1) : null).append("\n"); + sb.append(prefix).append("}"); + return sb.toString(); } - private Node insert(Node node, Interval interval, T value) - { + } - if (node == null) { - node = new Node<>(); - node.interval = interval; - node.min = interval; - node.max = interval; - node.value = value; - return node; - } + public void add(Interval interval, T value) + { + root = insert(root, interval, value); + } - if (comparator.compare(interval, node.interval) <= 0) { - node.left = insert(node.left, interval, value); - } else { - node.right = insert(node.right, interval, value); - } + private Node insert(Node node, Interval interval, T value) + { - if (comparator.compare(interval, node.min) < 0) { - node.min = interval; - } + if (node == null) { + node = new Node<>(); + node.interval = interval; + node.min = interval; + node.max = interval; + node.value = value; + return node; + } - if (highComparator.compare(node.max, interval) < 0) { - node.max = interval; - } + if (comparator.compare(interval, node.interval) <= 0) { + node.left = insert(node.left, interval, value); + } else { + node.right = insert(node.right, interval, value); + } - return node; + if (comparator.compare(interval, node.min) < 0) { + node.min = interval; } - //public List> findEncompassing(Interval interval) - public Map findEncompassing(Interval interval) - { - //List> result = new ArrayList<>(); - Map result = new HashMap<>(); - findEncompassing(root, interval, result); - return result; + if (highComparator.compare(node.max, interval) < 0) { + node.max = interval; } - //private void findEncompassing(Node node, Interval interval, List> result) - private void findEncompassing(Node node, Interval interval, Map result) - { + return node; + } - if (node == null) { - return; - } + //public List> findEncompassing(Interval interval) + public Map findEncompassing(Interval interval) + { 
+ //List> result = new ArrayList<>(); + Map result = new HashMap<>(); + findEncompassing(root, interval, result); + return result; + } + + //private void findEncompassing(Node node, Interval interval, List> result) + private void findEncompassing(Node node, Interval interval, Map result) + { + + if (node == null) { + return; + } /* // If interval falls outside the min to max range of the subtree don't follow the subtree @@ -141,23 +174,23 @@ private void findEncompassing(Node node, Interval interval, Map } */ - if (node.interval.contains(interval)) { - //result.add(new Entry<>(node.interval, node.value)); - result.put(node.interval, node.value); - } + if (node.interval.contains(interval)) { + //result.add(new Entry<>(node.interval, node.value)); + result.put(node.interval, node.value); + } - // Matches can be found on both left and right side as the given interval start needs to be just greater - // than a node start and end less than the node end + // Matches can be found on both left and right side as the given interval start needs to be just greater + // than a node start and end less than the node end - // If there is a potential candidate on left search left - if ((node.left != null) && isIntervalInBounds(node.left, interval)) { - findEncompassing(node.left, interval, result); - } + // If there is a potential candidate on left search left + if ((node.left != null) && isIntervalInBounds(node.left, interval)) { + findEncompassing(node.left, interval, result); + } - // If there is a potential candidate on right search right - if (node.right != null && isIntervalInBounds(node.right, interval)) { - findEncompassing(node.right, interval, result); - } + // If there is a potential candidate on right search right + if (node.right != null && isIntervalInBounds(node.right, interval)) { + findEncompassing(node.right, interval, result); + } /* int cmp = comparator.compare(interval, node.interval); @@ -167,75 +200,140 @@ private void findEncompassing(Node node, Interval 
interval, Map findEncompassing(node.right, interval, result); } */ - } + } - private boolean isIntervalInBounds(Node node, Interval interval) - { - return (comparator.compare(node.min, interval) <= 0) - && (highComparator.compare(node.max, interval) >= 0); - } + private boolean isIntervalInBounds(Node node, Interval interval) + { + return (comparator.compare(node.min, interval) <= 0) + && (highComparator.compare(node.max, interval) >= 0); + } - public void remove(Interval interval) - { - root = removeNode(root, interval); - } + public void remove(Interval interval) + { + root = removeNode(root, interval); + } - private Node removeNode(Node node, Interval interval) - { + private Node removeNode(Node node, Interval interval) + { /* if ((node == null) || comparator.compare(node.interval, interval) == 0) { return null; } */ - if (node == null) { - return null; - } - - if (node.interval.equals(interval)) { - if ((node.left != null) && (node.right != null)) { - makeLeftChild(node.right, node.left); - return node.right; - } else if (node.left != null) { - return node.left; - } - return null; - } - - if (comparator.compare(interval, node.interval) <= 0) { - node.left = removeNode(node.left, interval); - } else { - node.right = removeNode(node.right, interval); - } - - return node; + if (node == null) { + return null; } - private void makeLeftChild(Node node, Node childNode) - { - if (node.left == null) { - node.left = childNode; - } else { - makeLeftChild(node.left, childNode); - } + if (node.interval.equals(interval)) { + if ((node.left != null) && (node.right != null)) { + makeLeftChild(node.right, node.left); + return node.right; + } else if (node.left != null) { + return node.left; + } + return null; } - public void clear() - { - root = null; + if (comparator.compare(interval, node.interval) <= 0) { + node.left = removeNode(node.left, interval); + } else { + node.right = removeNode(node.right, interval); } - public int size() - { - return size(root); - } + return node; 
+ } - private int size(Node node) - { - if (node == null) { - return 0; + private void makeLeftChild(Node node, Node childNode) + { + if (node.left == null) { + node.left = childNode; + } else { + makeLeftChild(node.left, childNode); + } + } + + public void rebalance() + { + // In order traversal followed by recursive binary segmentation + List> nodes = new ArrayList<>(); + inOrderTraverse(root, nodes); + root = constructTree(nodes, 0, nodes.size()); + } + + private void inOrderTraverse(Node node, List> nodes) + { + if (node == null) { + return; + } + inOrderTraverse(node.left, nodes); + nodes.add(node); + inOrderTraverse(node.right, nodes); + } + + private Node constructTree(List> nodes, int start, int end) + { + if (start == end) { + return null; + } + int mid = (start + end - 1) / 2; + Node node = nodes.get(mid); + node.left = constructTree(nodes, start, mid); + node.right = constructTree(nodes, mid + 1, end); + node.max = maxInterval(node.interval, node.left, node.right); + node.min = minInterval(node.interval, node.left, node.right); + return node; + } + + public void clear() + { + root = null; + } + + public int size() + { + return size(root); + } + + private int size(Node node) + { + if (node == null) { + return 0; + } + return 1 + size(node.left) + size(node.right); + } + + public String print() + { + return (root != null) ? root.print(1) : null; + } + + @SafeVarargs + private Interval maxInterval(Interval interval, Node... nodes) + { + Interval max = interval; + for (Node node : nodes) { + if (node != null) { + if (highComparator.compare(node.max, max) > 0) { + max = node.max; + } + } + } + return max; + } + + @SafeVarargs + private Interval minInterval(Interval interval, Node... 
nodes) + { + Interval min = interval; + for (Node node : nodes) { + if (node != null) { + if (comparator.compare(node.min, min) <= 0) { + min = node.min; } - return 1 + size(node.left) + size(node.right); + } } + return min; + } } diff --git a/processing/src/test/java/org/apache/druid/timeline/IntervalTreeTest.java b/processing/src/test/java/org/apache/druid/timeline/IntervalTreeTest.java index e29cf9be19a1..d1f382410372 100644 --- a/processing/src/test/java/org/apache/druid/timeline/IntervalTreeTest.java +++ b/processing/src/test/java/org/apache/druid/timeline/IntervalTreeTest.java @@ -19,6 +19,10 @@ package org.apache.druid.timeline; +import com.fasterxml.jackson.annotation.JsonAutoDetect; +import com.fasterxml.jackson.annotation.PropertyAccessor; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.guava.Comparators; import org.joda.time.Interval; @@ -32,158 +36,171 @@ public class IntervalTreeTest { - @Test - public void testSize() - { - IntervalTree tree = setupBaseIntervalTree(); - Assert.assertEquals( "Size", 4, tree.size()); + @Test + public void testSize() + { + IntervalTree tree = setupBaseIntervalTree(); + Assert.assertEquals("Size", 6, tree.size()); + } + + @Test + public void testMatch() + { + IntervalTree tree = setupBaseIntervalTree(); + Map entries = tree.findEncompassing(Intervals.of("2025-01-04T00:00:00/P1D")); + + Assert.assertEquals(1, entries.size()); + Assert.assertEquals("Match", "v4", entries.get(Intervals.of("2025-01-04T00:00:00/P1D"))); + } + + @Test + public void testNoMatch() + { + IntervalTree tree = setupBaseIntervalTree(); + Map entries = tree.findEncompassing(Intervals.of("2025-01-07T00:00:00/P1D")); + + Assert.assertEquals(0, entries.size()); + } + + + @Test + public void testOverlap() + { + IntervalTree tree = setupOverlapIntervalTree(); + Map entries = 
tree.findEncompassing(Intervals.of("2025-01-02T09:00:00/PT1H")); + + Assert.assertEquals(2, entries.size()); + Assert.assertEquals("Day match", "v2", entries.get(Intervals.of("2025-01-02T00:00:00/P1D"))); + Assert.assertEquals("Year match", "v7", entries.get(Intervals.of("2025-01-01T00:00:00/P1Y"))); + } + + @Test + public void testSparseOverlap() + { + IntervalTree tree = setupSparseOverlapTree(); + Map entries = tree.findEncompassing(Intervals.of("2025-06-03T00:00:00/P1D")); + + Assert.assertEquals(3, entries.size()); + Assert.assertEquals("Match 1", "v5", entries.get(Intervals.of("2025-06-03T00:00:00/P1D"))); + Assert.assertEquals("Match 2", "v9", entries.get(Intervals.of("2025-05-10T00:00:00/P1M"))); + Assert.assertEquals("Match 3", "v11", entries.get(Intervals.of("2025-06-01T00:00:00/P1M"))); + } + + @Test + public void testRemove() + { + IntervalTree tree = setupBaseIntervalTree(); + tree.remove(Intervals.of("2025-01-02T00:00:00/P1D")); + Assert.assertEquals("Size", 5, tree.size()); + } + + @Test + public void testRemoveRootAndMatch() + { + IntervalTree tree = setupBaseIntervalTree(); + tree.remove(Intervals.of("2025-01-03T00:00:00/P1D")); + Assert.assertEquals("Size", 5, tree.size()); + Map entries = tree.findEncompassing(Intervals.of("2025-01-04T00:00:00/P1D")); + Assert.assertEquals(1, entries.size()); + Assert.assertEquals("Match", "v4", entries.get(Intervals.of("2025-01-04T00:00:00/P1D"))); + } + + @Test + public void testRemoveMultiple() + { + IntervalTree tree = setupSparseOverlapTree(); + int isize = tree.size(); + tree.remove(Intervals.of("2025-01-12T00:00:00/P1D")); + tree.remove(Intervals.of("2025-06-03T00:00:00/P1D")); + tree.remove(Intervals.of("2025-06-01T00:00:00/P1M")); + int csize = tree.size(); + Assert.assertEquals("Size", 3, isize - csize); + } + + @Test + public void testClear() + { + IntervalTree tree = setupBaseIntervalTree(); + tree.clear(); + Assert.assertEquals("Size", 0, tree.size()); + } + + /* + @Test + public void testTree() + { 
+ IntervalTree tree = new IntervalTree<>(Comparators.intervalsByStartThenEnd(), Comparators.intervalsByEndThenStart()); + Map data = new HashMap<>(); + Random random = new Random(); + for (int i = 0; i < 10; i++) { + //int month = random.nextInt(12) + 1; + //int day = random.nextInt(28) + 1; + int month = 1; + int day = i + 1; + String timestr = "2024-" + month + "-" + day + "T00:00:00/P" + (i + 1) + "D"; + Interval interval = Intervals.of(timestr); + String value = "v" + i; + tree.add(interval, value); + data.put(interval, value); } - - @Test - public void testMatch() - { - IntervalTree tree = setupBaseIntervalTree(); - Map entries = tree.findEncompassing(Intervals.of("2025-01-04T00:00:00/P1D")); - - Assert.assertEquals(1, entries.size()); - Assert.assertEquals("Match", "v4", entries.get(Intervals.of("2025-01-04T00:00:00/P1D"))); - } - - @Test - public void testNoMatch() - { - IntervalTree tree = setupBaseIntervalTree(); - Map entries = tree.findEncompassing(Intervals.of("2025-01-05T00:00:00/P1D")); - - Assert.assertEquals(0, entries.size()); - } - - - @Test - public void testOverlap() - { - IntervalTree tree = setupOverlapIntervalTree(); - Map entries = tree.findEncompassing(Intervals.of("2025-01-02T09:00:00/PT1H")); - - Assert.assertEquals(2, entries.size()); - Assert.assertEquals("Day match", "v2", entries.get(Intervals.of("2025-01-02T00:00:00/P1D"))); - Assert.assertEquals("Year match", "v5", entries.get(Intervals.of("2025-01-01T00:00:00/P1Y"))); - } - - @Test - public void testSparseOverlap() - { - IntervalTree tree = setupSparseOverlapTree(); - Map entries = tree.findEncompassing(Intervals.of("2025-06-03T00:00:00/P1D")); - - Assert.assertEquals(3, entries.size()); - Assert.assertEquals("Match 1", "v5", entries.get(Intervals.of("2025-06-03T00:00:00/P1D"))); - Assert.assertEquals("Match 2", "v9", entries.get(Intervals.of("2025-05-10T00:00:00/P1M"))); - Assert.assertEquals("Match 3", "v11", entries.get(Intervals.of("2025-06-01T00:00:00/P1M"))); - } - - @Test - 
public void testRemove() { - IntervalTree tree = setupBaseIntervalTree(); - tree.remove(Intervals.of("2025-01-02T00:00:00/P1D")); - Assert.assertEquals("Size", 3, tree.size()); - } - - @Test - public void testRemoveRootAndMatch() - { - IntervalTree tree = setupBaseIntervalTree(); - tree.remove(Intervals.of("2025-01-03T00:00:00/P1D")); - Assert.assertEquals("Size", 3, tree.size()); - Map entries = tree.findEncompassing(Intervals.of("2025-01-04T00:00:00/P1D")); - Assert.assertEquals(1, entries.size()); - Assert.assertEquals("Match", "v4", entries.get(Intervals.of("2025-01-04T00:00:00/P1D"))); + for (Map.Entry entry : data.entrySet()) { + System.out.println(entry.getKey() + ": " + entry.getValue()); } - - @Test - public void testRemoveMultiple() - { - IntervalTree tree = setupSparseOverlapTree(); - int isize = tree.size(); - tree.remove(Intervals.of("2025-01-12T00:00:00/P1D")); - tree.remove(Intervals.of("2025-06-03T00:00:00/P1D")); - tree.remove(Intervals.of("2025-06-01T00:00:00/P1M")); - int csize = tree.size(); - Assert.assertEquals("Size", 3, isize - csize); - } - - @Test - public void testClear() - { - IntervalTree tree = setupBaseIntervalTree(); - tree.clear(); - Assert.assertEquals("Size", 0, tree.size()); - } - - @Test - public void testTree() - { - IntervalTree tree = new IntervalTree<>(Comparators.intervalsByStartThenEnd(), Comparators.intervalsByEndThenStart()); - Map data = new HashMap<>(); - Random random = new Random(); - for (int i = 0; i < 10; i++) { - //int month = random.nextInt(12) + 1; - //int day = random.nextInt(28) + 1; - int month = 1; - int day = i + 1; - String timestr = "2024-" + month + "-" + day + "T00:00:00/P" + (i+1) + "D"; - Interval interval = Intervals.of(timestr); - String value = "v" + i; - tree.add(interval, value); - data.put(interval, value); - } - for (Map.Entry entry : data.entrySet()) { - System.out.println(entry.getKey() + ": " + entry.getValue()); - } - /* + /--* List> intervals = 
tree.findEncompassing(Intervals.of("2024-01-04T10:00:00/PT1H")); System.out.println("Matched intervals"); for (IntervalTree.Entry entry : intervals) { System.out.println(entry.interval + ": " + entry.value); } - */ - } - - private IntervalTree setupBaseIntervalTree() - { - IntervalTree tree = new IntervalTree<>(Comparators.intervalsByStart(), Comparators.intervalsByEnd()); - tree.add(Intervals.of("2025-01-03T00:00:00/P1D"), "v3"); - tree.add(Intervals.of("2025-01-01T00:00:00/P1D"), "v1"); - tree.add(Intervals.of("2025-01-02T00:00:00/P1D"), "v2"); - tree.add(Intervals.of("2025-01-04T00:00:00/P1D"), "v4"); - - return tree; - } - - private IntervalTree setupOverlapIntervalTree() - { - IntervalTree tree = setupBaseIntervalTree(); - tree.add(Intervals.of("2025-01-01T00:00:00/P1Y"), "v5"); - - return tree; - } - - private IntervalTree setupSparseOverlapTree() - { - IntervalTree tree = new IntervalTree<>(Comparators.intervalsByStart(), Comparators.intervalsByEnd()); - tree.add(Intervals.of("2025-01-01T00:00:00/P1D"), "v1"); - tree.add(Intervals.of("2025-02-01T00:00:00/P1D"), "v2"); - tree.add(Intervals.of("2025-01-12T00:00:00/P1D"), "v3"); - tree.add(Intervals.of("2025-07-12T00:00:00/P1D"), "v4"); - tree.add(Intervals.of("2025-06-03T00:00:00/P1D"), "v5"); - tree.add(Intervals.of("2025-08-09T00:00:00/P1D"), "v6"); - tree.add(Intervals.of("2025-09-04T00:00:00/P1D"), "v7"); - tree.add(Intervals.of("2025-04-02T00:00:00/P1D"), "v8"); - tree.add(Intervals.of("2025-05-10T00:00:00/P1M"), "v9"); - tree.add(Intervals.of("2025-10-06T00:00:00/P1M"), "v10"); - tree.add(Intervals.of("2025-06-01T00:00:00/P1M"), "v11"); - - return tree; - } + *--/ + } + */ + + @Test + public void testRebalance() throws JsonProcessingException + { + IntervalTree tree = setupBaseIntervalTree(); + tree.rebalance(); + System.out.println(tree.print()); + } + + private IntervalTree setupBaseIntervalTree() + { + IntervalTree tree = new IntervalTree<>(Comparators.intervalsByStart(), 
Comparators.intervalsByEnd()); + tree.add(Intervals.of("2025-01-03T00:00:00/P1D"), "v3"); + tree.add(Intervals.of("2025-01-01T00:00:00/P1D"), "v1"); + tree.add(Intervals.of("2025-01-02T00:00:00/P1D"), "v2"); + tree.add(Intervals.of("2025-01-04T00:00:00/P1D"), "v4"); + tree.add(Intervals.of("2025-01-05T00:00:00/P1D"), "v5"); + tree.add(Intervals.of("2025-01-06T00:00:00/P1D"), "v6"); + + return tree; + } + + private IntervalTree setupOverlapIntervalTree() + { + IntervalTree tree = setupBaseIntervalTree(); + tree.add(Intervals.of("2025-01-01T00:00:00/P1Y"), "v7"); + + return tree; + } + + private IntervalTree setupSparseOverlapTree() + { + IntervalTree tree = new IntervalTree<>(Comparators.intervalsByStart(), Comparators.intervalsByEnd()); + tree.add(Intervals.of("2025-01-01T00:00:00/P1D"), "v1"); + tree.add(Intervals.of("2025-02-01T00:00:00/P1D"), "v2"); + tree.add(Intervals.of("2025-01-12T00:00:00/P1D"), "v3"); + tree.add(Intervals.of("2025-07-12T00:00:00/P1D"), "v4"); + tree.add(Intervals.of("2025-06-03T00:00:00/P1D"), "v5"); + tree.add(Intervals.of("2025-08-09T00:00:00/P1D"), "v6"); + tree.add(Intervals.of("2025-09-04T00:00:00/P1D"), "v7"); + tree.add(Intervals.of("2025-04-02T00:00:00/P1D"), "v8"); + tree.add(Intervals.of("2025-05-10T00:00:00/P1M"), "v9"); + tree.add(Intervals.of("2025-10-06T00:00:00/P1M"), "v10"); + tree.add(Intervals.of("2025-06-01T00:00:00/P1M"), "v11"); + + return tree; + } } From 678964690502088e48b15a0906f0d84bad417ed3 Mon Sep 17 00:00:00 2001 From: Pramod Immaneni Date: Fri, 10 Oct 2025 12:04:19 -0700 Subject: [PATCH 05/29] Added imbalance threshold to control when to trigger rebalancing --- .../apache/druid/timeline/IntervalTree.java | 70 +++++++++++++++++-- .../druid/timeline/IntervalTreeTest.java | 17 +++-- 2 files changed, 76 insertions(+), 11 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java b/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java index 
42f8993abe78..19eba6d777d3 100644 --- a/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java +++ b/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java @@ -42,7 +42,8 @@ * interval bounds of the entire subtree under a node, is stored on each node. This state helps speed up search for * matching intervals by skipping unsuitable subtrees that won't have a match. *

- * TODO:- Add balancing + * + * Not thread safe */ public class IntervalTree { @@ -51,6 +52,10 @@ public class IntervalTree @VisibleForTesting Node root; + int size; + + // A tolerance from ideal depth on either side expressed as a percentage + int imbalanceTolerance = 50; public IntervalTree(Comparator comparator) { @@ -63,7 +68,17 @@ public IntervalTree(Comparator comparator, Comparator highCo this.highComparator = highComparator; } - /* + public int getImbalanceTolerance() + { + return imbalanceTolerance; + } + + public void setImbalanceTolerance(int imbalanceTolerance) + { + this.imbalanceTolerance = imbalanceTolerance; + } + + /* public static class Entry { Interval interval; T value; @@ -79,6 +94,8 @@ static class Node { Interval interval; T value; + @VisibleForTesting + int height; // The min and max of the range for the subtree Interval min; Interval max; @@ -118,6 +135,7 @@ public String print(int level) { public void add(Interval interval, T value) { root = insert(root, interval, value); + checkRebalance(); } private Node insert(Node node, Interval interval, T value) @@ -126,9 +144,11 @@ private Node insert(Node node, Interval interval, T value) if (node == null) { node = new Node<>(); node.interval = interval; + node.value = value; + node.height = 0; node.min = interval; node.max = interval; - node.value = value; + ++size; return node; } @@ -138,6 +158,10 @@ private Node insert(Node node, Interval interval, T value) node.right = insert(node.right, interval, value); } + int lheight = (node.left != null) ? node.left.height : -1; + int rheight = (node.right != null) ? 
node.right.height : -1; + node.height = Math.max(lheight, rheight) + 1; + if (comparator.compare(interval, node.min) < 0) { node.min = interval; } @@ -212,6 +236,7 @@ private boolean isIntervalInBounds(Node node, Interval interval) public void remove(Interval interval) { root = removeNode(root, interval); + checkRebalance(); } private Node removeNode(Node node, Interval interval) @@ -226,11 +251,14 @@ private Node removeNode(Node node, Interval interval) } if (node.interval.equals(interval)) { + --size; if ((node.left != null) && (node.right != null)) { makeLeftChild(node.right, node.left); return node.right; } else if (node.left != null) { return node.left; + } else if (node.right != null) { + return node.right; } return null; } @@ -241,6 +269,8 @@ private Node removeNode(Node node, Interval interval) node.right = removeNode(node.right, interval); } + recomputeState(node); + return node; } @@ -251,6 +281,7 @@ private void makeLeftChild(Node node, Node childNode) } else { makeLeftChild(node.left, childNode); } + recomputeState(node); } public void rebalance() @@ -280,19 +311,36 @@ private Node constructTree(List> nodes, int start, int end) Node node = nodes.get(mid); node.left = constructTree(nodes, start, mid); node.right = constructTree(nodes, mid + 1, end); + recomputeState(node); + return node; + } + + private void recomputeState(Node node) + { + int lheight = (node.left != null) ? node.left.height : -1; + int rheight = (node.right != null) ? node.right.height : -1; + node.height = Math.max(lheight, rheight) + 1; node.max = maxInterval(node.interval, node.left, node.right); node.min = minInterval(node.interval, node.left, node.right); - return node; } public void clear() { root = null; + size = 0; } public int size() { - return size(root); + //return size(root); + return size; + } + + @VisibleForTesting + // returns the number of edges from root to leaf along the longest path + int height() + { + return (root != null) ? 
root.height : -1; } private int size(Node node) @@ -303,6 +351,18 @@ private int size(Node node) return 1 + size(node.left) + size(node.right); } + private void checkRebalance() + { + if (root != null) { + int ideal = (int)Math.floor(Math.log10(size + 1)/Math.log10(2)); + double tolerance = ideal * imbalanceTolerance/100.0; + int threshold = ideal + (int)tolerance; + if (root.height > threshold) { + rebalance(); + } + } + } + public String print() { return (root != null) ? root.print(1) : null; diff --git a/processing/src/test/java/org/apache/druid/timeline/IntervalTreeTest.java b/processing/src/test/java/org/apache/druid/timeline/IntervalTreeTest.java index d1f382410372..06ca540149af 100644 --- a/processing/src/test/java/org/apache/druid/timeline/IntervalTreeTest.java +++ b/processing/src/test/java/org/apache/druid/timeline/IntervalTreeTest.java @@ -19,19 +19,14 @@ package org.apache.druid.timeline; -import com.fasterxml.jackson.annotation.JsonAutoDetect; -import com.fasterxml.jackson.annotation.PropertyAccessor; import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.guava.Comparators; import org.joda.time.Interval; import org.junit.Assert; import org.junit.Test; -import java.util.HashMap; import java.util.Map; -import java.util.Random; public class IntervalTreeTest { @@ -159,8 +154,10 @@ public void testTree() @Test public void testRebalance() throws JsonProcessingException { - IntervalTree tree = setupBaseIntervalTree(); + IntervalTree tree = setupSparseOverlapTree(60); + System.out.println(tree.height()); tree.rebalance(); + System.out.println(tree.height()); System.out.println(tree.print()); } @@ -186,8 +183,16 @@ private IntervalTree setupOverlapIntervalTree() } private IntervalTree setupSparseOverlapTree() + { + return setupSparseOverlapTree(null); + } + + private IntervalTree setupSparseOverlapTree(Integer 
imbalanceTolerance) { IntervalTree tree = new IntervalTree<>(Comparators.intervalsByStart(), Comparators.intervalsByEnd()); + if (imbalanceTolerance != null) { + tree.setImbalanceTolerance(imbalanceTolerance); + } tree.add(Intervals.of("2025-01-01T00:00:00/P1D"), "v1"); tree.add(Intervals.of("2025-02-01T00:00:00/P1D"), "v2"); tree.add(Intervals.of("2025-01-12T00:00:00/P1D"), "v3"); From b55c9e29e473544c0040a7f45462fa64335695b8 Mon Sep 17 00:00:00 2001 From: Pramod Immaneni Date: Sat, 11 Oct 2025 00:22:08 -0700 Subject: [PATCH 06/29] Added rebalance and data content checks --- .../apache/druid/timeline/IntervalTree.java | 72 ++++--- .../druid/timeline/IntervalTreeTest.java | 195 +++++++++++------- 2 files changed, 163 insertions(+), 104 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java b/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java index 19eba6d777d3..90e2de81e679 100644 --- a/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java +++ b/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java @@ -25,6 +25,7 @@ import java.util.ArrayList; import java.util.Comparator; import java.util.HashMap; +import java.util.Iterator; import java.util.List; import java.util.Map; @@ -54,7 +55,8 @@ public class IntervalTree Node root; int size; - // A tolerance from ideal depth on either side expressed as a percentage + // Deviation allowed from ideal height for the maximum height on either side of tree, expressed as a + // percentage of ideal height int imbalanceTolerance = 50; public IntervalTree(Comparator comparator) @@ -78,23 +80,32 @@ public void setImbalanceTolerance(int imbalanceTolerance) this.imbalanceTolerance = imbalanceTolerance; } - /* - public static class Entry { - Interval interval; - T value; + @VisibleForTesting + static class Entry + { + Interval interval; + T value; - public Entry(Interval interval, T value) { - this.interval = interval; - this.value = value; - } + 
public Entry(Interval interval, T value) + { + this.interval = interval; + this.value = value; + } + + @Override + public String toString() + { + return "Entry{" + + "interval=" + interval + + ", value=" + value + + '}'; } - */ + } static class Node { Interval interval; T value; - @VisibleForTesting int height; // The min and max of the range for the subtree Interval min; @@ -116,7 +127,8 @@ public String toString() '}'; } - public String print(int level) { + public String print(int level) + { StringBuilder sb = new StringBuilder(); String prefix = "\t".repeat(level); sb.append(prefix).append("{").append("\n"); @@ -152,7 +164,8 @@ private Node insert(Node node, Interval interval, T value) return node; } - if (comparator.compare(interval, node.interval) <= 0) { + // If start of interval matches with node sending to right to preserve stability during in order traversal retrieval + if (comparator.compare(interval, node.interval) < 0) { node.left = insert(node.left, interval, value); } else { node.right = insert(node.right, interval, value); @@ -191,7 +204,6 @@ private void findEncompassing(Node node, Interval interval, Map } /* - // If interval falls outside the min to max range of the subtree don't follow the subtree if ((comparator.compare(interval, node.min) < 0) || (highComparator.compare(node.max, interval) < 0)) { return; @@ -206,24 +218,18 @@ private void findEncompassing(Node node, Interval interval, Map // Matches can be found on both left and right side as the given interval start needs to be just greater // than a node start and end less than the node end - // If there is a potential candidate on left search left + // Look for potential candidates in left and right subtrees + // If interval falls outside the min to max range of the subtree don't follow the subtree + + // Search left if ((node.left != null) && isIntervalInBounds(node.left, interval)) { findEncompassing(node.left, interval, result); } - // If there is a potential candidate on right search 
right + // Search right if (node.right != null && isIntervalInBounds(node.right, interval)) { findEncompassing(node.right, interval, result); } - - /* - int cmp = comparator.compare(interval, node.interval); - if (cmp <= 0) { - findEncompassing(node.left, interval, result); - } else { - findEncompassing(node.right, interval, result); - } - */ } private boolean isIntervalInBounds(Node node, Interval interval) @@ -284,9 +290,17 @@ private void makeLeftChild(Node node, Node childNode) recomputeState(node); } + @VisibleForTesting + Iterator> inOrderTraverse() + { + List> nodes = new ArrayList<>(); + inOrderTraverse(root, nodes); + return nodes.stream().map(node -> new Entry(node.interval, node.value)).iterator(); + } + public void rebalance() { - // In order traversal followed by recursive binary segmentation + // In order traversal followed by repeated binary segmentation of the list List> nodes = new ArrayList<>(); inOrderTraverse(root, nodes); root = constructTree(nodes, 0, nodes.size()); @@ -354,9 +368,9 @@ private int size(Node node) private void checkRebalance() { if (root != null) { - int ideal = (int)Math.floor(Math.log10(size + 1)/Math.log10(2)); - double tolerance = ideal * imbalanceTolerance/100.0; - int threshold = ideal + (int)tolerance; + int ideal = (int) Math.floor(Math.log10(size + 1) / Math.log10(2)); + double tolerance = ideal * imbalanceTolerance / 100.0; + int threshold = ideal + (int) tolerance; if (root.height > threshold) { rebalance(); } diff --git a/processing/src/test/java/org/apache/druid/timeline/IntervalTreeTest.java b/processing/src/test/java/org/apache/druid/timeline/IntervalTreeTest.java index 06ca540149af..f936adfedc14 100644 --- a/processing/src/test/java/org/apache/druid/timeline/IntervalTreeTest.java +++ b/processing/src/test/java/org/apache/druid/timeline/IntervalTreeTest.java @@ -21,12 +21,22 @@ import com.fasterxml.jackson.core.JsonProcessingException; import org.apache.druid.java.util.common.Intervals; +import 
org.apache.druid.java.util.common.Pair; import org.apache.druid.java.util.common.guava.Comparators; import org.joda.time.Interval; import org.junit.Assert; import org.junit.Test; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; import java.util.Map; +import java.util.Random; +import java.util.Set; +import java.util.concurrent.ThreadLocalRandom; +import java.util.function.Consumer; +import java.util.stream.Collectors; public class IntervalTreeTest { @@ -34,45 +44,51 @@ public class IntervalTreeTest @Test public void testSize() { - IntervalTree tree = setupBaseIntervalTree(); + IntervalTree tree = setupTree(baseData); Assert.assertEquals("Size", 6, tree.size()); } + @Test + public void testAdd() + { + IntervalTree tree = setupTree(baseData); + compareData(baseData, tree); + } + @Test public void testMatch() { - IntervalTree tree = setupBaseIntervalTree(); + IntervalTree tree = setupTree(baseData); Map entries = tree.findEncompassing(Intervals.of("2025-01-04T00:00:00/P1D")); Assert.assertEquals(1, entries.size()); - Assert.assertEquals("Match", "v4", entries.get(Intervals.of("2025-01-04T00:00:00/P1D"))); + Assert.assertEquals("Match", "v5", entries.get(Intervals.of("2025-01-04T00:00:00/P1D"))); } @Test public void testNoMatch() { - IntervalTree tree = setupBaseIntervalTree(); + IntervalTree tree = setupTree(baseData); Map entries = tree.findEncompassing(Intervals.of("2025-01-07T00:00:00/P1D")); Assert.assertEquals(0, entries.size()); } - @Test public void testOverlap() { - IntervalTree tree = setupOverlapIntervalTree(); + IntervalTree tree = setupTree(overlapData); Map entries = tree.findEncompassing(Intervals.of("2025-01-02T09:00:00/PT1H")); Assert.assertEquals(2, entries.size()); - Assert.assertEquals("Day match", "v2", entries.get(Intervals.of("2025-01-02T00:00:00/P1D"))); + Assert.assertEquals("Day match", "v4", entries.get(Intervals.of("2025-01-02T00:00:00/P1D"))); Assert.assertEquals("Year match", "v7", 
entries.get(Intervals.of("2025-01-01T00:00:00/P1Y"))); } @Test public void testSparseOverlap() { - IntervalTree tree = setupSparseOverlapTree(); + IntervalTree tree = setupTree(sparseOverlapData); Map entries = tree.findEncompassing(Intervals.of("2025-06-03T00:00:00/P1D")); Assert.assertEquals(3, entries.size()); @@ -84,7 +100,7 @@ public void testSparseOverlap() @Test public void testRemove() { - IntervalTree tree = setupBaseIntervalTree(); + IntervalTree tree = setupTree(baseData); tree.remove(Intervals.of("2025-01-02T00:00:00/P1D")); Assert.assertEquals("Size", 5, tree.size()); } @@ -92,18 +108,18 @@ public void testRemove() @Test public void testRemoveRootAndMatch() { - IntervalTree tree = setupBaseIntervalTree(); + IntervalTree tree = setupTree(baseData); tree.remove(Intervals.of("2025-01-03T00:00:00/P1D")); Assert.assertEquals("Size", 5, tree.size()); Map entries = tree.findEncompassing(Intervals.of("2025-01-04T00:00:00/P1D")); Assert.assertEquals(1, entries.size()); - Assert.assertEquals("Match", "v4", entries.get(Intervals.of("2025-01-04T00:00:00/P1D"))); + Assert.assertEquals("Match", "v5", entries.get(Intervals.of("2025-01-04T00:00:00/P1D"))); } @Test public void testRemoveMultiple() { - IntervalTree tree = setupSparseOverlapTree(); + IntervalTree tree = setupTree(sparseOverlapData); int isize = tree.size(); tree.remove(Intervals.of("2025-01-12T00:00:00/P1D")); tree.remove(Intervals.of("2025-06-03T00:00:00/P1D")); @@ -115,96 +131,125 @@ public void testRemoveMultiple() @Test public void testClear() { - IntervalTree tree = setupBaseIntervalTree(); + IntervalTree tree = setupTree(baseData); tree.clear(); Assert.assertEquals("Size", 0, tree.size()); } - /* @Test - public void testTree() - { - IntervalTree tree = new IntervalTree<>(Comparators.intervalsByStartThenEnd(), Comparators.intervalsByEndThenStart()); - Map data = new HashMap<>(); - Random random = new Random(); - for (int i = 0; i < 10; i++) { - //int month = random.nextInt(12) + 1; - //int day = 
random.nextInt(28) + 1; - int month = 1; - int day = i + 1; - String timestr = "2024-" + month + "-" + day + "T00:00:00/P" + (i + 1) + "D"; - Interval interval = Intervals.of(timestr); - String value = "v" + i; - tree.add(interval, value); - data.put(interval, value); - } - for (Map.Entry entry : data.entrySet()) { - System.out.println(entry.getKey() + ": " + entry.getValue()); + public void testLargeLoadTree() + { + IntervalTree tree = new IntervalTree<>(Comparators.intervalsByStart(), Comparators.intervalsByEnd()); + List> expectedData = new ArrayList<>(); + Set existingIntervals = new HashSet<>(); + Random random = ThreadLocalRandom.current(); + int total = 100000; + int count = 0; + while (count < total) { + int year = random.nextInt(26) + 2000; + int month = random.nextInt(12) + 1; + int day = random.nextInt(28) + 1; + int hour = random.nextInt(23) + 1; + String intervalstr = year + "-" + month + "-" + day + "T" + hour + ":00:00/P" + ((count % 30) + 1) + "D"; + if (!existingIntervals.contains(intervalstr)) { + Interval interval = Intervals.of(intervalstr); + String value = "v" + count; + tree.add(interval, value); + expectedData.add(Pair.of(interval, value)); + existingIntervals.add(intervalstr); + ++count; + } } - /--* - List> intervals = tree.findEncompassing(Intervals.of("2024-01-04T10:00:00/PT1H")); - System.out.println("Matched intervals"); - for (IntervalTree.Entry entry : intervals) { - System.out.println(entry.interval + ": " + entry.value); - } - *--/ + Assert.assertEquals("Size", total, tree.size()); + compareData(expectedData, tree); + } + + @Test + public void testAutoRebalance() throws JsonProcessingException + { + IntervalTree tree = setupTree(sparseOverlapData); + Assert.assertEquals("Height", 4, tree.height()); + compareData(sparseOverlapData, tree); } - */ @Test - public void testRebalance() throws JsonProcessingException + public void testManualRebalance() throws JsonProcessingException { - IntervalTree tree = setupSparseOverlapTree(60); - 
System.out.println(tree.height()); + // Set a high threshold so auto-rebalance does not happen + IntervalTree tree = setupTree(sparseOverlapData, t -> t.setImbalanceTolerance(100)); + Assert.assertEquals("Height", 6, tree.height()); + compareData(sparseOverlapData, tree); tree.rebalance(); - System.out.println(tree.height()); - System.out.println(tree.print()); + Assert.assertEquals("Height", 3, tree.height()); + compareData(sparseOverlapData, tree); } - private IntervalTree setupBaseIntervalTree() + private void compareData(List> inputData, IntervalTree tree) { - IntervalTree tree = new IntervalTree<>(Comparators.intervalsByStart(), Comparators.intervalsByEnd()); - tree.add(Intervals.of("2025-01-03T00:00:00/P1D"), "v3"); - tree.add(Intervals.of("2025-01-01T00:00:00/P1D"), "v1"); - tree.add(Intervals.of("2025-01-02T00:00:00/P1D"), "v2"); - tree.add(Intervals.of("2025-01-04T00:00:00/P1D"), "v4"); - tree.add(Intervals.of("2025-01-05T00:00:00/P1D"), "v5"); - tree.add(Intervals.of("2025-01-06T00:00:00/P1D"), "v6"); + Iterator> iterator = tree.inOrderTraverse(); - return tree; + List> expected = inputData.stream() + .sorted((p1, p2) -> Comparators.intervalsByStart().compare(p1.lhs, p2.lhs)) + .collect(Collectors.toList()); + + compareEntries(expected.iterator(), iterator); } - private IntervalTree setupOverlapIntervalTree() + private void compareEntries(Iterator> expected, Iterator> actual) { - IntervalTree tree = setupBaseIntervalTree(); - tree.add(Intervals.of("2025-01-01T00:00:00/P1Y"), "v7"); + while (actual.hasNext()) { + Assert.assertTrue("Entry available", expected.hasNext()); + Pair expectedEntry = expected.next(); + IntervalTree.Entry actualEntry = actual.next(); + Assert.assertEquals("Interval match", expectedEntry.lhs, actualEntry.interval); + Assert.assertEquals("Value match", expectedEntry.rhs, actualEntry.value); + } + Assert.assertFalse("Matched all entries", expected.hasNext()); + } - return tree; + static List> baseData = new ArrayList<>(); + static 
List> overlapData = new ArrayList<>(); + static List> sparseOverlapData = new ArrayList<>(); + + static { + + baseData.add(Pair.of(Intervals.of("2025-01-03T00:00:00/P1D"), "v1")); + baseData.add(Pair.of(Intervals.of("2025-01-05T00:00:00/P1D"), "v2")); + baseData.add(Pair.of(Intervals.of("2025-01-01T00:00:00/P1D"), "v3")); + baseData.add(Pair.of(Intervals.of("2025-01-02T00:00:00/P1D"), "v4")); + baseData.add(Pair.of(Intervals.of("2025-01-04T00:00:00/P1D"), "v5")); + baseData.add(Pair.of(Intervals.of("2025-01-06T00:00:00/P1D"), "v6")); + + overlapData.addAll(baseData); + overlapData.add(Pair.of(Intervals.of("2025-01-01T00:00:00/P1Y"), "v7")); + + sparseOverlapData.add(Pair.of(Intervals.of("2025-01-01T00:00:00/P1D"), "v1")); + sparseOverlapData.add(Pair.of(Intervals.of("2025-02-01T00:00:00/P1D"), "v2")); + sparseOverlapData.add(Pair.of(Intervals.of("2025-01-12T00:00:00/P1D"), "v3")); + sparseOverlapData.add(Pair.of(Intervals.of("2025-07-12T00:00:00/P1D"), "v4")); + sparseOverlapData.add(Pair.of(Intervals.of("2025-06-03T00:00:00/P1D"), "v5")); + sparseOverlapData.add(Pair.of(Intervals.of("2025-08-09T00:00:00/P1D"), "v6")); + sparseOverlapData.add(Pair.of(Intervals.of("2025-09-04T00:00:00/P1D"), "v7")); + sparseOverlapData.add(Pair.of(Intervals.of("2025-04-02T00:00:00/P1D"), "v8")); + sparseOverlapData.add(Pair.of(Intervals.of("2025-05-10T00:00:00/P1M"), "v9")); + sparseOverlapData.add(Pair.of(Intervals.of("2025-10-06T00:00:00/P1M"), "v10")); + sparseOverlapData.add(Pair.of(Intervals.of("2025-06-01T00:00:00/P1M"), "v11")); } - private IntervalTree setupSparseOverlapTree() + private IntervalTree setupTree(List> inputData) { - return setupSparseOverlapTree(null); + return setupTree(inputData, null); } - private IntervalTree setupSparseOverlapTree(Integer imbalanceTolerance) + private IntervalTree setupTree(List> inputData, Consumer> setupFunc) { IntervalTree tree = new IntervalTree<>(Comparators.intervalsByStart(), Comparators.intervalsByEnd()); - if (imbalanceTolerance 
!= null) { - tree.setImbalanceTolerance(imbalanceTolerance); + if (setupFunc != null) { + setupFunc.accept(tree); + } + for (Pair entry : inputData) { + tree.add(entry.lhs, entry.rhs); } - tree.add(Intervals.of("2025-01-01T00:00:00/P1D"), "v1"); - tree.add(Intervals.of("2025-02-01T00:00:00/P1D"), "v2"); - tree.add(Intervals.of("2025-01-12T00:00:00/P1D"), "v3"); - tree.add(Intervals.of("2025-07-12T00:00:00/P1D"), "v4"); - tree.add(Intervals.of("2025-06-03T00:00:00/P1D"), "v5"); - tree.add(Intervals.of("2025-08-09T00:00:00/P1D"), "v6"); - tree.add(Intervals.of("2025-09-04T00:00:00/P1D"), "v7"); - tree.add(Intervals.of("2025-04-02T00:00:00/P1D"), "v8"); - tree.add(Intervals.of("2025-05-10T00:00:00/P1M"), "v9"); - tree.add(Intervals.of("2025-10-06T00:00:00/P1M"), "v10"); - tree.add(Intervals.of("2025-06-01T00:00:00/P1M"), "v11"); - return tree; } From 54b0a4e1dd8c994056809d21e9bb2a9d1fc0d8c7 Mon Sep 17 00:00:00 2001 From: Pramod Immaneni Date: Mon, 13 Oct 2025 10:42:10 -0700 Subject: [PATCH 07/29] Updated doc --- .../apache/druid/timeline/IntervalTree.java | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java b/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java index 90e2de81e679..4f95b2e5e12b 100644 --- a/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java +++ b/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java @@ -32,16 +32,21 @@ /** * A variation of Interval Trees (https://en.wikipedia.org/wiki/Interval_tree) - * Custom implementation for faster search and specific joda Interval comparator arithmetic used in project + * Custom optimizations for faster interval search and additional support for specific joda Interval comparator + * arithmetic used in the project *

*

- * Multiple intervals are added to the tree and a value can also be associated with each interval. These are stored in - * nodes in the tree along with additional state. The tree can then be searched for intervals matching a given interval. - * A match is any interval in the tree that fully encompasses the given interval. There can be multiple results. + * Multiple intervals can be added to the tree. The tree can then be searched for intervals matching a given interval. + * A match is any interval that fully encompasses or exactly matches the given interval, leading for the search to + * return multiple results. Using the tree, reduces the search time from O(N) iterating through all the intervals to + * find all the matches, to roughly O(log2(N)). Furthermore, a value can be associated with each interval, which is also + * returned during the search. + * *

- * The tree is a binary search tree sorted by interval start time. Additional state containing the minimum and maximum - * interval bounds of the entire subtree under a node, is stored on each node. This state helps speed up search for - * matching intervals by skipping unsuitable subtrees that won't have a match. + * The tree is a binary search tree sorted by interval start time. The intervals are stored as nodes in the tree. + * Additional state containing the minimum and maximum interval bounds of the entire subtree under a node, is also tored + * in each node. This helps speed up the search for matching intervals by skipping unsuitable subtrees that will not + * contain a matching candidate interval. *

* * Not thread safe @@ -203,12 +208,12 @@ private void findEncompassing(Node node, Interval interval, Map return; } - /* - if ((comparator.compare(interval, node.min) < 0) - || (highComparator.compare(node.max, interval) < 0)) { - return; - } - */ + /* + if ((comparator.compare(interval, node.min) < 0) + || (highComparator.compare(node.max, interval) < 0)) { + return; + } + */ if (node.interval.contains(interval)) { //result.add(new Entry<>(node.interval, node.value)); @@ -247,11 +252,6 @@ public void remove(Interval interval) private Node removeNode(Node node, Interval interval) { - /* - if ((node == null) || comparator.compare(node.interval, interval) == 0) { - return null; - } - */ if (node == null) { return null; } From 982b238465dfd3da0d092a7e164f68ae727ed96e Mon Sep 17 00:00:00 2001 From: Pramod Immaneni Date: Mon, 13 Oct 2025 14:42:30 -0700 Subject: [PATCH 08/29] Cleaned up some comments and names --- .../apache/druid/timeline/IntervalTree.java | 43 ++++++++++--------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java b/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java index 4f95b2e5e12b..4d86d2f97c07 100644 --- a/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java +++ b/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java @@ -37,24 +37,30 @@ *

*

* Multiple intervals can be added to the tree. The tree can then be searched for intervals matching a given interval. - * A match is any interval that fully encompasses or exactly matches the given interval, leading for the search to - * return multiple results. Using the tree, reduces the search time from O(N) iterating through all the intervals to - * find all the matches, to roughly O(log2(N)). Furthermore, a value can be associated with each interval, which is also - * returned during the search. + * A match is any interval that fully encompasses or exactly matches the given interval, leading the search to + * potentially return multiple results. Using the tree, reduces the search time from O(N) from iterating through all the + * intervals, to roughly O(log2(N)). Furthermore, a value can be associated with each interval, which is also returned + * during the search. * *

* The tree is a binary search tree sorted by interval start time. The intervals are stored as nodes in the tree. - * Additional state containing the minimum and maximum interval bounds of the entire subtree under a node, is also tored - * in each node. This helps speed up the search for matching intervals by skipping unsuitable subtrees that will not - * contain a matching candidate interval. + * Additional state containing the minimum and maximum interval bounds of the entire subtree under a node, is also + * stored in each node. This helps speed up the search for matching intervals by skipping unsuitable subtrees that will + * not contain a matching candidate interval. + * + * To optimize the balancing cost w.r.t the operation time, the tree is not balanced on every modification operation. + * Rather a configurable imbalance tolerance from the theoretical ideal height of log2(N) is allowed, breaching which + * triggers the rebalance. *

* * Not thread safe */ public class IntervalTree { + // The compartor for comparing the interval start timnes Comparator comparator; - Comparator highComparator; + // The comparator for comparing interval end times + Comparator endComparator; @VisibleForTesting Node root; @@ -64,15 +70,10 @@ public class IntervalTree // percentage of ideal height int imbalanceTolerance = 50; - public IntervalTree(Comparator comparator) - { - this(comparator, comparator); - } - - public IntervalTree(Comparator comparator, Comparator highComparator) + public IntervalTree(Comparator comparator, Comparator endComparator) { this.comparator = comparator; - this.highComparator = highComparator; + this.endComparator = endComparator; } public int getImbalanceTolerance() @@ -184,7 +185,7 @@ private Node insert(Node node, Interval interval, T value) node.min = interval; } - if (highComparator.compare(node.max, interval) < 0) { + if (endComparator.compare(node.max, interval) < 0) { node.max = interval; } @@ -220,8 +221,10 @@ private void findEncompassing(Node node, Interval interval, Map result.put(node.interval, node.value); } - // Matches can be found on both left and right side as the given interval start needs to be just greater - // than a node start and end less than the node end + // If given interval start is greater than or equal to current interval start, matches can still be found on both + // left and right as the given interval only needs to be encompassed. 
+ // If the given interval start is less than current, then we don't need to search the right + // To keep it uniform searching on both sides as we will quickly elimate unsuitable subtrees with the bounds check // Look for potential candidates in left and right subtrees // If interval falls outside the min to max range of the subtree don't follow the subtree @@ -240,7 +243,7 @@ private void findEncompassing(Node node, Interval interval, Map private boolean isIntervalInBounds(Node node, Interval interval) { return (comparator.compare(node.min, interval) <= 0) - && (highComparator.compare(node.max, interval) >= 0); + && (endComparator.compare(node.max, interval) >= 0); } @@ -388,7 +391,7 @@ private Interval maxInterval(Interval interval, Node... nodes) Interval max = interval; for (Node node : nodes) { if (node != null) { - if (highComparator.compare(node.max, max) > 0) { + if (endComparator.compare(node.max, max) > 0) { max = node.max; } } From 99d6628d8d601e4ab3f35445781479fd7968e7d2 Mon Sep 17 00:00:00 2001 From: Pramod Immaneni Date: Mon, 13 Oct 2025 16:53:25 -0700 Subject: [PATCH 09/29] Overwriting value if there is an exact interval match with add --- .../org/apache/druid/timeline/IntervalTree.java | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java b/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java index 4d86d2f97c07..6f6749e28b72 100644 --- a/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java +++ b/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java @@ -119,7 +119,6 @@ static class Node Node left; Node right; - @Override public String toString() { @@ -147,7 +146,6 @@ public String print(int level) sb.append(prefix).append("}"); return sb.toString(); } - } public void add(Interval interval, T value) @@ -170,6 +168,12 @@ private Node insert(Node node, Interval interval, T value) return node; } + // If exact 
interval already exists, just replace the value + if (doesMatch(node, interval)) { + node.value = value; + return node; + } + // If start of interval matches with node sending to right to preserve stability during in order traversal retrieval if (comparator.compare(interval, node.interval) < 0) { node.left = insert(node.left, interval, value); @@ -240,6 +244,11 @@ private void findEncompassing(Node node, Interval interval, Map } } + private boolean doesMatch(Node node, Interval interval) + { + return (comparator.compare(node.interval, interval) == 0) && (endComparator.compare(node.interval, interval) == 0); + } + private boolean isIntervalInBounds(Node node, Interval interval) { return (comparator.compare(node.min, interval) <= 0) From ac4b8e047008e3074acbaf3b3f80a0ea9ef76416 Mon Sep 17 00:00:00 2001 From: Pramod Immaneni Date: Tue, 14 Oct 2025 15:58:32 -0700 Subject: [PATCH 10/29] Addressing review comments --- .../apache/druid/timeline/IntervalTree.java | 92 ++++++++++++------- .../druid/timeline/IntervalTreeTest.java | 92 ++++++++++++++++++- 2 files changed, 148 insertions(+), 36 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java b/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java index 6f6749e28b72..d423cff15380 100644 --- a/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java +++ b/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java @@ -19,6 +19,7 @@ package org.apache.druid.timeline; +import org.apache.druid.java.util.common.StringUtils; import org.jetbrains.annotations.VisibleForTesting; import org.joda.time.Interval; @@ -29,7 +30,6 @@ import java.util.List; import java.util.Map; - /** * A variation of Interval Trees (https://en.wikipedia.org/wiki/Interval_tree) * Custom optimizations for faster interval search and additional support for specific joda Interval comparator @@ -58,7 +58,7 @@ public class IntervalTree { // The compartor for comparing the interval 
start timnes - Comparator comparator; + Comparator startComparator; // The comparator for comparing interval end times Comparator endComparator; @@ -70,9 +70,9 @@ public class IntervalTree // percentage of ideal height int imbalanceTolerance = 50; - public IntervalTree(Comparator comparator, Comparator endComparator) + public IntervalTree(Comparator startComparator, Comparator endComparator) { - this.comparator = comparator; + this.startComparator = startComparator; this.endComparator = endComparator; } @@ -119,21 +119,28 @@ static class Node Node left; Node right; - @Override - public String toString() - { - return "Node{" + - "interval=" + interval + - ", value=" + value + - ", min=" + min + - ", max=" + max + - ", left=" + left + - ", right=" + right + - '}'; - } + private static final String PRINT_FORMAT = "{\n" + + "%sinterval = %s\n" + + "%svalue = %s\n" + + "%sheight = %d\n" + + "%smin = %s\n" + + "%smax = %s\n" + + "%sleft = %s\n" + + "%sright = %s\n" + + "%s}"; public String print(int level) { + String prefix = "\t".repeat(level); + String eprefix = "\t".repeat(level - 1); + return StringUtils.format(PRINT_FORMAT, + prefix, interval, prefix, value, prefix, height, + prefix, min, prefix, max, + prefix, (left != null) ? left.print(level + 1) : null, + prefix, (right != null) ? right.print(level + 1) : null, + eprefix + ); + /* StringBuilder sb = new StringBuilder(); String prefix = "\t".repeat(level); sb.append(prefix).append("{").append("\n"); @@ -145,6 +152,7 @@ public String print(int level) sb.append(prefix).append("right = ").append((right != null) ? 
right.print(level + 1) : null).append("\n"); sb.append(prefix).append("}"); return sb.toString(); + */ } } @@ -175,7 +183,7 @@ private Node insert(Node node, Interval interval, T value) } // If start of interval matches with node sending to right to preserve stability during in order traversal retrieval - if (comparator.compare(interval, node.interval) < 0) { + if (startComparator.compare(interval, node.interval) < 0) { node.left = insert(node.left, interval, value); } else { node.right = insert(node.right, interval, value); @@ -185,7 +193,7 @@ private Node insert(Node node, Interval interval, T value) int rheight = (node.right != null) ? node.right.height : -1; node.height = Math.max(lheight, rheight) + 1; - if (comparator.compare(interval, node.min) < 0) { + if (startComparator.compare(interval, node.min) < 0) { node.min = interval; } @@ -196,16 +204,13 @@ private Node insert(Node node, Interval interval, T value) return node; } - //public List> findEncompassing(Interval interval) public Map findEncompassing(Interval interval) { - //List> result = new ArrayList<>(); Map result = new HashMap<>(); findEncompassing(root, interval, result); return result; } - //private void findEncompassing(Node node, Interval interval, List> result) private void findEncompassing(Node node, Interval interval, Map result) { @@ -228,10 +233,8 @@ private void findEncompassing(Node node, Interval interval, Map // If given interval start is greater than or equal to current interval start, matches can still be found on both // left and right as the given interval only needs to be encompassed. 
// If the given interval start is less than current, then we don't need to search the right - // To keep it uniform searching on both sides as we will quickly elimate unsuitable subtrees with the bounds check - - // Look for potential candidates in left and right subtrees - // If interval falls outside the min to max range of the subtree don't follow the subtree + // To keep it uniform looking for potential candidates in both left and right subtrees + // If interval falls outside the min to max range of a subtree we quickly eliminate it and not follow it // Search left if ((node.left != null) && isIntervalInBounds(node.left, interval)) { @@ -246,12 +249,12 @@ private void findEncompassing(Node node, Interval interval, Map private boolean doesMatch(Node node, Interval interval) { - return (comparator.compare(node.interval, interval) == 0) && (endComparator.compare(node.interval, interval) == 0); + return (startComparator.compare(node.interval, interval) == 0) && (endComparator.compare(node.interval, interval) == 0); } private boolean isIntervalInBounds(Node node, Interval interval) { - return (comparator.compare(node.min, interval) <= 0) + return (startComparator.compare(node.min, interval) <= 0) && (endComparator.compare(node.max, interval) >= 0); } @@ -271,8 +274,15 @@ private Node removeNode(Node node, Interval interval) if (node.interval.equals(interval)) { --size; if ((node.left != null) && (node.right != null)) { - makeLeftChild(node.right, node.left); - return node.right; + // Make the right most child of the left node the new node at current level + Node newNode = unlinkRightLeaf(node.left); + // If left node did not have any right children, it is the matching candidate + if (node.left != newNode) { + newNode.left = node.left; + } + newNode.right = node.right; + recomputeState(newNode); + return newNode; } else if (node.left != null) { return node.left; } else if (node.right != null) { @@ -281,7 +291,7 @@ private Node removeNode(Node node, Interval interval) 
return null; } - if (comparator.compare(interval, node.interval) <= 0) { + if (startComparator.compare(interval, node.interval) < 0) { node.left = removeNode(node.left, interval); } else { node.right = removeNode(node.right, interval); @@ -292,6 +302,22 @@ private Node removeNode(Node node, Interval interval) return node; } + private Node unlinkRightLeaf(Node node) + { + if (node.right == null) { + return node; + } else { + Node rnode = unlinkRightLeaf(node.right); + // If the right node has a left child, make it new right child + if (rnode == node.right) { + node.right = rnode.left; + rnode.left = null; + } + recomputeState(node); + return rnode; + } + } + private void makeLeftChild(Node node, Node childNode) { if (node.left == null) { @@ -305,7 +331,7 @@ private void makeLeftChild(Node node, Node childNode) @VisibleForTesting Iterator> inOrderTraverse() { - List> nodes = new ArrayList<>(); + List> nodes = new ArrayList<>(size); inOrderTraverse(root, nodes); return nodes.stream().map(node -> new Entry(node.interval, node.value)).iterator(); } @@ -313,7 +339,7 @@ Iterator> inOrderTraverse() public void rebalance() { // In order traversal followed by repeated binary segmentation of the list - List> nodes = new ArrayList<>(); + List> nodes = new ArrayList<>(size); inOrderTraverse(root, nodes); root = constructTree(nodes, 0, nodes.size()); } @@ -414,7 +440,7 @@ private Interval minInterval(Interval interval, Node... 
nodes) Interval min = interval; for (Node node : nodes) { if (node != null) { - if (comparator.compare(node.min, min) <= 0) { + if (startComparator.compare(node.min, min) <= 0) { min = node.min; } } diff --git a/processing/src/test/java/org/apache/druid/timeline/IntervalTreeTest.java b/processing/src/test/java/org/apache/druid/timeline/IntervalTreeTest.java index f936adfedc14..052d30b24d19 100644 --- a/processing/src/test/java/org/apache/druid/timeline/IntervalTreeTest.java +++ b/processing/src/test/java/org/apache/druid/timeline/IntervalTreeTest.java @@ -100,9 +100,47 @@ public void testSparseOverlap() @Test public void testRemove() { - IntervalTree tree = setupTree(baseData); - tree.remove(Intervals.of("2025-01-02T00:00:00/P1D")); - Assert.assertEquals("Size", 5, tree.size()); + IntervalTree tree = setupTree(sparseOverlapData); + int size = tree.size(); + + // Remove leaf + String intervalstr = "2025-06-01T00:00:00/P1M"; + Map entries = tree.findEncompassing(Intervals.of(intervalstr)); + Assert.assertEquals("Value size", 1, entries.size()); + String value = entries.values().iterator().next(); + + tree.remove(Intervals.of(intervalstr)); + size--; + Assert.assertEquals("Size", size, tree.size()); + List> expectedData = new ArrayList<>(sparseOverlapData); + expectedData.remove(Pair.of(Intervals.of(intervalstr), value)); + compareData(expectedData, tree); + + // Remove node in penultimate level + intervalstr = "2025-09-04T00:00:00/P1D"; + entries = tree.findEncompassing(Intervals.of(intervalstr)); + Assert.assertEquals("Value size", 1, entries.size()); + value = entries.values().iterator().next(); + + tree.remove(Intervals.of(intervalstr)); + size--; + Assert.assertEquals("Size", size, tree.size()); + expectedData = new ArrayList<>(expectedData); + expectedData.remove(Pair.of(Intervals.of(intervalstr), value)); + compareData(expectedData, tree); + + // Remove node at a higher level + intervalstr = "2025-07-12T00:00:00/P1D"; + entries = 
tree.findEncompassing(Intervals.of(intervalstr)); + Assert.assertEquals("Value size", 1, entries.size()); + value = entries.values().iterator().next(); + + tree.remove(Intervals.of(intervalstr)); + size--; + Assert.assertEquals("Size", size, tree.size()); + expectedData = new ArrayList<>(expectedData); + expectedData.remove(Pair.of(Intervals.of(intervalstr), value)); + compareData(expectedData, tree); } @Test @@ -164,6 +202,54 @@ public void testLargeLoadTree() compareData(expectedData, tree); } + /* + @Test + public void testPerf() + { + IntervalTree tree = new IntervalTree<>(Comparators.intervalsByStart(), Comparators.intervalsByEnd()); + List> expectedData = new ArrayList<>(); + Map mappedData = new HashMap<>(); + Set existingIntervals = new HashSet<>(); + Random random = ThreadLocalRandom.current(); + int total = 10000; + int count = 0; + while (count < total) { + int year = random.nextInt(26) + 2000; + int month = random.nextInt(12) + 1; + int day = random.nextInt(28) + 1; + int hour = random.nextInt(23) + 1; + String intervalstr = year + "-" + month + "-" + day + "T" + hour + ":00:00/P" + ((count % 30) + 1) + "D"; + if (!existingIntervals.contains(intervalstr)) { + Interval interval = Intervals.of(intervalstr); + String value = "v" + count; + tree.add(interval, value); + mappedData.put(interval, value); + expectedData.add(Pair.of(interval, value)); + existingIntervals.add(intervalstr); + ++count; + } + } + long start = System.currentTimeMillis(); + for (int i = 0; i < total; i++) { + Pair pair = expectedData.get(i); + Interval interval = pair.lhs; + for (Map.Entry entry : mappedData.entrySet()) { + if (entry.getKey().contains(interval)) { + break; + } + } + } + System.out.println("Seq find time " + (System.currentTimeMillis() - start)); + start = System.currentTimeMillis(); + for (int i = 0; i < total; i++) { + Pair pair = expectedData.get(i); + Interval interval = pair.lhs; + tree.findEncompassing(interval); + } + System.out.println("Tree find time " + 
(System.currentTimeMillis() - start)); + } + */ + @Test public void testAutoRebalance() throws JsonProcessingException { From 42cafffa160355acd7884dd962cdcdb52d64225c Mon Sep 17 00:00:00 2001 From: Pramod Immaneni Date: Tue, 14 Oct 2025 16:06:14 -0700 Subject: [PATCH 11/29] Added feature flag to control use of interval tree for matching segments --- .../timeline/VersionedIntervalTimeline.java | 40 ++++++++++++++----- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/timeline/VersionedIntervalTimeline.java b/processing/src/main/java/org/apache/druid/timeline/VersionedIntervalTimeline.java index f94a4043a7f7..9203c87c60d9 100644 --- a/processing/src/main/java/org/apache/druid/timeline/VersionedIntervalTimeline.java +++ b/processing/src/main/java/org/apache/druid/timeline/VersionedIntervalTimeline.java @@ -97,6 +97,8 @@ public class VersionedIntervalTimeline versionComparator) { this(versionComparator, false); @@ -211,7 +213,9 @@ public void addAll(final Iterator> TreeMap versionEntry = new TreeMap<>(versionComparator); versionEntry.put(version, entry); allTimelineEntries.put(interval, versionEntry); - allTimeIntervals.add(interval, versionEntry); + if (useTreeIntervalMatch) { + allTimeIntervals.add(interval, versionEntry); + } numObjects.incrementAndGet(); } else { entry = exists.get(version); @@ -271,7 +275,9 @@ public PartitionChunk remove(Interval interval, VersionType version, versionEntries.remove(version); if (versionEntries.isEmpty()) { allTimelineEntries.remove(interval); - allTimeIntervals.remove(interval); + if (useTreeIntervalMatch) { + allTimeIntervals.remove(interval); + } } remove(incompletePartitionsTimeline, interval, entry, true); @@ -302,16 +308,30 @@ public PartitionChunk findChunk(Interval interval, VersionType versi } } - // If an exact interval match is not found look for a matching range - Map> possibleMatches = allTimeIntervals.findEncompassing(interval); - for (Entry> entry : 
possibleMatches.entrySet()) { - Interval eninterval = entry.getKey(); - if (eninterval.contains(interval)) { - TimelineEntry foundEntry = entry.getValue().get(version); - if (foundEntry != null) { - return foundEntry.getPartitionHolder().getChunk(partitionNum); + // If an exact interval match is not found search for an encapsulating interval + + // If tree search is enabled use it else revert to checking all intervals + if (useTreeIntervalMatch) { + Map> possibleMatches = allTimeIntervals.findEncompassing(interval); + for (Entry> entry : possibleMatches.entrySet()) { + Interval eninterval = entry.getKey(); + if (eninterval.contains(interval)) { + TimelineEntry foundEntry = entry.getValue().get(version); + if (foundEntry != null) { + return foundEntry.getPartitionHolder().getChunk(partitionNum); + } } } + } else { + for (Entry> entry : allTimelineEntries.entrySet()) { + if (entry.getKey().contains(interval)) { + TimelineEntry foundEntry = entry.getValue().get(version); + if (foundEntry != null) { + return foundEntry.getPartitionHolder().getChunk(partitionNum); + } + } + } + } return null; From de9138b3013484e810e65c5669ef465b3cc668c7 Mon Sep 17 00:00:00 2001 From: Pramod Immaneni Date: Mon, 20 Oct 2025 12:43:50 -0700 Subject: [PATCH 12/29] Using a single interval field for storing the min to max range for a node --- .../apache/druid/timeline/IntervalTree.java | 76 ++++++++++++++----- 1 file changed, 55 insertions(+), 21 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java b/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java index d423cff15380..bcab59c56922 100644 --- a/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java +++ b/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java @@ -36,9 +36,9 @@ * arithmetic used in the project *

*

- * Multiple intervals can be added to the tree. The tree can then be searched for intervals matching a given interval. - * A match is any interval that fully encompasses or exactly matches the given interval, leading the search to - * potentially return multiple results. Using the tree, reduces the search time from O(N) from iterating through all the + * Multiple intervals can be added to the tree, and an interval can be searched to find all matching intervals in the + * tree. A match is any interval that fully encompasses or exactly matches the given interval, leading the search to + * potentially return multiple results. Using the tree, reduces the search time from O(N) iterating through all the * intervals, to roughly O(log2(N)). Furthermore, a value can be associated with each interval, which is also returned * during the search. * @@ -51,6 +51,8 @@ * To optimize the balancing cost w.r.t the operation time, the tree is not balanced on every modification operation. * Rather a configurable imbalance tolerance from the theoretical ideal height of log2(N) is allowed, breaching which * triggers the rebalance. + * + * Not thread safe. *

* * Not thread safe @@ -114,8 +116,10 @@ static class Node T value; int height; // The min and max of the range for the subtree - Interval min; - Interval max; + //Interval min; + //Interval max; + // The full interval range of the subtree formed by this Node + Interval range; Node left; Node right; @@ -123,8 +127,9 @@ static class Node + "%sinterval = %s\n" + "%svalue = %s\n" + "%sheight = %d\n" - + "%smin = %s\n" - + "%smax = %s\n" + //+ "%smin = %s\n" + //+ "%smax = %s\n" + + "%srange = %s\n" + "%sleft = %s\n" + "%sright = %s\n" + "%s}"; @@ -135,24 +140,11 @@ public String print(int level) String eprefix = "\t".repeat(level - 1); return StringUtils.format(PRINT_FORMAT, prefix, interval, prefix, value, prefix, height, - prefix, min, prefix, max, + prefix, range, //min, prefix, max, prefix, (left != null) ? left.print(level + 1) : null, prefix, (right != null) ? right.print(level + 1) : null, eprefix ); - /* - StringBuilder sb = new StringBuilder(); - String prefix = "\t".repeat(level); - sb.append(prefix).append("{").append("\n"); - sb.append(prefix).append("interval = ").append(interval).append("\n"); - sb.append(prefix).append("value = ").append(value).append("\n"); - sb.append(prefix).append("min = ").append(min).append("\n"); - sb.append(prefix).append("max = ").append(max).append("\n"); - sb.append(prefix).append("left = ").append((left != null) ? left.print(level + 1) : null).append("\n"); - sb.append(prefix).append("right = ").append((right != null) ? right.print(level + 1) : null).append("\n"); - sb.append(prefix).append("}"); - return sb.toString(); - */ } } @@ -170,8 +162,11 @@ private Node insert(Node node, Interval interval, T value) node.interval = interval; node.value = value; node.height = 0; + /* node.min = interval; node.max = interval; + */ + node.range = interval; ++size; return node; } @@ -193,13 +188,24 @@ private Node insert(Node node, Interval interval, T value) int rheight = (node.right != null) ? 
node.right.height : -1; node.height = Math.max(lheight, rheight) + 1; + /* if (startComparator.compare(interval, node.min) < 0) { node.min = interval; } + */ + + if (startComparator.compare(interval, node.range) < 0) { + node.range = node.range.withStart(interval.getStart()); + } + /* if (endComparator.compare(node.max, interval) < 0) { node.max = interval; } + */ + if (endComparator.compare(node.range, interval) < 0) { + node.range = node.range.withEnd(interval.getEnd()); + } return node; } @@ -254,8 +260,11 @@ private boolean doesMatch(Node node, Interval interval) private boolean isIntervalInBounds(Node node, Interval interval) { + return node.range.contains(interval); + /* return (startComparator.compare(node.min, interval) <= 0) && (endComparator.compare(node.max, interval) >= 0); + */ } @@ -372,8 +381,11 @@ private void recomputeState(Node node) int lheight = (node.left != null) ? node.left.height : -1; int rheight = (node.right != null) ? node.right.height : -1; node.height = Math.max(lheight, rheight) + 1; + node.range = computeRange(node.interval, node.left, node.right); + /* node.max = maxInterval(node.interval, node.left, node.right); node.min = minInterval(node.interval, node.left, node.right); + */ } public void clear() @@ -420,6 +432,7 @@ public String print() return (root != null) ? root.print(1) : null; } + /* @SafeVarargs private Interval maxInterval(Interval interval, Node... nodes) { @@ -447,5 +460,26 @@ private Interval minInterval(Interval interval, Node... nodes) } return min; } + */ + + @SafeVarargs + private Interval computeRange(Interval interval, Node... 
nodes) + { + // Find the intervals that have the minimum start and the maximum end + Interval min = interval; + Interval max = interval; + for (Node node : nodes) { + if (node != null) { + if (startComparator.compare(node.range, min) <= 0) { + min = node.range; + } + if (endComparator.compare(node.range, max) > 0) { + max = node.range; + } + } + } + // Return an interval with the min and max + return interval.withStart(min.getStart()).withEnd(max.getEnd()); + } } From ee4dcab6a94e826fba09672db562850749d426d2 Mon Sep 17 00:00:00 2001 From: Pramod Immaneni Date: Tue, 21 Oct 2025 12:44:11 -0700 Subject: [PATCH 13/29] Generified the match function, so it can be used with different types of matches --- .../apache/druid/timeline/IntervalTree.java | 57 +++++++++++-------- 1 file changed, 34 insertions(+), 23 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java b/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java index bcab59c56922..970ea8300c24 100644 --- a/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java +++ b/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java @@ -19,6 +19,7 @@ package org.apache.druid.timeline; +import com.google.common.base.Predicate; import org.apache.druid.java.util.common.StringUtils; import org.jetbrains.annotations.VisibleForTesting; import org.joda.time.Interval; @@ -36,26 +37,25 @@ * arithmetic used in the project *

* <p>
- * Multiple intervals can be added to the tree, and an interval can be searched to find all matching intervals in the - * tree. A match is any interval that fully encompasses or exactly matches the given interval, leading the search to - * potentially return multiple results. Using the tree, reduces the search time from O(N) iterating through all the - * intervals, to roughly O(log2(N)). Furthermore, a value can be associated with each interval, which is also returned - * during the search. + * Multiple different intervals can be added to the tree. It can then be searched to find all intervals matching a given + * interval. The user specifies the match condition, such as encompassing the given interval, overlapping etc. The + * search can return multiple results as multiple intervals in the tree could match the criteria. + * + * Using the tree, reduces the search time from O(N) iterating through all the intervals, to roughly O(log2(N)). + * Furthermore, a value can be associated with each interval, which is also returned in the search result. * *

* The tree is a binary search tree sorted by interval start time. The intervals are stored as nodes in the tree. - * Additional state containing the minimum and maximum interval bounds of the entire subtree under a node, is also + * Additional state containing the minimum and maximum interval bounds of the entire subtree under a node is also * stored in each node. This helps speed up the search for matching intervals by skipping unsuitable subtrees that will * not contain a matching candidate interval. * * To optimize the balancing cost w.r.t the operation time, the tree is not balanced on every modification operation. - * Rather a configurable imbalance tolerance from the theoretical ideal height of log2(N) is allowed, breaching which + * Rather, a configurable imbalance tolerance from the theoretical ideal height of log2(N) is allowed, breaching which * triggers the rebalance. * * Not thread safe. *

- * - * Not thread safe */ public class IntervalTree { @@ -172,7 +172,8 @@ private Node insert(Node node, Interval interval, T value) } // If exact interval already exists, just replace the value - if (doesMatch(node, interval)) { + //if (doesMatch(node, interval)) { + if (node.interval.equals(interval)) { node.value = value; return node; } @@ -212,12 +213,17 @@ private Node insert(Node node, Interval interval, T value) public Map findEncompassing(Interval interval) { + // If given interval start is greater than or equal to current interval start, matches can still be found on both + // left and right as the given interval only needs to be encompassed. + // If the given interval start is less than current, then we don't need to search the right + // To keep it uniform looking for potential candidates in both left and right subtrees + // If interval falls outside the min to max range of a subtree we quickly eliminate it and not follow it Map result = new HashMap<>(); - findEncompassing(root, interval, result); + findMatching(root, result, i -> i.contains(interval)); return result; } - private void findEncompassing(Node node, Interval interval, Map result) + private void findMatching(Node node, Map result, Predicate condition) { if (node == null) { @@ -231,32 +237,35 @@ private void findEncompassing(Node node, Interval interval, Map } */ - if (node.interval.contains(interval)) { + if (condition.apply(node.interval)) { //result.add(new Entry<>(node.interval, node.value)); result.put(node.interval, node.value); } - // If given interval start is greater than or equal to current interval start, matches can still be found on both - // left and right as the given interval only needs to be encompassed. 
- // If the given interval start is less than current, then we don't need to search the right - // To keep it uniform looking for potential candidates in both left and right subtrees - // If interval falls outside the min to max range of a subtree we quickly eliminate it and not follow it - // Search left - if ((node.left != null) && isIntervalInBounds(node.left, interval)) { - findEncompassing(node.left, interval, result); + if ((node.left != null) && condition.apply(node.left.range)) { + findMatching(node.left, result, condition); } // Search right - if (node.right != null && isIntervalInBounds(node.right, interval)) { - findEncompassing(node.right, interval, result); + if (node.right != null && condition.apply(node.right.range)) { + findMatching(node.right, result, condition); } } + public Map findOverlapping(Interval interval) + { + Map result = new HashMap<>(); + findMatching(root, result, i -> i.overlaps(interval)); + return result; + } + + /* private boolean doesMatch(Node node, Interval interval) { return (startComparator.compare(node.interval, interval) == 0) && (endComparator.compare(node.interval, interval) == 0); } + */ private boolean isIntervalInBounds(Node node, Interval interval) { @@ -327,6 +336,7 @@ private Node unlinkRightLeaf(Node node) } } + /* private void makeLeftChild(Node node, Node childNode) { if (node.left == null) { @@ -336,6 +346,7 @@ private void makeLeftChild(Node node, Node childNode) } recomputeState(node); } + */ @VisibleForTesting Iterator> inOrderTraverse() From 236c0e0f1d40db28b33f30c19e8df5dc02e810fc Mon Sep 17 00:00:00 2001 From: Pramod Immaneni Date: Wed, 22 Oct 2025 11:34:08 -0700 Subject: [PATCH 14/29] Addressed review comments --- .../apache/druid/timeline/IntervalTree.java | 111 +----------------- .../druid/timeline/IntervalTreeTest.java | 5 +- 2 files changed, 9 insertions(+), 107 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java 
b/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java index 970ea8300c24..d01adf07ed6e 100644 --- a/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java +++ b/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java @@ -115,9 +115,6 @@ static class Node Interval interval; T value; int height; - // The min and max of the range for the subtree - //Interval min; - //Interval max; // The full interval range of the subtree formed by this Node Interval range; Node left; @@ -127,8 +124,6 @@ static class Node + "%sinterval = %s\n" + "%svalue = %s\n" + "%sheight = %d\n" - //+ "%smin = %s\n" - //+ "%smax = %s\n" + "%srange = %s\n" + "%sleft = %s\n" + "%sright = %s\n" @@ -140,7 +135,7 @@ public String print(int level) String eprefix = "\t".repeat(level - 1); return StringUtils.format(PRINT_FORMAT, prefix, interval, prefix, value, prefix, height, - prefix, range, //min, prefix, max, + prefix, range, prefix, (left != null) ? left.print(level + 1) : null, prefix, (right != null) ? right.print(level + 1) : null, eprefix @@ -162,10 +157,6 @@ private Node insert(Node node, Interval interval, T value) node.interval = interval; node.value = value; node.height = 0; - /* - node.min = interval; - node.max = interval; - */ node.range = interval; ++size; return node; @@ -189,21 +180,10 @@ private Node insert(Node node, Interval interval, T value) int rheight = (node.right != null) ? 
node.right.height : -1; node.height = Math.max(lheight, rheight) + 1; - /* - if (startComparator.compare(interval, node.min) < 0) { - node.min = interval; - } - */ - if (startComparator.compare(interval, node.range) < 0) { node.range = node.range.withStart(interval.getStart()); } - /* - if (endComparator.compare(node.max, interval) < 0) { - node.max = interval; - } - */ if (endComparator.compare(node.range, interval) < 0) { node.range = node.range.withEnd(interval.getEnd()); } @@ -213,11 +193,6 @@ private Node insert(Node node, Interval interval, T value) public Map findEncompassing(Interval interval) { - // If given interval start is greater than or equal to current interval start, matches can still be found on both - // left and right as the given interval only needs to be encompassed. - // If the given interval start is less than current, then we don't need to search the right - // To keep it uniform looking for potential candidates in both left and right subtrees - // If interval falls outside the min to max range of a subtree we quickly eliminate it and not follow it Map result = new HashMap<>(); findMatching(root, result, i -> i.contains(interval)); return result; @@ -230,13 +205,6 @@ private void findMatching(Node node, Map result, Predicate(node.interval, node.value)); result.put(node.interval, node.value); @@ -260,23 +228,6 @@ public Map findOverlapping(Interval interval) return result; } - /* - private boolean doesMatch(Node node, Interval interval) - { - return (startComparator.compare(node.interval, interval) == 0) && (endComparator.compare(node.interval, interval) == 0); - } - */ - - private boolean isIntervalInBounds(Node node, Interval interval) - { - return node.range.contains(interval); - /* - return (startComparator.compare(node.min, interval) <= 0) - && (endComparator.compare(node.max, interval) >= 0); - */ - } - - public void remove(Interval interval) { root = removeNode(root, interval); @@ -290,6 +241,7 @@ private Node removeNode(Node node, 
Interval interval) } if (node.interval.equals(interval)) { + // This is the node to delete --size; if ((node.left != null) && (node.right != null)) { // Make the right most child of the left node the new node at current level @@ -302,19 +254,23 @@ private Node removeNode(Node node, Interval interval) recomputeState(newNode); return newNode; } else if (node.left != null) { + // Right nodde is null, make the left node the new node at current level return node.left; } else if (node.right != null) { + // Left nodde is null, make the right node the new node at current level return node.right; } return null; } + // Current node didn't match, search children if (startComparator.compare(interval, node.interval) < 0) { node.left = removeNode(node.left, interval); } else { node.right = removeNode(node.right, interval); } + // Update our state as a modification may have happened somewhere in our subtree recomputeState(node); return node; @@ -336,18 +292,6 @@ private Node unlinkRightLeaf(Node node) } } - /* - private void makeLeftChild(Node node, Node childNode) - { - if (node.left == null) { - node.left = childNode; - } else { - makeLeftChild(node.left, childNode); - } - recomputeState(node); - } - */ - @VisibleForTesting Iterator> inOrderTraverse() { @@ -393,10 +337,6 @@ private void recomputeState(Node node) int rheight = (node.right != null) ? node.right.height : -1; node.height = Math.max(lheight, rheight) + 1; node.range = computeRange(node.interval, node.left, node.right); - /* - node.max = maxInterval(node.interval, node.left, node.right); - node.min = minInterval(node.interval, node.left, node.right); - */ } public void clear() @@ -407,7 +347,6 @@ public void clear() public int size() { - //return size(root); return size; } @@ -418,14 +357,6 @@ int height() return (root != null) ? 
root.height : -1; } - private int size(Node node) - { - if (node == null) { - return 0; - } - return 1 + size(node.left) + size(node.right); - } - private void checkRebalance() { if (root != null) { @@ -443,36 +374,6 @@ public String print() return (root != null) ? root.print(1) : null; } - /* - @SafeVarargs - private Interval maxInterval(Interval interval, Node... nodes) - { - Interval max = interval; - for (Node node : nodes) { - if (node != null) { - if (endComparator.compare(node.max, max) > 0) { - max = node.max; - } - } - } - return max; - } - - @SafeVarargs - private Interval minInterval(Interval interval, Node... nodes) - { - Interval min = interval; - for (Node node : nodes) { - if (node != null) { - if (startComparator.compare(node.min, min) <= 0) { - min = node.min; - } - } - } - return min; - } - */ - @SafeVarargs private Interval computeRange(Interval interval, Node... nodes) { diff --git a/processing/src/test/java/org/apache/druid/timeline/IntervalTreeTest.java b/processing/src/test/java/org/apache/druid/timeline/IntervalTreeTest.java index 052d30b24d19..063b310174b7 100644 --- a/processing/src/test/java/org/apache/druid/timeline/IntervalTreeTest.java +++ b/processing/src/test/java/org/apache/druid/timeline/IntervalTreeTest.java @@ -25,9 +25,11 @@ import org.apache.druid.java.util.common.guava.Comparators; import org.joda.time.Interval; import org.junit.Assert; +import org.junit.Ignore; import org.junit.Test; import java.util.ArrayList; +import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; @@ -202,7 +204,7 @@ public void testLargeLoadTree() compareData(expectedData, tree); } - /* + @Ignore @Test public void testPerf() { @@ -248,7 +250,6 @@ public void testPerf() } System.out.println("Tree find time " + (System.currentTimeMillis() - start)); } - */ @Test public void testAutoRebalance() throws JsonProcessingException From e63e660d28918ef9e12a08ae47d00f1abadd8b3a Mon Sep 17 00:00:00 2001 From: Pramod 
Immaneni Date: Fri, 24 Oct 2025 07:03:00 -0700 Subject: [PATCH 15/29] Removed commented code --- .../src/main/java/org/apache/druid/timeline/IntervalTree.java | 1 - 1 file changed, 1 deletion(-) diff --git a/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java b/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java index d01adf07ed6e..29fc3d1d4ffa 100644 --- a/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java +++ b/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java @@ -206,7 +206,6 @@ private void findMatching(Node node, Map result, Predicate(node.interval, node.value)); result.put(node.interval, node.value); } From 0badf8c08ce4d0197d53e70b361daace64282101 Mon Sep 17 00:00:00 2001 From: Pramod Immaneni Date: Fri, 24 Oct 2025 11:53:34 -0700 Subject: [PATCH 16/29] Added addition documentation --- .../src/main/java/org/apache/druid/timeline/IntervalTree.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java b/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java index 29fc3d1d4ffa..00cc522dce5a 100644 --- a/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java +++ b/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java @@ -235,6 +235,10 @@ public void remove(Interval interval) private Node removeNode(Node node, Interval interval) { + // When deleting a node, try to replace it with the right most leaf child. + // If it does not exist, i.e., the right most node at the second last level only has a left child and does not have + // a right child, this node becomes the replacement. 
In this scenario, it's remaining left child (subtree) is moved + // up to it's parent if (node == null) { return null; } From e076a740de3ece95c529a236ed6625f7dc8d6195 Mon Sep 17 00:00:00 2001 From: Pramod Immaneni Date: Fri, 24 Oct 2025 13:09:19 -0700 Subject: [PATCH 17/29] Updated doc --- .../main/java/org/apache/druid/timeline/IntervalTree.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java b/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java index 00cc522dce5a..e6b9a5473c63 100644 --- a/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java +++ b/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java @@ -235,10 +235,10 @@ public void remove(Interval interval) private Node removeNode(Node node, Interval interval) { - // When deleting a node, try to replace it with the right most leaf child. - // If it does not exist, i.e., the right most node at the second last level only has a left child and does not have - // a right child, this node becomes the replacement. In this scenario, it's remaining left child (subtree) is moved - // up to it's parent + // When deleting a node, try to replace it with the right most leaf of the left sub-tree. + // If it is does not exist, i.e. the bottom most right node in the left subtree only has a left child and does not + // have a right child, this node becomes the replacement. Also, in this scenario, the left child (subtree) of this + // node is moved up to it's parent as the parent's right child. 
if (node == null) { return null; } From af170439b892d7f3db939df0e1ea23fe2e15224e Mon Sep 17 00:00:00 2001 From: Pramod Immaneni Date: Fri, 24 Oct 2025 13:32:02 -0700 Subject: [PATCH 18/29] Removed commented code --- .../src/main/java/org/apache/druid/timeline/IntervalTree.java | 1 - 1 file changed, 1 deletion(-) diff --git a/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java b/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java index e6b9a5473c63..8cb40fa146a2 100644 --- a/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java +++ b/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java @@ -163,7 +163,6 @@ private Node insert(Node node, Interval interval, T value) } // If exact interval already exists, just replace the value - //if (doesMatch(node, interval)) { if (node.interval.equals(interval)) { node.value = value; return node; From ef483c467afe6c770ead02ee8d69b516652e368a Mon Sep 17 00:00:00 2001 From: Pramod Immaneni Date: Fri, 7 Nov 2025 10:25:57 -0800 Subject: [PATCH 19/29] Cast IntervalTree as a NavigableMap so it can become a drop in replacement for... 
--- .../apache/druid/timeline/IntervalTree.java | 581 +++++++++++++++--- .../timeline/VersionedIntervalTimeline.java | 116 +++- .../druid/timeline/IntervalTreeTest.java | 278 +++++++-- 3 files changed, 813 insertions(+), 162 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java b/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java index 8cb40fa146a2..a9de79906860 100644 --- a/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java +++ b/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java @@ -21,15 +21,23 @@ import com.google.common.base.Predicate; import org.apache.druid.java.util.common.StringUtils; +import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.VisibleForTesting; import org.joda.time.Interval; +import java.util.AbstractMap; +import java.util.AbstractSet; import java.util.ArrayList; import java.util.Comparator; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.NavigableMap; +import java.util.NavigableSet; +import java.util.Set; +import java.util.SortedMap; +import java.util.function.BiConsumer; /** * A variation of Interval Trees (https://en.wikipedia.org/wiki/Interval_tree) @@ -38,9 +46,9 @@ *

* <p>
* Multiple different intervals can be added to the tree. It can then be searched to find all intervals matching a given - * interval. The user specifies the match condition, such as encompassing the given interval, overlapping etc. The + * interval. The user specifies the match condition, such as encompassing the given interval, overlapping, etc. The * search can return multiple results as multiple intervals in the tree could match the criteria. - * + * <p>
* Using the tree, reduces the search time from O(N) iterating through all the intervals, to roughly O(log2(N)). * Furthermore, a value can be associated with each interval, which is also returned in the search result. * @@ -49,15 +57,15 @@ * Additional state containing the minimum and maximum interval bounds of the entire subtree under a node is also * stored in each node. This helps speed up the search for matching intervals by skipping unsuitable subtrees that will * not contain a matching candidate interval. - * + * <p>
* To optimize the balancing cost w.r.t the operation time, the tree is not balanced on every modification operation. * Rather, a configurable imbalance tolerance from the theoretical ideal height of log2(N) is allowed, breaching which * triggers the rebalance. - * + * <p>
* Not thread safe. * </p>
*/ -public class IntervalTree +public class IntervalTree extends AbstractMap implements NavigableMap { // The compartor for comparing the interval start timnes Comparator startComparator; @@ -68,10 +76,12 @@ public class IntervalTree Node root; int size; - // Deviation allowed from ideal height for the maximum height on either side of tree, expressed as a + // Deviation allowed from ideal height for the maximum height on either side of the tree, expressed as a // percentage of ideal height int imbalanceTolerance = 50; + EntrySet entrySet = new EntrySet(); + public IntervalTree(Comparator startComparator, Comparator endComparator) { this.startComparator = startComparator; @@ -88,35 +98,14 @@ public void setImbalanceTolerance(int imbalanceTolerance) this.imbalanceTolerance = imbalanceTolerance; } - @VisibleForTesting - static class Entry - { - Interval interval; - T value; - - public Entry(Interval interval, T value) - { - this.interval = interval; - this.value = value; - } - - @Override - public String toString() - { - return "Entry{" + - "interval=" + interval + - ", value=" + value + - '}'; - } - } - - static class Node + static class Node implements Map.Entry { Interval interval; T value; int height; // The full interval range of the subtree formed by this Node Interval range; + Node parent; Node left; Node right; @@ -141,16 +130,49 @@ public String print(int level) eprefix ); } + + @Override + public Interval getKey() + { + return interval; + } + + @Override + public T getValue() + { + return value; + } + + @Override + public T setValue(T value) + { + T oldValue = this.value; + this.value = value; + return oldValue; + } } - public void add(Interval interval, T value) + @Override + public T put(Interval interval, T value) { - root = insert(root, interval, value); + //root = insert(root, interval, value); + T oldValue = insert(null, false, interval, value); checkRebalance(); + return oldValue; } - private Node insert(Node node, Interval interval, T value) + 
private T insert(Node parent, boolean left, Interval interval, T value) { + // Passing parent so that when a new node is created, it can be added to parent, and we can still use return value + // for another purpose, namely returning the old value if the key already exists in the tree + Node node; + if (parent == null) { + node = root; + } else if (left) { + node = parent.left; + } else { + node = parent.right; + } if (node == null) { node = new Node<>(); @@ -158,46 +180,98 @@ private Node insert(Node node, Interval interval, T value) node.value = value; node.height = 0; node.range = interval; + if (root == null) { + root = node; + } else if (left) { + setLeftNode(parent, node); + } else { + setRightNode(parent, node); + } ++size; - return node; + return null; } - // If exact interval already exists, just replace the value - if (node.interval.equals(interval)) { + T oldValue; + + int cmp = compareInterval(interval, node.interval); + + // If exact interval already exists, just replace the value and return + if (cmp == 0) { + oldValue = node.value; node.value = value; - return node; + return oldValue; } - // If start of interval matches with node sending to right to preserve stability during in order traversal retrieval - if (startComparator.compare(interval, node.interval) < 0) { - node.left = insert(node.left, interval, value); + if (cmp < 0) { + // Go to the left + oldValue = insert(node, true, interval, value); } else { - node.right = insert(node.right, interval, value); + // Go to the right + oldValue = insert(node, false, interval, value); } + recomputeState(node); - int lheight = (node.left != null) ? node.left.height : -1; - int rheight = (node.right != null) ? 
node.right.height : -1; - node.height = Math.max(lheight, rheight) + 1; + //return node; + return oldValue; + } - if (startComparator.compare(interval, node.range) < 0) { - node.range = node.range.withStart(interval.getStart()); + @Override + public T get(Object key) + { + if (!Interval.class.isAssignableFrom(key.getClass())) { + throw new ClassCastException("key must be an instance of Interval"); } - - if (endComparator.compare(node.range, interval) < 0) { - node.range = node.range.withEnd(interval.getEnd()); + Interval interval = (Interval) key; + + T value = null; + Node node = root; + while (node != null) { + int cmp = compareInterval(interval, node.interval); + if (cmp == 0) { + value = node.value; + break; + } + if (cmp < 0) { + node = node.left; + } else { + node = node.right; + } } + return value; + } - return node; + private int compareInterval(Interval interval1, Interval interval2) + { + int cmp = startComparator.compare(interval1, interval2); + if (cmp == 0) { + return endComparator.compare(interval1, interval2); + } + return cmp; } public Map findEncompassing(Interval interval) + { + return findMatching(i -> i.contains(interval)); + } + + public Map findOverlapping(Interval interval) + { + return findMatching(i -> i.overlaps(interval)); + } + + public Map findMatching(Predicate condition) { Map result = new HashMap<>(); - findMatching(root, result, i -> i.contains(interval)); + forEachMatching(condition, result::put); return result; } - private void findMatching(Node node, Map result, Predicate condition) + public void forEachMatching(Predicate condition, BiConsumer action) + { + forEachMatching(root, condition, action); + } + + private void forEachMatching(Node node, Predicate condition, BiConsumer action) { if (node == null) { @@ -205,71 +279,85 @@ private void findMatching(Node node, Map result, Predicate findOverlapping(Interval interval) + @Override + public T remove(Object key) { - Map result = new HashMap<>(); - findMatching(root, result, i -> 
i.overlaps(interval)); - return result; + return remove((Interval) key); } - public void remove(Interval interval) + public T remove(Interval interval) { - root = removeNode(root, interval); + List oldValue = new ArrayList<>(1); + root = removeNode(root, interval, oldValue); + if (root != null) { + root.parent = null; + } checkRebalance(); + return oldValue.size() == 1 ? oldValue.get(0) : null; } - private Node removeNode(Node node, Interval interval) + private Node removeNode(Node node, Interval interval, List oldValue) { // When deleting a node, try to replace it with the right most leaf of the left sub-tree. - // If it is does not exist, i.e. the bottom most right node in the left subtree only has a left child and does not + // If it is does not exist, i.e., the bottom most right node in the left subtree only has a left child and does not // have a right child, this node becomes the replacement. Also, in this scenario, the left child (subtree) of this - // node is moved up to it's parent as the parent's right child. + // node is moved up to its parent as the parent's right child. if (node == null) { return null; } - if (node.interval.equals(interval)) { + int cmp = compareInterval(interval, node.interval); + + if (cmp == 0) { // This is the node to delete --size; + oldValue.add(node.value); if ((node.left != null) && (node.right != null)) { - // Make the right most child of the left node the new node at current level - Node newNode = unlinkRightLeaf(node.left); - // If left node did not have any right children, it is the matching candidate - if (node.left != newNode) { - newNode.left = node.left; + // Make the right bottom most child in the left subtree of the node, the new node at the current level + Node left = node.left; + Node newNode = unlinkRightLeaf(left); + // Make the current left and right children, the left and right children of the new node respectively. 
+ // However, if the new node turns out to be the same as the left node, it means the left node did not have any + // right child. In this case, only set its right child to be the current node's right child. + if (left != newNode) { + // A right child exists + setLeftNode(newNode, left); } - newNode.right = node.right; + setRightNode(newNode, node.right); recomputeState(newNode); return newNode; } else if (node.left != null) { - // Right nodde is null, make the left node the new node at current level + // Right node is null, make the left node the new node at current level return node.left; } else if (node.right != null) { - // Left nodde is null, make the right node the new node at current level + // Left node is null, make the right node the new node at current level return node.right; } return null; } // Current node didn't match, search children - if (startComparator.compare(interval, node.interval) < 0) { - node.left = removeNode(node.left, interval); + if (cmp < 0) { + Node left = removeNode(node.left, interval, oldValue); + setLeftNode(node, left); } else { - node.right = removeNode(node.right, interval); + Node right = removeNode(node.right, interval, oldValue); + setRightNode(node, right); } // Update our state as a modification may have happened somewhere in our subtree @@ -286,7 +374,7 @@ private Node unlinkRightLeaf(Node node) Node rnode = unlinkRightLeaf(node.right); // If the right node has a left child, make it new right child if (rnode == node.right) { - node.right = rnode.left; + setRightNode(node, rnode.left); rnode.left = null; } recomputeState(node); @@ -294,21 +382,16 @@ private Node unlinkRightLeaf(Node node) } } + /* @VisibleForTesting - Iterator> inOrderTraverse() - { - List> nodes = new ArrayList<>(size); - inOrderTraverse(root, nodes); - return nodes.stream().map(node -> new Entry(node.interval, node.value)).iterator(); - } - - public void rebalance() + Iterator> inOrderTraverse() { - // In order traversal followed by repeated binary 
segmentation of the list - List> nodes = new ArrayList<>(size); - inOrderTraverse(root, nodes); - root = constructTree(nodes, 0, nodes.size()); + List> nodes = new ArrayList<>(size); + inOrderTraverse(root, (List)nodes); + return nodes.iterator(); + //return nodes.stream().map(node -> new ReEntry(node.interval, node.value)).iterator(); } + */ private void inOrderTraverse(Node node, List> nodes) { @@ -320,6 +403,15 @@ private void inOrderTraverse(Node node, List> nodes) inOrderTraverse(node.right, nodes); } + public void rebalance() + { + // In order traversal followed by repeated binary segmentation of the list + List> nodes = new ArrayList<>(size); + inOrderTraverse(root, nodes); + root = constructTree(nodes, 0, nodes.size()); + root.parent = null; + } + private Node constructTree(List> nodes, int start, int end) { if (start == end) { @@ -327,12 +419,223 @@ private Node constructTree(List> nodes, int start, int end) } int mid = (start + end - 1) / 2; Node node = nodes.get(mid); - node.left = constructTree(nodes, start, mid); - node.right = constructTree(nodes, mid + 1, end); + + Node left = constructTree(nodes, start, mid); + setLeftNode(node, left); + + Node right = constructTree(nodes, mid + 1, end); + setRightNode(node, right); + recomputeState(node); return node; } + @Override + public Map.Entry lowerEntry(Interval key) + { + Node lnode = null; + Node node = root; + while (node != null) { + // Since we want to return a smaller entry even when there is an exact match, go left in the equality case too + if (startComparator.compare(key, node.getKey()) <= 0) { + node = node.left; + } else { + lnode = node; + node = node.right; + } + } + return lnode; + } + + @Override + public Interval lowerKey(Interval key) + { + Map.Entry entry = lowerEntry(key); + return entry != null ? 
entry.getKey() : null; + } + + @Override + public Map.Entry floorEntry(Interval key) + { + Node fnode = null; + Node node = root; + while (node != null) { + if (node.getKey().equals(key)) { + fnode = node; + break; + } + if (startComparator.compare(key, node.getKey()) < 0) { + node = node.left; + } else { + fnode = node; + node = node.right; + } + } + return fnode; + } + + @Override + public Interval floorKey(Interval key) + { + Map.Entry entry = floorEntry(key); + return entry != null ? entry.getKey() : null; + } + + @Override + public Map.Entry ceilingEntry(Interval key) + { + Node cnode = null; + Node node = root; + while (node != null) { + if (node.getKey().equals(key)) { + cnode = node; + break; + } + if (startComparator.compare(key, node.getKey()) < 0) { + cnode = node; + node = node.left; + } else { + node = node.right; + } + } + return cnode; + } + + @Override + public Interval ceilingKey(Interval key) + { + Entry entry = ceilingEntry(key); + return entry != null ? entry.getKey() : null; + } + + @Override + public Map.Entry higherEntry(Interval key) + { + Node hnode = null; + Node node = root; + while (node != null) { + if (startComparator.compare(key, node.getKey()) < 0) { + hnode = node; + node = node.left; + } else { + node = node.right; + } + } + return hnode; + } + + @Override + public Interval higherKey(Interval key) + { + Entry entry = higherEntry(key); + return entry != null ? entry.getKey() : null; + } + + @Override + public Map.Entry firstEntry() + { + return firstEntry(root); + } + + @Override + public Map.Entry lastEntry() + { + if (root == null) { + return null; + } + Node node = root; + while (node.right != null) { + node = node.right; + } + return node; + } + + @Override + public Interval firstKey() + { + Map.Entry entry = firstEntry(); + return entry != null ? entry.getKey() : null; + } + + @Override + public Interval lastKey() + { + Map.Entry entry = lastEntry(); + return entry != null ? 
entry.getKey() : null; + } + + @Override + public Map.Entry pollFirstEntry() + { + throw new UnsupportedOperationException(); + } + + @Override + public Map.Entry pollLastEntry() + { + throw new UnsupportedOperationException(); + } + + @Override + public NavigableMap descendingMap() + { + throw new UnsupportedOperationException(); + } + + @Override + public NavigableSet navigableKeySet() + { + throw new UnsupportedOperationException(); + } + + @Override + public NavigableSet descendingKeySet() + { + throw new UnsupportedOperationException(); + } + + @Override + public NavigableMap subMap(Interval fromKey, boolean fromInclusive, Interval toKey, boolean toInclusive) + { + throw new UnsupportedOperationException(); + } + + @Override + public NavigableMap headMap(Interval toKey, boolean inclusive) + { + throw new UnsupportedOperationException(); + } + + @Override + public NavigableMap tailMap(Interval fromKey, boolean inclusive) + { + throw new UnsupportedOperationException(); + } + + @Override + public Comparator comparator() + { + throw new UnsupportedOperationException(); + } + + @Override + public SortedMap subMap(Interval fromKey, Interval toKey) + { + throw new UnsupportedOperationException(); + } + + @Override + public SortedMap headMap(Interval toKey) + { + throw new UnsupportedOperationException(); + } + + @Override + public SortedMap tailMap(Interval fromKey) + { + throw new UnsupportedOperationException(); + } + private void recomputeState(Node node) { int lheight = (node.left != null) ? 
node.left.height : -1; @@ -341,24 +644,86 @@ private void recomputeState(Node node) node.range = computeRange(node.interval, node.left, node.right); } + @Override public void clear() { root = null; size = 0; } + @Override + public @NotNull Set> entrySet() + { + return entrySet; + } + + @Override public int size() { return size; } @VisibleForTesting - // returns the number of edges from root to leaf along the longest path + // returns the number of edges from root to leaf along the longest path int height() { return (root != null) ? root.height : -1; } + class EntrySet extends AbstractSet> + { + + // Currently this returns a distinct collection when iterating + @Override + public Iterator> iterator() + { + //return inOrderTraverse(); + return new EntrySetIterator(); + } + + @Override + public int size() + { + return IntervalTree.this.size; + } + + class EntrySetIterator implements Iterator> + { + + Node current = firstEntry(IntervalTree.this.root); + + @Override + public boolean hasNext() + { + return (current != null); + } + + @Override + public Entry next() + { + Entry entry = current; + if (entry == null) { + return entry; + } + // Move current to next node + if (current.right != null) { + current = firstEntry(current.right); + } else { + // No more right children, go up one level to the parent. 
+ // However, if the current node is right child of parent, keep going up till you find a parent who is on the + // right side + Node prev; + do { + prev = current; + current = current.parent; + } while ((current != null) && (current.right == prev)); + } + return entry; + } + } + + } + private void checkRebalance() { if (root != null) { @@ -371,6 +736,38 @@ private void checkRebalance() } } + private Node firstEntry(Node node) + { + if (node == null) { + return null; + } + while (node.left != null) { + node = node.left; + } + return node; + } + + private void setLeftNode(Node node, Node left) + { + if (node.left != left) { + node.left = left; + if (left != null) { + left.parent = node; + } + } + } + + private void setRightNode(Node node, Node right) + { + if (node.right != right) { + node.right = right; + if (right != null) { + right.parent = node; + } + } + } + + public String print() { return (root != null) ? root.print(1) : null; @@ -384,7 +781,7 @@ private Interval computeRange(Interval interval, Node... 
nodes) Interval max = interval; for (Node node : nodes) { if (node != null) { - if (startComparator.compare(node.range, min) <= 0) { + if (startComparator.compare(node.range, min) < 0) { min = node.range; } if (endComparator.compare(node.range, max) > 0) { diff --git a/processing/src/main/java/org/apache/druid/timeline/VersionedIntervalTimeline.java b/processing/src/main/java/org/apache/druid/timeline/VersionedIntervalTimeline.java index 9203c87c60d9..82ba93c161f2 100644 --- a/processing/src/main/java/org/apache/druid/timeline/VersionedIntervalTimeline.java +++ b/processing/src/main/java/org/apache/druid/timeline/VersionedIntervalTimeline.java @@ -27,6 +27,7 @@ import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.UOE; import org.apache.druid.java.util.common.guava.Comparators; +import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.timeline.partition.PartitionChunk; import org.apache.druid.timeline.partition.PartitionHolder; import org.apache.druid.utils.CollectionUtils; @@ -74,19 +75,16 @@ public class VersionedIntervalTimeline> implements TimelineLookup { + private static final Logger logger = new Logger(VersionedIntervalTimeline.class); private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock(true); // Below timelines stores only *visible* timelineEntries // adjusted interval -> timelineEntry - private final NavigableMap completePartitionsTimeline = new TreeMap<>( - Comparators.intervalsByStartThenEnd() - ); + private final NavigableMap completePartitionsTimeline; // IncompletePartitionsTimeline also includes completePartitionsTimeline // adjusted interval -> timelineEntry @VisibleForTesting - final NavigableMap incompletePartitionsTimeline = new TreeMap<>( - Comparators.intervalsByStartThenEnd() - ); + final NavigableMap incompletePartitionsTimeline; // true interval -> version -> timelineEntry private final Map> allTimelineEntries = new HashMap<>(); private final IntervalTree> 
allTimeIntervals = new IntervalTree<>(Comparators.intervalsByStart(), Comparators.intervalsByEnd()); @@ -97,7 +95,56 @@ public class VersionedIntervalTimeline capabilities; + + IntervalTreeMatchMode(Capability... capabilities) + { + this.capabilities = Set.of(capabilities); + } + + public boolean isEnabled(Capability capability) + { + return capabilities.contains(capability); + } + } + + private static IntervalTreeMatchMode intervalTreeMatchMode = IntervalTreeMatchMode.NONE; + + static { + String mode = System.getProperty("experimental.timeline.intervalTreeMatchMode"); + if (mode != null) { + try { + intervalTreeMatchMode = IntervalTreeMatchMode.valueOf(mode); + } + catch (IllegalArgumentException e) { + logger.warn(e, "Unrecognized interval tree match mode specified [%s]", mode); + } + } + } + + { + if (intervalTreeMatchMode.isEnabled(IntervalTreeMatchMode.Capability.QUERY)) { + this.completePartitionsTimeline = new IntervalTree<>(Comparators.intervalsByStart(), Comparators.intervalsByEnd()); + this.incompletePartitionsTimeline = new IntervalTree<>(Comparators.intervalsByStart(), Comparators.intervalsByEnd()); + } else { + this.completePartitionsTimeline = new TreeMap<>(Comparators.intervalsByStartThenEnd()); + this.incompletePartitionsTimeline = new TreeMap<>(Comparators.intervalsByStartThenEnd()); + } + } public VersionedIntervalTimeline(Comparator versionComparator) { @@ -213,8 +260,8 @@ public void addAll(final Iterator> TreeMap versionEntry = new TreeMap<>(versionComparator); versionEntry.put(version, entry); allTimelineEntries.put(interval, versionEntry); - if (useTreeIntervalMatch) { - allTimeIntervals.add(interval, versionEntry); + if (intervalTreeMatchMode.isEnabled(IntervalTreeMatchMode.Capability.ENTRIES)) { + allTimeIntervals.put(interval, versionEntry); } numObjects.incrementAndGet(); } else { @@ -275,7 +322,7 @@ public PartitionChunk remove(Interval interval, VersionType version, versionEntries.remove(version); if (versionEntries.isEmpty()) { 
allTimelineEntries.remove(interval); - if (useTreeIntervalMatch) { + if (intervalTreeMatchMode.isEnabled(IntervalTreeMatchMode.Capability.ENTRIES)) { allTimeIntervals.remove(interval); } } @@ -311,7 +358,7 @@ public PartitionChunk findChunk(Interval interval, VersionType versi // If an exact interval match is not found search for an encapsulating interval // If tree search is enabled use it else revert to checking all intervals - if (useTreeIntervalMatch) { + if (intervalTreeMatchMode.isEnabled(IntervalTreeMatchMode.Capability.ENTRIES)) { Map> possibleMatches = allTimeIntervals.findEncompassing(interval); for (Entry> entry : possibleMatches.entrySet()) { Interval eninterval = entry.getKey(); @@ -783,21 +830,38 @@ private List> lookup(Interval inte timeline = completePartitionsTimeline; } - for (Entry entry : timeline.entrySet()) { - Interval timelineInterval = entry.getKey(); - TimelineEntry val = entry.getValue(); - - // exclude empty partition holders (i.e. tombstones) since they do not add value - // for higher level code...they have no data rows... 
- if ((!skipObjectsWithNoData || val.partitionHolder.hasData()) && timelineInterval.overlaps(interval)) { - retVal.add( - new TimelineObjectHolder<>( - timelineInterval, - val.getTrueInterval(), - val.getVersion(), - PartitionHolder.copyWithOnlyVisibleChunks(val.getPartitionHolder()) - ) - ); + if (intervalTreeMatchMode.isEnabled(IntervalTreeMatchMode.Capability.QUERY)) { + IntervalTree tree = (IntervalTree) timeline; + tree.forEachMatching(timelineInterval -> timelineInterval.overlaps(interval), + (timelineInterval, val) -> { + if (!skipObjectsWithNoData || val.partitionHolder.hasData()) { + retVal.add( + new TimelineObjectHolder<>( + timelineInterval, + val.getTrueInterval(), + val.getVersion(), + PartitionHolder.copyWithOnlyVisibleChunks(val.getPartitionHolder()) + ) + ); + } + }); + } else { + for (Entry entry : timeline.entrySet()) { + Interval timelineInterval = entry.getKey(); + TimelineEntry val = entry.getValue(); + + // exclude empty partition holders (i.e. tombstones) since they do not add value + // for higher level code...they have no data rows... 
+ if ((!skipObjectsWithNoData || val.partitionHolder.hasData()) && timelineInterval.overlaps(interval)) { + retVal.add( + new TimelineObjectHolder<>( + timelineInterval, + val.getTrueInterval(), + val.getVersion(), + PartitionHolder.copyWithOnlyVisibleChunks(val.getPartitionHolder()) + ) + ); + } } } diff --git a/processing/src/test/java/org/apache/druid/timeline/IntervalTreeTest.java b/processing/src/test/java/org/apache/druid/timeline/IntervalTreeTest.java index 063b310174b7..e93b58dd4d12 100644 --- a/processing/src/test/java/org/apache/druid/timeline/IntervalTreeTest.java +++ b/processing/src/test/java/org/apache/druid/timeline/IntervalTreeTest.java @@ -19,7 +19,7 @@ package org.apache.druid.timeline; -import com.fasterxml.jackson.core.JsonProcessingException; +import org.apache.commons.collections.CollectionUtils; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.Pair; import org.apache.druid.java.util.common.guava.Comparators; @@ -29,6 +29,7 @@ import org.junit.Test; import java.util.ArrayList; +import java.util.Collection; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; @@ -51,12 +52,47 @@ public void testSize() } @Test - public void testAdd() + public void testPut() { IntervalTree tree = setupTree(baseData); compareData(baseData, tree); } + @Test + public void testReplace() + { + IntervalTree tree = setupTree(baseData); + Pair entry = baseData.get(2); + Interval interval = entry.lhs; + String value = entry.rhs; + String newValue = value + "n"; + String oldValue = tree.put(interval, newValue); + Assert.assertEquals("Old value match", oldValue, value); + } + + @Test + public void testGet() + { + IntervalTree tree = setupTree(baseData); + baseData.forEach( + (Pair item) -> { + Interval interval = item.lhs; + String evalue = item.rhs; + String value = tree.get(interval); + Assert.assertEquals("value", evalue, value); + } + ); + } + + @Test + public void testValues() + { + IntervalTree tree 
= setupTree(baseData); + Collection values = tree.values(); + Collection bvalues = baseData.stream().map(entry -> entry.rhs).collect(Collectors.toList()); + Assert.assertTrue("values", CollectionUtils.isEqualCollection(bvalues, values)); + } + @Test public void testMatch() { @@ -93,10 +129,11 @@ public void testSparseOverlap() IntervalTree tree = setupTree(sparseOverlapData); Map entries = tree.findEncompassing(Intervals.of("2025-06-03T00:00:00/P1D")); - Assert.assertEquals(3, entries.size()); - Assert.assertEquals("Match 1", "v5", entries.get(Intervals.of("2025-06-03T00:00:00/P1D"))); - Assert.assertEquals("Match 2", "v9", entries.get(Intervals.of("2025-05-10T00:00:00/P1M"))); - Assert.assertEquals("Match 3", "v11", entries.get(Intervals.of("2025-06-01T00:00:00/P1M"))); + Assert.assertEquals(4, entries.size()); + Assert.assertEquals("Match 1", "v1", entries.get(Intervals.of("2025-05-10T00:00:00/P1M"))); + Assert.assertEquals("Match 2", "v7", entries.get(Intervals.of("2025-06-03T00:00:00/P1D"))); + Assert.assertEquals("Match 3", "v13", entries.get(Intervals.of("2025-06-01T00:00:00/P1M"))); + Assert.assertEquals("Match 4", "v14", entries.get(Intervals.of("2025-01-01T00:00:00/P1Y"))); } @Test @@ -105,41 +142,49 @@ public void testRemove() IntervalTree tree = setupTree(sparseOverlapData); int size = tree.size(); + // Remove node that does not exist + String intervalstr = "2025-03-11T00:00:00/P1M"; + String oldValue = tree.remove(Intervals.of(intervalstr)); + Assert.assertEquals("Size", size, tree.size()); + Assert.assertNull("Old value", oldValue); + List> expectedData = new ArrayList<>(sparseOverlapData); + compareData(expectedData, tree); + // Remove leaf - String intervalstr = "2025-06-01T00:00:00/P1M"; - Map entries = tree.findEncompassing(Intervals.of(intervalstr)); - Assert.assertEquals("Value size", 1, entries.size()); - String value = entries.values().iterator().next(); + intervalstr = "2025-06-01T00:00:00/P1M"; + String value = 
tree.get(Intervals.of(intervalstr)); + Assert.assertNotNull("Value", value); - tree.remove(Intervals.of(intervalstr)); + oldValue = tree.remove(Intervals.of(intervalstr)); size--; Assert.assertEquals("Size", size, tree.size()); - List> expectedData = new ArrayList<>(sparseOverlapData); + Assert.assertEquals("Old value", value, oldValue); + expectedData = new ArrayList<>(sparseOverlapData); expectedData.remove(Pair.of(Intervals.of(intervalstr), value)); compareData(expectedData, tree); // Remove node in penultimate level intervalstr = "2025-09-04T00:00:00/P1D"; - entries = tree.findEncompassing(Intervals.of(intervalstr)); - Assert.assertEquals("Value size", 1, entries.size()); - value = entries.values().iterator().next(); + value = tree.get(Intervals.of(intervalstr)); + Assert.assertNotNull("Value", value); - tree.remove(Intervals.of(intervalstr)); + oldValue = tree.remove(Intervals.of(intervalstr)); size--; Assert.assertEquals("Size", size, tree.size()); + Assert.assertEquals("Old value", value, oldValue); expectedData = new ArrayList<>(expectedData); expectedData.remove(Pair.of(Intervals.of(intervalstr), value)); compareData(expectedData, tree); // Remove node at a higher level intervalstr = "2025-07-12T00:00:00/P1D"; - entries = tree.findEncompassing(Intervals.of(intervalstr)); - Assert.assertEquals("Value size", 1, entries.size()); - value = entries.values().iterator().next(); + value = tree.get(Intervals.of(intervalstr)); + Assert.assertNotNull("Value", value); - tree.remove(Intervals.of(intervalstr)); + oldValue = tree.remove(Intervals.of(intervalstr)); size--; Assert.assertEquals("Size", size, tree.size()); + Assert.assertEquals("Old value", value, oldValue); expectedData = new ArrayList<>(expectedData); expectedData.remove(Pair.of(Intervals.of(intervalstr), value)); compareData(expectedData, tree); @@ -194,7 +239,7 @@ public void testLargeLoadTree() if (!existingIntervals.contains(intervalstr)) { Interval interval = Intervals.of(intervalstr); String value = 
"v" + count; - tree.add(interval, value); + tree.put(interval, value); expectedData.add(Pair.of(interval, value)); existingIntervals.add(intervalstr); ++count; @@ -224,7 +269,7 @@ public void testPerf() if (!existingIntervals.contains(intervalstr)) { Interval interval = Intervals.of(intervalstr); String value = "v" + count; - tree.add(interval, value); + tree.put(interval, value); mappedData.put(interval, value); expectedData.add(Pair.of(interval, value)); existingIntervals.add(intervalstr); @@ -252,7 +297,7 @@ public void testPerf() } @Test - public void testAutoRebalance() throws JsonProcessingException + public void testAutoRebalance() { IntervalTree tree = setupTree(sparseOverlapData); Assert.assertEquals("Height", 4, tree.height()); @@ -260,38 +305,179 @@ public void testAutoRebalance() throws JsonProcessingException } @Test - public void testManualRebalance() throws JsonProcessingException + public void testManualRebalance() { // Set a high threshold so auto-rebalance does not happen IntervalTree tree = setupTree(sparseOverlapData, t -> t.setImbalanceTolerance(100)); - Assert.assertEquals("Height", 6, tree.height()); + Assert.assertEquals("Height", 4, tree.height()); compareData(sparseOverlapData, tree); tree.rebalance(); Assert.assertEquals("Height", 3, tree.height()); compareData(sparseOverlapData, tree); } + @Test + public void testIsEmpty() + { + IntervalTree tree = setupTree(sparseOverlapData); + Assert.assertFalse("Not Empty", tree.isEmpty()); + sparseOverlapData.forEach(t -> tree.remove(t.lhs)); + Assert.assertTrue("Empty", tree.isEmpty()); + } + + @Test + public void testFirstEntryAndKey() + { + IntervalTree tree = setupTree(sparseOverlapData); + Map.Entry entry = tree.firstEntry(); + Interval matchInterval = Intervals.of("2025-01-01T00:00:00/P1D"); + Assert.assertEquals("Entry interval", matchInterval, entry.getKey()); + Assert.assertEquals("Entry value", "v2", entry.getValue()); + + Interval interval = tree.firstKey(); + 
Assert.assertEquals("Interval key", matchInterval, interval); + } + + @Test + public void testLastEntryAndKey() + { + IntervalTree tree = setupTree(sparseOverlapData); + Map.Entry entry = tree.lastEntry(); + Interval matchInterval = Intervals.of("2025-10-06T00:00:00/P1M"); + Assert.assertEquals("Entry interval", matchInterval, entry.getKey()); + Assert.assertEquals("Entry value", "v12", entry.getValue()); + + Interval interval = tree.lastKey(); + Assert.assertEquals("Interval key", matchInterval, interval); + } + + @Test + public void testFloorKey() + { + IntervalTree tree = setupTree(sparseOverlapData); + + // Only one smaller entry + Interval floor = tree.floorKey(Intervals.of("2025-01-11T00:00:00/P1D")); + Assert.assertEquals("Floor key 1", Intervals.of("2025-01-01T00:00:00/P1Y"), floor); + + // Exact match + floor = tree.floorKey(Intervals.of("2025-01-12T00:00:00/P1D")); + Assert.assertEquals("Floor key 2", Intervals.of("2025-01-12T00:00:00/P1D"), floor); + + // Random entry + floor = tree.floorKey(Intervals.of("2025-08-01T00:00:00/P1D")); + Assert.assertEquals("Floor key 3", Intervals.of("2025-07-12T00:00:00/P1D"), floor); + + // Last entry + floor = tree.floorKey(Intervals.of("2025-11-01T00:00:00/P1M")); + Assert.assertEquals("Floor key 4", Intervals.of("2025-10-06T00:00:00/P1M"), floor); + + // No smaller entry + floor = tree.floorKey(Intervals.of("2024-12-31T00:00:00/P1D")); + Assert.assertNull("Floor key 5", floor); + } + + @Test + public void testLowerKey() + { + IntervalTree tree = setupTree(sparseOverlapData); + + // Only one smaller entry + Interval lower = tree.lowerKey(Intervals.of("2025-01-11T00:00:00/P1D")); + Assert.assertEquals("Lower key 1", Intervals.of("2025-01-01T00:00:00/P1Y"), lower); + + // Matching interval entry + lower = tree.lowerKey(Intervals.of("2025-01-12T00:00:00/P1D")); + Assert.assertEquals("Lower key 2", Intervals.of("2025-01-01T00:00:00/P1Y"), lower); + + // Random entry + lower = 
tree.lowerKey(Intervals.of("2025-08-01T00:00:00/P1D")); + Assert.assertEquals("Lower key 3", Intervals.of("2025-07-12T00:00:00/P1D"), lower); + + // Last entry + lower = tree.lowerKey(Intervals.of("2025-11-01T00:00:00/P1M")); + Assert.assertEquals("Lower key 4", Intervals.of("2025-10-06T00:00:00/P1M"), lower); + + // No smaller entry + lower = tree.lowerKey(Intervals.of("2024-12-31T00:00:00/P1D")); + Assert.assertNull("Lower key 5", lower); + } + + + @Test + public void testCeiinglKey() + { + IntervalTree tree = setupTree(sparseOverlapData); + + // First entry + Interval ceiling = tree.ceilingKey(Intervals.of("2024-12-31T00:00:00/P1D")); + Assert.assertEquals("Ceiling key 1", Intervals.of("2025-01-01T00:00:00/P1D"), ceiling); + + // Exact match + ceiling = tree.ceilingKey(Intervals.of("2025-09-04T00:00:00/P1D")); + Assert.assertEquals("Ceiling key 2", Intervals.of("2025-09-04T00:00:00/P1D"), ceiling); + + // Random entry + ceiling = tree.ceilingKey(Intervals.of("2025-03-31T00:00:00/P1D")); + Assert.assertEquals("Ceiling key 3", Intervals.of("2025-04-02T00:00:00/P1D"), ceiling); + + // Only one greater entry + ceiling = tree.ceilingKey(Intervals.of("2025-09-28T00:00:00/P1D")); + Assert.assertEquals("Ceiling key 4", Intervals.of("2025-10-06T00:00:00/P1M"), ceiling); + + // No greater entry + ceiling = tree.ceilingKey(Intervals.of("2025-11-01T00:00:00/P1D")); + Assert.assertNull("Ceiling key 5", ceiling); + } + + @Test + public void testHigherKey() + { + IntervalTree tree = setupTree(sparseOverlapData); + + // First entry + Interval higher = tree.higherKey(Intervals.of("2024-12-31T00:00:00/P1D")); + Assert.assertEquals("Higher key 1", Intervals.of("2025-01-01T00:00:00/P1D"), higher); + + // Matching interval of an entry + higher = tree.higherKey(Intervals.of("2025-09-04T00:00:00/P1D")); + Assert.assertEquals("Higher key 2", Intervals.of("2025-10-06T00:00:00/P1M"), higher); + + // Random entry + higher = tree.higherKey(Intervals.of("2025-03-31T00:00:00/P1D")); + 
Assert.assertEquals("Higher key 3", Intervals.of("2025-04-02T00:00:00/P1D"), higher); + + // Only one greater entry + higher = tree.higherKey(Intervals.of("2025-09-28T00:00:00/P1D")); + Assert.assertEquals("Higher key 4", Intervals.of("2025-10-06T00:00:00/P1M"), higher); + + // No greater entry + higher = tree.higherKey(Intervals.of("2025-11-01T00:00:00/P1D")); + Assert.assertNull("Higher key 5", higher); + } + private void compareData(List> inputData, IntervalTree tree) { - Iterator> iterator = tree.inOrderTraverse(); + //Iterator> iterator = tree.inOrderTraverse(); + Iterator> iterator = tree.entrySet().iterator(); List> expected = inputData.stream() - .sorted((p1, p2) -> Comparators.intervalsByStart().compare(p1.lhs, p2.lhs)) + .sorted((p1, p2) -> Comparators.intervalsByStartThenEnd().compare(p1.lhs, p2.lhs)) .collect(Collectors.toList()); compareEntries(expected.iterator(), iterator); } - private void compareEntries(Iterator> expected, Iterator> actual) + private void compareEntries(Iterator> expected, Iterator> actual) { while (actual.hasNext()) { Assert.assertTrue("Entry available", expected.hasNext()); Pair expectedEntry = expected.next(); - IntervalTree.Entry actualEntry = actual.next(); - Assert.assertEquals("Interval match", expectedEntry.lhs, actualEntry.interval); - Assert.assertEquals("Value match", expectedEntry.rhs, actualEntry.value); + Map.Entry actualEntry = actual.next(); + Assert.assertEquals("Interval match", expectedEntry.lhs, actualEntry.getKey()); + Assert.assertEquals("Value match", expectedEntry.rhs, actualEntry.getValue()); } - Assert.assertFalse("Matched all entries", expected.hasNext()); + Assert.assertFalse("No outstanding entries", expected.hasNext()); } static List> baseData = new ArrayList<>(); @@ -310,17 +496,21 @@ private void compareEntries(Iterator> expected, Iterator< overlapData.addAll(baseData); overlapData.add(Pair.of(Intervals.of("2025-01-01T00:00:00/P1Y"), "v7")); - 
sparseOverlapData.add(Pair.of(Intervals.of("2025-01-01T00:00:00/P1D"), "v1")); - sparseOverlapData.add(Pair.of(Intervals.of("2025-02-01T00:00:00/P1D"), "v2")); - sparseOverlapData.add(Pair.of(Intervals.of("2025-01-12T00:00:00/P1D"), "v3")); - sparseOverlapData.add(Pair.of(Intervals.of("2025-07-12T00:00:00/P1D"), "v4")); - sparseOverlapData.add(Pair.of(Intervals.of("2025-06-03T00:00:00/P1D"), "v5")); - sparseOverlapData.add(Pair.of(Intervals.of("2025-08-09T00:00:00/P1D"), "v6")); - sparseOverlapData.add(Pair.of(Intervals.of("2025-09-04T00:00:00/P1D"), "v7")); - sparseOverlapData.add(Pair.of(Intervals.of("2025-04-02T00:00:00/P1D"), "v8")); - sparseOverlapData.add(Pair.of(Intervals.of("2025-05-10T00:00:00/P1M"), "v9")); - sparseOverlapData.add(Pair.of(Intervals.of("2025-10-06T00:00:00/P1M"), "v10")); - sparseOverlapData.add(Pair.of(Intervals.of("2025-06-01T00:00:00/P1M"), "v11")); + sparseOverlapData.add(Pair.of(Intervals.of("2025-05-10T00:00:00/P1M"), "v1")); + sparseOverlapData.add(Pair.of(Intervals.of("2025-01-01T00:00:00/P1D"), "v2")); + sparseOverlapData.add(Pair.of(Intervals.of("2025-02-01T00:00:00/P1D"), "v3")); + sparseOverlapData.add(Pair.of(Intervals.of("2025-01-12T00:00:00/P1D"), "v4")); + sparseOverlapData.add(Pair.of(Intervals.of("2025-07-12T00:00:00/P1D"), "v5")); + sparseOverlapData.add(Pair.of(Intervals.of("2025-02-01T00:00:00/P1M"), "v6")); + sparseOverlapData.add(Pair.of(Intervals.of("2025-06-03T00:00:00/P1D"), "v7")); + sparseOverlapData.add(Pair.of(Intervals.of("2025-08-09T00:00:00/P1D"), "v8")); + sparseOverlapData.add(Pair.of(Intervals.of("2025-08-02T00:00:00/P1M"), "v9")); + sparseOverlapData.add(Pair.of(Intervals.of("2025-09-04T00:00:00/P1D"), "v10")); + sparseOverlapData.add(Pair.of(Intervals.of("2025-04-02T00:00:00/P1D"), "v11")); + sparseOverlapData.add(Pair.of(Intervals.of("2025-10-06T00:00:00/P1M"), "v12")); + sparseOverlapData.add(Pair.of(Intervals.of("2025-06-01T00:00:00/P1M"), "v13")); + 
sparseOverlapData.add(Pair.of(Intervals.of("2025-01-01T00:00:00/P1Y"), "v14")); + } private IntervalTree setupTree(List> inputData) @@ -335,7 +525,7 @@ private IntervalTree setupTree(List> inputData, C setupFunc.accept(tree); } for (Pair entry : inputData) { - tree.add(entry.lhs, entry.rhs); + tree.put(entry.lhs, entry.rhs); } return tree; } From b3c6791bc06b652359e95c9ca14a12f19333469b Mon Sep 17 00:00:00 2001 From: Pramod Immaneni Date: Fri, 7 Nov 2025 17:44:40 -0800 Subject: [PATCH 20/29] Using both start and end dates of the interval during comparison when finding lower and higher entries --- .../apache/druid/timeline/IntervalTree.java | 17 +++--- .../druid/timeline/IntervalTreeTest.java | 57 ++++++++++++------- 2 files changed, 45 insertions(+), 29 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java b/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java index a9de79906860..56f0f7079a09 100644 --- a/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java +++ b/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java @@ -278,16 +278,17 @@ private void forEachMatching(Node node, Predicate condition, BiCons return; } - if (condition.apply(node.interval)) { - action.accept(node.interval, node.value); - //result.put(node.interval, node.value); - } + // Process in-order // Search left if ((node.left != null) && condition.apply(node.left.range)) { forEachMatching(node.left, condition, action); } + if (condition.apply(node.interval)) { + action.accept(node.interval, node.value); + } + // Search right if (node.right != null && condition.apply(node.right.range)) { forEachMatching(node.right, condition, action); @@ -437,7 +438,7 @@ public Map.Entry lowerEntry(Interval key) Node node = root; while (node != null) { // Since we want to return a smaller entry even when there is an exact match, go left in the equality case too - if (startComparator.compare(key, node.getKey()) <= 0) { + if 
(compareInterval(key, node.getKey()) <= 0) { node = node.left; } else { lnode = node; @@ -464,7 +465,7 @@ public Map.Entry floorEntry(Interval key) fnode = node; break; } - if (startComparator.compare(key, node.getKey()) < 0) { + if (compareInterval(key, node.getKey()) < 0) { node = node.left; } else { fnode = node; @@ -491,7 +492,7 @@ public Map.Entry ceilingEntry(Interval key) cnode = node; break; } - if (startComparator.compare(key, node.getKey()) < 0) { + if (compareInterval(key, node.getKey()) < 0) { cnode = node; node = node.left; } else { @@ -514,7 +515,7 @@ public Map.Entry higherEntry(Interval key) Node hnode = null; Node node = root; while (node != null) { - if (startComparator.compare(key, node.getKey()) < 0) { + if (compareInterval(key, node.getKey()) < 0) { hnode = node; node = node.left; } else { diff --git a/processing/src/test/java/org/apache/druid/timeline/IntervalTreeTest.java b/processing/src/test/java/org/apache/druid/timeline/IntervalTreeTest.java index e93b58dd4d12..056f576b72ae 100644 --- a/processing/src/test/java/org/apache/druid/timeline/IntervalTreeTest.java +++ b/processing/src/test/java/org/apache/druid/timeline/IntervalTreeTest.java @@ -360,21 +360,25 @@ public void testFloorKey() Interval floor = tree.floorKey(Intervals.of("2025-01-11T00:00:00/P1D")); Assert.assertEquals("Floor key 1", Intervals.of("2025-01-01T00:00:00/P1Y"), floor); - // Exact match + // Entry with same start date but different end date + floor = tree.lowerKey(Intervals.of("2025-01-01T00:00:00/P1M")); + Assert.assertEquals("Lower key 2", Intervals.of("2025-01-01T00:00:00/P1D"), floor); + + // Matching entry floor = tree.floorKey(Intervals.of("2025-01-12T00:00:00/P1D")); - Assert.assertEquals("Floor key 2", Intervals.of("2025-01-12T00:00:00/P1D"), floor); + Assert.assertEquals("Floor key 3", Intervals.of("2025-01-12T00:00:00/P1D"), floor); // Random entry floor = tree.floorKey(Intervals.of("2025-08-01T00:00:00/P1D")); - Assert.assertEquals("Floor key 3", 
Intervals.of("2025-07-12T00:00:00/P1D"), floor); + Assert.assertEquals("Floor key 4", Intervals.of("2025-07-12T00:00:00/P1D"), floor); // Last entry floor = tree.floorKey(Intervals.of("2025-11-01T00:00:00/P1M")); - Assert.assertEquals("Floor key 4", Intervals.of("2025-10-06T00:00:00/P1M"), floor); + Assert.assertEquals("Floor key 5", Intervals.of("2025-10-06T00:00:00/P1M"), floor); // No smaller entry floor = tree.floorKey(Intervals.of("2024-12-31T00:00:00/P1D")); - Assert.assertNull("Floor key 5", floor); + Assert.assertNull("Floor key 6", floor); } @Test @@ -386,24 +390,27 @@ public void testLowerKey() Interval lower = tree.lowerKey(Intervals.of("2025-01-11T00:00:00/P1D")); Assert.assertEquals("Lower key 1", Intervals.of("2025-01-01T00:00:00/P1Y"), lower); - // Matching interval entry + // Entry with same start date but different end date + lower = tree.lowerKey(Intervals.of("2025-01-01T00:00:00/P1M")); + Assert.assertEquals("Lower key 2", Intervals.of("2025-01-01T00:00:00/P1D"), lower); + + // Matching entry lower = tree.lowerKey(Intervals.of("2025-01-12T00:00:00/P1D")); - Assert.assertEquals("Lower key 2", Intervals.of("2025-01-01T00:00:00/P1Y"), lower); + Assert.assertEquals("Lower key 3", Intervals.of("2025-01-01T00:00:00/P1Y"), lower); // Random entry lower = tree.lowerKey(Intervals.of("2025-08-01T00:00:00/P1D")); - Assert.assertEquals("Lower key 3", Intervals.of("2025-07-12T00:00:00/P1D"), lower); + Assert.assertEquals("Lower key 4", Intervals.of("2025-07-12T00:00:00/P1D"), lower); // Last entry lower = tree.lowerKey(Intervals.of("2025-11-01T00:00:00/P1M")); - Assert.assertEquals("Lower key 4", Intervals.of("2025-10-06T00:00:00/P1M"), lower); + Assert.assertEquals("Lower key 5", Intervals.of("2025-10-06T00:00:00/P1M"), lower); // No smaller entry lower = tree.lowerKey(Intervals.of("2024-12-31T00:00:00/P1D")); - Assert.assertNull("Lower key 5", lower); + Assert.assertNull("Lower key 6", lower); } - @Test public void testCeiinglKey() { @@ -413,21 +420,25 @@ 
public void testCeiinglKey() Interval ceiling = tree.ceilingKey(Intervals.of("2024-12-31T00:00:00/P1D")); Assert.assertEquals("Ceiling key 1", Intervals.of("2025-01-01T00:00:00/P1D"), ceiling); - // Exact match + // Entry with same start date but different end date + ceiling = tree.ceilingKey(Intervals.of("2025-02-01T00:00:00/PT6H")); + Assert.assertEquals("Ceiling key 2", Intervals.of("2025-02-01T00:00:00/P1D"), ceiling); + + // Matching entry ceiling = tree.ceilingKey(Intervals.of("2025-09-04T00:00:00/P1D")); - Assert.assertEquals("Ceiling key 2", Intervals.of("2025-09-04T00:00:00/P1D"), ceiling); + Assert.assertEquals("Ceiling key 3", Intervals.of("2025-09-04T00:00:00/P1D"), ceiling); // Random entry ceiling = tree.ceilingKey(Intervals.of("2025-03-31T00:00:00/P1D")); - Assert.assertEquals("Ceiling key 3", Intervals.of("2025-04-02T00:00:00/P1D"), ceiling); + Assert.assertEquals("Ceiling key 4", Intervals.of("2025-04-02T00:00:00/P1D"), ceiling); // Only one greater entry ceiling = tree.ceilingKey(Intervals.of("2025-09-28T00:00:00/P1D")); - Assert.assertEquals("Ceiling key 4", Intervals.of("2025-10-06T00:00:00/P1M"), ceiling); + Assert.assertEquals("Ceiling key 5", Intervals.of("2025-10-06T00:00:00/P1M"), ceiling); // No greater entry ceiling = tree.ceilingKey(Intervals.of("2025-11-01T00:00:00/P1D")); - Assert.assertNull("Ceiling key 5", ceiling); + Assert.assertNull("Ceiling key 6", ceiling); } @Test @@ -439,21 +450,25 @@ public void testHigherKey() Interval higher = tree.higherKey(Intervals.of("2024-12-31T00:00:00/P1D")); Assert.assertEquals("Higher key 1", Intervals.of("2025-01-01T00:00:00/P1D"), higher); - // Matching interval of an entry + // Entry with same start date but different end date + higher = tree.ceilingKey(Intervals.of("2025-02-01T00:00:00/PT6H")); + Assert.assertEquals("Higher key 2", Intervals.of("2025-02-01T00:00:00/P1D"), higher); + + // Matching entry higher = tree.higherKey(Intervals.of("2025-09-04T00:00:00/P1D")); - 
Assert.assertEquals("Higher key 2", Intervals.of("2025-10-06T00:00:00/P1M"), higher); + Assert.assertEquals("Higher key 3", Intervals.of("2025-10-06T00:00:00/P1M"), higher); // Random entry higher = tree.higherKey(Intervals.of("2025-03-31T00:00:00/P1D")); - Assert.assertEquals("Higher key 3", Intervals.of("2025-04-02T00:00:00/P1D"), higher); + Assert.assertEquals("Higher key 4", Intervals.of("2025-04-02T00:00:00/P1D"), higher); // Only one greater entry higher = tree.higherKey(Intervals.of("2025-09-28T00:00:00/P1D")); - Assert.assertEquals("Higher key 4", Intervals.of("2025-10-06T00:00:00/P1M"), higher); + Assert.assertEquals("Higher key 5", Intervals.of("2025-10-06T00:00:00/P1M"), higher); // No greater entry higher = tree.higherKey(Intervals.of("2025-11-01T00:00:00/P1D")); - Assert.assertNull("Higher key 5", higher); + Assert.assertNull("Higher key 6", higher); } private void compareData(List> inputData, IntervalTree tree) From 01c477feccd4887d4d745cdfc25bfda12b49168d Mon Sep 17 00:00:00 2001 From: Pramod Immaneni Date: Wed, 22 Apr 2026 15:25:28 -0700 Subject: [PATCH 21/29] Made the index configurable via a timeline configuration parameter --- .../timeline/VersionedIntervalTimeline.java | 69 +++++-------------- .../apache/druid/guice/StorageNodeModule.java | 2 + .../segment/indexing/TimelineConfig.java | 42 +++++++++++ .../apache/druid/server/SegmentManager.java | 23 +++++-- 4 files changed, 80 insertions(+), 56 deletions(-) create mode 100644 server/src/main/java/org/apache/druid/segment/indexing/TimelineConfig.java diff --git a/processing/src/main/java/org/apache/druid/timeline/VersionedIntervalTimeline.java b/processing/src/main/java/org/apache/druid/timeline/VersionedIntervalTimeline.java index 82ba93c161f2..fcee29a55141 100644 --- a/processing/src/main/java/org/apache/druid/timeline/VersionedIntervalTimeline.java +++ b/processing/src/main/java/org/apache/druid/timeline/VersionedIntervalTimeline.java @@ -95,49 +95,25 @@ public class VersionedIntervalTimeline 
capabilities; - - IntervalTreeMatchMode(Capability... capabilities) - { - this.capabilities = Set.of(capabilities); - } + // Set this to true to use an interval tree index for the segment intervals + private final boolean fastIntervalSearch; - public boolean isEnabled(Capability capability) - { - return capabilities.contains(capability); - } + public VersionedIntervalTimeline(Comparator versionComparator) + { + this(versionComparator, false); } - private static IntervalTreeMatchMode intervalTreeMatchMode = IntervalTreeMatchMode.NONE; - - static { - String mode = System.getProperty("experimental.timeline.intervalTreeMatchMode"); - if (mode != null) { - try { - intervalTreeMatchMode = IntervalTreeMatchMode.valueOf(mode); - } - catch (IllegalArgumentException e) { - logger.warn(e, "Unrecognized interval tree match mode specified [%s]", mode); - } - } + public VersionedIntervalTimeline(Comparator versionComparator, boolean skipObjectsWithNoData) + { + this(versionComparator, skipObjectsWithNoData, false); } + public VersionedIntervalTimeline(Comparator versionComparator, boolean skipObjectsWithNoData, boolean fastIntervalSearch) { - if (intervalTreeMatchMode.isEnabled(IntervalTreeMatchMode.Capability.QUERY)) { + this.versionComparator = versionComparator; + this.skipObjectsWithNoData = skipObjectsWithNoData; + this.fastIntervalSearch = fastIntervalSearch; + if (fastIntervalSearch) { this.completePartitionsTimeline = new IntervalTree<>(Comparators.intervalsByStart(), Comparators.intervalsByEnd()); this.incompletePartitionsTimeline = new IntervalTree<>(Comparators.intervalsByStart(), Comparators.intervalsByEnd()); } else { @@ -146,17 +122,6 @@ public boolean isEnabled(Capability capability) } } - public VersionedIntervalTimeline(Comparator versionComparator) - { - this(versionComparator, false); - } - - public VersionedIntervalTimeline(Comparator versionComparator, boolean skipObjectsWithNoData) - { - this.versionComparator = versionComparator; - 
this.skipObjectsWithNoData = skipObjectsWithNoData; - } - public static > Iterable getAllObjects( final List> holders ) @@ -260,7 +225,7 @@ public void addAll(final Iterator> TreeMap versionEntry = new TreeMap<>(versionComparator); versionEntry.put(version, entry); allTimelineEntries.put(interval, versionEntry); - if (intervalTreeMatchMode.isEnabled(IntervalTreeMatchMode.Capability.ENTRIES)) { + if (fastIntervalSearch) { allTimeIntervals.put(interval, versionEntry); } numObjects.incrementAndGet(); @@ -322,7 +287,7 @@ public PartitionChunk remove(Interval interval, VersionType version, versionEntries.remove(version); if (versionEntries.isEmpty()) { allTimelineEntries.remove(interval); - if (intervalTreeMatchMode.isEnabled(IntervalTreeMatchMode.Capability.ENTRIES)) { + if (fastIntervalSearch) { allTimeIntervals.remove(interval); } } @@ -358,7 +323,7 @@ public PartitionChunk findChunk(Interval interval, VersionType versi // If an exact interval match is not found search for an encapsulating interval // If tree search is enabled use it else revert to checking all intervals - if (intervalTreeMatchMode.isEnabled(IntervalTreeMatchMode.Capability.ENTRIES)) { + if (fastIntervalSearch) { Map> possibleMatches = allTimeIntervals.findEncompassing(interval); for (Entry> entry : possibleMatches.entrySet()) { Interval eninterval = entry.getKey(); @@ -830,7 +795,7 @@ private List> lookup(Interval inte timeline = completePartitionsTimeline; } - if (intervalTreeMatchMode.isEnabled(IntervalTreeMatchMode.Capability.QUERY)) { + if (fastIntervalSearch) { IntervalTree tree = (IntervalTree) timeline; tree.forEachMatching(timelineInterval -> timelineInterval.overlaps(interval), (timelineInterval, val) -> { diff --git a/server/src/main/java/org/apache/druid/guice/StorageNodeModule.java b/server/src/main/java/org/apache/druid/guice/StorageNodeModule.java index e788949f66b9..602cc470cbab 100644 --- a/server/src/main/java/org/apache/druid/guice/StorageNodeModule.java +++ 
b/server/src/main/java/org/apache/druid/guice/StorageNodeModule.java @@ -33,6 +33,7 @@ import org.apache.druid.query.DruidProcessingConfig; import org.apache.druid.segment.DefaultColumnFormatConfig; import org.apache.druid.segment.column.ColumnConfig; +import org.apache.druid.segment.indexing.TimelineConfig; import org.apache.druid.segment.loading.SegmentLoaderConfig; import org.apache.druid.segment.loading.StorageLocation; import org.apache.druid.segment.loading.StorageLocationSelectorStrategy; @@ -57,6 +58,7 @@ public void configure(Binder binder) JsonConfigProvider.bind(binder, "druid.server", DruidServerConfig.class); JsonConfigProvider.bind(binder, "druid.segmentCache", SegmentLoaderConfig.class); JsonConfigProvider.bind(binder, "druid.indexing.formats", DefaultColumnFormatConfig.class); + JsonConfigProvider.bind(binder, "druid.segment.timeline", TimelineConfig.class); bindLocationSelectorStrategy(binder); binder.bind(ServerTypeConfig.class).toProvider(Providers.of(null)); binder.bind(ColumnConfig.class).to(DruidProcessingConfig.class).in(LazySingleton.class); diff --git a/server/src/main/java/org/apache/druid/segment/indexing/TimelineConfig.java b/server/src/main/java/org/apache/druid/segment/indexing/TimelineConfig.java new file mode 100644 index 000000000000..73c19aa99c55 --- /dev/null +++ b/server/src/main/java/org/apache/druid/segment/indexing/TimelineConfig.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.indexing; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; + +import javax.annotation.Nullable; + +public class TimelineConfig +{ + @JsonProperty + private final boolean fastIntervalSearch; + + @JsonCreator + public TimelineConfig(@JsonProperty("fastIntervalSearch") @Nullable Boolean fastIntervalSearch) + { + this.fastIntervalSearch = fastIntervalSearch != null && fastIntervalSearch; + } + + public boolean isFastIntervalSearch() + { + return fastIntervalSearch; + } +} diff --git a/server/src/main/java/org/apache/druid/server/SegmentManager.java b/server/src/main/java/org/apache/druid/server/SegmentManager.java index af95eafaad6a..405e18348fcf 100644 --- a/server/src/main/java/org/apache/druid/server/SegmentManager.java +++ b/server/src/main/java/org/apache/druid/server/SegmentManager.java @@ -36,6 +36,7 @@ import org.apache.druid.segment.SegmentLazyLoadFailCallback; import org.apache.druid.segment.SegmentMapFunction; import org.apache.druid.segment.SegmentReference; +import org.apache.druid.segment.indexing.TimelineConfig; import org.apache.druid.segment.join.table.IndexedTable; import org.apache.druid.segment.join.table.ReferenceCountedIndexedTableProvider; import org.apache.druid.segment.loading.AcquireSegmentAction; @@ -70,12 +71,21 @@ public class SegmentManager private final SegmentCacheManager cacheManager; + private final TimelineConfig timelineConfig; + private final ConcurrentHashMap dataSources = new ConcurrentHashMap<>(); - @Inject 
public SegmentManager(SegmentCacheManager cacheManager) + { + this(cacheManager, new TimelineConfig(false)); + } + + + @Inject + public SegmentManager(SegmentCacheManager cacheManager, TimelineConfig timelineConfig) { this.cacheManager = cacheManager; + this.timelineConfig = timelineConfig; } @VisibleForTesting @@ -290,7 +300,7 @@ private void loadSegmentInternal( dataSources.compute( dataSegment.getDataSource(), (k, v) -> { - final DataSourceState dataSourceState = v == null ? new DataSourceState() : v; + final DataSourceState dataSourceState = v == null ? new DataSourceState(timelineConfig) : v; final VersionedIntervalTimeline loadedIntervals = dataSourceState.getTimeline(); final PartitionChunk entry = loadedIntervals.findChunk( @@ -465,8 +475,7 @@ public void shutdown() */ public static class DataSourceState { - private final VersionedIntervalTimeline timeline = - new VersionedIntervalTimeline<>(Ordering.natural()); + private final VersionedIntervalTimeline timeline; private final ConcurrentHashMap tablesLookup = new ConcurrentHashMap<>(); private long totalSegmentSize; @@ -474,6 +483,12 @@ public static class DataSourceState private long rowCount; private final SegmentRowCountDistribution segmentRowCountDistribution = new SegmentRowCountDistribution(); + @Inject + public DataSourceState(TimelineConfig timelineConfig) + { + timeline = new VersionedIntervalTimeline<>(Ordering.natural(), false, timelineConfig.isFastIntervalSearch()); + } + private void addSegment(DataSegment segment, long numOfRows) { totalSegmentSize += segment.getSize(); From 1d3ddc563dcb706d71153e76343cdde43cda446a Mon Sep 17 00:00:00 2001 From: Pramod Immaneni Date: Wed, 6 May 2026 12:45:16 -0700 Subject: [PATCH 22/29] Parameterized VersionedIntervalTimeline tests to run against fast interval search as well --- .../apache/druid/timeline/IntervalTree.java | 4 ++-- ...ionedIntervalTimelineSpecificDataTest.java | 22 ++++++++++++++++++- .../VersionedIntervalTimelineTest.java | 20 
++++++++++++++++- .../VersionedIntervalTimelineTestBase.java | 7 +++++- 4 files changed, 48 insertions(+), 5 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java b/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java index 56f0f7079a09..db28309eb6af 100644 --- a/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java +++ b/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java @@ -118,7 +118,7 @@ static class Node implements Map.Entry + "%sright = %s\n" + "%s}"; - public String print(int level) + private String print(int level) { String prefix = "\t".repeat(level); String eprefix = "\t".repeat(level - 1); @@ -768,7 +768,7 @@ private void setRightNode(Node node, Node right) } } - + @VisibleForTesting public String print() { return (root != null) ? root.print(1) : null; diff --git a/processing/src/test/java/org/apache/druid/timeline/VersionedIntervalTimelineSpecificDataTest.java b/processing/src/test/java/org/apache/druid/timeline/VersionedIntervalTimelineSpecificDataTest.java index a41eda2fc360..46d57f06bbd0 100644 --- a/processing/src/test/java/org/apache/druid/timeline/VersionedIntervalTimelineSpecificDataTest.java +++ b/processing/src/test/java/org/apache/druid/timeline/VersionedIntervalTimelineSpecificDataTest.java @@ -32,20 +32,40 @@ import org.junit.Assert; import org.junit.Before; import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; import java.util.Arrays; +import java.util.Collection; import java.util.Collections; /** * This test class is separated from {@link VersionedIntervalTimelineTest} because it populates specific data for tests * in {@link #setUp()}. 
*/ +@RunWith(Parameterized.class) public class VersionedIntervalTimelineSpecificDataTest extends VersionedIntervalTimelineTestBase { + @Parameterized.Parameters + public static Collection parameters() + { + return Arrays.asList( + false, + true + ); + } + + public VersionedIntervalTimelineSpecificDataTest(boolean fastIntervalSearch) + { + this.fastIntervalSearch = fastIntervalSearch; + } + + private final boolean fastIntervalSearch; + @Before public void setUp() { - timeline = makeStringIntegerTimeline(); + timeline = makeStringIntegerTimeline(fastIntervalSearch); add("2011-04-01/2011-04-03", "1", 2); add("2011-04-03/2011-04-06", "1", 3); diff --git a/processing/src/test/java/org/apache/druid/timeline/VersionedIntervalTimelineTest.java b/processing/src/test/java/org/apache/druid/timeline/VersionedIntervalTimelineTest.java index bd2e61e20cbf..ca332773e829 100644 --- a/processing/src/test/java/org/apache/druid/timeline/VersionedIntervalTimelineTest.java +++ b/processing/src/test/java/org/apache/druid/timeline/VersionedIntervalTimelineTest.java @@ -34,6 +34,8 @@ import org.junit.Assert; import org.junit.Before; import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; import java.util.Arrays; import java.util.Collection; @@ -43,13 +45,29 @@ /** */ +@RunWith(Parameterized.class) public class VersionedIntervalTimelineTest extends VersionedIntervalTimelineTestBase { + @Parameterized.Parameters + public static Collection parameters() + { + return Arrays.asList( + false, + true + ); + } + + public VersionedIntervalTimelineTest(boolean fastIntervalSearch) + { + this.fastIntervalSearch = fastIntervalSearch; + } + + private final boolean fastIntervalSearch; @Before public void setUp() { - timeline = makeStringIntegerTimeline(); + timeline = makeStringIntegerTimeline(fastIntervalSearch); } @Test diff --git a/processing/src/test/java/org/apache/druid/timeline/VersionedIntervalTimelineTestBase.java 
b/processing/src/test/java/org/apache/druid/timeline/VersionedIntervalTimelineTestBase.java index 53ab4db3cc5e..67f1658e1380 100644 --- a/processing/src/test/java/org/apache/druid/timeline/VersionedIntervalTimelineTestBase.java +++ b/processing/src/test/java/org/apache/druid/timeline/VersionedIntervalTimelineTestBase.java @@ -110,7 +110,12 @@ static void assertSingleElementChunks( static VersionedIntervalTimeline makeStringIntegerTimeline() { - return new VersionedIntervalTimeline<>(Ordering.natural()); + return makeStringIntegerTimeline(false); + } + + static VersionedIntervalTimeline makeStringIntegerTimeline(boolean fastIntervalSearch) + { + return new VersionedIntervalTimeline<>(Ordering.natural(), false, fastIntervalSearch); } VersionedIntervalTimeline timeline; From 2113d1812ca733a4fa56ea399b2f180e1da9d2ac Mon Sep 17 00:00:00 2001 From: Pramod Immaneni Date: Wed, 6 May 2026 13:55:23 -0700 Subject: [PATCH 23/29] Using comparators for exact match checks to take Chronology in account --- .../org/apache/druid/timeline/IntervalTree.java | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java b/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java index db28309eb6af..8414c10f99b9 100644 --- a/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java +++ b/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java @@ -226,12 +226,11 @@ public T get(Object key) T value = null; Node node = root; while (node != null) { - int cmp = compareInterval(interval, node.interval); + int cmp = compareInterval(node.getKey(), interval); if (cmp == 0) { value = node.value; break; - } - if (cmp < 0) { + } else if (cmp > 0) { node = node.left; } else { node = node.right; @@ -461,11 +460,11 @@ public Map.Entry floorEntry(Interval key) Node fnode = null; Node node = root; while (node != null) { - if (node.getKey().equals(key)) { + int cmp = 
compareInterval(node.getKey(), key); + if (cmp == 0) { fnode = node; break; - } - if (compareInterval(key, node.getKey()) < 0) { + } else if (cmp > 0) { node = node.left; } else { fnode = node; @@ -488,11 +487,11 @@ public Map.Entry ceilingEntry(Interval key) Node cnode = null; Node node = root; while (node != null) { - if (node.getKey().equals(key)) { + int cmp = compareInterval(node.getKey(), key); + if (cmp == 0) { cnode = node; break; - } - if (compareInterval(key, node.getKey()) < 0) { + } else if (cmp > 0) { cnode = node; node = node.left; } else { From 5c83921d73120328a1f7b3c5fd2d612cbf6a6c4c Mon Sep 17 00:00:00 2001 From: Pramod Immaneni Date: Thu, 7 May 2026 16:37:08 -0700 Subject: [PATCH 24/29] Added ability to specify a separate condition for the range check --- .../org/apache/druid/timeline/IntervalTree.java | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java b/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java index 8414c10f99b9..c62809e8e5fa 100644 --- a/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java +++ b/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java @@ -267,10 +267,15 @@ public Map findMatching(Predicate condition) public void forEachMatching(Predicate condition, BiConsumer action) { - forEachMatching(root, condition, action); + forEachMatching(condition, condition, action); } - private void forEachMatching(Node node, Predicate condition, BiConsumer action) + public void forEachMatching(Predicate condition, Predicate renageCondition, BiConsumer action) + { + forEachMatching(root, condition, renageCondition, action); + } + + private void forEachMatching(Node node, Predicate condition, Predicate rangeCondition, BiConsumer action) { if (node == null) { @@ -280,8 +285,8 @@ private void forEachMatching(Node node, Predicate condition, BiCons // Process in-order // Search left - if ((node.left 
!= null) && condition.apply(node.left.range)) { - forEachMatching(node.left, condition, action); + if ((node.left != null) && rangeCondition.apply(node.left.range)) { + forEachMatching(node.left, condition, rangeCondition, action); } if (condition.apply(node.interval)) { @@ -289,8 +294,8 @@ private void forEachMatching(Node node, Predicate condition, BiCons } // Search right - if (node.right != null && condition.apply(node.right.range)) { - forEachMatching(node.right, condition, action); + if (node.right != null && rangeCondition.apply(node.right.range)) { + forEachMatching(node.right, condition, rangeCondition, action); } } From 053423f2b1f0dc01ab7c767cd74c6ddb8390fb06 Mon Sep 17 00:00:00 2001 From: Pramod Immaneni Date: Fri, 8 May 2026 18:53:02 -0700 Subject: [PATCH 25/29] Added methods for finding matches with full traversal and documented the search methods --- .../apache/druid/timeline/IntervalTree.java | 65 +++++++++++++++++-- 1 file changed, 61 insertions(+), 4 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java b/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java index c62809e8e5fa..1cefc19256f1 100644 --- a/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java +++ b/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java @@ -258,6 +258,14 @@ public Map findOverlapping(Interval interval) return findMatching(i -> i.overlaps(interval)); } + /** + * Get all entries matching a given condition + * @param condition The match condition + * + * This condition should not only return true when a node matches the condition but also when a child node range + * matches. It is a convenience method for {@link #forEachMatching(Predicate, Predicate, BiConsumer)} and see the + * method's documentation for more information. It calls the method with rangeCondition set to be same as condition. 
+ */ public Map findMatching(Predicate condition) { Map result = new HashMap<>(); @@ -265,14 +273,63 @@ public Map findMatching(Predicate condition) return result; } + /** + * Find entries matching a given condition by doing a full traversal. + * @param condition The match condition + * + * The method traverses through all the nodes of the tree looking for matches. + */ + public Map findMatchingFullTraversal(Predicate condition) + { + Map result = new HashMap<>(); + forEachMatchingFullTraversal(condition, result::put); + return result; + } + + /** + * Perform on action for matching nodes. + * @param condition The match condition + * @param action The action + * + * This condition should not only return true when a node matches the condition but also when the child node range + * matches. It is a convenience method for {@link #forEachMatching(Predicate, Predicate, BiConsumer)} and see the + * method's documentation for more information. It calls the method with rangeCondition set to be same as condition. + */ public void forEachMatching(Predicate condition, BiConsumer action) { forEachMatching(condition, condition, action); } - public void forEachMatching(Predicate condition, Predicate renageCondition, BiConsumer action) + /** + * Perform on action for matching nodes by doing a full traversal. + * @param condition The match condition + * @param action The action + * + * The method traverses through all the nodes of the tree looking for matches. 
+ */ + public void forEachMatchingFullTraversal(Predicate condition, BiConsumer action) + { + forEachMatching(condition, null, action); + } + + /** + * Perform an action for matching nodes + * @param condition The condition to match for the node + * @param rangeCondition The condition to check a child node for, to determine whether to traverse the subtree + * @param action The action to perform + * + * The rangeCondition is applied on the interval range of the child node and only if the condition returns true is the + * child subtree traversed. Interval range is the min start time to max end time for all the nodes in the child + * subtree. This is a lookup speedup optimization. If rangeCondition is null, the check is skipped and all the + * children are traversed to find matches. + * + * In some cases such as finding nodes overlapping the given interval or encompassing the given interval, the same + * predicate can be used for condition and rangeCondition. In other situations a full traversal maybe needed and a + * null can be passed in for rangeCondition. There are helper methods for these. 
+ */ + public void forEachMatching(Predicate condition, Predicate rangeCondition, BiConsumer action) { - forEachMatching(root, condition, renageCondition, action); + forEachMatching(root, condition, rangeCondition, action); } private void forEachMatching(Node node, Predicate condition, Predicate rangeCondition, BiConsumer action) @@ -285,7 +342,7 @@ private void forEachMatching(Node node, Predicate condition, Predic // Process in-order // Search left - if ((node.left != null) && rangeCondition.apply(node.left.range)) { + if ((node.left != null) && ((rangeCondition == null) || rangeCondition.apply(node.left.range))) { forEachMatching(node.left, condition, rangeCondition, action); } @@ -294,7 +351,7 @@ private void forEachMatching(Node node, Predicate condition, Predic } // Search right - if (node.right != null && rangeCondition.apply(node.right.range)) { + if (node.right != null && ((rangeCondition == null) || rangeCondition.apply(node.right.range))) { forEachMatching(node.right, condition, rangeCondition, action); } } From 446962ba141cb7e49655a453298dd669f68a0877 Mon Sep 17 00:00:00 2001 From: Pramod Immaneni Date: Mon, 11 May 2026 12:17:53 -0700 Subject: [PATCH 26/29] Passing locale to fix forbidden api validation error --- .../src/main/java/org/apache/druid/timeline/IntervalTree.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java b/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java index 1cefc19256f1..c6b35f32ac90 100644 --- a/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java +++ b/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java @@ -20,7 +20,6 @@ package org.apache.druid.timeline; import com.google.common.base.Predicate; -import org.apache.druid.java.util.common.StringUtils; import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.VisibleForTesting; import org.joda.time.Interval; @@ -32,6 +31,7 @@ 
import java.util.HashMap; import java.util.Iterator; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.NavigableMap; import java.util.NavigableSet; @@ -122,7 +122,7 @@ private String print(int level) { String prefix = "\t".repeat(level); String eprefix = "\t".repeat(level - 1); - return StringUtils.format(PRINT_FORMAT, + return String.format(Locale.ENGLISH, PRINT_FORMAT, prefix, interval, prefix, value, prefix, height, prefix, range, prefix, (left != null) ? left.print(level + 1) : null, From 51a1728de88e8915e967838b01d5afbbd1099801 Mon Sep 17 00:00:00 2001 From: Pramod Immaneni Date: Wed, 1 Jul 2026 15:16:13 -0700 Subject: [PATCH 27/29] Removed comments and unused injection --- .../java/org/apache/druid/timeline/IntervalTree.java | 12 ------------ .../druid/timeline/VersionedIntervalTimeline.java | 4 ++-- .../java/org/apache/druid/server/SegmentManager.java | 1 - 3 files changed, 2 insertions(+), 15 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java b/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java index c6b35f32ac90..51091f0c511e 100644 --- a/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java +++ b/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java @@ -444,17 +444,6 @@ private Node unlinkRightLeaf(Node node) } } - /* - @VisibleForTesting - Iterator> inOrderTraverse() - { - List> nodes = new ArrayList<>(size); - inOrderTraverse(root, (List)nodes); - return nodes.iterator(); - //return nodes.stream().map(node -> new ReEntry(node.interval, node.value)).iterator(); - } - */ - private void inOrderTraverse(Node node, List> nodes) { if (node == null) { @@ -739,7 +728,6 @@ class EntrySet extends AbstractSet> @Override public Iterator> iterator() { - //return inOrderTraverse(); return new EntrySetIterator(); } diff --git a/processing/src/main/java/org/apache/druid/timeline/VersionedIntervalTimeline.java 
b/processing/src/main/java/org/apache/druid/timeline/VersionedIntervalTimeline.java index fcee29a55141..5fab6d83e07b 100644 --- a/processing/src/main/java/org/apache/druid/timeline/VersionedIntervalTimeline.java +++ b/processing/src/main/java/org/apache/druid/timeline/VersionedIntervalTimeline.java @@ -326,8 +326,8 @@ public PartitionChunk findChunk(Interval interval, VersionType versi if (fastIntervalSearch) { Map> possibleMatches = allTimeIntervals.findEncompassing(interval); for (Entry> entry : possibleMatches.entrySet()) { - Interval eninterval = entry.getKey(); - if (eninterval.contains(interval)) { + Interval enInterval = entry.getKey(); + if (enInterval.contains(interval)) { TimelineEntry foundEntry = entry.getValue().get(version); if (foundEntry != null) { return foundEntry.getPartitionHolder().getChunk(partitionNum); diff --git a/server/src/main/java/org/apache/druid/server/SegmentManager.java b/server/src/main/java/org/apache/druid/server/SegmentManager.java index 440e7b9470fc..dc98deea681f 100644 --- a/server/src/main/java/org/apache/druid/server/SegmentManager.java +++ b/server/src/main/java/org/apache/druid/server/SegmentManager.java @@ -507,7 +507,6 @@ public static class DataSourceState private long rowCount; private final SegmentRowCountDistribution segmentRowCountDistribution = new SegmentRowCountDistribution(); - @Inject public DataSourceState(TimelineConfig timelineConfig) { timeline = new VersionedIntervalTimeline<>(Ordering.natural(), false, timelineConfig.isFastIntervalSearch()); From 1f9d4c42da0627197782314bb0458a3689e8b195 Mon Sep 17 00:00:00 2001 From: Pramod Immaneni Date: Wed, 1 Jul 2026 15:37:39 -0700 Subject: [PATCH 28/29] Optimizations --- .../java/org/apache/druid/timeline/IntervalTree.java | 10 +++++----- .../druid/timeline/VersionedIntervalTimeline.java | 4 +++- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java 
b/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java index 51091f0c511e..70dcd29b0579 100644 --- a/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java +++ b/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java @@ -68,17 +68,17 @@ public class IntervalTree extends AbstractMap implements NavigableMap { // The compartor for comparing the interval start timnes - Comparator startComparator; + private final Comparator startComparator; // The comparator for comparing interval end times - Comparator endComparator; + private final Comparator endComparator; @VisibleForTesting - Node root; - int size; + private Node root; + private int size; // Deviation allowed from ideal height for the maximum height on either side of the tree, expressed as a // percentage of ideal height - int imbalanceTolerance = 50; + private int imbalanceTolerance = 50; EntrySet entrySet = new EntrySet(); diff --git a/processing/src/main/java/org/apache/druid/timeline/VersionedIntervalTimeline.java b/processing/src/main/java/org/apache/druid/timeline/VersionedIntervalTimeline.java index 5fab6d83e07b..28f382127128 100644 --- a/processing/src/main/java/org/apache/druid/timeline/VersionedIntervalTimeline.java +++ b/processing/src/main/java/org/apache/druid/timeline/VersionedIntervalTimeline.java @@ -87,7 +87,8 @@ public class VersionedIntervalTimeline incompletePartitionsTimeline; // true interval -> version -> timelineEntry private final Map> allTimelineEntries = new HashMap<>(); - private final IntervalTree> allTimeIntervals = new IntervalTree<>(Comparators.intervalsByStart(), Comparators.intervalsByEnd()); + // Only instantiated and used when fastIntervalSearch is enabled + private IntervalTree> allTimeIntervals; private final AtomicInteger numObjects = new AtomicInteger(); private final Comparator versionComparator; @@ -114,6 +115,7 @@ public VersionedIntervalTimeline(Comparator versionComparat this.skipObjectsWithNoData = skipObjectsWithNoData; 
this.fastIntervalSearch = fastIntervalSearch; if (fastIntervalSearch) { + allTimeIntervals = new IntervalTree<>(Comparators.intervalsByStart(), Comparators.intervalsByEnd()); this.completePartitionsTimeline = new IntervalTree<>(Comparators.intervalsByStart(), Comparators.intervalsByEnd()); this.incompletePartitionsTimeline = new IntervalTree<>(Comparators.intervalsByStart(), Comparators.intervalsByEnd()); } else { From 6edc1258963d88a8aa59cda756f09c43f2505d2f Mon Sep 17 00:00:00 2001 From: Pramod Immaneni Date: Wed, 1 Jul 2026 16:04:05 -0700 Subject: [PATCH 29/29] Documentation and optimizations --- .../java/org/apache/druid/timeline/IntervalTree.java | 11 ++++++++++- .../druid/timeline/VersionedIntervalTimeline.java | 6 ++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java b/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java index 70dcd29b0579..98fc9e6cb858 100644 --- a/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java +++ b/processing/src/main/java/org/apache/druid/timeline/IntervalTree.java @@ -80,7 +80,7 @@ public class IntervalTree extends AbstractMap implements Navigab // percentage of ideal height private int imbalanceTolerance = 50; - EntrySet entrySet = new EntrySet(); + private final EntrySet entrySet = new EntrySet(); public IntervalTree(Comparator startComparator, Comparator endComparator) { @@ -774,6 +774,15 @@ public Entry next() } + /** + * Perform a tree rebalance if the imbalance between the left and right sides of the tree has increased beyond a + * tolerated limit, as opposed to rebalancing all the time. This is done to strike a balance between performance + * degradation arising from an imbalanced tree and the processing overhead of rebalancing each time the contents of + * the tree change.
+ * + * The limit is defined using a configurable tolerance percentage in excess of an ideal balanced tree height for the + * number of entries in the tree. + */ private void checkRebalance() { if (root != null) { diff --git a/processing/src/main/java/org/apache/druid/timeline/VersionedIntervalTimeline.java b/processing/src/main/java/org/apache/druid/timeline/VersionedIntervalTimeline.java index 28f382127128..c024b1a96940 100644 --- a/processing/src/main/java/org/apache/druid/timeline/VersionedIntervalTimeline.java +++ b/processing/src/main/java/org/apache/druid/timeline/VersionedIntervalTimeline.java @@ -109,6 +109,12 @@ public VersionedIntervalTimeline(Comparator versionComparat this(versionComparator, skipObjectsWithNoData, false); } + /** + * Constructor + * @param versionComparator The version comparator + * @param skipObjectsWithNoData Skip tombstones during lookup + * @param fastIntervalSearch Use the faster segment retrieval index based on interval tree + */ public VersionedIntervalTimeline(Comparator versionComparator, boolean skipObjectsWithNoData, boolean fastIntervalSearch) { this.versionComparator = versionComparator;