@@ -66,28 +66,44 @@ impl HistogramBuilder {
6666pub type UniformSampleSet = HistogramBounds ;
6767
6868impl HistogramBounds {
69+ pub fn has_same_supported_type ( & self , other : & HistogramBounds ) -> bool {
70+ let Some ( kind) = histogram_bound_kind ( self . lower_bound ( ) ) else {
71+ return false ;
72+ } ;
73+
74+ histogram_bound_kind ( self . upper_bound ( ) ) == Some ( kind)
75+ && histogram_bound_kind ( other. lower_bound ( ) ) == Some ( kind)
76+ && histogram_bound_kind ( other. upper_bound ( ) ) == Some ( kind)
77+ }
78+
6979 pub fn has_intersection ( & self , other : & HistogramBounds ) -> Result < bool > {
7080 match (
7181 self . lower_bound ( ) ,
7282 self . upper_bound ( ) ,
7383 other. lower_bound ( ) ,
7484 other. upper_bound ( ) ,
7585 ) {
76- ( left_min, left_max, right_min, right_max)
77- if left_min. is_numeric ( )
78- && left_max. is_numeric ( )
79- && right_min. is_numeric ( )
80- && right_max. is_numeric ( ) =>
81- {
82- Ok ( TypedHistogramBounds :: new (
83- F64 :: from ( left_min. as_double ( ) ?) ,
84- F64 :: from ( left_max. as_double ( ) ?) ,
85- )
86- . has_intersection ( & TypedHistogramBounds :: new (
87- F64 :: from ( right_min. as_double ( ) ?) ,
88- F64 :: from ( right_max. as_double ( ) ?) ,
89- ) ) )
90- }
86+ (
87+ Datum :: Int ( left_min) ,
88+ Datum :: Int ( left_max) ,
89+ Datum :: Int ( right_min) ,
90+ Datum :: Int ( right_max) ,
91+ ) => Ok ( TypedHistogramBounds :: new ( * left_min, * left_max)
92+ . has_intersection ( & TypedHistogramBounds :: new ( * right_min, * right_max) ) ) ,
93+ (
94+ Datum :: UInt ( left_min) ,
95+ Datum :: UInt ( left_max) ,
96+ Datum :: UInt ( right_min) ,
97+ Datum :: UInt ( right_max) ,
98+ ) => Ok ( TypedHistogramBounds :: new ( * left_min, * left_max)
99+ . has_intersection ( & TypedHistogramBounds :: new ( * right_min, * right_max) ) ) ,
100+ (
101+ Datum :: Float ( left_min) ,
102+ Datum :: Float ( left_max) ,
103+ Datum :: Float ( right_min) ,
104+ Datum :: Float ( right_max) ,
105+ ) => Ok ( TypedHistogramBounds :: new ( * left_min, * left_max)
106+ . has_intersection ( & TypedHistogramBounds :: new ( * right_min, * right_max) ) ) ,
91107 (
92108 Datum :: Bytes ( left_min) ,
93109 Datum :: Bytes ( left_max) ,
@@ -109,20 +125,34 @@ impl HistogramBounds {
109125 other. lower_bound ( ) ,
110126 other. upper_bound ( ) ,
111127 ) {
112- ( left_min, left_max, right_min, right_max)
113- if left_min. is_numeric ( )
114- && left_max. is_numeric ( )
115- && right_min. is_numeric ( )
116- && right_max. is_numeric ( ) =>
117- {
118- let ( min, max) = TypedHistogramBounds :: new (
119- F64 :: from ( left_min. as_double ( ) ?) ,
120- F64 :: from ( left_max. as_double ( ) ?) ,
121- )
122- . intersection ( & TypedHistogramBounds :: new (
123- F64 :: from ( right_min. as_double ( ) ?) ,
124- F64 :: from ( right_max. as_double ( ) ?) ,
125- ) ) ;
128+ (
129+ Datum :: Int ( left_min) ,
130+ Datum :: Int ( left_max) ,
131+ Datum :: Int ( right_min) ,
132+ Datum :: Int ( right_max) ,
133+ ) => {
134+ let ( min, max) = TypedHistogramBounds :: new ( * left_min, * left_max)
135+ . intersection ( & TypedHistogramBounds :: new ( * right_min, * right_max) ) ;
136+ Ok ( ( min. map ( Datum :: Int ) , max. map ( Datum :: Int ) ) )
137+ }
138+ (
139+ Datum :: UInt ( left_min) ,
140+ Datum :: UInt ( left_max) ,
141+ Datum :: UInt ( right_min) ,
142+ Datum :: UInt ( right_max) ,
143+ ) => {
144+ let ( min, max) = TypedHistogramBounds :: new ( * left_min, * left_max)
145+ . intersection ( & TypedHistogramBounds :: new ( * right_min, * right_max) ) ;
146+ Ok ( ( min. map ( Datum :: UInt ) , max. map ( Datum :: UInt ) ) )
147+ }
148+ (
149+ Datum :: Float ( left_min) ,
150+ Datum :: Float ( left_max) ,
151+ Datum :: Float ( right_min) ,
152+ Datum :: Float ( right_max) ,
153+ ) => {
154+ let ( min, max) = TypedHistogramBounds :: new ( * left_min, * left_max)
155+ . intersection ( & TypedHistogramBounds :: new ( * right_min, * right_max) ) ;
126156 Ok ( ( min. map ( Datum :: Float ) , max. map ( Datum :: Float ) ) )
127157 }
128158 (
@@ -142,9 +172,28 @@ impl HistogramBounds {
142172 }
143173}
144174
/// Kind tag for the `Datum` variants that `histogram_bound_kind` accepts as
/// histogram bounds.
///
/// `Debug` is derived alongside the comparison traits so values can be used in
/// `assert_eq!`/`assert_ne!` and printed with `{:?}` in diagnostics.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum HistogramBoundKind {
    /// Bound held in a `Datum::Int`.
    Int,
    /// Bound held in a `Datum::UInt`.
    UInt,
    /// Bound held in a `Datum::Float`.
    Float,
    /// Bound held in a `Datum::Bytes`.
    Bytes,
}
182+
183+ fn histogram_bound_kind ( datum : & Datum ) -> Option < HistogramBoundKind > {
184+ match datum {
185+ Datum :: Int ( _) => Some ( HistogramBoundKind :: Int ) ,
186+ Datum :: UInt ( _) => Some ( HistogramBoundKind :: UInt ) ,
187+ Datum :: Float ( _) => Some ( HistogramBoundKind :: Float ) ,
188+ Datum :: Bytes ( _) => Some ( HistogramBoundKind :: Bytes ) ,
189+ Datum :: Bool ( _) => None ,
190+ }
191+ }
192+
145193#[ cfg( test) ]
146194mod tests {
147195 use super :: * ;
196+ use crate :: HistogramBucket ;
148197 use crate :: TypedHistogram ;
149198 use crate :: TypedHistogramBucket ;
150199
@@ -165,13 +214,32 @@ mod tests {
165214 let left = UniformSampleSet :: new ( Datum :: UInt ( 0 ) , Datum :: UInt ( 10 ) ) ;
166215 let right = UniformSampleSet :: new ( Datum :: UInt ( 5 ) , Datum :: UInt ( 15 ) ) ;
167216
217+ assert ! ( left. has_same_supported_type( & right) ) ;
168218 assert ! ( left. has_intersection( & right) . unwrap( ) ) ;
169219 assert_eq ! (
170220 left. intersection( & right) . unwrap( ) ,
171- (
172- Some ( Datum :: Float ( F64 :: from( 5.0 ) ) ) ,
173- Some ( Datum :: Float ( F64 :: from( 10.0 ) ) )
174- )
221+ ( Some ( Datum :: UInt ( 5 ) ) , Some ( Datum :: UInt ( 10 ) ) )
222+ ) ;
223+ }
224+
/// A `UInt` sample set and an `Int` sample set with numerically overlapping
/// ranges must not be treated as the same type or as intersecting.
#[test]
fn test_uniform_sample_set_rejects_mixed_numeric_intersection() {
    let unsigned_set = UniformSampleSet::new(Datum::UInt(0), Datum::UInt(10));
    let signed_set = UniformSampleSet::new(Datum::Int(5), Datum::Int(15));

    let same_type = unsigned_set.has_same_supported_type(&signed_set);
    assert!(!same_type);

    let overlaps = unsigned_set.has_intersection(&signed_set).unwrap();
    assert!(!overlaps);

    // No common bounds are produced for the mixed-kind pair.
    let common_bounds = unsigned_set.intersection(&signed_set).unwrap();
    assert_eq!(common_bounds, (None, None));
}
234+
/// Building a bucket from a `UInt` lower bound and an `Int` upper bound must
/// fail with the "same supported type" error.
#[test]
fn test_histogram_bucket_rejects_mixed_numeric_bounds() {
    let lower = Datum::UInt(0);
    let upper = Datum::Int(10);

    let outcome = HistogramBucket::try_from_bounds(lower, upper, 10.0, 10.0);
    let err = outcome.unwrap_err();

    assert_eq!(
        err,
        "histogram bucket bounds must have the same supported type"
    );
}
177245
@@ -199,9 +267,30 @@ mod tests {
199267 avg_spacing : None ,
200268 } ) ;
201269
202- let estimation = left. estimate_join ( & right) ;
270+ let estimation = left. estimate_join ( & right) . unwrap ( ) ;
203271
204272 assert_eq ! ( estimation. cardinality. expected, 5.0 ) ;
205273 assert_eq ! ( estimation. ndv. expected, 5.0 ) ;
206274 }
275+
/// Joining a `UInt` histogram with an `Int` histogram must return an error
/// rather than an estimate.
#[test]
fn test_estimate_histogram_join_rejects_mixed_numeric_types() {
    let unsigned_hist = Histogram::UInt(TypedHistogram {
        accuracy: true,
        buckets: vec![TypedHistogramBucket::new(0, 10, 10.0, 10.0)],
        avg_spacing: None,
    });
    let signed_hist = Histogram::Int(TypedHistogram {
        accuracy: true,
        buckets: vec![TypedHistogramBucket::new(5, 15, 10.0, 10.0)],
        avg_spacing: None,
    });

    let join_result = unsigned_hist.estimate_join(&signed_hist);
    let err = join_result.unwrap_err();

    assert_eq!(
        err.message(),
        "cannot estimate join for histograms with different bucket types"
    );
}
207296}
0 commit comments