diff --git a/sql/catalyst/benchmarks/HashBenchmark-results.txt b/sql/catalyst/benchmarks/HashBenchmark-results.txt index 3db7b2cd0188f..eb9b2912928cc 100644 --- a/sql/catalyst/benchmarks/HashBenchmark-results.txt +++ b/sql/catalyst/benchmarks/HashBenchmark-results.txt @@ -2,69 +2,69 @@ single ints ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1018-azure AMD EPYC 9V74 80-Core Processor Hash For single ints: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -interpreted version 2035 2040 7 263.8 3.8 1.0X -codegen version 3949 3952 3 135.9 7.4 0.5X -codegen version 64-bit 3419 3423 6 157.0 6.4 0.6X -codegen HiveHash version 2647 2663 23 202.8 4.9 0.8X +interpreted version 2044 2045 2 262.7 3.8 1.0X +codegen version 3953 3956 4 135.8 7.4 0.5X +codegen version 64-bit 3439 3443 5 156.1 6.4 0.6X +codegen HiveHash version 2749 2750 2 195.3 5.1 0.7X ================================================================================================ single longs ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1018-azure AMD EPYC 9V74 80-Core Processor Hash For single longs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -interpreted version 2486 2492 8 215.9 4.6 1.0X -codegen version 5601 5602 1 95.8 10.4 0.4X -codegen version 64-bit 4175 4183 11 128.6 7.8 0.6X -codegen HiveHash version 3263 3264 0 164.5 6.1 0.8X +interpreted version 2492 2493 2 215.5 4.6 1.0X 
+codegen version 5630 5631 0 95.4 10.5 0.4X +codegen version 64-bit 4077 4078 1 131.7 7.6 0.6X +codegen HiveHash version 3146 3147 2 170.7 5.9 0.8X ================================================================================================ normal ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1018-azure AMD EPYC 9V74 80-Core Processor Hash For normal: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -interpreted version 2734 2748 20 0.8 1303.7 1.0X -codegen version 2511 2512 1 0.8 1197.4 1.1X -codegen version 64-bit 759 760 1 2.8 362.1 3.6X -codegen HiveHash version 4104 4105 1 0.5 1957.1 0.7X +interpreted version 2756 2758 2 0.8 1314.4 1.0X +codegen version 2574 2574 0 0.8 1227.2 1.1X +codegen version 64-bit 766 774 12 2.7 365.5 3.6X +codegen HiveHash version 4182 4185 5 0.5 1994.1 0.7X ================================================================================================ array ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1018-azure AMD EPYC 9V74 80-Core Processor Hash For array: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -interpreted version 1158 1158 0 0.1 8836.3 1.0X -codegen version 3648 3654 9 0.0 27829.0 0.3X -codegen version 64-bit 2699 2700 1 0.0 20595.1 0.4X -codegen HiveHash version 889 889 1 0.1 6779.7 1.3X +interpreted version 1389 1391 3 0.1 10595.8 1.0X +codegen version 3945 3945 0 0.0 30098.4 0.4X +codegen 
version 64-bit 2977 2977 0 0.0 22710.3 0.5X +codegen HiveHash version 1086 1088 2 0.1 8289.0 1.3X ================================================================================================ map ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1018-azure AMD EPYC 9V74 80-Core Processor Hash For map: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -interpreted version 0 0 0 101.5 9.9 1.0X -codegen version 247 248 0 0.0 60408.3 0.0X -codegen version 64-bit 193 193 0 0.0 47021.2 0.0X -codegen HiveHash version 30 30 0 0.1 7362.8 0.0X +interpreted version 0 0 0 102.5 9.8 1.0X +codegen version 201 201 1 0.0 49039.1 0.0X +codegen version 64-bit 157 157 0 0.0 38253.6 0.0X +codegen HiveHash version 16 17 1 0.3 3958.7 0.0X diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/HashBenchmark.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/HashBenchmark.scala index e515b771c96c6..b02642bf9724f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/HashBenchmark.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/HashBenchmark.scala @@ -125,6 +125,7 @@ object HashBenchmark extends BenchmarkBase { .add("binary", BinaryType) .add("date", DateType) .add("timestamp", TimestampType) + .add("time", TimeType()) test("normal", normal, 1 << 10, 1 << 11) val arrayOfInt = ArrayType(IntegerType) diff --git a/sql/core/benchmarks/CSVBenchmark-results.txt b/sql/core/benchmarks/CSVBenchmark-results.txt index 867cb4ac59f12..51f118275257c 100644 --- a/sql/core/benchmarks/CSVBenchmark-results.txt +++ b/sql/core/benchmarks/CSVBenchmark-results.txt @@ -2,76 +2,81 @@ Benchmark to measure CSV read/write performance 
================================================================================================ -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1018-azure +AMD EPYC 7763 64-Core Processor Parsing quoted values: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -One quoted string 26170 26230 94 0.0 523394.1 1.0X +One quoted string 25478 25599 169 0.0 509568.3 1.0X -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1018-azure +AMD EPYC 7763 64-Core Processor Wide rows with 1000 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Select 1000 columns 51860 52209 580 0.0 51859.6 1.0X -Select 100 columns 23745 23781 43 0.0 23745.3 2.2X -Select one column 20220 20278 56 0.0 20219.6 2.6X -count() 3218 3308 105 0.3 3218.2 16.1X -Select 100 columns, one bad input field 28039 28266 212 0.0 28039.4 1.8X -Select 100 columns, corrupt record field 31122 31132 17 0.0 31122.3 1.7X +Select 1000 columns 56126 56605 804 0.0 56125.6 1.0X +Select 100 columns 22091 22109 30 0.0 22090.8 2.5X +Select one column 18548 18678 119 0.1 18547.7 3.0X +count() 3389 3427 59 0.3 3388.7 16.6X +Select 100 columns, one bad input field 27315 27333 24 0.0 27315.5 2.1X +Select 100 columns, corrupt record field 30160 30250 136 0.0 30159.5 1.9X -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1018-azure +AMD EPYC 7763 64-Core Processor Count a dataset with 10 columns: 
Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Select 10 columns + count() 9648 9682 35 1.0 964.8 1.0X -Select 1 column + count() 6694 6706 16 1.5 669.4 1.4X -count() 1548 1560 19 6.5 154.8 6.2X +Select 10 columns + count() 9274 9317 71 1.1 927.4 1.0X +Select 1 column + count() 6631 6641 14 1.5 663.1 1.4X +count() 1641 1645 5 6.1 164.1 5.7X -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1018-azure +AMD EPYC 7763 64-Core Processor Write dates and timestamps: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Create a dataset of timestamps 834 845 16 12.0 83.4 1.0X -to_csv(timestamp) 5794 5808 21 1.7 579.4 0.1X -write timestamps to files 6073 6082 11 1.6 607.3 0.1X -Create a dataset of dates 959 968 12 10.4 95.9 0.9X -to_csv(date) 3980 3987 6 2.5 398.0 0.2X -write dates to files 3894 3899 5 2.6 389.4 0.2X +Create a dataset of timestamps 816 819 3 12.3 81.6 1.0X +to_csv(timestamp) 5975 6013 33 1.7 597.5 0.1X +write timestamps to files 6295 6304 8 1.6 629.5 0.1X +Create a dataset of dates 930 933 2 10.7 93.0 0.9X +to_csv(date) 4194 4194 1 2.4 419.4 0.2X +write dates to files 4235 4238 4 2.4 423.5 0.2X +Create a dataset of times 836 843 10 12.0 83.6 1.0X +to_csv(time) 6053 6068 15 1.7 605.3 0.1X +write times to files 5988 5999 9 1.7 598.8 0.1X -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1018-azure +AMD EPYC 7763 64-Core Processor Read dates and timestamps: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
----------------------------------------------------------------------------------------------------------------------------------------------------- -read timestamp text from files 1180 1186 4 8.5 118.0 1.0X -read timestamps from files 9655 9670 19 1.0 965.5 0.1X -infer timestamps from files 19167 19244 68 0.5 1916.7 0.1X -read date text from files 1111 1129 22 9.0 111.1 1.1X -read date from files 9513 9521 7 1.1 951.3 0.1X -infer date from files 19126 19159 31 0.5 1912.6 0.1X -timestamp strings 1137 1144 7 8.8 113.7 1.0X -parse timestamps from Dataset[String] 10759 10774 22 0.9 1075.9 0.1X -infer timestamps from Dataset[String] 19823 19835 13 0.5 1982.3 0.1X -date strings 1579 1583 5 6.3 157.9 0.7X -parse dates from Dataset[String] 11033 11055 22 0.9 1103.3 0.1X -from_csv(timestamp) 8860 8864 6 1.1 886.0 0.1X -from_csv(date) 9649 9670 27 1.0 964.9 0.1X -infer error timestamps from Dataset[String] with default format 11156 11157 1 0.9 1115.6 0.1X -infer error timestamps from Dataset[String] with user-provided format 11118 11147 26 0.9 1111.8 0.1X -infer error timestamps from Dataset[String] with legacy format 11140 11152 10 0.9 1114.0 0.1X +read timestamp text from files 1185 1200 12 8.4 118.5 1.0X +read timestamps from files 9670 9679 8 1.0 967.0 0.1X +infer timestamps from files 19643 19661 16 0.5 1964.3 0.1X +read date text from files 1091 1097 7 9.2 109.1 1.1X +read date from files 9817 9823 6 1.0 981.7 0.1X +infer date from files 19909 19928 22 0.5 1990.9 0.1X +timestamp strings 1198 1202 3 8.3 119.8 1.0X +parse timestamps from Dataset[String] 11542 11566 31 0.9 1154.2 0.1X +infer timestamps from Dataset[String] 21391 21431 46 0.5 2139.1 0.1X +date strings 1622 1627 5 6.2 162.2 0.7X +parse dates from Dataset[String] 11779 11802 21 0.8 1177.9 0.1X +from_csv(timestamp) 9606 9630 31 1.0 960.6 0.1X +from_csv(date) 10052 10073 20 1.0 1005.2 0.1X +infer error timestamps from Dataset[String] with default format 12378 12405 42 0.8 1237.8 0.1X +infer error timestamps 
from Dataset[String] with user-provided format 12342 12413 67 0.8 1234.2 0.1X +infer error timestamps from Dataset[String] with legacy format 12373 12394 31 0.8 1237.3 0.1X +read time text from files 1159 1165 6 8.6 115.9 1.0X +read time from files 8704 8723 17 1.1 870.4 0.1X -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1018-azure +AMD EPYC 7763 64-Core Processor Filters pushdown: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -w/o filters 4268 4277 9 0.0 42682.0 1.0X -pushdown disabled 4250 4254 5 0.0 42501.3 1.0X -w/ filters 863 869 5 0.1 8634.6 4.9X +w/o filters 4080 4097 21 0.0 40801.3 1.0X +pushdown disabled 3576 3581 5 0.0 35763.5 1.1X +w/ filters 761 765 5 0.1 7607.7 5.4X -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1018-azure +AMD EPYC 7763 64-Core Processor Interval: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Read as Intervals 748 749 2 0.4 2493.1 1.0X -Read Raw Strings 304 305 1 1.0 1014.7 2.5X +Read as Intervals 696 699 3 0.4 2319.4 1.0X +Read Raw Strings 299 302 3 1.0 998.2 2.3X diff --git a/sql/core/benchmarks/ExtractBenchmark-results.txt b/sql/core/benchmarks/ExtractBenchmark-results.txt index e57086bcc84a0..9adfe9c8630cf 100644 --- a/sql/core/benchmarks/ExtractBenchmark-results.txt +++ b/sql/core/benchmarks/ExtractBenchmark-results.txt @@ -1,104 +1,122 @@ -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 
6.17.0-1018-azure +AMD EPYC 7763 64-Core Processor Invoke extract for timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast to timestamp 257 270 18 39.0 25.7 1.0X -YEAR of timestamp 684 690 5 14.6 68.4 0.4X -YEAROFWEEK of timestamp 752 776 39 13.3 75.2 0.3X -QUARTER of timestamp 711 726 21 14.1 71.1 0.4X -MONTH of timestamp 699 706 8 14.3 69.9 0.4X -WEEK of timestamp 958 965 8 10.4 95.8 0.3X -DAY of timestamp 696 709 15 14.4 69.6 0.4X -DAYOFWEEK of timestamp 836 840 5 12.0 83.6 0.3X -DOW of timestamp 836 844 12 12.0 83.6 0.3X -DOW_ISO of timestamp 814 815 1 12.3 81.4 0.3X -DAYOFWEEK_ISO of timestamp 812 816 3 12.3 81.2 0.3X -DOY of timestamp 710 712 2 14.1 71.0 0.4X -HOUR of timestamp 577 587 14 17.3 57.7 0.4X -MINUTE of timestamp 582 584 3 17.2 58.2 0.4X -SECOND of timestamp 681 683 2 14.7 68.1 0.4X +cast to timestamp 269 286 30 37.2 26.9 1.0X +YEAR of timestamp 707 721 13 14.1 70.7 0.4X +YEAROFWEEK of timestamp 767 774 6 13.0 76.7 0.4X +QUARTER of timestamp 725 727 3 13.8 72.5 0.4X +MONTH of timestamp 703 712 14 14.2 70.3 0.4X +WEEK of timestamp 987 990 5 10.1 98.7 0.3X +DAY of timestamp 704 709 4 14.2 70.4 0.4X +DAYOFWEEK of timestamp 844 851 11 11.8 84.4 0.3X +DOW of timestamp 843 844 1 11.9 84.3 0.3X +DOW_ISO of timestamp 800 810 17 12.5 80.0 0.3X +DAYOFWEEK_ISO of timestamp 801 802 1 12.5 80.1 0.3X +DOY of timestamp 733 736 5 13.6 73.3 0.4X +HOUR of timestamp 539 547 12 18.5 53.9 0.5X +MINUTE of timestamp 534 536 2 18.7 53.4 0.5X +SECOND of timestamp 626 628 3 16.0 62.6 0.4X -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1018-azure +AMD EPYC 7763 64-Core Processor Invoke date_part for timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -cast to timestamp 233 236 3 42.9 23.3 1.0X -YEAR of timestamp 686 693 8 14.6 68.6 0.3X -YEAROFWEEK of timestamp 741 744 3 13.5 74.1 0.3X -QUARTER of timestamp 708 713 4 14.1 70.8 0.3X -MONTH of timestamp 693 704 14 14.4 69.3 0.3X -WEEK of timestamp 956 960 4 10.5 95.6 0.2X -DAY of timestamp 691 696 5 14.5 69.1 0.3X -DAYOFWEEK of timestamp 830 837 8 12.0 83.0 0.3X -DOW of timestamp 830 831 0 12.0 83.0 0.3X -DOW_ISO of timestamp 803 809 11 12.5 80.3 0.3X -DAYOFWEEK_ISO of timestamp 803 808 8 12.5 80.3 0.3X -DOY of timestamp 707 714 9 14.1 70.7 0.3X -HOUR of timestamp 573 575 2 17.5 57.3 0.4X -MINUTE of timestamp 570 575 5 17.5 57.0 0.4X -SECOND of timestamp 683 686 2 14.6 68.3 0.3X +cast to timestamp 235 236 1 42.5 23.5 1.0X +YEAR of timestamp 668 670 2 15.0 66.8 0.4X +YEAROFWEEK of timestamp 713 714 1 14.0 71.3 0.3X +QUARTER of timestamp 691 693 2 14.5 69.1 0.3X +MONTH of timestamp 669 671 3 15.0 66.9 0.4X +WEEK of timestamp 963 964 1 10.4 96.3 0.2X +DAY of timestamp 671 677 10 14.9 67.1 0.4X +DAYOFWEEK of timestamp 809 811 4 12.4 80.9 0.3X +DOW of timestamp 809 810 1 12.4 80.9 0.3X +DOW_ISO of timestamp 767 776 10 13.0 76.7 0.3X +DAYOFWEEK_ISO of timestamp 767 770 4 13.0 76.7 0.3X +DOY of timestamp 699 700 1 14.3 69.9 0.3X +HOUR of timestamp 503 505 2 19.9 50.3 0.5X +MINUTE of timestamp 506 508 2 19.8 50.6 0.5X +SECOND of timestamp 591 596 5 16.9 59.1 0.4X -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1018-azure +AMD EPYC 7763 64-Core Processor Invoke extract for date: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast to date 565 569 5 17.7 56.5 1.0X -YEAR of date 690 691 2 14.5 
69.0 0.8X -YEAROFWEEK of date 734 736 2 13.6 73.4 0.8X -QUARTER of date 702 704 3 14.2 70.2 0.8X -MONTH of date 689 693 4 14.5 68.9 0.8X -WEEK of date 946 954 6 10.6 94.6 0.6X -DAY of date 684 689 6 14.6 68.4 0.8X -DAYOFWEEK of date 828 831 4 12.1 82.8 0.7X -DOW of date 826 827 1 12.1 82.6 0.7X -DOW_ISO of date 796 805 11 12.6 79.6 0.7X -DAYOFWEEK_ISO of date 798 801 3 12.5 79.8 0.7X -DOY of date 710 712 4 14.1 71.0 0.8X -HOUR of date 1177 1186 13 8.5 117.7 0.5X -MINUTE of date 1173 1175 2 8.5 117.3 0.5X -SECOND of date 1265 1272 11 7.9 126.5 0.4X +cast to date 574 575 1 17.4 57.4 1.0X +YEAR of date 667 672 4 15.0 66.7 0.9X +YEAROFWEEK of date 712 715 4 14.0 71.2 0.8X +QUARTER of date 691 694 3 14.5 69.1 0.8X +MONTH of date 666 667 1 15.0 66.6 0.9X +WEEK of date 964 964 0 10.4 96.4 0.6X +DAY of date 665 665 1 15.0 66.5 0.9X +DAYOFWEEK of date 809 811 3 12.4 80.9 0.7X +DOW of date 806 809 3 12.4 80.6 0.7X +DOW_ISO of date 766 767 0 13.0 76.6 0.7X +DAYOFWEEK_ISO of date 768 770 2 13.0 76.8 0.7X +DOY of date 698 699 2 14.3 69.8 0.8X +HOUR of date 1124 1127 3 8.9 112.4 0.5X +MINUTE of date 1127 1139 17 8.9 112.7 0.5X +SECOND of date 1276 1277 2 7.8 127.6 0.4X -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1018-azure +AMD EPYC 7763 64-Core Processor Invoke date_part for date: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast to date 559 565 8 17.9 55.9 1.0X -YEAR of date 688 691 3 14.5 68.8 0.8X -YEAROFWEEK of date 737 741 5 13.6 73.7 0.8X -QUARTER of date 700 703 5 14.3 70.0 0.8X -MONTH of date 686 690 7 14.6 68.6 0.8X -WEEK of date 947 948 1 10.6 94.7 0.6X -DAY of date 683 685 2 14.6 68.3 0.8X -DAYOFWEEK of date 825 826 1 12.1 82.5 0.7X -DOW of date 822 826 4 12.2 82.2 0.7X -DOW_ISO of date 797 801 4 
12.5 79.7 0.7X -DAYOFWEEK_ISO of date 797 802 4 12.5 79.7 0.7X -DOY of date 706 706 1 14.2 70.6 0.8X -HOUR of date 1174 1180 6 8.5 117.4 0.5X -MINUTE of date 1169 1171 2 8.6 116.9 0.5X -SECOND of date 1265 1268 3 7.9 126.5 0.4X +cast to date 569 586 23 17.6 56.9 1.0X +YEAR of date 665 667 3 15.0 66.5 0.9X +YEAROFWEEK of date 709 731 20 14.1 70.9 0.8X +QUARTER of date 689 693 4 14.5 68.9 0.8X +MONTH of date 666 668 2 15.0 66.6 0.9X +WEEK of date 959 961 2 10.4 95.9 0.6X +DAY of date 666 670 4 15.0 66.6 0.9X +DAYOFWEEK of date 807 809 2 12.4 80.7 0.7X +DOW of date 808 810 1 12.4 80.8 0.7X +DOW_ISO of date 762 765 4 13.1 76.2 0.7X +DAYOFWEEK_ISO of date 763 765 3 13.1 76.3 0.7X +DOY of date 696 704 8 14.4 69.6 0.8X +HOUR of date 1121 1126 7 8.9 112.1 0.5X +MINUTE of date 1121 1121 0 8.9 112.1 0.5X +SECOND of date 1270 1277 9 7.9 127.0 0.4X -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1018-azure +AMD EPYC 7763 64-Core Processor +Invoke extract for time: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +cast to time 461 462 1 21.7 46.1 1.0X +HOUR of time 528 531 3 18.9 52.8 0.9X +MINUTE of time 535 540 8 18.7 53.5 0.9X +SECOND of time 1487 1492 6 6.7 148.7 0.3X + +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1018-azure +AMD EPYC 7763 64-Core Processor +Invoke date_part for time: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +cast to time 465 469 4 21.5 46.5 1.0X +HOUR of time 534 538 4 18.7 53.4 0.9X +MINUTE of time 535 540 6 18.7 53.5 0.9X +SECOND of time 1489 1490 1 6.7 148.9 0.3X + +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1018-azure +AMD 
EPYC 7763 64-Core Processor Invoke extract for interval: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast to interval 859 862 4 11.6 85.9 1.0X -YEAR of interval 828 828 1 12.1 82.8 1.0X -MONTH of interval 842 854 20 11.9 84.2 1.0X -DAY of interval 827 832 7 12.1 82.7 1.0X -HOUR of interval 848 850 2 11.8 84.8 1.0X -MINUTE of interval 852 860 14 11.7 85.2 1.0X -SECOND of interval 941 943 2 10.6 94.1 0.9X +cast to interval 816 818 3 12.3 81.6 1.0X +YEAR of interval 791 799 11 12.6 79.1 1.0X +MONTH of interval 802 811 8 12.5 80.2 1.0X +DAY of interval 805 808 5 12.4 80.5 1.0X +HOUR of interval 804 806 2 12.4 80.4 1.0X +MINUTE of interval 808 809 1 12.4 80.8 1.0X +SECOND of interval 849 851 2 11.8 84.9 1.0X -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1018-azure +AMD EPYC 7763 64-Core Processor Invoke date_part for interval: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast to interval 851 852 0 11.7 85.1 1.0X -YEAR of interval 823 825 3 12.1 82.3 1.0X -MONTH of interval 833 838 4 12.0 83.3 1.0X -DAY of interval 835 836 2 12.0 83.5 1.0X -HOUR of interval 846 851 6 11.8 84.6 1.0X -MINUTE of interval 857 859 2 11.7 85.7 1.0X -SECOND of interval 942 949 10 10.6 94.2 0.9X +cast to interval 814 817 4 12.3 81.4 1.0X +YEAR of interval 789 791 4 12.7 78.9 1.0X +MONTH of interval 805 806 1 12.4 80.5 1.0X +DAY of interval 805 811 7 12.4 80.5 1.0X +HOUR of interval 804 805 0 12.4 80.4 1.0X +MINUTE of interval 805 809 4 12.4 80.5 1.0X +SECOND of interval 850 852 2 11.8 85.0 1.0X diff --git a/sql/core/benchmarks/JsonBenchmark-results.txt 
b/sql/core/benchmarks/JsonBenchmark-results.txt index f6b52b4def0cb..6a791fb314f5e 100644 --- a/sql/core/benchmarks/JsonBenchmark-results.txt +++ b/sql/core/benchmarks/JsonBenchmark-results.txt @@ -3,128 +3,133 @@ Benchmark for performance of JSON parsing ================================================================================================ Preparing data for benchmarking ... -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure -AMD EPYC 9V74 80-Core Processor +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1018-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz JSON schema inferring: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -No encoding 1899 1983 91 2.6 379.7 1.0X -UTF-8 is set 5121 5134 13 1.0 1024.2 0.4X +No encoding 2596 2685 103 1.9 519.2 1.0X +UTF-8 is set 5809 5825 26 0.9 1161.8 0.4X Preparing data for benchmarking ... -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure -AMD EPYC 9V74 80-Core Processor +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1018-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz count a short column: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -No encoding 1927 1963 55 2.6 385.5 1.0X -UTF-8 is set 4451 4462 10 1.1 890.2 0.4X +No encoding 2402 2459 50 2.1 480.4 1.0X +UTF-8 is set 5429 5456 23 0.9 1085.9 0.4X Preparing data for benchmarking ... 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure -AMD EPYC 9V74 80-Core Processor +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1018-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz count a wide column: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -No encoding 5230 5246 16 0.2 5230.0 1.0X -UTF-8 is set 4768 4856 88 0.2 4767.9 1.1X +No encoding 5003 5033 27 0.2 5003.3 1.0X +UTF-8 is set 4741 4749 11 0.2 4741.4 1.1X Preparing data for benchmarking ... -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure -AMD EPYC 9V74 80-Core Processor +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1018-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz select wide row: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -No encoding 8996 9163 215 0.0 179920.8 1.0X -UTF-8 is set 9757 9790 31 0.0 195143.4 0.9X +No encoding 8420 8445 30 0.0 168391.8 1.0X +UTF-8 is set 8926 8973 41 0.0 178526.8 0.9X Preparing data for benchmarking ... -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure -AMD EPYC 9V74 80-Core Processor +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1018-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz Select a subset of 10 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Select 10 columns 1557 1560 3 0.6 1557.2 1.0X -Select 1 column 1184 1196 20 0.8 1183.7 1.3X +Select 10 columns 1766 1771 5 0.6 1765.5 1.0X +Select 1 column 1312 1323 9 0.8 1312.5 1.3X Preparing data for benchmarking ... 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure -AMD EPYC 9V74 80-Core Processor +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1018-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz creation of JSON parser per line: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Short column without encoding 561 563 3 1.8 561.5 1.0X -Short column with UTF-8 1140 1146 8 0.9 1139.9 0.5X -Wide column without encoding 5163 5179 23 0.2 5163.2 0.1X -Wide column with UTF-8 9810 9833 22 0.1 9810.2 0.1X +Short column without encoding 685 689 4 1.5 685.0 1.0X +Short column with UTF-8 1259 1260 1 0.8 1259.2 0.5X +Wide column without encoding 5335 5347 13 0.2 5335.4 0.1X +Wide column with UTF-8 10189 10243 52 0.1 10188.8 0.1X Preparing data for benchmarking ... -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure -AMD EPYC 9V74 80-Core Processor +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1018-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz JSON functions: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Text read 62 66 4 16.2 61.7 1.0X -from_json 969 979 9 1.0 968.7 0.1X -json_tuple 905 908 4 1.1 905.1 0.1X -get_json_object wholestage off 938 942 4 1.1 938.3 0.1X -get_json_object wholestage on 835 847 18 1.2 835.0 0.1X +Text read 61 64 2 16.3 61.4 1.0X +from_json 1472 1473 1 0.7 1472.4 0.0X +json_tuple 1468 1477 9 0.7 1468.3 0.0X +get_json_object wholestage off 1493 1498 5 0.7 1492.7 0.0X +get_json_object wholestage on 1408 1418 9 0.7 1408.2 0.0X Preparing data for benchmarking ... 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure -AMD EPYC 9V74 80-Core Processor +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1018-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz Dataset of json strings: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Text read 254 256 3 19.7 50.7 1.0X -schema inferring 1504 1524 26 3.3 300.8 0.2X -parsing 2391 2421 27 2.1 478.2 0.1X +Text read 231 232 1 21.7 46.2 1.0X +schema inferring 1816 1821 9 2.8 363.1 0.1X +parsing 2626 2628 3 1.9 525.2 0.1X Preparing data for benchmarking ... -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure -AMD EPYC 9V74 80-Core Processor +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1018-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz Json files in the per-line mode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Text read 624 652 30 8.0 124.8 1.0X -Schema inferring 2032 2036 5 2.5 406.5 0.3X -Parsing without charset 2527 2529 3 2.0 505.4 0.2X -Parsing with UTF-8 5453 5470 27 0.9 1090.6 0.1X +Text read 584 589 4 8.6 116.7 1.0X +Schema inferring 2511 2535 33 2.0 502.1 0.2X +Parsing without charset 2968 2976 8 1.7 593.5 0.2X +Parsing with UTF-8 5937 5960 40 0.8 1187.3 0.1X -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure -AMD EPYC 9V74 80-Core Processor +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1018-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz Write dates and timestamps: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Create a dataset of timestamps 107 110 4 9.4 106.5 1.0X 
-to_json(timestamp) 576 582 9 1.7 576.3 0.2X -write timestamps to files 623 626 4 1.6 623.2 0.2X -Create a dataset of dates 120 123 3 8.3 120.1 0.9X -to_json(date) 391 398 7 2.6 391.0 0.3X -write dates to files 415 418 5 2.4 415.3 0.3X +Create a dataset of timestamps 104 108 4 9.6 103.9 1.0X +to_json(timestamp) 602 603 1 1.7 602.2 0.2X +write timestamps to files 645 646 0 1.5 645.4 0.2X +Create a dataset of dates 118 121 3 8.5 118.2 0.9X +to_json(date) 431 435 4 2.3 430.8 0.2X +write dates to files 415 418 4 2.4 415.3 0.3X +Create a dataset of times 105 107 2 9.5 105.4 1.0X +to_json(time) 599 602 3 1.7 598.7 0.2X +write times to files 604 608 4 1.7 604.3 0.2X -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure -AMD EPYC 9V74 80-Core Processor +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1018-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz Read dates and timestamps: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------------------------- -read timestamp text from files 163 165 4 6.1 162.8 1.0X -read timestamps from files 987 997 12 1.0 987.2 0.2X -infer timestamps from files 1835 1847 20 0.5 1835.4 0.1X -read date text from files 150 153 3 6.7 149.8 1.1X -read date from files 621 623 2 1.6 621.4 0.3X -timestamp strings 145 146 1 6.9 145.2 1.1X -parse timestamps from Dataset[String] 1171 1185 21 0.9 1170.6 0.1X -infer timestamps from Dataset[String] 2012 2015 5 0.5 2011.7 0.1X -date strings 215 217 2 4.6 215.2 0.8X -parse dates from Dataset[String] 927 931 5 1.1 927.4 0.2X -from_json(timestamp) 1635 1639 6 0.6 1635.3 0.1X -from_json(date) 1405 1412 6 0.7 1405.0 0.1X -infer error timestamps from Dataset[String] with default format 1273 1276 5 0.8 1273.4 0.1X -infer error timestamps from Dataset[String] with user-provided format 1257 1260 3 0.8 1256.7 0.1X -infer error timestamps from 
Dataset[String] with legacy format 1258 1260 3 0.8 1258.0 0.1X +read timestamp text from files 149 153 6 6.7 149.3 1.0X +read timestamps from files 1131 1156 25 0.9 1130.6 0.1X +infer timestamps from files 11851 11867 20 0.1 11851.3 0.0X +read date text from files 140 143 5 7.2 139.8 1.1X +read date from files 777 789 20 1.3 776.7 0.2X +timestamp strings 138 139 1 7.2 138.5 1.1X +parse timestamps from Dataset[String] 1244 1246 2 0.8 1244.2 0.1X +infer timestamps from Dataset[String] 11836 11838 2 0.1 11836.2 0.0X +date strings 193 194 0 5.2 193.2 0.8X +parse dates from Dataset[String] 896 897 1 1.1 896.1 0.2X +from_json(timestamp) 2038 2048 9 0.5 2038.5 0.1X +from_json(date) 1782 1784 1 0.6 1782.3 0.1X +infer error timestamps from Dataset[String] with default format 10711 10735 35 0.1 10711.0 0.0X +infer error timestamps from Dataset[String] with user-provided format 10642 10647 7 0.1 10641.6 0.0X +infer error timestamps from Dataset[String] with legacy format 10419 10579 142 0.1 10419.3 0.0X +read time text from files 145 149 4 6.9 144.8 1.0X +read time from files 918 933 14 1.1 917.9 0.2X -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure -AMD EPYC 9V74 80-Core Processor +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1018-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz Filters pushdown: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -w/o filters 4817 4833 15 0.0 48167.4 1.0X -pushdown disabled 4772 4776 5 0.0 47721.9 1.0X -w/ filters 695 710 16 0.1 6949.2 6.9X +w/o filters 6663 6698 31 0.0 66628.4 1.0X +pushdown disabled 6764 6791 44 0.0 67644.7 1.0X +w/ filters 565 569 4 0.2 5654.5 11.8X -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure -AMD EPYC 9V74 80-Core Processor +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1018-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 
2.80GHz Partial JSON results: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -parse invalid JSON 2386 2451 111 0.0 238564.1 1.0X +parse invalid JSON 2197 2299 160 0.0 219679.8 1.0X diff --git a/sql/core/benchmarks/TimeBenchmark-results.txt b/sql/core/benchmarks/TimeBenchmark-results.txt new file mode 100644 index 0000000000000..b14a13da63970 --- /dev/null +++ b/sql/core/benchmarks/TimeBenchmark-results.txt @@ -0,0 +1,126 @@ +================================================================================================ +Current time +================================================================================================ + +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1018-azure +AMD EPYC 7763 64-Core Processor +current_time: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +current_time wholestage off 225 234 14 44.5 22.5 1.0X +current_time wholestage on 250 274 30 40.1 25.0 0.9X + + +================================================================================================ +make_time +================================================================================================ + +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1018-azure +AMD EPYC 7763 64-Core Processor +make_time: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +make_time wholestage off 476 534 82 21.0 47.6 1.0X +make_time wholestage on 465 474 9 21.5 46.5 1.0X + + +================================================================================================ +Parsing time 
+================================================================================================ + +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1018-azure +AMD EPYC 7763 64-Core Processor +to_time: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +to_time wholestage off 5782 5804 30 1.7 578.2 1.0X +to_time wholestage on 5782 5799 14 1.7 578.2 1.0X + + +================================================================================================ +Extract components from TIME +================================================================================================ + +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1018-azure +AMD EPYC 7763 64-Core Processor +hour of time: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +hour of time wholestage off 541 542 2 18.5 54.1 1.0X +hour of time wholestage on 528 534 5 18.9 52.8 1.0X + +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1018-azure +AMD EPYC 7763 64-Core Processor +minute of time: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +minute of time wholestage off 532 536 6 18.8 53.2 1.0X +minute of time wholestage on 527 533 5 19.0 52.7 1.0X + +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1018-azure +AMD EPYC 7763 64-Core Processor +second of time: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +second of time wholestage off 529 532 5 18.9 52.9 1.0X +second of time wholestage on 525 527 2 19.0 52.5 1.0X + + 
+================================================================================================ +time_trunc +================================================================================================ + +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1018-azure +AMD EPYC 7763 64-Core Processor +time_trunc HOUR: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +time_trunc HOUR wholestage off 897 901 5 11.1 89.7 1.0X +time_trunc HOUR wholestage on 911 916 4 11.0 91.1 1.0X + +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1018-azure +AMD EPYC 7763 64-Core Processor +time_trunc MINUTE: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +time_trunc MINUTE wholestage off 985 985 1 10.2 98.5 1.0X +time_trunc MINUTE wholestage on 965 971 6 10.4 96.5 1.0X + +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1018-azure +AMD EPYC 7763 64-Core Processor +time_trunc SECOND: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +time_trunc SECOND wholestage off 1005 1010 7 9.9 100.5 1.0X +time_trunc SECOND wholestage on 987 992 6 10.1 98.7 1.0X + + +================================================================================================ +time_diff +================================================================================================ + +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1018-azure +AMD EPYC 7763 64-Core Processor +time_diff: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
+------------------------------------------------------------------------------------------------------------------------ +time_diff wholestage off 1075 1082 10 9.3 107.5 1.0X +time_diff wholestage on 1034 1044 9 9.7 103.4 1.0X + + +================================================================================================ +TIME +/- interval +================================================================================================ + +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1018-azure +AMD EPYC 7763 64-Core Processor +TIME +/- interval: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +time + interval hour 481 484 3 20.8 48.1 1.0X +time + interval minute 477 489 16 21.0 47.7 1.0X +time + interval second 479 482 3 20.9 47.9 1.0X +time - interval hour 481 488 12 20.8 48.1 1.0X + + +================================================================================================ +Conversion from/to external types +================================================================================================ + +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1018-azure +AMD EPYC 7763 64-Core Processor +To/from java.time.LocalTime: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +From java.time.LocalTime 181 188 6 27.6 36.2 1.0X +Collect java.time.LocalTime 930 1084 173 5.4 186.0 0.2X + + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/ExtractBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/ExtractBenchmark.scala index 2fffa265cb735..b4668bdeb65a7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/ExtractBenchmark.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/ExtractBenchmark.scala @@ -64,10 +64,13 @@ object ExtractBenchmark extends SqlBasedBenchmark { private def castExpr(from: String): String = from match { case "timestamp" => "timestamp_seconds(id)" case "date" => "cast(timestamp_seconds(id) as date)" + case "time" => "make_time(cast(mod(id, 24) as int), cast(mod(id, 60) as int), " + + "cast(mod(id, 60) as decimal(8,6)))" case "interval" => "(cast(timestamp_seconds(id) as date) - date'0001-01-01') + " + "(timestamp_seconds(id) - timestamp'1000-01-01 01:02:03.123456')" case other => throw new IllegalArgumentException( - s"Unsupported column type $other. Valid column types are 'timestamp' and 'date'") + s"Unsupported column type $other. Valid column types are " + + "'timestamp', 'date', 'time', and 'interval'") } private def run( @@ -88,13 +91,16 @@ object ExtractBenchmark extends SqlBasedBenchmark { } override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { + spark.conf.set(SQLConf.TIME_TYPE_ENABLED.key, "true") val N = 10000000L val datetimeFields = Seq("YEAR", "YEAROFWEEK", "QUARTER", "MONTH", "WEEK", "DAY", "DAYOFWEEK", "DOW", "DOW_ISO", "DAYOFWEEK_ISO", "DOY", "HOUR", "MINUTE", "SECOND") val intervalFields = Seq("YEAR", "MONTH", "DAY", "HOUR", "MINUTE", "SECOND") + val timeFields = Seq("HOUR", "MINUTE", "SECOND") val settings = Map( "timestamp" -> datetimeFields, "date" -> datetimeFields, + "time" -> timeFields, "interval" -> intervalFields) for {(dataType, fields) <- settings; func <- Seq("extract", "date_part")} { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TimeBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TimeBenchmark.scala new file mode 100644 index 0000000000000..ae1b6884f88f8 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TimeBenchmark.scala @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one 
or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.benchmark + +import java.time.LocalTime + +import org.apache.spark.benchmark.Benchmark +import org.apache.spark.sql.internal.SQLConf + +/** + * Synthetic benchmark for TIME data type functions. + * To run this benchmark: + * {{{ + * 1. without sbt: + * bin/spark-submit --class <this class> + * --jars <spark core test jar>,<spark catalyst test jar> <spark sql test jar> + * 2. build/sbt "sql/Test/runMain <this class>" + * 3. generate result: + * SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/Test/runMain <this class>" + * Results will be written to "benchmarks/TimeBenchmark-results.txt".
+ * }}} + */ +object TimeBenchmark extends SqlBasedBenchmark { + private def doBenchmark(cardinality: Int, exprs: String*): Unit = { + spark.range(cardinality) + .selectExpr(exprs: _*) + .noop() + } + + private def run(cardinality: Int, name: String, exprs: String*): Unit = { + codegenBenchmark(name, cardinality) { + doBenchmark(cardinality, exprs: _*) + } + } + + override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { + spark.conf.set(SQLConf.TIME_TYPE_ENABLED.key, "true") + val N = 10000000 + // Generate TIME values using make_time(hour, minute, decimal_seconds) + val timeExpr = "make_time(cast(mod(id, 24) as int), cast(mod(id, 60) as int), " + + "cast(mod(id, 60) as decimal(8,6)))" + + runBenchmark("Current time") { + run(N, "current_time", "current_time()") + } + + runBenchmark("make_time") { + run(N, "make_time", timeExpr) + } + + runBenchmark("Parsing time") { + val timeStrExpr = "concat(lpad(cast(mod(id, 24) as string), 2, '0'), ':', " + + "lpad(cast(mod(id, 60) as string), 2, '0'), ':', " + + "lpad(cast(mod(id, 60) as string), 2, '0'))" + run(N, "to_time", s"to_time($timeStrExpr, 'HH:mm:ss')") + } + + runBenchmark("Extract components from TIME") { + run(N, "hour of time", s"hour($timeExpr)") + run(N, "minute of time", s"minute($timeExpr)") + run(N, "second of time", s"second($timeExpr)") + } + + runBenchmark("time_trunc") { + Seq("HOUR", "MINUTE", "SECOND").foreach { level => + run(N, s"time_trunc $level", s"time_trunc('$level', $timeExpr)") + } + } + + runBenchmark("time_diff") { + val timeExpr2 = "make_time(cast(mod(id + 1, 24) as int), cast(mod(id + 2, 60) as int), " + + "cast(mod(id + 3, 60) as decimal(8,6)))" + run(N, "time_diff", s"time_diff('SECOND', $timeExpr, $timeExpr2)") + } + + runBenchmark("TIME +/- interval") { + // Use make_time with hour < 22 to avoid overflow when adding intervals + val safeTimeExpr = "make_time(cast(mod(id, 20) as int), cast(mod(id, 60) as int), " + + "cast(mod(id, 60) as decimal(8,6)))" + val benchmark = new 
Benchmark("TIME +/- interval", N, output = output) + benchmark.addCase("time + interval hour") { _ => + doBenchmark(N, s"$safeTimeExpr + interval 1 hour") + } + benchmark.addCase("time + interval minute") { _ => + doBenchmark(N, s"$safeTimeExpr + interval 30 minute") + } + benchmark.addCase("time + interval second") { _ => + doBenchmark(N, s"$safeTimeExpr + interval 45 second") + } + benchmark.addCase("time - interval hour") { _ => + // Use hours >= 1 to avoid underflow + val subTimeExpr = "make_time(cast(mod(id, 20) + 2 as int), cast(mod(id, 60) as int), " + + "cast(mod(id, 60) as decimal(8,6)))" + doBenchmark(N, s"$subTimeExpr - interval 1 hour") + } + benchmark.run() + } + + runBenchmark("Conversion from/to external types") { + import spark.implicits._ + val rowsNum = 5000000 + val numIters = 3 + val benchmark = new Benchmark("To/from java.time.LocalTime", rowsNum, output = output) + benchmark.addCase("From java.time.LocalTime", numIters) { _ => + spark.range(rowsNum) + .map(nanos => LocalTime.ofNanoOfDay(nanos % 86400000000000L)) + .noop() + } + def localTimes = { + spark.range(0, rowsNum, 1, 1) + .map(nanos => LocalTime.ofNanoOfDay(nanos % 86400000000000L)) + } + benchmark.addCase("Collect java.time.LocalTime", numIters) { _ => + localTimes.collect() + } + benchmark.run() + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVBenchmark.scala index 524c222062150..8b121a1a012f9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVBenchmark.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.execution.datasources.csv import java.io.File -import java.time.{Instant, LocalDate} +import java.time.{Instant, LocalDate, LocalTime} import org.apache.spark.benchmark.Benchmark import 
org.apache.spark.sql.{Column, Dataset, Row} @@ -148,6 +148,7 @@ object CSVBenchmark extends SqlBasedBenchmark { } private def datetimeBenchmark(rowsNum: Int, numIters: Int): Unit = { + spark.conf.set(SQLConf.TIME_TYPE_ENABLED.key, "true") def timestamps = { spark.range(0, rowsNum, 1, 1).mapPartitions { iter => iter.map(Instant.ofEpochSecond(_)) @@ -190,6 +191,26 @@ object CSVBenchmark extends SqlBasedBenchmark { dates.write.option("header", true).mode("overwrite").csv(dateDir) } + val timeDir = new File(path, "time").getAbsolutePath + + def times = { + spark.range(0, rowsNum, 1, 1).mapPartitions { iter => + iter.map(t => LocalTime.ofNanoOfDay(t % 86400000000000L)) + }.select($"value".as("time")) + } + + writeBench.addCase("Create a dataset of times", numIters) { _ => + times.noop() + } + + writeBench.addCase("to_csv(time)", numIters) { _ => + times.select(to_csv(struct($"time"))).noop() + } + + writeBench.addCase("write times to files", numIters) { _ => + times.write.option("header", true).mode("overwrite").csv(timeDir) + } + writeBench.run() val readBench = new Benchmark("Read dates and timestamps", rowsNum, output = output) @@ -323,6 +344,20 @@ object CSVBenchmark extends SqlBasedBenchmark { } } + val timeSchema = new StructType().add("time", TimeType()) + + readBench.addCase("read time text from files", numIters) { _ => + spark.read.text(timeDir).noop() + } + + readBench.addCase("read time from files", numIters) { _ => + val ds = spark.read + .option("header", true) + .schema(timeSchema) + .csv(timeDir) + ds.noop() + } + readBench.run() } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonBenchmark.scala index 94a2ccc41d30b..06cc7a9c37cfb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonBenchmark.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonBenchmark.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.execution.datasources.json import java.io.File -import java.time.{Instant, LocalDate} +import java.time.{Instant, LocalDate, LocalTime} import org.apache.spark.benchmark.Benchmark import org.apache.spark.sql.{Column, Dataset, Row} @@ -365,6 +365,7 @@ object JsonBenchmark extends SqlBasedBenchmark { } private def datetimeBenchmark(rowsNum: Int, numIters: Int): Unit = { + spark.conf.set(SQLConf.TIME_TYPE_ENABLED.key, "true") def timestamps = { spark.range(0, rowsNum, 1, 1).mapPartitions { iter => iter.map(Instant.ofEpochSecond(_)) @@ -407,6 +408,26 @@ object JsonBenchmark extends SqlBasedBenchmark { dates.write.option("header", true).mode("overwrite").json(dateDir) } + val timeDir = new File(path, "time").getAbsolutePath + + def times = { + spark.range(0, rowsNum, 1, 1).mapPartitions { iter => + iter.map(t => LocalTime.ofNanoOfDay(t % 86400000000000L)) + }.select($"value".as("time")) + } + + writeBench.addCase("Create a dataset of times", numIters) { _ => + times.noop() + } + + writeBench.addCase("to_json(time)", numIters) { _ => + times.select(to_json(struct($"time"))).noop() + } + + writeBench.addCase("write times to files", numIters) { _ => + times.write.option("header", true).mode("overwrite").json(timeDir) + } + writeBench.run() val readBench = new Benchmark("Read dates and timestamps", rowsNum, output = output) @@ -508,6 +529,16 @@ object JsonBenchmark extends SqlBasedBenchmark { } } + val timeSchema = new StructType().add("time", TimeType()) + + readBench.addCase("read time text from files", numIters) { _ => + spark.read.text(timeDir).noop() + } + + readBench.addCase("read time from files", numIters) { _ => + spark.read.schema(timeSchema).json(timeDir).noop() + } + readBench.run() } }