From 866a504af2b7e930366e51862ba455d74850d9b3 Mon Sep 17 00:00:00 2001 From: Aleksey Midenkov Date: Tue, 23 Jun 2026 23:57:25 +0300 Subject: [PATCH 1/4] WITHOUT_ABI_CHECK followup Followup for b337e14440b as info_src takes time too. info_src does not make much sense without ABI check. --- CMakeLists.txt | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 787537baf5ebb..0116c35a6ba47 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -571,21 +571,23 @@ CONFIGURE_FILE( ${CMAKE_SOURCE_DIR}/cmake/info_macros.cmake.in ${CMAKE_BINARY_DIR}/info_macros.cmake @ONLY) -# Handle the "INFO_*" files. -INCLUDE(${CMAKE_BINARY_DIR}/info_macros.cmake) -# Source: This can be done during the cmake phase, all information is -# available, but should be repeated on each "make" just in case someone -# does "cmake ; make ; git pull ; make". -CREATE_INFO_SRC(${CMAKE_BINARY_DIR}/Docs) -ADD_CUSTOM_TARGET(INFO_SRC ALL - COMMAND ${CMAKE_COMMAND} -P ${CMAKE_SOURCE_DIR}/cmake/info_src.cmake - WORKING_DIRECTORY ${CMAKE_BINARY_DIR} -) -# Build flags: This must be postponed to the make phase. -ADD_CUSTOM_TARGET(INFO_BIN ALL - COMMAND ${CMAKE_COMMAND} -P ${CMAKE_SOURCE_DIR}/cmake/info_bin.cmake - WORKING_DIRECTORY ${CMAKE_BINARY_DIR} -) +IF (NOT WITHOUT_ABI_CHECK) + # Handle the "INFO_*" files. + INCLUDE(${CMAKE_BINARY_DIR}/info_macros.cmake) + # Source: This can be done during the cmake phase, all information is + # available, but should be repeated on each "make" just in case someone + # does "cmake ; make ; git pull ; make". + CREATE_INFO_SRC(${CMAKE_BINARY_DIR}/Docs) + ADD_CUSTOM_TARGET(INFO_SRC ALL + COMMAND ${CMAKE_COMMAND} -P ${CMAKE_SOURCE_DIR}/cmake/info_src.cmake + WORKING_DIRECTORY ${CMAKE_BINARY_DIR} + ) + # Build flags: This must be postponed to the make phase. 
+ ADD_CUSTOM_TARGET(INFO_BIN ALL + COMMAND ${CMAKE_COMMAND} -P ${CMAKE_SOURCE_DIR}/cmake/info_bin.cmake + WORKING_DIRECTORY ${CMAKE_BINARY_DIR} + ) +ENDIF() INSTALL_DOCUMENTATION(README.md CREDITS COPYING THIRDPARTY COMPONENT Readme) From 5d7286aa537a03d8c51a64f4f7d78c269e0a416c Mon Sep 17 00:00:00 2001 From: Aleksey Midenkov Date: Tue, 28 Apr 2026 00:55:35 +0300 Subject: [PATCH 2/4] MDEV-39384 Debug trace mtrr --mysqld=--debug=d,vers_trx_id,query:i:o,/tmp/good.log bug/v.trx_id,debug --- sql/filesort.cc | 8 +++--- sql/item_vers.cc | 5 +++- sql/sql_test.h | 4 +-- sql/table.cc | 65 +++++++++++++++++++++++++++++++++++++----------- 4 files changed, 61 insertions(+), 21 deletions(-) diff --git a/sql/filesort.cc b/sql/filesort.cc index b9586c9ccd3e1..776e3d16724b6 100644 --- a/sql/filesort.cc +++ b/sql/filesort.cc @@ -723,9 +723,9 @@ String dbug_format_row(TABLE *table, const uchar *rec, bool print_names) Example usage: (gdb) p dbug_print_row(table, table->record[1]) */ -const char *dbug_print_row(TABLE *table, const uchar *rec) +const char *dbug_print_row(TABLE *table, const uchar *rec, bool print_names) { - String row= dbug_format_row(table, table->record[0]); + String row= dbug_format_row(table, rec, print_names); if (row.length() > sizeof dbug_row_print_buf - 1) return "Couldn't fit into buffer"; memcpy(dbug_row_print_buf, row.c_ptr(), row.length()); @@ -743,9 +743,9 @@ const char *dbug_print_row(TABLE *table, const uchar *rec) Only columns in table->read_set are printed */ -const char* dbug_print_table_row(TABLE *table) +const char* dbug_print_table_row(TABLE *table, bool print_names) { - return dbug_print_row(table, table->record[0]); + return dbug_print_row(table, table->record[0], print_names); } diff --git a/sql/item_vers.cc b/sql/item_vers.cc index b806b8da78b69..d15472155d178 100644 --- a/sql/item_vers.cc +++ b/sql/item_vers.cc @@ -183,6 +183,7 @@ bool Item_func_trt_trx_sees::val_bool() { THD *thd= current_thd; + DBUG_ENTER("trans_sees"); DBUG_ASSERT(thd); 
DBUG_ASSERT(arg_count > 1); @@ -192,5 +193,7 @@ Item_func_trt_trx_sees::val_bool() TR_table trt(thd); null_value= trt.query_sees(result, trx_id1, trx_id0); - return result; + DBUG_PRINT("vers_trx_id", ("%llu %s %llu, null_value: %d, accept_eq: %d", + trx_id1, (result ? "sees" : "sees NOT"), trx_id0, null_value, accept_eq)); + DBUG_RETURN(result); } diff --git a/sql/sql_test.h b/sql/sql_test.h index ce154ed9954f7..617bfd6958c80 100644 --- a/sql/sql_test.h +++ b/sql/sql_test.h @@ -45,8 +45,8 @@ const char *dbug_print(SELECT_LEX *x); const char *dbug_print(SELECT_LEX_UNIT *x); /* Print current table row */ -const char* dbug_print_table_row(TABLE *table); -const char *dbug_print_row(TABLE *table, const uchar *rec); +const char* dbug_print_table_row(TABLE *table, bool print_names= false); +const char *dbug_print_row(TABLE *table, const uchar *rec, bool print_names= false); /* Check which MEM_ROOT the data is on */ bool dbug_is_mem_on_mem_root(const MEM_ROOT *mem_root, void *ptr); diff --git a/sql/table.cc b/sql/table.cc index 6a0bfa0c9b9cb..5e31306ceaca0 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -51,6 +51,7 @@ #endif #include "log_event.h" // MAX_TABLE_MAP_ID #include "sql_class.h" +#include "sql_test.h" // dbug_print_table_row() /* For MySQL 5.7 virtual fields */ #define MYSQL57_GENERATED_FIELD 128 @@ -10286,8 +10287,9 @@ bool TR_table::update(ulonglong start_id, ulonglong end_id) #define newx new (thd->mem_root) bool TR_table::query(ulonglong trx_id) { + DBUG_ENTER("query(trx)"); if (!table && open()) - return false; + DBUG_RETURN(false); SQL_SELECT_auto select; READ_RECORD info; int error; @@ -10312,16 +10314,24 @@ bool TR_table::query(ulonglong trx_id) while (!(error= info.read_record()) && !thd->killed && !thd->is_error()) { if (select->skip_record(thd) > 0) - return true; + DBUG_RETURN(true); } my_error(ER_VERS_NO_TRX_ID, MYF(0), (longlong) trx_id); - return false; + DBUG_RETURN(false); } bool TR_table::query(MYSQL_TIME &commit_time, bool backwards) { + 
DBUG_ENTER("query(ts)"); + +#ifndef DBUG_OFF + char dbug_commit_time[MAX_DATE_STRING_REP_LENGTH]; + my_time_to_str(&commit_time, dbug_commit_time, 6); +#endif /* DBUG_OFF */ + + if (!table && open()) - return false; + DBUG_RETURN(false); SQL_SELECT_auto select; READ_RECORD info; int error; @@ -10337,11 +10347,11 @@ bool TR_table::query(MYSQL_TIME &commit_time, bool backwards) else conds= newx Item_func_le(thd, field, value); if (unlikely((error= setup_conds(thd, this, dummy, &conds)))) - return false; + DBUG_RETURN(false); // FIXME: (performance) force index 'commit_timestamp' select= make_select(table, 0, 0, conds, NULL, 0, &error); if (unlikely(error || !select)) - return false; + DBUG_RETURN(false); error= init_read_record(&info, thd, table, select, NULL, 1 /* use_record_cache */, true /* print_error */, false /* disable_rr_cache */); @@ -10378,10 +10388,22 @@ bool TR_table::query(MYSQL_TIME &commit_time, bool backwards) found= false; break; } - } + } if (found) + { restore_record(table, record[1]); - return found; + DBUG_LOCK_FILE; + DBUG_PRINT("vers_trx_id", ("%s%s: %s", dbug_commit_time, + (backwards ? "(b)" : ""), + dbug_print_table_row(table))); + DBUG_UNLOCK_FILE; + } + else + DBUG_PRINT("vers_trx_id", ("%s%s: %s", dbug_commit_time, + (backwards ? 
"(b)" : ""), + "Not found!")); + + DBUG_RETURN(found); } #undef newx @@ -10389,27 +10411,34 @@ bool TR_table::query_sees(bool &result, ulonglong trx_id1, ulonglong trx_id0, ulonglong commit_id1, enum_tx_isolation iso_level1, ulonglong commit_id0) { + DBUG_ENTER("query_sees"); if (trx_id1 == trx_id0) { - return false; + DBUG_PRINT("vers_trx_id", ("%llu == %llu (result depends on accept_eq)", trx_id1, trx_id0)); + DBUG_RETURN(false); } if (trx_id1 == ULONGLONG_MAX || trx_id0 == 0) { + DBUG_PRINT("vers_trx_id", ("%llu sees %llu (border values)", trx_id1, trx_id0)); result= true; - return false; + DBUG_RETURN(false); } if (trx_id0 == ULONGLONG_MAX || trx_id1 == 0) { + DBUG_PRINT("vers_trx_id", ("%llu sees NOT %llu (border values)", trx_id1, trx_id0)); result= false; - return false; + DBUG_RETURN(false); } if (!commit_id1) { if (!query(trx_id1)) - return true; + { + DBUG_PRINT("vers_trx_id", ("query(%llu) failed", trx_id1)); + DBUG_RETURN(true); + } commit_id1= (*this)[FLD_COMMIT_ID]->val_int(); iso_level1= iso_level(); @@ -10418,7 +10447,10 @@ bool TR_table::query_sees(bool &result, ulonglong trx_id1, ulonglong trx_id0, if (!commit_id0) { if (!query(trx_id0)) - return true; + { + DBUG_PRINT("vers_trx_id", ("query(%llu) failed", trx_id0)); + DBUG_RETURN(true); + } commit_id0= (*this)[FLD_COMMIT_ID]->val_int(); } @@ -10428,14 +10460,19 @@ bool TR_table::query_sees(bool &result, ulonglong trx_id1, ulonglong trx_id0, // Concurrent transactions: TX1 committed after TX0 and TX1 is read (un)committed || (commit_id1 > commit_id0 && iso_level1 < ISO_REPEATABLE_READ)) { + DBUG_PRINT("vers_trx_id", ("(%llu, %llu) sees (%llu, %llu); %s; iso_level: %d", + trx_id1, commit_id1, trx_id0, commit_id0, + (trx_id1 > commit_id0 ? 
"trivial" : "concurrent"), iso_level1)); result= true; } else // All other cases: TX1 does not see TX0 { + DBUG_PRINT("vers_trx_id", ("(%llu, %llu) sees NOT (%llu, %llu); iso_level: %d", + trx_id1, commit_id1, trx_id0, commit_id0, iso_level1)); result= false; } - return false; + DBUG_RETURN(false); } void TR_table::warn_schema_incorrect(const char *reason) From 8090fff8bcfb3af649ce3c1c9f3a4ef2b6902615 Mon Sep 17 00:00:00 2001 From: Aleksey Midenkov Date: Tue, 28 Apr 2026 14:50:13 +0300 Subject: [PATCH 3/4] MDEV-39384 Wrong result when selecting from precise-versioned table Const item behave wrongly in read_record loop. const_item() turns on cache in Arg_comparator::cache_converted_constant(). The fix manipulates table->map to force Item_field to be non-const. --- mysql-test/suite/versioning/r/trx_id.result | 45 +++++++++++++++++++++ mysql-test/suite/versioning/t/trx_id.test | 39 ++++++++++++++++++ sql/table.cc | 6 +++ 3 files changed, 90 insertions(+) diff --git a/mysql-test/suite/versioning/r/trx_id.result b/mysql-test/suite/versioning/r/trx_id.result index 9beec414fbba4..e134626ca8dd5 100644 --- a/mysql-test/suite/versioning/r/trx_id.result +++ b/mysql-test/suite/versioning/r/trx_id.result @@ -611,3 +611,48 @@ ERROR HY000: Lock wait timeout exceeded; try restarting transaction disconnect con1; connection default; drop table t1; +# +# MDEV-39384 Wrong result when selecting from precise-versioned table +# +create or replace table t1 ( +x int primary key, +sys_trx_start bigint(20) unsigned as row start invisible, +sys_trx_end bigint(20) unsigned as row end invisible, +period for system_time (sys_trx_start, sys_trx_end) +) with system versioning engine innodb; +create or replace table t2 ( +x int primary key, +sys_trx_start timestamp(6) as row start invisible, +sys_trx_end timestamp(6) as row end invisible, +period for system_time (sys_trx_start, sys_trx_end) +) with system versioning engine innodb; +set timestamp= unix_timestamp('2000-01-01 00:00:00'); +insert into 
t1 values (1); +insert into t2 values (1); +set timestamp= unix_timestamp('2000-01-01 00:00:10'); +delete from t1; +delete from t2; +set timestamp= unix_timestamp('2000-01-01 00:00:20'); +insert into t1 values (2); +insert into t2 values (2); +# TRX result +select * from t1 for system_time all; +x +1 +2 +select * from t1 for system_time as of '2000-01-01 00:00:09'; +x +1 +select * from t1 for system_time as of '2000-01-01 00:00:10'; +x +# Timestamp result +select * from t2 for system_time all; +x +1 +2 +select * from t2 for system_time as of '2000-01-01 00:00:09'; +x +1 +select * from t2 for system_time as of '2000-01-01 00:00:10'; +x +drop tables t1, t2; diff --git a/mysql-test/suite/versioning/t/trx_id.test b/mysql-test/suite/versioning/t/trx_id.test index 8f96500f34005..8ea25b206c235 100644 --- a/mysql-test/suite/versioning/t/trx_id.test +++ b/mysql-test/suite/versioning/t/trx_id.test @@ -643,3 +643,42 @@ alter table xx; --disconnect con1 --connection default drop table t1; + +--echo # +--echo # MDEV-39384 Wrong result when selecting from precise-versioned table +--echo # +create or replace table t1 ( + x int primary key, + sys_trx_start bigint(20) unsigned as row start invisible, + sys_trx_end bigint(20) unsigned as row end invisible, + period for system_time (sys_trx_start, sys_trx_end) +) with system versioning engine innodb; + +create or replace table t2 ( + x int primary key, + sys_trx_start timestamp(6) as row start invisible, + sys_trx_end timestamp(6) as row end invisible, + period for system_time (sys_trx_start, sys_trx_end) +) with system versioning engine innodb; + +set timestamp= unix_timestamp('2000-01-01 00:00:00'); +insert into t1 values (1); +insert into t2 values (1); +set timestamp= unix_timestamp('2000-01-01 00:00:10'); +delete from t1; +delete from t2; +set timestamp= unix_timestamp('2000-01-01 00:00:20'); +insert into t1 values (2); +insert into t2 values (2); + +--echo # TRX result +select * from t1 for system_time all; +select * from t1 
for system_time as of '2000-01-01 00:00:09'; +select * from t1 for system_time as of '2000-01-01 00:00:10'; + +--echo # Timestamp result +select * from t2 for system_time all; +select * from t2 for system_time as of '2000-01-01 00:00:09'; +select * from t2 for system_time as of '2000-01-01 00:00:10'; + +drop tables t1, t2; diff --git a/sql/table.cc b/sql/table.cc index 5e31306ceaca0..ed6d89283e288 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -10297,6 +10297,9 @@ bool TR_table::query(ulonglong trx_id) SELECT_LEX &slex= *(thd->lex->first_select_lex()); Name_resolution_context_backup backup(slex.context, *this); Item *field= newx Item_field(thd, &slex.context, (*this)[FLD_TRX_ID]); + /* Force Item_field to be non-const */ + SCOPE_VALUE(table->map, (table_map) 1); + DBUG_ASSERT(!field->const_item()); Item *value= newx Item_int(thd, trx_id); COND *conds= newx Item_func_eq(thd, field, value); if (unlikely((error= setup_conds(thd, this, dummy, &conds)))) @@ -10339,6 +10342,9 @@ bool TR_table::query(MYSQL_TIME &commit_time, bool backwards) SELECT_LEX &slex= *(thd->lex->first_select_lex()); Name_resolution_context_backup backup(slex.context, *this); Item *field= newx Item_field(thd, &slex.context, (*this)[FLD_COMMIT_TS]); + /* Force Item_field to be non-const */ + SCOPE_VALUE(table->map, (table_map) 1); + DBUG_ASSERT(!field->const_item()); Datetime dt(&commit_time); Item *value= newx Item_datetime_literal(thd, &dt, 6); COND *conds; From 8acdfd4784eaf35d56def54b6cdad3a8a8537bda Mon Sep 17 00:00:00 2001 From: Aleksey Midenkov Date: Thu, 25 Jun 2026 01:11:44 +0300 Subject: [PATCH 4/4] MDEV-39384 Use index in TR_table::query TR_table::query() used table scan. Now it utilizes the index if possible, with minimum validity detection. Getting trx_id by commit_ts is still suboptimal as it does two index accesses (as limited by fields in commit_ts index). When the index detection fails TR_table::query() falls back to the original table scanning method. 
--- .../suite/versioning/r/trx_id,scan.rdiff | 77 ++++ mysql-test/suite/versioning/r/trx_id.result | 3 +- .../suite/versioning/t/trx_id.combinations | 2 + mysql-test/suite/versioning/t/trx_id.test | 26 +- sql/share/errmsg-utf8.txt | 2 + sql/table.cc | 379 ++++++++++++++---- sql/table.h | 26 ++ 7 files changed, 431 insertions(+), 84 deletions(-) create mode 100644 mysql-test/suite/versioning/r/trx_id,scan.rdiff create mode 100644 mysql-test/suite/versioning/t/trx_id.combinations diff --git a/mysql-test/suite/versioning/r/trx_id,scan.rdiff b/mysql-test/suite/versioning/r/trx_id,scan.rdiff new file mode 100644 index 0000000000000..6d731832fb0a4 --- /dev/null +++ b/mysql-test/suite/versioning/r/trx_id,scan.rdiff @@ -0,0 +1,77 @@ +--- trx_id.result ++++ trx_id,scan.reject +@@ -134,30 +134,42 @@ + 1 + 2 + 3 ++Warnings: ++Warning 4193 mysql.transaction_registry has unexpected index definitions; using slow scan. + select * from t1 for system_time as of timestamp @ts1; + x + 100 + 3 ++Warnings: ++Warning 4193 mysql.transaction_registry has unexpected index definitions; using slow scan. + select * from t1 for system_time as of transaction @trx_id2; + x + 100 + 2 + 3 ++Warnings: ++Warning 4193 mysql.transaction_registry has unexpected index definitions; using slow scan. + select * from t1 for system_time as of timestamp @ts2; + x + 100 + 2 + 3 ++Warnings: ++Warning 4193 mysql.transaction_registry has unexpected index definitions; using slow scan. + select * from t1 for system_time as of transaction @trx_id3; + x + 100 + 3 ++Warnings: ++Warning 4193 mysql.transaction_registry has unexpected index definitions; using slow scan. + select * from t1 for system_time as of timestamp @ts3; + x + 100 + 1 + 2 + 3 ++Warnings: ++Warning 4193 mysql.transaction_registry has unexpected index definitions; using slow scan. 
+ # + # MDEV-15427 IB: TRX_ID based operations inside transaction generate history + # +@@ -264,6 +276,8 @@ + delete from t1; + select * from t1 for system_time as of timestamp'1990-1-1 00:00'; + x ++Warnings: ++Warning 4193 mysql.transaction_registry has unexpected index definitions; using slow scan. + select * from t1 for system_time as of NULL; + x + # MDEV-16024 transaction_registry.begin_timestamp is wrong for explicit transactions +@@ -419,6 +433,9 @@ + # + SELECT * FROM t1 FOR SYSTEM_TIME AS OF '2038-12-30 00:00:00'; + x ++1 ++Warnings: ++Warning 4193 mysql.transaction_registry has unexpected index definitions; using slow scan. + SELECT * FROM t2 FOR SYSTEM_TIME AS OF '2038-12-30 00:00:00'; + x + DROP TABLE t1, t2; +@@ -642,8 +659,12 @@ + select * from t1 for system_time as of '2000-01-01 00:00:09'; + x + 1 ++Warnings: ++Warning 4193 mysql.transaction_registry has unexpected index definitions; using slow scan. + select * from t1 for system_time as of '2000-01-01 00:00:10'; + x ++Warnings: ++Warning 4193 mysql.transaction_registry has unexpected index definitions; using slow scan. 
+ # Timestamp result + select * from t2 for system_time all; + x diff --git a/mysql-test/suite/versioning/r/trx_id.result b/mysql-test/suite/versioning/r/trx_id.result index e134626ca8dd5..abbfc44ad6243 100644 --- a/mysql-test/suite/versioning/r/trx_id.result +++ b/mysql-test/suite/versioning/r/trx_id.result @@ -419,7 +419,6 @@ ERROR HY000: Transaction-precise system versioning for `t2` is not supported # SELECT * FROM t1 FOR SYSTEM_TIME AS OF '2038-12-30 00:00:00'; x -1 SELECT * FROM t2 FOR SYSTEM_TIME AS OF '2038-12-30 00:00:00'; x DROP TABLE t1, t2; @@ -552,7 +551,7 @@ COUNT(*) 1 DROP TABLE t; SET @@SYSTEM_VERSIONING_ALTER_HISTORY=ERROR; -SELECT count(*) from mysql.transaction_registry where begin_timestamp>=commit_timestamp; +SELECT count(*) from mysql.transaction_registry where begin_timestamp > commit_timestamp; count(*) 0 # MDEV-18875 Assertion `thd->transaction.stmt.ha_list == __null || diff --git a/mysql-test/suite/versioning/t/trx_id.combinations b/mysql-test/suite/versioning/t/trx_id.combinations new file mode 100644 index 0000000000000..50514e6315019 --- /dev/null +++ b/mysql-test/suite/versioning/t/trx_id.combinations @@ -0,0 +1,2 @@ +[scan] +[lookup] diff --git a/mysql-test/suite/versioning/t/trx_id.test b/mysql-test/suite/versioning/t/trx_id.test index 8ea25b206c235..737a3540b3e13 100644 --- a/mysql-test/suite/versioning/t/trx_id.test +++ b/mysql-test/suite/versioning/t/trx_id.test @@ -5,11 +5,24 @@ if (!$TEST_VERSIONING_SO) --source include/have_innodb.inc --source include/have_partition.inc --source include/default_charset.inc +--source include/maybe_debug.inc --disable_query_log +--error 0,ER_PLUGIN_INSTALLED --eval install plugin test_versioning soname '$TEST_VERSIONING_SO' --enable_query_log +if ($MTR_COMBINATION_SCAN) +{ + if (!$have_debug) + { + --skip Requires debug build + } + --disable_query_log + set debug_dbug= '+d,vers_trt_scan'; + --enable_query_log +} + set default_storage_engine= innodb; create or replace table t1 ( @@ -280,7 +293,10 
@@ commit; --disable_cursor_protocol select row_start from t1 into @trx_id; --enable_cursor_protocol +# --ps does not produce warning in scan combination +--disable_warnings select trt_begin_ts(@trx_id) <= @ts1 as BEGIN_TS_GOOD; +--enable_warnings drop table t1; @@ -573,7 +589,7 @@ SELECT COUNT(*) FROM t FOR SYSTEM_TIME ALL; DROP TABLE t; SET @@SYSTEM_VERSIONING_ALTER_HISTORY=ERROR; -SELECT count(*) from mysql.transaction_registry where begin_timestamp>=commit_timestamp; +SELECT count(*) from mysql.transaction_registry where begin_timestamp > commit_timestamp; --echo # MDEV-18875 Assertion `thd->transaction.stmt.ha_list == __null || --echo # trans == &thd->transaction.stmt' failed or bogus ER_DUP_ENTRY upon @@ -682,3 +698,11 @@ select * from t2 for system_time as of '2000-01-01 00:00:09'; select * from t2 for system_time as of '2000-01-01 00:00:10'; drop tables t1, t2; + +if ($MTR_COMBINATION_SCAN) +{ + # Don't use @old_dbug pattern as truncates command-line --debug=O file! + --disable_query_log + set debug_dbug= '-d,vers_trt_scan'; + --enable_query_log +} diff --git a/sql/share/errmsg-utf8.txt b/sql/share/errmsg-utf8.txt index ad124f180c833..aee723b857770 100644 --- a/sql/share/errmsg-utf8.txt +++ b/sql/share/errmsg-utf8.txt @@ -10758,3 +10758,5 @@ ER_CM_OPTION_MISSING_REQUIREMENT eng "CHANGE MASTER TO option '%s=%s' is missing requirement %s" ER_SLAVE_STATEMENT_TIMEOUT 70100 eng "Slave log event execution was interrupted (slave_max_statement_time exceeded)" +WARN_VERS_TRT_DEFINITION + eng "mysql.transaction_registry has unexpected index definitions; using slow scan." 
diff --git a/sql/table.cc b/sql/table.cc index ed6d89283e288..49d80526eb8c3 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -10285,42 +10285,126 @@ bool TR_table::update(ulonglong start_id, ulonglong end_id) } #define newx new (thd->mem_root) +static constexpr const char *VERS_TRT_SCAN="vers_trt_scan"; + bool TR_table::query(ulonglong trx_id) { DBUG_ENTER("query(trx)"); if (!table && open()) DBUG_RETURN(false); - SQL_SELECT_auto select; - READ_RECORD info; int error; - List dummy; - SELECT_LEX &slex= *(thd->lex->first_select_lex()); - Name_resolution_context_backup backup(slex.context, *this); - Item *field= newx Item_field(thd, &slex.context, (*this)[FLD_TRX_ID]); - /* Force Item_field to be non-const */ - SCOPE_VALUE(table->map, (table_map) 1); - DBUG_ASSERT(!field->const_item()); - Item *value= newx Item_int(thd, trx_id); - COND *conds= newx Item_func_eq(thd, field, value); - if (unlikely((error= setup_conds(thd, this, dummy, &conds)))) - return false; - select= make_select(table, 0, 0, conds, NULL, 0, &error); - if (unlikely(error || !select)) + const field_index_t fld= FLD_TRX_ID; + const uint idx= IDX_TRX_ID; + + bool found= false; + DBUG_ASSERT(!table->file->keyread_enabled()); + DBUG_ASSERT(table->read_set != &table->tmp_set); + handler *file= table->file; + + uchar search_key[MAX_KEY_LENGTH]; + SCOPE_VALUE(table->read_set, &table->s->all_set); + + if (is_idx_correct(idx, fld) && !DBUG_IF(VERS_TRT_SCAN)) { - my_error(ER_OUT_OF_RESOURCES, MYF(0)); - return false; + KEY *key= &table->key_info[idx]; + + DBUG_ASSERT(thd->mdl_context.is_lock_owner(MDL_key::TABLE, table->s->db.str, + table->s->table_name.str, MDL_SHARED_READ)); + + KEY_PART_INFO *key_part= key->key_part; + const uint key_prefix_len= key_part[0].store_length; + table->field[fld]->store((longlong) trx_id, true); + key_copy(search_key, table->record[0], key, key_prefix_len); + + if ((error= file->ha_index_init(idx, true))) + goto end; + + error= file->ha_index_read_map(table->record[0], (uchar*) 
search_key, + (key_part_map) 1, HA_READ_KEY_EXACT); + + if (!error) + found= true; + else if (error == HA_ERR_END_OF_FILE || error == HA_ERR_KEY_NOT_FOUND) + error= 0; + + int error2= file->ha_index_end(); + if (!error && error2) + error= error2; + } - // FIXME: (performance) force index 'transaction_id' - error= init_read_record(&info, thd, table, select, NULL, - 1 /* use_record_cache */, true /* print_error */, - false /* disable_rr_cache */); - while (!(error= info.read_record()) && !thd->killed && !thd->is_error()) + else { - if (select->skip_record(thd) > 0) - DBUG_RETURN(true); + /* Unexpected transaction_registry definition, using slow scan */ + if (!thd->get_stmt_da()->has_sql_condition(WARN_VERS_TRT_DEFINITION)) + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + WARN_VERS_TRT_DEFINITION, + ER_THD(thd, WARN_VERS_TRT_DEFINITION)); + SQL_SELECT_auto select; + READ_RECORD info; + List dummy; + SELECT_LEX &slex= *(thd->lex->first_select_lex()); + Name_resolution_context_backup backup(slex.context, *this); + + Item *field= newx Item_field(thd, &slex.context, (*this)[fld]); + /* Force Item_field to be non-const */ + SCOPE_VALUE(table->map, (table_map) 1); + DBUG_ASSERT(!field->const_item()); + Item *value= newx Item_int(thd, trx_id); + COND *conds= newx Item_func_eq(thd, field, value); + if (unlikely((error= setup_conds(thd, this, dummy, &conds)))) + DBUG_RETURN(false); + select= make_select(table, 0, 0, conds, NULL, 0, &error); + if (unlikely(error || !select)) + { + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + DBUG_RETURN(false); + } + error= init_read_record(&info, thd, table, select, NULL, + 1 /* use_record_cache */, true /* print_error */, + false /* disable_rr_cache */); + if (error) + DBUG_RETURN(false); + + while (!(error= info.read_record()) && !thd->killed && !thd->is_error()) + { + if (select->skip_record(thd) > 0) + { + found= true; + break; + } + } + + /* rr_handle_error() sets -1 for HA_ERR_END_OF_FILE */ + if (error < 0) + error= 0; } - 
my_error(ER_VERS_NO_TRX_ID, MYF(0), (longlong) trx_id); - DBUG_RETURN(false); + +end: + if (error) + { + myf flags= 0; + + if (file->is_fatal_error(error, HA_CHECK_ALL)) + flags|= ME_FATAL; /* Other handler errors are fatal */ + + file->print_error(error, MYF(flags)); + found= false; + } + else if (!found) + { + DBUG_PRINT("vers_trx_id", ("%llu: %s", trx_id, + "Not found!")); + my_error(ER_VERS_NO_TRX_ID, MYF(0), (longlong) trx_id); + } + else + { + DBUG_LOCK_FILE; + DBUG_PRINT("vers_trx_id", ("%llu: %s", trx_id, + dbug_print_table_row(table))); + DBUG_UNLOCK_FILE; + } + + DBUG_RETURN(found); } bool TR_table::query(MYSQL_TIME &commit_time, bool backwards) @@ -10335,79 +10419,212 @@ bool TR_table::query(MYSQL_TIME &commit_time, bool backwards) if (!table && open()) DBUG_RETURN(false); - SQL_SELECT_auto select; - READ_RECORD info; + + /* + Get FLD_TRX_ID by FLD_COMMIT_TS + + Forward direction: find rec before or at commit_time; + Backward direction: find rec at commit_time or after. + + TODO performance: Store more fields into COMMIT_TS index and get them at once, + without searching COMMIT_ID later. Important for commit_id0 in query_sees(). + + Stage 2. consulting PK index is then optional (depending on whether the scheme is old). 
+ */ int error; - List dummy; - SELECT_LEX &slex= *(thd->lex->first_select_lex()); - Name_resolution_context_backup backup(slex.context, *this); - Item *field= newx Item_field(thd, &slex.context, (*this)[FLD_COMMIT_TS]); - /* Force Item_field to be non-const */ - SCOPE_VALUE(table->map, (table_map) 1); - DBUG_ASSERT(!field->const_item()); - Datetime dt(&commit_time); - Item *value= newx Item_datetime_literal(thd, &dt, 6); - COND *conds; - if (backwards) - conds= newx Item_func_ge(thd, field, value); - else - conds= newx Item_func_le(thd, field, value); - if (unlikely((error= setup_conds(thd, this, dummy, &conds)))) - DBUG_RETURN(false); - // FIXME: (performance) force index 'commit_timestamp' - select= make_select(table, 0, 0, conds, NULL, 0, &error); - if (unlikely(error || !select)) - DBUG_RETURN(false); - error= init_read_record(&info, thd, table, select, NULL, - 1 /* use_record_cache */, true /* print_error */, - false /* disable_rr_cache */); - // With PK by transaction_id the records are ordered by PK, so we have to - // scan TRT fully and collect min (backwards == true) - // or max (backwards == false) stats. + field_index_t fld= FLD_COMMIT_TS; + uint idx= IDX_COMMIT_TS; + bool found= false; - MYSQL_TIME found_ts; - while (!(error= info.read_record()) && !thd->killed && !thd->is_error()) + DBUG_ASSERT(!table->file->keyread_enabled()); + DBUG_ASSERT(table->read_set != &table->tmp_set); + handler *file= table->file; + + uchar search_key[MAX_KEY_LENGTH]; + + const bool commit_ts_key_ok= + is_idx_correct(idx, fld) && + table->key_info[idx].user_defined_key_parts >= 2 && + table->key_info[idx].key_part[1].fieldnr - 1 == FLD_TRX_ID && + !(table->key_info[idx].key_part[1].key_part_flag & HA_REVERSE_SORT); + + if (commit_ts_key_ok && is_idx_correct(IDX_TRX_ID, FLD_TRX_ID) && + !DBUG_IF(VERS_TRT_SCAN)) { - int res= select->skip_record(thd); - if (res > 0) + KEY *key= &table->key_info[idx]; + /* 1. 
get trx_id (into table row) */ + MY_BITMAP *old_read_set= table->prepare_for_keyread(idx, &table->tmp_set); + + DBUG_ASSERT(thd->mdl_context.is_lock_owner(MDL_key::TABLE, table->s->db.str, + table->s->table_name.str, MDL_SHARED_READ)); + + DBUG_ASSERT(key->key_part->fieldnr - 1 == fld); + KEY_PART_INFO *key_part= key->key_part; + uint key_prefix_len= key_part[0].store_length; + ha_rkey_function find_flag; + /* + When multiple records stay at the same key_part[0], + choose proper record among them according to find_flag logic. + */ + if (backwards) + { + find_flag= HA_READ_KEY_OR_NEXT; + key_prefix_len+= key_part[1].store_length; + table->field[FLD_TRX_ID]->store((longlong) 0, true); + } else { + find_flag= HA_READ_KEY_OR_PREV; + key_prefix_len+= key_part[1].store_length; + table->field[FLD_TRX_ID]->store((longlong) ULONGLONG_MAX, true); + } + table->field[fld]->store_time_dec(&commit_time, TIME_SECOND_PART_DIGITS); + key_copy(search_key, table->record[0], key, key_prefix_len); + + if ((error= file->ha_index_init(idx, true))) + { + table->restore_column_maps_after_keyread(old_read_set); + goto end; + } + + const key_part_map key_map= make_prev_keypart_map(backwards ? 1 : 2); + error= file->ha_index_read_map(table->record[0], (uchar*) search_key, key_map, find_flag); + if (!error) + found= true; + else if (error == HA_ERR_END_OF_FILE || error == HA_ERR_KEY_NOT_FOUND) + error= 0; + + int error2= file->ha_index_end(); + if (!error && error2) + error= error2; + table->restore_column_maps_after_keyread(old_read_set); + + if (!error && found) { - MYSQL_TIME commit_ts; - if ((*this)[FLD_COMMIT_TS]->get_date(&commit_ts, date_mode_t(0))) + /* 2. 
get full row from PK */ + SCOPE_VALUE(table->read_set, &table->s->all_set); + fld= FLD_TRX_ID; + idx= IDX_TRX_ID; + + key= &table->key_info[idx]; + DBUG_ASSERT(key->key_part->fieldnr - 1 == fld); + key_part= key->key_part; + key_prefix_len= key_part[0].store_length; + key_copy(search_key, table->record[0], key, key_prefix_len); + + if (!(error= file->ha_index_init(idx, true))) { - found= false; - break; + error= file->ha_index_read_map(table->record[0], (uchar*) search_key, + (key_part_map) 1, HA_READ_KEY_EXACT); + /* Now HA_ERR_END_OF_FILE/HA_ERR_KEY_NOT_FOUND is strange, don't ignore it */ + error2= file->ha_index_end(); + if (!error && error2) + error= error2; } - int c; - if (!found || ((c= my_time_compare(&commit_ts, &found_ts)) && - (backwards ? c < 0 : c > 0))) + } /* if (!error) */ + } + else + { + /* Unexpected transaction_registry definition, using slow scan */ + if (!thd->get_stmt_da()->has_sql_condition(WARN_VERS_TRT_DEFINITION)) + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + WARN_VERS_TRT_DEFINITION, + ER_THD(thd, WARN_VERS_TRT_DEFINITION)); + SCOPE_VALUE(table->read_set, &table->s->all_set); + SQL_SELECT_auto select; + READ_RECORD info; + List dummy; + SELECT_LEX &slex= *(thd->lex->first_select_lex()); + Name_resolution_context_backup backup(slex.context, *this); + Item *field= newx Item_field(thd, &slex.context, (*this)[FLD_COMMIT_TS]); + /* Force Item_field to be non-const */ + SCOPE_VALUE(table->map, (table_map) 1); + DBUG_ASSERT(!field->const_item()); + Datetime dt(&commit_time); + Item *value= newx Item_datetime_literal(thd, &dt, 6); + COND *conds; + if (backwards) + conds= newx Item_func_ge(thd, field, value); + else + conds= newx Item_func_le(thd, field, value); + if (unlikely((error= setup_conds(thd, this, dummy, &conds)))) + DBUG_RETURN(false); + select= make_select(table, 0, 0, conds, NULL, 0, &error); + if (unlikely(error || !select)) + { + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + DBUG_RETURN(false); + } + error= 
init_read_record(&info, thd, table, select, NULL, + 1 /* use_record_cache */, true /* print_error */, + false /* disable_rr_cache */); + if (error) + DBUG_RETURN(false); + + + // With PK by transaction_id the records are ordered by PK, so we have to + // scan TRT fully and collect min (backwards == true) + // or max (backwards == false) stats. + MYSQL_TIME found_ts; + while (!(error= info.read_record()) && !thd->killed && !thd->is_error()) + { + int res= select->skip_record(thd); + if (res > 0) { - found_ts= commit_ts; - found= true; - // TODO: (performance) make ORDER DESC and break after first found. - // Otherwise it is O(n) scan (+copy)! - store_record(table, record[1]); + MYSQL_TIME commit_ts; + if ((*this)[FLD_COMMIT_TS]->get_date(&commit_ts, date_mode_t(0))) + { + found= false; + break; + } + int c; + if (!found || ((c= my_time_compare(&commit_ts, &found_ts)) && + (backwards ? c < 0 : c > 0))) + { + found_ts= commit_ts; + found= true; + // TODO: (performance) make ORDER DESC and break after first found. + // Otherwise it is O(n) scan (+copy)! + store_record(table, record[1]); + } + } + else if (res < 0) + { + found= false; + break; } } - else if (res < 0) - { - found= false; - break; - } + + /* rr_handle_error() sets -1 for HA_ERR_END_OF_FILE */ + if (error < 0) + error= 0; + + if (found) + restore_record(table, record[1]); } - if (found) + +end: + if (error) + { + myf flags= 0; + + if (file->is_fatal_error(error, HA_CHECK_ALL)) + flags|= ME_FATAL; /* Other handler errors are fatal */ + + file->print_error(error, MYF(flags)); + found= false; + } + else if (!found) + DBUG_PRINT("vers_trx_id", ("%s%s: %s", dbug_commit_time, + (backwards ? "(b)" : ""), + "Not found!")); + else { - restore_record(table, record[1]); DBUG_LOCK_FILE; DBUG_PRINT("vers_trx_id", ("%s%s: %s", dbug_commit_time, (backwards ? "(b)" : ""), dbug_print_table_row(table))); DBUG_UNLOCK_FILE; } - else - DBUG_PRINT("vers_trx_id", ("%s%s: %s", dbug_commit_time, - (backwards ? 
"(b)" : ""), - "Not found!")); DBUG_RETURN(found); } diff --git a/sql/table.h b/sql/table.h index 6a07da8769449..22a5ca69ea0d9 100644 --- a/sql/table.h +++ b/sql/table.h @@ -3551,6 +3551,14 @@ class TR_table: public TABLE_LIST FIELD_COUNT }; + enum index_id_t { + IDX_TRX_ID= 0, + IDX_COMMIT_ID, + IDX_BEGIN_TS, + IDX_COMMIT_TS, + IDX_COUNT + }; + enum enabled {NO, MAYBE, YES}; static enum enabled use_transaction_registry; @@ -3669,6 +3677,24 @@ class TR_table: public TABLE_LIST { return !(*this == subj); } + bool is_idx_correct(uint idx, field_index_t fld) const + { + DBUG_ASSERT(table->s->keys == IDX_COUNT); + + if (table->s->keys <= idx) + return false; + + handler *file= table->file; + const KEY *key= &table->key_info[idx]; + if (HA_READ_ORDER == (file->index_flags(idx, 0, false) & (HA_READ_ORDER | HA_ONLY_WHOLE_INDEX)) && + key->key_part->fieldnr - 1 == fld && + !(key->key_part->key_part_flag & HA_REVERSE_SORT)) + { + DBUG_ASSERT(key->key_part->fieldnr - 1 == fld); + return true; + } + return false; + } }; #endif /* MYSQL_CLIENT */