diff --git a/ext/strscan/extconf.rb b/ext/strscan/extconf.rb
index 2b4ec25be30909..4e8d851fdb5a54 100644
--- a/ext/strscan/extconf.rb
+++ b/ext/strscan/extconf.rb
@@ -5,6 +5,7 @@
   have_func("onig_region_memsize(NULL)")
   have_func("rb_reg_onig_match", "ruby/re.h")
   have_func("rb_deprecate_constant")
+  have_func("rb_int_parse_cstr", "ruby.h") # RUBY_VERSION >= 2.5
   have_func("rb_gc_location", "ruby.h") # RUBY_VERSION >= 2.7
   have_const("RUBY_TYPED_EMBEDDABLE", "ruby.h") # RUBY_VERSION >= 3.3
   create_makefile 'strscan'
diff --git a/ext/strscan/lib/strscan/strscan.rb b/ext/strscan/lib/strscan/strscan.rb
index 07ed102d9a8cfe..5e262f4007b497 100644
--- a/ext/strscan/lib/strscan/strscan.rb
+++ b/ext/strscan/lib/strscan/strscan.rb
@@ -1,6 +1,12 @@
 # frozen_string_literal: true
 
 class StringScanner
+  unless method_defined?(:integer_at) # For JRuby
+    def integer_at(specifier, *to_i_args)
+      self[specifier]&.to_i(*to_i_args)
+    end
+  end
+
   # :markup: markdown
   #
   # call-seq:
diff --git a/ext/strscan/strscan.c b/ext/strscan/strscan.c
index d35df7e43b1a5f..dede57218bd173 100644
--- a/ext/strscan/strscan.c
+++ b/ext/strscan/strscan.c
@@ -1689,6 +1689,38 @@ name_to_backref_number(struct re_registers *regs, VALUE regexp, const char* name
                  rb_long2int(name_end - name), name);
 }
 
+/*
+ * Resolve capture group index from Integer, Symbol, or String.
+ * Returns the resolved register index, or -1 if unmatched/out of range.
+ * For Symbol/String specifiers, raises IndexError if the named group
+ * does not exist.
+ */
+static long
+resolve_capture_index(struct strscanner *p, VALUE specifier)
+{
+    const char *name;
+    long i;
+    if (! MATCHED_P(p)) return -1;
+    switch (TYPE(specifier)) {
+      case T_SYMBOL:
+        specifier = rb_sym2str(specifier);
+        /* fall through */
+      case T_STRING:
+        RSTRING_GETMEM(specifier, name, i);
+        i = name_to_backref_number(&(p->regs), p->regex, name, name + i,
+                                   rb_enc_get(specifier));
+        break;
+      default:
+        i = NUM2LONG(specifier);
+    }
+    if (i < 0)
+        i += p->regs.num_regs;
+    if (i < 0) return -1;
+    if (i >= p->regs.num_regs) return -1;
+    if (p->regs.beg[i] == -1) return -1;
+    return i;
+}
+
 /*
  *
  * :markup: markdown
@@ -1763,36 +1795,95 @@ name_to_backref_number(struct re_registers *regs, VALUE regexp, const char* name
 static VALUE
 strscan_aref(VALUE self, VALUE idx)
 {
-    const char *name;
     struct strscanner *p;
     long i;
 
     GET_SCANNER(self, p);
-    if (! MATCHED_P(p)) return Qnil;
-
-    switch (TYPE(idx)) {
-      case T_SYMBOL:
-        idx = rb_sym2str(idx);
-        /* fall through */
-      case T_STRING:
-        RSTRING_GETMEM(idx, name, i);
-        i = name_to_backref_number(&(p->regs), p->regex, name, name + i, rb_enc_get(idx));
-        break;
-      default:
-        i = NUM2LONG(idx);
-    }
-
-    if (i < 0)
-        i += p->regs.num_regs;
-    if (i < 0) return Qnil;
-    if (i >= p->regs.num_regs) return Qnil;
-    if (p->regs.beg[i] == -1) return Qnil;
+    i = resolve_capture_index(p, idx);
+    if (i < 0) return Qnil;
 
     return extract_range(p,
                          adjust_register_position(p, p->regs.beg[i]),
                          adjust_register_position(p, p->regs.end[i]));
 }
 
+/*
+ * :markup: markdown
+ *
+ * call-seq:
+ *   integer_at(specifier, base=10) -> integer or nil
+ *
+ * Returns the captured substring at the given `specifier` as an Integer,
+ * following the behavior of `String#to_i(base)`.
+ *
+ * `specifier` can be an Integer (positive, negative, or zero), a Symbol,
+ * or a String for named capture groups.
+ *
+ * Returns `nil` if:
+ * - No match has been performed or the last match failed
+ * - The `specifier` is an Integer and is out of range
+ * - The group at `specifier` did not participate in the match
+ *
+ * Raises IndexError if `specifier` is a Symbol or String that does not
+ * correspond to a named capture group, consistent with
+ * `StringScanner#[]`.
+ *
+ * This is semantically equivalent to `self[specifier]&.to_i(base)`
+ * but avoids the allocation of a temporary String when possible.
+ *
+ * ```rb
+ * scanner = StringScanner.new("2024-06-15")
+ * scanner.scan(/(\d{4})-(\d{2})-(\d{2})/)
+ * scanner.integer_at(1)     # => 2024
+ * scanner.integer_at(1, 16) # => 8228
+ * ```
+ */
+static VALUE
+strscan_integer_at(int argc, VALUE *argv, VALUE self)
+{
+    struct strscanner *p;
+    long i;
+    long beg, end, len;
+    const char *ptr;
+    VALUE rb_specifier;
+    VALUE rb_base;
+    int base = 10;
+
+    GET_SCANNER(self, p);
+    rb_scan_args(argc, argv, "11", &rb_specifier, &rb_base);
+    if (argc > 1)
+        base = NUM2INT(rb_base);
+    i = resolve_capture_index(p, rb_specifier);
+    if (i < 0)
+        return Qnil;
+
+    beg = adjust_register_position(p, p->regs.beg[i]);
+    end = adjust_register_position(p, p->regs.end[i]);
+    len = end - beg;
+    ptr = S_PBEG(p) + beg;
+#ifdef HAVE_RB_INT_PARSE_CSTR
+    {
+        /*
+         * Ruby 2.5 and later export the rb_int_parse_cstr() symbol but
+         * don't provide its prototype. Ruby 4.1 and later provide the
+         * prototype.
+         */
+# ifndef RB_INT_PARSE_DEFAULT
+        VALUE rb_int_parse_cstr(const char *str, ssize_t len, char **endp,
+                                size_t *ndigits, int base, int flags);
+#  define RB_INT_PARSE_DEFAULT 0x07
+# endif
+        char *endp;
+        VALUE integer = rb_int_parse_cstr(ptr, len, &endp, NULL, base,
+                                          RB_INT_PARSE_DEFAULT);
+        /* String#to_i maps an unparsable string to 0, not nil. */
+        return NIL_P(integer) ? INT2FIX(0) : integer;
+    }
+#else
+    return rb_str_to_inum(rb_str_new(ptr, len), base, 0);
+#endif
+}
+
 /*
  * :markup: markdown
  * :include: strscan/link_refs.txt
@@ -2353,6 +2444,7 @@ Init_strscan(void)
     rb_define_method(StringScanner, "matched", strscan_matched, 0);
     rb_define_method(StringScanner, "matched_size", strscan_matched_size, 0);
     rb_define_method(StringScanner, "[]", strscan_aref, 1);
+    rb_define_method(StringScanner, "integer_at", strscan_integer_at, -1);
     rb_define_method(StringScanner, "pre_match", strscan_pre_match, 0);
     rb_define_method(StringScanner, "post_match", strscan_post_match, 0);
     rb_define_method(StringScanner, "size", strscan_size, 0);
diff --git a/gc.c b/gc.c
index cde1b44d05b115..f0ec0f79efe692 100644
--- a/gc.c
+++ b/gc.c
@@ -3650,14 +3650,11 @@ rb_gc_copy_attributes(VALUE dest, VALUE obj)
     rb_gc_impl_copy_attributes(rb_gc_get_objspace(), dest, obj);
 }
 
+#if USE_MODULAR_GC
 int
 rb_gc_modular_gc_loaded_p(void)
 {
-#if USE_MODULAR_GC
     return rb_gc_functions.modular_gc_loaded_p;
-#else
-    return false;
-#endif
 }
 
 const char *
@@ -3673,6 +3670,7 @@ rb_gc_active_gc_name(void)
 
     return gc_name;
 }
+#endif
 
 struct rb_gc_object_metadata_entry *
 rb_gc_object_metadata(VALUE obj)
diff --git a/internal/gc.h b/internal/gc.h
index 41675810c722c4..ee2a0c28050a8a 100644
--- a/internal/gc.h
+++ b/internal/gc.h
@@ -257,8 +257,10 @@ void rb_gc_update_values(long n, VALUE *values);
 void rb_gc_mark_set_no_pin(st_table *);
 void rb_gc_update_set_refs(st_table *);
 
+#if USE_MODULAR_GC
 const char *rb_gc_active_gc_name(void);
 int rb_gc_modular_gc_loaded_p(void);
+#endif
 
 RUBY_SYMBOL_EXPORT_END
 
diff --git a/spec/bundler/support/rubygems_ext.rb b/spec/bundler/support/rubygems_ext.rb
index cf639a660a04fd..8e3d84212d31fd
100644
--- a/spec/bundler/support/rubygems_ext.rb
+++ b/spec/bundler/support/rubygems_ext.rb
@@ -73,6 +73,46 @@ def install_test_deps
 
       require_relative "helpers"
       Helpers.install_dev_bundler
+
+      install_vendored_compact_index
+    end
+
+    # Vendor `rubygems/rubygems.org#lib/compact_index/` under `tmp/compact_index/`
+    # so the artifice can serve compact-index responses without a runtime gem
+    # dependency. Pinned to a reviewed commit; override with COMPACT_INDEX_REF
+    # to refresh against another ref (the existing vendor copy is discarded).
+    def install_vendored_compact_index
+      target_root = Path.tmp_root.join("compact_index")
+      require "fileutils"
+      FileUtils.mkdir_p(Path.tmp_root)
+
+      files = %w[
+        lib/compact_index.rb
+        lib/compact_index/dependency.rb
+        lib/compact_index/gem.rb
+        lib/compact_index/gem_version.rb
+        lib/compact_index/versions_file.rb
+      ]
+
+      # Serialize installs so parallel test setups don't race on the same
+      # vendor tree, and only skip the download when every file is present so
+      # an interrupted run can't leave a partial copy behind.
+      File.open(Path.tmp_root.join("compact_index.lock"), File::CREAT | File::RDWR) do |lock|
+        lock.flock(File::LOCK_EX)
+
+        FileUtils.rm_rf(target_root) if ENV["COMPACT_INDEX_REF"]
+
+        next if files.all? {|path| File.exist?(target_root.join(path)) }
+
+        require "open-uri"
+        ref = ENV["COMPACT_INDEX_REF"] || "7c68a7b39761c61a66f9299f85b889ec39afc02c"
+        files.each do |path|
+          url = "https://raw.githubusercontent.com/rubygems/rubygems.org/#{ref}/#{path}"
+          target = target_root.join(path)
+          FileUtils.mkdir_p(File.dirname(target))
+          File.write(target, URI.parse(url).open(&:read))
+        end
+      end
     end
 
     def check_source_control_changes(success_message:, error_message:)
diff --git a/test/strscan/test_stringscanner.rb b/test/strscan/test_stringscanner.rb
index 3b6223709cf6f7..96a1badb1f1087 100644
--- a/test/strscan/test_stringscanner.rb
+++ b/test/strscan/test_stringscanner.rb
@@ -525,6 +525,59 @@ def test_AREF
     end
   end
 
+  def assert_integer_at(s, specifier, *to_i_args)
+    assert_equal(s[specifier]&.to_i(*to_i_args),
+                 s.integer_at(specifier, *to_i_args))
+  end
+
+  def test_integer_at
+    s = create_string_scanner("before 20260514 after")
+    s.skip_until(" ")
+    assert_equal("20260514", s.scan(/(\d{4})(\d{2})(\d{2})/))
+    assert_integer_at(s, 0) # 20260514
+    assert_integer_at(s, 1) # 2026
+    assert_integer_at(s, 2) # 5
+    assert_integer_at(s, 3) # 14
+    assert_integer_at(s, 4) # nil
+    assert_integer_at(s, -1) # 14
+    assert_integer_at(s, -2) # 5
+    assert_integer_at(s, -3) # 2026
+    assert_integer_at(s, -4) # 20260514
+    assert_integer_at(s, -5) # nil
+  end
+
+  def test_integer_at_name_string
+    s = create_string_scanner("before 20260514 after")
+    s.skip_until(" ")
+    assert_equal("20260514", s.scan(/(?<y>\d{4})(?<m>\d{2})(?<d>\d{2})/))
+    assert_integer_at(s, "y")
+    assert_integer_at(s, "m")
+    assert_integer_at(s, "d")
+  end
+
+  def test_integer_at_name_symbol
+    s = create_string_scanner("before 20260514 after")
+    s.skip_until(" ")
+    assert_equal("20260514", s.scan(/(?<y>\d{4})(?<m>\d{2})(?<d>\d{2})/))
+    assert_integer_at(s, :y)
+    assert_integer_at(s, :m)
+    assert_integer_at(s, :d)
+  end
+
+  def test_integer_at_base
+    s = create_string_scanner("before 111 after")
+    s.skip_until(" ")
+    assert_equal("111", s.scan(/\d+/))
+    assert_integer_at(s, 0, 2)
+  end
+
+  def test_integer_at_base_auto
+    s = create_string_scanner("before 0xa_f after")
+    s.skip_until(" ")
+    assert_equal("0xa_f", s.scan(/0x[\h_]+/))
+    assert_integer_at(s, 0, 0) # 0xaf
+  end
+
   def test_pre_match
     s = create_string_scanner('a b c d e')
     s.scan(/\w/)