Object
Escapes any characters that would have special meaning in a regular expression. Returns a new escaped string, or self if no characters are escaped. For any string, Regexp.escape(str)=~str will be true.
ORegexp.escape('\\*?{}.') #=> \\\\\*\?\{\}\.
# File lib/oniguruma.rb, line 100
#
# Forwards every argument unchanged to Regexp.escape and returns its result.
def escape(*args)
  escaped = Regexp.escape(*args)
  escaped
end
The first form returns the MatchData object generated by the last successful pattern match. The second form returns the nth field in this MatchData object.
ORegexp.new( 'c(.)t' ) =~ 'cat' #=> 0 ORegexp.last_match #=> #<MatchData:0x401b3d30> ORegexp.last_match(0) #=> "cat" ORegexp.last_match(1) #=> "a" ORegexp.last_match(2) #=> nil
# File lib/oniguruma.rb, line 121
#
# Without an argument, returns whatever is stored in @@last_match.
# With +index+, returns that field of the stored match.
#
# When nothing is stored (@@last_match is nil) the indexed form returns nil
# instead of raising NoMethodError, matching Regexp.last_match(n).
def last_match(index = nil)
  return @@last_match unless index

  # Guard the lookup: there may be no stored match to index into.
  @@last_match && @@last_match[index]
end
Constructs a new regular expression from pattern, which is a String. The second parameter may be a Hash of the form:
{ :options => option_value, :encoding => encoding_value, :syntax => syntax_value }
Where option_value is a bitwise OR of Oniguruma::OPTION_XXX constants; encoding_value is one of Oniguruma::ENCODING_XXX constants; and syntax_value is one of Oniguruma::SYNTAX_XXX constants.
r1 = ORegexp.new('^a-z+:\\s+\w+') #=> /^a-z+:\s+\w+/ r2 = ORegexp.new('cat', :options => OPTION_IGNORECASE ) #=> /cat/i r3 = ORegexp.new('dog', :options => OPTION_EXTEND ) #=> /dog/x #Accept java syntax on SJIS encoding: r4 = ORegexp.new('ape', :syntax => SYNTAX_JAVA, :encoding => ENCODING_SJIS) #=> /ape/
Second form uses string shortcuts to set options and encoding:
r = ORegexp.new('cat', 'i', 'utf8', 'java')
# File lib/oniguruma.rb, line 160
#
# Builds the settings hash and hands it, frozen, to old_initialize.
#
# Two calling conventions are accepted:
# * a Hash (or nothing) as the second argument, merged over the defaults;
# * up to three Strings: option shortcut letters, an encoding name and a
#   syntax name. Each letter is looked up in OPTIONS_SHORTCUTS (unknown
#   letters contribute nothing); names are upcased and resolved to
#   Oniguruma::ENCODING_* / Oniguruma::SYNTAX_* constants, and are silently
#   ignored when no such constant exists.
def initialize(pattern, *args)
  defaults = { :options => OPTION_DEFAULT, :encoding => ENCODING_ASCII, :syntax => SYNTAX_DEFAULT }

  settings =
    if args[0].is_a?(String)
      flag_letters, encoding_name, syntax_name = *args

      # In the string form :options is always set, even when no letter matched.
      flags = 0
      flag_letters.each_byte { |byte| flags |= (OPTIONS_SHORTCUTS[byte.chr] || 0) }
      parsed = { :options => flags }

      if encoding_name
        encoding_const = "ENCODING_#{encoding_name.upcase}"
        parsed[:encoding] = Oniguruma::const_get(encoding_const) if Oniguruma::const_defined?(encoding_const)
      end

      if syntax_name
        syntax_const = "SYNTAX_#{syntax_name.upcase}"
        parsed[:syntax] = Oniguruma::const_get(syntax_const) if Oniguruma::const_defined?(syntax_const)
      end

      parsed
    else
      args[0] || {}
    end

  old_initialize(pattern, defaults.merge(settings).freeze)
end
/*
 * Compiles +pattern+ into the Oniguruma regex held by the wrapped ORegexp
 * struct.
 *
 * The pattern (coerced with StringValue) is stored in @pattern and the
 * options hash in @options. The :options, :encoding and :syntax entries of
 * the hash are converted and passed to onig_new. Raises ArgumentError
 * carrying the Oniguruma error text when compilation fails; returns self
 * otherwise.
 */
static VALUE oregexp_initialize( VALUE self, VALUE pattern, VALUE options ) {
    ORegexp *wrapped;
    VALUE source;
    VALUE opt_value, enc_value, syn_value;
    UChar *first;
    int length;
    int flags;
    OnigEncodingType *encoding;
    OnigSyntaxType *syntax;
    OnigErrorInfo err_info;
    int status;

    Data_Get_Struct( self, ORegexp, wrapped );

    source = StringValue( pattern );
    rb_iv_set( self, "@pattern", source );
    rb_iv_set( self, "@options", options );

    first  = RSTRING_PTR(source);
    length = RSTRING_LEN(source);

    /* Fetch all three settings before converting any of them. */
    opt_value = rb_hash_aref( options, ID2SYM( rb_intern( "options" ) ) );
    enc_value = rb_hash_aref( options, ID2SYM( rb_intern( "encoding" ) ) );
    syn_value = rb_hash_aref( options, ID2SYM( rb_intern( "syntax" ) ) );

    flags    = NUM2INT( opt_value );
    encoding = int2encoding( enc_value );
    syntax   = int2syntax( syn_value );

    status = onig_new(&(wrapped->reg), first, first + length, flags, encoding, syntax, &err_info);
    if (status == ONIG_NORMAL) {
        return self;
    }

    {
        char message[ONIG_MAX_ERROR_MESSAGE_LEN];
        onig_error_code_to_str(message, status, &err_info);
        rb_raise(rb_eArgError, "Oniguruma Error: %s", message);
    }
    return self;
}
Equality—Two regexps are equal if their patterns are identical, they have the same character set code, and their #casefold? values are the same.
# File lib/oniguruma.rb, line 188
#
# Two regexps are equal when their pattern source, their kcode and their
# casefold? values are all equal.
#
# Any object that does not respond to +source+, +kcode+ and +casefold?+
# (nil, a String, ...) is simply not equal: false is returned instead of
# letting a NoMethodError escape from an equality test.
def ==(regexp)
  comparable = regexp.respond_to?(:source) &&
               regexp.respond_to?(:kcode) &&
               regexp.respond_to?(:casefold?)
  return false unless comparable

  @pattern == regexp.source && kcode == regexp.kcode && casefold? == regexp.casefold?
end
Case Equality—Synonym for ORegexp#=~ used in case statements.
a = "HELLO" case a when ORegexp.new('^[a-z]*$'); print "Lower case\n" when ORegexp.new('^[A-Z]*$'); print "Upper case\n" else; print "Mixed case\n" end
produces:
Upper case
/*
 * Case equality: Qtrue when +str+ matches, Qfalse otherwise.
 *
 * A non-String argument is converted with rb_check_string_type; when that
 * yields nil the result is Qfalse without attempting a match. The match
 * itself is delegated to oregexp_match.
 */
static VALUE oregexp_m_eqq(VALUE self, VALUE str) {
    VALUE call_args[1];

    if (TYPE(str) != T_STRING) {
        str = rb_check_string_type(str);
        if (NIL_P(str)) {
            return Qfalse;
        }
    }
    StringValue(str);

    call_args[0] = str;
    return NIL_P(oregexp_match(1, call_args, self)) ? Qfalse : Qtrue;
}
Matches rxp against string, returning the offset of the start of the match or nil if the match failed. Sets $~ to the corresponding MatchData or nil.
ORegexp.new( 'SIT' ) =~ "insensitive" #=> nil ORegexp.new( 'SIT', :options => OPTION_IGNORECASE ) =~ "insensitive" #=> 5
/*
 * Implements =~ : runs oregexp_match on +str+ and returns the start offset
 * of the whole match (register 0) as a Fixnum, or nil when there is no match.
 */
static VALUE oregexp_match_op(VALUE self, VALUE str) {
    VALUE call_args[1];
    VALUE match_data;

    call_args[0] = str;
    match_data = oregexp_match(1, call_args, self);
    if (NIL_P(match_data)) {
        return Qnil;
    }
    return INT2FIX(RMATCH(match_data)->regs->beg[0]);
}
Returns the value of the case-insensitive flag.
# File lib/oniguruma.rb, line 198
#
# True when the OPTION_IGNORECASE bit is set in the stored option flags.
def casefold?
  flags = @options[:options]
  (flags & OPTION_IGNORECASE) > 0
end
Returns a copy of str with all occurrences of rxp pattern replaced with either replacement or the value of the block.
If a string is used as the replacement, the sequences \1, \2, and so on may be used to interpolate successive groups in the match.
In the block form, the current MatchData object is passed in as a parameter. The value returned by the block will be substituted for the match on each call.
/*
 * ORegexp#gsub: delegates to oregexp_safe_gsub with both trailing flags 0.
 */
static VALUE oregexp_m_gsub(int argc, VALUE *argv, VALUE self) {
    const int in_place = 0;   /* presumably the "modify the string" flag; the bang wrappers pass 1 */
    const int first_only = 0; /* presumably the "first match only" flag; the sub wrappers pass 1 */

    return oregexp_safe_gsub(self, argc, argv, in_place, first_only);
}
Performs the substitutions of ORegexp#gsub in place, returning str, or nil if no substitutions were performed.
/*
 * ORegexp#gsub!: delegates to oregexp_safe_gsub with the trailing flags (1, 0).
 */
static VALUE oregexp_m_gsub_bang(int argc, VALUE *argv, VALUE self) {
    const int in_place = 1;   /* presumably the "modify the string" flag; the non-bang wrappers pass 0 */
    const int first_only = 0; /* presumably the "first match only" flag; the sub wrappers pass 1 */

    return oregexp_safe_gsub(self, argc, argv, in_place, first_only);
}
Returns a readable version of rxp
ORegexp.new( 'cat', :options => OPTION_MULTILINE | OPTION_IGNORECASE ).inspect => /cat/im ORegexp.new( 'cat', :options => OPTION_MULTILINE | OPTION_IGNORECASE ).to_s => (?im-x)cat
# File lib/oniguruma.rb, line 271
#
# Renders the regexp as /pattern/ followed by one letter per enabled option,
# always in the order i (ignore case), m (multiline), x (extended).
def inspect
  flags = @options[:options]
  letters = [["i", OPTION_IGNORECASE], ["m", OPTION_MULTILINE], ["x", OPTION_EXTEND]]
  suffix = letters.map { |letter, bit| (flags & bit) > 0 ? letter : "" }.join
  "/#{@pattern}/#{suffix}"
end
Returns the character set code for the regexp.
# File lib/oniguruma.rb, line 206
#
# Returns the :encoding entry of the settings this regexp was built with.
def kcode
  encoding = @options[:encoding]
  encoding
end
Returns a MatchData object describing the match, or nil if there was no match. This is equivalent to retrieving the value of the special variable $~ following a normal match.
ORegexp.new('(.)(.)(.)').match("abc")[2] #=> "b"
The second form allows the match to be performed in a region defined by begin and end while still taking into account look-behinds and look-aheads.
ORegexp.new('1*2*').match('11221122', 4).offset(0) => [4,8] ORegexp.new('(?<=2)1*2*').match('11221122', 4).offset(0) => [4,8]
Compare with:
ORegexp.new('(?<=2)1*2*').match('11221122'[4..-1]) => nil
/*
 * call-seq:
 *   rxp.match(str)        => matchdata or nil
 *   rxp.match(str, begin) => matchdata or nil
 *
 * Searches +str+ (coerced with StringValue) for the pattern, optionally
 * starting at byte offset +begin+. The whole string is still handed to
 * onig_search, so only the search start moves.
 *
 * The backref ($~) is cleared first and set to the resulting MatchData on
 * success. Returns nil on a mismatch, and also when +begin+ lies outside
 * 0..length: such an offset used to be turned into an out-of-bounds pointer.
 *
 * Raises ArgumentError for a wrong argument count (an explicit end bound is
 * not supported, so more than two arguments are rejected) and for any other
 * Oniguruma search error.
 */
static VALUE oregexp_match( int argc, VALUE * argv, VALUE self ) {
    ORegexp *oregexp;
    VALUE string_str;
    VALUE matchData;
    UChar *str_ptr;
    long str_len;
    int begin = 0;
    OnigRegion *region;
    int r;

    Data_Get_Struct( self, ORegexp, oregexp );

    if ( argc == 0 || argc > 2 ) {
        rb_raise(rb_eArgError, "wrong number of arguments (%d for 2)", argc);
    }

    string_str = StringValue( argv[0] );
    str_ptr = (UChar *)RSTRING_PTR(string_str);
    str_len = RSTRING_LEN(string_str);

    if (argc > 1) {
        begin = NUM2INT( argv[1] );
    }

    /* Never build a start pointer outside the string buffer. */
    if (begin < 0 || begin > str_len) {
        rb_backref_set(Qnil);
        return Qnil;
    }

    region = onig_region_new();
    r = onig_search(oregexp->reg, str_ptr, str_ptr + str_len,
                    str_ptr + begin, str_ptr + str_len,
                    region, ONIG_OPTION_NONE);
    rb_backref_set(Qnil);

    if (r >= 0) {
        matchData = oregexp_make_match_data( oregexp, region, string_str);
        onig_region_free(region, 1 );
        rb_backref_set(matchData);
        rb_match_busy(matchData);
        return matchData;
    }

    /* Free the region before any raise, since rb_raise does not return. */
    onig_region_free(region, 1 );
    if (r == ONIG_MISMATCH) {
        return Qnil;
    }

    {
        char s[ONIG_MAX_ERROR_MESSAGE_LEN];
        onig_error_code_to_str((UChar *)s, r);
        rb_raise(rb_eArgError, "Oniguruma Error: %s", s);
    }
    return Qnil; /* not reached */
}
Returns the set of bits corresponding to the options used when creating this ORegexp (see ORegexp::new for details). Note that additional bits may be set in the returned options: these are used internally by the regular expression code. These extra bits are ignored if the options are passed to ORegexp::new.
Oniguruma::OPTION_IGNORECASE #=> 1 Oniguruma::OPTION_EXTEND #=> 2 Oniguruma::OPTION_MULTILINE #=> 4 ORegexp.new(r.source, :options => Oniguruma::OPTION_EXTEND ).options #=> 2
# File lib/oniguruma.rb, line 225
#
# Returns the :options entry (the option bit flags) of the settings this
# regexp was built with.
def options
  flags = @options[:options]
  flags
end
Both forms iterate through str, matching the pattern. For each match, a MatchData object is generated and passed to the block, and added to the resulting array of MatchData objects.
If str does not match pattern, nil is returned.
/*
 * ORegexp#scan: allocates a fresh match region, packs it with self and +str+
 * into a scan_packet, and runs oregexp_packed_scan under rb_ensure so that
 * oregexp_cleanup_region is always invoked with the region afterwards.
 */
static VALUE oregexp_m_scan(VALUE self, VALUE str) {
    OnigRegion *scan_region = onig_region_new();
    struct scan_packet packet = {self, str, scan_region};
    VALUE result;

    result = rb_ensure( oregexp_packed_scan, (VALUE)&packet,
                        oregexp_cleanup_region, (VALUE)scan_region);
    return result;
}
Returns the original string of the pattern.
ORegexp.new( 'ab+c', 'ix' ).source #=> "ab+c"
# File lib/oniguruma.rb, line 285
#
# Returns the stored pattern string itself (not a copy), frozen in place.
def source
  pattern = @pattern
  pattern.freeze
end
Returns a copy of str with the first occurrence of rxp pattern replaced with either replacement or the value of the block.
If a string is used as the replacement, the sequences \1, \2, and so on may be used to interpolate successive groups in the match.
In the block form, the current MatchData object is passed in as a parameter. The value returned by the block will be substituted for the match on each call.
/*
 * ORegexp#sub: delegates to oregexp_safe_gsub with the trailing flags (0, 1).
 */
static VALUE oregexp_m_sub(int argc, VALUE *argv, VALUE self) {
    const int in_place = 0;   /* presumably the "modify the string" flag; the bang wrappers pass 1 */
    const int first_only = 1; /* presumably the "first match only" flag; the gsub wrappers pass 0 */

    return oregexp_safe_gsub(self, argc, argv, in_place, first_only);
}
Performs the substitutions of ORegexp#sub in place, returning str, or nil if no substitutions were performed.
/*
 * ORegexp#sub!: delegates to oregexp_safe_gsub with both trailing flags 1.
 */
static VALUE oregexp_m_sub_bang(int argc, VALUE *argv, VALUE self) {
    const int in_place = 1;   /* presumably the "modify the string" flag; the non-bang wrappers pass 0 */
    const int first_only = 1; /* presumably the "first match only" flag; the gsub wrappers pass 0 */

    return oregexp_safe_gsub(self, argc, argv, in_place, first_only);
}
Returns a string containing the regular expression and its options (using the (?xxx:yyy) notation). This string can be fed back in to Regexp::new to create a regular expression with the same semantics as the original. (However, Regexp#== may not return true when comparing the two, as the source of the regular expression itself may differ, as the example shows). Regexp#inspect produces a generally more readable version of rxp.
r1 = ORegexp.new( 'ab+c', :options => OPTION_IGNORECASE | OPTION_EXTEND ) #=> /ab+c/ix s1 = r1.to_s #=> "(?ix-m:ab+c)" r2 = ORegexp.new(s1) #=> /(?ix-m:ab+c)/ r1 == r2 #=> false r1.source #=> "ab+c" r2.source #=> "(?ix-m:ab+c)"
# File lib/oniguruma.rb, line 247
#
# Returns the pattern prefixed with an inline option group.
#
# Enabled options are listed first (order i, m, x), then "-" and the disabled
# ones; the "-" part is omitted only when all three are enabled. The prefix
# is the isolated form "(?im-x)" followed by the pattern, e.g. "(?im-x)cat",
# not the wrapping "(?im-x:cat)" form.
def to_s
  flags = @options[:options]
  table = [["i", OPTION_IGNORECASE], ["m", OPTION_MULTILINE], ["x", OPTION_EXTEND]]

  enabled  = table.map { |letter, bit| (flags & bit) > 0 ? letter : "" }.join
  disabled = table.map { |letter, bit| (flags & bit) == 0 ? letter : "" }.join

  prefix = enabled == "imx" ? "(?imx)" : "(?#{enabled}-#{disabled})"
  prefix + @pattern
end
Generated with the Darkfish Rdoc Generator 2.