Parent

Files

Class/Module Index [+]

Quicksearch

Oniguruma::ORegexp

Public Class Methods

escape(str) => a_str click to toggle source
quote(str) => a_str

Escapes any characters that would have special meaning in a regular expression. Returns a new escaped string, or self if no characters are escaped. For any string, Regexp.escape(str)=~str will be true.

ORegexp.escape('\\*?{}.')   #=> \\\\\*\?\{\}\.
# File lib/oniguruma.rb, line 100
# Escapes regular-expression metacharacters by forwarding every argument,
# unchanged, to Regexp.escape and returning its result.
def escape( *args )
   Regexp.escape(*args)
end
last_match => matchdata click to toggle source
last_match(fixnum) => str

The first form returns the MatchData object generated by the last successful pattern match. The second form returns the nth field in this MatchData object.

ORegexp.new( 'c(.)t' ) =~ 'cat'       #=> 0
ORegexp.last_match                    #=> #<MatchData:0x401b3d30>
ORegexp.last_match(0)                 #=> "cat"
ORegexp.last_match(1)                 #=> "a"
ORegexp.last_match(2)                 #=> nil
# File lib/oniguruma.rb, line 121
# With no argument, returns whatever is stored in @@last_match.
# With an index, returns @@last_match[index] instead.
def last_match( index = nil)
   return @@last_match unless index
   @@last_match[index]
end
new( pattern, options_hash ) click to toggle source
new( pattern, option_str, encoding_str=nil, syntax_str=nil)

Constructs a new regular expression from pattern, which is a String. The second parameter may be a Hash of the form:

{ :options => option_value, :encoding => encoding_value, :syntax => syntax_value }

Where option_value is a bitwise OR of Oniguruma::OPTION_XXX constants; encoding_value is one of Oniguruma::ENCODING_XXX constants; and syntax_value is one of Oniguruma::SYNTAX_XXX constants.

r1 = ORegexp.new('^a-z+:\\s+\w+')                                            #=> /^a-z+:\s+\w+/
r2 = ORegexp.new('cat', :options => OPTION_IGNORECASE )                      #=> /cat/i
r3 = ORegexp.new('dog', :options => OPTION_EXTEND )                          #=> /dog/x

#Accept java syntax on SJIS encoding:
r4 = ORegexp.new('ape', :syntax  => SYNTAX_JAVA, :encoding => ENCODING_SJIS) #=> /ape/

Second form uses string shortcuts to set options and encoding:

r = ORegexp.new('cat', 'i', 'utf8', 'java')
# File lib/oniguruma.rb, line 160
# Normalises the two supported call forms into one options hash and hands it,
# frozen, to old_initialize together with the pattern.
#
# * Hash form (or no extra argument): the hash is merged over the defaults.
# * String form: option_str, encoding_str, syntax_str. Every byte of the
#   option string is looked up in OPTIONS_SHORTCUTS (unknown letters add
#   nothing); encoding/syntax names are upcased and resolved to
#   Oniguruma::ENCODING_* / Oniguruma::SYNTAX_* constants when such a
#   constant exists, otherwise the default is kept.
def initialize( pattern, *args )
   defaults = { :options => OPTION_DEFAULT, :encoding => ENCODING_ASCII, :syntax => SYNTAX_DEFAULT}
   first_arg = args[0]

   # Anything that is not a String is treated as the options hash (nil => {}).
   unless first_arg.is_a?(String)
      return old_initialize( pattern, defaults.merge( first_arg || {} ).freeze )
   end

   flag_chars, encoding_name, syntax_name = *args
   overrides = {}

   # OR together the bit of every recognised shortcut letter.
   bits = 0
   flag_chars.each_byte { |byte| bits |= (OPTIONS_SHORTCUTS[byte.chr] || 0) }
   overrides[:options] = bits

   if encoding_name
      encoding_const = "ENCODING_#{encoding_name.upcase}"
      if Oniguruma::const_defined?(encoding_const)
         overrides[:encoding] = Oniguruma::const_get(encoding_const)
      end
   end

   if syntax_name
      syntax_const = "SYNTAX_#{syntax_name.upcase}"
      if Oniguruma::const_defined?(syntax_const)
         overrides[:syntax] = Oniguruma::const_get(syntax_const)
      end
   end

   old_initialize( pattern, defaults.merge( overrides ).freeze )
end
new(p1, p2) click to toggle source
/*
 * Compiles `pattern` with onig_new() and stores the compiled regex in the
 * ORegexp struct wrapped by `self`.
 *
 * The pattern string and the options hash are also kept on the Ruby object
 * as @pattern and @options. `options` is read with the symbol keys
 * :options, :encoding and :syntax; the latter two are converted with
 * int2encoding() / int2syntax().
 *
 * Raises ArgumentError carrying Oniguruma's message when compilation fails.
 * Returns self.
 */
static VALUE oregexp_initialize( VALUE self, VALUE pattern, VALUE options ) {
   ORegexp *wrapper;
   Data_Get_Struct( self, ORegexp, wrapper );

   VALUE source = StringValue( pattern );
   rb_iv_set( self, "@pattern", source );
   rb_iv_set( self, "@options", options );

   UChar* pat_begin = RSTRING_PTR(source);
   int pat_size = RSTRING_LEN(source);

   /* Fetch all three settings first, then convert them. */
   VALUE raw_flags = rb_hash_aref( options, ID2SYM( rb_intern( "options" ) ) );
   VALUE raw_encoding = rb_hash_aref( options, ID2SYM( rb_intern( "encoding" ) ) );
   VALUE raw_syntax = rb_hash_aref( options, ID2SYM( rb_intern( "syntax" ) ) );
   int flags = NUM2INT( raw_flags );
   OnigEncodingType * encoding = int2encoding( raw_encoding );
   OnigSyntaxType * syntax = int2syntax( raw_syntax );

   OnigErrorInfo einfo;
   int status = onig_new(&(wrapper->reg), pat_begin, pat_begin + pat_size, flags, encoding, syntax, &einfo);
   if (status == ONIG_NORMAL) {
      return self;
   }

   /* Compilation failed: turn the Oniguruma status into a Ruby exception. */
   char message[ONIG_MAX_ERROR_MESSAGE_LEN];
   onig_error_code_to_str(message, status, &einfo);
   rb_raise(rb_eArgError, "Oniguruma Error: %s", message);
   return self;
}

Public Instance Methods

rxp == other_rxp => true or false click to toggle source
eql?(other_rxp) => true or false

Equality—Two regexps are equal if their patterns are identical, they have the same character set code, and their #casefold? values are the same.

# File lib/oniguruma.rb, line 188
# Equality: true when the other object has the same pattern source, the same
# kcode and the same casefold? value as this one.
#
# Objects that do not expose source, kcode and casefold? (nil, a String, ...)
# are simply not equal, so the comparison returns false instead of raising
# NoMethodError.
def ==(regexp)
   comparable = regexp.respond_to?(:source) &&
                regexp.respond_to?(:kcode) &&
                regexp.respond_to?(:casefold?)
   return false unless comparable

   @pattern == regexp.source && kcode == regexp.kcode && casefold? == regexp.casefold?
end
Also aliased as: eql?
rxp === str => true or false click to toggle source

Case Equality—Synonym for ORegexp#=~ used in case statements.

a = "HELLO"
case a
when ORegexp.new('^[a-z]*$'); print "Lower case\n"
when ORegexp.new('^[A-Z]*$'); print "Upper case\n"
else;                         print "Mixed case\n"
end

produces:

Upper case
/*
 * Case equality (===): Qtrue when oregexp_match() finds a match in `str`,
 * Qfalse otherwise. A non-String argument is first converted with
 * rb_check_string_type(); if that yields nil the result is Qfalse.
 */
static VALUE oregexp_m_eqq(VALUE self, VALUE str) {
    if (TYPE(str) != T_STRING) {
        str = rb_check_string_type(str);
        if (NIL_P(str)) {
            return Qfalse;
        }
    }
    StringValue(str);

    VALUE match_args[] = {str};
    VALUE found = oregexp_match(1, match_args, self);
    return (Qnil == found) ? Qfalse : Qtrue;
}
rxp =~ string => int or nil click to toggle source
*

Matches rxp against string, returning the offset of the start of the match or nil if the match failed. Sets $~ to the corresponding MatchData or nil.

ORegexp.new( 'SIT' ) =~ "insensitive"                                 #=>    nil
ORegexp.new( 'SIT', :options => OPTION_IGNORECASE ) =~ "insensitive"  #=>    5
/*
 * Implements =~ : runs oregexp_match() on `str` and returns the start offset
 * of the whole match (regs->beg[0]) as a Fixnum, or Qnil when there is no
 * match.
 */
static VALUE oregexp_match_op(VALUE self, VALUE str) {
   VALUE match_args[] = {str};
   VALUE match_data = oregexp_match(1, match_args, self);
   if (match_data != Qnil) {
      return INT2FIX(RMATCH(match_data)->regs->beg[0]);
   }
   return Qnil;
}
casefold? => true or false click to toggle source

Returns the value of the case-insensitive flag.

# File lib/oniguruma.rb, line 198
# True when the OPTION_IGNORECASE bit is set in the stored option flags.
def casefold?
   flags = @options[:options]
   (flags & OPTION_IGNORECASE) > 0
end
eql?(regexp) click to toggle source
Alias for: ==
gsub(str, replacement) click to toggle source
gsub(str) {|match_data| ... }

Returns a copy of str with all occurrences of rxp pattern replaced with either replacement or the value of the block.

If a string is used as the replacement, the sequences \1, \2, and so on may be used to interpolate successive groups in the match.

In the block form, the current MatchData object is passed in as a parameter. The value returned by the block will be substituted for the match on each call.

/*
 * gsub: delegates to oregexp_safe_gsub() with both trailing flags cleared.
 * NOTE(review): judging by the sibling sub/gsub wrappers, the first flag
 * presumably selects in-place ("!") behaviour and the second first-match-only
 * behaviour -- confirm against oregexp_safe_gsub.
 */
static VALUE oregexp_m_gsub(int argc, VALUE *argv, VALUE self) {
        const int bang_flag = 0;
        const int once_flag = 0;
        return oregexp_safe_gsub(self, argc, argv, bang_flag, once_flag);
}
gsub!(str, replacement) click to toggle source
gsub!(str) {|match_data| ... }

Performs the substitutions of ORegexp#gsub in place, returning str, or nil if no substitutions were performed.

/*
 * gsub!: delegates to oregexp_safe_gsub() with the first trailing flag set
 * and the second cleared.
 * NOTE(review): judging by the sibling sub/gsub wrappers, the first flag
 * presumably selects in-place ("!") behaviour and the second first-match-only
 * behaviour -- confirm against oregexp_safe_gsub.
 */
static VALUE oregexp_m_gsub_bang(int argc, VALUE *argv, VALUE self) {
        const int bang_flag = 1;
        const int once_flag = 0;
        return oregexp_safe_gsub(self, argc, argv, bang_flag, once_flag);
}
inspect => string click to toggle source

Returns a readable version of rxp

ORegexp.new( 'cat', :options => OPTION_MULTILINE | OPTION_IGNORECASE ).inspect  => /cat/im
ORegexp.new( 'cat', :options => OPTION_MULTILINE | OPTION_IGNORECASE ).to_s     => (?im-x)cat
# File lib/oniguruma.rb, line 271
# Renders the regexp as /pattern/flags, where flags holds "i", "m" and "x"
# (in that order) for each of OPTION_IGNORECASE, OPTION_MULTILINE and
# OPTION_EXTEND that is set in the stored option bits.
def inspect
   bits = @options[:options]
   letters = [["i", OPTION_IGNORECASE], ["m", OPTION_MULTILINE], ["x", OPTION_EXTEND]]
   suffix = letters.inject("") do |acc, (letter, mask)|
      (bits & mask) > 0 ? acc + letter : acc
   end
   "/" + @pattern + "/" + suffix
end
kcode => int click to toggle source

Returns the character set code for the regexp.

# File lib/oniguruma.rb, line 206
# Returns the value stored under :encoding in the options hash.
def kcode
   encoding = @options[:encoding]
   encoding
end
match(str) => matchdata or nil click to toggle source
match(str, begin, end) => matchdata or nil

Returns a MatchData object describing the match, or nil if there was no match. This is equivalent to retrieving the value of the special variable $~ following a normal match.

ORegexp.new('(.)(.)(.)').match("abc")[2]   #=> "b"

The second form performs the match in a region of the string defined by begin and end, while still taking look-behinds and look-aheads outside that region into account.

ORegexp.new('1*2*').match('11221122', 4).offset(0)       => [4,8]
ORegexp.new('(?<=2)1*2*').match('11221122', 4).offset(0) => [4,8]

Compare with:

ORegexp.new('(?<=2)1*2*').match('11221122'[4..-1]) => nil
/*
 * Searches argv[0] (converted with StringValue) for this regexp.
 *
 *   argv[0]  the string to search
 *   argv[1]  optional offset at which the search starts (default 0)
 *
 * On a match, builds the match data with oregexp_make_match_data(), installs
 * it via rb_backref_set() and returns it. On ONIG_MISMATCH returns Qnil; the
 * back-reference is reset to nil before the result is examined. Any other
 * negative Oniguruma status is raised as ArgumentError, as is a call with
 * zero or more than two arguments.
 */
static VALUE oregexp_match( int argc, VALUE * argv, VALUE self ) {
   ORegexp *oregexp;
   Data_Get_Struct( self, ORegexp, oregexp );

   if ( argc == 0 || argc > 2) {
      /* rb_raise() does not return, so no statement may follow it here. */
      rb_raise(rb_eArgError, "wrong number of arguments (%d for 2)", argc);
   }

   VALUE string_str = StringValue( argv[0] );
   UChar* str_ptr = RSTRING_PTR(string_str);
   int str_len = RSTRING_LEN(string_str);

   int begin = 0;
   int end = str_len;

   if (argc > 1 ) {
      begin = NUM2INT( argv[1] );
   }
   /* A third "end" argument is currently disabled:
    *    if (argc > 2) {
    *       end = NUM2INT( argv[2] );
    *    }
    */

   OnigRegion *region = onig_region_new();
   int r = onig_search(oregexp->reg, str_ptr, str_ptr + str_len, str_ptr + begin, str_ptr + end, region, ONIG_OPTION_NONE);
   rb_backref_set(Qnil);
   if (r >= 0) {
      VALUE matchData = oregexp_make_match_data( oregexp, region, string_str);
      onig_region_free(region, 1 );
      rb_backref_set(matchData);
      rb_match_busy(matchData);
      return matchData;
   }

   /* No match data was built: release the region once for both remaining paths. */
   onig_region_free(region, 1 );
   if (r == ONIG_MISMATCH) {
      return Qnil;
   }

   char s[ONIG_MAX_ERROR_MESSAGE_LEN];
   onig_error_code_to_str(s, r);
   rb_raise(rb_eArgError, "Oniguruma Error: %s", s);
   return Qnil; /* not reached; keeps every path of this VALUE function returning */
}
match_all(p1) click to toggle source
Alias for: scan
options => fixnum click to toggle source

Returns the set of bits corresponding to the options used when creating this ORegexp (see ORegexp::new for details). Note that additional bits may be set in the returned options: these are used internally by the regular expression code. These extra bits are ignored if the options are passed to ORegexp::new.

Oniguruma::OPTION_IGNORECASE                                 #=> 1
Oniguruma::OPTION_EXTEND                                     #=> 2
Oniguruma::OPTION_MULTILINE                                  #=> 4

ORegexp.new(r.source, :options => Oniguruma::OPTION_EXTEND ).options  #=> 2
# File lib/oniguruma.rb, line 225
# Returns the option bits stored under :options in the options hash.
def options
   bits = @options[:options]
   bits
end
scan(str) # => [matchdata1, matchdata2,...] or nil click to toggle source
scan(str) {|match_data| ... } # => [matchdata1, matchdata2,...] or nil

Both forms iterate through str, matching the pattern. For each match, a MatchData object is generated and passed to the block, and added to the resulting array of MatchData objects.

If str does not match pattern, nil is returned.

/*
 * scan: allocates a fresh OnigRegion, packs self, str and the region into a
 * scan_packet, and runs oregexp_packed_scan() under rb_ensure() so that
 * oregexp_cleanup_region() is always called with the region afterwards.
 * Returns whatever rb_ensure() returns.
 */
static VALUE oregexp_m_scan(VALUE self, VALUE str) {
    OnigRegion * scan_region = onig_region_new();
    struct scan_packet packet = {self, str, scan_region};
    VALUE result = rb_ensure( oregexp_packed_scan, (VALUE)&packet, oregexp_cleanup_region, (VALUE)scan_region);
    return result;
}
Also aliased as: match_all
source => str click to toggle source

Returns the original string of the pattern.

ORegexp.new( 'ab+c', 'ix' ).source   #=> "ab+c"
# File lib/oniguruma.rb, line 285
# Returns the pattern string. The stored string itself is frozen by this
# call, so callers cannot modify the regexp's pattern through it.
def source
   pattern = @pattern
   pattern.freeze
end
sub(str, replacement) click to toggle source
sub(str) {|match_data| ... }

Returns a copy of str with the first occurrence of rxp pattern replaced with either replacement or the value of the block.

If a string is used as the replacement, the sequences \1, \2, and so on may be used to interpolate successive groups in the match.

In the block form, the current MatchData object is passed in as a parameter. The value returned by the block will be substituted for the match on each call.

/*
 * sub: delegates to oregexp_safe_gsub() with the first trailing flag cleared
 * and the second set.
 * NOTE(review): judging by the sibling sub/gsub wrappers, the first flag
 * presumably selects in-place ("!") behaviour and the second first-match-only
 * behaviour -- confirm against oregexp_safe_gsub.
 */
static VALUE oregexp_m_sub(int argc, VALUE *argv, VALUE self) {
        const int bang_flag = 0;
        const int once_flag = 1;
        return oregexp_safe_gsub(self, argc, argv, bang_flag, once_flag);
}
sub!(str, replacement) click to toggle source
sub!(str) {|match_data| ... }

Performs the substitutions of ORegexp#sub in place, returning str, or nil if no substitutions were performed.

/*
 * sub!: delegates to oregexp_safe_gsub() with both trailing flags set.
 * NOTE(review): judging by the sibling sub/gsub wrappers, the first flag
 * presumably selects in-place ("!") behaviour and the second first-match-only
 * behaviour -- confirm against oregexp_safe_gsub.
 */
static VALUE oregexp_m_sub_bang(int argc, VALUE *argv, VALUE self) {
        const int bang_flag = 1;
        const int once_flag = 1;
        return oregexp_safe_gsub(self, argc, argv, bang_flag, once_flag);
}
to_s => str click to toggle source

Returns a string containing the regular expression and its options (using the (?xxx)yyy notation). This string can be fed back in to ORegexp::new to create a regular expression with the same semantics as the original. (However, ORegexp#== may not return true when comparing the two, as the source of the regular expression itself may differ, as the example shows). ORegexp#inspect produces a generally more readable version of rxp.

r1 = ORegexp.new( 'ab+c', :options => OPTION_IGNORECASE | OPTION_EXTEND ) #=> /ab+c/ix
s1 = r1.to_s                                                              #=> "(?ix-m)ab+c"
r2 = ORegexp.new(s1)                                                      #=> /(?ix-m)ab+c/
r1 == r2                                                                  #=> false
r1.source                                                                 #=> "ab+c"
r2.source                                                                 #=> "(?ix-m)ab+c"
# File lib/oniguruma.rb, line 247
# Builds "(?<on>-<off>)<pattern>": the letters i, m, x (in that order) for
# the option bits that are set, then, unless all three are set, a "-"
# followed by the letters whose bits are clear, and finally the pattern.
def to_s
   bits = @options[:options]
   table = [["i", OPTION_IGNORECASE], ["m", OPTION_MULTILINE], ["x", OPTION_EXTEND]]

   enabled  = table.select { |_, mask| (bits & mask) > 0 }.map { |letter, _| letter }
   disabled = table.select { |_, mask| (bits & mask) == 0 }.map { |letter, _| letter }

   prefix = "(?" + enabled.join
   # The "-..." part is only emitted when at least one option is not switched on.
   prefix += "-" + disabled.join unless enabled.size == table.size
   prefix + ")" + @pattern
end

[Validate]

Generated with the Darkfish Rdoc Generator 2.