class URI::RFC2396_Parser
Class
that parses String’s into URI’s.
It contains a Hash
set of patterns and Regexp’s that match and validate.
Attributes
The Hash
of patterns.
See also URI::Parser.initialize_pattern.
Public Class Methods
Synopsis¶ ↑
URI::Parser.new([opts])
Args¶ ↑
The constructor accepts a hash as options for parser. Keys of options are pattern names of URI
components and values of options are pattern strings. The constructor generates set of regexps for parsing URIs.
You can use the following keys:
* :ESCAPED (URI::PATTERN::ESCAPED in default) * :UNRESERVED (URI::PATTERN::UNRESERVED in default) * :DOMLABEL (URI::PATTERN::DOMLABEL in default) * :TOPLABEL (URI::PATTERN::TOPLABEL in default) * :HOSTNAME (URI::PATTERN::HOSTNAME in default)
Examples¶ ↑
p = URI::Parser.new(:ESCAPED => "(?:%[a-fA-F0-9]{2}|%u[a-fA-F0-9]{4})") u = p.parse("http://example.jp/%uABCD") #=> #<URI::HTTP http://example.jp/%uABCD> URI.parse(u.to_s) #=> raises URI::InvalidURIError s = "http://example.com/ABCD" u1 = p.parse(s) #=> #<URI::HTTP http://example.com/ABCD> u2 = URI.parse(s) #=> #<URI::HTTP http://example.com/ABCD> u1 == u2 #=> true u1.eql?(u2) #=> false
# File lib/uri/rfc2396_parser.rb, line 99 def initialize(opts = {}) @pattern = initialize_pattern(opts) @pattern.each_value(&:freeze) @pattern.freeze @regexp = initialize_regexp(@pattern) @regexp.each_value(&:freeze) @regexp.freeze end
Public Instance Methods
Args¶ ↑
Description¶ ↑
Constructs a safe String
from str
, removing unsafe characters, replacing them with codes.
# File lib/uri/rfc2396_parser.rb, line 287 def escape(str, unsafe = @regexp[:UNSAFE]) unless unsafe.kind_of?(Regexp) # perhaps unsafe is String object unsafe = Regexp.new("[#{Regexp.quote(unsafe)}]", false) end str.gsub(unsafe) do us = $& tmp = '' us.each_byte do |uc| tmp << sprintf('%%%02X', uc) end tmp end.force_encoding(Encoding::US_ASCII) end
Args¶ ↑
str
-
String
to search schemes
-
Patterns to apply to
str
Description¶ ↑
Attempts to parse and merge a set of URIs. If no block
given, then returns the result, else it calls block
for each element in result.
See also URI::Parser.make_regexp.
# File lib/uri/rfc2396_parser.rb, line 249 def extract(str, schemes = nil) if block_given? str.scan(make_regexp(schemes)) { yield $& } nil else result = [] str.scan(make_regexp(schemes)) { result.push $& } result end end
# File lib/uri/rfc2396_parser.rb, line 326 def inspect @@to_s.bind_call(self) end
Returns Regexp
that is default self.regexp[:ABS_URI_REF]
, unless schemes
is provided. Then it is a Regexp.union
with self.pattern[:X_ABS_URI]
.
# File lib/uri/rfc2396_parser.rb, line 262 def make_regexp(schemes = nil) unless schemes @regexp[:ABS_URI_REF] else /(?=#{Regexp.union(*schemes)}:)#{@pattern[:X_ABS_URI]}/x end end
Args¶ ↑
uri
Description¶ ↑
Parses uri
and constructs either matching URI
scheme object (File
, FTP
, HTTP
, HTTPS
, LDAP
, LDAPS
, or MailTo
) or URI::Generic
.
Usage¶ ↑
p = URI::Parser.new p.parse("ldap://ldap.example.com/dc=example?user=john") #=> #<URI::LDAP ldap://ldap.example.com/dc=example?user=john>
# File lib/uri/rfc2396_parser.rb, line 209 def parse(uri) URI.for(*self.split(uri), self) end
Returns a split URI
against regexp[:ABS_URI]
.
# File lib/uri/rfc2396_parser.rb, line 120 def split(uri) case uri when '' # null uri when @regexp[:ABS_URI] scheme, opaque, userinfo, host, port, registry, path, query, fragment = $~[1..-1] # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] # absoluteURI = scheme ":" ( hier_part | opaque_part ) # hier_part = ( net_path | abs_path ) [ "?" query ] # opaque_part = uric_no_slash *uric # abs_path = "/" path_segments # net_path = "//" authority [ abs_path ] # authority = server | reg_name # server = [ [ userinfo "@" ] hostport ] if !scheme raise InvalidURIError, "bad URI (absolute but no scheme): #{uri}" end if !opaque && (!path && (!host && !registry)) raise InvalidURIError, "bad URI (absolute but no path): #{uri}" end when @regexp[:REL_URI] scheme = nil opaque = nil userinfo, host, port, registry, rel_segment, abs_path, query, fragment = $~[1..-1] if rel_segment && abs_path path = rel_segment + abs_path elsif rel_segment path = rel_segment elsif abs_path path = abs_path end # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] # relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ] # net_path = "//" authority [ abs_path ] # abs_path = "/" path_segments # rel_path = rel_segment [ abs_path ] # authority = server | reg_name # server = [ [ userinfo "@" ] hostport ] else raise InvalidURIError, "bad URI (is not URI?): #{uri}" end path = '' if !path && !opaque # (see RFC2396 Section 5.2) ret = [ scheme, userinfo, host, port, # X registry, # X path, # Y opaque, # Y query, fragment ] return ret end