class Psych::ScalarScanner

Scans scalars for built-in types.

Constants

FLOAT

Taken from yaml.org/type/float.html. Base 60, [-+]inf and NaN are handled separately.

INTEGER_LEGACY

Same as above, but allows commas. Not part of the YAML spec, but kept for backwards compatibility.

INTEGER_STRICT

Taken from yaml.org/type/int.html and modified to ensure at least one numerical symbol exists

TIME

Taken from yaml.org/type/timestamp.html

Attributes

Public Class Methods

Create a new scanner

# File ext/psych/lib/psych/scalar_scanner.rb, line 32
# Build a scanner.
#
# class_loader   - stored as-is in @class_loader.
# strict_integer - stored as-is in @strict_integer (defaults to false).
#
# A fresh, empty Hash is installed as the symbol cache.
def initialize(class_loader, strict_integer: false)
  @class_loader   = class_loader
  @strict_integer = strict_integer
  @symbol_cache   = {}
end

Public Instance Methods

Parse and return an int from string

# File ext/psych/lib/psych/scalar_scanner.rb, line 110
# Convert +string+ to an Integer.
#
# Every ',' and '_' is removed first; the remainder is handed to
# Kernel#Integer, so radix prefixes are honoured and anything that is not a
# valid integer literal raises ArgumentError.
def parse_int string
  without_separators = string.delete(',_')
  Integer(without_separators)
end

Parse and return a Time from string

# File ext/psych/lib/psych/scalar_scanner.rb, line 116
# Parse a timestamp +string+ and return an instance of the class obtained
# from class_loader.load('Time').
#
# The string is split once on a space, 't' or 'T' into a date part
# (YYYY-M-D) and a time part (H:M:S with optional fractional seconds and an
# optional zone of 'Z' or a signed numeric offset).
#
# Returns:
# * zone 'Z'        - the UTC time.
# * no zone         - the same instant (fields read as UTC) built via klass.at.
# * numeric offset  - a time carrying that UTC offset.
#
# The offset sign is read from the text of the hours field rather than from
# its numeric value, so offsets such as "-00:30" stay negative.
def parse_time string
  klass = class_loader.load 'Time'

  date, time = *(string.split(/[ tT]/, 2))
  (yy, m, dd) = date.match(/^(-?\d{4})-(\d{1,2})-(\d{1,2})/).captures.map { |x| x.to_i }
  md = time.match(/(\d+:\d+:\d+)(?:\.(\d*))?\s*(Z|[-+]\d+(:\d\d)?)?/)

  (hh, mm, ss) = md[1].split(':').map { |x| x.to_i }
  # Fractional seconds expressed as an exact number of microseconds.
  us = (md[2] ? Rational("0.#{md[2]}") : 0) * 1000000

  time = klass.utc(yy, m, dd, hh, mm, ss, us)

  return time if 'Z' == md[3]
  return klass.at(time.to_i, us) unless md[3]

  tz = md[3].match(/^([+\-]?\d{1,2})\:?(\d{1,2})?$/)
  hours   = Integer(tz[1], 10)
  minutes = tz[2] ? Integer(tz[2], 10) : 0

  # Combine magnitudes first, then apply the sign taken from the text:
  # Integer("-00") is 0, which would otherwise drop the minus sign.
  offset = hours.abs * 3600 + minutes * 60
  offset = -offset if tz[1].start_with?('-')

  klass.new(yy, m, dd, hh, mm, ss+us/(1_000_000r), offset)
end

Tokenize string returning the Ruby object

# File ext/psych/lib/psych/scalar_scanner.rb, line 39
# Convert the scalar +string+ into the Ruby object it denotes, or return
# +string+ itself when nothing more specific applies.
#
# The checks run in a fixed order and the first one that matches wins:
# empty string (nil), symbol cache hit, plain text / null / booleans,
# TIME, date, infinities and NaN, symbols, base-60 integers, base-60
# floats, FLOAT, and finally the integer pattern selected by
# @strict_integer.
def tokenize string
  return nil if string.empty?
  return @symbol_cache[string] if @symbol_cache.key?(string)

  integer_regex = @strict_integer ? INTEGER_STRICT : INTEGER_LEGACY

  # Check for a String type, being careful not to get caught by hash keys,
  # hex values, and special floats (e.g., -.inf).
  textual = string.match?(%r{^[^\d.:-]?[[:alpha:]_\s!@#$%\^&*(){}<>|/\\~;=]+}) || string.match?(/\n/)
  if textual
    # Anything longer than five characters is returned untouched.
    return string if string.length > 5
    return string if string.match?(/^[^ytonf~]/i)
    return nil if string == '~' || string.match?(/^null$/i)
    return true if string.match?(/^(yes|true|on)$/i)
    return false if string.match?(/^(no|false|off)$/i)

    return string
  end

  if string.match?(TIME)
    begin
      return parse_time(string)
    rescue ArgumentError
      # Matched the pattern but could not be built into a time: keep the text.
      return string
    end
  end

  if string.match?(/^\d{4}-(?:1[012]|0\d|\d)-(?:[12]\d|3[01]|0\d|\d)$/)
    begin
      return class_loader.date.strptime(string, '%F', Date::GREGORIAN)
    rescue ArgumentError
      return string
    end
  end

  return Float::INFINITY if string.match?(/^\+?\.inf$/i)
  return -Float::INFINITY if string.match?(/^-\.inf$/i)
  return Float::NAN if string.match?(/^\.nan$/i)

  if string.match?(/^:./)
    # For :"..." / :'...' the symbol name is the text between the quotes.
    quoted = string.match(/^:(["'])(.*)\1/)
    symbol_source = quoted ? quoted[2] : string
    symbol = class_loader.symbolize(symbol_source.sub(/^:/, ''))
    @symbol_cache[string] = symbol
    return symbol
  end

  # Base-60 values: field at index e is weighted by 60 ** (e - 2).abs.
  # NOTE(review): with only two fields the weights are 60**2 and 60**1
  # (so "1:30" yields 5400) -- confirm this is the intended reading.
  if string.match?(/^[-+]?[0-9][0-9_]*(:[0-5]?[0-9]){1,2}$/)
    return string.split(':').each_with_index.inject(0) do |total, (field, position)|
      total + (field.to_i * 60 ** (position - 2).abs)
    end
  end

  if string.match?(/^[-+]?[0-9][0-9_]*(:[0-5]?[0-9]){1,2}\.[0-9_]*$/)
    return string.split(':').each_with_index.inject(0) do |total, (field, position)|
      total + (field.to_f * 60 ** (position - 2).abs)
    end
  end

  if string.match?(FLOAT)
    # A lone (optionally signed) dot is not a number.
    return string if string.match?(/\A[-+]?\.\Z/)

    return Float(string.delete(',_').gsub(/\.([Ee]|$)/, '\1'))
  end

  return parse_int(string) if string.match?(integer_regex)

  string
end