class Prism::Translation::Parser::Lexer
Accepts a list of prism tokens and converts them into the expected format for the parser gem.
Attributes
lexed
[R]
An array of tuples that contain prism tokens and their associated lex state when they were lexed.
offset_cache
[R]
A hash that maps offsets in bytes to offsets in characters.
source_buffer
[R]
The Parser::Source::Buffer that the tokens were lexed from.
Public Class Methods
new
(source_buffer, lexed, offset_cache)
Initialize the lexer with the given source buffer, prism tokens, and offset cache.
# File lib/prism/translation/parser/lexer.rb, line 217
#
# Build a lexer around an already-lexed prism token stream.
#
# source_buffer:: the Parser::Source::Buffer that the tokens were lexed from.
# lexed::         an array of tuples pairing each prism token with the lex
#                 state it was lexed in.
# offset_cache::  a hash that maps offsets in bytes to offsets in characters.
def initialize(source_buffer, lexed, offset_cache)
  @source_buffer, @lexed, @offset_cache = source_buffer, lexed, offset_cache
end
Public Instance Methods
to_a
()
Convert the prism tokens into the expected format for the parser gem.
# File lib/prism/translation/parser/lexer.rb, line 227
#
# Convert the prism tokens into the expected format for the parser gem.
#
# Walks +lexed+ once, front to back. Each prism token type is mapped through
# TYPES and then adjusted per type: some prism tokens are merged into a
# single output token (e.g. a quote, its one-line content and the closing
# quote become one :tSTRING), some are split into several (e.g. multi-line
# string content, a regexp terminator and its options), and some have their
# value normalized (numbers are parsed, label colons are stripped, ...).
#
# Token locations from prism are byte offsets; every offset is translated
# through +offset_cache+ (bytes -> characters) exactly once, at the moment a
# Range is built.
#
# Returns an array of <tt>[type, [value, location]]</tt> tuples.
def to_a
  tokens = []
  index = 0
  length = lexed.length

  # Identifiers of the heredocs that have been opened but not yet closed.
  heredoc_identifier_stack = []

  # Builds a source range from two *byte* offsets.
  range_for = lambda do |start_byte, end_byte|
    Range.new(source_buffer, offset_cache[start_byte], offset_cache[end_byte])
  end

  while index < length
    token, state = lexed[index]
    index += 1
    next if %i[IGNORED_NEWLINE __END__ EOF].include?(token.type)

    type = TYPES.fetch(token.type)
    value = token.value
    location = range_for.call(token.location.start_offset, token.location.end_offset)

    case type
    when :kDO
      # A `do` is a lambda `do` when the closest previously emitted token
      # whose type is listed in LAMBDA_TOKEN_TYPES is the lambda arrow.
      nearest_lambda_token = tokens.reverse_each.find { |(emitted_type, _)| LAMBDA_TOKEN_TYPES.include?(emitted_type) }
      type = :kDO_LAMBDA if nearest_lambda_token&.first == :tLAMBDA
    when :tCHARACTER
      value.delete_prefix!("?")
    when :tCOMMENT
      if token.type == :EMBDOC_BEGIN
        # Fold the following tokens, up to and including the EMBDOC_END,
        # into one comment token.
        start_index = index

        while !((next_token = lexed[index][0]) && next_token.type == :EMBDOC_END) && (index < length - 1)
          value += next_token.value
          index += 1
        end

        if start_index != index
          value += next_token.value
          location = range_for.call(token.location.start_offset, lexed[index][0].location.end_offset)
          index += 1
        end
      elsif value.chomp!
        # chomp! returns nil when there was no line terminator to strip (a
        # comment on the last line of a file without a trailing newline);
        # only shrink the range when a newline really was removed.
        location = range_for.call(token.location.start_offset, token.location.end_offset - 1)
      end
    when :tNL
      value = nil
    when :tFLOAT
      value = parse_float(value)
    when :tIMAGINARY
      value = parse_complex(value)
    when :tINTEGER
      if value.start_with?("+")
        # The leading "+" is emitted as its own token and the integer's
        # range starts right after it.
        tokens << [:tUNARY_NUM, ["+", range_for.call(token.location.start_offset, token.location.start_offset + 1)]]
        location = range_for.call(token.location.start_offset + 1, token.location.end_offset)
      end

      value = parse_integer(value)
    when :tLABEL
      value.chomp!(":")
    when :tLABEL_END
      value.chomp!(":")
    when :tLCURLY
      type = :tLBRACE if state == (EXPR_BEG | EXPR_LABEL)
    when :tLPAREN2
      type = :tLPAREN if tokens.empty? || LPAREN_CONVERSION_TOKEN_TYPES.include?(tokens.dig(-1, 0))
    when :tNTH_REF
      value = parse_integer(value.delete_prefix("$"))
    when :tOP_ASGN
      value.chomp!("=")
    when :tRATIONAL
      value = parse_rational(value)
    when :tSPACE
      value = nil
    when :tSTRING_BEG
      if token.type == :HEREDOC_START
        heredoc_identifier_stack.push(value.match(/<<[-~]?["'`]?(?<heredoc_identifier>.*?)["'`]?\z/)[:heredoc_identifier])
      end

      if ["\"", "'"].include?(value) && (next_token = lexed[index][0]) && next_token.type == :STRING_END
        # Empty string literal: opening and closing quote become one :tSTRING.
        next_location = token.location.join(next_token.location)
        type = :tSTRING
        value = ""
        location = range_for.call(next_location.start_offset, next_location.end_offset)
        index += 1
      elsif ["\"", "'"].include?(value) && (next_token = lexed[index][0]) && next_token.type == :STRING_CONTENT && next_token.value.lines.count <= 1 && (next_next_token = lexed[index + 1][0]) && next_next_token.type == :STRING_END
        # Quote, content of at most one line, quote: one :tSTRING.
        next_location = token.location.join(next_next_token.location)
        type = :tSTRING
        value = next_token.value.gsub("\\\\", "\\")
        location = range_for.call(next_location.start_offset, next_location.end_offset)
        index += 2
      elsif value.start_with?("<<")
        # Heredoc opener: the value is reduced to "<<" plus its quote
        # character, defaulting to a double quote.
        quote = value[2] == "-" || value[2] == "~" ? value[3] : value[2]
        if quote == "`"
          type = :tXSTRING_BEG
          value = "<<`"
        else
          value = "<<#{quote == "'" || quote == "\"" ? quote : "\""}"
        end
      end
    when :tSTRING_CONTENT
      unless (lines = token.value.lines).one?
        # Emit one :tSTRING_CONTENT per line. Offsets are tracked in bytes
        # (hence bytesize, not length) and translated to character offsets
        # only when the range is built, so sources containing multibyte
        # characters get correct ranges.
        start_offset = token.location.start_offset

        lines.each do |line|
          newline = line.end_with?("\r\n") ? "\r\n" : "\n"
          chomped_line = line.chomp

          if (match = chomped_line.match(/(?<backslashes>\\+)\z/))
            # Trailing backslashes: half of them are dropped from the value,
            # and an odd count additionally drops the last backslash together
            # with the line terminator. `adjustment` counts the bytes that
            # were removed from the value but are still present in the source.
            adjustment = match[:backslashes].size / 2
            adjusted_line = chomped_line.delete_suffix("\\" * adjustment)

            if match[:backslashes].size.odd?
              adjusted_line.delete_suffix!("\\")
              adjustment += 2
            else
              adjusted_line << newline
            end
          else
            adjusted_line = line
            adjustment = 0
          end

          end_offset = start_offset + adjusted_line.bytesize + adjustment
          tokens << [:tSTRING_CONTENT, [adjusted_line, range_for.call(start_offset, end_offset)]]
          start_offset = end_offset
        end

        next
      end
    when :tSTRING_DVAR
      value = nil
    when :tSTRING_END
      if token.type == :HEREDOC_END && value.end_with?("\n")
        # The value becomes the identifier recorded when the heredoc was
        # opened, and the range excludes the trailing line terminator.
        newline_length = value.end_with?("\r\n") ? 2 : 1
        value = heredoc_identifier_stack.pop
        location = range_for.call(token.location.start_offset, token.location.end_offset - newline_length)
      elsif token.type == :REGEXP_END
        # Only the first character closes the regexp; the remainder is
        # emitted below as :tREGEXP_OPT.
        value = value[0]
        location = range_for.call(token.location.start_offset, token.location.start_offset + 1)
      end
    when :tSYMBEG
      if (next_token = lexed[index][0]) && !%i[STRING_CONTENT EMBEXPR_BEGIN EMBVAR STRING_END].include?(next_token.type)
        # Symbol start followed directly by its name: merge into :tSYMBOL.
        next_location = token.location.join(next_token.location)
        type = :tSYMBOL
        value = next_token.value
        value = { "~@" => "~", "!@" => "!" }.fetch(value, value)
        location = range_for.call(next_location.start_offset, next_location.end_offset)
        index += 1
      end
    when :tFID
      type = :tIDENTIFIER if tokens.dig(-1, 0) == :kDEF
    when :tXSTRING_BEG
      if (next_token = lexed[index][0]) && next_token.type != :STRING_CONTENT && next_token.type != :STRING_END
        type = :tBACK_REF2
      end
    end

    tokens << [type, [value, location]]

    if token.type == :REGEXP_END
      tokens << [:tREGEXP_OPT, [token.value[1..], range_for.call(token.location.start_offset + 1, token.location.end_offset)]]
    end
  end

  tokens
end