From d96dad1984a695ca2171bd06b8562fb657127fe9 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 27 Jan 2023 10:43:37 -0500 Subject: [PATCH 01/58] Include parser translation --- .github/workflows/main.yml | 2 + .gitmodules | 3 + .rubocop.yml | 1 + Rakefile | 10 +- lib/syntax_tree.rb | 2 + lib/syntax_tree/translation.rb | 20 + lib/syntax_tree/translation/parser.rb | 1426 +++++++++++++++++++++++++ test/ruby_syntax_fixtures_test.rb | 4 + test/suites/helper.rb | 3 + test/suites/parse_helper.rb | 149 +++ test/suites/parser | 1 + 11 files changed, 1620 insertions(+), 1 deletion(-) create mode 100644 lib/syntax_tree/translation.rb create mode 100644 lib/syntax_tree/translation/parser.rb create mode 100644 test/suites/helper.rb create mode 100644 test/suites/parse_helper.rb create mode 160000 test/suites/parser diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 3f811317..8bca2fc4 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -23,6 +23,8 @@ jobs: # TESTOPTS: --verbose steps: - uses: actions/checkout@master + with: + submodules: true - uses: ruby/setup-ruby@v1 with: bundler-cache: true diff --git a/.gitmodules b/.gitmodules index 1a2c45cc..8287c5e3 100644 --- a/.gitmodules +++ b/.gitmodules @@ -7,3 +7,6 @@ [submodule "test/ruby-syntax-fixtures"] path = test/ruby-syntax-fixtures url = https://github.com/ruby-syntax-tree/ruby-syntax-fixtures +[submodule "test/suites/parser"] + path = test/suites/parser + url = https://github.com/whitequark/parser diff --git a/.rubocop.yml b/.rubocop.yml index bc98a43a..381d7a27 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -9,6 +9,7 @@ AllCops: Exclude: - '{.git,.github,bin,coverage,pkg,spec,test/fixtures,vendor,tmp}/**/*' - test/ruby-syntax-fixtures/**/* + - test/suites/parser/**/* - test.rb Gemspec/DevelopmentDependencies: diff --git a/Rakefile b/Rakefile index f06d8cf8..cb96e7bf 100644 --- a/Rakefile +++ b/Rakefile @@ -6,8 +6,16 @@ require "syntax_tree/rake_tasks" 
Rake::TestTask.new(:test) do |t| t.libs << "test" + t.libs << "test/suites" t.libs << "lib" - t.test_files = FileList["test/**/*_test.rb"] + + # These are our own tests. + test_files = FileList["test/**/*_test.rb"] + + # This is a big test file from the parser gem that tests its functionality. + test_files << "test/suites/parser/test/test_parser.rb" + + t.test_files = test_files end task default: :test diff --git a/lib/syntax_tree.rb b/lib/syntax_tree.rb index f5c71aba..73add469 100644 --- a/lib/syntax_tree.rb +++ b/lib/syntax_tree.rb @@ -40,6 +40,8 @@ require_relative "syntax_tree/yarv/assembler" require_relative "syntax_tree/yarv/vm" +require_relative "syntax_tree/translation" + # Syntax Tree is a suite of tools built on top of the internal CRuby parser. It # provides the ability to generate a syntax tree from source, as well as the # tools necessary to inspect and manipulate that syntax tree. It can be used to diff --git a/lib/syntax_tree/translation.rb b/lib/syntax_tree/translation.rb new file mode 100644 index 00000000..37785ea2 --- /dev/null +++ b/lib/syntax_tree/translation.rb @@ -0,0 +1,20 @@ +# frozen_string_literal: true + +module SyntaxTree + # This module is responsible for translating the Syntax Tree syntax tree into + # other representations. + module Translation + # This method translates the given node into the representation defined by + # the whitequark/parser gem. We don't explicitly list it as a dependency + # because it's not required for the core functionality of Syntax Tree. 
+ def self.to_parser(node, source) + require "parser" + require_relative "translation/parser" + + buffer = ::Parser::Source::Buffer.new("(string)") + buffer.source = source + + node.accept(Parser.new(buffer)) + end + end +end diff --git a/lib/syntax_tree/translation/parser.rb b/lib/syntax_tree/translation/parser.rb new file mode 100644 index 00000000..3443df37 --- /dev/null +++ b/lib/syntax_tree/translation/parser.rb @@ -0,0 +1,1426 @@ +# frozen_string_literal: true + +module SyntaxTree + module Translation + class Parser < BasicVisitor + attr_reader :buffer, :stack + + def initialize(buffer) + @buffer = buffer + @stack = [] + end + + # For each node that we visit, we keep track of it in a stack as we + # descend into its children. We do this so that child nodes can reflect on + # their parents if they need additional information about their context. + def visit(node) + stack << node + result = super + stack.pop + result + end + + # Visit an AliasNode node. + def visit_alias(node) + s(:alias, [visit(node.left), visit(node.right)]) + end + + # Visit an ARefNode. + def visit_aref(node) + if ::Parser::Builders::Default.emit_index + if node.index.nil? + s(:index, [visit(node.collection)]) + else + s(:index, [visit(node.collection), *visit_all(node.index.parts)]) + end + else + if node.index.nil? + s(:send, [visit(node.collection), :[], nil]) + else + s( + :send, + [visit(node.collection), :[], *visit_all(node.index.parts)] + ) + end + end + end + + # Visit an ARefField node. + def visit_aref_field(node) + if ::Parser::Builders::Default.emit_index + if node.index.nil? + s(:indexasgn, [visit(node.collection), nil]) + else + s( + :indexasgn, + [visit(node.collection), *visit_all(node.index.parts)] + ) + end + else + if node.index.nil? + s(:send, [visit(node.collection), :[]=, nil]) + else + s( + :send, + [visit(node.collection), :[]=, *visit_all(node.index.parts)] + ) + end + end + end + + # Visit an ArgBlock node. 
+ def visit_arg_block(node) + s(:block_pass, [visit(node.value)]) + end + + # Visit an ArgStar node. + def visit_arg_star(node) + if stack[-3].is_a?(MLHSParen) && stack[-3].contents.is_a?(MLHS) + case node.value + when nil + s(:restarg) + when Ident + s(:restarg, [node.value.value.to_sym]) + else + s(:restarg, [node.value.value.value.to_sym]) + end + else + node.value.nil? ? s(:splat) : s(:splat, [visit(node.value)]) + end + end + + # Visit an ArgsForward node. + def visit_args_forward(_node) + s(:forwarded_args) + end + + # Visit an ArrayLiteral node. + def visit_array(node) + if node.contents.nil? + s(:array) + else + s(:array, visit_all(node.contents.parts)) + end + end + + # Visit an AryPtn node. + def visit_aryptn(node) + type = :array_pattern + children = visit_all(node.requireds) + + if node.rest.is_a?(VarField) + if !node.rest.value.nil? + children << s(:match_rest, [visit(node.rest)]) + elsif node.posts.empty? && + node.rest.location.start_char == node.rest.location.end_char + # Here we have an implicit rest, as in [foo,]. parser has a specific + # type for these patterns. + type = :array_pattern_with_tail + else + children << s(:match_rest) + end + end + + inner = s(type, children + visit_all(node.posts)) + node.constant ? s(:const_pattern, [visit(node.constant), inner]) : inner + end + + # Visit an Assign node. + def visit_assign(node) + target = visit(node.target) + s(target.type, target.children + [visit(node.value)]) + end + + # Visit an Assoc node. + def visit_assoc(node) + if node.value.nil? + type = node.key.value.start_with?(/[A-Z]/) ? :const : :send + s( + :pair, + [visit(node.key), s(type, [nil, node.key.value.chomp(":").to_sym])] + ) + else + s(:pair, [visit(node.key), visit(node.value)]) + end + end + + # Visit an AssocSplat node. + def visit_assoc_splat(node) + s(:kwsplat, [visit(node.value)]) + end + + # Visit a Backref node. 
+ def visit_backref(node) + if node.value.match?(/^\$\d+$/) + s(:nth_ref, [node.value[1..].to_i]) + else + s(:back_ref, [node.value.to_sym]) + end + end + + # Visit a BareAssocHash node. + def visit_bare_assoc_hash(node) + type = + if ::Parser::Builders::Default.emit_kwargs && + !stack[-2].is_a?(ArrayLiteral) + :kwargs + else + :hash + end + + s(type, visit_all(node.assocs)) + end + + # Visit a BEGINBlock node. + def visit_BEGIN(node) + s(:preexe, [visit(node.statements)]) + end + + # Visit a Begin node. + def visit_begin(node) + if node.bodystmt.empty? + s(:kwbegin) + elsif node.bodystmt.rescue_clause.nil? && + node.bodystmt.ensure_clause.nil? && node.bodystmt.else_clause.nil? + visited = visit(node.bodystmt.statements) + s(:kwbegin, visited.type == :begin ? visited.children : [visited]) + else + s(:kwbegin, [visit(node.bodystmt)]) + end + end + + # Visit a Binary node. + def visit_binary(node) + case node.operator + when :| + current = -2 + current -= 1 while stack[current].is_a?(Binary) && + stack[current].operator == :| + + if stack[current].is_a?(In) + s(:match_alt, [visit(node.left), visit(node.right)]) + else + s(:send, [visit(node.left), node.operator, visit(node.right)]) + end + when :"=>" + s(:match_as, [visit(node.left), visit(node.right)]) + when :"&&", :and + s(:and, [visit(node.left), visit(node.right)]) + when :"||", :or + s(:or, [visit(node.left), visit(node.right)]) + when :=~ + if node.left.is_a?(RegexpLiteral) && node.left.parts.length == 1 && + node.left.parts.first.is_a?(TStringContent) + s(:match_with_lvasgn, [visit(node.left), visit(node.right)]) + else + s(:send, [visit(node.left), node.operator, visit(node.right)]) + end + else + s(:send, [visit(node.left), node.operator, visit(node.right)]) + end + end + + # Visit a BlockArg node. + def visit_blockarg(node) + if node.name.nil? + s(:blockarg, [nil]) + else + s(:blockarg, [node.name.value.to_sym]) + end + end + + # Visit a BlockVar node. 
+ def visit_block_var(node) + shadowargs = + node.locals.map { |local| s(:shadowarg, [local.value.to_sym]) } + + # There is a special node type in the parser gem for when a single + # required parameter to a block would potentially be expanded + # automatically. We handle that case here. + if ::Parser::Builders::Default.emit_procarg0 + params = node.params + + if params.requireds.length == 1 && params.optionals.empty? && + params.rest.nil? && params.posts.empty? && + params.keywords.empty? && params.keyword_rest.nil? && + params.block.nil? + required = params.requireds.first + + procarg0 = + if ::Parser::Builders::Default.emit_arg_inside_procarg0 && + required.is_a?(Ident) + s(:procarg0, [s(:arg, [required.value.to_sym])]) + else + s(:procarg0, visit(required).children) + end + + return s(:args, [procarg0] + shadowargs) + end + end + + s(:args, visit(node.params).children + shadowargs) + end + + # Visit a BodyStmt node. + def visit_bodystmt(node) + inner = visit(node.statements) + + if node.rescue_clause + children = [inner] + visit(node.rescue_clause).children + + if node.else_clause + children.pop + children << visit(node.else_clause) + end + + inner = s(:rescue, children) + end + + if node.ensure_clause + inner = s(:ensure, [inner] + visit(node.ensure_clause).children) + end + + inner + end + + # Visit a Break node. + def visit_break(node) + s(:break, visit_all(node.arguments.parts)) + end + + # Visit a CallNode node. + def visit_call(node) + if node.receiver.nil? + children = [nil, node.message.value.to_sym] + + if node.arguments.is_a?(ArgParen) + case node.arguments.arguments + when nil + # skip + when ArgsForward + children << s(:forwarded_args) + else + children += visit_all(node.arguments.arguments.parts) + end + end + + s(:send, children) + elsif node.message == :call + children = [visit(node.receiver), :call] + + unless node.arguments.arguments.nil? 
+ children += visit_all(node.arguments.arguments.parts) + end + + s(send_type(node.operator), children) + else + children = [visit(node.receiver), node.message.value.to_sym] + + case node.arguments + when Args + children += visit_all(node.arguments.parts) + when ArgParen + unless node.arguments.arguments.nil? + children += visit_all(node.arguments.arguments.parts) + end + end + + s(send_type(node.operator), children) + end + end + + # Visit a Case node. + def visit_case(node) + clauses = [node.consequent] + while clauses.last && !clauses.last.is_a?(Else) + clauses << clauses.last.consequent + end + + type = node.consequent.is_a?(In) ? :case_match : :case + s(type, [visit(node.value)] + clauses.map { |clause| visit(clause) }) + end + + # Visit a CHAR node. + def visit_CHAR(node) + s(:str, [node.value[1..]]) + end + + # Visit a ClassDeclaration node. + def visit_class(node) + s( + :class, + [visit(node.constant), visit(node.superclass), visit(node.bodystmt)] + ) + end + + # Visit a Command node. + def visit_command(node) + call = + s( + :send, + [nil, node.message.value.to_sym, *visit_all(node.arguments.parts)] + ) + + if node.block + type, arguments = block_children(node.block) + s(type, [call, arguments, visit(node.block.bodystmt)]) + else + call + end + end + + # Visit a CommandCall node. + def visit_command_call(node) + children = [visit(node.receiver), node.message.value.to_sym] + + case node.arguments + when Args + children += visit_all(node.arguments.parts) + when ArgParen + children += visit_all(node.arguments.arguments.parts) + end + + call = s(send_type(node.operator), children) + + if node.block + type, arguments = block_children(node.block) + s(type, [call, arguments, visit(node.block.bodystmt)]) + else + call + end + end + + # Visit a Const node. + def visit_const(node) + s(:const, [nil, node.value.to_sym]) + end + + # Visit a ConstPathField node. 
+ def visit_const_path_field(node) + if node.parent.is_a?(VarRef) && node.parent.value.is_a?(Kw) && + node.parent.value.value == "self" && node.constant.is_a?(Ident) + s(:send, [visit(node.parent), :"#{node.constant.value}="]) + else + s(:casgn, [visit(node.parent), node.constant.value.to_sym]) + end + end + + # Visit a ConstPathRef node. + def visit_const_path_ref(node) + s(:const, [visit(node.parent), node.constant.value.to_sym]) + end + + # Visit a ConstRef node. + def visit_const_ref(node) + s(:const, [nil, node.constant.value.to_sym]) + end + + # Visit a CVar node. + def visit_cvar(node) + s(:cvar, [node.value.to_sym]) + end + + # Visit a DefNode node. + def visit_def(node) + name = node.name.value.to_sym + args = + case node.params + when Params + visit(node.params) + when Paren + visit(node.params.contents) + else + s(:args) + end + + if node.target + target = node.target.is_a?(Paren) ? node.target.contents : node.target + s(:defs, [visit(target), name, args, visit(node.bodystmt)]) + else + s(:def, [name, args, visit(node.bodystmt)]) + end + end + + # Visit a Defined node. + def visit_defined(node) + s(:defined?, [visit(node.value)]) + end + + # Visit a DynaSymbol node. + def visit_dyna_symbol(node) + if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) + s(:sym, ["\"#{node.parts.first.value}\"".undump.to_sym]) + else + s(:dsym, visit_all(node.parts)) + end + end + + # Visit an Else node. + def visit_else(node) + if node.statements.empty? && stack[-2].is_a?(Case) + s(:empty_else) + else + visit(node.statements) + end + end + + # Visit an Elsif node. + def visit_elsif(node) + s( + :if, + [ + visit(node.predicate), + visit(node.statements), + visit(node.consequent) + ] + ) + end + + # Visit an ENDBlock node. + def visit_END(node) + s(:postexe, [visit(node.statements)]) + end + + # Visit an Ensure node. + def visit_ensure(node) + s(:ensure, [visit(node.statements)]) + end + + # Visit a Field node. 
+ def visit_field(node) + case stack[-2] + when Assign, MLHS + s( + send_type(node.operator), + [visit(node.parent), :"#{node.name.value}="] + ) + else + s( + send_type(node.operator), + [visit(node.parent), node.name.value.to_sym] + ) + end + end + + # Visit a FloatLiteral node. + def visit_float(node) + s(:float, [node.value.to_f]) + end + + # Visit a FndPtn node. + def visit_fndptn(node) + make_match_rest = ->(child) do + if child.is_a?(VarField) && child.value.nil? + s(:match_rest, []) + else + s(:match_rest, [visit(child)]) + end + end + + inner = + s( + :find_pattern, + [ + make_match_rest[node.left], + *visit_all(node.values), + make_match_rest[node.right] + ] + ) + node.constant ? s(:const_pattern, [visit(node.constant), inner]) : inner + end + + # Visit a For node. + def visit_for(node) + s( + :for, + [visit(node.index), visit(node.collection), visit(node.statements)] + ) + end + + # Visit a GVar node. + def visit_gvar(node) + s(:gvar, [node.value.to_sym]) + end + + # Visit a HashLiteral node. + def visit_hash(node) + s(:hash, visit_all(node.assocs)) + end + + # Heredocs are represented _very_ differently in the parser gem from how + # they are represented in the Syntax Tree AST. This class is responsible + # for handling the translation. + class HeredocSegments + HeredocLine = Struct.new(:value, :segments) + + attr_reader :node, :segments + + def initialize(node) + @node = node + @segments = [] + end + + def <<(segment) + if segment.type == :str && segments.last && + segments.last.type == :str && + !segments.last.children.first.end_with?("\n") + segments.last.children.first << segment.children.first + else + segments << segment + end + end + + def trim! 
+ return unless node.beginning.value[2] == "~" + lines = [HeredocLine.new(+"", [])] + + segments.each do |segment| + lines.last.segments << segment + + if segment.type == :str + lines.last.value << segment.children.first + + if lines.last.value.end_with?("\n") + lines << HeredocLine.new(+"", []) + end + end + end + + lines.pop if lines.last.value.empty? + return if lines.empty? + + segments.clear + lines.each do |line| + remaining = node.dedent + + line.segments.each do |segment| + if segment.type == :str + if remaining > 0 + whitespace = segment.children.first[/^\s{0,#{remaining}}/] + segment.children.first.sub!(/^#{whitespace}/, "") + remaining -= whitespace.length + end + + if node.beginning.value[3] != "'" && segments.any? && + segments.last.type == :str && + segments.last.children.first.end_with?("\\\n") + segments.last.children.first.gsub!(/\\\n\z/, "") + segments.last.children.first.concat(segment.children.first) + elsif !segment.children.first.empty? + segments << segment + end + else + segments << segment + end + end + end + end + end + + # Visit a Heredoc node. + def visit_heredoc(node) + heredoc_segments = HeredocSegments.new(node) + + node.parts.each do |part| + if part.is_a?(TStringContent) && part.value.count("\n") > 1 + part + .value + .split("\n") + .each { |line| heredoc_segments << s(:str, ["#{line}\n"]) } + else + heredoc_segments << visit(part) + end + end + + heredoc_segments.trim! + + if node.beginning.value.match?(/`\w+`\z/) + s(:xstr, heredoc_segments.segments) + elsif heredoc_segments.segments.length > 1 + s(:dstr, heredoc_segments.segments) + elsif heredoc_segments.segments.empty? + s(:dstr) + else + heredoc_segments.segments.first + end + end + + # Visit a HshPtn node. 
+ def visit_hshptn(node) + children = + node.keywords.map do |(keyword, value)| + next s(:pair, [visit(keyword), visit(value)]) if value + + case keyword + when Label + s(:match_var, [keyword.value.chomp(":").to_sym]) + when StringContent + raise if keyword.parts.length > 1 + s(:match_var, [keyword.parts.first.value.to_sym]) + end + end + + if node.keyword_rest.is_a?(VarField) + children << if node.keyword_rest.value.nil? + s(:match_rest) + elsif node.keyword_rest.value == :nil + s(:match_nil_pattern) + else + s(:match_rest, [visit(node.keyword_rest)]) + end + end + + inner = s(:hash_pattern, children) + node.constant ? s(:const_pattern, [visit(node.constant), inner]) : inner + end + + # Visit an Ident node. + def visit_ident(node) + s(:lvar, [node.value.to_sym]) + end + + # Visit an IfNode node. + def visit_if(node) + predicate = + case node.predicate + when RangeNode + type = + node.predicate.operator.value == ".." ? :iflipflop : :eflipflop + s(type, visit(node.predicate).children) + when RegexpLiteral + s(:match_current_line, [visit(node.predicate)]) + when Unary + if node.predicate.operator.value == "!" && + node.predicate.statement.is_a?(RegexpLiteral) + s( + :send, + [s(:match_current_line, [visit(node.predicate.statement)]), :!] + ) + else + visit(node.predicate) + end + else + visit(node.predicate) + end + + s(:if, [predicate, visit(node.statements), visit(node.consequent)]) + end + + # Visit an IfOp node. + def visit_if_op(node) + s(:if, [visit(node.predicate), visit(node.truthy), visit(node.falsy)]) + end + + # Visit an Imaginary node. + def visit_imaginary(node) + # We have to do an eval here in order to get the value in case it's + # something like 42ri. to_c will not give the right value in that case. + # Maybe there's an API for this but I can't find it. + s(:complex, [eval(node.value)]) + end + + # Visit an In node. 
+ def visit_in(node) + case node.pattern + when IfNode + s( + :in_pattern, + [ + visit(node.pattern.statements), + s(:if_guard, [visit(node.pattern.predicate)]), + visit(node.statements) + ] + ) + when UnlessNode + s( + :in_pattern, + [ + visit(node.pattern.statements), + s(:unless_guard, [visit(node.pattern.predicate)]), + visit(node.statements) + ] + ) + else + s(:in_pattern, [visit(node.pattern), nil, visit(node.statements)]) + end + end + + # Visit an Int node. + def visit_int(node) + s(:int, [node.value.to_i]) + end + + # Visit an IVar node. + def visit_ivar(node) + s(:ivar, [node.value.to_sym]) + end + + # Visit a Kw node. + def visit_kw(node) + case node.value + when "__FILE__" + s(:str, [buffer.name]) + when "__LINE__" + s(:int, [node.location.start_line + buffer.first_line - 1]) + when "__ENCODING__" + if ::Parser::Builders::Default.emit_encoding + s(:__ENCODING__) + else + s(:const, [s(:const, [nil, :Encoding]), :UTF_8]) + end + else + s(node.value.to_sym) + end + end + + # Visit a KwRestParam node. + def visit_kwrest_param(node) + node.name.nil? ? s(:kwrestarg) : s(:kwrestarg, [node.name.value.to_sym]) + end + + # Visit a Label node. + def visit_label(node) + s(:sym, [node.value.chomp(":").to_sym]) + end + + # Visit a Lambda node. + def visit_lambda(node) + args = node.params.is_a?(LambdaVar) ? node.params : node.params.contents + + arguments = visit(args) + child = + if ::Parser::Builders::Default.emit_lambda + s(:lambda) + else + s(:send, [nil, :lambda]) + end + + type = :block + if args.empty? && (maximum = num_block_type(node.statements)) + type = :numblock + arguments = maximum + end + + s(type, [child, arguments, visit(node.statements)]) + end + + # Visit a LambdaVar node. + def visit_lambda_var(node) + shadowargs = + node.locals.map { |local| s(:shadowarg, [local.value.to_sym]) } + + s(:args, visit(node.params).children + shadowargs) + end + + # Visit an MAssign node. 
+ def visit_massign(node) + s(:masgn, [visit(node.target), visit(node.value)]) + end + + # Visit a MethodAddBlock node. + def visit_method_add_block(node) + type, arguments = block_children(node.block) + + case node.call + when Break, Next, ReturnNode + call = visit(node.call) + s( + call.type, + [s(type, [*call.children, arguments, visit(node.block.bodystmt)])] + ) + else + s(type, [visit(node.call), arguments, visit(node.block.bodystmt)]) + end + end + + # Visit an MLHS node. + def visit_mlhs(node) + s( + :mlhs, + node.parts.map do |part| + part.is_a?(Ident) ? s(:arg, [part.value.to_sym]) : visit(part) + end + ) + end + + # Visit an MLHSParen node. + def visit_mlhs_paren(node) + visit(node.contents) + end + + # Visit a ModuleDeclaration node. + def visit_module(node) + s(:module, [visit(node.constant), visit(node.bodystmt)]) + end + + # Visit an MRHS node. + def visit_mrhs(node) + s(:array, visit_all(node.parts)) + end + + # Visit a Next node. + def visit_next(node) + s(:next, visit_all(node.arguments.parts)) + end + + # Visit a Not node. + def visit_not(node) + if node.statement.nil? + s(:send, [s(:begin), :!]) + else + s(:send, [visit(node.statement), :!]) + end + end + + # Visit an OpAssign node. + def visit_opassign(node) + case node.operator.value + when "||=" + s(:or_asgn, [visit(node.target), visit(node.value)]) + when "&&=" + s(:and_asgn, [visit(node.target), visit(node.value)]) + else + s( + :op_asgn, + [ + visit(node.target), + node.operator.value.chomp("=").to_sym, + visit(node.value) + ] + ) + end + end + + # Visit a Params node. 
+ def visit_params(node) + children = [] + + children += + node.requireds.map do |required| + case required + when MLHSParen + visit(required) + else + s(:arg, [required.value.to_sym]) + end + end + + children += + node.optionals.map do |(name, value)| + s(:optarg, [name.value.to_sym, visit(value)]) + end + if node.rest && !node.rest.is_a?(ExcessedComma) + children << visit(node.rest) + end + children += node.posts.map { |post| s(:arg, [post.value.to_sym]) } + children += + node.keywords.map do |(name, value)| + key = name.value.chomp(":").to_sym + value ? s(:kwoptarg, [key, visit(value)]) : s(:kwarg, [key]) + end + + case node.keyword_rest + when nil, ArgsForward + # do nothing + when :nil + children << s(:kwnilarg) + else + children << visit(node.keyword_rest) + end + + children << visit(node.block) if node.block + + if node.keyword_rest.is_a?(ArgsForward) + if children.empty? && !::Parser::Builders::Default.emit_forward_arg + return s(:forward_args) + end + + children.insert( + node.requireds.length + node.optionals.length + + node.keywords.length, + s(:forward_arg) + ) + end + + s(:args, children) + end + + # Visit a Paren node. + def visit_paren(node) + if node.contents.nil? || + ( + node.contents.is_a?(Statements) && + node.contents.body.length == 1 && + node.contents.body.first.is_a?(VoidStmt) + ) + s(:begin) + elsif stack[-2].is_a?(DefNode) && stack[-2].target.nil? && + stack[-2].target == node + visit(node.contents) + else + visited = visit(node.contents) + visited.type == :begin ? visited : s(:begin, [visited]) + end + end + + # Visit a PinnedBegin node. + def visit_pinned_begin(node) + s(:pin, [s(:begin, [visit(node.statement)])]) + end + + # Visit a PinnedVarRef node. + def visit_pinned_var_ref(node) + s(:pin, [visit(node.value)]) + end + + # Visit a Program node. + def visit_program(node) + visit(node.statements) + end + + # Visit a QSymbols node. 
+ def visit_qsymbols(node) + s( + :array, + node.elements.map { |element| s(:sym, [element.value.to_sym]) } + ) + end + + # Visit a QWords node. + def visit_qwords(node) + s(:array, visit_all(node.elements)) + end + + # Visit a RangeNode node. + def visit_range(node) + type = node.operator.value == ".." ? :irange : :erange + s(type, [visit(node.left), visit(node.right)]) + end + + # Visit an RAssign node. + def visit_rassign(node) + type = node.operator.value == "=>" ? :match_pattern : :match_pattern_p + s(type, [visit(node.value), visit(node.pattern)]) + end + + # Visit a Rational node. + def visit_rational(node) + s(:rational, [node.value.to_r]) + end + + # Visit a Redo node. + def visit_redo(_node) + s(:redo) + end + + # Visit a RegexpLiteral node. + def visit_regexp_literal(node) + s( + :regexp, + visit_all(node.parts) + + [s(:regopt, node.ending.scan(/[a-z]/).sort.map(&:to_sym))] + ) + end + + # Visit a Rescue node. + def visit_rescue(node) + exceptions = + case node.exception&.exceptions + when nil + nil + when VarRef + s(:array, [visit(node.exception.exceptions)]) + when MRHS + s(:array, visit_all(node.exception.exceptions.parts)) + else + s(:array, [visit(node.exception.exceptions)]) + end + + resbody = + if node.exception.nil? + s(:resbody, [nil, nil, visit(node.statements)]) + elsif node.exception.variable.nil? + s(:resbody, [exceptions, nil, visit(node.statements)]) + else + s( + :resbody, + [ + exceptions, + visit(node.exception.variable), + visit(node.statements) + ] + ) + end + + children = [resbody] + if node.consequent + children += visit(node.consequent).children + else + children << nil + end + + s(:rescue, children) + end + + # Visit a RescueMod node. + def visit_rescue_mod(node) + s( + :rescue, + [ + visit(node.statement), + s(:resbody, [nil, nil, visit(node.value)]), + nil + ] + ) + end + + # Visit a RestParam node. + def visit_rest_param(node) + s(:restarg, node.name ? [node.name.value.to_sym] : []) + end + + # Visit a Retry node. 
+ def visit_retry(_node) + s(:retry) + end + + # Visit a ReturnNode node. + def visit_return(node) + s(:return, node.arguments ? visit_all(node.arguments.parts) : []) + end + + # Visit an SClass node. + def visit_sclass(node) + s(:sclass, [visit(node.target), visit(node.bodystmt)]) + end + + # Visit a Statements node. + def visit_statements(node) + children = + node.body.reject do |child| + child.is_a?(Comment) || child.is_a?(EmbDoc) || + child.is_a?(EndContent) || child.is_a?(VoidStmt) + end + + case children.length + when 0 + nil + when 1 + visit(children.first) + else + s(:begin, visit_all(children)) + end + end + + # Visit a StringConcat node. + def visit_string_concat(node) + s(:dstr, [visit(node.left), visit(node.right)]) + end + + # Visit a StringContent node. + def visit_string_content(node) + # Can get here if you're inside a hash pattern, e.g., in "a": 1 + s(:sym, [node.parts.first.value.to_sym]) + end + + # Visit a StringDVar node. + def visit_string_dvar(node) + visit(node.variable) + end + + # Visit a StringEmbExpr node. + def visit_string_embexpr(node) + child = visit(node.statements) + s(:begin, child ? [child] : []) + end + + # Visit a StringLiteral node. + def visit_string_literal(node) + if node.parts.empty? + s(:str, [""]) + elsif node.parts.length == 1 && node.parts.first.is_a?(TStringContent) + visit(node.parts.first) + else + s(:dstr, visit_all(node.parts)) + end + end + + # Visit a Super node. + def visit_super(node) + if node.arguments.is_a?(Args) + s(:super, visit_all(node.arguments.parts)) + else + case node.arguments.arguments + when nil + s(:super) + when ArgsForward + s(:super, [visit(node.arguments.arguments)]) + else + s(:super, visit_all(node.arguments.arguments.parts)) + end + end + end + + # Visit a SymbolLiteral node. + def visit_symbol_literal(node) + s(:sym, [node.value.value.to_sym]) + end + + # Visit a Symbols node. 
+ def visit_symbols(node) + children = + node.elements.map do |element| + if element.parts.length > 1 || + !element.parts.first.is_a?(TStringContent) + s(:dsym, visit_all(element.parts)) + else + s(:sym, [element.parts.first.value.to_sym]) + end + end + + s(:array, children) + end + + # Visit a TopConstField node. + def visit_top_const_field(node) + s(:casgn, [s(:cbase), node.constant.value.to_sym]) + end + + # Visit a TopConstRef node. + def visit_top_const_ref(node) + s(:const, [s(:cbase), node.constant.value.to_sym]) + end + + # Visit a TStringContent node. + def visit_tstring_content(node) + value = node.value.gsub(/([^[:ascii:]])/) { $1.dump[1...-1] } + s(:str, ["\"#{value}\"".undump]) + end + + # Visit a Unary node. + def visit_unary(node) + # Special handling here for flipflops + if node.statement.is_a?(Paren) && + node.statement.contents.is_a?(Statements) && + node.statement.contents.body.length == 1 && + (range = node.statement.contents.body.first).is_a?(RangeNode) && + node.operator == "!" + type = range.operator.value == ".." ? :iflipflop : :eflipflop + return s(:send, [s(:begin, [s(type, visit(range).children)]), :!]) + end + + case node.operator + when "+" + case node.statement + when Int + s(:int, [node.statement.value.to_i]) + when FloatLiteral + s(:float, [node.statement.value.to_f]) + else + s(:send, [visit(node.statement), :+@]) + end + when "-" + case node.statement + when Int + s(:int, [-node.statement.value.to_i]) + when FloatLiteral + s(:float, [-node.statement.value.to_f]) + else + s(:send, [visit(node.statement), :-@]) + end + else + s(:send, [visit(node.statement), node.operator.to_sym]) + end + end + + # Visit an Undef node. + def visit_undef(node) + s(:undef, visit_all(node.symbols)) + end + + # Visit an UnlessNode node. + def visit_unless(node) + predicate = + case node.predicate + when RegexpLiteral + s(:match_current_line, [visit(node.predicate)]) + when Unary + if node.predicate.operator.value == "!" 
&& + node.predicate.statement.is_a?(RegexpLiteral) + s( + :send, + [s(:match_current_line, [visit(node.predicate.statement)]), :!] + ) + else + visit(node.predicate) + end + else + visit(node.predicate) + end + + s(:if, [predicate, visit(node.consequent), visit(node.statements)]) + end + + # Visit an UntilNode node. + def visit_until(node) + type = + if node.modifier? && node.statements.is_a?(Statements) && + node.statements.body.length == 1 && + node.statements.body.first.is_a?(Begin) + :until_post + else + :until + end + + s(type, [visit(node.predicate), visit(node.statements)]) + end + + # Visit a VarField node. + def visit_var_field(node) + is_match_var = ->(parent) do + case parent + when AryPtn, FndPtn, HshPtn, In, RAssign + true + when Binary + parent.operator == :"=>" + else + false + end + end + + if [stack[-3], stack[-2]].any?(&is_match_var) + return s(:match_var, [node.value.value.to_sym]) + end + + case node.value + when Const + s(:casgn, [nil, node.value.value.to_sym]) + when CVar + s(:cvasgn, [node.value.value.to_sym]) + when GVar + s(:gvasgn, [node.value.value.to_sym]) + when Ident + s(:lvasgn, [node.value.value.to_sym]) + when IVar + s(:ivasgn, [node.value.value.to_sym]) + when VarRef + s(:lvasgn, [node.value.value.to_sym]) + else + s(:match_rest) + end + end + + # Visit a VarRef node. + def visit_var_ref(node) + visit(node.value) + end + + # Visit a VCall node. + def visit_vcall(node) + range = + ::Parser::Source::Range.new( + buffer, + node.location.start_char, + node.location.end_char + ) + location = ::Parser::Source::Map::Send.new(nil, range, nil, nil, range) + + s(:send, [nil, node.value.value.to_sym], location: location) + end + + # Visit a When node. + def visit_when(node) + s(:when, visit_all(node.arguments.parts) + [visit(node.statements)]) + end + + # Visit a WhileNode node. + def visit_while(node) + type = + if node.modifier? 
&& node.statements.is_a?(Statements) && + node.statements.body.length == 1 && + node.statements.body.first.is_a?(Begin) + :while_post + else + :while + end + + s(type, [visit(node.predicate), visit(node.statements)]) + end + + # Visit a Word node. + def visit_word(node) + if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) + visit(node.parts.first) + else + s(:dstr, visit_all(node.parts)) + end + end + + # Visit a Words node. + def visit_words(node) + s(:array, visit_all(node.elements)) + end + + # Visit an XStringLiteral node. + def visit_xstring_literal(node) + s(:xstr, visit_all(node.parts)) + end + + def visit_yield(node) + case node.arguments + when nil + s(:yield) + when Args + s(:yield, visit_all(node.arguments.parts)) + else + s(:yield, visit_all(node.arguments.contents.parts)) + end + end + + # Visit a ZSuper node. + def visit_zsuper(_node) + s(:zsuper) + end + + private + + def block_children(node) + arguments = (node.block_var ? visit(node.block_var) : s(:args)) + + type = :block + if !node.block_var && (maximum = num_block_type(node.bodystmt)) + type = :numblock + arguments = maximum + end + + [type, arguments] + end + + # We need to find if we should transform this block into a numblock + # since there could be new numbered variables like _1. + def num_block_type(statements) + variables = [] + queue = [statements] + + while (child_node = queue.shift) + if child_node.is_a?(VarRef) && child_node.value.is_a?(Ident) && + child_node.value.value =~ /^_(\d+)$/ + variables << $1.to_i + end + + queue += child_node.child_nodes.compact + end + + variables.max + end + + def s(type, children = [], opts = {}) + ::Parser::AST::Node.new(type, children, opts) + end + + def send_type(operator) + operator.is_a?(Op) && operator.value == "&." ? 
:csend : :send + end + end + end +end diff --git a/test/ruby_syntax_fixtures_test.rb b/test/ruby_syntax_fixtures_test.rb index 0cf89310..c5c13b27 100644 --- a/test/ruby_syntax_fixtures_test.rb +++ b/test/ruby_syntax_fixtures_test.rb @@ -1,5 +1,9 @@ # frozen_string_literal: true +# The ruby-syntax-fixtures repository tests against the current Ruby syntax, so +# we don't execute this test unless we're running 3.2 or above. +return unless RUBY_VERSION >= "3.2" + require_relative "test_helper" module SyntaxTree diff --git a/test/suites/helper.rb b/test/suites/helper.rb new file mode 100644 index 00000000..b0f8c427 --- /dev/null +++ b/test/suites/helper.rb @@ -0,0 +1,3 @@ +# frozen_string_literal: true + +require "parser/current" diff --git a/test/suites/parse_helper.rb b/test/suites/parse_helper.rb new file mode 100644 index 00000000..685cd6d2 --- /dev/null +++ b/test/suites/parse_helper.rb @@ -0,0 +1,149 @@ +# frozen_string_literal: true + +module ParseHelper + include AST::Sexp + + CURRENT_VERSION = RUBY_VERSION.split(".")[0..1].join(".").freeze + ALL_VERSIONS = %w[1.8 1.9 2.0 2.1 2.2 2.3 2.4 2.5 2.6 2.7 3.0 3.1 3.2 mac ios] + + known_failures = [ + # I think this may be a bug in the parser gem's precedence calculation. + # Unary plus appears to be parsed as part of the number literal in CRuby, + # but parser is parsing it as a separate operator. + "test_unary_num_pow_precedence:3505", + + # Not much to be done about this. Basically, regular expressions with named + # capture groups that use the =~ operator inject local variables into the + # current scope. In the parser gem, it detects this and changes future + # references to that name to be a local variable instead of a potential + # method call. CRuby does not do this. + "test_lvar_injecting_match:3778", + + # This is failing because CRuby is not marking values captured in hash + # patterns as local variables, while the parser gem is. 
+ "test_pattern_matching_hash:8971", + + # This is not actually allowed in the CRuby parser but the parser gem thinks + # it is allowed. + "test_pattern_matching_hash_with_string_keys:9016", + "test_pattern_matching_hash_with_string_keys:9027", + "test_pattern_matching_hash_with_string_keys:9038", + "test_pattern_matching_hash_with_string_keys:9060", + "test_pattern_matching_hash_with_string_keys:9071", + "test_pattern_matching_hash_with_string_keys:9082", + + # This happens with pattern matching where you're matching a literal value + # inside parentheses, which doesn't really do anything. Ripper doesn't + # capture that this value is inside a parentheses, so it's hard to translate + # properly. + "test_pattern_matching_expr_in_paren:9206", + + # These are also failing because of CRuby not marking values captured in + # hash patterns as local variables. + "test_pattern_matching_single_line_allowed_omission_of_parentheses:9205", + "test_pattern_matching_single_line_allowed_omission_of_parentheses:9581", + "test_pattern_matching_single_line_allowed_omission_of_parentheses:9611", + + # I'm not even sure what this is testing, because the code is invalid in + # CRuby. + "test_control_meta_escape_chars_in_regexp__since_31:*", + ] + + # These are failures that we need to take care of (or determine the reason + # that we're not going to handle them). + todo_failures = [ + "test_dedenting_heredoc:334", + "test_dedenting_heredoc:390", + "test_dedenting_heredoc:399", + "test_slash_newline_in_heredocs:7194", + "test_parser_slash_slash_n_escaping_in_literals:*", + "test_cond_match_current_line:4801", + "test_forwarded_restarg:*", + "test_forwarded_kwrestarg:*", + "test_forwarded_argument_with_restarg:*", + "test_forwarded_argument_with_kwrestarg:*" + ] + + if CURRENT_VERSION <= "2.7" + # I'm not sure why this is failing on 2.7.0, but we'll turn it off for now + # until we have more time to investigate. 
+ todo_failures.push("test_pattern_matching_hash:*") + end + + if CURRENT_VERSION <= "3.0" + # In < 3.0, there are some changes to the way the parser gem handles + # forwarded args. We should eventually support this, but for now we're going + # to mark them as todo. + todo_failures.push( + "test_forward_arg:*", + "test_forward_args_legacy:*", + "test_endless_method_forwarded_args_legacy:*", + "test_trailing_forward_arg:*" + ) + end + + if CURRENT_VERSION == "3.1" + # This test actually fails on 3.1.0, even though it's marked as being since + # 3.1. So we're going to skip this test on 3.1, but leave it in for other + # versions. + known_failures.push( + "test_multiple_pattern_matches:11086", + "test_multiple_pattern_matches:11102" + ) + end + + # This is the list of all failures. + FAILURES = (known_failures + todo_failures).freeze + + private + + def assert_context(*) + end + + def assert_diagnoses(*) + end + + def assert_diagnoses_many(*) + end + + def refute_diagnoses(*) + end + + def with_versions(*) + end + + def assert_parses(_ast, code, _source_maps = "", versions = ALL_VERSIONS) + # We're going to skip any examples that aren't for the current version of + # Ruby. + return unless versions.include?(CURRENT_VERSION) + + # We're going to skip any examples that are for older Ruby versions that we + # do not support. + return if (versions & %w[3.1 3.2]).empty? + + caller(1, 3).each do |line| + _, lineno, name = *line.match(/(\d+):in `(.+)'/) + + # Return directly and don't do anything if it's a known failure. + return if FAILURES.include?("#{name}:#{lineno}") + return if FAILURES.include?("#{name}:*") + end + + expected = parse(code) + return if expected.nil? 
+ + actual = SyntaxTree::Translation.to_parser(SyntaxTree.parse(code), code) + assert_equal(expected, actual) + end + + def parse(code) + parser = Parser::CurrentRuby.default_parser + parser.diagnostics.consumer = ->(*) {} + + buffer = Parser::Source::Buffer.new("(string)", 1) + buffer.source = code + + parser.parse(buffer) + rescue Parser::SyntaxError + end +end diff --git a/test/suites/parser b/test/suites/parser new file mode 160000 index 00000000..8de8b7fa --- /dev/null +++ b/test/suites/parser @@ -0,0 +1 @@ +Subproject commit 8de8b7fa7af471a2159860d6a0a5b615eac9c83c From 1155f851226b552e1ca7e435ab134783c997ac81 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 30 Jan 2023 10:34:35 -0500 Subject: [PATCH 02/58] BasicVisitor::visit_methods --- README.md | 21 ++++++++++++++ lib/syntax_tree/basic_visitor.rb | 49 ++++++++++++++++++++++++++++---- test/visitor_test.rb | 14 +++++++++ 3 files changed, 79 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 3c437947..6ca9b01a 100644 --- a/README.md +++ b/README.md @@ -40,6 +40,7 @@ It is built with only standard library dependencies. It additionally ships with - [construct_keys](#construct_keys) - [Visitor](#visitor) - [visit_method](#visit_method) + - [visit_methods](#visit_methods) - [BasicVisitor](#basicvisitor) - [MutationVisitor](#mutationvisitor) - [WithEnvironment](#withenvironment) @@ -517,6 +518,26 @@ Did you mean? visit_binary from bin/console:8:in `
' ``` +### visit_methods + +Similar to `visit_method`, `visit_methods` also checks that methods defined are valid visit methods. This variation however accepts a block and checks that all methods defined within that block are valid visit methods. It's meant to be used like: + +```ruby +class ArithmeticVisitor < SyntaxTree::Visitor + visit_methods do + def visit_binary(node) + # ... + end + + def visit_int(node) + # ... + end + end +end +``` + +This is only checked when the methods are defined and does not impose any kind of runtime overhead after that. It is very useful for upgrading versions of Syntax Tree in case these methods names change. + ### BasicVisitor When you're defining your own visitor, by default it will walk down the tree even if you don't define `visit_*` methods. This is to ensure you can define a subset of the necessary methods in order to only interact with the nodes you're interested in. If you'd like to change this default to instead raise an error if you visit a node you haven't explicitly handled, you can instead inherit from `BasicVisitor`. diff --git a/lib/syntax_tree/basic_visitor.rb b/lib/syntax_tree/basic_visitor.rb index 34b7876e..bd8ea5f2 100644 --- a/lib/syntax_tree/basic_visitor.rb +++ b/lib/syntax_tree/basic_visitor.rb @@ -29,7 +29,7 @@ def initialize(error) def corrections @corrections ||= DidYouMean::SpellChecker.new( - dictionary: Visitor.visit_methods + dictionary: BasicVisitor.valid_visit_methods ).correct(visit_method) end @@ -40,7 +40,40 @@ def corrections end end + # This module is responsible for checking all of the methods defined within + # a given block to ensure that they are valid visit methods. + class VisitMethodsChecker < Module + Status = Struct.new(:checking) + + # This is the status of the checker. It's used to determine whether or not + # we should be checking the methods that are defined. It is kept as an + # instance variable so that it can be disabled later. 
+ attr_reader :status + + def initialize + # We need the status to be an instance variable so that it can be + # accessed by the disable! method, but also a local variable so that it + # can be captured by the define_method block. + status = @status = Status.new(true) + + define_method(:method_added) do |name| + BasicVisitor.visit_method(name) if status.checking + super(name) + end + end + + def disable! + status.checking = false + end + end + class << self + # This is the list of all of the valid visit methods. + def valid_visit_methods + @valid_visit_methods ||= + Visitor.instance_methods.grep(/^visit_(?!child_nodes)/) + end + # This method is here to help folks write visitors. # # It's not always easy to ensure you're writing the correct method name in @@ -51,15 +84,21 @@ class << self # name. It will raise an error if the visit method you're defining isn't # actually a method on the parent visitor. def visit_method(method_name) - return if visit_methods.include?(method_name) + return if valid_visit_methods.include?(method_name) raise VisitMethodError, method_name end - # This is the list of all of the valid visit methods. + # This method is here to help folks write visitors. + # + # Within the given block, every method that is defined will be checked to + # ensure it's a valid visit method using the BasicVisitor::visit_method + # method defined above. def visit_methods - @visit_methods ||= - Visitor.instance_methods.grep(/^visit_(?!child_nodes)/) + checker = VisitMethodsChecker.new + extend(checker) + yield + checker.disable! 
end end diff --git a/test/visitor_test.rb b/test/visitor_test.rb index 74f3df75..86ff1b01 100644 --- a/test/visitor_test.rb +++ b/test/visitor_test.rb @@ -53,5 +53,19 @@ def test_visit_method_correction assert_match(/visit_binary/, message) end end + + class VisitMethodsTestVisitor < BasicVisitor + end + + def test_visit_methods + VisitMethodsTestVisitor.visit_methods do + assert_raises(BasicVisitor::VisitMethodError) do + # In reality, this would be a method defined using the def keyword, + # but we're using method_added here to trigger the checker so that we + # aren't defining methods dynamically in the test suite. + VisitMethodsTestVisitor.method_added(:visit_foo) + end + end + end end end From db2979f87f1841719ff0cdd33e324d8a53631986 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 30 Jan 2023 15:09:26 -0500 Subject: [PATCH 03/58] Additionally provide parser gem location information --- bin/compare | 59 + lib/syntax_tree/translation.rb | 5 +- lib/syntax_tree/translation/parser.rb | 2092 ++++++++++++++++++++----- test/suites/parse_helper.rb | 28 +- 4 files changed, 1823 insertions(+), 361 deletions(-) create mode 100755 bin/compare diff --git a/bin/compare b/bin/compare new file mode 100755 index 00000000..bdca5a9a --- /dev/null +++ b/bin/compare @@ -0,0 +1,59 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "bundler/setup" +require "parser/current" + +$:.unshift(File.expand_path("../lib", __dir__)) +require "syntax_tree" + +# First, opt in to every AST feature. +# Parser::Builders::Default.modernize + +# Modify the source map == check so that it doesn't check against the node +# itself so we don't get into a recursive loop. 
+Parser::Source::Map.prepend( + Module.new { + def ==(other) + self.class == other.class && + (instance_variables - %i[@node]).map do |ivar| + instance_variable_get(ivar) == other.instance_variable_get(ivar) + end.reduce(:&) + end + } +) + +# Next, ensure that we're comparing the nodes and also comparing the source +# ranges so that we're getting all of the necessary information. +Parser::AST::Node.prepend( + Module.new { + def ==(other) + super && (location == other.location) + end + } +) + +source = ARGF.read + +parser = Parser::CurrentRuby.new +parser.diagnostics.all_errors_are_fatal = true + +buffer = Parser::Source::Buffer.new("(string)", 1) +buffer.source = source.dup.force_encoding(parser.default_encoding) + +stree = SyntaxTree::Translation.to_parser(SyntaxTree.parse(source), buffer) +ptree = parser.parse(buffer) + +if stree == ptree + puts "Syntax trees are equivalent." +else + warn "Syntax trees are different." + + warn "syntax_tree:" + pp stree + + warn "parser:" + pp ptree + + binding.irb +end diff --git a/lib/syntax_tree/translation.rb b/lib/syntax_tree/translation.rb index 37785ea2..d3f2e56f 100644 --- a/lib/syntax_tree/translation.rb +++ b/lib/syntax_tree/translation.rb @@ -7,13 +7,10 @@ module Translation # This method translates the given node into the representation defined by # the whitequark/parser gem. We don't explicitly list it as a dependency # because it's not required for the core functionality of Syntax Tree. 
- def self.to_parser(node, source) + def self.to_parser(node, buffer) require "parser" require_relative "translation/parser" - buffer = ::Parser::Source::Buffer.new("(string)") - buffer.source = source - node.accept(Parser.new(buffer)) end end diff --git a/lib/syntax_tree/translation/parser.rb b/lib/syntax_tree/translation/parser.rb index 3443df37..8a61ad94 100644 --- a/lib/syntax_tree/translation/parser.rb +++ b/lib/syntax_tree/translation/parser.rb @@ -2,6 +2,8 @@ module SyntaxTree module Translation + # This visitor is responsible for converting the syntax tree produced by + # Syntax Tree into the syntax tree produced by the whitequark/parser gem. class Parser < BasicVisitor attr_reader :buffer, :stack @@ -22,24 +24,81 @@ def visit(node) # Visit an AliasNode node. def visit_alias(node) - s(:alias, [visit(node.left), visit(node.right)]) + s( + :alias, + [visit(node.left), visit(node.right)], + source_map_keyword( + keyword: source_range_length(node.location.start_char, 5), + expression: source_range_node(node) + ) + ) end # Visit an ARefNode. def visit_aref(node) if ::Parser::Builders::Default.emit_index if node.index.nil? - s(:index, [visit(node.collection)]) + s( + :index, + [visit(node.collection)], + source_map_index( + begin_token: + source_range_find( + node.collection.location.end_char, + node.location.end_char, + "[" + ), + end_token: source_range_length(node.location.end_char, -1), + expression: source_range_node(node) + ) + ) else - s(:index, [visit(node.collection), *visit_all(node.index.parts)]) + s( + :index, + [visit(node.collection)].concat(visit_all(node.index.parts)), + source_map_index( + begin_token: + source_range_find( + node.collection.location.end_char, + node.index.location.start_char, + "[" + ), + end_token: source_range_length(node.location.end_char, -1), + expression: source_range_node(node) + ) + ) end else if node.index.nil? 
- s(:send, [visit(node.collection), :[], nil]) + s( + :send, + [visit(node.collection), :[]], + source_map_send( + selector: + source_range_find( + node.collection.location.end_char, + node.location.end_char, + "[]" + ), + expression: source_range_node(node) + ) + ) else s( :send, - [visit(node.collection), :[], *visit_all(node.index.parts)] + [visit(node.collection), :[], *visit_all(node.index.parts)], + source_map_send( + selector: + source_range( + source_range_find( + node.collection.location.end_char, + node.index.location.start_char, + "[" + ).begin_pos, + node.location.end_char + ), + expression: source_range_node(node) + ) ) end end @@ -49,20 +108,69 @@ def visit_aref(node) def visit_aref_field(node) if ::Parser::Builders::Default.emit_index if node.index.nil? - s(:indexasgn, [visit(node.collection), nil]) + s( + :indexasgn, + [visit(node.collection)], + source_map_index( + begin_token: + source_range_find( + node.collection.location.end_char, + node.location.end_char, + "[" + ), + end_token: source_range_length(node.location.end_char, -1), + expression: source_range_node(node) + ) + ) else s( :indexasgn, - [visit(node.collection), *visit_all(node.index.parts)] + [visit(node.collection)].concat(visit_all(node.index.parts)), + source_map_index( + begin_token: + source_range_find( + node.collection.location.end_char, + node.index.location.start_char, + "[" + ), + end_token: source_range_length(node.location.end_char, -1), + expression: source_range_node(node) + ) ) end else if node.index.nil? 
- s(:send, [visit(node.collection), :[]=, nil]) + s( + :send, + [visit(node.collection), :[]=], + source_map_send( + selector: + source_range_find( + node.collection.location.end_char, + node.location.end_char, + "[]" + ), + expression: source_range_node(node) + ) + ) else s( :send, - [visit(node.collection), :[]=, *visit_all(node.index.parts)] + [visit(node.collection), :[]=].concat( + visit_all(node.index.parts) + ), + source_map_send( + selector: + source_range( + source_range_find( + node.collection.location.end_char, + node.index.location.start_char, + "[" + ).begin_pos, + node.location.end_char + ), + expression: source_range_node(node) + ) ) end end @@ -70,7 +178,14 @@ def visit_aref_field(node) # Visit an ArgBlock node. def visit_arg_block(node) - s(:block_pass, [visit(node.value)]) + s( + :block_pass, + [visit(node.value)], + source_map_operator( + operator: source_range_length(node.location.start_char, 1), + expression: source_range_node(node) + ) + ) end # Visit an ArgStar node. @@ -78,29 +193,44 @@ def visit_arg_star(node) if stack[-3].is_a?(MLHSParen) && stack[-3].contents.is_a?(MLHS) case node.value when nil - s(:restarg) + s(:restarg, [], nil) when Ident - s(:restarg, [node.value.value.to_sym]) + s(:restarg, [node.value.value.to_sym], nil) else - s(:restarg, [node.value.value.value.to_sym]) + s(:restarg, [node.value.value.value.to_sym], nil) end else - node.value.nil? ? s(:splat) : s(:splat, [visit(node.value)]) + s( + :splat, + node.value.nil? ? [] : [visit(node.value)], + source_map_operator( + operator: source_range_length(node.location.start_char, 1), + expression: source_range_node(node) + ) + ) end end # Visit an ArgsForward node. def visit_args_forward(_node) - s(:forwarded_args) + s(:forwarded_args, [], nil) end # Visit an ArrayLiteral node. def visit_array(node) - if node.contents.nil? - s(:array) - else - s(:array, visit_all(node.contents.parts)) - end + s( + :array, + node.contents ? 
visit_all(node.contents.parts) : [], + if node.lbracket.nil? + source_map_collection(expression: source_range_node(node)) + else + source_map_collection( + begin_token: source_range_node(node.lbracket), + end_token: source_range_length(node.location.end_char, -1), + expression: source_range_node(node) + ) + end + ) end # Visit an AryPtn node. @@ -110,82 +240,142 @@ def visit_aryptn(node) if node.rest.is_a?(VarField) if !node.rest.value.nil? - children << s(:match_rest, [visit(node.rest)]) + children << s(:match_rest, [visit(node.rest)], nil) elsif node.posts.empty? && node.rest.location.start_char == node.rest.location.end_char # Here we have an implicit rest, as in [foo,]. parser has a specific # type for these patterns. type = :array_pattern_with_tail else - children << s(:match_rest) + children << s(:match_rest, [], nil) end end - inner = s(type, children + visit_all(node.posts)) - node.constant ? s(:const_pattern, [visit(node.constant), inner]) : inner + inner = s(type, children + visit_all(node.posts), nil) + if node.constant + s(:const_pattern, [visit(node.constant), inner], nil) + else + inner + end end # Visit an Assign node. def visit_assign(node) target = visit(node.target) - s(target.type, target.children + [visit(node.value)]) + location = + target + .location + .with_operator( + source_range_find( + node.target.location.end_char, + node.value.location.start_char, + "=" + ) + ) + .with_expression(source_range_node(node)) + + s(target.type, target.children + [visit(node.value)], location) end # Visit an Assoc node. def visit_assoc(node) if node.value.nil? type = node.key.value.start_with?(/[A-Z]/) ? 
:const : :send + s( :pair, - [visit(node.key), s(type, [nil, node.key.value.chomp(":").to_sym])] + [ + visit(node.key), + s(type, [nil, node.key.value.chomp(":").to_sym], nil) + ], + nil ) else - s(:pair, [visit(node.key), visit(node.value)]) + s( + :pair, + [visit(node.key), visit(node.value)], + source_map_operator( + operator: source_range_length(node.key.location.end_char, -1), + expression: source_range_node(node) + ) + ) end end # Visit an AssocSplat node. def visit_assoc_splat(node) - s(:kwsplat, [visit(node.value)]) + s( + :kwsplat, + [visit(node.value)], + source_map_operator( + operator: source_range_length(node.location.start_char, 2), + expression: source_range_node(node) + ) + ) end # Visit a Backref node. def visit_backref(node) + location = source_map(expression: source_range_node(node)) + if node.value.match?(/^\$\d+$/) - s(:nth_ref, [node.value[1..].to_i]) + s(:nth_ref, [node.value[1..].to_i], location) else - s(:back_ref, [node.value.to_sym]) + s(:back_ref, [node.value.to_sym], location) end end # Visit a BareAssocHash node. def visit_bare_assoc_hash(node) - type = + s( if ::Parser::Builders::Default.emit_kwargs && !stack[-2].is_a?(ArrayLiteral) :kwargs else :hash - end - - s(type, visit_all(node.assocs)) + end, + visit_all(node.assocs), + source_map_collection(expression: source_range_node(node)) + ) end # Visit a BEGINBlock node. def visit_BEGIN(node) - s(:preexe, [visit(node.statements)]) + s( + :preexe, + [visit(node.statements)], + source_map_keyword( + keyword: source_range_length(node.location.start_char, 5), + begin_token: + source_range_find( + node.location.start_char + 5, + node.statements.location.start_char, + "{" + ), + end_token: source_range_length(node.location.end_char, -1), + expression: source_range_node(node) + ) + ) end # Visit a Begin node. 
def visit_begin(node) + location = + source_map_collection( + begin_token: source_range_length(node.location.start_char, 5), + end_token: source_range_length(node.location.end_char, -3), + expression: source_range_node(node) + ) + if node.bodystmt.empty? - s(:kwbegin) + s(:kwbegin, [], location) elsif node.bodystmt.rescue_clause.nil? && node.bodystmt.ensure_clause.nil? && node.bodystmt.else_clause.nil? - visited = visit(node.bodystmt.statements) - s(:kwbegin, visited.type == :begin ? visited.children : [visited]) + child = visit(node.bodystmt.statements) + + s(:kwbegin, child.type == :begin ? child.children : [child], location) else - s(:kwbegin, [visit(node.bodystmt)]) + s(:kwbegin, [visit(node.bodystmt)], location) end end @@ -194,45 +384,80 @@ def visit_binary(node) case node.operator when :| current = -2 - current -= 1 while stack[current].is_a?(Binary) && - stack[current].operator == :| + while stack[current].is_a?(Binary) && stack[current].operator == :| + current -= 1 + end if stack[current].is_a?(In) - s(:match_alt, [visit(node.left), visit(node.right)]) + s(:match_alt, [visit(node.left), visit(node.right)], nil) else - s(:send, [visit(node.left), node.operator, visit(node.right)]) + visit(canonical_binary(node)) end - when :"=>" - s(:match_as, [visit(node.left), visit(node.right)]) - when :"&&", :and - s(:and, [visit(node.left), visit(node.right)]) - when :"||", :or - s(:or, [visit(node.left), visit(node.right)]) + when :"=>", :"&&", :and, :"||", :or + s( + { "=>": :match_as, "&&": :and, "||": :or }.fetch( + node.operator, + node.operator + ), + [visit(node.left), visit(node.right)], + source_map_operator( + operator: + source_range_find( + node.left.location.end_char, + node.right.location.start_char, + node.operator.to_s + ), + expression: source_range_node(node) + ) + ) when :=~ if node.left.is_a?(RegexpLiteral) && node.left.parts.length == 1 && node.left.parts.first.is_a?(TStringContent) - s(:match_with_lvasgn, [visit(node.left), visit(node.right)]) + 
s( + :match_with_lvasgn, + [visit(node.left), visit(node.right)], + source_map_operator( + operator: + source_range_find( + node.left.location.end_char, + node.right.location.start_char, + node.operator.to_s + ), + expression: source_range_node(node) + ) + ) else - s(:send, [visit(node.left), node.operator, visit(node.right)]) + visit(canonical_binary(node)) end else - s(:send, [visit(node.left), node.operator, visit(node.right)]) + visit(canonical_binary(node)) end end # Visit a BlockArg node. def visit_blockarg(node) if node.name.nil? - s(:blockarg, [nil]) + s( + :blockarg, + [nil], + source_map_variable(expression: source_range_node(node)) + ) else - s(:blockarg, [node.name.value.to_sym]) + s( + :blockarg, + [node.name.value.to_sym], + source_map_variable( + name: source_range_node(node.name), + expression: source_range_node(node) + ) + ) end end # Visit a BlockVar node. def visit_block_var(node) shadowargs = - node.locals.map { |local| s(:shadowarg, [local.value.to_sym]) } + node.locals.map { |local| s(:shadowarg, [local.value.to_sym], nil) } # There is a special node type in the parser gem for when a single # required parameter to a block would potentially be expanded @@ -249,16 +474,16 @@ def visit_block_var(node) procarg0 = if ::Parser::Builders::Default.emit_arg_inside_procarg0 && required.is_a?(Ident) - s(:procarg0, [s(:arg, [required.value.to_sym])]) + s(:procarg0, [s(:arg, [required.value.to_sym], nil)], nil) else - s(:procarg0, visit(required).children) + s(:procarg0, visit(required).children, nil) end - return s(:args, [procarg0] + shadowargs) + return s(:args, [procarg0] + shadowargs, nil) end end - s(:args, visit(node.params).children + shadowargs) + s(:args, visit(node.params).children + shadowargs, nil) end # Visit a BodyStmt node. 
@@ -273,11 +498,11 @@ def visit_bodystmt(node) children << visit(node.else_clause) end - inner = s(:rescue, children) + inner = s(:rescue, children, nil) end if node.ensure_clause - inner = s(:ensure, [inner] + visit(node.ensure_clause).children) + inner = s(:ensure, [inner] + visit(node.ensure_clause).children, nil) end inner @@ -285,48 +510,21 @@ def visit_bodystmt(node) # Visit a Break node. def visit_break(node) - s(:break, visit_all(node.arguments.parts)) + s(:break, visit_all(node.arguments.parts), nil) end # Visit a CallNode node. def visit_call(node) - if node.receiver.nil? - children = [nil, node.message.value.to_sym] - - if node.arguments.is_a?(ArgParen) - case node.arguments.arguments - when nil - # skip - when ArgsForward - children << s(:forwarded_args) - else - children += visit_all(node.arguments.arguments.parts) - end - end - - s(:send, children) - elsif node.message == :call - children = [visit(node.receiver), :call] - - unless node.arguments.arguments.nil? - children += visit_all(node.arguments.arguments.parts) - end - - s(send_type(node.operator), children) - else - children = [visit(node.receiver), node.message.value.to_sym] - - case node.arguments - when Args - children += visit_all(node.arguments.parts) - when ArgParen - unless node.arguments.arguments.nil? - children += visit_all(node.arguments.arguments.parts) - end - end - - s(send_type(node.operator), children) - end + visit_command_call( + CommandCall.new( + receiver: node.receiver, + operator: node.operator, + message: node.message, + arguments: node.arguments, + block: nil, + location: node.location + ) + ) end # Visit a Case node. @@ -336,55 +534,157 @@ def visit_case(node) clauses << clauses.last.consequent end - type = node.consequent.is_a?(In) ? :case_match : :case - s(type, [visit(node.value)] + clauses.map { |clause| visit(clause) }) + else_token = + if clauses.last.is_a?(Else) + source_range_length(clauses.last.location.start_char, 4) + end + + s( + node.consequent.is_a?(In) ? 
:case_match : :case, + [visit(node.value)] + clauses.map { |clause| visit(clause) }, + source_map_condition( + keyword: source_range_length(node.location.start_char, 4), + else_token: else_token, + end_token: source_range_length(node.location.end_char, -3), + expression: source_range_node(node) + ) + ) end # Visit a CHAR node. def visit_CHAR(node) - s(:str, [node.value[1..]]) + s( + :str, + [node.value[1..]], + source_map_collection( + begin_token: source_range_length(node.location.start_char, 1), + expression: source_range_node(node) + ) + ) end # Visit a ClassDeclaration node. def visit_class(node) + operator = + if node.superclass + source_range_find( + node.constant.location.end_char, + node.superclass.location.start_char, + "<" + ) + end + s( :class, - [visit(node.constant), visit(node.superclass), visit(node.bodystmt)] + [visit(node.constant), visit(node.superclass), visit(node.bodystmt)], + source_map_definition( + keyword: source_range_length(node.location.start_char, 5), + operator: operator, + name: source_range_node(node.constant), + end_token: source_range_length(node.location.end_char, -3) + ).with_expression(source_range_node(node)) ) end # Visit a Command node. def visit_command(node) - call = - s( - :send, - [nil, node.message.value.to_sym, *visit_all(node.arguments.parts)] + visit_command_call( + CommandCall.new( + receiver: nil, + operator: nil, + message: node.message, + arguments: node.arguments, + block: node.block, + location: node.location ) - - if node.block - type, arguments = block_children(node.block) - s(type, [call, arguments, visit(node.block.bodystmt)]) - else - call - end + ) end # Visit a CommandCall node. def visit_command_call(node) - children = [visit(node.receiver), node.message.value.to_sym] + children = [ + visit(node.receiver), + node.message == :call ? 
:call : node.message.value.to_sym + ] + begin_token = nil + end_token = nil case node.arguments when Args children += visit_all(node.arguments.parts) when ArgParen - children += visit_all(node.arguments.arguments.parts) + case node.arguments.arguments + when nil + # skip + when ArgsForward + children << visit(node.arguments.arguments) + else + children += visit_all(node.arguments.arguments.parts) + end + + begin_token = + source_range_length(node.arguments.location.start_char, 1) + end_token = source_range_length(node.arguments.location.end_char, -1) end - call = s(send_type(node.operator), children) + dot_bound = + if node.arguments + node.arguments.location.start_char + elsif node.block + node.block.location.start_char + else + node.location.end_char + end + + call = + s( + if node.operator.is_a?(Op) && node.operator.value == "&." + :csend + else + :send + end, + children, + source_map_send( + dot: + if node.operator == :"::" + source_range_find( + node.receiver.location.end_char, + ( + if node.message == :call + dot_bound + else + node.message.location.start_char + end + ), + "::" + ) + elsif node.operator + source_range_node(node.operator) + end, + begin_token: begin_token, + end_token: end_token, + selector: + node.message == :call ? nil : source_range_node(node.message), + expression: source_range_node(node) + ) + ) if node.block type, arguments = block_children(node.block) - s(type, [call, arguments, visit(node.block.bodystmt)]) + + s( + type, + [call, arguments, visit(node.block.bodystmt)], + source_map_collection( + begin_token: source_range_node(node.block.opening), + end_token: + source_range_length( + node.location.end_char, + node.block.opening.is_a?(Kw) ? -3 : -1 + ), + expression: source_range_node(node) + ) + ) else call end @@ -392,32 +692,79 @@ def visit_command_call(node) # Visit a Const node. 
def visit_const(node) - s(:const, [nil, node.value.to_sym]) + s( + :const, + [nil, node.value.to_sym], + source_map_constant( + name: source_range_node(node), + expression: source_range_node(node) + ) + ) end # Visit a ConstPathField node. def visit_const_path_field(node) if node.parent.is_a?(VarRef) && node.parent.value.is_a?(Kw) && node.parent.value.value == "self" && node.constant.is_a?(Ident) - s(:send, [visit(node.parent), :"#{node.constant.value}="]) + s(:send, [visit(node.parent), :"#{node.constant.value}="], nil) else - s(:casgn, [visit(node.parent), node.constant.value.to_sym]) + s( + :casgn, + [visit(node.parent), node.constant.value.to_sym], + source_map_constant( + double_colon: + source_range_find( + node.parent.location.end_char, + node.constant.location.start_char, + "::" + ), + name: source_range_node(node.constant), + expression: source_range_node(node) + ) + ) end end # Visit a ConstPathRef node. def visit_const_path_ref(node) - s(:const, [visit(node.parent), node.constant.value.to_sym]) + s( + :const, + [visit(node.parent), node.constant.value.to_sym], + source_map_constant( + double_colon: + source_range_find( + node.parent.location.end_char, + node.constant.location.start_char, + "::" + ), + name: source_range_node(node.constant), + expression: source_range_node(node) + ) + ) end # Visit a ConstRef node. def visit_const_ref(node) - s(:const, [nil, node.constant.value.to_sym]) + s( + :const, + [nil, node.constant.value.to_sym], + source_map_constant( + name: source_range_node(node.constant), + expression: source_range_node(node) + ) + ) end # Visit a CVar node. def visit_cvar(node) - s(:cvar, [node.value.to_sym]) + s( + :cvar, + [node.value.to_sym], + source_map_variable( + name: source_range_node(node), + expression: source_range_node(node) + ) + ) end # Visit a DefNode node. 
@@ -426,39 +773,110 @@ def visit_def(node) args = case node.params when Params - visit(node.params) + child = visit(node.params) + + s( + child.type, + child.children, + source_map_collection(expression: nil) + ) when Paren - visit(node.params.contents) + child = visit(node.params.contents) + + s( + child.type, + child.children, + source_map_collection( + begin_token: + source_range_length(node.params.location.start_char, 1), + end_token: + source_range_length(node.params.location.end_char, -1), + expression: source_range_node(node.params) + ) + ) else - s(:args) + s(:args, [], source_map_collection(expression: nil)) end if node.target target = node.target.is_a?(Paren) ? node.target.contents : node.target - s(:defs, [visit(target), name, args, visit(node.bodystmt)]) + + s( + :defs, + [visit(target), name, args, visit(node.bodystmt)], + source_map_method_definition( + keyword: source_range_length(node.location.start_char, 3), + operator: source_range_node(node.operator), + name: source_range_node(node.name), + end_token: source_range_length(node.location.end_char, -3), + expression: source_range_node(node) + ) + ) else - s(:def, [name, args, visit(node.bodystmt)]) + s( + :def, + [name, args, visit(node.bodystmt)], + source_map_method_definition( + keyword: source_range_length(node.location.start_char, 3), + name: source_range_node(node.name), + end_token: source_range_length(node.location.end_char, -3), + expression: source_range_node(node) + ) + ) end end # Visit a Defined node. 
def visit_defined(node) - s(:defined?, [visit(node.value)]) + paren_range = (node.location.start_char + 8)...node.location.end_char + begin_token, end_token = + if buffer.source[paren_range].include?("(") + [ + source_range_find(paren_range.begin, paren_range.end, "("), + source_range_length(node.location.end_char, -1) + ] + end + + s( + :defined?, + [visit(node.value)], + source_map_keyword( + keyword: source_range_length(node.location.start_char, 8), + begin_token: begin_token, + end_token: end_token, + expression: source_range_node(node) + ) + ) end # Visit a DynaSymbol node. def visit_dyna_symbol(node) + location = + if node.quote + source_map_collection( + begin_token: + source_range_length( + node.location.start_char, + node.quote.length + ), + end_token: source_range_length(node.location.end_char, -1), + expression: source_range_node(node) + ) + else + source_map_collection(expression: source_range_node(node)) + end + if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - s(:sym, ["\"#{node.parts.first.value}\"".undump.to_sym]) + s(:sym, ["\"#{node.parts.first.value}\"".undump.to_sym], location) else - s(:dsym, visit_all(node.parts)) + s(:dsym, visit_all(node.parts), location) end end # Visit an Else node. def visit_else(node) if node.statements.empty? && stack[-2].is_a?(Case) - s(:empty_else) + s(:empty_else, [], nil) else visit(node.statements) end @@ -466,54 +884,108 @@ def visit_else(node) # Visit an Elsif node. 
def visit_elsif(node) + else_token = + case node.consequent + when Elsif + source_range_length(node.consequent.location.start_char, 5) + when Else + source_range_length(node.consequent.location.start_char, 4) + end + + expression = + source_range( + node.location.start_char, + node.statements.location.end_char - 1 + ) + s( :if, [ visit(node.predicate), visit(node.statements), visit(node.consequent) - ] + ], + source_map_condition( + keyword: source_range_length(node.location.start_char, 5), + else_token: else_token, + expression: expression + ) ) end # Visit an ENDBlock node. def visit_END(node) - s(:postexe, [visit(node.statements)]) + s( + :postexe, + [visit(node.statements)], + source_map_keyword( + keyword: source_range_length(node.location.start_char, 3), + begin_token: + source_range_find( + node.location.start_char + 3, + node.statements.location.start_char, + "{" + ), + end_token: source_range_length(node.location.end_char, -1), + expression: source_range_node(node) + ) + ) end # Visit an Ensure node. def visit_ensure(node) - s(:ensure, [visit(node.statements)]) + s(:ensure, [visit(node.statements)], nil) end # Visit a Field node. def visit_field(node) - case stack[-2] - when Assign, MLHS - s( - send_type(node.operator), - [visit(node.parent), :"#{node.name.value}="] - ) - else - s( - send_type(node.operator), - [visit(node.parent), node.name.value.to_sym] + message = + case stack[-2] + when Assign, MLHS + Ident.new( + value: :"#{node.name.value}=", + location: node.name.location + ) + else + node.name + end + + visit_command_call( + CommandCall.new( + receiver: node.parent, + operator: node.operator, + message: message, + arguments: nil, + block: nil, + location: node.location ) - end + ) end # Visit a FloatLiteral node. 
def visit_float(node) - s(:float, [node.value.to_f]) + operator = + if %w[+ -].include?(buffer.source[node.location.start_char]) + source_range_length(node.location.start_char, 1) + end + + s( + :float, + [node.value.to_f], + source_map_operator( + operator: operator, + expression: source_range_node(node) + ) + ) end # Visit a FndPtn node. def visit_fndptn(node) make_match_rest = ->(child) do if child.is_a?(VarField) && child.value.nil? - s(:match_rest, []) + s(:match_rest, [], nil) else - s(:match_rest, [visit(child)]) + s(:match_rest, [visit(child)], nil) end end @@ -524,27 +996,49 @@ def visit_fndptn(node) make_match_rest[node.left], *visit_all(node.values), make_match_rest[node.right] - ] + ], + nil ) - node.constant ? s(:const_pattern, [visit(node.constant), inner]) : inner + + if node.constant + s(:const_pattern, [visit(node.constant), inner], nil) + else + inner + end end # Visit a For node. def visit_for(node) s( :for, - [visit(node.index), visit(node.collection), visit(node.statements)] + [visit(node.index), visit(node.collection), visit(node.statements)], + nil ) end # Visit a GVar node. def visit_gvar(node) - s(:gvar, [node.value.to_sym]) + s( + :gvar, + [node.value.to_sym], + source_map_variable( + name: source_range_node(node), + expression: source_range_node(node) + ) + ) end # Visit a HashLiteral node. 
def visit_hash(node) - s(:hash, visit_all(node.assocs)) + s( + :hash, + visit_all(node.assocs), + source_map_collection( + begin_token: source_range_length(node.location.start_char, 1), + end_token: source_range_length(node.location.end_char, -1), + expression: source_range_node(node) + ) + ) end # Heredocs are represented _very_ differently in the parser gem from how @@ -626,7 +1120,7 @@ def visit_heredoc(node) part .value .split("\n") - .each { |line| heredoc_segments << s(:str, ["#{line}\n"]) } + .each { |line| heredoc_segments << s(:str, ["#{line}\n"], nil) } else heredoc_segments << visit(part) end @@ -635,11 +1129,11 @@ def visit_heredoc(node) heredoc_segments.trim! if node.beginning.value.match?(/`\w+`\z/) - s(:xstr, heredoc_segments.segments) + s(:xstr, heredoc_segments.segments, nil) elsif heredoc_segments.segments.length > 1 - s(:dstr, heredoc_segments.segments) + s(:dstr, heredoc_segments.segments, nil) elsif heredoc_segments.segments.empty? - s(:dstr) + s(:dstr, [], nil) else heredoc_segments.segments.first end @@ -649,34 +1143,45 @@ def visit_heredoc(node) def visit_hshptn(node) children = node.keywords.map do |(keyword, value)| - next s(:pair, [visit(keyword), visit(value)]) if value + next s(:pair, [visit(keyword), visit(value)], nil) if value case keyword when Label - s(:match_var, [keyword.value.chomp(":").to_sym]) + s(:match_var, [keyword.value.chomp(":").to_sym], nil) when StringContent raise if keyword.parts.length > 1 - s(:match_var, [keyword.parts.first.value.to_sym]) + s(:match_var, [keyword.parts.first.value.to_sym], nil) end end if node.keyword_rest.is_a?(VarField) children << if node.keyword_rest.value.nil? - s(:match_rest) + s(:match_rest, [], nil) elsif node.keyword_rest.value == :nil - s(:match_nil_pattern) + s(:match_nil_pattern, [], nil) else - s(:match_rest, [visit(node.keyword_rest)]) + s(:match_rest, [visit(node.keyword_rest)], nil) end end - inner = s(:hash_pattern, children) - node.constant ? 
s(:const_pattern, [visit(node.constant), inner]) : inner + inner = s(:hash_pattern, children, nil) + if node.constant + s(:const_pattern, [visit(node.constant), inner], nil) + else + inner + end end # Visit an Ident node. def visit_ident(node) - s(:lvar, [node.value.to_sym]) + s( + :lvar, + [node.value.to_sym], + source_map_variable( + name: source_range_node(node), + expression: source_range_node(node) + ) + ) end # Visit an IfNode node. @@ -686,15 +1191,16 @@ def visit_if(node) when RangeNode type = node.predicate.operator.value == ".." ? :iflipflop : :eflipflop - s(type, visit(node.predicate).children) + s(type, visit(node.predicate).children, nil) when RegexpLiteral - s(:match_current_line, [visit(node.predicate)]) + s(:match_current_line, [visit(node.predicate)], nil) when Unary if node.predicate.operator.value == "!" && node.predicate.statement.is_a?(RegexpLiteral) s( :send, - [s(:match_current_line, [visit(node.predicate.statement)]), :!] + [s(:match_current_line, [visit(node.predicate.statement)]), :!], + nil ) else visit(node.predicate) @@ -703,20 +1209,59 @@ def visit_if(node) visit(node.predicate) end - s(:if, [predicate, visit(node.statements), visit(node.consequent)]) + s( + :if, + [predicate, visit(node.statements), visit(node.consequent)], + if node.modifier? + source_map_keyword( + keyword: + source_range_find( + node.statements.location.end_char, + node.predicate.location.start_char, + "if" + ), + expression: source_range_node(node) + ) + else + else_token = + case node.consequent + when Elsif + source_range_length(node.consequent.location.start_char, 5) + when Else + source_range_length(node.consequent.location.start_char, 4) + end + + source_map_condition( + keyword: source_range_length(node.location.start_char, 2), + else_token: else_token, + end_token: source_range_length(node.location.end_char, -3), + expression: source_range_node(node) + ) + end + ) end # Visit an IfOp node. 
def visit_if_op(node) - s(:if, [visit(node.predicate), visit(node.truthy), visit(node.falsy)]) + s( + :if, + [visit(node.predicate), visit(node.truthy), visit(node.falsy)], + nil + ) end # Visit an Imaginary node. def visit_imaginary(node) - # We have to do an eval here in order to get the value in case it's - # something like 42ri. to_c will not give the right value in that case. - # Maybe there's an API for this but I can't find it. - s(:complex, [eval(node.value)]) + s( + :complex, + [ + # We have to do an eval here in order to get the value in case it's + # something like 42ri. to_c will not give the right value in that + # case. Maybe there's an API for this but I can't find it. + eval(node.value) + ], + source_map_operator(expression: source_range_node(node)) + ) end # Visit an In node. @@ -727,62 +1272,111 @@ def visit_in(node) :in_pattern, [ visit(node.pattern.statements), - s(:if_guard, [visit(node.pattern.predicate)]), + s(:if_guard, [visit(node.pattern.predicate)], nil), visit(node.statements) - ] + ], + nil ) when UnlessNode s( :in_pattern, [ visit(node.pattern.statements), - s(:unless_guard, [visit(node.pattern.predicate)]), + s(:unless_guard, [visit(node.pattern.predicate)], nil), visit(node.statements) - ] + ], + nil ) else - s(:in_pattern, [visit(node.pattern), nil, visit(node.statements)]) + s( + :in_pattern, + [visit(node.pattern), nil, visit(node.statements)], + nil + ) end end # Visit an Int node. def visit_int(node) - s(:int, [node.value.to_i]) + operator = + if %w[+ -].include?(buffer.source[node.location.start_char]) + source_range_length(node.location.start_char, 1) + end + + s( + :int, + [node.value.to_i], + source_map_operator( + operator: operator, + expression: source_range_node(node) + ) + ) end # Visit an IVar node. def visit_ivar(node) - s(:ivar, [node.value.to_sym]) + s( + :ivar, + [node.value.to_sym], + source_map_variable( + name: source_range_node(node), + expression: source_range_node(node) + ) + ) end # Visit a Kw node. 
def visit_kw(node) + location = source_map(expression: source_range_node(node)) + case node.value when "__FILE__" - s(:str, [buffer.name]) + s(:str, [buffer.name], location) when "__LINE__" - s(:int, [node.location.start_line + buffer.first_line - 1]) + s(:int, [node.location.start_line + buffer.first_line - 1], location) when "__ENCODING__" if ::Parser::Builders::Default.emit_encoding - s(:__ENCODING__) + s(:__ENCODING__, [], location) else - s(:const, [s(:const, [nil, :Encoding]), :UTF_8]) + s(:const, [s(:const, [nil, :Encoding], nil), :UTF_8], location) end else - s(node.value.to_sym) + s(node.value.to_sym, [], location) end end # Visit a KwRestParam node. def visit_kwrest_param(node) - node.name.nil? ? s(:kwrestarg) : s(:kwrestarg, [node.name.value.to_sym]) + if node.name.nil? + s( + :kwrestarg, + [], + source_map_variable(expression: source_range_node(node)) + ) + else + s( + :kwrestarg, + [node.name.value.to_sym], + source_map_variable( + name: source_range_node(node.name), + expression: source_range_node(node) + ) + ) + end end # Visit a Label node. def visit_label(node) - s(:sym, [node.value.chomp(":").to_sym]) - end - + s( + :sym, + [node.value.chomp(":").to_sym], + source_map_collection( + expression: + source_range(node.location.start_char, node.location.end_char - 1) + ) + ) + end + # Visit a Lambda node. def visit_lambda(node) args = node.params.is_a?(LambdaVar) ? node.params : node.params.contents @@ -790,9 +1384,9 @@ def visit_lambda(node) arguments = visit(args) child = if ::Parser::Builders::Default.emit_lambda - s(:lambda) + s(:lambda, [], nil) else - s(:send, [nil, :lambda]) + s(:send, [nil, :lambda], nil) end type = :block @@ -801,20 +1395,32 @@ def visit_lambda(node) arguments = maximum end - s(type, [child, arguments, visit(node.statements)]) + s(type, [child, arguments, visit(node.statements)], nil) end # Visit a LambdaVar node. 
def visit_lambda_var(node) shadowargs = - node.locals.map { |local| s(:shadowarg, [local.value.to_sym]) } + node.locals.map { |local| s(:shadowarg, [local.value.to_sym], nil) } - s(:args, visit(node.params).children + shadowargs) + s(:args, visit(node.params).children + shadowargs, nil) end # Visit an MAssign node. def visit_massign(node) - s(:masgn, [visit(node.target), visit(node.value)]) + s( + :masgn, + [visit(node.target), visit(node.value)], + source_map_operator( + operator: + source_range_find( + node.target.location.end_char, + node.value.location.start_char, + "=" + ), + expression: source_range_node(node) + ) + ) end # Visit a MethodAddBlock node. @@ -826,10 +1432,21 @@ def visit_method_add_block(node) call = visit(node.call) s( call.type, - [s(type, [*call.children, arguments, visit(node.block.bodystmt)])] + [ + s( + type, + [*call.children, arguments, visit(node.block.bodystmt)], + nil + ) + ], + nil ) else - s(type, [visit(node.call), arguments, visit(node.block.bodystmt)]) + s( + type, + [visit(node.call), arguments, visit(node.block.bodystmt)], + nil + ) end end @@ -838,8 +1455,9 @@ def visit_mlhs(node) s( :mlhs, node.parts.map do |part| - part.is_a?(Ident) ? s(:arg, [part.value.to_sym]) : visit(part) - end + part.is_a?(Ident) ? s(:arg, [part.value.to_sym], nil) : visit(part) + end, + source_map_collection(expression: source_range_node(node)) ) end @@ -850,35 +1468,104 @@ def visit_mlhs_paren(node) # Visit a ModuleDeclaration node. def visit_module(node) - s(:module, [visit(node.constant), visit(node.bodystmt)]) + s( + :module, + [visit(node.constant), visit(node.bodystmt)], + source_map_definition( + keyword: source_range_length(node.location.start_char, 6), + name: source_range_node(node.constant), + end_token: source_range_length(node.location.end_char, -3) + ).with_expression(source_range_node(node)) + ) end # Visit an MRHS node. 
def visit_mrhs(node) - s(:array, visit_all(node.parts)) + visit_array( + ArrayLiteral.new( + lbracket: nil, + contents: Args.new(parts: node.parts, location: node.location), + location: node.location + ) + ) end # Visit a Next node. def visit_next(node) - s(:next, visit_all(node.arguments.parts)) + s( + :next, + visit_all(node.arguments.parts), + source_map_keyword( + keyword: source_range_length(node.location.start_char, 4), + expression: source_range_node(node) + ) + ) end # Visit a Not node. def visit_not(node) if node.statement.nil? - s(:send, [s(:begin), :!]) + begin_token = source_range_find(node.location.start_char, nil, "(") + end_token = source_range_find(node.location.start_char, nil, ")") + + s( + :send, + [ + s( + :begin, + [], + source_map_collection( + begin_token: begin_token, + end_token: end_token, + expression: begin_token.join(end_token) + ) + ), + :! + ], + source_map_send( + selector: source_range_length(node.location.start_char, 3), + expression: source_range_node(node) + ) + ) else - s(:send, [visit(node.statement), :!]) + begin_token, end_token = + if node.parentheses? + [ + source_range_find( + node.location.start_char + 3, + node.statement.location.start_char, + "(" + ), + source_range_length(node.location.end_char, -1) + ] + end + + s( + :send, + [visit(node.statement), :!], + source_map_send( + begin_token: begin_token, + end_token: end_token, + selector: source_range_length(node.location.start_char, 3), + expression: source_range_node(node) + ) + ) end end # Visit an OpAssign node. 
def visit_opassign(node) + location = + source_map_variable( + name: source_range_node(node.target), + expression: source_range_node(node) + ).with_operator(source_range_node(node.operator)) + case node.operator.value when "||=" - s(:or_asgn, [visit(node.target), visit(node.value)]) + s(:or_asgn, [visit(node.target), visit(node.value)], location) when "&&=" - s(:and_asgn, [visit(node.target), visit(node.value)]) + s(:and_asgn, [visit(node.target), visit(node.value)], location) else s( :op_asgn, @@ -886,7 +1573,8 @@ def visit_opassign(node) visit(node.target), node.operator.value.chomp("=").to_sym, visit(node.value) - ] + ], + location ) end end @@ -901,29 +1589,91 @@ def visit_params(node) when MLHSParen visit(required) else - s(:arg, [required.value.to_sym]) + s( + :arg, + [required.value.to_sym], + source_map_variable( + name: source_range_node(required), + expression: source_range_node(required) + ) + ) end end children += node.optionals.map do |(name, value)| - s(:optarg, [name.value.to_sym, visit(value)]) + s( + :optarg, + [name.value.to_sym, visit(value)], + source_map_variable( + name: source_range_node(name), + expression: + source_range_node(name).join(source_range_node(value)) + ).with_operator( + source_range_find( + name.location.end_char, + value.location.start_char, + "=" + ) + ) + ) end + if node.rest && !node.rest.is_a?(ExcessedComma) children << visit(node.rest) end - children += node.posts.map { |post| s(:arg, [post.value.to_sym]) } + + children += + node.posts.map do |post| + s( + :arg, + [post.value.to_sym], + source_map_variable( + name: source_range_node(post), + expression: source_range_node(post) + ) + ) + end + children += node.keywords.map do |(name, value)| key = name.value.chomp(":").to_sym - value ? 
s(:kwoptarg, [key, visit(value)]) : s(:kwarg, [key]) + + if value + s( + :kwoptarg, + [key, visit(value)], + source_map_variable( + name: + source_range( + name.location.start_char, + name.location.end_char - 1 + ), + expression: + source_range_node(name).join(source_range_node(value)) + ) + ) + else + s( + :kwarg, + [key], + source_map_variable( + name: + source_range( + name.location.start_char, + name.location.end_char - 1 + ), + expression: source_range_node(name) + ) + ) + end end case node.keyword_rest when nil, ArgsForward # do nothing when :nil - children << s(:kwnilarg) + children << s(:kwnilarg, [], nil) else children << visit(node.keyword_rest) end @@ -932,17 +1682,17 @@ def visit_params(node) if node.keyword_rest.is_a?(ArgsForward) if children.empty? && !::Parser::Builders::Default.emit_forward_arg - return s(:forward_args) + return s(:forward_args, [], nil) end children.insert( node.requireds.length + node.optionals.length + node.keywords.length, - s(:forward_arg) + s(:forward_arg, [], nil) ) end - s(:args, children) + s(:args, children, nil) end # Visit a Paren node. @@ -953,24 +1703,36 @@ def visit_paren(node) node.contents.body.length == 1 && node.contents.body.first.is_a?(VoidStmt) ) - s(:begin) + s(:begin, [], nil) elsif stack[-2].is_a?(DefNode) && stack[-2].target.nil? && stack[-2].target == node visit(node.contents) else - visited = visit(node.contents) - visited.type == :begin ? visited : s(:begin, [visited]) + child = visit(node.contents) + if child.type == :begin + child + else + s( + :begin, + [child], + source_map_collection( + begin_token: source_range_length(node.location.start_char, 1), + end_token: source_range_length(node.location.end_char, -1), + expression: source_range_node(node) + ) + ) + end end end # Visit a PinnedBegin node. def visit_pinned_begin(node) - s(:pin, [s(:begin, [visit(node.statement)])]) + s(:pin, [s(:begin, [visit(node.statement)], nil)], nil) end # Visit a PinnedVarRef node. 
def visit_pinned_var_ref(node) - s(:pin, [visit(node.value)]) + s(:pin, [visit(node.value)], nil) end # Visit a Program node. @@ -980,45 +1742,106 @@ def visit_program(node) # Visit a QSymbols node. def visit_qsymbols(node) - s( - :array, - node.elements.map { |element| s(:sym, [element.value.to_sym]) } + parts = + node.elements.map do |element| + SymbolLiteral.new(value: element, location: element.location) + end + + visit_array( + ArrayLiteral.new( + lbracket: node.beginning, + contents: Args.new(parts: parts, location: node.location), + location: node.location + ) ) end # Visit a QWords node. def visit_qwords(node) - s(:array, visit_all(node.elements)) + visit_array( + ArrayLiteral.new( + lbracket: node.beginning, + contents: Args.new(parts: node.elements, location: node.location), + location: node.location + ) + ) end # Visit a RangeNode node. def visit_range(node) - type = node.operator.value == ".." ? :irange : :erange - s(type, [visit(node.left), visit(node.right)]) + s( + node.operator.value == ".." ? :irange : :erange, + [visit(node.left), visit(node.right)], + source_map_operator( + operator: source_range_node(node.operator), + expression: source_range_node(node) + ) + ) end # Visit an RAssign node. def visit_rassign(node) - type = node.operator.value == "=>" ? :match_pattern : :match_pattern_p - s(type, [visit(node.value), visit(node.pattern)]) + s( + node.operator.value == "=>" ? :match_pattern : :match_pattern_p, + [visit(node.value), visit(node.pattern)], + source_map_operator( + operator: source_range_node(node.operator), + expression: source_range_node(node) + ) + ) end # Visit a Rational node. def visit_rational(node) - s(:rational, [node.value.to_r]) + s( + :rational, + [node.value.to_r], + source_map_operator(expression: source_range_node(node)) + ) end # Visit a Redo node. 
- def visit_redo(_node) - s(:redo) + def visit_redo(node) + s( + :redo, + [], + source_map_keyword( + keyword: source_range_node(node), + expression: source_range_node(node) + ) + ) end # Visit a RegexpLiteral node. def visit_regexp_literal(node) s( :regexp, - visit_all(node.parts) + - [s(:regopt, node.ending.scan(/[a-z]/).sort.map(&:to_sym))] + visit_all(node.parts).push( + s( + :regopt, + node.ending.scan(/[a-z]/).sort.map(&:to_sym), + source_map( + expression: + source_range_length( + node.location.end_char, + -(node.ending.length - 1) + ) + ) + ) + ), + source_map_collection( + begin_token: + source_range_length( + node.location.start_char, + node.beginning.length + ), + end_token: + source_range_length( + node.location.end_char - node.ending.length, + 1 + ), + expression: source_range_node(node) + ) ) end @@ -1029,18 +1852,18 @@ def visit_rescue(node) when nil nil when VarRef - s(:array, [visit(node.exception.exceptions)]) + s(:array, [visit(node.exception.exceptions)], nil) when MRHS - s(:array, visit_all(node.exception.exceptions.parts)) + s(:array, visit_all(node.exception.exceptions.parts), nil) else - s(:array, [visit(node.exception.exceptions)]) + s(:array, [visit(node.exception.exceptions)], nil) end resbody = if node.exception.nil? - s(:resbody, [nil, nil, visit(node.statements)]) + s(:resbody, [nil, nil, visit(node.statements)], nil) elsif node.exception.variable.nil? - s(:resbody, [exceptions, nil, visit(node.statements)]) + s(:resbody, [exceptions, nil, visit(node.statements)], nil) else s( :resbody, @@ -1048,7 +1871,8 @@ def visit_rescue(node) exceptions, visit(node.exception.variable), visit(node.statements) - ] + ], + nil ) end @@ -1059,39 +1883,96 @@ def visit_rescue(node) children << nil end - s(:rescue, children) + s(:rescue, children, nil) end # Visit a RescueMod node. 
def visit_rescue_mod(node) + keyword = + source_range_find( + node.statement.location.end_char, + node.value.location.start_char, + "rescue" + ) + s( :rescue, [ visit(node.statement), - s(:resbody, [nil, nil, visit(node.value)]), + s( + :resbody, + [nil, nil, visit(node.value)], + source_map_rescue_body( + keyword: keyword, + expression: keyword.join(source_range_node(node.value)) + ) + ), nil - ] + ], + source_map_condition(expression: source_range_node(node)) ) end # Visit a RestParam node. def visit_rest_param(node) - s(:restarg, node.name ? [node.name.value.to_sym] : []) + if node.name + s( + :restarg, + [node.name.value.to_sym], + source_map_variable( + name: source_range_node(node.name), + expression: source_range_node(node) + ) + ) + else + s( + :restarg, + [], + source_map_variable(expression: source_range_node(node)) + ) + end end # Visit a Retry node. - def visit_retry(_node) - s(:retry) + def visit_retry(node) + s( + :retry, + [], + source_map_keyword( + keyword: source_range_node(node), + expression: source_range_node(node) + ) + ) end # Visit a ReturnNode node. def visit_return(node) - s(:return, node.arguments ? visit_all(node.arguments.parts) : []) + s( + :return, + node.arguments ? visit_all(node.arguments.parts) : [], + source_map_keyword( + keyword: source_range_length(node.location.start_char, 6), + expression: source_range_node(node) + ) + ) end # Visit an SClass node. def visit_sclass(node) - s(:sclass, [visit(node.target), visit(node.bodystmt)]) + s( + :sclass, + [visit(node.target), visit(node.bodystmt)], + source_map_definition( + keyword: source_range_length(node.location.start_char, 5), + operator: + source_range_find( + node.location.start_char + 5, + node.target.location.start_char, + "<<" + ), + end_token: source_range_length(node.location.end_char, -3) + ).with_expression(source_range_node(node)) + ) end # Visit a Statements node. 
@@ -1108,19 +1989,35 @@ def visit_statements(node) when 1 visit(children.first) else - s(:begin, visit_all(children)) + s( + :begin, + visit_all(children), + source_map_collection( + expression: + source_range( + children.first.location.start_char, + children.last.location.end_char + ) + ) + ) end end # Visit a StringConcat node. def visit_string_concat(node) - s(:dstr, [visit(node.left), visit(node.right)]) + visit_string_literal( + StringLiteral.new( + parts: [node.left, node.right], + quote: nil, + location: node.location + ) + ) end # Visit a StringContent node. def visit_string_content(node) # Can get here if you're inside a hash pattern, e.g., in "a": 1 - s(:sym, [node.parts.first.value.to_sym]) + s(:sym, [node.parts.first.value.to_sym], nil) end # Visit a StringDVar node. @@ -1130,71 +2027,187 @@ def visit_string_dvar(node) # Visit a StringEmbExpr node. def visit_string_embexpr(node) - child = visit(node.statements) - s(:begin, child ? [child] : []) + s( + :begin, + visit(node.statements).then { |child| child ? [child] : [] }, + source_map_collection( + begin_token: source_range_length(node.location.start_char, 2), + end_token: source_range_length(node.location.end_char, -1), + expression: source_range_node(node) + ) + ) end # Visit a StringLiteral node. def visit_string_literal(node) + location = + if node.quote + source_map_collection( + begin_token: source_range_length(node.location.start_char, 1), + end_token: source_range_length(node.location.end_char, -1), + expression: source_range_node(node) + ) + else + source_map_collection(expression: source_range_node(node)) + end + if node.parts.empty? - s(:str, [""]) + s(:str, [""], location) elsif node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - visit(node.parts.first) + child = visit(node.parts.first) + s(child.type, child.children, location) else - s(:dstr, visit_all(node.parts)) + s(:dstr, visit_all(node.parts), location) end end # Visit a Super node. 
def visit_super(node) if node.arguments.is_a?(Args) - s(:super, visit_all(node.arguments.parts)) + s( + :super, + visit_all(node.arguments.parts), + source_map_keyword( + keyword: source_range_node(node), + expression: source_range_node(node) + ) + ) else case node.arguments.arguments when nil - s(:super) + s( + :super, + [], + source_map_keyword( + keyword: source_range_length(node.location.start_char, 5), + begin_token: + source_range_find( + node.location.start_char + 5, + node.location.end_char, + "(" + ), + end_token: source_range_length(node.location.end_char, -1), + expression: source_range_node(node) + ) + ) when ArgsForward - s(:super, [visit(node.arguments.arguments)]) + s(:super, [visit(node.arguments.arguments)], nil) else - s(:super, visit_all(node.arguments.arguments.parts)) + s( + :super, + visit_all(node.arguments.arguments.parts), + source_map_keyword( + keyword: source_range_length(node.location.start_char, 5), + begin_token: + source_range_find( + node.location.start_char + 5, + node.location.end_char, + "(" + ), + end_token: source_range_length(node.location.end_char, -1), + expression: source_range_node(node) + ) + ) end end end # Visit a SymbolLiteral node. def visit_symbol_literal(node) - s(:sym, [node.value.value.to_sym]) + begin_token = + if buffer.source[node.location.start_char] == ":" + source_range_length(node.location.start_char, 1) + end + + s( + :sym, + [node.value.value.to_sym], + source_map_collection( + begin_token: begin_token, + expression: source_range_node(node) + ) + ) end # Visit a Symbols node. 
def visit_symbols(node) - children = + parts = node.elements.map do |element| - if element.parts.length > 1 || - !element.parts.first.is_a?(TStringContent) - s(:dsym, visit_all(element.parts)) + part = element.parts.first + + if element.parts.length == 1 && part.is_a?(TStringContent) + SymbolLiteral.new(value: part, location: part.location) else - s(:sym, [element.parts.first.value.to_sym]) + DynaSymbol.new( + parts: element.parts, + quote: nil, + location: element.location + ) end end - s(:array, children) + visit_array( + ArrayLiteral.new( + lbracket: node.beginning, + contents: Args.new(parts: parts, location: node.location), + location: node.location + ) + ) end # Visit a TopConstField node. def visit_top_const_field(node) - s(:casgn, [s(:cbase), node.constant.value.to_sym]) + s( + :casgn, + [ + s( + :cbase, + [], + source_map( + expression: source_range_length(node.location.start_char, 2) + ) + ), + node.constant.value.to_sym + ], + source_map_constant( + double_colon: source_range_length(node.location.start_char, 2), + name: source_range_node(node.constant), + expression: source_range_node(node) + ) + ) end # Visit a TopConstRef node. def visit_top_const_ref(node) - s(:const, [s(:cbase), node.constant.value.to_sym]) + s( + :const, + [ + s( + :cbase, + [], + source_map( + expression: source_range_length(node.location.start_char, 2) + ) + ), + node.constant.value.to_sym + ], + source_map_constant( + double_colon: source_range_length(node.location.start_char, 2), + name: source_range_node(node.constant), + expression: source_range_node(node) + ) + ) end # Visit a TStringContent node. def visit_tstring_content(node) - value = node.value.gsub(/([^[:ascii:]])/) { $1.dump[1...-1] } - s(:str, ["\"#{value}\"".undump]) + dumped = node.value.gsub(/([^[:ascii:]])/) { $1.dump[1...-1] } + + s( + :str, + ["\"#{dumped}\"".undump], + source_map_collection(expression: source_range_node(node)) + ) end # Visit a Unary node. 
@@ -1206,36 +2219,28 @@ def visit_unary(node) (range = node.statement.contents.body.first).is_a?(RangeNode) && node.operator == "!" type = range.operator.value == ".." ? :iflipflop : :eflipflop - return s(:send, [s(:begin, [s(type, visit(range).children)]), :!]) + return( + s( + :send, + [s(:begin, [s(type, visit(range).children, nil)], nil), :!], + nil + ) + ) end - case node.operator - when "+" - case node.statement - when Int - s(:int, [node.statement.value.to_i]) - when FloatLiteral - s(:float, [node.statement.value.to_f]) - else - s(:send, [visit(node.statement), :+@]) - end - when "-" - case node.statement - when Int - s(:int, [-node.statement.value.to_i]) - when FloatLiteral - s(:float, [-node.statement.value.to_f]) - else - s(:send, [visit(node.statement), :-@]) - end - else - s(:send, [visit(node.statement), node.operator.to_sym]) - end + visit(canonical_unary(node)) end # Visit an Undef node. def visit_undef(node) - s(:undef, visit_all(node.symbols)) + s( + :undef, + visit_all(node.symbols), + source_map_keyword( + keyword: source_range_length(node.location.start_char, 5), + expression: source_range_node(node) + ) + ) end # Visit an UnlessNode node. @@ -1243,13 +2248,14 @@ def visit_unless(node) predicate = case node.predicate when RegexpLiteral - s(:match_current_line, [visit(node.predicate)]) + s(:match_current_line, [visit(node.predicate)], nil) when Unary if node.predicate.operator.value == "!" && node.predicate.statement.is_a?(RegexpLiteral) s( :send, - [s(:match_current_line, [visit(node.predicate.statement)]), :!] + [s(:match_current_line, [visit(node.predicate.statement)]), :!], + nil ) else visit(node.predicate) @@ -1258,21 +2264,52 @@ def visit_unless(node) visit(node.predicate) end - s(:if, [predicate, visit(node.consequent), visit(node.statements)]) + s( + :if, + [predicate, visit(node.consequent), visit(node.statements)], + if node.modifier? 
+ source_map_keyword( + keyword: + source_range_find( + node.statements.location.end_char, + node.predicate.location.start_char, + "unless" + ), + expression: source_range_node(node) + ) + else + source_map_condition( + keyword: source_range_length(node.location.start_char, 6), + end_token: source_range_length(node.location.end_char, -3), + expression: source_range_node(node) + ) + end + ) end # Visit an UntilNode node. def visit_until(node) - type = - if node.modifier? && node.statements.is_a?(Statements) && - node.statements.body.length == 1 && - node.statements.body.first.is_a?(Begin) - :until_post + s( + loop_post?(node) ? :until_post : :until, + [visit(node.predicate), visit(node.statements)], + if node.modifier? + source_map_keyword( + keyword: + source_range_find( + node.statements.location.end_char, + node.predicate.location.start_char, + "until" + ), + expression: source_range_node(node) + ) else - :until + source_map_keyword( + keyword: source_range_length(node.location.start_char, 5), + end_token: source_range_length(node.location.end_char, -3), + expression: source_range_node(node) + ) end - - s(type, [visit(node.predicate), visit(node.statements)]) + ) end # Visit a VarField node. 
@@ -1289,24 +2326,47 @@ def visit_var_field(node) end if [stack[-3], stack[-2]].any?(&is_match_var) - return s(:match_var, [node.value.value.to_sym]) + return( + s( + :match_var, + [node.value.value.to_sym], + source_map_variable( + name: source_range_node(node), + expression: source_range_node(node) + ) + ) + ) end case node.value when Const - s(:casgn, [nil, node.value.value.to_sym]) - when CVar - s(:cvasgn, [node.value.value.to_sym]) - when GVar - s(:gvasgn, [node.value.value.to_sym]) - when Ident - s(:lvasgn, [node.value.value.to_sym]) - when IVar - s(:ivasgn, [node.value.value.to_sym]) - when VarRef - s(:lvasgn, [node.value.value.to_sym]) + s( + :casgn, + [nil, node.value.value.to_sym], + source_map_constant( + name: source_range_node(node.value), + expression: source_range_node(node) + ) + ) + when CVar, GVar, Ident, IVar, VarRef + s( + { + CVar => :cvasgn, + GVar => :gvasgn, + Ident => :lvasgn, + IVar => :ivasgn, + VarRef => :lvasgn + }[ + node.value.class + ], + [node.value.value.to_sym], + source_map_variable( + name: source_range_node(node), + expression: source_range_node(node) + ) + ) else - s(:match_rest) + s(:match_rest, [], nil) end end @@ -1317,75 +2377,147 @@ def visit_var_ref(node) # Visit a VCall node. def visit_vcall(node) - range = - ::Parser::Source::Range.new( - buffer, - node.location.start_char, - node.location.end_char + visit_command_call( + CommandCall.new( + receiver: nil, + operator: nil, + message: node.value, + arguments: nil, + block: nil, + location: node.location ) - location = ::Parser::Source::Map::Send.new(nil, range, nil, nil, range) - - s(:send, [nil, node.value.value.to_sym], location: location) + ) end # Visit a When node. 
def visit_when(node) - s(:when, visit_all(node.arguments.parts) + [visit(node.statements)]) + keyword = source_range_length(node.location.start_char, 4) + + s( + :when, + visit_all(node.arguments.parts) + [visit(node.statements)], + source_map_keyword( + keyword: keyword, + expression: + source_range( + keyword.begin_pos, + node.statements.location.end_char - 1 + ) + ) + ) end # Visit a WhileNode node. def visit_while(node) - type = - if node.modifier? && node.statements.is_a?(Statements) && - node.statements.body.length == 1 && - node.statements.body.first.is_a?(Begin) - :while_post + s( + loop_post?(node) ? :while_post : :while, + [visit(node.predicate), visit(node.statements)], + if node.modifier? + source_map_keyword( + keyword: + source_range_find( + node.statements.location.end_char, + node.predicate.location.start_char, + "while" + ), + expression: source_range_node(node) + ) else - :while + source_map_keyword( + keyword: source_range_length(node.location.start_char, 5), + end_token: source_range_length(node.location.end_char, -3), + expression: source_range_node(node) + ) end - - s(type, [visit(node.predicate), visit(node.statements)]) + ) end # Visit a Word node. def visit_word(node) - if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - visit(node.parts.first) - else - s(:dstr, visit_all(node.parts)) - end + visit_string_literal( + StringLiteral.new( + parts: node.parts, + quote: nil, + location: node.location + ) + ) end # Visit a Words node. def visit_words(node) - s(:array, visit_all(node.elements)) + visit_array( + ArrayLiteral.new( + lbracket: node.beginning, + contents: Args.new(parts: node.elements, location: node.location), + location: node.location + ) + ) end # Visit an XStringLiteral node. 
def visit_xstring_literal(node) - s(:xstr, visit_all(node.parts)) + s( + :xstr, + visit_all(node.parts), + source_map_collection( + begin_token: source_range_length(node.location.start_char, 1), + end_token: source_range_length(node.location.end_char, -1), + expression: source_range_node(node) + ) + ) end def visit_yield(node) case node.arguments when nil - s(:yield) + s( + :yield, + [], + source_map_keyword( + keyword: source_range_length(node.location.start_char, 5), + expression: source_range_node(node) + ) + ) when Args - s(:yield, visit_all(node.arguments.parts)) + s( + :yield, + visit_all(node.arguments.parts), + source_map_keyword( + keyword: source_range_length(node.location.start_char, 5), + expression: source_range_node(node) + ) + ) else - s(:yield, visit_all(node.arguments.contents.parts)) + s( + :yield, + visit_all(node.arguments.contents.parts), + source_map_keyword( + keyword: source_range_length(node.location.start_char, 5), + begin_token: + source_range_length(node.arguments.location.start_char, 1), + end_token: source_range_length(node.location.end_char, -1), + expression: source_range_node(node) + ) + ) end end # Visit a ZSuper node. - def visit_zsuper(_node) - s(:zsuper) + def visit_zsuper(node) + s( + :zsuper, + [], + source_map_keyword( + keyword: source_range_length(node.location.start_char, 5), + expression: source_range_node(node) + ) + ) end private def block_children(node) - arguments = (node.block_var ? visit(node.block_var) : s(:args)) + arguments = (node.block_var ? visit(node.block_var) : s(:args, [], nil)) type = :block if !node.block_var && (maximum = num_block_type(node.bodystmt)) @@ -1396,6 +2528,89 @@ def block_children(node) [type, arguments] end + # Convert a Unary node into a canonical CommandCall node. + def canonical_unary(node) + # For integers and floats with a leading + or -, parser represents them + # as just their values with the signs attached. 
+ if %w[+ -].include?(node.operator) && + (node.statement.is_a?(Int) || node.statement.is_a?(FloatLiteral)) + return( + node.statement.class.new( + value: "#{node.operator}#{node.statement.value}", + location: node.location + ) + ) + end + + value = { "+" => "+@", "-" => "-@" }.fetch(node.operator, node.operator) + length = node.operator.length + + CommandCall.new( + receiver: node.statement, + operator: nil, + message: + Op.new( + value: value, + location: + Location.new( + start_line: node.location.start_line, + start_char: node.location.start_char, + start_column: node.location.start_column, + end_line: node.location.start_line, + end_char: node.location.start_char + length, + end_column: node.location.start_column + length + ) + ), + arguments: nil, + block: nil, + location: node.location + ) + end + + # Convert a Binary node into a canonical CommandCall node. + def canonical_binary(node) + operator = node.operator.to_s + + start_char = node.left.location.end_char + end_char = node.right.location.start_char + + index = buffer.source[start_char...end_char].index(operator) + start_line = + node.location.start_line + + buffer.source[start_char...index].count("\n") + start_column = + index - (buffer.source[start_char...index].rindex("\n") || 0) + + op_location = + Location.new( + start_line: start_line, + start_column: start_column, + start_char: start_char + index, + end_line: start_line, + end_column: start_column + operator.length, + end_char: start_char + index + operator.length + ) + + CommandCall.new( + receiver: node.left, + operator: nil, + message: Op.new(value: operator, location: op_location), + arguments: + Args.new(parts: [node.right], location: node.right.location), + block: nil, + location: node.location + ) + end + + # When you have a begin..end while or begin..end until, it's a special + # kind of syntax that executes the block in a loop. In this case the + # parser gem has a special node type for it. + def loop_post?(node) + node.modifier? 
&& node.statements.is_a?(Statements) && + node.statements.body.length == 1 && + node.statements.body.first.is_a?(Begin) + end + # We need to find if we should transform this block into a numblock # since there could be new numbered variables like _1. def num_block_type(statements) @@ -1414,12 +2629,177 @@ def num_block_type(statements) variables.max end - def s(type, children = [], opts = {}) - ::Parser::AST::Node.new(type, children, opts) + # This method comes almost directly from the parser gem and creates a new + # parser gem node from the given s-expression. type is expected to be a + # symbol, children is expected to be an array, and location is expected to + # be a source map. + def s(type, children, location) + ::Parser::AST::Node.new(type, children, location: location) + end + + # Constructs a plain source map just for an expression. + def source_map(expression:) + ::Parser::Source::Map.new(expression) + end + + # Constructs a new source map for a collection. + def source_map_collection(begin_token: nil, end_token: nil, expression:) + ::Parser::Source::Map::Collection.new( + begin_token, + end_token, + expression + ) + end + + # Constructs a new source map for a conditional expression. + def source_map_condition( + keyword: nil, + begin_token: nil, + else_token: nil, + end_token: nil, + expression: + ) + ::Parser::Source::Map::Condition.new( + keyword, + begin_token, + else_token, + end_token, + expression + ) + end + + # Constructs a new source map for a constant reference. + def source_map_constant(double_colon: nil, name: nil, expression:) + ::Parser::Source::Map::Constant.new(double_colon, name, expression) + end + + # Constructs a new source map for a class definition. + def source_map_definition( + keyword: nil, + operator: nil, + name: nil, + end_token: nil + ) + ::Parser::Source::Map::Definition.new( + keyword, + operator, + name, + end_token + ) + end + + # Construct a source map for an index operation. 
+ def source_map_index(begin_token: nil, end_token: nil, expression:) + ::Parser::Source::Map::Index.new(begin_token, end_token, expression) + end + + # Constructs a new source map for the use of a keyword. + def source_map_keyword( + keyword: nil, + begin_token: nil, + end_token: nil, + expression: + ) + ::Parser::Source::Map::Keyword.new( + keyword, + begin_token, + end_token, + expression + ) + end + + # Constructs a new source map for a method definition. + def source_map_method_definition( + keyword: nil, + operator: nil, + name: nil, + end_token: nil, + assignment: nil, + expression: + ) + ::Parser::Source::Map::MethodDefinition.new( + keyword, + operator, + name, + end_token, + assignment, + expression + ) + end + + # Constructs a new source map for an operator. + def source_map_operator(operator: nil, expression:) + ::Parser::Source::Map::Operator.new(operator, expression) + end + + # Constructs a source map for the body of a rescue clause. + def source_map_rescue_body( + keyword: nil, + assoc: nil, + begin_token: nil, + expression: + ) + ::Parser::Source::Map::RescueBody.new( + keyword, + assoc, + begin_token, + expression + ) + end + + # Constructs a new source map for a method call. + def source_map_send( + dot: nil, + selector: nil, + begin_token: nil, + end_token: nil, + expression: + ) + ::Parser::Source::Map::Send.new( + dot, + selector, + begin_token, + end_token, + expression + ) + end + + # Constructs a new source map for a variable. + def source_map_variable(name: nil, expression:) + ::Parser::Source::Map::Variable.new(name, expression) + end + + # Constructs a new source range from the given start and end offsets. + def source_range(start_char, end_char) + ::Parser::Source::Range.new(buffer, start_char, end_char) + end + + # Constructs a new source range by finding the given needle in the given + # range of the source. 
+ def source_range_find(start_char, end_char, needle) + index = buffer.source[start_char...end_char].index(needle) + unless index + slice = buffer.source[start_char...end_char].inspect + raise "Could not find #{needle.inspect} in #{slice}" + end + + offset = start_char + index + source_range(offset, offset + needle.length) + end + + # Constructs a new source range from the given start offset and length. + def source_range_length(start_char, length) + if length > 0 + source_range(start_char, start_char + length) + else + source_range(start_char + length, start_char) + end end - def send_type(operator) - operator.is_a?(Op) && operator.value == "&." ? :csend : :send + # Constructs a new source range using the given node's location. + def source_range_node(node) + location = node.location + source_range(location.start_char, location.end_char) end end end diff --git a/test/suites/parse_helper.rb b/test/suites/parse_helper.rb index 685cd6d2..04fe8123 100644 --- a/test/suites/parse_helper.rb +++ b/test/suites/parse_helper.rb @@ -132,7 +132,8 @@ def assert_parses(_ast, code, _source_maps = "", versions = ALL_VERSIONS) expected = parse(code) return if expected.nil? - actual = SyntaxTree::Translation.to_parser(SyntaxTree.parse(code), code) + buffer = expected.location.expression.source_buffer + actual = SyntaxTree::Translation.to_parser(SyntaxTree.parse(code), buffer) assert_equal(expected, actual) end @@ -147,3 +148,28 @@ def parse(code) rescue Parser::SyntaxError end end + +if ENV["PARSER_LOCATION"] + # Modify the source map == check so that it doesn't check against the node + # itself so we don't get into a recursive loop. 
+ Parser::Source::Map.prepend( + Module.new do + def ==(other) + self.class == other.class && + (instance_variables - %i[@node]).map do |ivar| + instance_variable_get(ivar) == other.instance_variable_get(ivar) + end.reduce(:&) + end + end + ) + + # Next, ensure that we're comparing the nodes and also comparing the source + # ranges so that we're getting all of the necessary information. + Parser::AST::Node.prepend( + Module.new do + def ==(other) + super && (location == other.location) + end + end + ) +end From 5cc7e3d8bc23ad69279a60be81228aaa282db60e Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 1 Feb 2023 09:43:05 -0500 Subject: [PATCH 04/58] Even more locations --- bin/{compare => whitequark} | 34 +- lib/syntax_tree/node.rb | 8 + lib/syntax_tree/parser.rb | 88 ++-- lib/syntax_tree/translation/parser.rb | 687 ++++++++++++++++++++------ test/fixtures/break.rb | 6 + test/node_test.rb | 2 +- 6 files changed, 634 insertions(+), 191 deletions(-) rename bin/{compare => whitequark} (66%) diff --git a/bin/compare b/bin/whitequark similarity index 66% rename from bin/compare rename to bin/whitequark index bdca5a9a..121bcd53 100755 --- a/bin/compare +++ b/bin/whitequark @@ -8,7 +8,7 @@ $:.unshift(File.expand_path("../lib", __dir__)) require "syntax_tree" # First, opt in to every AST feature. -# Parser::Builders::Default.modernize +Parser::Builders::Default.modernize # Modify the source map == check so that it doesn't check against the node # itself so we don't get into a recursive loop. @@ -46,14 +46,34 @@ ptree = parser.parse(buffer) if stree == ptree puts "Syntax trees are equivalent." -else - warn "Syntax trees are different." +elsif stree.inspect == ptree.inspect + warn "Syntax tree locations are different." 
+ + queue = [[stree, ptree]] + while (left, right = queue.shift) + if left.location != right.location + warn "Different node:" + pp left + + warn "Different location:" + + warn "Syntax Tree:" + pp left.location + + warn "whitequark/parser:" + pp right.location - warn "syntax_tree:" + exit + end + + left.children.zip(right.children).each do |left_child, right_child| + queue << [left_child, right_child] if left_child.is_a?(Parser::AST::Node) + end + end +else + warn "Syntax Tree:" pp stree - warn "parser:" + warn "whitequark/parser:" pp ptree - - binding.irb end diff --git a/lib/syntax_tree/node.rb b/lib/syntax_tree/node.rb index fc5517cf..b0d1b97a 100644 --- a/lib/syntax_tree/node.rb +++ b/lib/syntax_tree/node.rb @@ -2149,6 +2149,14 @@ def ===(other) other.is_a?(BlockVar) && params === other.params && ArrayMatch.call(locals, other.locals) end + + # When a single required parameter is declared for a block, it gets + # automatically expanded if the values being yielded into it are an array. + def arg0? + params.requireds.length == 1 && params.optionals.empty? && + params.rest.nil? && params.posts.empty? && params.keywords.empty? && + params.keyword_rest.nil? && params.block.nil? + end end # BlockArg represents declaring a block parameter on a method definition. diff --git a/lib/syntax_tree/parser.rb b/lib/syntax_tree/parser.rb index 99b703d0..75af65bf 100644 --- a/lib/syntax_tree/parser.rb +++ b/lib/syntax_tree/parser.rb @@ -670,18 +670,22 @@ def self.visit(node, tokens) # (nil | Array[untyped]) posts # ) -> AryPtn def on_aryptn(constant, requireds, rest, posts) - parts = [constant, *requireds, rest, *posts].compact + lbracket = find_token(LBracket) + lbracket ||= find_token(LParen) if constant - # If there aren't any parts (no constant, no positional arguments), then - # we're matching an empty array. In this case, we're going to look for the - # left and right brackets explicitly. Otherwise, we'll just use the bounds - # of the various parts. 
- location = - if parts.empty? - consume_token(LBracket).location.to(consume_token(RBracket).location) - else - parts[0].location.to(parts[-1].location) - end + rbracket = find_token(RBracket) + rbracket ||= find_token(RParen) if constant + + parts = [constant, lbracket, *requireds, rest, *posts, rbracket].compact + + # The location is going to be determined by the first part to the last + # part. This includes potential brackets. + location = parts[0].location.to(parts[-1].location) + + # Now that we have the location calculated, we can remove the brackets + # from the list of tokens. + tokens.delete(lbracket) if lbracket + tokens.delete(rbracket) if rbracket # If there is a plain *, then we're going to fix up the location of it # here because it currently doesn't have anything to use for its precise @@ -2353,23 +2357,30 @@ def on_method_add_arg(call, arguments) # :call-seq: # on_method_add_block: ( - # (Call | Command | CommandCall) call, + # (Break | Call | Command | CommandCall) call, # Block block - # ) -> MethodAddBlock + # ) -> Break | MethodAddBlock def on_method_add_block(call, block) location = call.location.to(block.location) case call + when Break + parts = call.arguments.parts + + node = parts.pop + copied = + node.copy(block: block, location: node.location.to(block.location)) + + copied.comments.concat(call.comments) + parts << copied + + call.copy(location: location) when Command, CommandCall node = call.copy(block: block, location: location) node.comments.concat(call.comments) node else - MethodAddBlock.new( - call: call, - block: block, - location: call.location.to(block.location) - ) + MethodAddBlock.new(call: call, block: block, location: location) end end @@ -2592,19 +2603,40 @@ def on_params( # have a `nil` for the value instead of a `false`. keywords&.map! 
{ |(key, value)| [key, value || nil] } - parts = [ - *requireds, - *optionals&.flatten(1), - rest, - *posts, - *keywords&.flatten(1), - (keyword_rest if keyword_rest != :nil), - (block if block != :&) - ].compact + # Here we're going to build up a list of all of the params so that we can + # determine our location information. + parts = [] + + requireds&.each { |required| parts << required.location } + optionals&.each do |(key, value)| + parts << key.location + parts << value.location if value + end + + parts << rest.location if rest + posts&.each { |post| parts << post.location } + + keywords&.each do |(key, value)| + parts << key.location + parts << value.location if value + end + + if keyword_rest == :nil + # When we get a :nil here, it means that we have **nil syntax, which + # means this set of parameters accepts no more keyword arguments. In + # this case we need to go and find the location of these two tokens. + operator = consume_operator(:**) + parts << operator.location.to(consume_keyword(:nil).location) + elsif keyword_rest + parts << keyword_rest.location + end + + parts << block.location if block && block != :& + parts = parts.compact location = if parts.any? - parts[0].location.to(parts[-1].location) + parts[0].to(parts[-1]) else Location.fixed(line: lineno, char: char_pos, column: current_column) end diff --git a/lib/syntax_tree/translation/parser.rb b/lib/syntax_tree/translation/parser.rb index 8a61ad94..1e47b4e7 100644 --- a/lib/syntax_tree/translation/parser.rb +++ b/lib/syntax_tree/translation/parser.rb @@ -191,13 +191,21 @@ def visit_arg_block(node) # Visit an ArgStar node. def visit_arg_star(node) if stack[-3].is_a?(MLHSParen) && stack[-3].contents.is_a?(MLHS) - case node.value - when nil - s(:restarg, [], nil) - when Ident - s(:restarg, [node.value.value.to_sym], nil) + if node.value.nil? 
+ s( + :restarg, + [], + source_map_variable(expression: source_range_node(node)) + ) else - s(:restarg, [node.value.value.value.to_sym], nil) + s( + :restarg, + [node.value.value.to_sym], + source_map_variable( + name: source_range_node(node.value), + expression: source_range_node(node) + ) + ) end else s( @@ -212,8 +220,8 @@ def visit_arg_star(node) end # Visit an ArgsForward node. - def visit_args_forward(_node) - s(:forwarded_args, [], nil) + def visit_args_forward(node) + s(:forwarded_args, [], source_map(expression: source_range_node(node))) end # Visit an ArrayLiteral node. @@ -251,11 +259,44 @@ def visit_aryptn(node) end end - inner = s(type, children + visit_all(node.posts), nil) if node.constant - s(:const_pattern, [visit(node.constant), inner], nil) + s( + :const_pattern, + [ + visit(node.constant), + s( + type, + children + visit_all(node.posts), + source_map_collection( + expression: + source_range( + node.constant.location.end_char + 1, + node.location.end_char - 1 + ) + ) + ) + ], + source_map_collection( + begin_token: + source_range_length(node.constant.location.end_char, 1), + end_token: source_range_length(node.location.end_char, -1), + expression: source_range_node(node) + ) + ) else - inner + s( + type, + children + visit_all(node.posts), + if buffer.source[node.location.start_char] == "[" + source_map_collection( + begin_token: source_range_length(node.location.start_char, 1), + end_token: source_range_length(node.location.end_char, -1), + expression: source_range_node(node) + ) + else + source_map_collection(expression: source_range_node(node)) + end + ) end end @@ -280,15 +321,23 @@ def visit_assign(node) # Visit an Assoc node. def visit_assoc(node) if node.value.nil? - type = node.key.value.start_with?(/[A-Z]/) ? :const : :send + expression = + source_range(node.location.start_char, node.location.end_char - 1) s( :pair, [ visit(node.key), - s(type, [nil, node.key.value.chomp(":").to_sym], nil) + s( + node.key.value.start_with?(/[A-Z]/) ? 
:const : :send, + [nil, node.key.value.chomp(":").to_sym], + source_map_send(selector: expression, expression: expression) + ) ], - nil + source_map_operator( + operator: source_range_length(node.key.location.end_char, -1), + expression: source_range_node(node) + ) ) else s( @@ -411,6 +460,11 @@ def visit_binary(node) ) ) when :=~ + # When you use a regular expression on the left hand side of a =~ + # operator and it doesn't have interpolatoin, then its named capture + # groups introduce local variables into the scope. In this case the + # parser gem has a different node (match_with_lvasgn) instead of the + # regular send. if node.left.is_a?(RegexpLiteral) && node.left.parts.length == 1 && node.left.parts.first.is_a?(TStringContent) s( @@ -457,60 +511,124 @@ def visit_blockarg(node) # Visit a BlockVar node. def visit_block_var(node) shadowargs = - node.locals.map { |local| s(:shadowarg, [local.value.to_sym], nil) } - - # There is a special node type in the parser gem for when a single - # required parameter to a block would potentially be expanded - # automatically. We handle that case here. - if ::Parser::Builders::Default.emit_procarg0 - params = node.params - - if params.requireds.length == 1 && params.optionals.empty? && - params.rest.nil? && params.posts.empty? && - params.keywords.empty? && params.keyword_rest.nil? && - params.block.nil? - required = params.requireds.first + node.locals.map do |local| + s( + :shadowarg, + [local.value.to_sym], + source_map_variable( + name: source_range_node(local), + expression: source_range_node(local) + ) + ) + end + params = node.params + children = + if ::Parser::Builders::Default.emit_procarg0 && node.arg0? + # There is a special node type in the parser gem for when a single + # required parameter to a block would potentially be expanded + # automatically. We handle that case here. 
+ required = params.requireds.first procarg0 = if ::Parser::Builders::Default.emit_arg_inside_procarg0 && required.is_a?(Ident) - s(:procarg0, [s(:arg, [required.value.to_sym], nil)], nil) + s( + :procarg0, + [ + s( + :arg, + [required.value.to_sym], + source_map_variable( + name: source_range_node(required), + expression: source_range_node(required) + ) + ) + ], + source_map_collection(expression: source_range_node(required)) + ) else - s(:procarg0, visit(required).children, nil) + child = visit(required) + s(:procarg0, child, child.location) end - return s(:args, [procarg0] + shadowargs, nil) + [procarg0] + else + visit(params).children end - end - s(:args, visit(node.params).children + shadowargs, nil) + s( + :args, + children + shadowargs, + source_map_collection( + begin_token: source_range_length(node.location.start_char, 1), + end_token: source_range_length(node.location.end_char, -1), + expression: source_range_node(node) + ) + ) end # Visit a BodyStmt node. def visit_bodystmt(node) - inner = visit(node.statements) + result = visit(node.statements) if node.rescue_clause - children = [inner] + visit(node.rescue_clause).children + rescue_node = visit(node.rescue_clause) + + children = [result] + rescue_node.children + location = rescue_node.location if node.else_clause children.pop children << visit(node.else_clause) + + location = + source_map_condition( + else_token: + source_range_length( + node.else_clause.location.start_char - 3, + -4 + ), + expression: + source_range( + location.expression.begin_pos, + node.else_clause.location.end_char + ) + ) end - inner = s(:rescue, children, nil) + result = s(rescue_node.type, children, location) end if node.ensure_clause - inner = s(:ensure, [inner] + visit(node.ensure_clause).children, nil) + ensure_node = visit(node.ensure_clause) + + expression = + ( + if result + result.location.expression.join(ensure_node.location.expression) + else + ensure_node.location.expression + end + ) + location = 
ensure_node.location.with_expression(expression) + + result = + s(ensure_node.type, [result] + ensure_node.children, location) end - inner + result end # Visit a Break node. def visit_break(node) - s(:break, visit_all(node.arguments.parts), nil) + s( + :break, + visit_all(node.arguments.parts), + source_map_keyword( + keyword: source_range_length(node.location.start_char, 5), + expression: source_range_node(node) + ) + ) end # Visit a CallNode node. @@ -606,6 +724,7 @@ def visit_command_call(node) visit(node.receiver), node.message == :call ? :call : node.message.value.to_sym ] + begin_token = nil end_token = nil @@ -649,13 +768,11 @@ def visit_command_call(node) if node.operator == :"::" source_range_find( node.receiver.location.end_char, - ( - if node.message == :call - dot_bound - else - node.message.location.start_char - end - ), + if node.message == :call + dot_bound + else + node.message.location.start_char + end, "::" ) elsif node.operator @@ -665,7 +782,18 @@ def visit_command_call(node) end_token: end_token, selector: node.message == :call ? nil : source_range_node(node.message), - expression: source_range_node(node) + expression: + if node.arguments.is_a?(ArgParen) || + (node.arguments.is_a?(Args) && node.arguments.parts.any?) + source_range( + node.location.start_char, + node.arguments.location.end_char + ) + elsif node.block + source_range_node(node.message) + else + source_range_node(node) + end ) ) @@ -798,31 +926,45 @@ def visit_def(node) s(:args, [], source_map_collection(expression: nil)) end - if node.target - target = node.target.is_a?(Paren) ? node.target.contents : node.target - - s( - :defs, - [visit(target), name, args, visit(node.bodystmt)], + location = + if node.endless? 
source_map_method_definition( keyword: source_range_length(node.location.start_char, 3), - operator: source_range_node(node.operator), + assignment: + source_range_find( + (node.params || node.name).location.end_char, + node.bodystmt.location.start_char, + "=" + ), name: source_range_node(node.name), - end_token: source_range_length(node.location.end_char, -3), expression: source_range_node(node) ) - ) - else - s( - :def, - [name, args, visit(node.bodystmt)], + else source_map_method_definition( keyword: source_range_length(node.location.start_char, 3), name: source_range_node(node.name), end_token: source_range_length(node.location.end_char, -3), expression: source_range_node(node) ) + end + + if node.target + target = node.target.is_a?(Paren) ? node.target.contents : node.target + + s( + :defs, + [visit(target), name, args, visit(node.bodystmt)], + source_map_method_definition( + keyword: location.keyword, + assignment: location.assignment, + operator: source_range_node(node.operator), + name: location.name, + end_token: location.end, + expression: location.expression + ) ) + else + s(:def, [name, args, visit(node.bodystmt)], location) end end @@ -934,7 +1076,22 @@ def visit_END(node) # Visit an Ensure node. def visit_ensure(node) - s(:ensure, [visit(node.statements)], nil) + start_char = node.location.start_char + end_char = + if node.statements.empty? + start_char + 6 + else + node.statements.body.last.location.end_char + end + + s( + :ensure, + [visit(node.statements)], + source_map_condition( + keyword: source_range_length(start_char, 6), + expression: source_range(start_char, end_char) + ) + ) end # Visit a Field node. @@ -1009,10 +1166,29 @@ def visit_fndptn(node) # Visit a For node. 
def visit_for(node) + begin_start = node.collection.location.end_char + begin_end = node.statements.location.start_char + + begin_token = + if buffer.source[begin_start...begin_end].include?("do") + source_range_find(begin_start, begin_end, "do") + end + s( :for, [visit(node.index), visit(node.collection), visit(node.statements)], - nil + source_map_for( + keyword: source_range_length(node.location.start_char, 3), + in_token: + source_range_find( + node.index.location.end_char, + node.collection.location.start_char, + "in" + ), + begin_token: begin_token, + end_token: source_range_length(node.location.end_char, -3), + expression: source_range_node(node) + ) ) end @@ -1223,6 +1399,19 @@ def visit_if(node) expression: source_range_node(node) ) else + begin_start = node.predicate.location.end_char + begin_end = + if node.statements.empty? + node.statements.location.end_char + else + node.statements.body.first.location.start_char + end + + begin_token = + if buffer.source[begin_start...begin_end].include?("then") + source_range_find(begin_start, begin_end, "then") + end + else_token = case node.consequent when Elsif @@ -1233,6 +1422,7 @@ def visit_if(node) source_map_condition( keyword: source_range_length(node.location.start_char, 2), + begin_token: begin_token, else_token: else_token, end_token: source_range_length(node.location.end_char, -3), expression: source_range_node(node) @@ -1288,10 +1478,20 @@ def visit_in(node) nil ) else + end_char = + if node.statements.empty? + node.statements.location.end_char - 1 + else + node.statements.body.first.location.start_char + end + s( :in_pattern, [visit(node.pattern), nil, visit(node.statements)], - nil + source_map_keyword( + keyword: source_range_length(node.location.start_char, 2), + expression: source_range(node.location.start_char, end_char) + ) ) end end @@ -1380,30 +1580,79 @@ def visit_label(node) # Visit a Lambda node. def visit_lambda(node) args = node.params.is_a?(LambdaVar) ? 
node.params : node.params.contents - - arguments = visit(args) - child = - if ::Parser::Builders::Default.emit_lambda - s(:lambda, [], nil) - else - s(:send, [nil, :lambda], nil) - end + args_node = visit(args) type = :block if args.empty? && (maximum = num_block_type(node.statements)) type = :numblock - arguments = maximum + args_node = maximum end - s(type, [child, arguments, visit(node.statements)], nil) + begin_start = node.params.location.end_char + begin_token, end_token = + if buffer.source[begin_start - 1] == "{" + [ + source_range_length(begin_start, -1), + source_range_length(node.location.end_char, -1) + ] + else + [ + source_range_length(begin_start, -2), + source_range_length(node.location.end_char, -3) + ] + end + + selector = source_range_length(node.location.start_char, 2) + + s( + type, + [ + if ::Parser::Builders::Default.emit_lambda + s(:lambda, [], source_map(expression: selector)) + else + s( + :send, + [nil, :lambda], + source_map_send(selector: selector, expression: selector) + ) + end, + args_node, + visit(node.statements) + ], + source_map_collection( + begin_token: begin_token, + end_token: end_token, + expression: source_range_node(node) + ) + ) end # Visit a LambdaVar node. def visit_lambda_var(node) shadowargs = - node.locals.map { |local| s(:shadowarg, [local.value.to_sym], nil) } + node.locals.map do |local| + s( + :shadowarg, + [local.value.to_sym], + source_map_variable( + name: source_range_node(local), + expression: source_range_node(local) + ) + ) + end + + location = + if node.location.start_char == node.location.end_char + source_map_collection(expression: nil) + else + source_map_collection( + begin_token: source_range_length(node.location.start_char, 1), + end_token: source_range_length(node.location.end_char, -1), + expression: source_range_node(node) + ) + end - s(:args, visit(node.params).children + shadowargs, nil) + s(:args, visit(node.params).children + shadowargs, location) end # Visit an MAssign node. 
@@ -1425,11 +1674,11 @@ def visit_massign(node) # Visit a MethodAddBlock node. def visit_method_add_block(node) - type, arguments = block_children(node.block) - case node.call when Break, Next, ReturnNode + type, arguments = block_children(node.block) call = visit(node.call) + s( call.type, [ @@ -1441,12 +1690,25 @@ def visit_method_add_block(node) ], nil ) - else + when ARef, Super, ZSuper + type, arguments = block_children(node.block) + s( type, [visit(node.call), arguments, visit(node.block.bodystmt)], nil ) + else + visit_command_call( + CommandCall.new( + receiver: node.call.receiver, + operator: node.call.operator, + message: node.call.message, + arguments: node.call.arguments, + block: node.block, + location: node.location + ) + ) end end @@ -1455,7 +1717,18 @@ def visit_mlhs(node) s( :mlhs, node.parts.map do |part| - part.is_a?(Ident) ? s(:arg, [part.value.to_sym], nil) : visit(part) + if part.is_a?(Ident) + s( + :arg, + [part.value.to_sym], + source_map_variable( + name: source_range_node(part), + expression: source_range_node(part) + ) + ) + else + visit(part) + end end, source_map_collection(expression: source_range_node(node)) ) @@ -1463,7 +1736,17 @@ def visit_mlhs(node) # Visit an MLHSParen node. def visit_mlhs_paren(node) - visit(node.contents) + child = visit(node.contents) + + s( + child.type, + child.children, + source_map_collection( + begin_token: source_range_length(node.location.start_char, 1), + end_token: source_range_length(node.location.end_char, -1), + expression: source_range_node(node) + ) + ) end # Visit a ModuleDeclaration node. 
@@ -1673,7 +1956,14 @@ def visit_params(node) when nil, ArgsForward # do nothing when :nil - children << s(:kwnilarg, [], nil) + children << s( + :kwnilarg, + [], + source_map_variable( + name: source_range_length(node.location.end_char, -3), + expression: source_range_node(node) + ) + ) else children << visit(node.keyword_rest) end @@ -1681,15 +1971,21 @@ def visit_params(node) children << visit(node.block) if node.block if node.keyword_rest.is_a?(ArgsForward) + location = + source_map(expression: source_range_node(node.keyword_rest)) + + # If there are no other arguments and we have the emit_forward_arg + # option enabled, then the entire argument list is represented by a + # single forward_args node. if children.empty? && !::Parser::Builders::Default.emit_forward_arg - return s(:forward_args, [], nil) + return s(:forward_args, [], location) end - children.insert( - node.requireds.length + node.optionals.length + - node.keywords.length, - s(:forward_arg, [], nil) - ) + # Otherwise, we need to insert a forward_arg node into the list of + # parameters before any keyword rest or block parameters. + index = + node.requireds.length + node.optionals.length + node.keywords.length + children.insert(index, s(:forward_arg, [], location)) end s(:args, children, nil) @@ -1697,31 +1993,19 @@ def visit_params(node) # Visit a Paren node. def visit_paren(node) + location = + source_map_collection( + begin_token: source_range_length(node.location.start_char, 1), + end_token: source_range_length(node.location.end_char, -1), + expression: source_range_node(node) + ) + if node.contents.nil? || - ( - node.contents.is_a?(Statements) && - node.contents.body.length == 1 && - node.contents.body.first.is_a?(VoidStmt) - ) - s(:begin, [], nil) - elsif stack[-2].is_a?(DefNode) && stack[-2].target.nil? && - stack[-2].target == node - visit(node.contents) + (node.contents.is_a?(Statements) && node.contents.empty?) 
+ s(:begin, [], location) else child = visit(node.contents) - if child.type == :begin - child - else - s( - :begin, - [child], - source_map_collection( - begin_token: source_range_length(node.location.start_char, 1), - end_token: source_range_length(node.location.end_char, -1), - expression: source_range_node(node) - ) - ) - end + child.type == :begin ? child : s(:begin, [child], location) end end @@ -1847,23 +2131,86 @@ def visit_regexp_literal(node) # Visit a Rescue node. def visit_rescue(node) + # In the parser gem, there is a separation between the rescue node and + # the rescue body. They have different bounds, so we have to calculate + # those here. + start_char = node.location.start_char + + body_end_char = + if node.statements.empty? + start_char + 6 + else + node.statements.body.last.location.end_char + end + + end_char = + if node.consequent + end_node = node.consequent + end_node = end_node.consequent while end_node.consequent + + if end_node.statements.empty? + start_char + 6 + else + end_node.statements.body.last.location.end_char + end + else + body_end_char + end + + # These locations are reused for multiple children. 
+ keyword = source_range_length(start_char, 6) + body_expression = source_range(start_char, body_end_char) + expression = source_range(start_char, end_char) + exceptions = case node.exception&.exceptions when nil nil - when VarRef - s(:array, [visit(node.exception.exceptions)], nil) when MRHS - s(:array, visit_all(node.exception.exceptions.parts), nil) + visit_array( + ArrayLiteral.new( + lbracket: nil, + contents: + Args.new( + parts: node.exception.exceptions.parts, + location: node.exception.exceptions.location + ), + location: node.exception.exceptions.location + ) + ) else - s(:array, [visit(node.exception.exceptions)], nil) + visit_array( + ArrayLiteral.new( + lbracket: nil, + contents: + Args.new( + parts: [node.exception.exceptions], + location: node.exception.exceptions.location + ), + location: node.exception.exceptions.location + ) + ) end resbody = if node.exception.nil? - s(:resbody, [nil, nil, visit(node.statements)], nil) + s( + :resbody, + [nil, nil, visit(node.statements)], + source_map_rescue_body( + keyword: keyword, + expression: body_expression + ) + ) elsif node.exception.variable.nil? - s(:resbody, [exceptions, nil, visit(node.statements)], nil) + s( + :resbody, + [exceptions, nil, visit(node.statements)], + source_map_rescue_body( + keyword: keyword, + expression: body_expression + ) + ) else s( :resbody, @@ -1872,7 +2219,16 @@ def visit_rescue(node) visit(node.exception.variable), visit(node.statements) ], - nil + source_map_rescue_body( + keyword: keyword, + assoc: + source_range_find( + node.location.start_char + 6, + node.exception.variable.location.start_char, + "=>" + ), + expression: body_expression + ) ) end @@ -1883,7 +2239,7 @@ def visit_rescue(node) children << nil end - s(:rescue, children, nil) + s(:rescue, children, source_map_condition(expression: expression)) end # Visit a RescueMod node. @@ -2314,59 +2670,58 @@ def visit_until(node) # Visit a VarField node. 
def visit_var_field(node) - is_match_var = ->(parent) do - case parent - when AryPtn, FndPtn, HshPtn, In, RAssign - true - when Binary - parent.operator == :"=>" - else - false + name = node.value.value.to_sym + match_var = + [stack[-3], stack[-2]].any? do |parent| + case parent + when AryPtn, FndPtn, HshPtn, In, RAssign + true + when Binary + parent.operator == :"=>" + else + false + end end - end - if [stack[-3], stack[-2]].any?(&is_match_var) - return( - s( - :match_var, - [node.value.value.to_sym], - source_map_variable( - name: source_range_node(node), - expression: source_range_node(node) - ) + if match_var + s( + :match_var, + [name], + source_map_variable( + name: source_range_node(node), + expression: source_range_node(node) ) ) - end - - case node.value - when Const + elsif node.value.is_a?(Const) s( :casgn, - [nil, node.value.value.to_sym], + [nil, name], source_map_constant( name: source_range_node(node.value), expression: source_range_node(node) ) ) - when CVar, GVar, Ident, IVar, VarRef - s( - { - CVar => :cvasgn, - GVar => :gvasgn, - Ident => :lvasgn, - IVar => :ivasgn, - VarRef => :lvasgn - }[ - node.value.class - ], - [node.value.value.to_sym], + else + location = source_map_variable( name: source_range_node(node), expression: source_range_node(node) ) - ) - else - s(:match_rest, [], nil) + + case node.value + when CVar + s(:cvasgn, [name], location) + when GVar + s(:gvasgn, [name], location) + when Ident + s(:lvasgn, [name], location) + when IVar + s(:ivasgn, [name], location) + when VarRef + s(:lvasgn, [name], location) + else + s(:match_rest, [], nil) + end end end @@ -2517,7 +2872,12 @@ def visit_zsuper(node) private def block_children(node) - arguments = (node.block_var ? 
visit(node.block_var) : s(:args, [], nil)) + arguments = + if node.block_var + visit(node.block_var) + else + s(:args, [], source_map_collection(expression: nil)) + end type = :block if !node.block_var && (maximum = num_block_type(node.bodystmt)) @@ -2688,6 +3048,23 @@ def source_map_definition( ) end + # Constructs a new source map for a for loop. + def source_map_for( + keyword: nil, + in_token: nil, + begin_token: nil, + end_token: nil, + expression: + ) + ::Parser::Source::Map::For.new( + keyword, + in_token, + begin_token, + end_token, + expression + ) + end + # Construct a source map for an index operation. def source_map_index(begin_token: nil, end_token: nil, expression:) ::Parser::Source::Map::Index.new(begin_token, end_token, expression) diff --git a/test/fixtures/break.rb b/test/fixtures/break.rb index a77c6b35..a608a6b2 100644 --- a/test/fixtures/break.rb +++ b/test/fixtures/break.rb @@ -27,3 +27,9 @@ ) % break foo.bar :baz do |qux| qux end +- +break( + foo.bar :baz do |qux| + qux + end +) diff --git a/test/node_test.rb b/test/node_test.rb index 7254c086..9660b341 100644 --- a/test/node_test.rb +++ b/test/node_test.rb @@ -131,7 +131,7 @@ def test_aryptn end SOURCE - at = location(lines: 2..2, chars: 18..47) + at = location(lines: 2..2, chars: 18..48) assert_node(AryPtn, source, at: at) { |node| node.consequent.pattern } end From 0f11b7e1d1afe7f3c9b284d5b140fed15ecf2a72 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 2 Feb 2023 13:31:56 -0500 Subject: [PATCH 05/58] Add query methods for instructions for branching logic --- lib/syntax_tree/yarv/instructions.rb | 774 ++++++--------------------- lib/syntax_tree/yarv/legacy.rb | 36 +- test/yarv_test.rb | 34 +- 3 files changed, 193 insertions(+), 651 deletions(-) diff --git a/lib/syntax_tree/yarv/instructions.rb b/lib/syntax_tree/yarv/instructions.rb index bba06f8d..c387e763 100644 --- a/lib/syntax_tree/yarv/instructions.rb +++ b/lib/syntax_tree/yarv/instructions.rb @@ -63,6 +63,50 @@ def 
self.calldata( CallData.new(method, argc, flags, kw_arg) end + # This is a base class for all YARV instructions. It provides a few + # convenience methods for working with instructions. + class Instruction + # This method creates an instruction that represents the canonical + # (non-specialized) form of this instruction. If this instruction is not + # a specialized instruction, then this method returns `self`. + def canonical + self + end + + # This returns the size of the instruction in terms of the number of slots + # it occupies in the instruction sequence. Effectively this is 1 plus the + # number of operands. + def length + 1 + end + + # This returns the number of values that are pushed onto the stack. + def pushes + 0 + end + + # This returns the number of values that are popped off the stack. + def pops + 0 + end + + # Whether or not this instruction is a branch instruction. + def branches? + false + end + + # Whether or not this instruction leaves the current frame. + def leaves? + false + end + + # Whether or not this instruction falls through to the next instruction if + # its branching fails. + def falls_through? 
+ false + end + end + # ### Summary # # `adjuststack` accepts a single integer argument and removes that many @@ -76,7 +120,7 @@ def self.calldata( # x[0] # ~~~ # - class AdjustStack + class AdjustStack < Instruction attr_reader :number def initialize(number) @@ -107,14 +151,6 @@ def pops number end - def pushes - 0 - end - - def canonical - self - end - def call(vm) vm.pop(number) end @@ -138,7 +174,7 @@ def call(vm) # "#{5}" # ~~~ # - class AnyToString + class AnyToString < Instruction def disasm(fmt) fmt.instruction("anytostring") end @@ -155,10 +191,6 @@ def ==(other) other.is_a?(AnyToString) end - def length - 1 - end - def pops 2 end @@ -167,10 +199,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) original, value = vm.pop(2) @@ -198,7 +226,7 @@ def call(vm) # puts x # ~~~ # - class BranchIf + class BranchIf < Instruction attr_reader :label def initialize(label) @@ -229,16 +257,16 @@ def pops 1 end - def pushes - 0 + def call(vm) + vm.jump(label) if vm.pop end - def canonical - self + def branches? + true end - def call(vm) - vm.jump(label) if vm.pop + def falls_through? + true end end @@ -259,7 +287,7 @@ def call(vm) # end # ~~~ # - class BranchNil + class BranchNil < Instruction attr_reader :label def initialize(label) @@ -290,16 +318,16 @@ def pops 1 end - def pushes - 0 + def call(vm) + vm.jump(label) if vm.pop.nil? end - def canonical - self + def branches? + true end - def call(vm) - vm.jump(label) if vm.pop.nil? + def falls_through? + true end end @@ -319,7 +347,7 @@ def call(vm) # end # ~~~ # - class BranchUnless + class BranchUnless < Instruction attr_reader :label def initialize(label) @@ -350,16 +378,16 @@ def pops 1 end - def pushes - 0 + def call(vm) + vm.jump(label) unless vm.pop end - def canonical - self + def branches? + true end - def call(vm) - vm.jump(label) unless vm.pop + def falls_through? 
+ true end end @@ -382,7 +410,7 @@ def call(vm) # evaluate(value: 3) # ~~~ # - class CheckKeyword + class CheckKeyword < Instruction attr_reader :keyword_bits_index, :keyword_index def initialize(keyword_bits_index, keyword_index) @@ -419,18 +447,10 @@ def length 3 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.local_get(keyword_bits_index, 0)[keyword_index]) end @@ -448,7 +468,7 @@ def call(vm) # foo in Foo # ~~~ # - class CheckMatch + class CheckMatch < Instruction VM_CHECKMATCH_TYPE_WHEN = 1 VM_CHECKMATCH_TYPE_CASE = 2 VM_CHECKMATCH_TYPE_RESCUE = 3 @@ -489,10 +509,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) target, pattern = vm.pop(2) @@ -536,7 +552,7 @@ def check?(pattern, target) # foo in [bar] # ~~~ # - class CheckType + class CheckType < Instruction TYPE_OBJECT = 0x01 TYPE_CLASS = 0x02 TYPE_MODULE = 0x03 @@ -643,10 +659,6 @@ def pushes 2 end - def canonical - self - end - def call(vm) object = vm.pop result = @@ -713,7 +725,7 @@ def call(vm) # [1, *2] # ~~~ # - class ConcatArray + class ConcatArray < Instruction def disasm(fmt) fmt.instruction("concatarray") end @@ -730,10 +742,6 @@ def ==(other) other.is_a?(ConcatArray) end - def length - 1 - end - def pops 2 end @@ -742,10 +750,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) left, right = vm.pop(2) vm.push([*left, *right]) @@ -767,7 +771,7 @@ def call(vm) # "#{5}" # ~~~ # - class ConcatStrings + class ConcatStrings < Instruction attr_reader :number def initialize(number) @@ -802,10 +806,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.pop(number).join) end @@ -826,7 +826,7 @@ def call(vm) # end # ~~~ # - class DefineClass + class DefineClass < Instruction TYPE_CLASS = 0 TYPE_SINGLETON_CLASS = 1 TYPE_MODULE = 2 @@ -874,10 +874,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) object, superclass = vm.pop(2) @@ -914,7 +910,7 @@ def call(vm) # defined?(x) # ~~~ # - class 
Defined + class Defined < Instruction TYPE_NIL = 1 TYPE_IVAR = 2 TYPE_LVAR = 3 @@ -1011,10 +1007,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) object = vm.pop @@ -1069,7 +1061,7 @@ def call(vm) # def value = "value" # ~~~ # - class DefineMethod + class DefineMethod < Instruction attr_reader :method_name, :method_iseq def initialize(method_name, method_iseq) @@ -1102,18 +1094,6 @@ def length 3 end - def pops - 0 - end - - def pushes - 0 - end - - def canonical - self - end - def call(vm) name = method_name nesting = vm.frame.nesting @@ -1150,7 +1130,7 @@ def call(vm) # def self.value = "value" # ~~~ # - class DefineSMethod + class DefineSMethod < Instruction attr_reader :method_name, :method_iseq def initialize(method_name, method_iseq) @@ -1187,14 +1167,6 @@ def pops 1 end - def pushes - 0 - end - - def canonical - self - end - def call(vm) name = method_name nesting = vm.frame.nesting @@ -1227,7 +1199,7 @@ def call(vm) # $global = 5 # ~~~ # - class Dup + class Dup < Instruction def disasm(fmt) fmt.instruction("dup") end @@ -1244,10 +1216,6 @@ def ==(other) other.is_a?(Dup) end - def length - 1 - end - def pops 1 end @@ -1256,10 +1224,6 @@ def pushes 2 end - def canonical - self - end - def call(vm) vm.push(vm.stack.last.dup) end @@ -1275,7 +1239,7 @@ def call(vm) # [true] # ~~~ # - class DupArray + class DupArray < Instruction attr_reader :object def initialize(object) @@ -1302,18 +1266,10 @@ def length 2 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) vm.push(object.dup) end @@ -1329,7 +1285,7 @@ def call(vm) # { a: 1 } # ~~~ # - class DupHash + class DupHash < Instruction attr_reader :object def initialize(object) @@ -1356,18 +1312,10 @@ def length 2 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) vm.push(object.dup) end @@ -1383,7 +1331,7 @@ def call(vm) # Object::X ||= true # ~~~ # - class DupN + class DupN < Instruction attr_reader :number def 
initialize(number) @@ -1410,18 +1358,10 @@ def length 2 end - def pops - 0 - end - def pushes number end - def canonical - self - end - def call(vm) values = vm.pop(number) vm.push(*values) @@ -1441,7 +1381,7 @@ def call(vm) # x, = [true, false, nil] # ~~~ # - class ExpandArray + class ExpandArray < Instruction attr_reader :number, :flags def initialize(number, flags) @@ -1478,10 +1418,6 @@ def pushes number end - def canonical - self - end - def call(vm) object = vm.pop object = @@ -1539,7 +1475,7 @@ def call(vm) # end # ~~~ # - class GetBlockParam + class GetBlockParam < Instruction attr_reader :index, :level def initialize(index, level) @@ -1570,18 +1506,10 @@ def length 3 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.local_get(index, level)) end @@ -1602,7 +1530,7 @@ def call(vm) # end # ~~~ # - class GetBlockParamProxy + class GetBlockParamProxy < Instruction attr_reader :index, :level def initialize(index, level) @@ -1636,18 +1564,10 @@ def length 3 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.local_get(index, level)) end @@ -1665,7 +1585,7 @@ def call(vm) # @@class_variable # ~~~ # - class GetClassVariable + class GetClassVariable < Instruction attr_reader :name, :cache def initialize(name, cache) @@ -1697,18 +1617,10 @@ def length 3 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) clazz = vm.frame._self clazz = clazz.class unless clazz.is_a?(Class) @@ -1728,7 +1640,7 @@ def call(vm) # Constant # ~~~ # - class GetConstant + class GetConstant < Instruction attr_reader :name def initialize(name) @@ -1763,10 +1675,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) const_base, allow_nil = vm.pop(2) @@ -1798,7 +1706,7 @@ def call(vm) # $$ # ~~~ # - class GetGlobal + class GetGlobal < Instruction attr_reader :name def initialize(name) @@ -1825,18 +1733,10 @@ def length 2 end - def pops - 0 - end - def 
pushes 1 end - def canonical - self - end - def call(vm) # Evaluating the name of the global variable because there isn't a # reflection API for global variables. @@ -1861,7 +1761,7 @@ def call(vm) # @instance_variable # ~~~ # - class GetInstanceVariable + class GetInstanceVariable < Instruction attr_reader :name, :cache def initialize(name, cache) @@ -1893,18 +1793,10 @@ def length 3 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) method = Object.instance_method(:instance_variable_get) vm.push(method.bind(vm.frame._self).call(name)) @@ -1925,7 +1817,7 @@ def call(vm) # tap { tap { value } } # ~~~ # - class GetLocal + class GetLocal < Instruction attr_reader :index, :level def initialize(index, level) @@ -1955,18 +1847,10 @@ def length 3 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.local_get(index, level)) end @@ -1985,7 +1869,7 @@ def call(vm) # value # ~~~ # - class GetLocalWC0 + class GetLocalWC0 < Instruction attr_reader :index def initialize(index) @@ -2012,10 +1896,6 @@ def length 2 end - def pops - 0 - end - def pushes 1 end @@ -2042,7 +1922,7 @@ def call(vm) # self.then { value } # ~~~ # - class GetLocalWC1 + class GetLocalWC1 < Instruction attr_reader :index def initialize(index) @@ -2069,10 +1949,6 @@ def length 2 end - def pops - 0 - end - def pushes 1 end @@ -2096,7 +1972,7 @@ def call(vm) # 1 if (a == 1) .. 
(b == 2) # ~~~ # - class GetSpecial + class GetSpecial < Instruction SVAR_LASTLINE = 0 # $_ SVAR_BACKREF = 1 # $~ SVAR_FLIPFLOP_START = 2 # flipflop @@ -2128,18 +2004,10 @@ def length 3 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) case key when SVAR_LASTLINE @@ -2163,7 +2031,7 @@ def call(vm) # :"#{"foo"}" # ~~~ # - class Intern + class Intern < Instruction def disasm(fmt) fmt.instruction("intern") end @@ -2180,10 +2048,6 @@ def ==(other) other.is_a?(Intern) end - def length - 1 - end - def pops 1 end @@ -2192,10 +2056,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.pop.to_sym) end @@ -2215,7 +2075,7 @@ def call(vm) # end # ~~~ # - class InvokeBlock + class InvokeBlock < Instruction attr_reader :calldata def initialize(calldata) @@ -2250,10 +2110,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.frame_yield.block.call(*vm.pop(calldata.argc))) end @@ -2273,7 +2129,7 @@ def call(vm) # end # ~~~ # - class InvokeSuper + class InvokeSuper < Instruction attr_reader :calldata, :block_iseq def initialize(calldata, block_iseq) @@ -2302,10 +2158,6 @@ def ==(other) other.block_iseq == block_iseq end - def length - 1 - end - def pops argb = (calldata.flag?(CallData::CALL_ARGS_BLOCKARG) ? 1 : 0) argb + calldata.argc + 1 @@ -2315,10 +2167,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) block = if (iseq = block_iseq) @@ -2358,7 +2206,7 @@ def call(vm) # end # ~~~ # - class Jump + class Jump < Instruction attr_reader :label def initialize(label) @@ -2385,21 +2233,13 @@ def length 2 end - def pops - 0 - end - - def pushes - 0 - end - - def canonical - self - end - def call(vm) vm.jump(label) end + + def branches? 
+ true + end end # ### Summary @@ -2412,7 +2252,7 @@ def call(vm) # ;; # ~~~ # - class Leave + class Leave < Instruction def disasm(fmt) fmt.instruction("leave") end @@ -2429,10 +2269,6 @@ def ==(other) other.is_a?(Leave) end - def length - 1 - end - def pops 1 end @@ -2443,13 +2279,17 @@ def pushes 0 end - def canonical - self - end - def call(vm) vm.leave end + + def branches? + true + end + + def leaves? + true + end end # ### Summary @@ -2464,7 +2304,7 @@ def call(vm) # ["string"] # ~~~ # - class NewArray + class NewArray < Instruction attr_reader :number def initialize(number) @@ -2499,10 +2339,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.pop(number)) end @@ -2520,7 +2356,7 @@ def call(vm) # ["string", **{ foo: "bar" }] # ~~~ # - class NewArrayKwSplat + class NewArrayKwSplat < Instruction attr_reader :number def initialize(number) @@ -2555,10 +2391,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.pop(number)) end @@ -2578,7 +2410,7 @@ def call(vm) # end # ~~~ # - class NewHash + class NewHash < Instruction attr_reader :number def initialize(number) @@ -2613,10 +2445,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.pop(number).each_slice(2).to_h) end @@ -2637,7 +2465,7 @@ def call(vm) # p (x..y), (x...y) # ~~~ # - class NewRange + class NewRange < Instruction attr_reader :exclude_end def initialize(exclude_end) @@ -2672,10 +2500,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(Range.new(*vm.pop(2), exclude_end == 1)) end @@ -2692,7 +2516,7 @@ def call(vm) # raise rescue true # ~~~ # - class Nop + class Nop < Instruction def disasm(fmt) fmt.instruction("nop") end @@ -2709,22 +2533,6 @@ def ==(other) other.is_a?(Nop) end - def length - 1 - end - - def pops - 0 - end - - def pushes - 0 - end - - def canonical - self - end - def call(vm) end end @@ -2743,7 +2551,7 @@ def call(vm) # "#{5}" # ~~~ # - class ObjToString + class ObjToString < Instruction 
attr_reader :calldata def initialize(calldata) @@ -2778,10 +2586,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.pop.to_s) end @@ -2800,7 +2604,7 @@ def call(vm) # END { puts "END" } # ~~~ # - class Once + class Once < Instruction attr_reader :iseq, :cache def initialize(iseq, cache) @@ -2829,18 +2633,10 @@ def length 3 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) return if @executed vm.push(vm.run_block_frame(iseq, vm.frame)) @@ -2861,7 +2657,7 @@ def call(vm) # 2 & 3 # ~~~ # - class OptAnd + class OptAnd < Instruction attr_reader :calldata def initialize(calldata) @@ -2917,7 +2713,7 @@ def call(vm) # 7[2] # ~~~ # - class OptAref + class OptAref < Instruction attr_reader :calldata def initialize(calldata) @@ -2974,7 +2770,7 @@ def call(vm) # { 'test' => true }['test'] # ~~~ # - class OptArefWith + class OptArefWith < Instruction attr_reader :object, :calldata def initialize(object, calldata) @@ -3014,10 +2810,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.pop[object]) end @@ -3036,7 +2828,7 @@ def call(vm) # {}[:key] = value # ~~~ # - class OptAset + class OptAset < Instruction attr_reader :calldata def initialize(calldata) @@ -3092,7 +2884,7 @@ def call(vm) # {}["key"] = value # ~~~ # - class OptAsetWith + class OptAsetWith < Instruction attr_reader :object, :calldata def initialize(object, calldata) @@ -3132,10 +2924,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) hash, value = vm.pop(2) vm.push(hash[object] = value) @@ -3165,7 +2953,7 @@ def call(vm) # end # ~~~ # - class OptCaseDispatch + class OptCaseDispatch < Instruction attr_reader :case_dispatch_hash, :else_label def initialize(case_dispatch_hash, else_label) @@ -3206,16 +2994,16 @@ def pops 1 end - def pushes - 0 + def call(vm) + vm.jump(case_dispatch_hash.fetch(vm.pop, else_label)) end - def canonical - self + def branches? 
+ true end - def call(vm) - vm.jump(case_dispatch_hash.fetch(vm.pop, else_label)) + def falls_through? + true end end @@ -3232,7 +3020,7 @@ def call(vm) # 2 / 3 # ~~~ # - class OptDiv + class OptDiv < Instruction attr_reader :calldata def initialize(calldata) @@ -3288,7 +3076,7 @@ def call(vm) # "".empty? # ~~~ # - class OptEmptyP + class OptEmptyP < Instruction attr_reader :calldata def initialize(calldata) @@ -3345,7 +3133,7 @@ def call(vm) # 2 == 2 # ~~~ # - class OptEq + class OptEq < Instruction attr_reader :calldata def initialize(calldata) @@ -3402,7 +3190,7 @@ def call(vm) # 4 >= 3 # ~~~ # - class OptGE + class OptGE < Instruction attr_reader :calldata def initialize(calldata) @@ -3458,7 +3246,7 @@ def call(vm) # ::Object # ~~~ # - class OptGetConstantPath + class OptGetConstantPath < Instruction attr_reader :names def initialize(names) @@ -3486,18 +3274,10 @@ def length 2 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) current = vm.frame._self current = current.class unless current.is_a?(Class) @@ -3523,7 +3303,7 @@ def call(vm) # 4 > 3 # ~~~ # - class OptGT + class OptGT < Instruction attr_reader :calldata def initialize(calldata) @@ -3580,7 +3360,7 @@ def call(vm) # 3 <= 4 # ~~~ # - class OptLE + class OptLE < Instruction attr_reader :calldata def initialize(calldata) @@ -3637,7 +3417,7 @@ def call(vm) # "".length # ~~~ # - class OptLength + class OptLength < Instruction attr_reader :calldata def initialize(calldata) @@ -3694,7 +3474,7 @@ def call(vm) # 3 < 4 # ~~~ # - class OptLT + class OptLT < Instruction attr_reader :calldata def initialize(calldata) @@ -3751,7 +3531,7 @@ def call(vm) # "" << 2 # ~~~ # - class OptLTLT + class OptLTLT < Instruction attr_reader :calldata def initialize(calldata) @@ -3809,7 +3589,7 @@ def call(vm) # 3 - 2 # ~~~ # - class OptMinus + class OptMinus < Instruction attr_reader :calldata def initialize(calldata) @@ -3866,7 +3646,7 @@ def call(vm) # 4 % 2 # ~~~ # - class OptMod + class 
OptMod < Instruction attr_reader :calldata def initialize(calldata) @@ -3923,7 +3703,7 @@ def call(vm) # 3 * 2 # ~~~ # - class OptMult + class OptMult < Instruction attr_reader :calldata def initialize(calldata) @@ -3982,7 +3762,7 @@ def call(vm) # 2 != 2 # ~~~ # - class OptNEq + class OptNEq < Instruction attr_reader :eq_calldata, :neq_calldata def initialize(eq_calldata, neq_calldata) @@ -4022,10 +3802,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) receiver, argument = vm.pop(2) vm.push(receiver != argument) @@ -4044,7 +3820,7 @@ def call(vm) # [a, b, c].max # ~~~ # - class OptNewArrayMax + class OptNewArrayMax < Instruction attr_reader :number def initialize(number) @@ -4079,10 +3855,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.pop(number).max) end @@ -4100,7 +3872,7 @@ def call(vm) # [a, b, c].min # ~~~ # - class OptNewArrayMin + class OptNewArrayMin < Instruction attr_reader :number def initialize(number) @@ -4135,10 +3907,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.pop(number).min) end @@ -4157,7 +3925,7 @@ def call(vm) # "".nil? # ~~~ # - class OptNilP + class OptNilP < Instruction attr_reader :calldata def initialize(calldata) @@ -4212,7 +3980,7 @@ def call(vm) # !true # ~~~ # - class OptNot + class OptNot < Instruction attr_reader :calldata def initialize(calldata) @@ -4269,7 +4037,7 @@ def call(vm) # 2 | 3 # ~~~ # - class OptOr + class OptOr < Instruction attr_reader :calldata def initialize(calldata) @@ -4326,7 +4094,7 @@ def call(vm) # 2 + 3 # ~~~ # - class OptPlus + class OptPlus < Instruction attr_reader :calldata def initialize(calldata) @@ -4382,7 +4150,7 @@ def call(vm) # /a/ =~ "a" # ~~~ # - class OptRegExpMatch2 + class OptRegExpMatch2 < Instruction attr_reader :calldata def initialize(calldata) @@ -4438,7 +4206,7 @@ def call(vm) # puts "Hello, world!" 
# ~~~ # - class OptSendWithoutBlock + class OptSendWithoutBlock < Instruction attr_reader :calldata def initialize(calldata) @@ -4495,7 +4263,7 @@ def call(vm) # "".size # ~~~ # - class OptSize + class OptSize < Instruction attr_reader :calldata def initialize(calldata) @@ -4551,7 +4319,7 @@ def call(vm) # "hello".freeze # ~~~ # - class OptStrFreeze + class OptStrFreeze < Instruction attr_reader :object, :calldata def initialize(object, calldata) @@ -4583,18 +4351,10 @@ def length 3 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) vm.push(object.freeze) end @@ -4612,7 +4372,7 @@ def call(vm) # -"string" # ~~~ # - class OptStrUMinus + class OptStrUMinus < Instruction attr_reader :object, :calldata def initialize(object, calldata) @@ -4644,18 +4404,10 @@ def length 3 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) vm.push(-object) end @@ -4674,7 +4426,7 @@ def call(vm) # "".succ # ~~~ # - class OptSucc + class OptSucc < Instruction attr_reader :calldata def initialize(calldata) @@ -4728,7 +4480,7 @@ def call(vm) # a ||= 2 # ~~~ # - class Pop + class Pop < Instruction def disasm(fmt) fmt.instruction("pop") end @@ -4745,22 +4497,10 @@ def ==(other) other.is_a?(Pop) end - def length - 1 - end - def pops 1 end - def pushes - 0 - end - - def canonical - self - end - def call(vm) vm.pop end @@ -4776,7 +4516,7 @@ def call(vm) # nil # ~~~ # - class PutNil + class PutNil < Instruction def disasm(fmt) fmt.instruction("putnil") end @@ -4793,14 +4533,6 @@ def ==(other) other.is_a?(PutNil) end - def length - 1 - end - - def pops - 0 - end - def pushes 1 end @@ -4824,7 +4556,7 @@ def call(vm) # 5 # ~~~ # - class PutObject + class PutObject < Instruction attr_reader :object def initialize(object) @@ -4851,18 +4583,10 @@ def length 2 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) vm.push(object) end @@ -4880,7 +4604,7 @@ def call(vm) # 0 # ~~~ # - class 
PutObjectInt2Fix0 + class PutObjectInt2Fix0 < Instruction def disasm(fmt) fmt.instruction("putobject_INT2FIX_0_") end @@ -4897,14 +4621,6 @@ def ==(other) other.is_a?(PutObjectInt2Fix0) end - def length - 1 - end - - def pops - 0 - end - def pushes 1 end @@ -4930,7 +4646,7 @@ def call(vm) # 1 # ~~~ # - class PutObjectInt2Fix1 + class PutObjectInt2Fix1 < Instruction def disasm(fmt) fmt.instruction("putobject_INT2FIX_1_") end @@ -4947,14 +4663,6 @@ def ==(other) other.is_a?(PutObjectInt2Fix1) end - def length - 1 - end - - def pops - 0 - end - def pushes 1 end @@ -4978,7 +4686,7 @@ def call(vm) # puts "Hello, world!" # ~~~ # - class PutSelf + class PutSelf < Instruction def disasm(fmt) fmt.instruction("putself") end @@ -4995,22 +4703,10 @@ def ==(other) other.is_a?(PutSelf) end - def length - 1 - end - - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.frame._self) end @@ -5028,7 +4724,7 @@ def call(vm) # alias foo bar # ~~~ # - class PutSpecialObject + class PutSpecialObject < Instruction OBJECT_VMCORE = 1 OBJECT_CBASE = 2 OBJECT_CONST_BASE = 3 @@ -5059,18 +4755,10 @@ def length 2 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) case object when OBJECT_VMCORE @@ -5095,7 +4783,7 @@ def call(vm) # "foo" # ~~~ # - class PutString + class PutString < Instruction attr_reader :object def initialize(object) @@ -5122,18 +4810,10 @@ def length 2 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) vm.push(object.dup) end @@ -5152,7 +4832,7 @@ def call(vm) # "hello".tap { |i| p i } # ~~~ # - class Send + class Send < Instruction attr_reader :calldata, :block_iseq def initialize(calldata, block_iseq) @@ -5194,10 +4874,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) block = if (iseq = block_iseq) @@ -5240,7 +4916,7 @@ def call(vm) # end # ~~~ # - class SetBlockParam + class SetBlockParam < Instruction attr_reader :index, :level def 
initialize(index, level) @@ -5275,14 +4951,6 @@ def pops 1 end - def pushes - 0 - end - - def canonical - self - end - def call(vm) vm.local_set(index, level, vm.pop) end @@ -5301,7 +4969,7 @@ def call(vm) # @@class_variable = 1 # ~~~ # - class SetClassVariable + class SetClassVariable < Instruction attr_reader :name, :cache def initialize(name, cache) @@ -5337,14 +5005,6 @@ def pops 1 end - def pushes - 0 - end - - def canonical - self - end - def call(vm) clazz = vm.frame._self clazz = clazz.class unless clazz.is_a?(Class) @@ -5363,7 +5023,7 @@ def call(vm) # Constant = 1 # ~~~ # - class SetConstant + class SetConstant < Instruction attr_reader :name def initialize(name) @@ -5394,14 +5054,6 @@ def pops 2 end - def pushes - 0 - end - - def canonical - self - end - def call(vm) value, parent = vm.pop(2) parent.const_set(name, value) @@ -5419,7 +5071,7 @@ def call(vm) # $global = 5 # ~~~ # - class SetGlobal + class SetGlobal < Instruction attr_reader :name def initialize(name) @@ -5450,14 +5102,6 @@ def pops 1 end - def pushes - 0 - end - - def canonical - self - end - def call(vm) # Evaluating the name of the global variable because there isn't a # reflection API for global variables. 
@@ -5481,7 +5125,7 @@ def call(vm) # @instance_variable = 1 # ~~~ # - class SetInstanceVariable + class SetInstanceVariable < Instruction attr_reader :name, :cache def initialize(name, cache) @@ -5517,14 +5161,6 @@ def pops 1 end - def pushes - 0 - end - - def canonical - self - end - def call(vm) method = Object.instance_method(:instance_variable_set) method.bind(vm.frame._self).call(name, vm.pop) @@ -5545,7 +5181,7 @@ def call(vm) # tap { tap { value = 10 } } # ~~~ # - class SetLocal + class SetLocal < Instruction attr_reader :index, :level def initialize(index, level) @@ -5579,14 +5215,6 @@ def pops 1 end - def pushes - 0 - end - - def canonical - self - end - def call(vm) vm.local_set(index, level, vm.pop) end @@ -5605,7 +5233,7 @@ def call(vm) # value = 5 # ~~~ # - class SetLocalWC0 + class SetLocalWC0 < Instruction attr_reader :index def initialize(index) @@ -5636,10 +5264,6 @@ def pops 1 end - def pushes - 0 - end - def canonical SetLocal.new(index, 0) end @@ -5662,7 +5286,7 @@ def call(vm) # self.then { value = 10 } # ~~~ # - class SetLocalWC1 + class SetLocalWC1 < Instruction attr_reader :index def initialize(index) @@ -5693,10 +5317,6 @@ def pops 1 end - def pushes - 0 - end - def canonical SetLocal.new(index, 1) end @@ -5717,7 +5337,7 @@ def call(vm) # {}[:key] = 'val' # ~~~ # - class SetN + class SetN < Instruction attr_reader :number def initialize(number) @@ -5752,10 +5372,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.stack[-number - 1] = vm.stack.last end @@ -5773,7 +5389,7 @@ def call(vm) # baz if (foo == 1) .. 
(bar == 1) # ~~~ # - class SetSpecial + class SetSpecial < Instruction attr_reader :key def initialize(key) @@ -5804,14 +5420,6 @@ def pops 1 end - def pushes - 0 - end - - def canonical - self - end - def call(vm) case key when GetSpecial::SVAR_LASTLINE @@ -5836,7 +5444,7 @@ def call(vm) # x = *(5) # ~~~ # - class SplatArray + class SplatArray < Instruction attr_reader :flag def initialize(flag) @@ -5871,10 +5479,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) value = vm.pop @@ -5914,7 +5518,7 @@ def call(vm) # !!defined?([[]]) # ~~~ # - class Swap + class Swap < Instruction def disasm(fmt) fmt.instruction("swap") end @@ -5931,10 +5535,6 @@ def ==(other) other.is_a?(Swap) end - def length - 1 - end - def pops 2 end @@ -5943,10 +5543,6 @@ def pushes 2 end - def canonical - self - end - def call(vm) left, right = vm.pop(2) vm.push(right, left) @@ -5965,7 +5561,7 @@ def call(vm) # [1, 2, 3].map { break 2 } # ~~~ # - class Throw + class Throw < Instruction RUBY_TAG_NONE = 0x0 RUBY_TAG_RETURN = 0x1 RUBY_TAG_BREAK = 0x2 @@ -6013,10 +5609,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) state = type & VM_THROW_STATE_MASK value = vm.pop @@ -6072,7 +5664,7 @@ def error_backtrace(vm) # end # ~~~ # - class TopN + class TopN < Instruction attr_reader :number def initialize(number) @@ -6099,18 +5691,10 @@ def length 2 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.stack[-number - 1]) end @@ -6127,7 +5711,7 @@ def call(vm) # /foo #{bar}/ # ~~~ # - class ToRegExp + class ToRegExp < Instruction attr_reader :options, :length def initialize(options, length) @@ -6160,10 +5744,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(Regexp.new(vm.pop(length).join, options)) end diff --git a/lib/syntax_tree/yarv/legacy.rb b/lib/syntax_tree/yarv/legacy.rb index ab9b00df..8e12ff16 100644 --- a/lib/syntax_tree/yarv/legacy.rb +++ b/lib/syntax_tree/yarv/legacy.rb @@ -19,7 +19,7 @@ module 
Legacy # @@class_variable # ~~~ # - class GetClassVariable + class GetClassVariable < Instruction attr_reader :name def initialize(name) @@ -46,10 +46,6 @@ def length 2 end - def pops - 0 - end - def pushes 1 end @@ -79,7 +75,7 @@ def call(vm) # Constant # ~~~ # - class OptGetInlineCache + class OptGetInlineCache < Instruction attr_reader :label, :cache def initialize(label, cache) @@ -111,21 +107,21 @@ def length 3 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) vm.push(nil) end + + def branches? + true + end + + def falls_through? + true + end end # ### Summary @@ -143,7 +139,7 @@ def call(vm) # Constant # ~~~ # - class OptSetInlineCache + class OptSetInlineCache < Instruction attr_reader :cache def initialize(cache) @@ -178,10 +174,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) end end @@ -200,7 +192,7 @@ def call(vm) # @@class_variable = 1 # ~~~ # - class SetClassVariable + class SetClassVariable < Instruction attr_reader :name def initialize(name) @@ -231,10 +223,6 @@ def pops 1 end - def pushes - 0 - end - def canonical YARV::SetClassVariable.new(name, nil) end diff --git a/test/yarv_test.rb b/test/yarv_test.rb index e3995435..c4c4c3bd 100644 --- a/test/yarv_test.rb +++ b/test/yarv_test.rb @@ -288,38 +288,12 @@ def value end end - instructions = - YARV.constants.map { YARV.const_get(_1) } + - YARV::Legacy.constants.map { YARV::Legacy.const_get(_1) } - - [ - YARV::Assembler, - YARV::Bf, - YARV::CallData, - YARV::Compiler, - YARV::Decompiler, - YARV::Disassembler, - YARV::InstructionSequence, - YARV::Legacy, - YARV::LocalTable, - YARV::VM - ] + ObjectSpace.each_object(YARV::Instruction.singleton_class) do |instruction| + next if instruction == YARV::Instruction - interface = %i[ - disasm - to_a - deconstruct_keys - length - pops - pushes - canonical - call - == - ] - - instructions.each do |instruction| define_method("test_instruction_interface_#{instruction.name}") do - instance_methods = 
instruction.instance_methods(false) - assert_empty(interface - instance_methods) + methods = instruction.instance_methods(false) + assert_empty(%i[disasm to_a deconstruct_keys call ==] - methods) end end From 33d36ed2bbd61da601cfe6b7f5e248cd405d356f Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 2 Feb 2023 14:13:47 -0500 Subject: [PATCH 06/58] Add a control flow graph --- lib/syntax_tree.rb | 1 + lib/syntax_tree/yarv/control_flow_graph.rb | 162 +++++++++++++++++++++ 2 files changed, 163 insertions(+) create mode 100644 lib/syntax_tree/yarv/control_flow_graph.rb diff --git a/lib/syntax_tree.rb b/lib/syntax_tree.rb index 73add469..ea365172 100644 --- a/lib/syntax_tree.rb +++ b/lib/syntax_tree.rb @@ -31,6 +31,7 @@ require_relative "syntax_tree/yarv" require_relative "syntax_tree/yarv/bf" require_relative "syntax_tree/yarv/compiler" +require_relative "syntax_tree/yarv/control_flow_graph" require_relative "syntax_tree/yarv/decompiler" require_relative "syntax_tree/yarv/disassembler" require_relative "syntax_tree/yarv/instruction_sequence" diff --git a/lib/syntax_tree/yarv/control_flow_graph.rb b/lib/syntax_tree/yarv/control_flow_graph.rb new file mode 100644 index 00000000..15e0a767 --- /dev/null +++ b/lib/syntax_tree/yarv/control_flow_graph.rb @@ -0,0 +1,162 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + # Constructs a control-flow-graph of a YARV instruction sequence. We use + # conventional basic-blocks. + class ControlFlowGraph + # This object represents a single basic block, wherein all contained + # instructions do not branch except for the last one. + class BasicBlock + # This is the index into the list of instructions where this block + # starts. + attr_reader :block_start + + # This is the set of instructions that this block contains. + attr_reader :insns + + # This is an array of basic blocks that are predecessors to this block. + attr_reader :preds + + # This is an array of basic blocks that are successors to this block. 
+ attr_reader :succs + + def initialize(block_start, insns) + @block_start = block_start + @insns = insns + + @preds = [] + @succs = [] + end + + def id + "block_#{block_start}" + end + + def last + insns.last + end + end + + # This is the instruction sequence that this control flow graph + # corresponds to. + attr_reader :iseq + + # This is the list of instructions that this control flow graph contains. + # It is effectively the same as the list of instructions in the + # instruction sequence but with line numbers and events filtered out. + attr_reader :insns + + # This is the set of basic blocks that this control-flow graph contains. + attr_reader :blocks + + def initialize(iseq, insns, blocks) + @iseq = iseq + @insns = insns + @blocks = blocks + end + + def self.compile(iseq) + # First, we need to find all of the instructions that immediately follow + # labels so that when we are looking at instructions that branch we know + # where they branch to. + labels = {} + insns = [] + + iseq.insns.each do |insn| + case insn + when Instruction + insns << insn + when InstructionSequence::Label + labels[insn] = insns.length + end + end + + # Now we need to find the indices of the instructions that start a basic + # block because they're either: + # + # * the start of an instruction sequence + # * the target of a branch + # * fallen through to from a branch + # + block_starts = Set.new([0]) + + insns.each_with_index do |insn, index| + if insn.branches? + block_starts.add(labels[insn.label]) if insn.respond_to?(:label) + block_starts.add(index + 1) if insn.falls_through? + end + end + + block_starts = block_starts.to_a.sort + + # Now we can build up a set of basic blocks by iterating over the starts + # of each block. They are keyed by the index of their first instruction. + blocks = {} + block_starts.each_with_index do |block_start, block_index| + block_stop = (block_starts[(block_index + 1)..] 
+ [insns.length]).min + + blocks[block_start] = + BasicBlock.new(block_start, insns[block_start...block_stop]) + end + + # Now we need to connect the blocks by letting them know which blocks + # precede them and which blocks follow them. + blocks.each do |block_start, block| + insn = block.last + + if insn.branches? && insn.respond_to?(:label) + block.succs << blocks.fetch(labels[insn.label]) + end + + if (!insn.branches? && !insn.leaves?) || insn.falls_through? + block.succs << blocks.fetch(block_start + block.insns.length) + end + + block.succs.each { |succ| succ.preds << block } + end + + # Here we're going to verify that we set up the control flow graph + # correctly. To do so we will assert that the only instruction in any + # given block that branches is the last instruction in the block. + blocks.each_value do |block| + block.insns[0...-1].each { |insn| raise if insn.branches? } + end + + # Finally we can return a new control flow graph with the given + # instruction sequence and our set of basic blocks. + new(iseq, insns, blocks.values) + end + + def disasm + fmt = Disassembler.new + + output = StringIO.new + output.puts "== cfg #{iseq.name}" + + blocks.each do |block| + output.print(block.id) + + unless block.preds.empty? + output.print(" # from: #{block.preds.map(&:id).join(", ")}") + end + + output.puts + + block.insns.each do |insn| + output.print(" ") + output.puts(insn.disasm(fmt)) + end + + succs = block.succs.map(&:id) + succs << "leaves" if block.last.leaves? + output.print(" # to: #{succs.join(", ")}") unless succs.empty? 
+ + output.puts + end + + output.string + end + end + end +end From 7e6e4d139ccc83d8a3a9dec301fb955919ee98f9 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 2 Feb 2023 14:53:37 -0500 Subject: [PATCH 07/58] Build a data flow graph --- lib/syntax_tree.rb | 1 + lib/syntax_tree/yarv/control_flow_graph.rb | 1 - lib/syntax_tree/yarv/data_flow_graph.rb | 214 +++++++++++++++++++++ 3 files changed, 215 insertions(+), 1 deletion(-) create mode 100644 lib/syntax_tree/yarv/data_flow_graph.rb diff --git a/lib/syntax_tree.rb b/lib/syntax_tree.rb index ea365172..c6f1223b 100644 --- a/lib/syntax_tree.rb +++ b/lib/syntax_tree.rb @@ -32,6 +32,7 @@ require_relative "syntax_tree/yarv/bf" require_relative "syntax_tree/yarv/compiler" require_relative "syntax_tree/yarv/control_flow_graph" +require_relative "syntax_tree/yarv/data_flow_graph" require_relative "syntax_tree/yarv/decompiler" require_relative "syntax_tree/yarv/disassembler" require_relative "syntax_tree/yarv/instruction_sequence" diff --git a/lib/syntax_tree/yarv/control_flow_graph.rb b/lib/syntax_tree/yarv/control_flow_graph.rb index 15e0a767..26849b64 100644 --- a/lib/syntax_tree/yarv/control_flow_graph.rb +++ b/lib/syntax_tree/yarv/control_flow_graph.rb @@ -130,7 +130,6 @@ def self.compile(iseq) def disasm fmt = Disassembler.new - output = StringIO.new output.puts "== cfg #{iseq.name}" diff --git a/lib/syntax_tree/yarv/data_flow_graph.rb b/lib/syntax_tree/yarv/data_flow_graph.rb new file mode 100644 index 00000000..b028c521 --- /dev/null +++ b/lib/syntax_tree/yarv/data_flow_graph.rb @@ -0,0 +1,214 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + # Constructs a data-flow-graph of a YARV instruction sequence, via a + # control-flow-graph. Data flow is discovered locally and then globally. The + # graph only considers data flow through the stack - local variables and + # objects are considered fully escaped in this analysis. 
+ class DataFlowGraph + # This object represents the flow of data between instructions. + class DataFlow + attr_reader :in + attr_reader :out + + def initialize + @in = [] + @out = [] + end + end + + attr_reader :cfg, :insn_flows, :block_flows + + def initialize(cfg, insn_flows, block_flows) + @cfg = cfg + @insn_flows = insn_flows + @block_flows = block_flows + end + + def self.compile(cfg) + # First, create a data structure to encode data flow between + # instructions. + insn_flows = {} + cfg.insns.each_with_index do |insn, index| + insn_flows[index] = DataFlow.new + end + + # Next, create a data structure to encode data flow between basic + # blocks. + block_flows = {} + cfg.blocks.each do |block| + block_flows[block.block_start] = DataFlow.new + end + + # Now, discover the data flow within each basic block. Using an abstract + # stack, connect from consumers of data to the producers of that data. + cfg.blocks.each do |block| + block_flow = block_flows.fetch(block.block_start) + + stack = [] + stack_initial_depth = 0 + + # Go through each instruction in the block... + block.insns.each.with_index(block.block_start) do |insn, index| + insn_flow = insn_flows[index] + + # How many values will be missing from the local stack to run this + # instruction? + missing_stack_values = insn.pops - stack.size + + # For every value the instruction pops off the stack... + insn.pops.times do + # Was the value it pops off from another basic block? + if stack.empty? + # This is a basic block argument. + name = :"in_#{missing_stack_values - 1}" + + insn_flow.in.unshift(name) + block_flow.in.unshift(name) + + stack_initial_depth += 1 + missing_stack_values -= 1 + else + # Connect this consumer to the producer of the value. + insn_flow.in.unshift(stack.pop) + end + end + + # Record on our abstract stack that this instruction pushed + # this value onto the stack. 
+ insn.pushes.times { stack << index } + end + + # Values that are left on the stack after going through all + # instructions are arguments to the basic block that we jump to. + stack.reverse_each.with_index do |producer, index| + block_flow.out << producer + insn_flows[producer].out << :"out_#{index}" + end + end + + # Go backwards and connect from producers to consumers. + cfg.insns.each_with_index do |insn, index| + # For every instruction that produced a value used in this + # instruction... + insn_flows[index].in.each do |producer| + # If it's actually another instruction and not a basic block + # argument... + if producer.is_a?(Integer) + # Record in the producing instruction that it produces a value + # used by this construction. + insn_flows[producer].out << index + end + end + end + + # Now, discover the data flow between basic blocks. + stack = [*cfg.blocks] + until stack.empty? + succ = stack.pop + succ_flow = block_flows.fetch(succ.block_start) + succ.preds.each do |pred| + pred_flow = block_flows.fetch(pred.block_start) + + # Does a predecessor block have fewer outputs than the successor + # has inputs? + if pred_flow.out.size < succ_flow.in.size + # If so then add arguments to pass data through from the + # predecessor's predecessors. + (succ_flow.in.size - pred_flow.out.size).times do |index| + name = :"pass_#{index}" + pred_flow.in.unshift(name) + pred_flow.out.unshift(name) + end + + # Since we modified the predecessor, add it back to the worklist + # so it'll be considered as a successor again, and propogate the + # global data flow back up the control flow graph. + stack << pred + end + end + end + + # Verify that we constructed the data flow graph correctly. Check that + # the first block has no arguments. + raise unless block_flows.fetch(cfg.blocks.first.block_start).in.empty? + + # Check all control flow edges between blocks pass the right number of + # arguments. 
+ cfg.blocks.each do |pred| + pred_flow = block_flows.fetch(pred.block_start) + + if pred.succs.empty? + # With no successors, there should be no output arguments. + raise unless pred_flow.out.empty? + else + # Check with successor... + pred.succs.each do |succ| + succ_flow = block_flows.fetch(succ.block_start) + + # The predecessor should have as many output arguments as the + # success has input arguments. + raise unless pred_flow.out.size == succ_flow.in.size + end + end + end + + # Finally we can return the data flow graph. + new(cfg, insn_flows, block_flows) + end + + def disasm + fmt = Disassembler.new + output = StringIO.new + output.puts "== dfg #{cfg.iseq.name}" + + cfg.blocks.each do |block| + output.print(block.id) + unless block.preds.empty? + output.print(" # from: #{block.preds.map(&:id).join(", ")}") + end + output.puts + + block_flow = block_flows.fetch(block.block_start) + unless block_flow.in.empty? + output.puts " # in: #{block_flow.in.join(", ")}" + end + + block.insns.each.with_index(block.block_start) do |insn, index| + output.print(" ") + output.print(insn.disasm(fmt)) + + insn_flow = insn_flows[index] + if insn_flow.in.empty? && insn_flow.out.empty? + output.puts + next + end + + output.print(" # ") + unless insn_flow.in.empty? + output.print("in: #{insn_flow.in.join(", ")}") + output.print("; ") unless insn_flow.out.empty? + end + + unless insn_flow.out.empty? + output.print("out: #{insn_flow.out.join(", ")}") + end + + output.puts + end + + succs = block.succs.map(&:id) + succs << "leaves" if block.last.leaves? + output.puts(" # to: #{succs.join(", ")}") unless succs.empty? + + unless block_flow.out.empty? 
+ output.puts " # out: #{block_flow.out.join(", ")}" + end + end + + output.string + end + end + end +end \ No newline at end of file From 907cf23b2e8245cd99b6839f06a2bae40b0ae393 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 2 Feb 2023 16:38:08 -0500 Subject: [PATCH 08/58] More documentation --- lib/syntax_tree/yarv/control_flow_graph.rb | 180 +++++++++++++-------- test/yarv_test.rb | 63 ++++++++ 2 files changed, 174 insertions(+), 69 deletions(-) diff --git a/lib/syntax_tree/yarv/control_flow_graph.rb b/lib/syntax_tree/yarv/control_flow_graph.rb index 26849b64..cd8a8324 100644 --- a/lib/syntax_tree/yarv/control_flow_graph.rb +++ b/lib/syntax_tree/yarv/control_flow_graph.rb @@ -2,12 +2,24 @@ module SyntaxTree module YARV - # Constructs a control-flow-graph of a YARV instruction sequence. We use - # conventional basic-blocks. + # This class represents a control flow graph of a YARV instruction sequence. + # It constructs a graph of basic blocks that hold subsets of the list of + # instructions from the instruction sequence. + # + # You can use this class by calling the ::compile method and passing it a + # YARV instruction sequence. It will return a control flow graph object. + # + # iseq = RubyVM::InstructionSequence.compile("1 + 2") + # iseq = SyntaxTree::YARV::InstructionSequence.from(iseq.to_a) + # cfg = SyntaxTree::YARV::ControlFlowGraph.compile(iseq) + # class ControlFlowGraph # This object represents a single basic block, wherein all contained # instructions do not branch except for the last one. class BasicBlock + # This is the unique identifier for this basic block. + attr_reader :id + # This is the index into the list of instructions where this block # starts. 
attr_reader :block_start @@ -22,6 +34,8 @@ class BasicBlock attr_reader :succs def initialize(block_start, insns) + @id = "block_#{block_start}" + @block_start = block_start @insns = insns @@ -29,8 +43,11 @@ def initialize(block_start, insns) @succs = [] end - def id - "block_#{block_start}" + # This method is used to verify that the basic block is well formed. It + # checks that the only instruction in this basic block that branches is + # the last instruction. + def verify + insns[0...-1].each { |insn| raise if insn.branches? } end def last @@ -38,94 +55,108 @@ def last end end - # This is the instruction sequence that this control flow graph - # corresponds to. - attr_reader :iseq - - # This is the list of instructions that this control flow graph contains. - # It is effectively the same as the list of instructions in the - # instruction sequence but with line numbers and events filtered out. - attr_reader :insns - - # This is the set of basic blocks that this control-flow graph contains. - attr_reader :blocks - - def initialize(iseq, insns, blocks) - @iseq = iseq - @insns = insns - @blocks = blocks - end - - def self.compile(iseq) - # First, we need to find all of the instructions that immediately follow - # labels so that when we are looking at instructions that branch we know - # where they branch to. - labels = {} - insns = [] - - iseq.insns.each do |insn| - case insn - when Instruction - insns << insn - when InstructionSequence::Label - labels[insn] = insns.length + # This class is responsible for creating a control flow graph from the + # given instruction sequence. + class Compiler + attr_reader :iseq, :labels, :insns + + def initialize(iseq) + @iseq = iseq + + # We need to find all of the instructions that immediately follow + # labels so that when we are looking at instructions that branch we + # know where they branch to. 
+ @labels = {} + @insns = [] + + iseq.insns.each do |insn| + case insn + when Instruction + @insns << insn + when InstructionSequence::Label + @labels[insn] = @insns.length + end end end - # Now we need to find the indices of the instructions that start a basic - # block because they're either: + # This method is used to compile the instruction sequence into a control + # flow graph. It returns an instance of ControlFlowGraph. + def compile + blocks = connect_basic_blocks(build_basic_blocks) + ControlFlowGraph.new(iseq, insns, blocks.values).tap(&:verify) + end + + private + + # Finds the indices of the instructions that start a basic block because + # they're either: # # * the start of an instruction sequence # * the target of a branch # * fallen through to from a branch # - block_starts = Set.new([0]) - - insns.each_with_index do |insn, index| - if insn.branches? - block_starts.add(labels[insn.label]) if insn.respond_to?(:label) - block_starts.add(index + 1) if insn.falls_through? + def find_basic_block_starts + block_starts = Set.new([0]) + + insns.each_with_index do |insn, index| + if insn.branches? + block_starts.add(labels[insn.label]) if insn.respond_to?(:label) + block_starts.add(index + 1) if insn.falls_through? + end end + + block_starts.to_a.sort end - block_starts = block_starts.to_a.sort + # Builds up a set of basic blocks by iterating over the starts of each + # block. They are keyed by the index of their first instruction. + def build_basic_blocks + block_starts = find_basic_block_starts + blocks = {} - # Now we can build up a set of basic blocks by iterating over the starts - # of each block. They are keyed by the index of their first instruction. - blocks = {} - block_starts.each_with_index do |block_start, block_index| - block_stop = (block_starts[(block_index + 1)..] + [insns.length]).min + block_starts.each_with_index.to_h do |block_start, block_index| + block_end = (block_starts[(block_index + 1)..] 
+ [insns.length]).min + block_insns = insns[block_start...block_end] - blocks[block_start] = - BasicBlock.new(block_start, insns[block_start...block_stop]) + [block_start, BasicBlock.new(block_start, block_insns)] + end end # Now we need to connect the blocks by letting them know which blocks # precede them and which blocks follow them. - blocks.each do |block_start, block| - insn = block.last + def connect_basic_blocks(blocks) + blocks.each do |block_start, block| + insn = block.last - if insn.branches? && insn.respond_to?(:label) - block.succs << blocks.fetch(labels[insn.label]) - end + if insn.branches? && insn.respond_to?(:label) + block.succs << blocks.fetch(labels[insn.label]) + end - if (!insn.branches? && !insn.leaves?) || insn.falls_through? - block.succs << blocks.fetch(block_start + block.insns.length) - end + if (!insn.branches? && !insn.leaves?) || insn.falls_through? + block.succs << blocks.fetch(block_start + block.insns.length) + end - block.succs.each { |succ| succ.preds << block } + block.succs.each { |succ| succ.preds << block } + end end + end - # Here we're going to verify that we set up the control flow graph - # correctly. To do so we will assert that the only instruction in any - # given block that branches is the last instruction in the block. - blocks.each_value do |block| - block.insns[0...-1].each { |insn| raise if insn.branches? } - end + # This is the instruction sequence that this control flow graph + # corresponds to. + attr_reader :iseq + + # This is the list of instructions that this control flow graph contains. + # It is effectively the same as the list of instructions in the + # instruction sequence but with line numbers and events filtered out. + attr_reader :insns + + # This is the set of basic blocks that this control-flow graph contains. + attr_reader :blocks - # Finally we can return a new control flow graph with the given - # instruction sequence and our set of basic blocks. 
- new(iseq, insns, blocks.values) + def initialize(iseq, insns, blocks) + @iseq = iseq + @insns = insns + @blocks = blocks end def disasm @@ -156,6 +187,17 @@ def disasm output.string end + + # This method is used to verify that the control flow graph is well + # formed. It does this by checking that each basic block is itself well + # formed. + def verify + blocks.each(&:verify) + end + + def self.compile(iseq) + Compiler.new(iseq).compile + end end end end diff --git a/test/yarv_test.rb b/test/yarv_test.rb index c4c4c3bd..e37afb63 100644 --- a/test/yarv_test.rb +++ b/test/yarv_test.rb @@ -297,6 +297,69 @@ def value end end + def test_cfg + iseq = RubyVM::InstructionSequence.compile("100 + (14 < 0 ? -1 : +1)") + iseq = SyntaxTree::YARV::InstructionSequence.from(iseq.to_a) + cfg = SyntaxTree::YARV::ControlFlowGraph.compile(iseq) + + assert_equal(<<~CFG, cfg.disasm) + == cfg + block_0 + putobject 100 + putobject 14 + putobject_INT2FIX_0_ + opt_lt + branchunless 13 + # to: block_7, block_5 + block_5 # from: block_0 + putobject -1 + jump 14 + # to: block_8 + block_7 # from: block_0 + putobject_INT2FIX_1_ + # to: block_8 + block_8 # from: block_5, block_7 + opt_plus + leave + # to: leaves + CFG + end + + def test_dfg + iseq = RubyVM::InstructionSequence.compile("100 + (14 < 0 ? 
-1 : +1)") + iseq = SyntaxTree::YARV::InstructionSequence.from(iseq.to_a) + cfg = SyntaxTree::YARV::ControlFlowGraph.compile(iseq) + dfg = SyntaxTree::YARV::DataFlowGraph.compile(cfg) + + assert_equal(<<~DFG, dfg.disasm) + == dfg + block_0 + putobject 100 # out: out_0 + putobject 14 # out: 3 + putobject_INT2FIX_0_ # out: 3 + opt_lt # in: 1, 2; out: 4 + branchunless 13 # in: 3 + # to: block_7, block_5 + # out: 0 + block_5 # from: block_0 + # in: pass_0 + putobject -1 # out: out_0 + jump 14 + # to: block_8 + # out: pass_0, 5 + block_7 # from: block_0 + # in: pass_0 + putobject_INT2FIX_1_ # out: out_0 + # to: block_8 + # out: pass_0, 7 + block_8 # from: block_5, block_7 + # in: in_0, in_1 + opt_plus # in: in_0, in_1; out: 9 + leave # in: 8 + # to: leaves + DFG + end + private def assert_decompiles(expected, source) From 7578736beb2f444a76f9ce60ca2181438922ef51 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 2 Feb 2023 16:43:46 -0500 Subject: [PATCH 09/58] More moving around and documentation --- lib/syntax_tree/yarv/control_flow_graph.rb | 136 +++++++++++---------- lib/syntax_tree/yarv/data_flow_graph.rb | 18 +-- 2 files changed, 78 insertions(+), 76 deletions(-) diff --git a/lib/syntax_tree/yarv/control_flow_graph.rb b/lib/syntax_tree/yarv/control_flow_graph.rb index cd8a8324..fa9823f1 100644 --- a/lib/syntax_tree/yarv/control_flow_graph.rb +++ b/lib/syntax_tree/yarv/control_flow_graph.rb @@ -14,6 +14,64 @@ module YARV # cfg = SyntaxTree::YARV::ControlFlowGraph.compile(iseq) # class ControlFlowGraph + # This is the instruction sequence that this control flow graph + # corresponds to. + attr_reader :iseq + + # This is the list of instructions that this control flow graph contains. + # It is effectively the same as the list of instructions in the + # instruction sequence but with line numbers and events filtered out. + attr_reader :insns + + # This is the set of basic blocks that this control-flow graph contains. 
+ attr_reader :blocks + + def initialize(iseq, insns, blocks) + @iseq = iseq + @insns = insns + @blocks = blocks + end + + def disasm + fmt = Disassembler.new + output = StringIO.new + output.puts "== cfg #{iseq.name}" + + blocks.each do |block| + output.print(block.id) + + unless block.predecessors.empty? + output.print(" # from: #{block.predecessors.map(&:id).join(", ")}") + end + + output.puts + + block.insns.each do |insn| + output.print(" ") + output.puts(insn.disasm(fmt)) + end + + successors = block.successors.map(&:id) + successors << "leaves" if block.last.leaves? + output.print(" # to: #{successors.join(", ")}") unless successors.empty? + + output.puts + end + + output.string + end + + # This method is used to verify that the control flow graph is well + # formed. It does this by checking that each basic block is itself well + # formed. + def verify + blocks.each(&:verify) + end + + def self.compile(iseq) + Compiler.new(iseq).compile + end + # This object represents a single basic block, wherein all contained # instructions do not branch except for the last one. class BasicBlock @@ -28,10 +86,10 @@ class BasicBlock attr_reader :insns # This is an array of basic blocks that are predecessors to this block. - attr_reader :preds + attr_reader :predecessors # This is an array of basic blocks that are successors to this block. - attr_reader :succs + attr_reader :successors def initialize(block_start, insns) @id = "block_#{block_start}" @@ -39,8 +97,8 @@ def initialize(block_start, insns) @block_start = block_start @insns = insns - @preds = [] - @succs = [] + @predecessors = [] + @successors = [] end # This method is used to verify that the basic block is well formed. It @@ -122,81 +180,25 @@ def build_basic_blocks end end - # Now we need to connect the blocks by letting them know which blocks - # precede them and which blocks follow them. + # Connect the blocks by letting them know which blocks precede them and + # which blocks succeed them. 
def connect_basic_blocks(blocks) blocks.each do |block_start, block| insn = block.last if insn.branches? && insn.respond_to?(:label) - block.succs << blocks.fetch(labels[insn.label]) + block.successors << blocks.fetch(labels[insn.label]) end if (!insn.branches? && !insn.leaves?) || insn.falls_through? - block.succs << blocks.fetch(block_start + block.insns.length) + block.successors << blocks.fetch(block_start + block.insns.length) end - block.succs.each { |succ| succ.preds << block } - end - end - end - - # This is the instruction sequence that this control flow graph - # corresponds to. - attr_reader :iseq - - # This is the list of instructions that this control flow graph contains. - # It is effectively the same as the list of instructions in the - # instruction sequence but with line numbers and events filtered out. - attr_reader :insns - - # This is the set of basic blocks that this control-flow graph contains. - attr_reader :blocks - - def initialize(iseq, insns, blocks) - @iseq = iseq - @insns = insns - @blocks = blocks - end - - def disasm - fmt = Disassembler.new - output = StringIO.new - output.puts "== cfg #{iseq.name}" - - blocks.each do |block| - output.print(block.id) - - unless block.preds.empty? - output.print(" # from: #{block.preds.map(&:id).join(", ")}") - end - - output.puts - - block.insns.each do |insn| - output.print(" ") - output.puts(insn.disasm(fmt)) + block.successors.each do |successor| + successor.predecessors << block + end end - - succs = block.succs.map(&:id) - succs << "leaves" if block.last.leaves? - output.print(" # to: #{succs.join(", ")}") unless succs.empty? - - output.puts end - - output.string - end - - # This method is used to verify that the control flow graph is well - # formed. It does this by checking that each basic block is itself well - # formed. 
- def verify - blocks.each(&:verify) - end - - def self.compile(iseq) - Compiler.new(iseq).compile end end end diff --git a/lib/syntax_tree/yarv/data_flow_graph.rb b/lib/syntax_tree/yarv/data_flow_graph.rb index b028c521..13089dc7 100644 --- a/lib/syntax_tree/yarv/data_flow_graph.rb +++ b/lib/syntax_tree/yarv/data_flow_graph.rb @@ -108,7 +108,7 @@ def self.compile(cfg) until stack.empty? succ = stack.pop succ_flow = block_flows.fetch(succ.block_start) - succ.preds.each do |pred| + succ.predecessors.each do |pred| pred_flow = block_flows.fetch(pred.block_start) # Does a predecessor block have fewer outputs than the successor @@ -139,12 +139,12 @@ def self.compile(cfg) cfg.blocks.each do |pred| pred_flow = block_flows.fetch(pred.block_start) - if pred.succs.empty? + if pred.successors.empty? # With no successors, there should be no output arguments. raise unless pred_flow.out.empty? else # Check with successor... - pred.succs.each do |succ| + pred.successors.each do |succ| succ_flow = block_flows.fetch(succ.block_start) # The predecessor should have as many output arguments as the @@ -165,8 +165,8 @@ def disasm cfg.blocks.each do |block| output.print(block.id) - unless block.preds.empty? - output.print(" # from: #{block.preds.map(&:id).join(", ")}") + unless block.predecessors.empty? + output.print(" # from: #{block.predecessors.map(&:id).join(", ")}") end output.puts @@ -198,9 +198,9 @@ def disasm output.puts end - succs = block.succs.map(&:id) - succs << "leaves" if block.last.leaves? - output.puts(" # to: #{succs.join(", ")}") unless succs.empty? + successors = block.successors.map(&:id) + successors << "leaves" if block.last.leaves? + output.puts(" # to: #{successors.join(", ")}") unless successors.empty? unless block_flow.out.empty? 
output.puts " # out: #{block_flow.out.join(", ")}" @@ -211,4 +211,4 @@ def disasm end end end -end \ No newline at end of file +end From 7088c153057d92bbb03feb5120214fcfcdd553ea Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 2 Feb 2023 16:57:23 -0500 Subject: [PATCH 10/58] Support multiple branch targets per instruction --- lib/syntax_tree/yarv/control_flow_graph.rb | 17 +++++++----- lib/syntax_tree/yarv/instructions.rb | 30 ++++++++++------------ lib/syntax_tree/yarv/legacy.rb | 4 +-- 3 files changed, 26 insertions(+), 25 deletions(-) diff --git a/lib/syntax_tree/yarv/control_flow_graph.rb b/lib/syntax_tree/yarv/control_flow_graph.rb index fa9823f1..1d271768 100644 --- a/lib/syntax_tree/yarv/control_flow_graph.rb +++ b/lib/syntax_tree/yarv/control_flow_graph.rb @@ -105,7 +105,7 @@ def initialize(block_start, insns) # checks that the only instruction in this basic block that branches is # the last instruction. def verify - insns[0...-1].each { |insn| raise if insn.branches? } + insns[0...-1].each { |insn| raise unless insn.branch_targets.empty? } end def last @@ -157,8 +157,13 @@ def find_basic_block_starts block_starts = Set.new([0]) insns.each_with_index do |insn, index| - if insn.branches? - block_starts.add(labels[insn.label]) if insn.respond_to?(:label) + branch_targets = insn.branch_targets + + if branch_targets.any? + branch_targets.each do |branch_target| + block_starts.add(labels[branch_target]) + end + block_starts.add(index + 1) if insn.falls_through? end end @@ -186,11 +191,11 @@ def connect_basic_blocks(blocks) blocks.each do |block_start, block| insn = block.last - if insn.branches? && insn.respond_to?(:label) - block.successors << blocks.fetch(labels[insn.label]) + insn.branch_targets.each do |branch_target| + block.successors << blocks.fetch(labels[branch_target]) end - if (!insn.branches? && !insn.leaves?) || insn.falls_through? + if (insn.branch_targets.empty? && !insn.leaves?) || insn.falls_through? 
block.successors << blocks.fetch(block_start + block.insns.length) end diff --git a/lib/syntax_tree/yarv/instructions.rb b/lib/syntax_tree/yarv/instructions.rb index c387e763..97ccce15 100644 --- a/lib/syntax_tree/yarv/instructions.rb +++ b/lib/syntax_tree/yarv/instructions.rb @@ -90,9 +90,9 @@ def pops 0 end - # Whether or not this instruction is a branch instruction. - def branches? - false + # This returns an array of labels. + def branch_targets + [] end # Whether or not this instruction leaves the current frame. @@ -261,8 +261,8 @@ def call(vm) vm.jump(label) if vm.pop end - def branches? - true + def branch_targets + [label] end def falls_through? @@ -322,8 +322,8 @@ def call(vm) vm.jump(label) if vm.pop.nil? end - def branches? - true + def branch_targets + [label] end def falls_through? @@ -382,8 +382,8 @@ def call(vm) vm.jump(label) unless vm.pop end - def branches? - true + def branch_targets + [label] end def falls_through? @@ -2237,8 +2237,8 @@ def call(vm) vm.jump(label) end - def branches? - true + def branch_targets + [label] end end @@ -2283,10 +2283,6 @@ def call(vm) vm.leave end - def branches? - true - end - def leaves? true end @@ -2998,8 +2994,8 @@ def call(vm) vm.jump(case_dispatch_hash.fetch(vm.pop, else_label)) end - def branches? - true + def branch_targets + case_dispatch_hash.values.push(else_label) end def falls_through? diff --git a/lib/syntax_tree/yarv/legacy.rb b/lib/syntax_tree/yarv/legacy.rb index 8e12ff16..e20729d9 100644 --- a/lib/syntax_tree/yarv/legacy.rb +++ b/lib/syntax_tree/yarv/legacy.rb @@ -115,8 +115,8 @@ def call(vm) vm.push(nil) end - def branches? - true + def branch_targets + [label] end def falls_through? 
From b8dc90189aeb476913d8e12f2304b7223f5ccba9 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 2 Feb 2023 16:58:34 -0500 Subject: [PATCH 11/58] Remove BasicBlock.last --- lib/syntax_tree/yarv/control_flow_graph.rb | 8 ++------ lib/syntax_tree/yarv/data_flow_graph.rb | 2 +- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/lib/syntax_tree/yarv/control_flow_graph.rb b/lib/syntax_tree/yarv/control_flow_graph.rb index 1d271768..1761127c 100644 --- a/lib/syntax_tree/yarv/control_flow_graph.rb +++ b/lib/syntax_tree/yarv/control_flow_graph.rb @@ -52,7 +52,7 @@ def disasm end successors = block.successors.map(&:id) - successors << "leaves" if block.last.leaves? + successors << "leaves" if block.insns.last.leaves? output.print(" # to: #{successors.join(", ")}") unless successors.empty? output.puts @@ -107,10 +107,6 @@ def initialize(block_start, insns) def verify insns[0...-1].each { |insn| raise unless insn.branch_targets.empty? } end - - def last - insns.last - end end # This class is responsible for creating a control flow graph from the @@ -189,7 +185,7 @@ def build_basic_blocks # which blocks succeed them. def connect_basic_blocks(blocks) blocks.each do |block_start, block| - insn = block.last + insn = block.insns.last insn.branch_targets.each do |branch_target| block.successors << blocks.fetch(labels[branch_target]) diff --git a/lib/syntax_tree/yarv/data_flow_graph.rb b/lib/syntax_tree/yarv/data_flow_graph.rb index 13089dc7..2af51883 100644 --- a/lib/syntax_tree/yarv/data_flow_graph.rb +++ b/lib/syntax_tree/yarv/data_flow_graph.rb @@ -199,7 +199,7 @@ def disasm end successors = block.successors.map(&:id) - successors << "leaves" if block.last.leaves? + successors << "leaves" if block.insns.last.leaves? output.puts(" # to: #{successors.join(", ")}") unless successors.empty? unless block_flow.out.empty? 
From 92cbfcae048c6867d0d5a6db5265591ed0b53076 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 2 Feb 2023 17:03:49 -0500 Subject: [PATCH 12/58] Provide BasicBlock.each_with_index --- lib/syntax_tree/yarv/control_flow_graph.rb | 7 ++++++- lib/syntax_tree/yarv/data_flow_graph.rb | 20 ++++++++++---------- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/lib/syntax_tree/yarv/control_flow_graph.rb b/lib/syntax_tree/yarv/control_flow_graph.rb index 1761127c..5b4b5605 100644 --- a/lib/syntax_tree/yarv/control_flow_graph.rb +++ b/lib/syntax_tree/yarv/control_flow_graph.rb @@ -101,6 +101,12 @@ def initialize(block_start, insns) @successors = [] end + # Yield each instruction in this basic block along with its index from + # the original instruction sequence. + def each_with_index(&block) + insns.each.with_index(block_start, &block) + end + # This method is used to verify that the basic block is well formed. It # checks that the only instruction in this basic block that branches is # the last instruction. @@ -171,7 +177,6 @@ def find_basic_block_starts # block. They are keyed by the index of their first instruction. def build_basic_blocks block_starts = find_basic_block_starts - blocks = {} block_starts.each_with_index.to_h do |block_start, block_index| block_end = (block_starts[(block_index + 1)..] + [insns.length]).min diff --git a/lib/syntax_tree/yarv/data_flow_graph.rb b/lib/syntax_tree/yarv/data_flow_graph.rb index 2af51883..295308bd 100644 --- a/lib/syntax_tree/yarv/data_flow_graph.rb +++ b/lib/syntax_tree/yarv/data_flow_graph.rb @@ -38,19 +38,19 @@ def self.compile(cfg) # blocks. block_flows = {} cfg.blocks.each do |block| - block_flows[block.block_start] = DataFlow.new + block_flows[block.id] = DataFlow.new end # Now, discover the data flow within each basic block. Using an abstract # stack, connect from consumers of data to the producers of that data. 
cfg.blocks.each do |block| - block_flow = block_flows.fetch(block.block_start) + block_flow = block_flows.fetch(block.id) stack = [] stack_initial_depth = 0 # Go through each instruction in the block... - block.insns.each.with_index(block.block_start) do |insn, index| + block.each_with_index do |insn, index| insn_flow = insn_flows[index] # How many values will be missing from the local stack to run this @@ -107,9 +107,9 @@ def self.compile(cfg) stack = [*cfg.blocks] until stack.empty? succ = stack.pop - succ_flow = block_flows.fetch(succ.block_start) + succ_flow = block_flows.fetch(succ.id) succ.predecessors.each do |pred| - pred_flow = block_flows.fetch(pred.block_start) + pred_flow = block_flows.fetch(pred.id) # Does a predecessor block have fewer outputs than the successor # has inputs? @@ -132,12 +132,12 @@ def self.compile(cfg) # Verify that we constructed the data flow graph correctly. Check that # the first block has no arguments. - raise unless block_flows.fetch(cfg.blocks.first.block_start).in.empty? + raise unless block_flows.fetch(cfg.blocks.first.id).in.empty? # Check all control flow edges between blocks pass the right number of # arguments. cfg.blocks.each do |pred| - pred_flow = block_flows.fetch(pred.block_start) + pred_flow = block_flows.fetch(pred.id) if pred.successors.empty? # With no successors, there should be no output arguments. @@ -145,7 +145,7 @@ def self.compile(cfg) else # Check with successor... pred.successors.each do |succ| - succ_flow = block_flows.fetch(succ.block_start) + succ_flow = block_flows.fetch(succ.id) # The predecessor should have as many output arguments as the # success has input arguments. @@ -170,12 +170,12 @@ def disasm end output.puts - block_flow = block_flows.fetch(block.block_start) + block_flow = block_flows.fetch(block.id) unless block_flow.in.empty? 
output.puts " # in: #{block_flow.in.join(", ")}" end - block.insns.each.with_index(block.block_start) do |insn, index| + block.each_with_index do |insn, index| output.print(" ") output.print(insn.disasm(fmt)) From 439ffb6336f9af6c2386c291bb529488c6d79d03 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 2 Feb 2023 18:27:03 -0500 Subject: [PATCH 13/58] Refactor various graphs --- lib/syntax_tree.rb | 1 + lib/syntax_tree/yarv/basic_block.rb | 47 ++++ lib/syntax_tree/yarv/control_flow_graph.rb | 65 +---- lib/syntax_tree/yarv/data_flow_graph.rb | 296 +++++++++++---------- 4 files changed, 218 insertions(+), 191 deletions(-) create mode 100644 lib/syntax_tree/yarv/basic_block.rb diff --git a/lib/syntax_tree.rb b/lib/syntax_tree.rb index c6f1223b..e0e2a6be 100644 --- a/lib/syntax_tree.rb +++ b/lib/syntax_tree.rb @@ -29,6 +29,7 @@ require_relative "syntax_tree/index" require_relative "syntax_tree/yarv" +require_relative "syntax_tree/yarv/basic_block" require_relative "syntax_tree/yarv/bf" require_relative "syntax_tree/yarv/compiler" require_relative "syntax_tree/yarv/control_flow_graph" diff --git a/lib/syntax_tree/yarv/basic_block.rb b/lib/syntax_tree/yarv/basic_block.rb new file mode 100644 index 00000000..774a4c00 --- /dev/null +++ b/lib/syntax_tree/yarv/basic_block.rb @@ -0,0 +1,47 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + # This object represents a single basic block, wherein all contained + # instructions do not branch except for the last one. + class BasicBlock + # This is the unique identifier for this basic block. + attr_reader :id + + # This is the index into the list of instructions where this block starts. + attr_reader :block_start + + # This is the set of instructions that this block contains. + attr_reader :insns + + # This is an array of basic blocks that lead into this block. + attr_reader :incoming_blocks + + # This is an array of basic blocks that this block leads into. 
+ attr_reader :outgoing_blocks + + def initialize(block_start, insns) + @id = "block_#{block_start}" + + @block_start = block_start + @insns = insns + + @incoming_blocks = [] + @outgoing_blocks = [] + end + + # Yield each instruction in this basic block along with its index from the + # original instruction sequence. + def each_with_index(&block) + insns.each.with_index(block_start, &block) + end + + # This method is used to verify that the basic block is well formed. It + # checks that the only instruction in this basic block that branches is + # the last instruction. + def verify + insns[0...-1].each { |insn| raise unless insn.branch_targets.empty? } + end + end + end +end diff --git a/lib/syntax_tree/yarv/control_flow_graph.rb b/lib/syntax_tree/yarv/control_flow_graph.rb index 5b4b5605..27df308e 100644 --- a/lib/syntax_tree/yarv/control_flow_graph.rb +++ b/lib/syntax_tree/yarv/control_flow_graph.rb @@ -40,8 +40,8 @@ def disasm blocks.each do |block| output.print(block.id) - unless block.predecessors.empty? - output.print(" # from: #{block.predecessors.map(&:id).join(", ")}") + unless block.incoming_blocks.empty? + output.print(" # from: #{block.incoming_blocks.map(&:id).join(", ")}") end output.puts @@ -51,9 +51,9 @@ def disasm output.puts(insn.disasm(fmt)) end - successors = block.successors.map(&:id) - successors << "leaves" if block.insns.last.leaves? - output.print(" # to: #{successors.join(", ")}") unless successors.empty? + dests = block.outgoing_blocks.map(&:id) + dests << "leaves" if block.insns.last.leaves? + output.print(" # to: #{dests.join(", ")}") unless dests.empty? output.puts end @@ -72,49 +72,6 @@ def self.compile(iseq) Compiler.new(iseq).compile end - # This object represents a single basic block, wherein all contained - # instructions do not branch except for the last one. - class BasicBlock - # This is the unique identifier for this basic block. 
- attr_reader :id - - # This is the index into the list of instructions where this block - # starts. - attr_reader :block_start - - # This is the set of instructions that this block contains. - attr_reader :insns - - # This is an array of basic blocks that are predecessors to this block. - attr_reader :predecessors - - # This is an array of basic blocks that are successors to this block. - attr_reader :successors - - def initialize(block_start, insns) - @id = "block_#{block_start}" - - @block_start = block_start - @insns = insns - - @predecessors = [] - @successors = [] - end - - # Yield each instruction in this basic block along with its index from - # the original instruction sequence. - def each_with_index(&block) - insns.each.with_index(block_start, &block) - end - - # This method is used to verify that the basic block is well formed. It - # checks that the only instruction in this basic block that branches is - # the last instruction. - def verify - insns[0...-1].each { |insn| raise unless insn.branch_targets.empty? } - end - end - # This class is responsible for creating a control flow graph from the # given instruction sequence. class Compiler @@ -186,22 +143,22 @@ def build_basic_blocks end end - # Connect the blocks by letting them know which blocks precede them and - # which blocks succeed them. + # Connect the blocks by letting them know which blocks are incoming and + # outgoing from each block. def connect_basic_blocks(blocks) blocks.each do |block_start, block| insn = block.insns.last insn.branch_targets.each do |branch_target| - block.successors << blocks.fetch(labels[branch_target]) + block.outgoing_blocks << blocks.fetch(labels[branch_target]) end if (insn.branch_targets.empty? && !insn.leaves?) || insn.falls_through? 
- block.successors << blocks.fetch(block_start + block.insns.length) + block.outgoing_blocks << blocks.fetch(block_start + block.insns.length) end - block.successors.each do |successor| - successor.predecessors << block + block.outgoing_blocks.each do |outgoing_block| + outgoing_block.incoming_blocks << block end end end diff --git a/lib/syntax_tree/yarv/data_flow_graph.rb b/lib/syntax_tree/yarv/data_flow_graph.rb index 295308bd..737518ce 100644 --- a/lib/syntax_tree/yarv/data_flow_graph.rb +++ b/lib/syntax_tree/yarv/data_flow_graph.rb @@ -26,138 +26,6 @@ def initialize(cfg, insn_flows, block_flows) @block_flows = block_flows end - def self.compile(cfg) - # First, create a data structure to encode data flow between - # instructions. - insn_flows = {} - cfg.insns.each_with_index do |insn, index| - insn_flows[index] = DataFlow.new - end - - # Next, create a data structure to encode data flow between basic - # blocks. - block_flows = {} - cfg.blocks.each do |block| - block_flows[block.id] = DataFlow.new - end - - # Now, discover the data flow within each basic block. Using an abstract - # stack, connect from consumers of data to the producers of that data. - cfg.blocks.each do |block| - block_flow = block_flows.fetch(block.id) - - stack = [] - stack_initial_depth = 0 - - # Go through each instruction in the block... - block.each_with_index do |insn, index| - insn_flow = insn_flows[index] - - # How many values will be missing from the local stack to run this - # instruction? - missing_stack_values = insn.pops - stack.size - - # For every value the instruction pops off the stack... - insn.pops.times do - # Was the value it pops off from another basic block? - if stack.empty? - # This is a basic block argument. - name = :"in_#{missing_stack_values - 1}" - - insn_flow.in.unshift(name) - block_flow.in.unshift(name) - - stack_initial_depth += 1 - missing_stack_values -= 1 - else - # Connect this consumer to the producer of the value. 
- insn_flow.in.unshift(stack.pop) - end - end - - # Record on our abstract stack that this instruction pushed - # this value onto the stack. - insn.pushes.times { stack << index } - end - - # Values that are left on the stack after going through all - # instructions are arguments to the basic block that we jump to. - stack.reverse_each.with_index do |producer, index| - block_flow.out << producer - insn_flows[producer].out << :"out_#{index}" - end - end - - # Go backwards and connect from producers to consumers. - cfg.insns.each_with_index do |insn, index| - # For every instruction that produced a value used in this - # instruction... - insn_flows[index].in.each do |producer| - # If it's actually another instruction and not a basic block - # argument... - if producer.is_a?(Integer) - # Record in the producing instruction that it produces a value - # used by this construction. - insn_flows[producer].out << index - end - end - end - - # Now, discover the data flow between basic blocks. - stack = [*cfg.blocks] - until stack.empty? - succ = stack.pop - succ_flow = block_flows.fetch(succ.id) - succ.predecessors.each do |pred| - pred_flow = block_flows.fetch(pred.id) - - # Does a predecessor block have fewer outputs than the successor - # has inputs? - if pred_flow.out.size < succ_flow.in.size - # If so then add arguments to pass data through from the - # predecessor's predecessors. - (succ_flow.in.size - pred_flow.out.size).times do |index| - name = :"pass_#{index}" - pred_flow.in.unshift(name) - pred_flow.out.unshift(name) - end - - # Since we modified the predecessor, add it back to the worklist - # so it'll be considered as a successor again, and propogate the - # global data flow back up the control flow graph. - stack << pred - end - end - end - - # Verify that we constructed the data flow graph correctly. Check that - # the first block has no arguments. - raise unless block_flows.fetch(cfg.blocks.first.id).in.empty? 
- - # Check all control flow edges between blocks pass the right number of - # arguments. - cfg.blocks.each do |pred| - pred_flow = block_flows.fetch(pred.id) - - if pred.successors.empty? - # With no successors, there should be no output arguments. - raise unless pred_flow.out.empty? - else - # Check with successor... - pred.successors.each do |succ| - succ_flow = block_flows.fetch(succ.id) - - # The predecessor should have as many output arguments as the - # success has input arguments. - raise unless pred_flow.out.size == succ_flow.in.size - end - end - end - - # Finally we can return the data flow graph. - new(cfg, insn_flows, block_flows) - end - def disasm fmt = Disassembler.new output = StringIO.new @@ -165,8 +33,9 @@ def disasm cfg.blocks.each do |block| output.print(block.id) - unless block.predecessors.empty? - output.print(" # from: #{block.predecessors.map(&:id).join(", ")}") + unless block.incoming_blocks.empty? + srcs = block.incoming_blocks.map(&:id) + output.print(" # from: #{srcs.join(", ")}") end output.puts @@ -198,9 +67,9 @@ def disasm output.puts end - successors = block.successors.map(&:id) - successors << "leaves" if block.insns.last.leaves? - output.puts(" # to: #{successors.join(", ")}") unless successors.empty? + dests = block.outgoing_blocks.map(&:id) + dests << "leaves" if block.insns.last.leaves? + output.puts(" # to: #{dests.join(", ")}") unless dests.empty? unless block_flow.out.empty? output.puts " # out: #{block_flow.out.join(", ")}" @@ -209,6 +78,159 @@ def disasm output.string end + + # Verify that we constructed the data flow graph correctly. + def verify + # Check that the first block has no arguments. + raise unless block_flows.fetch(cfg.blocks.first.id).in.empty? + + # Check all control flow edges between blocks pass the right number of + # arguments. + cfg.blocks.each do |block| + block_flow = block_flows.fetch(block.id) + + if block.outgoing_blocks.empty? + # With no outgoing blocks, there should be no output arguments. 
+ raise unless block_flow.out.empty? + else + # Check with outgoing blocks... + block.outgoing_blocks.each do |outgoing_block| + outgoing_flow = block_flows.fetch(outgoing_block.id) + + # The block should have as many output arguments as the + # outgoing block has input arguments. + raise unless block_flow.out.size == outgoing_flow.in.size + end + end + end + end + + def self.compile(cfg) + Compiler.new(cfg).compile + end + + # This class is responsible for creating a data flow graph from the given + # control flow graph. + class Compiler + attr_reader :cfg, :insn_flows, :block_flows + + def initialize(cfg) + @cfg = cfg + + # This data structure will hold the data flow between instructions + # within individual basic blocks. + @insn_flows = {} + cfg.insns.each_with_index do |insn, index| + @insn_flows[index] = DataFlow.new + end + + # This data structure will hold the data flow between basic blocks. + @block_flows = {} + cfg.blocks.each do |block| + @block_flows[block.id] = DataFlow.new + end + end + + def compile + find_local_flow + find_global_flow + DataFlowGraph.new(cfg, insn_flows, block_flows).tap(&:verify) + end + + private + + # Find the data flow within each basic block. Using an abstract stack, + # connect from consumers of data to the producers of that data. + def find_local_flow + cfg.blocks.each do |block| + block_flow = block_flows.fetch(block.id) + stack = [] + + # Go through each instruction in the block... + block.each_with_index do |insn, index| + insn_flow = insn_flows[index] + + # How many values will be missing from the local stack to run this + # instruction? + missing = insn.pops - stack.size + + # For every value the instruction pops off the stack... + insn.pops.times do + # Was the value it pops off from another basic block? + if stack.empty? + # This is a basic block argument. + missing -= 1 + name = :"in_#{missing}" + + insn_flow.in.unshift(name) + block_flow.in.unshift(name) + else + # Connect this consumer to the producer of the value. 
+ insn_flow.in.unshift(stack.pop) + end + end + + # Record on our abstract stack that this instruction pushed + # this value onto the stack. + insn.pushes.times { stack << index } + end + + # Values that are left on the stack after going through all + # instructions are arguments to the basic block that we jump to. + stack.reverse_each.with_index do |producer, index| + block_flow.out << producer + insn_flows[producer].out << :"out_#{index}" + end + end + + # Go backwards and connect from producers to consumers. + cfg.insns.each_with_index do |insn, index| + # For every instruction that produced a value used in this + # instruction... + insn_flows[index].in.each do |producer| + # If it's actually another instruction and not a basic block + # argument... + if producer.is_a?(Integer) + # Record in the producing instruction that it produces a value + # used by this construction. + insn_flows[producer].out << index + end + end + end + end + + # Find the data that flows between basic blocks. + def find_global_flow + stack = [*cfg.blocks] + + until stack.empty? + block = stack.pop + block_flow = block_flows.fetch(block.id) + + block.incoming_blocks.each do |incoming_block| + incoming_flow = block_flows.fetch(incoming_block.id) + + # Does a predecessor block have fewer outputs than the successor + # has inputs? + if incoming_flow.out.size < block_flow.in.size + # If so then add arguments to pass data through from the + # incoming block's incoming blocks. + (block_flow.in.size - incoming_flow.out.size).times do |index| + name = :"pass_#{index}" + + incoming_flow.in.unshift(name) + incoming_flow.out.unshift(name) + end + + # Since we modified the incoming block, add it back to the stack + # so it'll be considered as an outgoing block again, and + # propogate the global data flow back up the control flow graph. 
+ stack << incoming_block + end + end + end + end + end end end end From f600b0694e2c64bb4c6ce7d0d29d60533fdc1ab6 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 2 Feb 2023 20:07:06 -0500 Subject: [PATCH 14/58] Properly use the disassembler for the cfg --- lib/syntax_tree/yarv/control_flow_graph.rb | 22 ++-- lib/syntax_tree/yarv/disassembler.rb | 112 ++++++++++--------- lib/syntax_tree/yarv/instruction_sequence.rb | 7 +- test/yarv_test.rb | 20 ++-- 4 files changed, 82 insertions(+), 79 deletions(-) diff --git a/lib/syntax_tree/yarv/control_flow_graph.rb b/lib/syntax_tree/yarv/control_flow_graph.rb index 27df308e..3b3f9b82 100644 --- a/lib/syntax_tree/yarv/control_flow_graph.rb +++ b/lib/syntax_tree/yarv/control_flow_graph.rb @@ -33,32 +33,28 @@ def initialize(iseq, insns, blocks) end def disasm - fmt = Disassembler.new - output = StringIO.new - output.puts "== cfg #{iseq.name}" + fmt = Disassembler.new(iseq) + fmt.output.puts "== cfg #{iseq.name}" blocks.each do |block| - output.print(block.id) + fmt.output.print(block.id) unless block.incoming_blocks.empty? - output.print(" # from: #{block.incoming_blocks.map(&:id).join(", ")}") + fmt.output.print(" # from: #{block.incoming_blocks.map(&:id).join(", ")}") end - output.puts + fmt.output.puts - block.insns.each do |insn| - output.print(" ") - output.puts(insn.disasm(fmt)) - end + fmt.with_prefix(" ") { fmt.format_insns!(block.insns) } dests = block.outgoing_blocks.map(&:id) dests << "leaves" if block.insns.last.leaves? - output.print(" # to: #{dests.join(", ")}") unless dests.empty? + fmt.output.print(" # to: #{dests.join(", ")}") unless dests.empty? 
- output.puts + fmt.output.puts end - output.string + fmt.string end # This method is used to verify that the control flow graph is well diff --git a/lib/syntax_tree/yarv/disassembler.rb b/lib/syntax_tree/yarv/disassembler.rb index d303bcb7..0b445e02 100644 --- a/lib/syntax_tree/yarv/disassembler.rb +++ b/lib/syntax_tree/yarv/disassembler.rb @@ -4,15 +4,16 @@ module SyntaxTree module YARV class Disassembler attr_reader :output, :queue + attr_reader :current_prefix attr_accessor :current_iseq - def initialize + def initialize(current_iseq = nil) @output = StringIO.new @queue = [] @current_prefix = "" - @current_iseq = nil + @current_iseq = current_iseq end ######################################################################## @@ -97,16 +98,69 @@ def object(value) end ######################################################################## - # Main entrypoint + # Entrypoints ######################################################################## + def string + output.string + end + def format! while (@current_iseq = queue.shift) output << "\n" if output.pos > 0 format_iseq(@current_iseq) end + end - output.string + def format_insns!(insns, length = 0) + events = [] + lines = [] + + insns.each do |insn| + case insn + when Integer + lines << insn + when Symbol + events << event(insn) + when InstructionSequence::Label + # skip + else + output << "#{current_prefix}%04d " % length + + disasm = insn.disasm(self) + output << disasm + + if lines.any? + output << " " * (65 - disasm.length) if disasm.length < 65 + elsif events.any? + output << " " * (39 - disasm.length) if disasm.length < 39 + end + + if lines.any? + output << "(%4d)" % lines.last + lines.clear + end + + if events.any? 
+ output << "[#{events.join}]" + events.clear + end + + output << "\n" + length += insn.length + end + end + end + + def with_prefix(value) + previous = @current_prefix + + begin + @current_prefix = value + yield + ensure + @current_prefix = previous + end end private @@ -157,55 +211,7 @@ def format_iseq(iseq) output << "#{current_prefix}#{locals.join(" ")}\n" end - length = 0 - events = [] - lines = [] - - iseq.insns.each do |insn| - case insn - when Integer - lines << insn - when Symbol - events << event(insn) - when InstructionSequence::Label - # skip - else - output << "#{current_prefix}%04d " % length - - disasm = insn.disasm(self) - output << disasm - - if lines.any? - output << " " * (65 - disasm.length) if disasm.length < 65 - elsif events.any? - output << " " * (39 - disasm.length) if disasm.length < 39 - end - - if lines.any? - output << "(%4d)" % lines.last - lines.clear - end - - if events.any? - output << "[#{events.join}]" - events.clear - end - - output << "\n" - length += insn.length - end - end - end - - def with_prefix(value) - previous = @current_prefix - - begin - @current_prefix = value - yield - ensure - @current_prefix = previous - end + format_insns!(iseq.insns) end end end diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb index 6aa7279e..1281eba4 100644 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -270,9 +270,10 @@ def to_a end def disasm - disassembler = Disassembler.new - disassembler.enqueue(self) - disassembler.format! + fmt = Disassembler.new + fmt.enqueue(self) + fmt.format! 
+ fmt.string end # This method converts our linked list of instructions into a final array diff --git a/test/yarv_test.rb b/test/yarv_test.rb index e37afb63..91147dc3 100644 --- a/test/yarv_test.rb +++ b/test/yarv_test.rb @@ -305,22 +305,22 @@ def test_cfg assert_equal(<<~CFG, cfg.disasm) == cfg block_0 - putobject 100 - putobject 14 - putobject_INT2FIX_0_ - opt_lt - branchunless 13 + 0000 putobject 100 + 0002 putobject 14 + 0004 putobject_INT2FIX_0_ + 0005 opt_lt + 0007 branchunless 13 # to: block_7, block_5 block_5 # from: block_0 - putobject -1 - jump 14 + 0000 putobject -1 + 0002 jump 14 # to: block_8 block_7 # from: block_0 - putobject_INT2FIX_1_ + 0000 putobject_INT2FIX_1_ # to: block_8 block_8 # from: block_5, block_7 - opt_plus - leave + 0000 opt_plus + 0002 leave # to: leaves CFG end From d66c977eb37d7f01f3221fdc0bcde086e56e1b8e Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 2 Feb 2023 20:43:29 -0500 Subject: [PATCH 15/58] Use length for offsets to make it more readable --- lib/syntax_tree/yarv/basic_block.rb | 10 +++- lib/syntax_tree/yarv/control_flow_graph.rb | 69 +++++++++++++--------- lib/syntax_tree/yarv/data_flow_graph.rb | 20 +++---- test/yarv_test.rb | 57 +++++++++--------- 4 files changed, 90 insertions(+), 66 deletions(-) diff --git a/lib/syntax_tree/yarv/basic_block.rb b/lib/syntax_tree/yarv/basic_block.rb index 774a4c00..6798a092 100644 --- a/lib/syntax_tree/yarv/basic_block.rb +++ b/lib/syntax_tree/yarv/basic_block.rb @@ -32,8 +32,14 @@ def initialize(block_start, insns) # Yield each instruction in this basic block along with its index from the # original instruction sequence. - def each_with_index(&block) - insns.each.with_index(block_start, &block) + def each_with_length + return enum_for(:each_with_length) unless block_given? + + length = block_start + insns.each do |insn| + yield insn, length + length += insn.length + end end # This method is used to verify that the basic block is well formed. 
It diff --git a/lib/syntax_tree/yarv/control_flow_graph.rb b/lib/syntax_tree/yarv/control_flow_graph.rb index 3b3f9b82..bcf9f26e 100644 --- a/lib/syntax_tree/yarv/control_flow_graph.rb +++ b/lib/syntax_tree/yarv/control_flow_graph.rb @@ -34,24 +34,23 @@ def initialize(iseq, insns, blocks) def disasm fmt = Disassembler.new(iseq) - fmt.output.puts "== cfg #{iseq.name}" + fmt.output.print("== cfg: #:1 ") + fmt.output.puts("(#{iseq.line},0)-(#{iseq.line},0)>") blocks.each do |block| - fmt.output.print(block.id) - - unless block.incoming_blocks.empty? - fmt.output.print(" # from: #{block.incoming_blocks.map(&:id).join(", ")}") - end - - fmt.output.puts - - fmt.with_prefix(" ") { fmt.format_insns!(block.insns) } + fmt.output.puts(block.id) + fmt.with_prefix(" ") do + unless block.incoming_blocks.empty? + from = block.incoming_blocks.map(&:id).join(", ") + fmt.output.puts("#{fmt.current_prefix}== from: #{from}") + end - dests = block.outgoing_blocks.map(&:id) - dests << "leaves" if block.insns.last.leaves? - fmt.output.print(" # to: #{dests.join(", ")}") unless dests.empty? + fmt.format_insns!(block.insns, block.block_start) - fmt.output.puts + to = block.outgoing_blocks.map(&:id) + to << "leaves" if block.insns.last.leaves? + fmt.output.puts("#{fmt.current_prefix}== to: #{to.join(", ")}") + end end fmt.string @@ -71,23 +70,34 @@ def self.compile(iseq) # This class is responsible for creating a control flow graph from the # given instruction sequence. class Compiler - attr_reader :iseq, :labels, :insns + # This is the instruction sequence that is being compiled. + attr_reader :iseq + + # This is a hash of indices in the YARV instruction sequence that point + # to their corresponding instruction. + attr_reader :insns + + # This is a hash of labels that point to their corresponding index into + # the YARV instruction sequence. Note that this is not the same as the + # index into the list of instructions on the instruction sequence + # object. 
Instead, this is the index into the C array, so it includes + # operands. + attr_reader :labels def initialize(iseq) @iseq = iseq - # We need to find all of the instructions that immediately follow - # labels so that when we are looking at instructions that branch we - # know where they branch to. + @insns = {} @labels = {} - @insns = [] + length = 0 iseq.insns.each do |insn| case insn when Instruction - @insns << insn + @insns[length] = insn + length += insn.length when InstructionSequence::Label - @labels[insn] = @insns.length + @labels[insn] = length end end end @@ -111,7 +121,7 @@ def compile def find_basic_block_starts block_starts = Set.new([0]) - insns.each_with_index do |insn, index| + insns.each do |index, insn| branch_targets = insn.branch_targets if branch_targets.any? @@ -119,7 +129,7 @@ def find_basic_block_starts block_starts.add(labels[branch_target]) end - block_starts.add(index + 1) if insn.falls_through? + block_starts.add(index + insn.length) if insn.falls_through? end end @@ -131,10 +141,14 @@ def find_basic_block_starts def build_basic_blocks block_starts = find_basic_block_starts - block_starts.each_with_index.to_h do |block_start, block_index| - block_end = (block_starts[(block_index + 1)..] + [insns.length]).min - block_insns = insns[block_start...block_end] + length = 0 + blocks = + iseq.insns.grep(Instruction).slice_after do |insn| + length += insn.length + block_starts.include?(length) + end + block_starts.zip(blocks).to_h do |block_start, block_insns| [block_start, BasicBlock.new(block_start, block_insns)] end end @@ -150,7 +164,8 @@ def connect_basic_blocks(blocks) end if (insn.branch_targets.empty? && !insn.leaves?) || insn.falls_through? 
- block.outgoing_blocks << blocks.fetch(block_start + block.insns.length) + fall_through_start = block_start + block.insns.sum(&:length) + block.outgoing_blocks << blocks.fetch(fall_through_start) end block.outgoing_blocks.each do |outgoing_block| diff --git a/lib/syntax_tree/yarv/data_flow_graph.rb b/lib/syntax_tree/yarv/data_flow_graph.rb index 737518ce..670e0daf 100644 --- a/lib/syntax_tree/yarv/data_flow_graph.rb +++ b/lib/syntax_tree/yarv/data_flow_graph.rb @@ -44,11 +44,11 @@ def disasm output.puts " # in: #{block_flow.in.join(", ")}" end - block.each_with_index do |insn, index| + block.each_with_length do |insn, length| output.print(" ") output.print(insn.disasm(fmt)) - insn_flow = insn_flows[index] + insn_flow = insn_flows[length] if insn_flow.in.empty? && insn_flow.out.empty? output.puts next @@ -120,8 +120,8 @@ def initialize(cfg) # This data structure will hold the data flow between instructions # within individual basic blocks. @insn_flows = {} - cfg.insns.each_with_index do |insn, index| - @insn_flows[index] = DataFlow.new + cfg.insns.each_key do |length| + @insn_flows[length] = DataFlow.new end # This data structure will hold the data flow between basic blocks. @@ -147,8 +147,8 @@ def find_local_flow stack = [] # Go through each instruction in the block... - block.each_with_index do |insn, index| - insn_flow = insn_flows[index] + block.each_with_length do |insn, length| + insn_flow = insn_flows[length] # How many values will be missing from the local stack to run this # instruction? @@ -172,7 +172,7 @@ def find_local_flow # Record on our abstract stack that this instruction pushed # this value onto the stack. - insn.pushes.times { stack << index } + insn.pushes.times { stack << length } end # Values that are left on the stack after going through all @@ -184,16 +184,16 @@ def find_local_flow end # Go backwards and connect from producers to consumers. 
- cfg.insns.each_with_index do |insn, index| + cfg.insns.each_key do |length| # For every instruction that produced a value used in this # instruction... - insn_flows[index].in.each do |producer| + insn_flows[length].in.each do |producer| # If it's actually another instruction and not a basic block # argument... if producer.is_a?(Integer) # Record in the producing instruction that it produces a value # used by this construction. - insn_flows[producer].out << index + insn_flows[producer].out << length end end end diff --git a/test/yarv_test.rb b/test/yarv_test.rb index 91147dc3..7a998fa4 100644 --- a/test/yarv_test.rb +++ b/test/yarv_test.rb @@ -303,25 +303,28 @@ def test_cfg cfg = SyntaxTree::YARV::ControlFlowGraph.compile(iseq) assert_equal(<<~CFG, cfg.disasm) - == cfg + == cfg: #@:1 (1,0)-(1,0)> block_0 0000 putobject 100 0002 putobject 14 0004 putobject_INT2FIX_0_ 0005 opt_lt 0007 branchunless 13 - # to: block_7, block_5 - block_5 # from: block_0 - 0000 putobject -1 - 0002 jump 14 - # to: block_8 - block_7 # from: block_0 - 0000 putobject_INT2FIX_1_ - # to: block_8 - block_8 # from: block_5, block_7 - 0000 opt_plus - 0002 leave - # to: leaves + == to: block_13, block_9 + block_9 + == from: block_0 + 0009 putobject -1 + 0011 jump 14 + == to: block_14 + block_13 + == from: block_0 + 0013 putobject_INT2FIX_1_ + == to: block_14 + block_14 + == from: block_9, block_13 + 0014 opt_plus + 0016 leave + == to: leaves CFG end @@ -335,27 +338,27 @@ def test_dfg == dfg block_0 putobject 100 # out: out_0 - putobject 14 # out: 3 - putobject_INT2FIX_0_ # out: 3 - opt_lt # in: 1, 2; out: 4 - branchunless 13 # in: 3 - # to: block_7, block_5 + putobject 14 # out: 5 + putobject_INT2FIX_0_ # out: 5 + opt_lt # in: 2, 4; out: 7 + branchunless 13 # in: 5 + # to: block_13, block_9 # out: 0 - block_5 # from: block_0 + block_9 # from: block_0 # in: pass_0 putobject -1 # out: out_0 jump 14 - # to: block_8 - # out: pass_0, 5 - block_7 # from: block_0 + # to: block_14 + # out: pass_0, 9 + 
block_13 # from: block_0 # in: pass_0 putobject_INT2FIX_1_ # out: out_0 - # to: block_8 - # out: pass_0, 7 - block_8 # from: block_5, block_7 + # to: block_14 + # out: pass_0, 13 + block_14 # from: block_9, block_13 # in: in_0, in_1 - opt_plus # in: in_0, in_1; out: 9 - leave # in: 8 + opt_plus # in: in_0, in_1; out: 16 + leave # in: 14 # to: leaves DFG end From 7d1cf1ce3aba3bc1a1251637304f298cb9f84fae Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 2 Feb 2023 20:55:33 -0500 Subject: [PATCH 16/58] Properly use disassembler for DFG --- lib/syntax_tree/yarv/control_flow_graph.rb | 3 +- lib/syntax_tree/yarv/data_flow_graph.rb | 68 +++++++++----------- lib/syntax_tree/yarv/disassembler.rb | 12 ++-- lib/syntax_tree/yarv/instruction_sequence.rb | 4 ++ test/yarv_test.rb | 51 ++++++++------- 5 files changed, 67 insertions(+), 71 deletions(-) diff --git a/lib/syntax_tree/yarv/control_flow_graph.rb b/lib/syntax_tree/yarv/control_flow_graph.rb index bcf9f26e..ef779c54 100644 --- a/lib/syntax_tree/yarv/control_flow_graph.rb +++ b/lib/syntax_tree/yarv/control_flow_graph.rb @@ -34,8 +34,7 @@ def initialize(iseq, insns, blocks) def disasm fmt = Disassembler.new(iseq) - fmt.output.print("== cfg: #:1 ") - fmt.output.puts("(#{iseq.line},0)-(#{iseq.line},0)>") + fmt.output.puts("== cfg: #{iseq.inspect}") blocks.each do |block| fmt.output.puts(block.id) diff --git a/lib/syntax_tree/yarv/data_flow_graph.rb b/lib/syntax_tree/yarv/data_flow_graph.rb index 670e0daf..09ba84a4 100644 --- a/lib/syntax_tree/yarv/data_flow_graph.rb +++ b/lib/syntax_tree/yarv/data_flow_graph.rb @@ -27,56 +27,48 @@ def initialize(cfg, insn_flows, block_flows) end def disasm - fmt = Disassembler.new - output = StringIO.new - output.puts "== dfg #{cfg.iseq.name}" + fmt = Disassembler.new(cfg.iseq) + fmt.output.puts("== dfg: #{cfg.iseq.inspect}") cfg.blocks.each do |block| - output.print(block.id) - unless block.incoming_blocks.empty? 
- srcs = block.incoming_blocks.map(&:id) - output.print(" # from: #{srcs.join(", ")}") - end - output.puts - - block_flow = block_flows.fetch(block.id) - unless block_flow.in.empty? - output.puts " # in: #{block_flow.in.join(", ")}" - end - - block.each_with_length do |insn, length| - output.print(" ") - output.print(insn.disasm(fmt)) - - insn_flow = insn_flows[length] - if insn_flow.in.empty? && insn_flow.out.empty? - output.puts - next + fmt.output.puts(block.id) + fmt.with_prefix(" ") do + unless block.incoming_blocks.empty? + from = block.incoming_blocks.map(&:id).join(", ") + fmt.output.puts("#{fmt.current_prefix}== from: #{from}") end - output.print(" # ") - unless insn_flow.in.empty? - output.print("in: #{insn_flow.in.join(", ")}") - output.print("; ") unless insn_flow.out.empty? + block_flow = block_flows.fetch(block.id) + unless block_flow.in.empty? + fmt.output.puts("#{fmt.current_prefix}== in: #{block_flow.in.join(", ")}") end - unless insn_flow.out.empty? - output.print("out: #{insn_flow.out.join(", ")}") + fmt.format_insns!(block.insns, block.block_start) do |insn, length| + insn_flow = insn_flows[length] + next if insn_flow.in.empty? && insn_flow.out.empty? + + fmt.output.print(" # ") + unless insn_flow.in.empty? + fmt.output.print("in: #{insn_flow.in.join(", ")}") + fmt.output.print("; ") unless insn_flow.out.empty? + end + + unless insn_flow.out.empty? + fmt.output.print("out: #{insn_flow.out.join(", ")}") + end end - output.puts - end - - dests = block.outgoing_blocks.map(&:id) - dests << "leaves" if block.insns.last.leaves? - output.puts(" # to: #{dests.join(", ")}") unless dests.empty? + to = block.outgoing_blocks.map(&:id) + to << "leaves" if block.insns.last.leaves? + fmt.output.puts("#{fmt.current_prefix}== to: #{to.join(", ")}") - unless block_flow.out.empty? - output.puts " # out: #{block_flow.out.join(", ")}" + unless block_flow.out.empty? 
+ fmt.output.puts("#{fmt.current_prefix}== out: #{block_flow.out.join(", ")}") + end end end - output.string + fmt.string end # Verify that we constructed the data flow graph correctly. diff --git a/lib/syntax_tree/yarv/disassembler.rb b/lib/syntax_tree/yarv/disassembler.rb index 0b445e02..8b86851e 100644 --- a/lib/syntax_tree/yarv/disassembler.rb +++ b/lib/syntax_tree/yarv/disassembler.rb @@ -146,6 +146,10 @@ def format_insns!(insns, length = 0) events.clear end + # A hook here to allow for custom formatting of instructions after + # the main body has been processed. + yield insn, length if block_given? + output << "\n" length += insn.length end @@ -166,13 +170,7 @@ def with_prefix(value) private def format_iseq(iseq) - output << "#{current_prefix}== disasm: " - output << "#:1 " - - location = Location.fixed(line: iseq.line, char: 0, column: 0) - output << "(#{location.start_line},#{location.start_column})-" - output << "(#{location.end_line},#{location.end_column})" - output << "> " + output << "#{current_prefix}== disasm: #{iseq.inspect} " if iseq.catch_table.any? output << "(catch: TRUE)\n" diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb index 1281eba4..83453837 100644 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -276,6 +276,10 @@ def disasm fmt.string end + def inspect + "#:1 (#{line},#{0})-(#{line},#{0})>" + end + # This method converts our linked list of instructions into a final array # and performs any other compilation steps necessary. def compile! 
diff --git a/test/yarv_test.rb b/test/yarv_test.rb index 7a998fa4..5ac37504 100644 --- a/test/yarv_test.rb +++ b/test/yarv_test.rb @@ -335,31 +335,34 @@ def test_dfg dfg = SyntaxTree::YARV::DataFlowGraph.compile(cfg) assert_equal(<<~DFG, dfg.disasm) - == dfg + == dfg: #@:1 (1,0)-(1,0)> block_0 - putobject 100 # out: out_0 - putobject 14 # out: 5 - putobject_INT2FIX_0_ # out: 5 - opt_lt # in: 2, 4; out: 7 - branchunless 13 # in: 5 - # to: block_13, block_9 - # out: 0 - block_9 # from: block_0 - # in: pass_0 - putobject -1 # out: out_0 - jump 14 - # to: block_14 - # out: pass_0, 9 - block_13 # from: block_0 - # in: pass_0 - putobject_INT2FIX_1_ # out: out_0 - # to: block_14 - # out: pass_0, 13 - block_14 # from: block_9, block_13 - # in: in_0, in_1 - opt_plus # in: in_0, in_1; out: 16 - leave # in: 14 - # to: leaves + 0000 putobject 100 # out: out_0 + 0002 putobject 14 # out: 5 + 0004 putobject_INT2FIX_0_ # out: 5 + 0005 opt_lt # in: 2, 4; out: 7 + 0007 branchunless 13 # in: 5 + == to: block_13, block_9 + == out: 0 + block_9 + == from: block_0 + == in: pass_0 + 0009 putobject -1 # out: out_0 + 0011 jump 14 + == to: block_14 + == out: pass_0, 9 + block_13 + == from: block_0 + == in: pass_0 + 0013 putobject_INT2FIX_1_ # out: out_0 + == to: block_14 + == out: pass_0, 13 + block_14 + == from: block_9, block_13 + == in: in_0, in_1 + 0014 opt_plus # in: in_0, in_1; out: 16 + 0016 leave # in: 14 + == to: leaves DFG end From 28c5a4ac92745c26590794366f014742bc02eebd Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 2 Feb 2023 21:01:49 -0500 Subject: [PATCH 17/58] Various formatting for CFG and DFG --- lib/syntax_tree/yarv/control_flow_graph.rb | 30 +++++++------ lib/syntax_tree/yarv/data_flow_graph.rb | 45 +++++++++----------- lib/syntax_tree/yarv/disassembler.rb | 2 +- lib/syntax_tree/yarv/instruction_sequence.rb | 2 +- 4 files changed, 41 insertions(+), 38 deletions(-) diff --git a/lib/syntax_tree/yarv/control_flow_graph.rb b/lib/syntax_tree/yarv/control_flow_graph.rb 
index ef779c54..fb8f97f3 100644 --- a/lib/syntax_tree/yarv/control_flow_graph.rb +++ b/lib/syntax_tree/yarv/control_flow_graph.rb @@ -38,17 +38,17 @@ def disasm blocks.each do |block| fmt.output.puts(block.id) - fmt.with_prefix(" ") do + fmt.with_prefix(" ") do |prefix| unless block.incoming_blocks.empty? - from = block.incoming_blocks.map(&:id).join(", ") - fmt.output.puts("#{fmt.current_prefix}== from: #{from}") + from = block.incoming_blocks.map(&:id) + fmt.output.puts("#{prefix}== from: #{from.join(", ")}") end fmt.format_insns!(block.insns, block.block_start) to = block.outgoing_blocks.map(&:id) to << "leaves" if block.insns.last.leaves? - fmt.output.puts("#{fmt.current_prefix}== to: #{to.join(", ")}") + fmt.output.puts("#{prefix}== to: #{to.join(", ")}") end end @@ -142,14 +142,19 @@ def build_basic_blocks length = 0 blocks = - iseq.insns.grep(Instruction).slice_after do |insn| - length += insn.length - block_starts.include?(length) - end + iseq + .insns + .grep(Instruction) + .slice_after do |insn| + length += insn.length + block_starts.include?(length) + end - block_starts.zip(blocks).to_h do |block_start, block_insns| - [block_start, BasicBlock.new(block_start, block_insns)] - end + block_starts + .zip(blocks) + .to_h do |block_start, block_insns| + [block_start, BasicBlock.new(block_start, block_insns)] + end end # Connect the blocks by letting them know which blocks are incoming and @@ -162,7 +167,8 @@ def connect_basic_blocks(blocks) block.outgoing_blocks << blocks.fetch(labels[branch_target]) end - if (insn.branch_targets.empty? && !insn.leaves?) || insn.falls_through? + if (insn.branch_targets.empty? && !insn.leaves?) || + insn.falls_through? 
fall_through_start = block_start + block.insns.sum(&:length) block.outgoing_blocks << blocks.fetch(fall_through_start) end diff --git a/lib/syntax_tree/yarv/data_flow_graph.rb b/lib/syntax_tree/yarv/data_flow_graph.rb index 09ba84a4..614d1233 100644 --- a/lib/syntax_tree/yarv/data_flow_graph.rb +++ b/lib/syntax_tree/yarv/data_flow_graph.rb @@ -32,27 +32,27 @@ def disasm cfg.blocks.each do |block| fmt.output.puts(block.id) - fmt.with_prefix(" ") do + fmt.with_prefix(" ") do |prefix| unless block.incoming_blocks.empty? - from = block.incoming_blocks.map(&:id).join(", ") - fmt.output.puts("#{fmt.current_prefix}== from: #{from}") + from = block.incoming_blocks.map(&:id) + fmt.output.puts("#{prefix}== from: #{from.join(", ")}") end block_flow = block_flows.fetch(block.id) unless block_flow.in.empty? - fmt.output.puts("#{fmt.current_prefix}== in: #{block_flow.in.join(", ")}") + fmt.output.puts("#{prefix}== in: #{block_flow.in.join(", ")}") end - fmt.format_insns!(block.insns, block.block_start) do |insn, length| + fmt.format_insns!(block.insns, block.block_start) do |_, length| insn_flow = insn_flows[length] next if insn_flow.in.empty? && insn_flow.out.empty? - + fmt.output.print(" # ") unless insn_flow.in.empty? fmt.output.print("in: #{insn_flow.in.join(", ")}") fmt.output.print("; ") unless insn_flow.out.empty? end - + unless insn_flow.out.empty? fmt.output.print("out: #{insn_flow.out.join(", ")}") end @@ -60,11 +60,11 @@ def disasm to = block.outgoing_blocks.map(&:id) to << "leaves" if block.insns.last.leaves? - fmt.output.puts("#{fmt.current_prefix}== to: #{to.join(", ")}") + fmt.output.puts("#{prefix}== to: #{to.join(", ")}") unless block_flow.out.empty? - fmt.output.puts("#{fmt.current_prefix}== out: #{block_flow.out.join(", ")}") - end + fmt.output.puts("#{prefix}== out: #{block_flow.out.join(", ")}") + end end end @@ -104,23 +104,20 @@ def self.compile(cfg) # This class is responsible for creating a data flow graph from the given # control flow graph. 
class Compiler - attr_reader :cfg, :insn_flows, :block_flows + # This is the control flow graph that is being compiled. + attr_reader :cfg - def initialize(cfg) - @cfg = cfg + # This data structure will hold the data flow between instructions + # within individual basic blocks. + attr_reader :insn_flows - # This data structure will hold the data flow between instructions - # within individual basic blocks. - @insn_flows = {} - cfg.insns.each_key do |length| - @insn_flows[length] = DataFlow.new - end + # This data structure will hold the data flow between basic blocks. + attr_reader :block_flows - # This data structure will hold the data flow between basic blocks. - @block_flows = {} - cfg.blocks.each do |block| - @block_flows[block.id] = DataFlow.new - end + def initialize(cfg) + @cfg = cfg + @insn_flows = cfg.insns.to_h { |length, _| [length, DataFlow.new] } + @block_flows = cfg.blocks.to_h { |block| [block.id, DataFlow.new] } end def compile diff --git a/lib/syntax_tree/yarv/disassembler.rb b/lib/syntax_tree/yarv/disassembler.rb index 8b86851e..7756d125 100644 --- a/lib/syntax_tree/yarv/disassembler.rb +++ b/lib/syntax_tree/yarv/disassembler.rb @@ -161,7 +161,7 @@ def with_prefix(value) begin @current_prefix = value - yield + yield value ensure @current_prefix = previous end diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb index 83453837..45fc6121 100644 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -277,7 +277,7 @@ def disasm end def inspect - "#:1 (#{line},#{0})-(#{line},#{0})>" + "#:1 (#{line},0)-(#{line},0)>" end # This method converts our linked list of instructions into a final array From 5526f399e81e7ec418a4a667e7c86d0082de9b1f Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 2 Feb 2023 21:13:00 -0500 Subject: [PATCH 18/58] Split out calldata into its own file --- lib/syntax_tree.rb | 1 + lib/syntax_tree/yarv/calldata.rb | 91 
++++++++++++++++++++++++++++ lib/syntax_tree/yarv/disassembler.rb | 25 +------- lib/syntax_tree/yarv/instructions.rb | 61 ------------------- 4 files changed, 93 insertions(+), 85 deletions(-) create mode 100644 lib/syntax_tree/yarv/calldata.rb diff --git a/lib/syntax_tree.rb b/lib/syntax_tree.rb index e0e2a6be..ade9ff5e 100644 --- a/lib/syntax_tree.rb +++ b/lib/syntax_tree.rb @@ -31,6 +31,7 @@ require_relative "syntax_tree/yarv" require_relative "syntax_tree/yarv/basic_block" require_relative "syntax_tree/yarv/bf" +require_relative "syntax_tree/yarv/calldata" require_relative "syntax_tree/yarv/compiler" require_relative "syntax_tree/yarv/control_flow_graph" require_relative "syntax_tree/yarv/data_flow_graph" diff --git a/lib/syntax_tree/yarv/calldata.rb b/lib/syntax_tree/yarv/calldata.rb new file mode 100644 index 00000000..fadea61b --- /dev/null +++ b/lib/syntax_tree/yarv/calldata.rb @@ -0,0 +1,91 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + # This is an operand to various YARV instructions that represents the + # information about a specific call site. 
+ class CallData + CALL_ARGS_SPLAT = 1 << 0 + CALL_ARGS_BLOCKARG = 1 << 1 + CALL_FCALL = 1 << 2 + CALL_VCALL = 1 << 3 + CALL_ARGS_SIMPLE = 1 << 4 + CALL_BLOCKISEQ = 1 << 5 + CALL_KWARG = 1 << 6 + CALL_KW_SPLAT = 1 << 7 + CALL_TAILCALL = 1 << 8 + CALL_SUPER = 1 << 9 + CALL_ZSUPER = 1 << 10 + CALL_OPT_SEND = 1 << 11 + CALL_KW_SPLAT_MUT = 1 << 12 + + attr_reader :method, :argc, :flags, :kw_arg + + def initialize( + method, + argc = 0, + flags = CallData::CALL_ARGS_SIMPLE, + kw_arg = nil + ) + @method = method + @argc = argc + @flags = flags + @kw_arg = kw_arg + end + + def flag?(mask) + (flags & mask) > 0 + end + + def to_h + result = { mid: method, flag: flags, orig_argc: argc } + result[:kw_arg] = kw_arg if kw_arg + result + end + + def inspect + names = [] + names << :ARGS_SPLAT if flag?(CALL_ARGS_SPLAT) + names << :ARGS_BLOCKARG if flag?(CALL_ARGS_BLOCKARG) + names << :FCALL if flag?(CALL_FCALL) + names << :VCALL if flag?(CALL_VCALL) + names << :ARGS_SIMPLE if flag?(CALL_ARGS_SIMPLE) + names << :BLOCKISEQ if flag?(CALL_BLOCKISEQ) + names << :KWARG if flag?(CALL_KWARG) + names << :KW_SPLAT if flag?(CALL_KW_SPLAT) + names << :TAILCALL if flag?(CALL_TAILCALL) + names << :SUPER if flag?(CALL_SUPER) + names << :ZSUPER if flag?(CALL_ZSUPER) + names << :OPT_SEND if flag?(CALL_OPT_SEND) + names << :KW_SPLAT_MUT if flag?(CALL_KW_SPLAT_MUT) + + parts = [] + parts << "mid:#{method}" if method + parts << "argc:#{argc}" + parts << "kw:[#{kw_arg.join(", ")}]" if kw_arg + parts << names.join("|") if names.any? + + "" + end + + def self.from(serialized) + new( + serialized[:mid], + serialized[:orig_argc], + serialized[:flag], + serialized[:kw_arg] + ) + end + end + + # A convenience method for creating a CallData object. 
+ def self.calldata( + method, + argc = 0, + flags = CallData::CALL_ARGS_SIMPLE, + kw_arg = nil + ) + CallData.new(method, argc, flags, kw_arg) + end + end +end diff --git a/lib/syntax_tree/yarv/disassembler.rb b/lib/syntax_tree/yarv/disassembler.rb index 7756d125..ad66d0bf 100644 --- a/lib/syntax_tree/yarv/disassembler.rb +++ b/lib/syntax_tree/yarv/disassembler.rb @@ -21,30 +21,7 @@ def initialize(current_iseq = nil) ######################################################################## def calldata(value) - flag_names = [] - flag_names << :ARGS_SPLAT if value.flag?(CallData::CALL_ARGS_SPLAT) - if value.flag?(CallData::CALL_ARGS_BLOCKARG) - flag_names << :ARGS_BLOCKARG - end - flag_names << :FCALL if value.flag?(CallData::CALL_FCALL) - flag_names << :VCALL if value.flag?(CallData::CALL_VCALL) - flag_names << :ARGS_SIMPLE if value.flag?(CallData::CALL_ARGS_SIMPLE) - flag_names << :BLOCKISEQ if value.flag?(CallData::CALL_BLOCKISEQ) - flag_names << :KWARG if value.flag?(CallData::CALL_KWARG) - flag_names << :KW_SPLAT if value.flag?(CallData::CALL_KW_SPLAT) - flag_names << :TAILCALL if value.flag?(CallData::CALL_TAILCALL) - flag_names << :SUPER if value.flag?(CallData::CALL_SUPER) - flag_names << :ZSUPER if value.flag?(CallData::CALL_ZSUPER) - flag_names << :OPT_SEND if value.flag?(CallData::CALL_OPT_SEND) - flag_names << :KW_SPLAT_MUT if value.flag?(CallData::CALL_KW_SPLAT_MUT) - - parts = [] - parts << "mid:#{value.method}" if value.method - parts << "argc:#{value.argc}" - parts << "kw:[#{value.kw_arg.join(", ")}]" if value.kw_arg - parts << flag_names.join("|") if flag_names.any? 
- - "" + value.inspect end def enqueue(iseq) diff --git a/lib/syntax_tree/yarv/instructions.rb b/lib/syntax_tree/yarv/instructions.rb index 97ccce15..9bd8f0cd 100644 --- a/lib/syntax_tree/yarv/instructions.rb +++ b/lib/syntax_tree/yarv/instructions.rb @@ -2,67 +2,6 @@ module SyntaxTree module YARV - # This is an operand to various YARV instructions that represents the - # information about a specific call site. - class CallData - CALL_ARGS_SPLAT = 1 << 0 - CALL_ARGS_BLOCKARG = 1 << 1 - CALL_FCALL = 1 << 2 - CALL_VCALL = 1 << 3 - CALL_ARGS_SIMPLE = 1 << 4 - CALL_BLOCKISEQ = 1 << 5 - CALL_KWARG = 1 << 6 - CALL_KW_SPLAT = 1 << 7 - CALL_TAILCALL = 1 << 8 - CALL_SUPER = 1 << 9 - CALL_ZSUPER = 1 << 10 - CALL_OPT_SEND = 1 << 11 - CALL_KW_SPLAT_MUT = 1 << 12 - - attr_reader :method, :argc, :flags, :kw_arg - - def initialize( - method, - argc = 0, - flags = CallData::CALL_ARGS_SIMPLE, - kw_arg = nil - ) - @method = method - @argc = argc - @flags = flags - @kw_arg = kw_arg - end - - def flag?(mask) - (flags & mask) > 0 - end - - def to_h - result = { mid: method, flag: flags, orig_argc: argc } - result[:kw_arg] = kw_arg if kw_arg - result - end - - def self.from(serialized) - new( - serialized[:mid], - serialized[:orig_argc], - serialized[:flag], - serialized[:kw_arg] - ) - end - end - - # A convenience method for creating a CallData object. - def self.calldata( - method, - argc = 0, - flags = CallData::CALL_ARGS_SIMPLE, - kw_arg = nil - ) - CallData.new(method, argc, flags, kw_arg) - end - # This is a base class for all YARV instructions. It provides a few # convenience methods for working with instructions. 
class Instruction From 02ec2ad5441b797382d026ecd31b5cc4eeeed35b Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 2 Feb 2023 21:32:23 -0500 Subject: [PATCH 19/58] Simplify disassembler API --- lib/syntax_tree/yarv/control_flow_graph.rb | 8 +++--- lib/syntax_tree/yarv/data_flow_graph.rb | 29 ++++++++++++++-------- lib/syntax_tree/yarv/disassembler.rb | 16 +++++++++--- 3 files changed, 35 insertions(+), 18 deletions(-) diff --git a/lib/syntax_tree/yarv/control_flow_graph.rb b/lib/syntax_tree/yarv/control_flow_graph.rb index fb8f97f3..dc900e50 100644 --- a/lib/syntax_tree/yarv/control_flow_graph.rb +++ b/lib/syntax_tree/yarv/control_flow_graph.rb @@ -34,21 +34,21 @@ def initialize(iseq, insns, blocks) def disasm fmt = Disassembler.new(iseq) - fmt.output.puts("== cfg: #{iseq.inspect}") + fmt.puts("== cfg: #{iseq.inspect}") blocks.each do |block| - fmt.output.puts(block.id) + fmt.puts(block.id) fmt.with_prefix(" ") do |prefix| unless block.incoming_blocks.empty? from = block.incoming_blocks.map(&:id) - fmt.output.puts("#{prefix}== from: #{from.join(", ")}") + fmt.puts("#{prefix}== from: #{from.join(", ")}") end fmt.format_insns!(block.insns, block.block_start) to = block.outgoing_blocks.map(&:id) to << "leaves" if block.insns.last.leaves? - fmt.output.puts("#{prefix}== to: #{to.join(", ")}") + fmt.puts("#{prefix}== to: #{to.join(", ")}") end end diff --git a/lib/syntax_tree/yarv/data_flow_graph.rb b/lib/syntax_tree/yarv/data_flow_graph.rb index 614d1233..f98eedda 100644 --- a/lib/syntax_tree/yarv/data_flow_graph.rb +++ b/lib/syntax_tree/yarv/data_flow_graph.rb @@ -6,6 +6,15 @@ module YARV # control-flow-graph. Data flow is discovered locally and then globally. The # graph only considers data flow through the stack - local variables and # objects are considered fully escaped in this analysis. + # + # You can use this class by calling the ::compile method and passing it a + # control flow graph. It will return a data flow graph object. 
+ # + # iseq = RubyVM::InstructionSequence.compile("1 + 2") + # iseq = SyntaxTree::YARV::InstructionSequence.from(iseq.to_a) + # cfg = SyntaxTree::YARV::ControlFlowGraph.compile(iseq) + # dfg = SyntaxTree::YARV::DataFlowGraph.compile(cfg) + # class DataFlowGraph # This object represents the flow of data between instructions. class DataFlow @@ -28,42 +37,42 @@ def initialize(cfg, insn_flows, block_flows) def disasm fmt = Disassembler.new(cfg.iseq) - fmt.output.puts("== dfg: #{cfg.iseq.inspect}") + fmt.puts("== dfg: #{cfg.iseq.inspect}") cfg.blocks.each do |block| - fmt.output.puts(block.id) + fmt.puts(block.id) fmt.with_prefix(" ") do |prefix| unless block.incoming_blocks.empty? from = block.incoming_blocks.map(&:id) - fmt.output.puts("#{prefix}== from: #{from.join(", ")}") + fmt.puts("#{prefix}== from: #{from.join(", ")}") end block_flow = block_flows.fetch(block.id) unless block_flow.in.empty? - fmt.output.puts("#{prefix}== in: #{block_flow.in.join(", ")}") + fmt.puts("#{prefix}== in: #{block_flow.in.join(", ")}") end fmt.format_insns!(block.insns, block.block_start) do |_, length| insn_flow = insn_flows[length] next if insn_flow.in.empty? && insn_flow.out.empty? - fmt.output.print(" # ") + fmt.print(" # ") unless insn_flow.in.empty? - fmt.output.print("in: #{insn_flow.in.join(", ")}") - fmt.output.print("; ") unless insn_flow.out.empty? + fmt.print("in: #{insn_flow.in.join(", ")}") + fmt.print("; ") unless insn_flow.out.empty? end unless insn_flow.out.empty? - fmt.output.print("out: #{insn_flow.out.join(", ")}") + fmt.print("out: #{insn_flow.out.join(", ")}") end end to = block.outgoing_blocks.map(&:id) to << "leaves" if block.insns.last.leaves? - fmt.output.puts("#{prefix}== to: #{to.join(", ")}") + fmt.puts("#{prefix}== to: #{to.join(", ")}") unless block_flow.out.empty? 
- fmt.output.puts("#{prefix}== out: #{block_flow.out.join(", ")}") + fmt.puts("#{prefix}== out: #{block_flow.out.join(", ")}") end end end diff --git a/lib/syntax_tree/yarv/disassembler.rb b/lib/syntax_tree/yarv/disassembler.rb index ad66d0bf..a758bce3 100644 --- a/lib/syntax_tree/yarv/disassembler.rb +++ b/lib/syntax_tree/yarv/disassembler.rb @@ -78,10 +78,6 @@ def object(value) # Entrypoints ######################################################################## - def string - output.string - end - def format! while (@current_iseq = queue.shift) output << "\n" if output.pos > 0 @@ -133,6 +129,18 @@ def format_insns!(insns, length = 0) end end + def print(string) + output.print(string) + end + + def puts(string) + output.puts(string) + end + + def string + output.string + end + def with_prefix(value) previous = @current_prefix From b34e5d4f0e75bd44e9ce34faeddca3616c546d92 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 3 Feb 2023 10:26:22 -0500 Subject: [PATCH 20/58] Speed up ractor tests --- test/ractor_test.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/ractor_test.rb b/test/ractor_test.rb index bcdb2a51..7e0201ca 100644 --- a/test/ractor_test.rb +++ b/test/ractor_test.rb @@ -33,7 +33,7 @@ def test_formatting private def filepaths - Dir.glob(File.expand_path("../lib/syntax_tree/{node,parser}.rb", __dir__)) + Dir.glob(File.expand_path("../lib/syntax_tree/plugin/*.rb", __dir__)) end # Ractors still warn about usage, so I'm disabling that warning here just to From da08570e9b46e0d29085e185fc76a82b04e0ae6e Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Sat, 4 Feb 2023 16:40:23 -0500 Subject: [PATCH 21/58] EmbDoc fixes --- lib/syntax_tree/node.rb | 31 +++++++++++++++++++++++++++++-- test/fixtures/call.rb | 5 +++++ test/fixtures/def.rb | 16 ++++++++++++++++ test/fixtures/symbols.rb | 5 +++++ 4 files changed, 55 insertions(+), 2 deletions(-) diff --git a/lib/syntax_tree/node.rb b/lib/syntax_tree/node.rb index fc5517cf..55b381c3 
100644 --- a/lib/syntax_tree/node.rb +++ b/lib/syntax_tree/node.rb @@ -4090,7 +4090,8 @@ def deconstruct_keys(_keys) def format(q) q.group do q.group do - q.text("def ") + q.text("def") + q.text(" ") if target || name.comments.empty? if target q.format(target) @@ -4872,6 +4873,25 @@ class EmbDoc < Node def initialize(value:, location:) @value = value @location = location + + @leading = false + @trailing = false + end + + def leading! + @leading = true + end + + def leading? + @leading + end + + def trailing! + @trailing = true + end + + def trailing? + @trailing end def inline? @@ -4908,7 +4928,13 @@ def deconstruct_keys(_keys) end def format(q) - q.trim + if (q.parent.is_a?(DefNode) && q.parent.endless?) || + q.parent.is_a?(Statements) + q.trim + else + q.breakable_return + end + q.text(value) end @@ -10465,6 +10491,7 @@ def deconstruct_keys(_keys) def format(q) q.text(":") + q.text("\\") if value.comments.any? q.format(value) end diff --git a/test/fixtures/call.rb b/test/fixtures/call.rb index c41ee4ac..d35c6036 100644 --- a/test/fixtures/call.rb +++ b/test/fixtures/call.rb @@ -60,3 +60,8 @@ % a b do end.c d +% +self. 
+=begin +=end + to_s diff --git a/test/fixtures/def.rb b/test/fixtures/def.rb index a827adfe..1441bf04 100644 --- a/test/fixtures/def.rb +++ b/test/fixtures/def.rb @@ -23,3 +23,19 @@ def foo() # comment def foo( # comment ) end +% +def +=begin +=end +a +end +% +def a() +=begin +=end +=1 +- +def a() = +=begin +=end + 1 diff --git a/test/fixtures/symbols.rb b/test/fixtures/symbols.rb index 5e2673f3..12f0a22f 100644 --- a/test/fixtures/symbols.rb +++ b/test/fixtures/symbols.rb @@ -19,3 +19,8 @@ %I[foo] # comment % %I{foo[]} +% +:\ +=begin +=end +symbol From a5ad966a44c70f2861ed3ad2a26804d58a3515e0 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Sun, 5 Feb 2023 09:32:36 -0500 Subject: [PATCH 22/58] Fix up Ruby 2.7.0 build --- test/fixtures/def.rb | 10 ---------- test/fixtures/def_endless.rb | 10 ++++++++++ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/test/fixtures/def.rb b/test/fixtures/def.rb index 1441bf04..0cc49e0a 100644 --- a/test/fixtures/def.rb +++ b/test/fixtures/def.rb @@ -29,13 +29,3 @@ def foo( # comment =end a end -% -def a() -=begin -=end -=1 -- -def a() = -=begin -=end - 1 diff --git a/test/fixtures/def_endless.rb b/test/fixtures/def_endless.rb index 4595fba9..8d1f9d33 100644 --- a/test/fixtures/def_endless.rb +++ b/test/fixtures/def_endless.rb @@ -22,3 +22,13 @@ def self.foo = bar baz end def foo? 
= true +% +def a() +=begin +=end +=1 +- +def a() = +=begin +=end + 1 From 4ec195bef0f61cbd098119eab56bc16190dd925b Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 6 Feb 2023 13:55:59 -0500 Subject: [PATCH 23/58] Mermaid visitor --- lib/syntax_tree.rb | 2 + lib/syntax_tree/node.rb | 12 ++-- lib/syntax_tree/visitor/mermaid_visitor.rb | 81 ++++++++++++++++++++++ 3 files changed, 90 insertions(+), 5 deletions(-) create mode 100644 lib/syntax_tree/visitor/mermaid_visitor.rb diff --git a/lib/syntax_tree.rb b/lib/syntax_tree.rb index ade9ff5e..1af1b476 100644 --- a/lib/syntax_tree.rb +++ b/lib/syntax_tree.rb @@ -1,5 +1,6 @@ # frozen_string_literal: true +require "cgi" require "etc" require "fiddle" require "json" @@ -18,6 +19,7 @@ require_relative "syntax_tree/visitor/field_visitor" require_relative "syntax_tree/visitor/json_visitor" require_relative "syntax_tree/visitor/match_visitor" +require_relative "syntax_tree/visitor/mermaid_visitor" require_relative "syntax_tree/visitor/mutation_visitor" require_relative "syntax_tree/visitor/pretty_print_visitor" require_relative "syntax_tree/visitor/environment" diff --git a/lib/syntax_tree/node.rb b/lib/syntax_tree/node.rb index 1a814aaf..8ffbcd2d 100644 --- a/lib/syntax_tree/node.rb +++ b/lib/syntax_tree/node.rb @@ -127,17 +127,19 @@ def format(q) end def pretty_print(q) - visitor = Visitor::PrettyPrintVisitor.new(q) - visitor.visit(self) + accept(Visitor::PrettyPrintVisitor.new(q)) end def to_json(*opts) - visitor = Visitor::JSONVisitor.new - visitor.visit(self).to_json(*opts) + accept(Visitor::JSONVisitor.new).to_json(*opts) end def construct_keys - PrettierPrint.format(+"") { |q| Visitor::MatchVisitor.new(q).visit(self) } + PrettierPrint.format(+"") { |q| accept(Visitor::MatchVisitor.new(q)) } + end + + def mermaid + accept(Visitor::MermaidVisitor.new) end end diff --git a/lib/syntax_tree/visitor/mermaid_visitor.rb b/lib/syntax_tree/visitor/mermaid_visitor.rb new file mode 100644 index 00000000..2b06049a --- /dev/null 
+++ b/lib/syntax_tree/visitor/mermaid_visitor.rb @@ -0,0 +1,81 @@ +# frozen_string_literal: true + +module SyntaxTree + class Visitor + # This visitor transforms the AST into a mermaid flow chart. + class MermaidVisitor < FieldVisitor + attr_reader :output, :target + + def initialize + @output = StringIO.new + @output.puts("flowchart TD") + + @target = nil + end + + def visit_program(node) + super + output.string + end + + private + + def comments(node) + # Ignore + end + + def field(name, value) + case value + when Node + node_id = visit(value) + output.puts(" #{target} -- \"#{name}\" --> #{node_id}") + when String + node_id = "#{target}_#{name}" + output.puts(" #{node_id}([#{CGI.escapeHTML(value.inspect)}])") + output.puts(" #{target} -- \"#{name}\" --> #{node_id}") + when nil + # skip + else + node_id = "#{target}_#{name}" + output.puts(" #{node_id}([\"#{CGI.escapeHTML(value.inspect)}\"])") + output.puts(" #{target} -- \"#{name}\" --> #{node_id}") + end + end + + def list(name, values) + values.each_with_index do |value, index| + field("#{name}[#{index}]", value) + end + end + + def node(node, type) + previous_target = target + + begin + @target = "node_#{node.object_id}" + + yield + + output.puts(" #{@target}[\"#{type}\"]") + @target + ensure + @target = previous_target + end + end + + def pairs(name, values) + values.each_with_index do |(key, value), index| + node_id = "#{target}_#{name}_#{index}" + output.puts(" #{node_id}((\" \"))") + output.puts(" #{target} -- \"#{name}[#{index}]\" --> #{node_id}") + output.puts(" #{node_id} -- \"[0]\" --> #{visit(key)}") + output.puts(" #{node_id} -- \"[1]\" --> #{visit(value)}") if value + end + end + + def text(name, value) + field(name, value) + end + end + end +end From e7c5adf1de9fcac198fdbbdc1350515c3bf02210 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 6 Feb 2023 14:12:36 -0500 Subject: [PATCH 24/58] Control flow graphs to mermaid --- .rubocop.yml | 3 ++ lib/syntax_tree/node.rb | 8 ++--- 
lib/syntax_tree/yarv/control_flow_graph.rb | 34 +++++++++++++++++++++ lib/syntax_tree/yarv/disassembler.rb | 35 ++++++++++++++++++++++ 4 files changed, 76 insertions(+), 4 deletions(-) diff --git a/.rubocop.yml b/.rubocop.yml index 381d7a27..62e78453 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -90,6 +90,9 @@ Style/CaseLikeIf: Style/ClassVars: Enabled: false +Style/CombinableLoops: + Enabled: false + Style/DocumentDynamicEvalDefinition: Enabled: false diff --git a/lib/syntax_tree/node.rb b/lib/syntax_tree/node.rb index 8ffbcd2d..b1ecfdc7 100644 --- a/lib/syntax_tree/node.rb +++ b/lib/syntax_tree/node.rb @@ -134,12 +134,12 @@ def to_json(*opts) accept(Visitor::JSONVisitor.new).to_json(*opts) end - def construct_keys - PrettierPrint.format(+"") { |q| accept(Visitor::MatchVisitor.new(q)) } + def to_mermaid + accept(Visitor::MermaidVisitor.new) end - def mermaid - accept(Visitor::MermaidVisitor.new) + def construct_keys + PrettierPrint.format(+"") { |q| accept(Visitor::MatchVisitor.new(q)) } end end diff --git a/lib/syntax_tree/yarv/control_flow_graph.rb b/lib/syntax_tree/yarv/control_flow_graph.rb index dc900e50..a9f3e093 100644 --- a/lib/syntax_tree/yarv/control_flow_graph.rb +++ b/lib/syntax_tree/yarv/control_flow_graph.rb @@ -55,6 +55,40 @@ def disasm fmt.string end + def to_mermaid + output = StringIO.new + output.puts("flowchart TD") + + fmt = Disassembler::Mermaid.new + blocks.each do |block| + output.puts(" subgraph #{block.id}") + previous = nil + + block.each_with_length do |insn, length| + node_id = "node_#{length}" + label = "%04d %s" % [length, insn.disasm(fmt)] + + output.puts(" #{node_id}(\"#{CGI.escapeHTML(label)}\")") + output.puts(" #{previous} --> #{node_id}") if previous + + previous = node_id + end + + output.puts(" end") + end + + blocks.each do |block| + block.outgoing_blocks.each do |outgoing| + offset = + block.block_start + block.insns.sum(&:length) - + block.insns.last.length + output.puts(" node_#{offset} --> 
node_#{outgoing.block_start}") + end + end + + output.string + end + # This method is used to verify that the control flow graph is well # formed. It does this by checking that each basic block is itself well # formed. diff --git a/lib/syntax_tree/yarv/disassembler.rb b/lib/syntax_tree/yarv/disassembler.rb index a758bce3..f60af0fd 100644 --- a/lib/syntax_tree/yarv/disassembler.rb +++ b/lib/syntax_tree/yarv/disassembler.rb @@ -3,6 +3,41 @@ module SyntaxTree module YARV class Disassembler + # This class is another object that handles disassembling a YARV + # instruction sequence but it does so in order to provide a label for a + # mermaid diagram. + class Mermaid + def calldata(value) + value.inspect + end + + def enqueue(iseq) + end + + def event(name) + end + + def inline_storage(cache) + "" + end + + def instruction(name, operands = []) + operands.empty? ? name : "#{name} #{operands.join(", ")}" + end + + def label(value) + "%04d" % value.name["label_".length..] + end + + def local(index, **) + index.inspect + end + + def object(value) + value.inspect + end + end + attr_reader :output, :queue attr_reader :current_prefix From e642348dc2da3e2a8299ebc9e56b0fe6e965446f Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 6 Feb 2023 14:32:44 -0500 Subject: [PATCH 25/58] DFG to mermaid --- lib/syntax_tree/yarv/control_flow_graph.rb | 1 + lib/syntax_tree/yarv/data_flow_graph.rb | 61 ++++++++++++++++++++++ 2 files changed, 62 insertions(+) diff --git a/lib/syntax_tree/yarv/control_flow_graph.rb b/lib/syntax_tree/yarv/control_flow_graph.rb index a9f3e093..328ffc4c 100644 --- a/lib/syntax_tree/yarv/control_flow_graph.rb +++ b/lib/syntax_tree/yarv/control_flow_graph.rb @@ -82,6 +82,7 @@ def to_mermaid offset = block.block_start + block.insns.sum(&:length) - block.insns.last.length + output.puts(" node_#{offset} --> node_#{outgoing.block_start}") end end diff --git a/lib/syntax_tree/yarv/data_flow_graph.rb b/lib/syntax_tree/yarv/data_flow_graph.rb index f98eedda..7423d022 
100644 --- a/lib/syntax_tree/yarv/data_flow_graph.rb +++ b/lib/syntax_tree/yarv/data_flow_graph.rb @@ -80,6 +80,67 @@ def disasm fmt.string end + def to_mermaid + output = StringIO.new + output.puts("flowchart TD") + + fmt = Disassembler::Mermaid.new + links = [] + + cfg.blocks.each do |block| + block_flow = block_flows.fetch(block.id) + graph_name = + if block_flow.in.any? + "#{block.id} #{block_flows[block.id].in.join(", ")}" + else + block.id + end + + output.puts(" subgraph \"#{CGI.escapeHTML(graph_name)}\"") + previous = nil + + block.each_with_length do |insn, length| + node_id = "node_#{length}" + label = "%04d %s" % [length, insn.disasm(fmt)] + + output.puts(" #{node_id}(\"#{CGI.escapeHTML(label)}\")") + + if previous + output.puts(" #{previous} --> #{node_id}") + links << "red" + end + + insn_flows[length].in.each do |input| + if input.is_a?(Integer) + output.puts(" node_#{input} --> #{node_id}") + links << "green" + end + end + + previous = node_id + end + + output.puts(" end") + end + + cfg.blocks.each do |block| + block.outgoing_blocks.each do |outgoing| + offset = + block.block_start + block.insns.sum(&:length) - + block.insns.last.length + + output.puts(" node_#{offset} --> node_#{outgoing.block_start}") + links << "red" + end + end + + links.each_with_index do |color, index| + output.puts(" linkStyle #{index} stroke:#{color}") + end + + output.string + end + # Verify that we constructed the data flow graph correctly. def verify # Check that the first block has no arguments. 
From 4796d1cae3c22431e1256703a7cb194023696064 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Sat, 4 Feb 2023 16:25:06 -0500 Subject: [PATCH 26/58] Sea of nodes --- lib/syntax_tree.rb | 1 + lib/syntax_tree/yarv/control_flow_graph.rb | 212 +++++---- lib/syntax_tree/yarv/data_flow_graph.rb | 99 +++- lib/syntax_tree/yarv/instruction_sequence.rb | 4 + lib/syntax_tree/yarv/instructions.rb | 39 ++ lib/syntax_tree/yarv/sea_of_nodes.rb | 464 +++++++++++++++++++ test/yarv_test.rb | 110 +++++ 7 files changed, 817 insertions(+), 112 deletions(-) create mode 100644 lib/syntax_tree/yarv/sea_of_nodes.rb diff --git a/lib/syntax_tree.rb b/lib/syntax_tree.rb index 1af1b476..cd1f1ce4 100644 --- a/lib/syntax_tree.rb +++ b/lib/syntax_tree.rb @@ -43,6 +43,7 @@ require_relative "syntax_tree/yarv/instructions" require_relative "syntax_tree/yarv/legacy" require_relative "syntax_tree/yarv/local_table" +require_relative "syntax_tree/yarv/sea_of_nodes" require_relative "syntax_tree/yarv/assembler" require_relative "syntax_tree/yarv/vm" diff --git a/lib/syntax_tree/yarv/control_flow_graph.rb b/lib/syntax_tree/yarv/control_flow_graph.rb index 328ffc4c..1a361e5e 100644 --- a/lib/syntax_tree/yarv/control_flow_graph.rb +++ b/lib/syntax_tree/yarv/control_flow_graph.rb @@ -14,93 +14,6 @@ module YARV # cfg = SyntaxTree::YARV::ControlFlowGraph.compile(iseq) # class ControlFlowGraph - # This is the instruction sequence that this control flow graph - # corresponds to. - attr_reader :iseq - - # This is the list of instructions that this control flow graph contains. - # It is effectively the same as the list of instructions in the - # instruction sequence but with line numbers and events filtered out. - attr_reader :insns - - # This is the set of basic blocks that this control-flow graph contains. 
- attr_reader :blocks - - def initialize(iseq, insns, blocks) - @iseq = iseq - @insns = insns - @blocks = blocks - end - - def disasm - fmt = Disassembler.new(iseq) - fmt.puts("== cfg: #{iseq.inspect}") - - blocks.each do |block| - fmt.puts(block.id) - fmt.with_prefix(" ") do |prefix| - unless block.incoming_blocks.empty? - from = block.incoming_blocks.map(&:id) - fmt.puts("#{prefix}== from: #{from.join(", ")}") - end - - fmt.format_insns!(block.insns, block.block_start) - - to = block.outgoing_blocks.map(&:id) - to << "leaves" if block.insns.last.leaves? - fmt.puts("#{prefix}== to: #{to.join(", ")}") - end - end - - fmt.string - end - - def to_mermaid - output = StringIO.new - output.puts("flowchart TD") - - fmt = Disassembler::Mermaid.new - blocks.each do |block| - output.puts(" subgraph #{block.id}") - previous = nil - - block.each_with_length do |insn, length| - node_id = "node_#{length}" - label = "%04d %s" % [length, insn.disasm(fmt)] - - output.puts(" #{node_id}(\"#{CGI.escapeHTML(label)}\")") - output.puts(" #{previous} --> #{node_id}") if previous - - previous = node_id - end - - output.puts(" end") - end - - blocks.each do |block| - block.outgoing_blocks.each do |outgoing| - offset = - block.block_start + block.insns.sum(&:length) - - block.insns.last.length - - output.puts(" node_#{offset} --> node_#{outgoing.block_start}") - end - end - - output.string - end - - # This method is used to verify that the control flow graph is well - # formed. It does this by checking that each basic block is itself well - # formed. - def verify - blocks.each(&:verify) - end - - def self.compile(iseq) - Compiler.new(iseq).compile - end - # This class is responsible for creating a control flow graph from the # given instruction sequence. class Compiler @@ -139,7 +52,11 @@ def initialize(iseq) # This method is used to compile the instruction sequence into a control # flow graph. It returns an instance of ControlFlowGraph. 
def compile - blocks = connect_basic_blocks(build_basic_blocks) + blocks = build_basic_blocks + + connect_basic_blocks(blocks) + prune_basic_blocks(blocks) + ControlFlowGraph.new(iseq, insns, blocks.values).tap(&:verify) end @@ -187,7 +104,16 @@ def build_basic_blocks block_starts .zip(blocks) - .to_h do |block_start, block_insns| + .to_h do |block_start, insns| + # It's possible that we have not detected a block start but still + # have branching instructions inside of a basic block. This can + # happen if you have an unconditional jump which is followed by + # instructions that are unreachable. As of Ruby 3.2, this is + # possible with something as simple as "1 => a". In this case we + # can discard all instructions that follow branching instructions. + block_insns = + insns.slice_after { |insn| insn.branch_targets.any? }.first + [block_start, BasicBlock.new(block_start, block_insns)] end end @@ -213,6 +139,114 @@ def connect_basic_blocks(blocks) end end end + + # If there are blocks that are unreachable, we can remove them from the + # graph entirely at this point. + def prune_basic_blocks(blocks) + visited = Set.new + queue = [blocks.fetch(0)] + + until queue.empty? + current_block = queue.shift + next if visited.include?(current_block) + + visited << current_block + queue.concat(current_block.outgoing_blocks) + end + + blocks.select! { |_, block| visited.include?(block) } + end + end + + # This is the instruction sequence that this control flow graph + # corresponds to. + attr_reader :iseq + + # This is the list of instructions that this control flow graph contains. + # It is effectively the same as the list of instructions in the + # instruction sequence but with line numbers and events filtered out. + attr_reader :insns + + # This is the set of basic blocks that this control-flow graph contains. 
+ attr_reader :blocks + + def initialize(iseq, insns, blocks) + @iseq = iseq + @insns = insns + @blocks = blocks + end + + def disasm + fmt = Disassembler.new(iseq) + fmt.puts("== cfg: #{iseq.inspect}") + + blocks.each do |block| + fmt.puts(block.id) + fmt.with_prefix(" ") do |prefix| + unless block.incoming_blocks.empty? + from = block.incoming_blocks.map(&:id) + fmt.puts("#{prefix}== from: #{from.join(", ")}") + end + + fmt.format_insns!(block.insns, block.block_start) + + to = block.outgoing_blocks.map(&:id) + to << "leaves" if block.insns.last.leaves? + fmt.puts("#{prefix}== to: #{to.join(", ")}") + end + end + + fmt.string + end + + def to_dfg + DataFlowGraph.compile(self) + end + + def to_mermaid + output = StringIO.new + output.puts("flowchart TD") + + fmt = Disassembler::Mermaid.new + blocks.each do |block| + output.puts(" subgraph #{block.id}") + previous = nil + + block.each_with_length do |insn, length| + node_id = "node_#{length}" + label = "%04d %s" % [length, insn.disasm(fmt)] + + output.puts(" #{node_id}(\"#{CGI.escapeHTML(label)}\")") + output.puts(" #{previous} --> #{node_id}") if previous + + previous = node_id + end + + output.puts(" end") + end + + blocks.each do |block| + block.outgoing_blocks.each do |outgoing| + offset = + block.block_start + block.insns.sum(&:length) - + block.insns.last.length + + output.puts(" node_#{offset} --> node_#{outgoing.block_start}") + end + end + + output.string + end + + # This method is used to verify that the control flow graph is well + # formed. It does this by checking that each basic block is itself well + # formed. 
+ def verify + blocks.each(&:verify) + end + + def self.compile(iseq) + Compiler.new(iseq).compile end end end diff --git a/lib/syntax_tree/yarv/data_flow_graph.rb b/lib/syntax_tree/yarv/data_flow_graph.rb index 7423d022..ace40296 100644 --- a/lib/syntax_tree/yarv/data_flow_graph.rb +++ b/lib/syntax_tree/yarv/data_flow_graph.rb @@ -27,6 +27,42 @@ def initialize end end + # This represents an object that goes on the stack that is passed between + # basic blocks. + class BlockArgument + attr_reader :name + + def initialize(name) + @name = name + end + + def local? + false + end + + def to_str + name.to_s + end + end + + # This represents an object that goes on the stack that is passed between + # instructions within a basic block. + class LocalArgument + attr_reader :name, :length + + def initialize(length) + @length = length + end + + def local? + true + end + + def to_str + length.to_s + end + end + attr_reader :cfg, :insn_flows, :block_flows def initialize(cfg, insn_flows, block_flows) @@ -35,11 +71,15 @@ def initialize(cfg, insn_flows, block_flows) @block_flows = block_flows end + def blocks + cfg.blocks + end + def disasm fmt = Disassembler.new(cfg.iseq) fmt.puts("== dfg: #{cfg.iseq.inspect}") - cfg.blocks.each do |block| + blocks.each do |block| fmt.puts(block.id) fmt.with_prefix(" ") do |prefix| unless block.incoming_blocks.empty? @@ -80,6 +120,10 @@ def disasm fmt.string end + def to_son + SeaOfNodes.compile(self) + end + def to_mermaid output = StringIO.new output.puts("flowchart TD") @@ -87,7 +131,7 @@ def to_mermaid fmt = Disassembler::Mermaid.new links = [] - cfg.blocks.each do |block| + blocks.each do |block| block_flow = block_flows.fetch(block.id) graph_name = if block_flow.in.any? 
@@ -123,7 +167,7 @@ def to_mermaid output.puts(" end") end - cfg.blocks.each do |block| + blocks.each do |block| block.outgoing_blocks.each do |outgoing| offset = block.block_start + block.insns.sum(&:length) - @@ -144,11 +188,11 @@ def to_mermaid # Verify that we constructed the data flow graph correctly. def verify # Check that the first block has no arguments. - raise unless block_flows.fetch(cfg.blocks.first.id).in.empty? + raise unless block_flows.fetch(blocks.first.id).in.empty? # Check all control flow edges between blocks pass the right number of # arguments. - cfg.blocks.each do |block| + blocks.each do |block| block_flow = block_flows.fetch(block.id) if block.outgoing_blocks.empty? @@ -191,8 +235,8 @@ def initialize(cfg) end def compile - find_local_flow - find_global_flow + find_internal_flow + find_external_flow DataFlowGraph.new(cfg, insn_flows, block_flows).tap(&:verify) end @@ -200,45 +244,53 @@ def compile # Find the data flow within each basic block. Using an abstract stack, # connect from consumers of data to the producers of that data. - def find_local_flow + def find_internal_flow cfg.blocks.each do |block| block_flow = block_flows.fetch(block.id) stack = [] - # Go through each instruction in the block... + # Go through each instruction in the block. block.each_with_length do |insn, length| insn_flow = insn_flows[length] # How many values will be missing from the local stack to run this - # instruction? + # instruction? This will be used to determine if the values that + # are being used by this instruction are coming from previous + # instructions or from previous basic blocks. missing = insn.pops - stack.size - # For every value the instruction pops off the stack... + # For every value the instruction pops off the stack. insn.pops.times do # Was the value it pops off from another basic block? if stack.empty? - # This is a basic block argument. 
+ # If the stack is empty, then there aren't enough values being + # pushed from previous instructions to fulfill the needs of + # this instruction. In that case the values must be coming + # from previous basic blocks. missing -= 1 - name = :"in_#{missing}" + argument = BlockArgument.new(:"in_#{missing}") - insn_flow.in.unshift(name) - block_flow.in.unshift(name) + insn_flow.in.unshift(argument) + block_flow.in.unshift(argument) else - # Connect this consumer to the producer of the value. + # Since there are values in the stack, we can connect this + # consumer to the producer of the value. insn_flow.in.unshift(stack.pop) end end # Record on our abstract stack that this instruction pushed # this value onto the stack. - insn.pushes.times { stack << length } + insn.pushes.times { stack << LocalArgument.new(length) } end # Values that are left on the stack after going through all # instructions are arguments to the basic block that we jump to. stack.reverse_each.with_index do |producer, index| block_flow.out << producer - insn_flows[producer].out << :"out_#{index}" + + argument = BlockArgument.new(:"out_#{index}") + insn_flows[producer.length].out << argument end end @@ -249,17 +301,17 @@ def find_local_flow insn_flows[length].in.each do |producer| # If it's actually another instruction and not a basic block # argument... - if producer.is_a?(Integer) + if producer.is_a?(LocalArgument) # Record in the producing instruction that it produces a value # used by this construction. - insn_flows[producer].out << length + insn_flows[producer.length].out << LocalArgument.new(length) end end end end # Find the data that flows between basic blocks. - def find_global_flow + def find_external_flow stack = [*cfg.blocks] until stack.empty? @@ -275,7 +327,7 @@ def find_global_flow # If so then add arguments to pass data through from the # incoming block's incoming blocks. 
(block_flow.in.size - incoming_flow.out.size).times do |index| - name = :"pass_#{index}" + name = BlockArgument.new(:"pass_#{index}") incoming_flow.in.unshift(name) incoming_flow.out.unshift(name) @@ -283,7 +335,8 @@ def find_global_flow # Since we modified the incoming block, add it back to the stack # so it'll be considered as an outgoing block again, and - # propogate the global data flow back up the control flow graph. + # propagate the external data flow back up the control flow + # graph. stack << incoming_block end end diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb index 45fc6121..918a3c86 100644 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -269,6 +269,10 @@ def to_a ] end + def to_cfg + ControlFlowGraph.compile(self) + end + def disasm fmt = Disassembler.new fmt.enqueue(self) diff --git a/lib/syntax_tree/yarv/instructions.rb b/lib/syntax_tree/yarv/instructions.rb index 9bd8f0cd..38c80fde 100644 --- a/lib/syntax_tree/yarv/instructions.rb +++ b/lib/syntax_tree/yarv/instructions.rb @@ -44,6 +44,13 @@ def leaves? def falls_through? false end + + # Does the instruction have side effects? Control-flow counts as a + # side-effect, as do some special-case instructions like Leave. By default + # every instruction is marked as having side effects. + def side_effects? + true + end end # ### Summary @@ -1166,6 +1173,10 @@ def pushes def call(vm) vm.push(vm.stack.last.dup) end + + def side_effects? + false + end end # ### Summary @@ -2470,6 +2481,10 @@ def ==(other) def call(vm) end + + def side_effects? + false + end end # ### Summary @@ -4439,6 +4454,10 @@ def pops def call(vm) vm.pop end + + def side_effects? + false + end end # ### Summary @@ -4479,6 +4498,10 @@ def canonical def call(vm) canonical.call(vm) end + + def side_effects?
+ false + end end # ### Summary @@ -4525,6 +4548,10 @@ def pushes def call(vm) vm.push(object) end + + def side_effects? + false + end end # ### Summary @@ -4567,6 +4594,10 @@ def canonical def call(vm) canonical.call(vm) end + + def side_effects? + false + end end # ### Summary @@ -4609,6 +4640,10 @@ def canonical def call(vm) canonical.call(vm) end + + def side_effects? + false + end end # ### Summary @@ -4645,6 +4680,10 @@ def pushes def call(vm) vm.push(vm.frame._self) end + + def side_effects? + false + end end # ### Summary diff --git a/lib/syntax_tree/yarv/sea_of_nodes.rb b/lib/syntax_tree/yarv/sea_of_nodes.rb new file mode 100644 index 00000000..be027f39 --- /dev/null +++ b/lib/syntax_tree/yarv/sea_of_nodes.rb @@ -0,0 +1,464 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + # A sea of nodes is an intermediate representation used by a compiler to + # represent both control and data flow in the same graph. The way we use it + # allows us to have the vertices of the graph represent either an + # instruction in the instruction sequence or a synthesized node that we add + # to the graph. The edges of the graph represent either control flow or data + # flow. + class SeaOfNodes + # This object represents a node in the graph that holds a YARV + # instruction. + class InsnNode + attr_reader :inputs, :outputs, :insn, :offset + + def initialize(insn, offset) + @inputs = [] + @outputs = [] + + @insn = insn + @offset = offset + end + + def id + offset + end + + def label + "%04d %s" % [offset, insn.disasm(Disassembler::Mermaid.new)] + end + end + + # Phi nodes are used to represent the merging of data flow from multiple + # incoming blocks. + class PhiNode + attr_reader :inputs, :outputs, :id + + def initialize(id) + @inputs = [] + @outputs = [] + @id = id + end + + def label + "#{id} φ" + end + end + + # Merge nodes are present in any block that has multiple incoming blocks. + # It provides a place for Phi nodes to attach their results. 
+ class MergeNode + attr_reader :inputs, :outputs, :id + + def initialize(id) + @inputs = [] + @outputs = [] + @id = id + end + + def label + "#{id} ψ" + end + end + + # The edge of a graph represents either control flow or data flow. + class Edge + TYPES = %i[data control info].freeze + + attr_reader :from + attr_reader :to + attr_reader :type + attr_reader :label + + def initialize(from, to, type, label) + raise unless TYPES.include?(type) + + @from = from + @to = to + @type = type + @label = label + end + end + + # A subgraph represents the local data and control flow of a single basic + # block. + class SubGraph + attr_reader :first_fixed, :last_fixed, :inputs, :outputs + + def initialize(first_fixed, last_fixed, inputs, outputs) + @first_fixed = first_fixed + @last_fixed = last_fixed + @inputs = inputs + @outputs = outputs + end + end + + # The compiler is responsible for taking a data flow graph and turning it + # into a sea of nodes. + class Compiler + attr_reader :dfg, :nodes + + def initialize(dfg) + @dfg = dfg + @nodes = [] + + # We need to put a unique ID on the synthetic nodes in the graph, so + # we keep a counter that we increment any time we create a new + # synthetic node. + @id_counter = 999 + end + + def compile + local_graphs = {} + dfg.blocks.each do |block| + local_graphs[block.id] = create_local_graph(block) + end + + connect_local_graphs_control(local_graphs) + connect_local_graphs_data(local_graphs) + cleanup + + SeaOfNodes.new(dfg, nodes, local_graphs).tap(&:verify) + end + + private + + # Counter for synthetic nodes. + def id_counter + @id_counter += 1 + end + + # Create a sub-graph for a single basic block - basic block argument + # inputs and outputs will be left dangling, to be connected later. + def create_local_graph(block) + block_flow = dfg.block_flows.fetch(block.id) + + # A map of instructions to nodes. + insn_nodes = {} + + # Create a node for each instruction in the block.
+ block.each_with_length do |insn, offset| + node = InsnNode.new(insn, offset) + insn_nodes[offset] = node + nodes << node + end + + # The first and last node in the sub-graph, and the last fixed node. + previous_fixed = nil + first_fixed = nil + last_fixed = nil + + # The merge node for the phi nodes to attach to. + merge_node = nil + + # If there is more than one predecessor and we have basic block + # arguments coming in, then we need a merge node for the phi nodes to + # attach to. + if block.incoming_blocks.size > 1 && !block_flow.in.empty? + merge_node = MergeNode.new(id_counter) + nodes << merge_node + + previous_fixed = merge_node + first_fixed = merge_node + last_fixed = merge_node + end + + # Connect local control flow (only nodes with side effects.) + block.each_with_length do |insn, length| + if insn.side_effects? + insn_node = insn_nodes[length] + connect previous_fixed, insn_node, :control if previous_fixed + previous_fixed = insn_node + first_fixed ||= insn_node + last_fixed = insn_node + end + end + + # Connect basic block arguments. + inputs = {} + outputs = {} + block_flow.in.each do |arg| + # Each basic block argument gets a phi node. Even if there's only + # one predecessor! We'll tidy this up later. + phi = PhiNode.new(id_counter) + connect(phi, merge_node, :info) if merge_node + nodes << phi + inputs[arg] = phi + + block.each_with_length do |_, consumer_offset| + consumer_flow = dfg.insn_flows[consumer_offset] + consumer_flow.in.each_with_index do |producer, input_index| + if producer == arg + connect(phi, insn_nodes[consumer_offset], :data, input_index) + end + end + end + + block_flow.out.each { |out| outputs[out] = phi if out == arg } + end + + # Connect local dataflow from consumers back to producers. + block.each_with_length do |_, consumer_offset| + consumer_flow = dfg.insn_flows.fetch(consumer_offset) + consumer_flow.in.each_with_index do |producer, input_index| + if producer.local? 
+ connect( + insn_nodes[producer.length], + insn_nodes[consumer_offset], + :data, + input_index + ) + end + end + end + + # Connect dataflow from producers that leaves the block. + block.each_with_length do |_, producer_pc| + dfg + .insn_flows + .fetch(producer_pc) + .out + .each do |consumer| + unless consumer.local? + # This is an argument to the successor block - not to an + # instruction here. + outputs[consumer.name] = insn_nodes[producer_pc] + end + end + end + + # A graph with only side-effect free instructions will currently have + # no fixed nodes! In that case just use the first instruction's node + # for both first and last. But it's a bug that it'll appear in the + # control flow path! + SubGraph.new( + first_fixed || insn_nodes[block.block_start], + last_fixed || insn_nodes[block.block_start], + inputs, + outputs + ) + end + + # Connect control flow that flows between basic blocks. + def connect_local_graphs_control(local_graphs) + dfg.blocks.each do |predecessor| + predecessor_last = local_graphs[predecessor.id].last_fixed + predecessor.outgoing_blocks.each_with_index do |successor, index| + label = + if index > 0 && + index == (predecessor.outgoing_blocks.length - 1) + # If there are multiple outgoing blocks from this block, then + # the last one is a fallthrough. Otherwise it's a branch. + :fallthrough + else + :"branch#{index}" + end + + connect( + predecessor_last, + local_graphs[successor.id].first_fixed, + :control, + label + ) + end + end + end + + # Connect data flow that flows between basic blocks. + def connect_local_graphs_data(local_graphs) + dfg.blocks.each do |predecessor| + arg_outs = local_graphs[predecessor.id].outputs.values + arg_outs.each_with_index do |arg_out, arg_n| + predecessor.outgoing_blocks.each do |successor| + successor_graph = local_graphs[successor.id] + arg_in = successor_graph.inputs.values[arg_n] + + # We're connecting to a phi node, so we may need a special + # label. 
+ raise unless arg_in.is_a?(PhiNode) + + label = + case arg_out + when InsnNode + # Instructions that go into a phi node are labelled by the + # offset of last instruction in the block that executed + # them. This way you know which value to use for the phi, + # based on the last instruction you executed. + dfg.blocks.find do |block| + block_start = block.block_start + block_end = + block_start + block.insns.sum(&:length) - + block.insns.last.length + + if (block_start..block_end).cover?(arg_out.offset) + break block_end + end + end + when PhiNode + # Phi nodes to phi nodes are not labelled. + else + raise + end + + connect(arg_out, arg_in, :data, label) + end + end + end + end + + # We don't always build things in an optimal way. Go back and fix up + # some mess we left. Ideally we wouldn't create these problems in the + # first place. + def cleanup + nodes.dup.each do |node| # dup because we're mutating + next unless node.is_a?(PhiNode) + + if node.inputs.size == 1 + # Remove phi nodes with a single input. + node.inputs.each do |producer_edge| + node.outputs.each do |consumer_edge| + connect( + producer_edge.from, + consumer_edge.to, + producer_edge.type, + consumer_edge.label + ) + end + end + + remove(node) + elsif node.inputs.map(&:from).uniq.size == 1 + # Remove phi nodes where all inputs are the same. + producer_edge = node.inputs.first + consumer_edge = node.outputs.find { |e| !e.to.is_a?(MergeNode) } + connect( + producer_edge.from, + consumer_edge.to, + :data, + consumer_edge.label + ) + remove(node) + end + end + end + + # Connect one node to another. + def connect(from, to, type, label = nil) + raise if from == to + raise if !to.is_a?(PhiNode) && type == :data && label.nil? + + edge = Edge.new(from, to, type, label) + from.outputs << edge + to.inputs << edge + end + + # Remove a node from the graph. + def remove(node) + node.inputs.each do |producer_edge| + producer_edge.from.outputs.reject! 
{ |edge| edge.to == node } + end + + node.outputs.each do |consumer_edge| + consumer_edge.to.inputs.reject! { |edge| edge.from == node } + end + + nodes.delete(node) + end + end + + attr_reader :dfg, :nodes, :local_graphs + + def initialize(dfg, nodes, local_graphs) + @dfg = dfg + @nodes = nodes + @local_graphs = local_graphs + end + + def to_mermaid + output = StringIO.new + output.puts("flowchart TD") + + nodes.each do |node| + escaped = "\"#{CGI.escapeHTML(node.label)}\"" + output.puts(" node_#{node.id}(#{escaped})") + end + + link_counter = 0 + nodes.each do |producer| + producer.outputs.each do |consumer_edge| + case consumer_edge.type + when :data + edge = "-->" + edge_style = "stroke:green;" + when :control + edge = "-->" + edge_style = "stroke:red;" + when :info + edge = "-.->" + else + raise + end + + label = + if !consumer_edge.label + "" + elsif consumer_edge.to.is_a?(PhiNode) + # Edges into phi nodes are labelled by the offset of the + # instruction going into the merge. + "|%04d| " % consumer_edge.label + else + "|#{consumer_edge.label}| " + end + + to_id = "node_#{consumer_edge.to.id}" + output.puts(" node_#{producer.id} #{edge} #{label}#{to_id}") + + if edge_style + output.puts(" linkStyle #{link_counter} #{edge_style}") + end + + link_counter += 1 + end + end + + output.string + end + + def verify + # Verify edge labels. + nodes.each do |node| + # Not talking about phi nodes right now. + next if node.is_a?(PhiNode) + + if node.is_a?(InsnNode) && node.insn.branch_targets.any? && + !node.insn.is_a?(Leave) + # A branching node must have at least one branch edge and + # potentially a fallthrough edge coming out. + + labels = node.outputs.map(&:label).sort + raise if labels[0] != :branch0 + raise if labels[1] != :fallthrough && labels.size > 2 + else + labels = node.inputs.filter { |e| e.type == :data }.map(&:label) + next if labels.empty? + + # No nil labels + raise if labels.any?(&:nil?) + + # Labels should start at zero. 
+ raise unless labels.min.zero? + + # Labels should be contiguous. + raise unless labels.sort == (labels.min..labels.max).to_a + end + end + end + + def self.compile(dfg) + Compiler.new(dfg).compile + end + end + end +end diff --git a/test/yarv_test.rb b/test/yarv_test.rb index 5ac37504..e6a3adda 100644 --- a/test/yarv_test.rb +++ b/test/yarv_test.rb @@ -366,6 +366,116 @@ def test_dfg DFG end + def test_son + iseq = RubyVM::InstructionSequence.compile("(14 < 0 ? -1 : +1) + 100") + iseq = SyntaxTree::YARV::InstructionSequence.from(iseq.to_a) + cfg = SyntaxTree::YARV::ControlFlowGraph.compile(iseq) + dfg = SyntaxTree::YARV::DataFlowGraph.compile(cfg) + son = SyntaxTree::YARV::SeaOfNodes.compile(dfg) + + assert_equal(<<~SON, son.to_mermaid) + flowchart TD + node_0("0000 putobject 14") + node_2("0002 putobject_INT2FIX_0_") + node_3("0003 opt_lt <calldata!mid:<, argc:1, ARGS_SIMPLE>") + node_5("0005 branchunless 0011") + node_7("0007 putobject -1") + node_9("0009 jump 0012") + node_11("0011 putobject_INT2FIX_1_") + node_12("0012 putobject 100") + node_14("0014 opt_plus <calldata!mid:+, argc:1, ARGS_SIMPLE>") + node_16("0016 leave") + node_1000("1000 ψ") + node_1001("1001 φ") + node_0 --> |0| node_3 + linkStyle 0 stroke:green; + node_2 --> |1| node_3 + linkStyle 1 stroke:green; + node_3 --> node_5 + linkStyle 2 stroke:red; + node_3 --> |0| node_5 + linkStyle 3 stroke:green; + node_5 --> |branch0| node_11 + linkStyle 4 stroke:red; + node_5 --> |fallthrough| node_9 + linkStyle 5 stroke:red; + node_7 --> |0009| node_1001 + linkStyle 6 stroke:green; + node_9 --> |branch0| node_1000 + linkStyle 7 stroke:red; + node_11 --> |branch0| node_1000 + linkStyle 8 stroke:red; + node_11 --> |0011| node_1001 + linkStyle 9 stroke:green; + node_12 --> |1| node_14 + linkStyle 10 stroke:green; + node_14 --> node_16 + linkStyle 11 stroke:red; + node_14 --> |0| node_16 + linkStyle 12 stroke:green; + node_1000 --> node_14 + linkStyle 13 stroke:red; + node_1001 -.-> node_1000 + node_1001 --> 
|0| node_14 + linkStyle 15 stroke:green; + SON + end + + def test_son_indirect_basic_block_argument + iseq = RubyVM::InstructionSequence.compile("100 + (14 < 0 ? -1 : +1)") + iseq = SyntaxTree::YARV::InstructionSequence.from(iseq.to_a) + cfg = SyntaxTree::YARV::ControlFlowGraph.compile(iseq) + dfg = SyntaxTree::YARV::DataFlowGraph.compile(cfg) + son = SyntaxTree::YARV::SeaOfNodes.compile(dfg) + + assert_equal(<<~SON, son.to_mermaid) + flowchart TD + node_0("0000 putobject 100") + node_2("0002 putobject 14") + node_4("0004 putobject_INT2FIX_0_") + node_5("0005 opt_lt <calldata!mid:<, argc:1, ARGS_SIMPLE>") + node_7("0007 branchunless 0013") + node_9("0009 putobject -1") + node_11("0011 jump 0014") + node_13("0013 putobject_INT2FIX_1_") + node_14("0014 opt_plus <calldata!mid:+, argc:1, ARGS_SIMPLE>") + node_16("0016 leave") + node_1002("1002 ψ") + node_1004("1004 φ") + node_0 --> |0| node_14 + linkStyle 0 stroke:green; + node_2 --> |0| node_5 + linkStyle 1 stroke:green; + node_4 --> |1| node_5 + linkStyle 2 stroke:green; + node_5 --> node_7 + linkStyle 3 stroke:red; + node_5 --> |0| node_7 + linkStyle 4 stroke:green; + node_7 --> |branch0| node_13 + linkStyle 5 stroke:red; + node_7 --> |fallthrough| node_11 + linkStyle 6 stroke:red; + node_9 --> |0011| node_1004 + linkStyle 7 stroke:green; + node_11 --> |branch0| node_1002 + linkStyle 8 stroke:red; + node_13 --> |branch0| node_1002 + linkStyle 9 stroke:red; + node_13 --> |0013| node_1004 + linkStyle 10 stroke:green; + node_14 --> node_16 + linkStyle 11 stroke:red; + node_14 --> |0| node_16 + linkStyle 12 stroke:green; + node_1002 --> node_14 + linkStyle 13 stroke:red; + node_1004 -.-> node_1002 + node_1004 --> |1| node_14 + linkStyle 15 stroke:green; + SON + end + private def assert_decompiles(expected, source) From 9e09fd005663d6539c2b5570a3cb8c11bf23e311 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 7 Feb 2023 08:33:30 -0500 Subject: [PATCH 27/58] Sea of nodes optimizations and convenience functions --- 
lib/syntax_tree/yarv/control_flow_graph.rb | 4 + lib/syntax_tree/yarv/instruction_sequence.rb | 8 ++ lib/syntax_tree/yarv/sea_of_nodes.rb | 91 +++++++++++++++++--- test/yarv_test.rb | 52 +++++------ 4 files changed, 113 insertions(+), 42 deletions(-) diff --git a/lib/syntax_tree/yarv/control_flow_graph.rb b/lib/syntax_tree/yarv/control_flow_graph.rb index 1a361e5e..73d30208 100644 --- a/lib/syntax_tree/yarv/control_flow_graph.rb +++ b/lib/syntax_tree/yarv/control_flow_graph.rb @@ -203,6 +203,10 @@ def to_dfg DataFlowGraph.compile(self) end + def to_son + to_dfg.to_son + end + def to_mermaid output = StringIO.new output.puts("flowchart TD") diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb index 918a3c86..821738c9 100644 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -273,6 +273,14 @@ def to_cfg ControlFlowGraph.compile(self) end + def to_dfg + to_cfg.to_dfg + end + + def to_son + to_dfg.to_son + end + def disasm fmt = Disassembler.new fmt.enqueue(self) diff --git a/lib/syntax_tree/yarv/sea_of_nodes.rb b/lib/syntax_tree/yarv/sea_of_nodes.rb index be027f39..fdf905a7 100644 --- a/lib/syntax_tree/yarv/sea_of_nodes.rb +++ b/lib/syntax_tree/yarv/sea_of_nodes.rb @@ -118,7 +118,8 @@ def compile connect_local_graphs_control(local_graphs) connect_local_graphs_data(local_graphs) - cleanup + cleanup_phi_nodes + cleanup_insn_nodes SeaOfNodes.new(dfg, nodes, local_graphs).tap(&:verify) end @@ -311,23 +312,13 @@ def connect_local_graphs_data(local_graphs) # We don't always build things in an optimal way. Go back and fix up # some mess we left. Ideally we wouldn't create these problems in the # first place. - def cleanup + def cleanup_phi_nodes nodes.dup.each do |node| # dup because we're mutating next unless node.is_a?(PhiNode) if node.inputs.size == 1 # Remove phi nodes with a single input. 
- node.inputs.each do |producer_edge| - node.outputs.each do |consumer_edge| - connect( - producer_edge.from, - consumer_edge.to, - producer_edge.type, - consumer_edge.label - ) - end - end - + connect_over(node) remove(node) elsif node.inputs.map(&:from).uniq.size == 1 # Remove phi nodes where all inputs are the same. @@ -344,6 +335,66 @@ def cleanup end end + # Eliminate as many unnecessary nodes as we can. + def cleanup_insn_nodes + nodes.dup.each do |node| + next unless node.is_a?(InsnNode) + + case node.insn + when AdjustStack + # If there are any inputs to the adjust stack that are immediately + # discarded, we can remove them from the input list. + number = node.insn.number + + node.inputs.dup.each do |input_edge| + next if input_edge.type != :data + + from = input_edge.from + next unless from.is_a?(InsnNode) + + if from.inputs.empty? && from.outputs.size == 1 + number -= 1 + remove(input_edge.from) + elsif from.insn.is_a?(Dup) + number -= 1 + connect_over(from) + remove(from) + + new_edge = node.inputs.last + new_edge.from.outputs.delete(new_edge) + node.inputs.delete(new_edge) + end + end + + if number == 0 + connect_over(node) + remove(node) + else + next_node = + if number == 1 + InsnNode.new(Pop.new, node.offset) + else + InsnNode.new(AdjustStack.new(number), node.offset) + end + + next_node.inputs.concat(node.inputs) + next_node.outputs.concat(node.outputs) + + # Dynamically finding the index of the node in the nodes array + # because we're mutating the array as we go. + nodes[nodes.index(node)] = next_node + end + when Jump + # When you have a jump instruction that only has one input and one + # output, you can just connect over top of it and remove it. + if node.inputs.size == 1 && node.outputs.size == 1 + connect_over(node) + remove(node) + end + end + end + end + # Connect one node to another. 
def connect(from, to, type, label = nil) raise if from == to @@ -354,6 +405,20 @@ def connect(from, to, type, label = nil) to.inputs << edge end + # Connect all of the inputs to all of the outputs of a node. + def connect_over(node) + node.inputs.each do |producer_edge| + node.outputs.each do |consumer_edge| + connect( + producer_edge.from, + consumer_edge.to, + producer_edge.type, + producer_edge.label + ) + end + end + end + # Remove a node from the graph. def remove(node) node.inputs.each do |producer_edge| diff --git a/test/yarv_test.rb b/test/yarv_test.rb index e6a3adda..a1e89568 100644 --- a/test/yarv_test.rb +++ b/test/yarv_test.rb @@ -302,7 +302,7 @@ def test_cfg iseq = SyntaxTree::YARV::InstructionSequence.from(iseq.to_a) cfg = SyntaxTree::YARV::ControlFlowGraph.compile(iseq) - assert_equal(<<~CFG, cfg.disasm) + assert_equal(<<~DISASM, cfg.disasm) == cfg: #@:1 (1,0)-(1,0)> block_0 0000 putobject 100 @@ -325,7 +325,7 @@ def test_cfg 0014 opt_plus 0016 leave == to: leaves - CFG + DISASM end def test_dfg @@ -334,7 +334,7 @@ def test_dfg cfg = SyntaxTree::YARV::ControlFlowGraph.compile(iseq) dfg = SyntaxTree::YARV::DataFlowGraph.compile(cfg) - assert_equal(<<~DFG, dfg.disasm) + assert_equal(<<~DISASM, dfg.disasm) == dfg: #@:1 (1,0)-(1,0)> block_0 0000 putobject 100 # out: out_0 @@ -363,7 +363,7 @@ def test_dfg 0014 opt_plus # in: in_0, in_1; out: 16 0016 leave # in: 14 == to: leaves - DFG + DISASM end def test_son @@ -373,14 +373,13 @@ def test_son dfg = SyntaxTree::YARV::DataFlowGraph.compile(cfg) son = SyntaxTree::YARV::SeaOfNodes.compile(dfg) - assert_equal(<<~SON, son.to_mermaid) + assert_equal(<<~MERMAID, son.to_mermaid) flowchart TD node_0("0000 putobject 14") node_2("0002 putobject_INT2FIX_0_") node_3("0003 opt_lt <calldata!mid:<, argc:1, ARGS_SIMPLE>") node_5("0005 branchunless 0011") node_7("0007 putobject -1") - node_9("0009 jump 0012") node_11("0011 putobject_INT2FIX_1_") node_12("0012 putobject 100") node_14("0014 opt_plus <calldata!mid:+, argc:1, 
ARGS_SIMPLE>") @@ -397,28 +396,26 @@ def test_son linkStyle 3 stroke:green; node_5 --> |branch0| node_11 linkStyle 4 stroke:red; - node_5 --> |fallthrough| node_9 + node_5 --> |fallthrough| node_1000 linkStyle 5 stroke:red; node_7 --> |0009| node_1001 linkStyle 6 stroke:green; - node_9 --> |branch0| node_1000 - linkStyle 7 stroke:red; node_11 --> |branch0| node_1000 - linkStyle 8 stroke:red; + linkStyle 7 stroke:red; node_11 --> |0011| node_1001 - linkStyle 9 stroke:green; + linkStyle 8 stroke:green; node_12 --> |1| node_14 - linkStyle 10 stroke:green; + linkStyle 9 stroke:green; node_14 --> node_16 - linkStyle 11 stroke:red; + linkStyle 10 stroke:red; node_14 --> |0| node_16 - linkStyle 12 stroke:green; + linkStyle 11 stroke:green; node_1000 --> node_14 - linkStyle 13 stroke:red; + linkStyle 12 stroke:red; node_1001 -.-> node_1000 node_1001 --> |0| node_14 - linkStyle 15 stroke:green; - SON + linkStyle 14 stroke:green; + MERMAID end def test_son_indirect_basic_block_argument @@ -428,7 +425,7 @@ def test_son_indirect_basic_block_argument dfg = SyntaxTree::YARV::DataFlowGraph.compile(cfg) son = SyntaxTree::YARV::SeaOfNodes.compile(dfg) - assert_equal(<<~SON, son.to_mermaid) + assert_equal(<<~MERMAID, son.to_mermaid) flowchart TD node_0("0000 putobject 100") node_2("0002 putobject 14") @@ -436,7 +433,6 @@ def test_son_indirect_basic_block_argument node_5("0005 opt_lt <calldata!mid:<, argc:1, ARGS_SIMPLE>") node_7("0007 branchunless 0013") node_9("0009 putobject -1") - node_11("0011 jump 0014") node_13("0013 putobject_INT2FIX_1_") node_14("0014 opt_plus <calldata!mid:+, argc:1, ARGS_SIMPLE>") node_16("0016 leave") @@ -454,26 +450,24 @@ def test_son_indirect_basic_block_argument linkStyle 4 stroke:green; node_7 --> |branch0| node_13 linkStyle 5 stroke:red; - node_7 --> |fallthrough| node_11 + node_7 --> |fallthrough| node_1002 linkStyle 6 stroke:red; node_9 --> |0011| node_1004 linkStyle 7 stroke:green; - node_11 --> |branch0| node_1002 - linkStyle 8 stroke:red; 
node_13 --> |branch0| node_1002 - linkStyle 9 stroke:red; + linkStyle 8 stroke:red; node_13 --> |0013| node_1004 - linkStyle 10 stroke:green; + linkStyle 9 stroke:green; node_14 --> node_16 - linkStyle 11 stroke:red; + linkStyle 10 stroke:red; node_14 --> |0| node_16 - linkStyle 12 stroke:green; + linkStyle 11 stroke:green; node_1002 --> node_14 - linkStyle 13 stroke:red; + linkStyle 12 stroke:red; node_1004 -.-> node_1002 node_1004 --> |1| node_14 - linkStyle 15 stroke:green; - SON + linkStyle 14 stroke:green; + MERMAID end private From 93ec53b1a042ff5d5575a0f6a5dba728884572fb Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 7 Feb 2023 11:12:01 -0500 Subject: [PATCH 28/58] Optimize pop nodes --- lib/syntax_tree/yarv/sea_of_nodes.rb | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/lib/syntax_tree/yarv/sea_of_nodes.rb b/lib/syntax_tree/yarv/sea_of_nodes.rb index fdf905a7..181d729c 100644 --- a/lib/syntax_tree/yarv/sea_of_nodes.rb +++ b/lib/syntax_tree/yarv/sea_of_nodes.rb @@ -391,6 +391,30 @@ def cleanup_insn_nodes connect_over(node) remove(node) end + when Pop + from = node.inputs.find { |edge| edge.type == :data }.from + next unless from.is_a?(InsnNode) + + removed = + if from.inputs.empty? 
&& from.outputs.size == 1 + remove(from) + true + elsif from.insn.is_a?(Dup) + connect_over(from) + remove(from) + + new_edge = node.inputs.last + new_edge.from.outputs.delete(new_edge) + node.inputs.delete(new_edge) + true + else + false + end + + if removed + connect_over(node) + remove(node) + end end end end From 0411bdda92897879390b7541b133d553ef0707f5 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 7 Feb 2023 12:26:09 -0500 Subject: [PATCH 29/58] Documentation on changing the structure of the AST --- doc/changing_structure.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 doc/changing_structure.md diff --git a/doc/changing_structure.md b/doc/changing_structure.md new file mode 100644 index 00000000..74012f26 --- /dev/null +++ b/doc/changing_structure.md @@ -0,0 +1,16 @@ +# Changing structure + +First and foremost, changing the structure of the tree in any way is a major breaking change. It forces the consumers to update their visitors, pattern matches, and method calls. It should not be taken lightly, and can only happen on a major version change. So keep that in mind. + +That said, if you do want to change the structure of the tree, there are a few steps that you have to take. They are enumerated below. + +1. Change the structure in the required node classes. This could mean adding/removing classes or adding/removing fields. Be sure to also update the `copy` and `===` methods to be sure that they are correct. +2. Update the parser to correctly create the new structure. +3. Update any visitor methods that are affected by the change. For example, if adding a new node make sure to create the new visit method alias in the `Visitor` class. +4. Update the `FieldVisitor` class to be sure that the various serializers, pretty printers, and matchers all get updated accordingly. +5. Update the `DSL` module to be sure that folks can correctly create nodes with the new structure. +6. Ensure the formatting of the code hasn't changed.
This can mostly be done by running the tests, but if there's a corner case that we don't cover that is now exposed by your change be sure to add test cases. +7. Update the translation visitors to ensure we're still translating into other ASTs correctly. +8. Update the YARV compiler visitor to ensure we're still compiling correctly. +9. Make sure we aren't referencing the previous structure in any documentation or tests. +10. Be sure to update `CHANGELOG.md` with a description of the change that you made. From c13bfda6d167908437f0518d0dfe1cfe14d439c5 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 7 Feb 2023 12:10:06 -0500 Subject: [PATCH 30/58] More locations for the parser translation --- lib/syntax_tree/node.rb | 5 +- lib/syntax_tree/parser.rb | 33 +- lib/syntax_tree/translation/parser.rb | 819 ++++++++++++++------------ test/fixtures/next.rb | 7 + test/node_test.rb | 12 +- 5 files changed, 474 insertions(+), 402 deletions(-) diff --git a/lib/syntax_tree/node.rb b/lib/syntax_tree/node.rb index b1ecfdc7..ff8ee95a 100644 --- a/lib/syntax_tree/node.rb +++ b/lib/syntax_tree/node.rb @@ -11527,8 +11527,9 @@ def ===(other) # # To be clear, this method should just not exist. It's not good. It's a # place of shame. But it's necessary for now, so I'm keeping it. 
- def pin(parent) - replace = PinnedVarRef.new(value: value, location: location) + def pin(parent, pin) + replace = + PinnedVarRef.new(value: value, location: pin.location.to(location)) parent .deconstruct_keys([]) diff --git a/lib/syntax_tree/parser.rb b/lib/syntax_tree/parser.rb index 75af65bf..59128875 100644 --- a/lib/syntax_tree/parser.rb +++ b/lib/syntax_tree/parser.rb @@ -641,8 +641,7 @@ def visit(node) end def visit_var_ref(node) - pins.shift - node.pin(stack[-2]) + node.pin(stack[-2], pins.shift) end def self.visit(node, tokens) @@ -1683,6 +1682,22 @@ def on_float(value) # VarField right # ) -> FndPtn def on_fndptn(constant, left, values, right) + # The left and right of a find pattern are always going to be splats, so + # we're going to consume the * operators and use their location + # information to extend the location of the splats. + right, left = + [right, left].map do |node| + operator = consume_operator(:*) + location = + if node.value + operator.location.to(node.location) + else + operator.location + end + + node.copy(location: location) + end + # The opening of this find pattern is either going to be a left bracket, a # right left parenthesis, or the left splat. 
We're going to use this to # determine how to find the closing of the pattern, as well as determining @@ -1791,7 +1806,7 @@ def on_heredoc_beg(value) line: lineno, char: char_pos, column: current_column, - size: value.size + 1 + size: value.size ) # Here we're going to artificially create an extra node type so that if @@ -1826,7 +1841,7 @@ def on_heredoc_end(value) line: lineno, char: char_pos, column: current_column, - size: value.size + 1 + size: value.size ) heredoc_end = HeredocEnd.new(value: value.chomp, location: location) @@ -1841,9 +1856,9 @@ def on_heredoc_end(value) start_line: heredoc.location.start_line, start_char: heredoc.location.start_char, start_column: heredoc.location.start_column, - end_line: lineno, - end_char: char_pos, - end_column: current_column + end_line: location.end_line, + end_char: location.end_char, + end_column: location.end_column ) ) end @@ -2357,14 +2372,14 @@ def on_method_add_arg(call, arguments) # :call-seq: # on_method_add_block: ( - # (Break | Call | Command | CommandCall) call, + # (Break | Call | Command | CommandCall, Next) call, # Block block # ) -> Break | MethodAddBlock def on_method_add_block(call, block) location = call.location.to(block.location) case call - when Break + when Break, Next parts = call.arguments.parts node = parts.pop diff --git a/lib/syntax_tree/translation/parser.rb b/lib/syntax_tree/translation/parser.rb index 1e47b4e7..4a4b6ade 100644 --- a/lib/syntax_tree/translation/parser.rb +++ b/lib/syntax_tree/translation/parser.rb @@ -27,9 +27,9 @@ def visit_alias(node) s( :alias, [visit(node.left), visit(node.right)], - source_map_keyword( - keyword: source_range_length(node.location.start_char, 5), - expression: source_range_node(node) + source_map_keyword_bare( + source_range_length(node.location.start_char, 5), + source_range_node(node) ) ) end @@ -58,11 +58,7 @@ def visit_aref(node) [visit(node.collection)].concat(visit_all(node.index.parts)), source_map_index( begin_token: - source_range_find( - 
node.collection.location.end_char, - node.index.location.start_char, - "[" - ), + source_range_find_between(node.collection, node.index, "["), end_token: source_range_length(node.location.end_char, -1), expression: source_range_node(node) ) @@ -90,9 +86,9 @@ def visit_aref(node) source_map_send( selector: source_range( - source_range_find( - node.collection.location.end_char, - node.index.location.start_char, + source_range_find_between( + node.collection, + node.index, "[" ).begin_pos, node.location.end_char @@ -128,11 +124,7 @@ def visit_aref_field(node) [visit(node.collection)].concat(visit_all(node.index.parts)), source_map_index( begin_token: - source_range_find( - node.collection.location.end_char, - node.index.location.start_char, - "[" - ), + source_range_find_between(node.collection, node.index, "["), end_token: source_range_length(node.location.end_char, -1), expression: source_range_node(node) ) @@ -162,9 +154,9 @@ def visit_aref_field(node) source_map_send( selector: source_range( - source_range_find( - node.collection.location.end_char, - node.index.location.start_char, + source_range_find_between( + node.collection, + node.index, "[" ).begin_pos, node.location.end_char @@ -182,8 +174,8 @@ def visit_arg_block(node) :block_pass, [visit(node.value)], source_map_operator( - operator: source_range_length(node.location.start_char, 1), - expression: source_range_node(node) + source_range_length(node.location.start_char, 1), + source_range_node(node) ) ) end @@ -192,18 +184,14 @@ def visit_arg_block(node) def visit_arg_star(node) if stack[-3].is_a?(MLHSParen) && stack[-3].contents.is_a?(MLHS) if node.value.nil? 
- s( - :restarg, - [], - source_map_variable(expression: source_range_node(node)) - ) + s(:restarg, [], source_map_variable(nil, source_range_node(node))) else s( :restarg, [node.value.value.to_sym], source_map_variable( - name: source_range_node(node.value), - expression: source_range_node(node) + source_range_node(node.value), + source_range_node(node) ) ) end @@ -212,8 +200,8 @@ def visit_arg_star(node) :splat, node.value.nil? ? [] : [visit(node.value)], source_map_operator( - operator: source_range_length(node.location.start_char, 1), - expression: source_range_node(node) + source_range_length(node.location.start_char, 1), + source_range_node(node) ) ) end @@ -307,11 +295,7 @@ def visit_assign(node) target .location .with_operator( - source_range_find( - node.target.location.end_char, - node.value.location.start_char, - "=" - ) + source_range_find_between(node.target, node.value, "=") ) .with_expression(source_range_node(node)) @@ -324,19 +308,25 @@ def visit_assoc(node) expression = source_range(node.location.start_char, node.location.end_char - 1) + type, location = + if node.key.value.start_with?(/[A-Z]/) + [:const, source_map_constant(nil, expression, expression)] + else + [ + :send, + source_map_send(selector: expression, expression: expression) + ] + end + s( :pair, [ visit(node.key), - s( - node.key.value.start_with?(/[A-Z]/) ? 
:const : :send, - [nil, node.key.value.chomp(":").to_sym], - source_map_send(selector: expression, expression: expression) - ) + s(type, [nil, node.key.value.chomp(":").to_sym], location) ], source_map_operator( - operator: source_range_length(node.key.location.end_char, -1), - expression: source_range_node(node) + source_range_length(node.key.location.end_char, -1), + source_range_node(node) ) ) else @@ -344,8 +334,9 @@ def visit_assoc(node) :pair, [visit(node.key), visit(node.value)], source_map_operator( - operator: source_range_length(node.key.location.end_char, -1), - expression: source_range_node(node) + source_range_search_between(node.key, node.value, "=>") || + source_range_length(node.key.location.end_char, -1), + source_range_node(node) ) ) end @@ -357,8 +348,8 @@ def visit_assoc_splat(node) :kwsplat, [visit(node.value)], source_map_operator( - operator: source_range_length(node.location.start_char, 2), - expression: source_range_node(node) + source_range_length(node.location.start_char, 2), + source_range_node(node) ) ) end @@ -394,15 +385,14 @@ def visit_BEGIN(node) :preexe, [visit(node.statements)], source_map_keyword( - keyword: source_range_length(node.location.start_char, 5), - begin_token: - source_range_find( - node.location.start_char + 5, - node.statements.location.start_char, - "{" - ), - end_token: source_range_length(node.location.end_char, -1), - expression: source_range_node(node) + source_range_length(node.location.start_char, 5), + source_range_find( + node.location.start_char + 5, + node.statements.location.start_char, + "{" + ), + source_range_length(node.location.end_char, -1), + source_range_node(node) ) ) end @@ -450,13 +440,12 @@ def visit_binary(node) ), [visit(node.left), visit(node.right)], source_map_operator( - operator: - source_range_find( - node.left.location.end_char, - node.right.location.start_char, - node.operator.to_s - ), - expression: source_range_node(node) + source_range_find_between( + node.left, + node.right, + 
node.operator.to_s + ), + source_range_node(node) ) ) when :=~ @@ -471,13 +460,12 @@ def visit_binary(node) :match_with_lvasgn, [visit(node.left), visit(node.right)], source_map_operator( - operator: - source_range_find( - node.left.location.end_char, - node.right.location.start_char, - node.operator.to_s - ), - expression: source_range_node(node) + source_range_find_between( + node.left, + node.right, + node.operator.to_s + ), + source_range_node(node) ) ) else @@ -491,18 +479,14 @@ def visit_binary(node) # Visit a BlockArg node. def visit_blockarg(node) if node.name.nil? - s( - :blockarg, - [nil], - source_map_variable(expression: source_range_node(node)) - ) + s(:blockarg, [nil], source_map_variable(nil, source_range_node(node))) else s( :blockarg, [node.name.value.to_sym], source_map_variable( - name: source_range_node(node.name), - expression: source_range_node(node) + source_range_node(node.name), + source_range_node(node) ) ) end @@ -516,8 +500,8 @@ def visit_block_var(node) :shadowarg, [local.value.to_sym], source_map_variable( - name: source_range_node(local), - expression: source_range_node(local) + source_range_node(local), + source_range_node(local) ) ) end @@ -539,8 +523,8 @@ def visit_block_var(node) :arg, [required.value.to_sym], source_map_variable( - name: source_range_node(required), - expression: source_range_node(required) + source_range_node(required), + source_range_node(required) ) ) ], @@ -624,9 +608,9 @@ def visit_break(node) s( :break, visit_all(node.arguments.parts), - source_map_keyword( - keyword: source_range_length(node.location.start_char, 5), - expression: source_range_node(node) + source_map_keyword_bare( + source_range_length(node.location.start_char, 5), + source_range_node(node) ) ) end @@ -685,11 +669,7 @@ def visit_CHAR(node) def visit_class(node) operator = if node.superclass - source_range_find( - node.constant.location.end_char, - node.superclass.location.start_char, - "<" - ) + source_range_find_between(node.constant, 
node.superclass, "<") end s( @@ -824,8 +804,9 @@ def visit_const(node) :const, [nil, node.value.to_sym], source_map_constant( - name: source_range_node(node), - expression: source_range_node(node) + nil, + source_range_node(node), + source_range_node(node) ) ) end @@ -840,14 +821,9 @@ def visit_const_path_field(node) :casgn, [visit(node.parent), node.constant.value.to_sym], source_map_constant( - double_colon: - source_range_find( - node.parent.location.end_char, - node.constant.location.start_char, - "::" - ), - name: source_range_node(node.constant), - expression: source_range_node(node) + source_range_find_between(node.parent, node.constant, "::"), + source_range_node(node.constant), + source_range_node(node) ) ) end @@ -859,14 +835,9 @@ def visit_const_path_ref(node) :const, [visit(node.parent), node.constant.value.to_sym], source_map_constant( - double_colon: - source_range_find( - node.parent.location.end_char, - node.constant.location.start_char, - "::" - ), - name: source_range_node(node.constant), - expression: source_range_node(node) + source_range_find_between(node.parent, node.constant, "::"), + source_range_node(node.constant), + source_range_node(node) ) ) end @@ -877,8 +848,9 @@ def visit_const_ref(node) :const, [nil, node.constant.value.to_sym], source_map_constant( - name: source_range_node(node.constant), - expression: source_range_node(node) + nil, + source_range_node(node.constant), + source_range_node(node) ) ) end @@ -888,10 +860,7 @@ def visit_cvar(node) s( :cvar, [node.value.to_sym], - source_map_variable( - name: source_range_node(node), - expression: source_range_node(node) - ) + source_map_variable(source_range_node(node), source_range_node(node)) ) end @@ -931,9 +900,9 @@ def visit_def(node) source_map_method_definition( keyword: source_range_length(node.location.start_char, 3), assignment: - source_range_find( - (node.params || node.name).location.end_char, - node.bodystmt.location.start_char, + source_range_find_between( + (node.params 
|| node.name), + node.bodystmt, "=" ), name: source_range_node(node.name), @@ -983,10 +952,10 @@ def visit_defined(node) :defined?, [visit(node.value)], source_map_keyword( - keyword: source_range_length(node.location.start_char, 8), - begin_token: begin_token, - end_token: end_token, - expression: source_range_node(node) + source_range_length(node.location.start_char, 8), + begin_token, + end_token, + source_range_node(node) ) ) end @@ -1061,15 +1030,14 @@ def visit_END(node) :postexe, [visit(node.statements)], source_map_keyword( - keyword: source_range_length(node.location.start_char, 3), - begin_token: - source_range_find( - node.location.start_char + 3, - node.statements.location.start_char, - "{" - ), - end_token: source_range_length(node.location.end_char, -1), - expression: source_range_node(node) + source_range_length(node.location.start_char, 3), + source_range_find( + node.location.start_char + 3, + node.statements.location.start_char, + "{" + ), + source_range_length(node.location.end_char, -1), + source_range_node(node) ) ) end @@ -1129,32 +1097,36 @@ def visit_float(node) s( :float, [node.value.to_f], - source_map_operator( - operator: operator, - expression: source_range_node(node) - ) + source_map_operator(operator, source_range_node(node)) ) end # Visit a FndPtn node. def visit_fndptn(node) - make_match_rest = ->(child) do - if child.is_a?(VarField) && child.value.nil? - s(:match_rest, [], nil) - else - s(:match_rest, [visit(child)], nil) + left, right = + [node.left, node.right].map do |child| + location = + source_map_operator( + source_range_length(child.location.start_char, 1), + source_range_node(child) + ) + + if child.is_a?(VarField) && child.value.nil? 
+ s(:match_rest, [], location) + else + s(:match_rest, [visit(child)], location) + end end - end inner = s( :find_pattern, - [ - make_match_rest[node.left], - *visit_all(node.values), - make_match_rest[node.right] - ], - nil + [left, *visit_all(node.values), right], + source_map_collection( + begin_token: source_range_length(node.location.start_char, 1), + end_token: source_range_length(node.location.end_char, -1), + expression: source_range_node(node) + ) ) if node.constant @@ -1166,28 +1138,15 @@ def visit_fndptn(node) # Visit a For node. def visit_for(node) - begin_start = node.collection.location.end_char - begin_end = node.statements.location.start_char - - begin_token = - if buffer.source[begin_start...begin_end].include?("do") - source_range_find(begin_start, begin_end, "do") - end - s( :for, [visit(node.index), visit(node.collection), visit(node.statements)], source_map_for( - keyword: source_range_length(node.location.start_char, 3), - in_token: - source_range_find( - node.index.location.end_char, - node.collection.location.start_char, - "in" - ), - begin_token: begin_token, - end_token: source_range_length(node.location.end_char, -3), - expression: source_range_node(node) + source_range_length(node.location.start_char, 3), + source_range_find_between(node.index, node.collection, "in"), + source_range_search_between(node.collection, node.statements, "do"), + source_range_length(node.location.end_char, -3), + source_range_node(node) ) ) end @@ -1197,10 +1156,7 @@ def visit_gvar(node) s( :gvar, [node.value.to_sym], - source_map_variable( - name: source_range_node(node), - expression: source_range_node(node) - ) + source_map_variable(source_range_node(node), source_range_node(node)) ) end @@ -1303,15 +1259,32 @@ def visit_heredoc(node) end heredoc_segments.trim! + location = + source_map_heredoc( + source_range_node(node.beginning), + source_range( + if node.parts.empty? 
+ node.beginning.location.end_char + else + node.parts.first.location.start_char + end, + node.ending.location.start_char + ), + source_range( + node.ending.location.start_char, + node.ending.location.end_char - 1 + ) + ) if node.beginning.value.match?(/`\w+`\z/) - s(:xstr, heredoc_segments.segments, nil) + s(:xstr, heredoc_segments.segments, location) elsif heredoc_segments.segments.length > 1 - s(:dstr, heredoc_segments.segments, nil) + s(:dstr, heredoc_segments.segments, location) elsif heredoc_segments.segments.empty? - s(:dstr, [], nil) + s(:dstr, [], location) else - heredoc_segments.segments.first + segment = heredoc_segments.segments.first + s(segment.type, segment.children, location) end end @@ -1353,10 +1326,7 @@ def visit_ident(node) s( :lvar, [node.value.to_sym], - source_map_variable( - name: source_range_node(node), - expression: source_range_node(node) - ) + source_map_variable(source_range_node(node), source_range_node(node)) ) end @@ -1389,14 +1359,9 @@ def visit_if(node) :if, [predicate, visit(node.statements), visit(node.consequent)], if node.modifier? - source_map_keyword( - keyword: - source_range_find( - node.statements.location.end_char, - node.predicate.location.start_char, - "if" - ), - expression: source_range_node(node) + source_map_keyword_bare( + source_range_find_between(node.statements, node.predicate, "if"), + source_range_node(node) ) else begin_start = node.predicate.location.end_char @@ -1410,6 +1375,8 @@ def visit_if(node) begin_token = if buffer.source[begin_start...begin_end].include?("then") source_range_find(begin_start, begin_end, "then") + elsif buffer.source[begin_start...begin_end].include?(";") + source_range_find(begin_start, begin_end, ";") end else_token = @@ -1450,7 +1417,7 @@ def visit_imaginary(node) # case. Maybe there's an API for this but I can't find it. 
eval(node.value) ], - source_map_operator(expression: source_range_node(node)) + source_map_operator(nil, source_range_node(node)) ) end @@ -1478,19 +1445,24 @@ def visit_in(node) nil ) else + begin_token = + source_range_search_between(node.pattern, node.statements, "then") + end_char = - if node.statements.empty? + if begin_token || node.statements.empty? node.statements.location.end_char - 1 else - node.statements.body.first.location.start_char + node.statements.body.last.location.start_char end s( :in_pattern, [visit(node.pattern), nil, visit(node.statements)], source_map_keyword( - keyword: source_range_length(node.location.start_char, 2), - expression: source_range(node.location.start_char, end_char) + source_range_length(node.location.start_char, 2), + begin_token, + nil, + source_range(node.location.start_char, end_char) ) ) end @@ -1506,10 +1478,7 @@ def visit_int(node) s( :int, [node.value.to_i], - source_map_operator( - operator: operator, - expression: source_range_node(node) - ) + source_map_operator(operator, source_range_node(node)) ) end @@ -1518,10 +1487,7 @@ def visit_ivar(node) s( :ivar, [node.value.to_sym], - source_map_variable( - name: source_range_node(node), - expression: source_range_node(node) - ) + source_map_variable(source_range_node(node), source_range_node(node)) ) end @@ -1548,18 +1514,14 @@ def visit_kw(node) # Visit a KwRestParam node. def visit_kwrest_param(node) if node.name.nil? 
- s( - :kwrestarg, - [], - source_map_variable(expression: source_range_node(node)) - ) + s(:kwrestarg, [], source_map_variable(nil, source_range_node(node))) else s( :kwrestarg, [node.name.value.to_sym], source_map_variable( - name: source_range_node(node.name), - expression: source_range_node(node) + source_range_node(node.name), + source_range_node(node) ) ) end @@ -1635,8 +1597,8 @@ def visit_lambda_var(node) :shadowarg, [local.value.to_sym], source_map_variable( - name: source_range_node(local), - expression: source_range_node(local) + source_range_node(local), + source_range_node(local) ) ) end @@ -1661,13 +1623,8 @@ def visit_massign(node) :masgn, [visit(node.target), visit(node.value)], source_map_operator( - operator: - source_range_find( - node.target.location.end_char, - node.value.location.start_char, - "=" - ), - expression: source_range_node(node) + source_range_find_between(node.target, node.value, "="), + source_range_node(node) ) ) end @@ -1722,8 +1679,8 @@ def visit_mlhs(node) :arg, [part.value.to_sym], source_map_variable( - name: source_range_node(part), - expression: source_range_node(part) + source_range_node(part), + source_range_node(part) ) ) else @@ -1778,9 +1735,9 @@ def visit_next(node) s( :next, visit_all(node.arguments.parts), - source_map_keyword( - keyword: source_range_length(node.location.start_char, 4), - expression: source_range_node(node) + source_map_keyword_bare( + source_range_length(node.location.start_char, 4), + source_range_node(node) ) ) end @@ -1839,10 +1796,45 @@ def visit_not(node) # Visit an OpAssign node. 
def visit_opassign(node) location = - source_map_variable( - name: source_range_node(node.target), - expression: source_range_node(node) - ).with_operator(source_range_node(node.operator)) + case node.target + when ARefField + source_map_index( + begin_token: + source_range_find( + node.target.collection.location.end_char, + if node.target.index + node.target.index.location.start_char + else + node.target.location.end_char + end, + "[" + ), + end_token: source_range_length(node.target.location.end_char, -1), + expression: source_range_node(node) + ) + when Field + source_map_send( + dot: + if node.target.operator == :"::" + source_range_find_between( + node.target.parent, + node.target.name, + "::" + ) + else + source_range_node(node.target.operator) + end, + selector: source_range_node(node.target.name), + expression: source_range_node(node) + ) + else + source_map_variable( + source_range_node(node.target), + source_range_node(node) + ) + end + + location = location.with_operator(source_range_node(node.operator)) case node.operator.value when "||=" @@ -1876,8 +1868,8 @@ def visit_params(node) :arg, [required.value.to_sym], source_map_variable( - name: source_range_node(required), - expression: source_range_node(required) + source_range_node(required), + source_range_node(required) ) ) end @@ -1889,16 +1881,9 @@ def visit_params(node) :optarg, [name.value.to_sym, visit(value)], source_map_variable( - name: source_range_node(name), - expression: - source_range_node(name).join(source_range_node(value)) - ).with_operator( - source_range_find( - name.location.end_char, - value.location.start_char, - "=" - ) - ) + source_range_node(name), + source_range_node(name).join(source_range_node(value)) + ).with_operator(source_range_find_between(name, value, "=")) ) end @@ -1912,8 +1897,8 @@ def visit_params(node) :arg, [post.value.to_sym], source_map_variable( - name: source_range_node(post), - expression: source_range_node(post) + source_range_node(post), + 
source_range_node(post) ) ) end @@ -1927,13 +1912,11 @@ def visit_params(node) :kwoptarg, [key, visit(value)], source_map_variable( - name: - source_range( - name.location.start_char, - name.location.end_char - 1 - ), - expression: - source_range_node(name).join(source_range_node(value)) + source_range( + name.location.start_char, + name.location.end_char - 1 + ), + source_range_node(name).join(source_range_node(value)) ) ) else @@ -1941,12 +1924,11 @@ def visit_params(node) :kwarg, [key], source_map_variable( - name: - source_range( - name.location.start_char, - name.location.end_char - 1 - ), - expression: source_range_node(name) + source_range( + name.location.start_char, + name.location.end_char - 1 + ), + source_range_node(name) ) ) end @@ -1960,8 +1942,8 @@ def visit_params(node) :kwnilarg, [], source_map_variable( - name: source_range_length(node.location.end_char, -3), - expression: source_range_node(node) + source_range_length(node.location.end_char, -3), + source_range_node(node) ) ) else @@ -2011,12 +1993,41 @@ def visit_paren(node) # Visit a PinnedBegin node. def visit_pinned_begin(node) - s(:pin, [s(:begin, [visit(node.statement)], nil)], nil) + s( + :pin, + [ + s( + :begin, + [visit(node.statement)], + source_map_collection( + begin_token: + source_range_length(node.location.start_char + 1, 1), + end_token: source_range_length(node.location.end_char, -1), + expression: + source_range( + node.location.start_char + 1, + node.location.end_char + ) + ) + ) + ], + source_map_send( + selector: source_range_length(node.location.start_char, 1), + expression: source_range_node(node) + ) + ) end # Visit a PinnedVarRef node. def visit_pinned_var_ref(node) - s(:pin, [visit(node.value)], nil) + s( + :pin, + [visit(node.value)], + source_map_send( + selector: source_range_length(node.location.start_char, 1), + expression: source_range_node(node) + ) + ) end # Visit a Program node. @@ -2057,8 +2068,8 @@ def visit_range(node) node.operator.value == ".." ? 
:irange : :erange, [visit(node.left), visit(node.right)], source_map_operator( - operator: source_range_node(node.operator), - expression: source_range_node(node) + source_range_node(node.operator), + source_range_node(node) ) ) end @@ -2069,8 +2080,8 @@ def visit_rassign(node) node.operator.value == "=>" ? :match_pattern : :match_pattern_p, [visit(node.value), visit(node.pattern)], source_map_operator( - operator: source_range_node(node.operator), - expression: source_range_node(node) + source_range_node(node.operator), + source_range_node(node) ) ) end @@ -2080,7 +2091,7 @@ def visit_rational(node) s( :rational, [node.value.to_r], - source_map_operator(expression: source_range_node(node)) + source_map_operator(nil, source_range_node(node)) ) end @@ -2089,9 +2100,9 @@ def visit_redo(node) s( :redo, [], - source_map_keyword( - keyword: source_range_node(node), - expression: source_range_node(node) + source_map_keyword_bare( + source_range_node(node), + source_range_node(node) ) ) end @@ -2245,11 +2256,7 @@ def visit_rescue(node) # Visit a RescueMod node. 
def visit_rescue_mod(node) keyword = - source_range_find( - node.statement.location.end_char, - node.value.location.start_char, - "rescue" - ) + source_range_find_between(node.statement, node.value, "rescue") s( :rescue, @@ -2276,16 +2283,12 @@ def visit_rest_param(node) :restarg, [node.name.value.to_sym], source_map_variable( - name: source_range_node(node.name), - expression: source_range_node(node) + source_range_node(node.name), + source_range_node(node) ) ) else - s( - :restarg, - [], - source_map_variable(expression: source_range_node(node)) - ) + s(:restarg, [], source_map_variable(nil, source_range_node(node))) end end @@ -2294,9 +2297,9 @@ def visit_retry(node) s( :retry, [], - source_map_keyword( - keyword: source_range_node(node), - expression: source_range_node(node) + source_map_keyword_bare( + source_range_node(node), + source_range_node(node) ) ) end @@ -2306,9 +2309,9 @@ def visit_return(node) s( :return, node.arguments ? visit_all(node.arguments.parts) : [], - source_map_keyword( - keyword: source_range_length(node.location.start_char, 6), - expression: source_range_node(node) + source_map_keyword_bare( + source_range_length(node.location.start_char, 6), + source_range_node(node) ) ) end @@ -2399,7 +2402,11 @@ def visit_string_literal(node) location = if node.quote source_map_collection( - begin_token: source_range_length(node.location.start_char, 1), + begin_token: + source_range_length( + node.location.start_char, + node.quote.length + ), end_token: source_range_length(node.location.end_char, -1), expression: source_range_node(node) ) @@ -2423,9 +2430,9 @@ def visit_super(node) s( :super, visit_all(node.arguments.parts), - source_map_keyword( - keyword: source_range_node(node), - expression: source_range_node(node) + source_map_keyword_bare( + source_range_length(node.location.start_char, 5), + source_range_node(node) ) ) else @@ -2435,15 +2442,14 @@ def visit_super(node) :super, [], source_map_keyword( - keyword: 
source_range_length(node.location.start_char, 5), - begin_token: - source_range_find( - node.location.start_char + 5, - node.location.end_char, - "(" - ), - end_token: source_range_length(node.location.end_char, -1), - expression: source_range_node(node) + source_range_length(node.location.start_char, 5), + source_range_find( + node.location.start_char + 5, + node.location.end_char, + "(" + ), + source_range_length(node.location.end_char, -1), + source_range_node(node) ) ) when ArgsForward @@ -2453,15 +2459,14 @@ def visit_super(node) :super, visit_all(node.arguments.arguments.parts), source_map_keyword( - keyword: source_range_length(node.location.start_char, 5), - begin_token: - source_range_find( - node.location.start_char + 5, - node.location.end_char, - "(" - ), - end_token: source_range_length(node.location.end_char, -1), - expression: source_range_node(node) + source_range_length(node.location.start_char, 5), + source_range_find( + node.location.start_char + 5, + node.location.end_char, + "(" + ), + source_range_length(node.location.end_char, -1), + source_range_node(node) ) ) end @@ -2526,9 +2531,9 @@ def visit_top_const_field(node) node.constant.value.to_sym ], source_map_constant( - double_colon: source_range_length(node.location.start_char, 2), - name: source_range_node(node.constant), - expression: source_range_node(node) + source_range_length(node.location.start_char, 2), + source_range_node(node.constant), + source_range_node(node) ) ) end @@ -2548,9 +2553,9 @@ def visit_top_const_ref(node) node.constant.value.to_sym ], source_map_constant( - double_colon: source_range_length(node.location.start_char, 2), - name: source_range_node(node.constant), - expression: source_range_node(node) + source_range_length(node.location.start_char, 2), + source_range_node(node.constant), + source_range_node(node) ) ) end @@ -2592,9 +2597,9 @@ def visit_undef(node) s( :undef, visit_all(node.symbols), - source_map_keyword( - keyword: 
source_range_length(node.location.start_char, 5), - expression: source_range_node(node) + source_map_keyword_bare( + source_range_length(node.location.start_char, 5), + source_range_node(node) ) ) end @@ -2624,14 +2629,13 @@ def visit_unless(node) :if, [predicate, visit(node.consequent), visit(node.statements)], if node.modifier? - source_map_keyword( - keyword: - source_range_find( - node.statements.location.end_char, - node.predicate.location.start_char, - "unless" - ), - expression: source_range_node(node) + source_map_keyword_bare( + source_range_find_between( + node.statements, + node.predicate, + "unless" + ), + source_range_node(node) ) else source_map_condition( @@ -2649,20 +2653,20 @@ def visit_until(node) loop_post?(node) ? :until_post : :until, [visit(node.predicate), visit(node.statements)], if node.modifier? - source_map_keyword( - keyword: - source_range_find( - node.statements.location.end_char, - node.predicate.location.start_char, - "until" - ), - expression: source_range_node(node) + source_map_keyword_bare( + source_range_find_between( + node.statements, + node.predicate, + "until" + ), + source_range_node(node) ) else source_map_keyword( - keyword: source_range_length(node.location.start_char, 5), - end_token: source_range_length(node.location.end_char, -3), - expression: source_range_node(node) + source_range_length(node.location.start_char, 5), + nil, + source_range_length(node.location.end_char, -3), + source_range_node(node) ) end ) @@ -2688,8 +2692,8 @@ def visit_var_field(node) :match_var, [name], source_map_variable( - name: source_range_node(node), - expression: source_range_node(node) + source_range_node(node.value), + source_range_node(node.value) ) ) elsif node.value.is_a?(Const) @@ -2697,15 +2701,16 @@ def visit_var_field(node) :casgn, [nil, name], source_map_constant( - name: source_range_node(node.value), - expression: source_range_node(node) + nil, + source_range_node(node.value), + source_range_node(node) ) ) else location = 
source_map_variable( - name: source_range_node(node), - expression: source_range_node(node) + source_range_node(node), + source_range_node(node) ) case node.value @@ -2747,17 +2752,26 @@ def visit_vcall(node) # Visit a When node. def visit_when(node) keyword = source_range_length(node.location.start_char, 4) + begin_token = + if buffer.source[node.statements.location.start_char] == ";" + source_range_length(node.statements.location.start_char, 1) + end + + end_char = + if node.statements.body.empty? + node.statements.location.end_char + else + node.statements.body.last.location.end_char + end s( :when, visit_all(node.arguments.parts) + [visit(node.statements)], source_map_keyword( - keyword: keyword, - expression: - source_range( - keyword.begin_pos, - node.statements.location.end_char - 1 - ) + keyword, + begin_token, + nil, + source_range(keyword.begin_pos, end_char) ) ) end @@ -2768,20 +2782,20 @@ def visit_while(node) loop_post?(node) ? :while_post : :while, [visit(node.predicate), visit(node.statements)], if node.modifier? 
- source_map_keyword( - keyword: - source_range_find( - node.statements.location.end_char, - node.predicate.location.start_char, - "while" - ), - expression: source_range_node(node) + source_map_keyword_bare( + source_range_find_between( + node.statements, + node.predicate, + "while" + ), + source_range_node(node) ) else source_map_keyword( - keyword: source_range_length(node.location.start_char, 5), - end_token: source_range_length(node.location.end_char, -3), - expression: source_range_node(node) + source_range_length(node.location.start_char, 5), + nil, + source_range_length(node.location.end_char, -3), + source_range_node(node) ) end ) @@ -2828,18 +2842,18 @@ def visit_yield(node) s( :yield, [], - source_map_keyword( - keyword: source_range_length(node.location.start_char, 5), - expression: source_range_node(node) + source_map_keyword_bare( + source_range_length(node.location.start_char, 5), + source_range_node(node) ) ) when Args s( :yield, visit_all(node.arguments.parts), - source_map_keyword( - keyword: source_range_length(node.location.start_char, 5), - expression: source_range_node(node) + source_map_keyword_bare( + source_range_length(node.location.start_char, 5), + source_range_node(node) ) ) else @@ -2847,11 +2861,10 @@ def visit_yield(node) :yield, visit_all(node.arguments.contents.parts), source_map_keyword( - keyword: source_range_length(node.location.start_char, 5), - begin_token: - source_range_length(node.arguments.location.start_char, 1), - end_token: source_range_length(node.location.end_char, -1), - expression: source_range_node(node) + source_range_length(node.location.start_char, 5), + source_range_length(node.arguments.location.start_char, 1), + source_range_length(node.location.end_char, -1), + source_range_node(node) ) ) end @@ -2862,9 +2875,9 @@ def visit_zsuper(node) s( :zsuper, [], - source_map_keyword( - keyword: source_range_length(node.location.start_char, 5), - expression: source_range_node(node) + source_map_keyword_bare( + 
source_range_length(node.location.start_char, 5), + source_range_node(node) ) ) end @@ -3029,7 +3042,7 @@ def source_map_condition( end # Constructs a new source map for a constant reference. - def source_map_constant(double_colon: nil, name: nil, expression:) + def source_map_constant(double_colon, name, expression) ::Parser::Source::Map::Constant.new(double_colon, name, expression) end @@ -3049,13 +3062,7 @@ def source_map_definition( end # Constructs a new source map for a for loop. - def source_map_for( - keyword: nil, - in_token: nil, - begin_token: nil, - end_token: nil, - expression: - ) + def source_map_for(keyword, in_token, begin_token, end_token, expression) ::Parser::Source::Map::For.new( keyword, in_token, @@ -3065,18 +3072,22 @@ def source_map_for( ) end + # Constructs a new source map for a heredoc. + def source_map_heredoc(expression, heredoc_body, heredoc_end) + ::Parser::Source::Map::Heredoc.new( + expression, + heredoc_body, + heredoc_end + ) + end + # Construct a source map for an index operation. def source_map_index(begin_token: nil, end_token: nil, expression:) ::Parser::Source::Map::Index.new(begin_token, end_token, expression) end # Constructs a new source map for the use of a keyword. - def source_map_keyword( - keyword: nil, - begin_token: nil, - end_token: nil, - expression: - ) + def source_map_keyword(keyword, begin_token, end_token, expression) ::Parser::Source::Map::Keyword.new( keyword, begin_token, @@ -3085,6 +3096,12 @@ def source_map_keyword( ) end + # Constructs a new source map for the use of a keyword without a begin or + # end token. + def source_map_keyword_bare(keyword, expression) + source_map_keyword(keyword, nil, nil, expression) + end + # Constructs a new source map for a method definition. def source_map_method_definition( keyword: nil, @@ -3105,7 +3122,7 @@ def source_map_method_definition( end # Constructs a new source map for an operator. 
- def source_map_operator(operator: nil, expression:) + def source_map_operator(operator, expression) ::Parser::Source::Map::Operator.new(operator, expression) end @@ -3142,7 +3159,7 @@ def source_map_send( end # Constructs a new source map for a variable. - def source_map_variable(name: nil, expression:) + def source_map_variable(name, expression) ::Parser::Source::Map::Variable.new(name, expression) end @@ -3152,16 +3169,48 @@ def source_range(start_char, end_char) end # Constructs a new source range by finding the given needle in the given - # range of the source. - def source_range_find(start_char, end_char, needle) + # range of the source. If the needle is not found, returns nil. + def source_range_search(start_char, end_char, needle) index = buffer.source[start_char...end_char].index(needle) - unless index + return unless index + + offset = start_char + index + source_range(offset, offset + needle.length) + end + + # Constructs a new source range by searching for the given needle between + # the end location of the start node and the start location of the end + # node. If the needle is not found, returns nil. + def source_range_search_between(start_node, end_node, needle) + source_range_search( + start_node.location.end_char, + end_node.location.start_char, + needle + ) + end + + # Constructs a new source range by finding the given needle in the given + # range of the source. If the needle is not found, raises an error. + def source_range_find(start_char, end_char, needle) + source_range = source_range_search(start_char, end_char, needle) + + unless source_range slice = buffer.source[start_char...end_char].inspect raise "Could not find #{needle.inspect} in #{slice}" end - offset = start_char + index - source_range(offset, offset + needle.length) + source_range + end + + # Constructs a new source range by finding the given needle between the + # end location of the start node and the start location of the end node. 
+ # If the needle is not found, raises an error. + def source_range_find_between(start_node, end_node, needle) + source_range_find( + start_node.location.end_char, + end_node.location.start_char, + needle + ) end # Constructs a new source range from the given start offset and length. diff --git a/test/fixtures/next.rb b/test/fixtures/next.rb index be667951..79a8c62e 100644 --- a/test/fixtures/next.rb +++ b/test/fixtures/next.rb @@ -65,3 +65,10 @@ next([1, 2]) - next 1, 2 +% +next fun foo do end +- +next( + fun foo do + end +) diff --git a/test/node_test.rb b/test/node_test.rb index 9660b341..19fbeed2 100644 --- a/test/node_test.rb +++ b/test/node_test.rb @@ -60,7 +60,7 @@ def test_arg_paren_heredoc ARGUMENT SOURCE - at = location(lines: 1..3, chars: 6..28) + at = location(lines: 1..3, chars: 6..37) assert_node(ArgParen, source, at: at, &:arguments) end @@ -533,7 +533,7 @@ def test_heredoc HEREDOC SOURCE - at = location(lines: 1..3, chars: 0..22) + at = location(lines: 1..3, chars: 0..30) assert_node(Heredoc, source, at: at) end @@ -544,7 +544,7 @@ def test_heredoc_beg HEREDOC SOURCE - at = location(chars: 0..11) + at = location(chars: 0..10) assert_node(HeredocBeg, source, at: at, &:beginning) end @@ -555,7 +555,7 @@ def test_heredoc_end HEREDOC SOURCE - at = location(lines: 3..3, chars: 22..31, columns: 0..9) + at = location(lines: 3..3, chars: 22..30, columns: 0..8) assert_node(HeredocEnd, source, at: at, &:ending) end @@ -950,7 +950,7 @@ def test_var_field guard_version("3.1.0") do def test_pinned_var_ref source = "foo in ^bar" - at = location(chars: 8..11) + at = location(chars: 7..11) assert_node(PinnedVarRef, source, at: at, &:pattern) end @@ -1008,7 +1008,7 @@ def test_xstring_heredoc HEREDOC SOURCE - at = location(lines: 1..3, chars: 0..18) + at = location(lines: 1..3, chars: 0..26) assert_node(Heredoc, source, at: at) end From 3f308340c97c56eedb580263c66b0d5c65a23bf8 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 7 Feb 2023 16:25:29 -0500 
Subject: [PATCH 31/58] Strip out whitequark/parser submodule --- .gitmodules | 6 - Rakefile | 18 +- tasks/spec.rake | 10 + tasks/whitequark.rake | 87 ++ test/ruby-syntax-fixtures | 1 - test/ruby_syntax_fixtures_test.rb | 19 - test/suites/helper.rb | 3 - test/suites/parse_helper.rb | 175 --- test/suites/parser | 1 - test/translation/parser.txt | 1824 +++++++++++++++++++++++++++++ test/translation/parser_test.rb | 168 +++ 11 files changed, 2092 insertions(+), 220 deletions(-) create mode 100644 tasks/spec.rake create mode 100644 tasks/whitequark.rake delete mode 160000 test/ruby-syntax-fixtures delete mode 100644 test/ruby_syntax_fixtures_test.rb delete mode 100644 test/suites/helper.rb delete mode 100644 test/suites/parse_helper.rb delete mode 160000 test/suites/parser create mode 100644 test/translation/parser.txt create mode 100644 test/translation/parser_test.rb diff --git a/.gitmodules b/.gitmodules index 8287c5e3..f5477ea3 100644 --- a/.gitmodules +++ b/.gitmodules @@ -4,9 +4,3 @@ [submodule "spec"] path = spec/ruby url = git@github.com:ruby/spec.git -[submodule "test/ruby-syntax-fixtures"] - path = test/ruby-syntax-fixtures - url = https://github.com/ruby-syntax-tree/ruby-syntax-fixtures -[submodule "test/suites/parser"] - path = test/suites/parser - url = https://github.com/whitequark/parser diff --git a/Rakefile b/Rakefile index cb96e7bf..aa8d29f6 100644 --- a/Rakefile +++ b/Rakefile @@ -4,18 +4,13 @@ require "bundler/gem_tasks" require "rake/testtask" require "syntax_tree/rake_tasks" +Rake.add_rakelib "tasks" + Rake::TestTask.new(:test) do |t| t.libs << "test" t.libs << "test/suites" t.libs << "lib" - - # These are our own tests. - test_files = FileList["test/**/*_test.rb"] - - # This is a big test file from the parser gem that tests its functionality. 
- test_files << "test/suites/parser/test/test_parser.rb" - - t.test_files = test_files + t.test_files = FileList["test/**/*_test.rb"] end task default: :test @@ -34,10 +29,3 @@ end SyntaxTree::Rake::CheckTask.new(&configure) SyntaxTree::Rake::WriteTask.new(&configure) - -desc "Run mspec tests using YARV emulation" -task :spec do - Dir["./spec/ruby/language/**/*_spec.rb"].each do |filepath| - sh "exe/yarv ./spec/mspec/bin/mspec-tag #{filepath}" - end -end diff --git a/tasks/spec.rake b/tasks/spec.rake new file mode 100644 index 00000000..c361fe8e --- /dev/null +++ b/tasks/spec.rake @@ -0,0 +1,10 @@ +# frozen_string_literal: true + +desc "Run mspec tests using YARV emulation" +task :spec do + specs = File.expand_path("../spec/ruby/language/**/*_spec.rb", __dir__) + + Dir[specs].each do |filepath| + sh "exe/yarv ./spec/mspec/bin/mspec-tag #{filepath}" + end +end diff --git a/tasks/whitequark.rake b/tasks/whitequark.rake new file mode 100644 index 00000000..4f7ee650 --- /dev/null +++ b/tasks/whitequark.rake @@ -0,0 +1,87 @@ +# frozen_string_literal: true + +# This file's purpose is to extract the examples from the whitequark/parser +# gem and generate a test file that we can use to ensure that our parser +# generates equivalent syntax trees when translating. To do this, it runs the +# parser's test suite but overrides the `assert_parses` method to collect the +# examples into a hash. Then, it writes out the hash to a file that we can use +# to generate our own tests. +# +# To run the test suite, it's important to note that we have to mirror any +# APIs provided to the test suite (for example the ParseHelper module below). +# This is obviously relatively brittle, but it's effective for now. + +require "ast" + +module ParseHelper + # This object is going to collect all of the examples from the parser gem into + # a hash that we can use to generate our own tests. 
+ COLLECTED = Hash.new { |hash, key| hash[key] = [] } + + include AST::Sexp + ALL_VERSIONS = %w[3.1 3.2] + + private + + def assert_context(*) + end + + def assert_diagnoses(*) + end + + def assert_diagnoses_many(*) + end + + def refute_diagnoses(*) + end + + def with_versions(*) + end + + def assert_parses(_ast, code, _source_maps = "", versions = ALL_VERSIONS) + # We're going to skip any examples that are for older Ruby versions + # that we do not support. + return if (versions & %w[3.1 3.2]).empty? + + entry = caller.find { _1.include?("test_parser.rb") } + _, lineno, name = *entry.match(/(\d+):in `(.+)'/) + + COLLECTED["#{name}:#{lineno}"] << code + end +end + +namespace :extract do + desc "Extract the whitequark/parser tests" + task :whitequark do + directory = File.expand_path("../tmp/parser", __dir__) + unless File.directory?(directory) + sh "git clone --depth 1 https://github.com/whitequark/parser #{directory}" + end + + mkdir_p "#{directory}/extract" + touch "#{directory}/extract/helper.rb" + touch "#{directory}/extract/parse_helper.rb" + touch "#{directory}/extract/extracted.txt" + $:.unshift "#{directory}/extract" + + require "parser/current" + require "minitest/autorun" + require_relative "#{directory}/test/test_parser" + + Minitest.after_run do + filepath = File.expand_path("../test/translation/parser.txt", __dir__) + + File.open(filepath, "w") do |file| + ParseHelper::COLLECTED.sort.each do |(key, codes)| + if codes.length == 1 + file.puts("!!! #{key}\n#{codes.first}") + else + codes.each_with_index do |code, index| + file.puts("!!! 
#{key}:#{index}\n#{code}") + end + end + end + end + end + end +end diff --git a/test/ruby-syntax-fixtures b/test/ruby-syntax-fixtures deleted file mode 160000 index 5b333f5a..00000000 --- a/test/ruby-syntax-fixtures +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 5b333f5a34d6fb08f88acc93b69c7d19b3fee8e7 diff --git a/test/ruby_syntax_fixtures_test.rb b/test/ruby_syntax_fixtures_test.rb deleted file mode 100644 index c5c13b27..00000000 --- a/test/ruby_syntax_fixtures_test.rb +++ /dev/null @@ -1,19 +0,0 @@ -# frozen_string_literal: true - -# The ruby-syntax-fixtures repository tests against the current Ruby syntax, so -# we don't execute this test unless we're running 3.2 or above. -return unless RUBY_VERSION >= "3.2" - -require_relative "test_helper" - -module SyntaxTree - class RubySyntaxFixturesTest < Minitest::Test - Dir[ - File.expand_path("ruby-syntax-fixtures/**/*.rb", __dir__) - ].each do |file| - define_method "test_ruby_syntax_fixtures_#{file}" do - refute_nil(SyntaxTree.parse(SyntaxTree.read(file))) - end - end - end -end diff --git a/test/suites/helper.rb b/test/suites/helper.rb deleted file mode 100644 index b0f8c427..00000000 --- a/test/suites/helper.rb +++ /dev/null @@ -1,3 +0,0 @@ -# frozen_string_literal: true - -require "parser/current" diff --git a/test/suites/parse_helper.rb b/test/suites/parse_helper.rb deleted file mode 100644 index 04fe8123..00000000 --- a/test/suites/parse_helper.rb +++ /dev/null @@ -1,175 +0,0 @@ -# frozen_string_literal: true - -module ParseHelper - include AST::Sexp - - CURRENT_VERSION = RUBY_VERSION.split(".")[0..1].join(".").freeze - ALL_VERSIONS = %w[1.8 1.9 2.0 2.1 2.2 2.3 2.4 2.5 2.6 2.7 3.0 3.1 3.2 mac ios] - - known_failures = [ - # I think this may be a bug in the parser gem's precedence calculation. - # Unary plus appears to be parsed as part of the number literal in CRuby, - # but parser is parsing it as a separate operator. - "test_unary_num_pow_precedence:3505", - - # Not much to be done about this. 
Basically, regular expressions with named - # capture groups that use the =~ operator inject local variables into the - # current scope. In the parser gem, it detects this and changes future - # references to that name to be a local variable instead of a potential - # method call. CRuby does not do this. - "test_lvar_injecting_match:3778", - - # This is failing because CRuby is not marking values captured in hash - # patterns as local variables, while the parser gem is. - "test_pattern_matching_hash:8971", - - # This is not actually allowed in the CRuby parser but the parser gem thinks - # it is allowed. - "test_pattern_matching_hash_with_string_keys:9016", - "test_pattern_matching_hash_with_string_keys:9027", - "test_pattern_matching_hash_with_string_keys:9038", - "test_pattern_matching_hash_with_string_keys:9060", - "test_pattern_matching_hash_with_string_keys:9071", - "test_pattern_matching_hash_with_string_keys:9082", - - # This happens with pattern matching where you're matching a literal value - # inside parentheses, which doesn't really do anything. Ripper doesn't - # capture that this value is inside a parentheses, so it's hard to translate - # properly. - "test_pattern_matching_expr_in_paren:9206", - - # These are also failing because of CRuby not marking values captured in - # hash patterns as local variables. - "test_pattern_matching_single_line_allowed_omission_of_parentheses:9205", - "test_pattern_matching_single_line_allowed_omission_of_parentheses:9581", - "test_pattern_matching_single_line_allowed_omission_of_parentheses:9611", - - # I'm not even sure what this is testing, because the code is invalid in - # CRuby. - "test_control_meta_escape_chars_in_regexp__since_31:*", - ] - - # These are failures that we need to take care of (or determine the reason - # that we're not going to handle them). 
- todo_failures = [ - "test_dedenting_heredoc:334", - "test_dedenting_heredoc:390", - "test_dedenting_heredoc:399", - "test_slash_newline_in_heredocs:7194", - "test_parser_slash_slash_n_escaping_in_literals:*", - "test_cond_match_current_line:4801", - "test_forwarded_restarg:*", - "test_forwarded_kwrestarg:*", - "test_forwarded_argument_with_restarg:*", - "test_forwarded_argument_with_kwrestarg:*" - ] - - if CURRENT_VERSION <= "2.7" - # I'm not sure why this is failing on 2.7.0, but we'll turn it off for now - # until we have more time to investigate. - todo_failures.push("test_pattern_matching_hash:*") - end - - if CURRENT_VERSION <= "3.0" - # In < 3.0, there are some changes to the way the parser gem handles - # forwarded args. We should eventually support this, but for now we're going - # to mark them as todo. - todo_failures.push( - "test_forward_arg:*", - "test_forward_args_legacy:*", - "test_endless_method_forwarded_args_legacy:*", - "test_trailing_forward_arg:*" - ) - end - - if CURRENT_VERSION == "3.1" - # This test actually fails on 3.1.0, even though it's marked as being since - # 3.1. So we're going to skip this test on 3.1, but leave it in for other - # versions. - known_failures.push( - "test_multiple_pattern_matches:11086", - "test_multiple_pattern_matches:11102" - ) - end - - # This is the list of all failures. - FAILURES = (known_failures + todo_failures).freeze - - private - - def assert_context(*) - end - - def assert_diagnoses(*) - end - - def assert_diagnoses_many(*) - end - - def refute_diagnoses(*) - end - - def with_versions(*) - end - - def assert_parses(_ast, code, _source_maps = "", versions = ALL_VERSIONS) - # We're going to skip any examples that aren't for the current version of - # Ruby. - return unless versions.include?(CURRENT_VERSION) - - # We're going to skip any examples that are for older Ruby versions that we - # do not support. - return if (versions & %w[3.1 3.2]).empty? 
- - caller(1, 3).each do |line| - _, lineno, name = *line.match(/(\d+):in `(.+)'/) - - # Return directly and don't do anything if it's a known failure. - return if FAILURES.include?("#{name}:#{lineno}") - return if FAILURES.include?("#{name}:*") - end - - expected = parse(code) - return if expected.nil? - - buffer = expected.location.expression.source_buffer - actual = SyntaxTree::Translation.to_parser(SyntaxTree.parse(code), buffer) - assert_equal(expected, actual) - end - - def parse(code) - parser = Parser::CurrentRuby.default_parser - parser.diagnostics.consumer = ->(*) {} - - buffer = Parser::Source::Buffer.new("(string)", 1) - buffer.source = code - - parser.parse(buffer) - rescue Parser::SyntaxError - end -end - -if ENV["PARSER_LOCATION"] - # Modify the source map == check so that it doesn't check against the node - # itself so we don't get into a recursive loop. - Parser::Source::Map.prepend( - Module.new do - def ==(other) - self.class == other.class && - (instance_variables - %i[@node]).map do |ivar| - instance_variable_get(ivar) == other.instance_variable_get(ivar) - end.reduce(:&) - end - end - ) - - # Next, ensure that we're comparing the nodes and also comparing the source - # ranges so that we're getting all of the necessary information. - Parser::AST::Node.prepend( - Module.new do - def ==(other) - super && (location == other.location) - end - end - ) -end diff --git a/test/suites/parser b/test/suites/parser deleted file mode 160000 index 8de8b7fa..00000000 --- a/test/suites/parser +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 8de8b7fa7af471a2159860d6a0a5b615eac9c83c diff --git a/test/translation/parser.txt b/test/translation/parser.txt new file mode 100644 index 00000000..5e9e8d31 --- /dev/null +++ b/test/translation/parser.txt @@ -0,0 +1,1824 @@ +!!! assert_parses_args:2249:0 +def f (foo: 1, bar: 2, **baz, &b); end +!!! assert_parses_args:2249:1 +def f (foo: 1, &b); end +!!! assert_parses_args:2249:2 +def f **baz, &b; end +!!! 
assert_parses_args:2249:3 +def f *, **; end +!!! assert_parses_args:2249:4 +def f a, o=1, *r, &b; end +!!! assert_parses_args:2249:5 +def f a, o=1, *r, p, &b; end +!!! assert_parses_args:2249:6 +def f a, o=1, &b; end +!!! assert_parses_args:2249:7 +def f a, o=1, p, &b; end +!!! assert_parses_args:2249:8 +def f a, *r, &b; end +!!! assert_parses_args:2249:9 +def f a, *r, p, &b; end +!!! assert_parses_args:2249:10 +def f a, &b; end +!!! assert_parses_args:2249:11 +def f o=1, *r, &b; end +!!! assert_parses_args:2249:12 +def f o=1, *r, p, &b; end +!!! assert_parses_args:2249:13 +def f o=1, &b; end +!!! assert_parses_args:2249:14 +def f o=1, p, &b; end +!!! assert_parses_args:2249:15 +def f *r, &b; end +!!! assert_parses_args:2249:16 +def f *r, p, &b; end +!!! assert_parses_args:2249:17 +def f &b; end +!!! assert_parses_args:2249:18 +def f ; end +!!! assert_parses_args:2249:19 +def f (((a))); end +!!! assert_parses_args:2249:20 +def f ((a, a1)); end +!!! assert_parses_args:2249:21 +def f ((a, *r)); end +!!! assert_parses_args:2249:22 +def f ((a, *r, p)); end +!!! assert_parses_args:2249:23 +def f ((a, *)); end +!!! assert_parses_args:2249:24 +def f ((a, *, p)); end +!!! assert_parses_args:2249:25 +def f ((*r)); end +!!! assert_parses_args:2249:26 +def f ((*r, p)); end +!!! assert_parses_args:2249:27 +def f ((*)); end +!!! assert_parses_args:2249:28 +def f ((*, p)); end +!!! assert_parses_args:2249:29 +def f foo: +; end +!!! assert_parses_args:2249:30 +def f foo: -1 +; end +!!! assert_parses_blockargs:2506:0 +f{ |a| } +!!! assert_parses_blockargs:2506:1 +f{ |a, b,| } +!!! assert_parses_blockargs:2506:2 +f{ |a| } +!!! assert_parses_blockargs:2506:3 +f{ |foo:| } +!!! assert_parses_blockargs:2506:4 +f{ } +!!! assert_parses_blockargs:2506:5 +f{ | | } +!!! assert_parses_blockargs:2506:6 +f{ |;a| } +!!! assert_parses_blockargs:2506:7 +f{ |; +a +| } +!!! assert_parses_blockargs:2506:8 +f{ || } +!!! assert_parses_blockargs:2506:9 +f{ |a| } +!!! 
assert_parses_blockargs:2506:10 +f{ |a, c| } +!!! assert_parses_blockargs:2506:11 +f{ |a,| } +!!! assert_parses_blockargs:2506:12 +f{ |a, &b| } +!!! assert_parses_blockargs:2506:13 +f{ |a, *s, &b| } +!!! assert_parses_blockargs:2506:14 +f{ |a, *, &b| } +!!! assert_parses_blockargs:2506:15 +f{ |a, *s| } +!!! assert_parses_blockargs:2506:16 +f{ |a, *| } +!!! assert_parses_blockargs:2506:17 +f{ |*s, &b| } +!!! assert_parses_blockargs:2506:18 +f{ |*, &b| } +!!! assert_parses_blockargs:2506:19 +f{ |*s| } +!!! assert_parses_blockargs:2506:20 +f{ |*| } +!!! assert_parses_blockargs:2506:21 +f{ |&b| } +!!! assert_parses_blockargs:2506:22 +f{ |a, o=1, o1=2, *r, &b| } +!!! assert_parses_blockargs:2506:23 +f{ |a, o=1, *r, p, &b| } +!!! assert_parses_blockargs:2506:24 +f{ |a, o=1, &b| } +!!! assert_parses_blockargs:2506:25 +f{ |a, o=1, p, &b| } +!!! assert_parses_blockargs:2506:26 +f{ |a, *r, p, &b| } +!!! assert_parses_blockargs:2506:27 +f{ |o=1, *r, &b| } +!!! assert_parses_blockargs:2506:28 +f{ |o=1, *r, p, &b| } +!!! assert_parses_blockargs:2506:29 +f{ |o=1, &b| } +!!! assert_parses_blockargs:2506:30 +f{ |o=1, p, &b| } +!!! assert_parses_blockargs:2506:31 +f{ |*r, p, &b| } +!!! assert_parses_blockargs:2506:32 +f{ |foo: 1, bar: 2, **baz, &b| } +!!! assert_parses_blockargs:2506:33 +f{ |foo: 1, &b| } +!!! assert_parses_blockargs:2506:34 +f{ |**baz, &b| } +!!! assert_parses_pattern_match:8503:0 +case foo; in self then true; end +!!! assert_parses_pattern_match:8503:1 +case foo; in 1..2 then true; end +!!! assert_parses_pattern_match:8503:2 +case foo; in 1.. then true; end +!!! assert_parses_pattern_match:8503:3 +case foo; in ..2 then true; end +!!! assert_parses_pattern_match:8503:4 +case foo; in 1...2 then true; end +!!! assert_parses_pattern_match:8503:5 +case foo; in 1... then true; end +!!! assert_parses_pattern_match:8503:6 +case foo; in ...2 then true; end +!!! assert_parses_pattern_match:8503:7 +case foo; in [*x, 1 => a, *y] then true; end +!!! 
assert_parses_pattern_match:8503:8 +case foo; in String(*, 1, *) then true; end +!!! assert_parses_pattern_match:8503:9 +case foo; in Array[*, 1, *] then true; end +!!! assert_parses_pattern_match:8503:10 +case foo; in *, 42, * then true; end +!!! assert_parses_pattern_match:8503:11 +case foo; in x, then nil; end +!!! assert_parses_pattern_match:8503:12 +case foo; in *x then nil; end +!!! assert_parses_pattern_match:8503:13 +case foo; in * then nil; end +!!! assert_parses_pattern_match:8503:14 +case foo; in x, y then nil; end +!!! assert_parses_pattern_match:8503:15 +case foo; in x, y, then nil; end +!!! assert_parses_pattern_match:8503:16 +case foo; in x, *y, z then nil; end +!!! assert_parses_pattern_match:8503:17 +case foo; in *x, y, z then nil; end +!!! assert_parses_pattern_match:8503:18 +case foo; in 1, "a", [], {} then nil; end +!!! assert_parses_pattern_match:8503:19 +case foo; in ->{ 42 } then true; end +!!! assert_parses_pattern_match:8503:20 +case foo; in A(1, 2) then true; end +!!! assert_parses_pattern_match:8503:21 +case foo; in A(x:) then true; end +!!! assert_parses_pattern_match:8503:22 +case foo; in A() then true; end +!!! assert_parses_pattern_match:8503:23 +case foo; in A[1, 2] then true; end +!!! assert_parses_pattern_match:8503:24 +case foo; in A[x:] then true; end +!!! assert_parses_pattern_match:8503:25 +case foo; in A[] then true; end +!!! assert_parses_pattern_match:8503:26 +case foo; in x then x; end +!!! assert_parses_pattern_match:8503:27 +case foo; in {} then true; end +!!! assert_parses_pattern_match:8503:28 +case foo; in a: 1 then true; end +!!! assert_parses_pattern_match:8503:29 +case foo; in { a: 1 } then true; end +!!! assert_parses_pattern_match:8503:30 +case foo; in { a: 1, } then true; end +!!! assert_parses_pattern_match:8503:31 +case foo; in a: then true; end +!!! assert_parses_pattern_match:8503:32 +case foo; in **a then true; end +!!! assert_parses_pattern_match:8503:33 +case foo; in ** then true; end +!!! 
assert_parses_pattern_match:8503:34 +case foo; in a: 1, b: 2 then true; end +!!! assert_parses_pattern_match:8503:35 +case foo; in a:, b: then true; end +!!! assert_parses_pattern_match:8503:36 +case foo; in a: 1, _a:, ** then true; end +!!! assert_parses_pattern_match:8503:37 +case foo; + in {a: 1 + } + false + ; end +!!! assert_parses_pattern_match:8503:38 +case foo; + in {a: + 2} + false + ; end +!!! assert_parses_pattern_match:8503:39 +case foo; + in {Foo: 42 + } + false + ; end +!!! assert_parses_pattern_match:8503:40 +case foo; + in a: {b:}, c: + p c + ; end +!!! assert_parses_pattern_match:8503:41 +case foo; + in {a: + } + true + ; end +!!! assert_parses_pattern_match:8503:42 +case foo; in A then true; end +!!! assert_parses_pattern_match:8503:43 +case foo; in A::B then true; end +!!! assert_parses_pattern_match:8503:44 +case foo; in ::A then true; end +!!! assert_parses_pattern_match:8503:45 +case foo; in [x] then nil; end +!!! assert_parses_pattern_match:8503:46 +case foo; in [x,] then nil; end +!!! assert_parses_pattern_match:8503:47 +case foo; in [x, y] then true; end +!!! assert_parses_pattern_match:8503:48 +case foo; in [x, y,] then true; end +!!! assert_parses_pattern_match:8503:49 +case foo; in [x, y, *] then true; end +!!! assert_parses_pattern_match:8503:50 +case foo; in [x, y, *z] then true; end +!!! assert_parses_pattern_match:8503:51 +case foo; in [x, *y, z] then true; end +!!! assert_parses_pattern_match:8503:52 +case foo; in [x, *, y] then true; end +!!! assert_parses_pattern_match:8503:53 +case foo; in [*x, y] then true; end +!!! assert_parses_pattern_match:8503:54 +case foo; in [*, x] then true; end +!!! assert_parses_pattern_match:8503:55 +case foo; in (1) then true; end +!!! assert_parses_pattern_match:8503:56 +case foo; in x if true; nil; end +!!! assert_parses_pattern_match:8503:57 +case foo; in x unless true; nil; end +!!! assert_parses_pattern_match:8503:58 +case foo; in 1; end +!!! 
assert_parses_pattern_match:8503:59 +case foo; in ^foo then nil; end +!!! assert_parses_pattern_match:8503:60 +case foo; in "a": then true; end +!!! assert_parses_pattern_match:8503:61 +case foo; in "#{ 'a' }": then true; end +!!! assert_parses_pattern_match:8503:62 +case foo; in "#{ %q{a} }": then true; end +!!! assert_parses_pattern_match:8503:63 +case foo; in "#{ %Q{a} }": then true; end +!!! assert_parses_pattern_match:8503:64 +case foo; in "a": 1 then true; end +!!! assert_parses_pattern_match:8503:65 +case foo; in "#{ 'a' }": 1 then true; end +!!! assert_parses_pattern_match:8503:66 +case foo; in "#{ %q{a} }": 1 then true; end +!!! assert_parses_pattern_match:8503:67 +case foo; in "#{ %Q{a} }": 1 then true; end +!!! assert_parses_pattern_match:8503:68 +case foo; in ^(42) then nil; end +!!! assert_parses_pattern_match:8503:69 +case foo; in { foo: ^(42) } then nil; end +!!! assert_parses_pattern_match:8503:70 +case foo; in ^(0+0) then nil; end +!!! assert_parses_pattern_match:8503:71 +case foo; in ^@a; end +!!! assert_parses_pattern_match:8503:72 +case foo; in ^@@TestPatternMatching; end +!!! assert_parses_pattern_match:8503:73 +case foo; in ^$TestPatternMatching; end +!!! assert_parses_pattern_match:8503:74 +case foo; in ^(1 +); end +!!! assert_parses_pattern_match:8503:75 +case foo; in 1 | 2 then true; end +!!! assert_parses_pattern_match:8503:76 +case foo; in 1 => a then true; end +!!! assert_parses_pattern_match:8503:77 +case foo; in **nil then true; end +!!! block in test_endless_comparison_method:10392:0 +def ===(other) = do_something +!!! block in test_endless_comparison_method:10392:1 +def ==(other) = do_something +!!! block in test_endless_comparison_method:10392:2 +def !=(other) = do_something +!!! block in test_endless_comparison_method:10392:3 +def <=(other) = do_something +!!! block in test_endless_comparison_method:10392:4 +def >=(other) = do_something +!!! block in test_endless_comparison_method:10392:5 +def !=(other) = do_something +!!! 
block in test_parser_slash_slash_n_escaping_in_literals:7327:0 +'a\ +b' +!!! block in test_parser_slash_slash_n_escaping_in_literals:7327:1 +<<-'HERE' +a\ +b +HERE +!!! block in test_parser_slash_slash_n_escaping_in_literals:7327:2 +%q{a\ +b} +!!! block in test_parser_slash_slash_n_escaping_in_literals:7327:3 +"a\ +b" +!!! block in test_parser_slash_slash_n_escaping_in_literals:7327:4 +<<-"HERE" +a\ +b +HERE +!!! block in test_parser_slash_slash_n_escaping_in_literals:7327:5 +%{a\ +b} +!!! block in test_parser_slash_slash_n_escaping_in_literals:7327:6 +%Q{a\ +b} +!!! block in test_parser_slash_slash_n_escaping_in_literals:7327:7 +%w{a\ +b} +!!! block in test_parser_slash_slash_n_escaping_in_literals:7327:8 +%W{a\ +b} +!!! block in test_parser_slash_slash_n_escaping_in_literals:7327:9 +%i{a\ +b} +!!! block in test_parser_slash_slash_n_escaping_in_literals:7327:10 +%I{a\ +b} +!!! block in test_parser_slash_slash_n_escaping_in_literals:7327:11 +:'a\ +b' +!!! block in test_parser_slash_slash_n_escaping_in_literals:7327:12 +%s{a\ +b} +!!! block in test_parser_slash_slash_n_escaping_in_literals:7327:13 +:"a\ +b" +!!! block in test_parser_slash_slash_n_escaping_in_literals:7327:14 +/a\ +b/ +!!! block in test_parser_slash_slash_n_escaping_in_literals:7327:15 +%r{a\ +b} +!!! block in test_parser_slash_slash_n_escaping_in_literals:7327:16 +%x{a\ +b} +!!! block in test_parser_slash_slash_n_escaping_in_literals:7327:17 +`a\ +b` +!!! block in test_parser_slash_slash_n_escaping_in_literals:7327:18 +<<-`HERE` +a\ +b +HERE +!!! block in test_ruby_bug_11873_a:6017:0 +a b{c d}, :e do end +!!! block in test_ruby_bug_11873_a:6017:1 +a b{c d}, 1 do end +!!! block in test_ruby_bug_11873_a:6017:2 +a b{c d}, 1.0 do end +!!! block in test_ruby_bug_11873_a:6017:3 +a b{c d}, 1.0r do end +!!! block in test_ruby_bug_11873_a:6017:4 +a b{c d}, 1.0i do end +!!! block in test_ruby_bug_11873_a:6022:0 +a b{c(d)}, :e do end +!!! block in test_ruby_bug_11873_a:6022:1 +a b{c(d)}, 1 do end +!!! 
block in test_ruby_bug_11873_a:6022:2 +a b{c(d)}, 1.0 do end +!!! block in test_ruby_bug_11873_a:6022:3 +a b{c(d)}, 1.0r do end +!!! block in test_ruby_bug_11873_a:6022:4 +a b{c(d)}, 1.0i do end +!!! block in test_ruby_bug_11873_a:6036:0 +a b(c d), :e do end +!!! block in test_ruby_bug_11873_a:6036:1 +a b(c d), 1 do end +!!! block in test_ruby_bug_11873_a:6036:2 +a b(c d), 1.0 do end +!!! block in test_ruby_bug_11873_a:6036:3 +a b(c d), 1.0r do end +!!! block in test_ruby_bug_11873_a:6036:4 +a b(c d), 1.0i do end +!!! block in test_ruby_bug_11873_a:6041:0 +a b(c(d)), :e do end +!!! block in test_ruby_bug_11873_a:6041:1 +a b(c(d)), 1 do end +!!! block in test_ruby_bug_11873_a:6041:2 +a b(c(d)), 1.0 do end +!!! block in test_ruby_bug_11873_a:6041:3 +a b(c(d)), 1.0r do end +!!! block in test_ruby_bug_11873_a:6041:4 +a b(c(d)), 1.0i do end +!!! test___ENCODING__:1037 +__ENCODING__ +!!! test___ENCODING___legacy_:1046 +__ENCODING__ +!!! test_alias:2020 +alias :foo bar +!!! test_alias_gvar:2032 +alias $a $b +!!! test_alias_gvar:2037 +alias $a $+ +!!! test_ambiuous_quoted_label_in_ternary_operator:7204 +a ? b & '': nil +!!! test_and:4447 +foo and bar +!!! test_and:4453 +foo && bar +!!! test_and_asgn:1748 +foo.a &&= 1 +!!! test_and_asgn:1758 +foo[0, 1] &&= 2 +!!! test_and_or_masgn:4475 +foo && (a, b = bar) +!!! test_and_or_masgn:4484 +foo || (a, b = bar) +!!! test_anonymous_blockarg:10861 +def foo(&); bar(&); end +!!! test_arg:2055 +def f(foo); end +!!! test_arg:2066 +def f(foo, bar); end +!!! test_arg_duplicate_ignored:2958 +def foo(_, _); end +!!! test_arg_duplicate_ignored:2972 +def foo(_a, _a); end +!!! test_arg_label:3012 +def foo() a:b end +!!! test_arg_label:3019 +def foo + a:b end +!!! test_arg_label:3026 +f { || a:b } +!!! test_arg_scope:2238 +lambda{|;a|a} +!!! test_args_args_assocs:4077 +fun(foo, :foo => 1) +!!! test_args_args_assocs:4083 +fun(foo, :foo => 1, &baz) +!!! test_args_args_assocs_comma:4092 +foo[bar, :baz => 1,] +!!! 
test_args_args_comma:3941 +foo[bar,] +!!! test_args_args_star:3908 +fun(foo, *bar) +!!! test_args_args_star:3913 +fun(foo, *bar, &baz) +!!! test_args_assocs:4001 +fun(:foo => 1) +!!! test_args_assocs:4006 +fun(:foo => 1, &baz) +!!! test_args_assocs:4012 +self[:bar => 1] +!!! test_args_assocs:4021 +self.[]= foo, :a => 1 +!!! test_args_assocs:4031 +yield(:foo => 42) +!!! test_args_assocs:4039 +super(:foo => 42) +!!! test_args_assocs_comma:4068 +foo[:baz => 1,] +!!! test_args_assocs_legacy:3951 +fun(:foo => 1) +!!! test_args_assocs_legacy:3956 +fun(:foo => 1, &baz) +!!! test_args_assocs_legacy:3962 +self[:bar => 1] +!!! test_args_assocs_legacy:3971 +self.[]= foo, :a => 1 +!!! test_args_assocs_legacy:3981 +yield(:foo => 42) +!!! test_args_assocs_legacy:3989 +super(:foo => 42) +!!! test_args_block_pass:3934 +fun(&bar) +!!! test_args_cmd:3901 +fun(f bar) +!!! test_args_star:3921 +fun(*bar) +!!! test_args_star:3926 +fun(*bar, &baz) +!!! test_array_assocs:629 +[ 1 => 2 ] +!!! test_array_assocs:637 +[ 1, 2 => 3 ] +!!! test_array_plain:589 +[1, 2] +!!! test_array_splat:598 +[1, *foo, 2] +!!! test_array_splat:611 +[1, *foo] +!!! test_array_splat:622 +[*foo] +!!! test_array_symbols:695 +%i[foo bar] +!!! test_array_symbols_empty:732 +%i[] +!!! test_array_symbols_empty:740 +%I() +!!! test_array_symbols_interp:706 +%I[foo #{bar}] +!!! test_array_symbols_interp:721 +%I[foo#{bar}] +!!! test_array_words:647 +%w[foo bar] +!!! test_array_words_empty:682 +%w[] +!!! test_array_words_empty:689 +%W() +!!! test_array_words_interp:657 +%W[foo #{bar}] +!!! test_array_words_interp:671 +%W[foo #{bar}foo#@baz] +!!! test_asgn_cmd:1126 +foo = m foo +!!! test_asgn_cmd:1130 +foo = bar = m foo +!!! test_asgn_mrhs:1449 +foo = bar, 1 +!!! test_asgn_mrhs:1456 +foo = *bar +!!! test_asgn_mrhs:1461 +foo = baz, *bar +!!! test_back_ref:995 +$+ +!!! test_bang:3434 +!foo +!!! test_bang_cmd:3448 +!m foo +!!! test_begin_cmdarg:5526 +p begin 1.times do 1 end end +!!! 
test_beginless_erange_after_newline:935 +foo +...100 +!!! test_beginless_irange_after_newline:923 +foo +..100 +!!! test_beginless_range:903 +..100 +!!! test_beginless_range:912 +...100 +!!! test_blockarg:2187 +def f(&block); end +!!! test_break:5037 +break(foo) +!!! test_break:5051 +break foo +!!! test_break:5057 +break() +!!! test_break:5064 +break +!!! test_break_block:5072 +break fun foo do end +!!! test_bug_435:7067 +"#{-> foo {}}" +!!! test_bug_447:7046 +m [] do end +!!! test_bug_447:7055 +m [], 1 do end +!!! test_bug_452:7080 +td (1_500).toString(); td.num do; end +!!! test_bug_466:7096 +foo "#{(1+1).to_i}" do; end +!!! test_bug_473:7113 +m "#{[]}" +!!! test_bug_480:7124 +m "#{}#{()}" +!!! test_bug_481:7136 +m def x(); end; 1.tap do end +!!! test_bug_ascii_8bit_in_literal:5880 +# coding:utf-8 + "\xD0\xBF\xD1\x80\xD0\xBE\xD0\xB2\xD0\xB5\xD1\x80\xD0\xBA\xD0\xB0" +!!! test_bug_cmd_string_lookahead:5752 +desc "foo" do end +!!! test_bug_cmdarg:5549 +assert dogs +!!! test_bug_cmdarg:5554 +assert do: true +!!! test_bug_cmdarg:5562 +f x: -> do meth do end end +!!! test_bug_def_no_paren_eql_begin:5799 +def foo +=begin +=end +end +!!! test_bug_do_block_in_call_args:5762 +bar def foo; self.each do end end +!!! test_bug_do_block_in_cmdarg:5777 +tap (proc do end) +!!! test_bug_do_block_in_hash_brace:6569 +p :foo, {a: proc do end, b: proc do end} +!!! test_bug_do_block_in_hash_brace:6587 +p :foo, {:a => proc do end, b: proc do end} +!!! test_bug_do_block_in_hash_brace:6605 +p :foo, {"a": proc do end, b: proc do end} +!!! test_bug_do_block_in_hash_brace:6623 +p :foo, {proc do end => proc do end, b: proc do end} +!!! test_bug_do_block_in_hash_brace:6643 +p :foo, {** proc do end, b: proc do end} +!!! test_bug_heredoc_do:5835 +f <<-TABLE do +TABLE +end +!!! test_bug_interp_single:5789 +"#{1}" +!!! test_bug_interp_single:5793 +%W"#{1}" +!!! test_bug_lambda_leakage:6550 +->(scope) {}; scope +!!! test_bug_regex_verification:6563 +/#)/x +!!! 
test_bug_rescue_empty_else:5813 +begin; rescue LoadError; else; end +!!! test_bug_while_not_parens_do:5805 +while not (true) do end +!!! test_case_cond:4844 +case; when foo; 'foo'; end +!!! test_case_cond_else:4857 +case; when foo; 'foo'; else 'bar'; end +!!! test_case_expr:4816 +case foo; when 'bar'; bar; end +!!! test_case_expr_else:4830 +case foo; when 'bar'; bar; else baz; end +!!! test_casgn_scoped:1192 +Bar::Foo = 10 +!!! test_casgn_toplevel:1181 +::Foo = 10 +!!! test_casgn_unscoped:1203 +Foo = 10 +!!! test_character:248 +?a +!!! test_class:1827 +class Foo; end +!!! test_class:1837 +class Foo end +!!! test_class_definition_in_while_cond:6870 +while class Foo; tap do end; end; break; end +!!! test_class_definition_in_while_cond:6882 +while class Foo a = tap do end; end; break; end +!!! test_class_definition_in_while_cond:6895 +while class << self; tap do end; end; break; end +!!! test_class_definition_in_while_cond:6907 +while class << self; a = tap do end; end; break; end +!!! test_class_super:1848 +class Foo < Bar; end +!!! test_class_super_label:1860 +class Foo < a:b; end +!!! test_comments_before_leading_dot__27:7750 +a # +# +.foo +!!! test_comments_before_leading_dot__27:7757 +a # + # +.foo +!!! test_comments_before_leading_dot__27:7764 +a # +# +&.foo +!!! test_comments_before_leading_dot__27:7771 +a # + # +&.foo +!!! test_complex:156 +42i +!!! test_complex:162 +42ri +!!! test_complex:168 +42.1i +!!! test_complex:174 +42.1ri +!!! test_cond_begin:4686 +if (bar); foo; end +!!! test_cond_begin_masgn:4695 +if (bar; a, b = foo); end +!!! test_cond_eflipflop:4758 +if foo...bar; end +!!! test_cond_eflipflop:4772 +!(foo...bar) +!!! test_cond_iflipflop:4735 +if foo..bar; end +!!! test_cond_iflipflop:4749 +!(foo..bar) +!!! test_cond_match_current_line:4781 +if /wat/; end +!!! test_cond_match_current_line:4801 +!/wat/ +!!! test_const_op_asgn:1536 +A += 1 +!!! test_const_op_asgn:1542 +::A += 1 +!!! test_const_op_asgn:1550 +B::A += 1 +!!! 
test_const_op_asgn:1558 +def x; self::A ||= 1; end +!!! test_const_op_asgn:1567 +def x; ::A ||= 1; end +!!! test_const_scoped:1020 +Bar::Foo +!!! test_const_toplevel:1011 +::Foo +!!! test_const_unscoped:1029 +Foo +!!! test_control_meta_escape_chars_in_regexp__since_31:10686 +/\c\xFF/ +!!! test_control_meta_escape_chars_in_regexp__since_31:10692 +/\c\M-\xFF/ +!!! test_control_meta_escape_chars_in_regexp__since_31:10698 +/\C-\xFF/ +!!! test_control_meta_escape_chars_in_regexp__since_31:10704 +/\C-\M-\xFF/ +!!! test_control_meta_escape_chars_in_regexp__since_31:10710 +/\M-\xFF/ +!!! test_control_meta_escape_chars_in_regexp__since_31:10716 +/\M-\C-\xFF/ +!!! test_control_meta_escape_chars_in_regexp__since_31:10722 +/\M-\c\xFF/ +!!! test_cpath:1807 +module ::Foo; end +!!! test_cpath:1813 +module Bar::Foo; end +!!! test_cvar:973 +@@foo +!!! test_cvasgn:1106 +@@var = 10 +!!! test_dedenting_heredoc:297 +p <<~E +E +!!! test_dedenting_heredoc:304 +p <<~E + E +!!! test_dedenting_heredoc:311 +p <<~E + x +E +!!! test_dedenting_heredoc:318 +p <<~E + ð +E +!!! test_dedenting_heredoc:325 +p <<~E + x + y +E +!!! test_dedenting_heredoc:334 +p <<~E + x + y +E +!!! test_dedenting_heredoc:343 +p <<~E + x + y +E +!!! test_dedenting_heredoc:352 +p <<~E + x + y +E +!!! test_dedenting_heredoc:361 +p <<~E + x + y +E +!!! test_dedenting_heredoc:370 +p <<~E + x + +y +E +!!! test_dedenting_heredoc:380 +p <<~E + x + + y +E +!!! test_dedenting_heredoc:390 +p <<~E + x + \ y +E +!!! test_dedenting_heredoc:399 +p <<~E + x + \ y +E +!!! test_dedenting_heredoc:408 +p <<~"E" + x + #{foo} +E +!!! test_dedenting_heredoc:419 +p <<~`E` + x + #{foo} +E +!!! test_dedenting_heredoc:430 +p <<~"E" + x + #{" y"} +E +!!! test_dedenting_interpolating_heredoc_fake_line_continuation:459 +<<~'FOO' + baz\\ + qux +FOO +!!! test_dedenting_non_interpolating_heredoc_line_continuation:451 +<<~'FOO' + baz\ + qux +FOO +!!! test_def:1899 +def foo; end +!!! test_def:1907 +def String; end +!!! 
test_def:1911 +def String=; end +!!! test_def:1915 +def until; end +!!! test_def:1919 +def BEGIN; end +!!! test_def:1923 +def END; end +!!! test_defined:1058 +defined? foo +!!! test_defined:1064 +defined?(foo) +!!! test_defined:1072 +defined? @foo +!!! test_defs:1929 +def self.foo; end +!!! test_defs:1937 +def self::foo; end +!!! test_defs:1945 +def (foo).foo; end +!!! test_defs:1949 +def String.foo; end +!!! test_defs:1954 +def String::foo; end +!!! test_empty_stmt:60 +!!! test_endless_method:9786 +def foo() = 42 +!!! test_endless_method:9798 +def inc(x) = x + 1 +!!! test_endless_method:9811 +def obj.foo() = 42 +!!! test_endless_method:9823 +def obj.inc(x) = x + 1 +!!! test_endless_method_command_syntax:9880 +def foo = puts "Hello" +!!! test_endless_method_command_syntax:9892 +def foo() = puts "Hello" +!!! test_endless_method_command_syntax:9904 +def foo(x) = puts x +!!! test_endless_method_command_syntax:9917 +def obj.foo = puts "Hello" +!!! test_endless_method_command_syntax:9931 +def obj.foo() = puts "Hello" +!!! test_endless_method_command_syntax:9945 +def rescued(x) = raise "to be caught" rescue "instance #{x}" +!!! test_endless_method_command_syntax:9964 +def self.rescued(x) = raise "to be caught" rescue "class #{x}" +!!! test_endless_method_command_syntax:9985 +def obj.foo(x) = puts x +!!! test_endless_method_forwarded_args_legacy:9840 +def foo(...) = bar(...) +!!! test_endless_method_with_rescue_mod:9855 +def m() = 1 rescue 2 +!!! test_endless_method_with_rescue_mod:9866 +def self.m() = 1 rescue 2 +!!! test_endless_method_without_args:10404 +def foo = 42 +!!! test_endless_method_without_args:10412 +def foo = 42 rescue nil +!!! test_endless_method_without_args:10423 +def self.foo = 42 +!!! test_endless_method_without_args:10432 +def self.foo = 42 rescue nil +!!! test_ensure:5261 +begin; meth; ensure; bar; end +!!! test_ensure_empty:5274 +begin ensure end +!!! test_false:96 +false +!!! test_float:129 +1.33 +!!! test_float:134 +-1.33 +!!! 
test_for:5002 +for a in foo do p a; end +!!! test_for:5014 +for a in foo; p a; end +!!! test_for_mlhs:5023 +for a, b in foo; p a, b; end +!!! test_forward_arg:7899 +def foo(...); bar(...); end +!!! test_forward_arg_with_open_args:10745 +def foo ... +end +!!! test_forward_arg_with_open_args:10752 +def foo a, b = 1, ... +end +!!! test_forward_arg_with_open_args:10770 +def foo(a, ...) bar(...) end +!!! test_forward_arg_with_open_args:10781 +def foo a, ... + bar(...) +end +!!! test_forward_arg_with_open_args:10792 +def foo b = 1, ... + bar(...) +end +!!! test_forward_arg_with_open_args:10804 +def foo ...; bar(...); end +!!! test_forward_arg_with_open_args:10814 +def foo a, ...; bar(...); end +!!! test_forward_arg_with_open_args:10825 +def foo b = 1, ...; bar(...); end +!!! test_forward_arg_with_open_args:10837 +(def foo ... + bar(...) +end) +!!! test_forward_arg_with_open_args:10848 +(def foo ...; bar(...); end) +!!! test_forward_args_legacy:7863 +def foo(...); bar(...); end +!!! test_forward_args_legacy:7875 +def foo(...); super(...); end +!!! test_forward_args_legacy:7887 +def foo(...); end +!!! test_forwarded_argument_with_kwrestarg:10962 +def foo(argument, **); bar(argument, **); end +!!! test_forwarded_argument_with_restarg:10923 +def foo(argument, *); bar(argument, *); end +!!! test_forwarded_kwrestarg:10943 +def foo(**); bar(**); end +!!! test_forwarded_restarg:10905 +def foo(*); bar(*); end +!!! test_gvar:980 +$foo +!!! test_gvasgn:1116 +$var = 10 +!!! test_hash_empty:750 +{ } +!!! test_hash_hashrocket:759 +{ 1 => 2 } +!!! test_hash_hashrocket:768 +{ 1 => 2, :foo => "bar" } +!!! test_hash_kwsplat:821 +{ foo: 2, **bar } +!!! test_hash_label:776 +{ foo: 2 } +!!! test_hash_label_end:789 +{ 'foo': 2 } +!!! test_hash_label_end:802 +{ 'foo': 2, 'bar': {}} +!!! test_hash_label_end:810 +f(a ? "a":1) +!!! test_hash_pair_value_omission:10040 +{a:, b:} +!!! test_hash_pair_value_omission:10054 +{puts:} +!!! test_hash_pair_value_omission:10065 +{BAR:} +!!! 
test_heredoc:263 +<(**nil) {} +!!! test_kwoptarg:2124 +def f(foo: 1); end +!!! test_kwrestarg_named:2135 +def f(**foo); end +!!! test_kwrestarg_unnamed:2146 +def f(**); end +!!! test_lbrace_arg_after_command_args:7235 +let (:a) { m do; end } +!!! test_lparenarg_after_lvar__since_25:6679 +meth (-1.3).abs +!!! test_lparenarg_after_lvar__since_25:6688 +foo (-1.3).abs +!!! test_lvar:959 +foo +!!! test_lvar_injecting_match:3778 +/(?bar)/ =~ 'bar'; match +!!! test_lvasgn:1084 +var = 10; var +!!! test_masgn:1247 +foo, bar = 1, 2 +!!! test_masgn:1258 +(foo, bar) = 1, 2 +!!! test_masgn:1268 +foo, bar, baz = 1, 2 +!!! test_masgn_attr:1390 +self.a, self[1, 2] = foo +!!! test_masgn_attr:1403 +self::a, foo = foo +!!! test_masgn_attr:1411 +self.A, foo = foo +!!! test_masgn_cmd:1439 +foo, bar = m foo +!!! test_masgn_const:1421 +self::A, foo = foo +!!! test_masgn_const:1429 +::A, foo = foo +!!! test_masgn_nested:1365 +a, (b, c) = foo +!!! test_masgn_nested:1379 +((b, )) = foo +!!! test_masgn_splat:1279 +@foo, @@bar = *foo +!!! test_masgn_splat:1288 +a, b = *foo, bar +!!! test_masgn_splat:1296 +a, *b = bar +!!! test_masgn_splat:1302 +a, *b, c = bar +!!! test_masgn_splat:1313 +a, * = bar +!!! test_masgn_splat:1319 +a, *, c = bar +!!! test_masgn_splat:1330 +*b = bar +!!! test_masgn_splat:1336 +*b, c = bar +!!! test_masgn_splat:1346 +* = bar +!!! test_masgn_splat:1352 +*, c, d = bar +!!! test_method_definition_in_while_cond:6816 +while def foo; tap do end; end; break; end +!!! test_method_definition_in_while_cond:6828 +while def self.foo; tap do end; end; break; end +!!! test_method_definition_in_while_cond:6841 +while def foo a = tap do end; end; break; end +!!! test_method_definition_in_while_cond:6854 +while def self.foo a = tap do end; end; break; end +!!! test_module:1789 +module Foo; end +!!! test_multiple_pattern_matches:11086 +{a: 0} => a: +{a: 0} => a: +!!! test_multiple_pattern_matches:11102 +{a: 0} in a: +{a: 0} in a: +!!! 
test_newline_in_hash_argument:11035 +obj.set foo: +1 +!!! test_newline_in_hash_argument:11046 +obj.set "foo": +1 +!!! test_newline_in_hash_argument:11057 +case foo +in a: +0 +true +in "b": +0 +true +end +!!! test_next:5131 +next(foo) +!!! test_next:5145 +next foo +!!! test_next:5151 +next() +!!! test_next:5158 +next +!!! test_next_block:5166 +next fun foo do end +!!! test_nil:66 +nil +!!! test_nil_expression:73 +() +!!! test_nil_expression:80 +begin end +!!! test_non_lvar_injecting_match:3793 +/#{1}(?bar)/ =~ 'bar' +!!! test_not:3462 +not foo +!!! test_not:3468 +not(foo) +!!! test_not:3474 +not() +!!! test_not_cmd:3488 +not m foo +!!! test_not_masgn__24:4672 +!(a, b = foo) +!!! test_nth_ref:1002 +$10 +!!! test_numbered_args_after_27:7358 +m { _1 + _9 } +!!! test_numbered_args_after_27:7373 +m do _1 + _9 end +!!! test_numbered_args_after_27:7390 +-> { _1 + _9} +!!! test_numbered_args_after_27:7405 +-> do _1 + _9 end +!!! test_numparam_outside_block:7512 +class A; _1; end +!!! test_numparam_outside_block:7520 +module A; _1; end +!!! test_numparam_outside_block:7528 +class << foo; _1; end +!!! test_numparam_outside_block:7536 +def self.m; _1; end +!!! test_numparam_outside_block:7545 +_1 +!!! test_op_asgn:1606 +foo.a += 1 +!!! test_op_asgn:1616 +foo::a += 1 +!!! test_op_asgn:1622 +foo.A += 1 +!!! test_op_asgn_cmd:1630 +foo.a += m foo +!!! test_op_asgn_cmd:1636 +foo::a += m foo +!!! test_op_asgn_cmd:1642 +foo.A += m foo +!!! test_op_asgn_cmd:1654 +foo::A += m foo +!!! test_op_asgn_index:1664 +foo[0, 1] += 2 +!!! test_op_asgn_index_cmd:1678 +foo[0, 1] += m foo +!!! test_optarg:2074 +def f foo = 1; end +!!! test_optarg:2084 +def f(foo=1, bar=2); end +!!! test_or:4461 +foo or bar +!!! test_or:4467 +foo || bar +!!! test_or_asgn:1724 +foo.a ||= 1 +!!! test_or_asgn:1734 +foo[0, 1] ||= 2 +!!! test_parser_bug_272:6528 +a @b do |c|;end +!!! test_parser_bug_490:7151 +def m; class << self; class C; end; end; end +!!! 
test_parser_bug_490:7162 +def m; class << self; module M; end; end; end +!!! test_parser_bug_490:7173 +def m; class << self; A = nil; end; end +!!! test_parser_bug_507:7265 +m = -> *args do end +!!! test_parser_bug_518:7277 +class A < B +end +!!! test_parser_bug_525:7287 +m1 :k => m2 do; m3() do end; end +!!! test_parser_bug_604:7737 +m a + b do end +!!! test_parser_bug_640:443 +<<~FOO + baz\ + qux +FOO +!!! test_parser_bug_645:9774 +-> (arg={}) {} +!!! test_parser_bug_830:10630 +/\(/ +!!! test_parser_drops_truncated_parts_of_squiggly_heredoc:10446 +<<~HERE + #{} +HERE +!!! test_pattern_matching__FILE__LINE_literals:9473 + case [__FILE__, __LINE__ + 1, __ENCODING__] + in [__FILE__, __LINE__, __ENCODING__] + end +!!! test_pattern_matching_blank_else:9390 +case 1; in 2; 3; else; end +!!! test_pattern_matching_else:9376 +case 1; in 2; 3; else; 4; end +!!! test_pattern_matching_single_line:9540 +1 => [a]; a +!!! test_pattern_matching_single_line:9552 +1 in [a]; a +!!! test_pattern_matching_single_line_allowed_omission_of_parentheses:9566 +[1, 2] => a, b; a +!!! test_pattern_matching_single_line_allowed_omission_of_parentheses:9581 +{a: 1} => a:; a +!!! test_pattern_matching_single_line_allowed_omission_of_parentheses:9596 +[1, 2] in a, b; a +!!! test_pattern_matching_single_line_allowed_omission_of_parentheses:9611 +{a: 1} in a:; a +!!! test_pattern_matching_single_line_allowed_omission_of_parentheses:9626 +{key: :value} in key: value; value +!!! test_pattern_matching_single_line_allowed_omission_of_parentheses:9643 +{key: :value} => key: value; value +!!! test_postexe:5486 +END { 1 } +!!! test_preexe:5467 +BEGIN { 1 } +!!! test_procarg0:2803 +m { |foo| } +!!! test_procarg0:2812 +m { |(foo, bar)| } +!!! test_range_endless:869 +1.. +!!! test_range_endless:877 +1... +!!! test_range_exclusive:861 +1...2 +!!! test_range_inclusive:853 +1..2 +!!! test_rational:142 +42r +!!! test_rational:148 +42.1r +!!! test_redo:5178 +redo +!!! test_regex_interp:551 +/foo#{bar}baz/ +!!! 
test_regex_plain:541 +/source/im +!!! test_resbody_list:5398 +begin; meth; rescue Exception; bar; end +!!! test_resbody_list_mrhs:5411 +begin; meth; rescue Exception, foo; bar; end +!!! test_resbody_list_var:5444 +begin; meth; rescue foo => ex; bar; end +!!! test_resbody_var:5426 +begin; meth; rescue => ex; bar; end +!!! test_resbody_var:5434 +begin; meth; rescue => @ex; bar; end +!!! test_rescue:5188 +begin; meth; rescue; foo; end +!!! test_rescue_else:5203 +begin; meth; rescue; foo; else; bar; end +!!! test_rescue_else_ensure:5302 +begin; meth; rescue; baz; else foo; ensure; bar end +!!! test_rescue_ensure:5286 +begin; meth; rescue; baz; ensure; bar; end +!!! test_rescue_in_lambda_block:6928 +-> do rescue; end +!!! test_rescue_mod:5319 +meth rescue bar +!!! test_rescue_mod_asgn:5331 +foo = meth rescue bar +!!! test_rescue_mod_masgn:5345 +foo, bar = meth rescue [1, 2] +!!! test_rescue_mod_op_assign:5365 +foo += meth rescue bar +!!! test_rescue_without_begin_end:5381 +meth do; foo; rescue; bar; end +!!! test_restarg_named:2094 +def f(*foo); end +!!! test_restarg_unnamed:2104 +def f(*); end +!!! test_retry:5457 +retry +!!! test_return:5084 +return(foo) +!!! test_return:5098 +return foo +!!! test_return:5104 +return() +!!! test_return:5111 +return +!!! test_return_block:5119 +return fun foo do end +!!! test_ruby_bug_10279:5905 +{a: if true then 42 end} +!!! test_ruby_bug_10653:5915 +true ? 1.tap do |n| p n end : 0 +!!! test_ruby_bug_10653:5945 +false ? raise {} : tap {} +!!! test_ruby_bug_10653:5958 +false ? raise do end : tap do end +!!! test_ruby_bug_11107:5973 +p ->() do a() do end end +!!! test_ruby_bug_11380:5985 +p -> { :hello }, a: 1 do end +!!! test_ruby_bug_11873:6353 +a b{c d}, "x" do end +!!! test_ruby_bug_11873:6367 +a b(c d), "x" do end +!!! test_ruby_bug_11873:6380 +a b{c(d)}, "x" do end +!!! test_ruby_bug_11873:6394 +a b(c(d)), "x" do end +!!! test_ruby_bug_11873:6407 +a b{c d}, /x/ do end +!!! test_ruby_bug_11873:6421 +a b(c d), /x/ do end +!!! 
test_ruby_bug_11873:6434 +a b{c(d)}, /x/ do end +!!! test_ruby_bug_11873:6448 +a b(c(d)), /x/ do end +!!! test_ruby_bug_11873:6461 +a b{c d}, /x/m do end +!!! test_ruby_bug_11873:6475 +a b(c d), /x/m do end +!!! test_ruby_bug_11873:6488 +a b{c(d)}, /x/m do end +!!! test_ruby_bug_11873:6502 +a b(c(d)), /x/m do end +!!! test_ruby_bug_11873_b:6050 +p p{p(p);p p}, tap do end +!!! test_ruby_bug_11989:6069 +p <<~"E" + x\n y +E +!!! test_ruby_bug_11990:6078 +p <<~E " y" + x +E +!!! test_ruby_bug_12073:6089 +a = 1; a b: 1 +!!! test_ruby_bug_12073:6102 +def foo raise; raise A::B, ''; end +!!! test_ruby_bug_12402:6116 +foo = raise(bar) rescue nil +!!! test_ruby_bug_12402:6127 +foo += raise(bar) rescue nil +!!! test_ruby_bug_12402:6139 +foo[0] += raise(bar) rescue nil +!!! test_ruby_bug_12402:6153 +foo.m += raise(bar) rescue nil +!!! test_ruby_bug_12402:6166 +foo::m += raise(bar) rescue nil +!!! test_ruby_bug_12402:6179 +foo.C += raise(bar) rescue nil +!!! test_ruby_bug_12402:6192 +foo::C ||= raise(bar) rescue nil +!!! test_ruby_bug_12402:6205 +foo = raise bar rescue nil +!!! test_ruby_bug_12402:6216 +foo += raise bar rescue nil +!!! test_ruby_bug_12402:6228 +foo[0] += raise bar rescue nil +!!! test_ruby_bug_12402:6242 +foo.m += raise bar rescue nil +!!! test_ruby_bug_12402:6255 +foo::m += raise bar rescue nil +!!! test_ruby_bug_12402:6268 +foo.C += raise bar rescue nil +!!! test_ruby_bug_12402:6281 +foo::C ||= raise bar rescue nil +!!! test_ruby_bug_12669:6296 +a = b = raise :x +!!! test_ruby_bug_12669:6305 +a += b = raise :x +!!! test_ruby_bug_12669:6314 +a = b += raise :x +!!! test_ruby_bug_12669:6323 +a += b += raise :x +!!! test_ruby_bug_12686:6334 +f (g rescue nil) +!!! test_ruby_bug_13547:7018 +meth[] {} +!!! test_ruby_bug_14690:7250 +let () { m(a) do; end } +!!! test_ruby_bug_15789:7622 +m ->(a = ->{_1}) {a} +!!! test_ruby_bug_15789:7636 +m ->(a: ->{_1}) {a} +!!! test_ruby_bug_9669:5889 +def a b: +return +end +!!! test_ruby_bug_9669:5895 +o = { +a: +1 +} +!!! 
test_sclass:1884 +class << foo; nil; end +!!! test_self:952 +self +!!! test_send_attr_asgn:3528 +foo.a = 1 +!!! test_send_attr_asgn:3536 +foo::a = 1 +!!! test_send_attr_asgn:3544 +foo.A = 1 +!!! test_send_attr_asgn:3552 +foo::A = 1 +!!! test_send_attr_asgn_conditional:3751 +a&.b = 1 +!!! test_send_binary_op:3308 +foo + 1 +!!! test_send_binary_op:3314 +foo - 1 +!!! test_send_binary_op:3318 +foo * 1 +!!! test_send_binary_op:3322 +foo / 1 +!!! test_send_binary_op:3326 +foo % 1 +!!! test_send_binary_op:3330 +foo ** 1 +!!! test_send_binary_op:3334 +foo | 1 +!!! test_send_binary_op:3338 +foo ^ 1 +!!! test_send_binary_op:3342 +foo & 1 +!!! test_send_binary_op:3346 +foo <=> 1 +!!! test_send_binary_op:3350 +foo < 1 +!!! test_send_binary_op:3354 +foo <= 1 +!!! test_send_binary_op:3358 +foo > 1 +!!! test_send_binary_op:3362 +foo >= 1 +!!! test_send_binary_op:3366 +foo == 1 +!!! test_send_binary_op:3376 +foo != 1 +!!! test_send_binary_op:3382 +foo === 1 +!!! test_send_binary_op:3386 +foo =~ 1 +!!! test_send_binary_op:3396 +foo !~ 1 +!!! test_send_binary_op:3402 +foo << 1 +!!! test_send_binary_op:3406 +foo >> 1 +!!! test_send_block_chain_cmd:3201 +meth 1 do end.fun bar +!!! test_send_block_chain_cmd:3212 +meth 1 do end.fun(bar) +!!! test_send_block_chain_cmd:3225 +meth 1 do end::fun bar +!!! test_send_block_chain_cmd:3236 +meth 1 do end::fun(bar) +!!! test_send_block_chain_cmd:3249 +meth 1 do end.fun bar do end +!!! test_send_block_chain_cmd:3261 +meth 1 do end.fun(bar) {} +!!! test_send_block_chain_cmd:3273 +meth 1 do end.fun {} +!!! test_send_block_conditional:3759 +foo&.bar {} +!!! test_send_call:3721 +foo.(1) +!!! test_send_call:3731 +foo::(1) +!!! test_send_conditional:3743 +a&.b +!!! test_send_index:3562 +foo[1, 2] +!!! test_send_index_asgn:3591 +foo[1, 2] = 3 +!!! test_send_index_asgn_legacy:3603 +foo[1, 2] = 3 +!!! test_send_index_cmd:3584 +foo[m bar] +!!! test_send_index_legacy:3573 +foo[1, 2] +!!! test_send_lambda:3615 +->{ } +!!! test_send_lambda:3625 +-> * { } +!!! 
test_send_lambda:3636 +-> do end +!!! test_send_lambda_args:3648 +->(a) { } +!!! test_send_lambda_args:3662 +-> (a) { } +!!! test_send_lambda_args_noparen:3686 +-> a: 1 { } +!!! test_send_lambda_args_noparen:3695 +-> a: { } +!!! test_send_lambda_args_shadow:3673 +->(a; foo, bar) { } +!!! test_send_lambda_legacy:3707 +->{ } +!!! test_send_op_asgn_conditional:3770 +a&.b &&= 1 +!!! test_send_plain:3105 +foo.fun +!!! test_send_plain:3112 +foo::fun +!!! test_send_plain:3119 +foo::Fun() +!!! test_send_plain_cmd:3128 +foo.fun bar +!!! test_send_plain_cmd:3135 +foo::fun bar +!!! test_send_plain_cmd:3142 +foo::Fun bar +!!! test_send_self:3044 +fun +!!! test_send_self:3050 +fun! +!!! test_send_self:3056 +fun(1) +!!! test_send_self_block:3066 +fun { } +!!! test_send_self_block:3070 +fun() { } +!!! test_send_self_block:3074 +fun(1) { } +!!! test_send_self_block:3078 +fun do end +!!! test_send_unary_op:3412 +-foo +!!! test_send_unary_op:3418 ++foo +!!! test_send_unary_op:3422 +~foo +!!! test_slash_newline_in_heredocs:7186 +<<~E + 1 \ + 2 + 3 +E +!!! test_slash_newline_in_heredocs:7194 +<<-E + 1 \ + 2 + 3 +E +!!! test_space_args_arg:4132 +fun (1) +!!! test_space_args_arg_block:4146 +fun (1) {} +!!! test_space_args_arg_block:4160 +foo.fun (1) {} +!!! test_space_args_arg_block:4176 +foo::fun (1) {} +!!! test_space_args_arg_call:4198 +fun (1).to_i +!!! test_space_args_arg_newline:4138 +fun (1 +) +!!! test_space_args_block:4430 +fun () {} +!!! test_space_args_cmd:4125 +fun (f bar) +!!! test_string___FILE__:241 +__FILE__ +!!! test_string_concat:226 +"foo#@a" "bar" +!!! test_string_dvar:215 +"#@a #@@a #$a" +!!! test_string_interp:200 +"foo#{bar}baz" +!!! test_string_plain:184 +'foobar' +!!! test_string_plain:191 +%q(foobar) +!!! test_super:3807 +super(foo) +!!! test_super:3815 +super foo +!!! test_super:3821 +super() +!!! test_super_block:3839 +super foo, bar do end +!!! test_super_block:3845 +super do end +!!! test_symbol_interp:484 +:"foo#{bar}baz" +!!! 
test_symbol_plain:469 +:foo +!!! test_symbol_plain:475 +:'foo' +!!! test_ternary:4605 +foo ? 1 : 2 +!!! test_ternary_ambiguous_symbol:4614 +t=1;(foo)?t:T +!!! test_trailing_forward_arg:8022 +def foo(a, b, ...); bar(a, 42, ...); end +!!! test_true:89 +true +!!! test_unary_num_pow_precedence:3505 ++2.0 ** 10 +!!! test_unary_num_pow_precedence:3512 +-2 ** 10 +!!! test_unary_num_pow_precedence:3519 +-2.0 ** 10 +!!! test_undef:2003 +undef foo, :bar, :"foo#{1}" +!!! test_unless:4529 +unless foo then bar; end +!!! test_unless:4537 +unless foo; bar; end +!!! test_unless_else:4573 +unless foo then bar; else baz; end +!!! test_unless_else:4582 +unless foo; bar; else baz; end +!!! test_unless_mod:4546 +bar unless foo +!!! test_until:4948 +until foo do meth end +!!! test_until:4955 +until foo; meth end +!!! test_until_mod:4963 +meth until foo +!!! test_until_post:4978 +begin meth end until foo +!!! test_var_and_asgn:1714 +a &&= 1 +!!! test_var_op_asgn:1498 +a += 1 +!!! test_var_op_asgn:1504 +@a |= 1 +!!! test_var_op_asgn:1510 +@@var |= 10 +!!! test_var_op_asgn:1514 +def a; @@var |= 10; end +!!! test_var_op_asgn_cmd:1521 +foo += m foo +!!! test_var_or_asgn:1706 +a ||= 1 +!!! test_when_multi:4895 +case foo; when 'bar', 'baz'; bar; end +!!! test_when_splat:4904 +case foo; when 1, *baz; bar; when *foo; end +!!! test_when_then:4883 +case foo; when 'bar' then bar; end +!!! test_while:4924 +while foo do meth end +!!! test_while:4932 +while foo; meth end +!!! test_while_mod:4941 +meth while foo +!!! test_while_post:4970 +begin meth end while foo +!!! test_xstring_interp:524 +`foo#{bar}baz` +!!! test_xstring_plain:515 +`foobar` +!!! test_yield:3855 +yield(foo) +!!! test_yield:3863 +yield foo +!!! test_yield:3869 +yield() +!!! test_yield:3877 +yield +!!! 
test_zsuper:3831 +super diff --git a/test/translation/parser_test.rb b/test/translation/parser_test.rb new file mode 100644 index 00000000..576d4ac1 --- /dev/null +++ b/test/translation/parser_test.rb @@ -0,0 +1,168 @@ +# frozen_string_literal: true + +require_relative "../test_helper" +require "parser/current" + +Parser::Builders::Default.modernize + +module SyntaxTree + module Translation + class ParserTest < Minitest::Test + known_failures = [ + # I think this may be a bug in the parser gem's precedence calculation. + # Unary plus appears to be parsed as part of the number literal in + # CRuby, but parser is parsing it as a separate operator. + "test_unary_num_pow_precedence:3505", + + # Not much to be done about this. Basically, regular expressions with + # named capture groups that use the =~ operator inject local variables + # into the current scope. In the parser gem, it detects this and changes + # future references to that name to be a local variable instead of a + # potential method call. CRuby does not do this. + "test_lvar_injecting_match:3778", + + # This is failing because CRuby is not marking values captured in hash + # patterns as local variables, while the parser gem is. + "test_pattern_matching_hash:8971", + + # This is not actually allowed in the CRuby parser but the parser gem + # thinks it is allowed. + "test_pattern_matching_hash_with_string_keys:9016", + "test_pattern_matching_hash_with_string_keys:9027", + "test_pattern_matching_hash_with_string_keys:9038", + "test_pattern_matching_hash_with_string_keys:9060", + "test_pattern_matching_hash_with_string_keys:9071", + "test_pattern_matching_hash_with_string_keys:9082", + + # This happens with pattern matching where you're matching a literal + # value inside parentheses, which doesn't really do anything. Ripper + # doesn't capture that this value is inside parentheses, so it's hard + # to translate properly.
+ "test_pattern_matching_expr_in_paren:9206", + + # These are also failing because of CRuby not marking values captured in + # hash patterns as local variables. + "test_pattern_matching_single_line_allowed_omission_of_parentheses:*", + + # I'm not even sure what this is testing, because the code is invalid in + # CRuby. + "test_control_meta_escape_chars_in_regexp__since_31:*", + ] + + todo_failures = [ + "test_dedenting_heredoc:334", + "test_dedenting_heredoc:390", + "test_dedenting_heredoc:399", + "test_slash_newline_in_heredocs:7194", + "test_parser_slash_slash_n_escaping_in_literals:*", + "test_cond_match_current_line:4801", + "test_forwarded_restarg:*", + "test_forwarded_kwrestarg:*", + "test_forwarded_argument_with_restarg:*", + "test_forwarded_argument_with_kwrestarg:*" + ] + + current_version = RUBY_VERSION.split(".")[0..1].join(".") + + if current_version <= "2.7" + # I'm not sure why this is failing on 2.7.0, but we'll turn it off for + # now until we have more time to investigate. + todo_failures.push( + "test_pattern_matching_hash:*", + "test_pattern_matching_single_line:9552" + ) + end + + if current_version <= "3.0" + # In <= 3.0, there are some changes to the way the parser gem handles + # forwarded args. We should eventually support this, but for now we're + # going to mark them as todo. + todo_failures.push( + "test_forward_arg:*", + "test_forward_args_legacy:*", + "test_endless_method_forwarded_args_legacy:*", + "test_trailing_forward_arg:*", + "test_forward_arg_with_open_args:10770", + ) + end + + if current_version == "3.1" + # This test actually fails on 3.1.0, even though it's marked as being + # since 3.1. So we're going to skip this test on 3.1, but leave it in + # for other versions.
+ known_failures.push( + "test_multiple_pattern_matches:11086", + "test_multiple_pattern_matches:11102" + ) + end + + if current_version < "3.2" || RUBY_ENGINE == "truffleruby" + known_failures.push( + "test_if_while_after_class__since_32:11004", + "test_if_while_after_class__since_32:11014", + "test_newline_in_hash_argument:11057" + ) + end + + all_failures = known_failures + todo_failures + + File + .foreach(File.expand_path("parser.txt", __dir__), chomp: true) + .slice_before { |line| line.start_with?("!!!") } + .each do |(prefix, *lines)| + name = prefix[4..] + next if all_failures.any? { |pattern| File.fnmatch?(pattern, name) } + + define_method(name) { assert_parses(lines.join("\n")) } + end + + private + + def assert_parses(source) + parser = ::Parser::CurrentRuby.default_parser + parser.diagnostics.consumer = ->(*) {} + + buffer = ::Parser::Source::Buffer.new("(string)", 1) + buffer.source = source + + expected = + begin + parser.parse(buffer) + rescue ::Parser::SyntaxError + # We can get a syntax error if we're parsing a fixture that was + # designed for a later Ruby version but we're running an earlier + # Ruby version. In this case we can just return early from the test. + end + + return if expected.nil? + node = SyntaxTree.parse(source) + assert_equal expected, SyntaxTree::Translation.to_parser(node, buffer) + end + end + end +end + +if ENV["PARSER_LOCATION"] + # Modify the source map == check so that it doesn't check against the node + # itself so we don't get into a recursive loop. + Parser::Source::Map.prepend( + Module.new do + def ==(other) + self.class == other.class && + (instance_variables - %i[@node]).map do |ivar| + instance_variable_get(ivar) == other.instance_variable_get(ivar) + end.reduce(:&) + end + end + ) + + # Next, ensure that we're comparing the nodes and also comparing the source + # ranges so that we're getting all of the necessary information. 
+ Parser::AST::Node.prepend( + Module.new do + def ==(other) + super && (location == other.location) + end + end + ) +end From 946bc61c485c9fa325a7df60821c1815e76e995c Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 8 Feb 2023 14:20:06 -0500 Subject: [PATCH 32/58] Don't rely on parent being present --- lib/syntax_tree/node.rb | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/lib/syntax_tree/node.rb b/lib/syntax_tree/node.rb index ff8ee95a..70fbdf4c 100644 --- a/lib/syntax_tree/node.rb +++ b/lib/syntax_tree/node.rb @@ -1539,7 +1539,7 @@ def ===(other) private def format_contents(q) - q.parent.format_key(q, key) + (q.parent || HashKeyFormatter::Identity.new).format_key(q, key) return unless value if key.comments.empty? && AssignFormatting.skip_indent?(value) @@ -1756,6 +1756,20 @@ def format_key(q, key) end end + # When formatting a single assoc node without the context of the parent + # hash, this formatter is used. It uses whatever is present in the node, + # because there is nothing to be consistent with. + class Identity + def format_key(q, key) + if key.is_a?(Label) + q.format(key) + else + q.format(key) + q.text(" =>") + end + end + end + def self.for(container) labels = container.assocs.all? do |assoc| @@ -4328,7 +4342,7 @@ def format(q) # are no parentheses around the arguments to that command, so we need to # break the block. case q.parent - when Command, CommandCall + when nil, Command, CommandCall q.break_parent format_break(q, break_opening, break_closing) return @@ -4382,7 +4396,7 @@ def unchangeable_bounds?(q) # If we're a sibling of a control-flow keyword, then we're going to have to # use the do..end bounds. 
def forced_do_end_bounds?(q) - case q.parent.call + case q.parent&.call when Break, Next, ReturnNode, Super true else From 2119110732d4dcd426a4caf183c142b75d96eb27 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 8 Feb 2023 15:41:19 -0500 Subject: [PATCH 33/58] Do not rely on fiddle being present --- lib/syntax_tree.rb | 1 - lib/syntax_tree/yarv/instruction_sequence.rb | 38 ++++++++++++-------- 2 files changed, 23 insertions(+), 16 deletions(-) diff --git a/lib/syntax_tree.rb b/lib/syntax_tree.rb index cd1f1ce4..e5bc5ab5 100644 --- a/lib/syntax_tree.rb +++ b/lib/syntax_tree.rb @@ -2,7 +2,6 @@ require "cgi" require "etc" -require "fiddle" require "json" require "pp" require "prettier_print" diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb index 821738c9..45b543e6 100644 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -7,6 +7,28 @@ module YARV # list of instructions along with the metadata pertaining to them. It also # functions as a builder for the instruction sequence. class InstructionSequence + # This provides a handle to the rb_iseq_load function, which allows you + # to pass a serialized iseq to Ruby and have it return a + # RubyVM::InstructionSequence object. + def self.iseq_load(iseq) + require "fiddle" + + @iseq_load_function ||= + Fiddle::Function.new( + Fiddle::Handle::DEFAULT["rb_iseq_load"], + [Fiddle::TYPE_VOIDP] * 3, + Fiddle::TYPE_VOIDP + ) + + Fiddle.dlunwrap(@iseq_load_function.call(Fiddle.dlwrap(iseq), 0, nil)) + rescue LoadError + raise "Could not load the Fiddle library" + rescue NameError + raise "Unable to find rb_iseq_load" + rescue Fiddle::DLError + raise "Unable to perform a dynamic load" + end + # When the list of instructions is first being created, it's stored as a # linked list. This is to make it easier to perform peephole optimizations # and other transformations like instruction specialization. 
@@ -60,19 +82,6 @@ def push(instruction) MAGIC = "YARVInstructionSequence/SimpleDataFormat" - # This provides a handle to the rb_iseq_load function, which allows you to - # pass a serialized iseq to Ruby and have it return a - # RubyVM::InstructionSequence object. - ISEQ_LOAD = - begin - Fiddle::Function.new( - Fiddle::Handle::DEFAULT["rb_iseq_load"], - [Fiddle::TYPE_VOIDP] * 3, - Fiddle::TYPE_VOIDP - ) - rescue NameError, Fiddle::DLError - end - # This object is used to track the size of the stack at any given time. It # is effectively a mini symbolic interpreter. It's necessary because when # instruction sequences get serialized they include a :stack_max field on @@ -221,8 +230,7 @@ def length end def eval - raise "Unsupported platform" if ISEQ_LOAD.nil? - Fiddle.dlunwrap(ISEQ_LOAD.call(Fiddle.dlwrap(to_a), 0, nil)).eval + InstructionSequence.iseq_load(to_a).eval end def to_a From 4f76ffab5d742c42ab982a77b696256c8ffb9090 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 8 Feb 2023 15:47:35 -0500 Subject: [PATCH 34/58] Strip out mspec for now --- .gitmodules | 6 ------ Rakefile | 1 - spec/mspec | 1 - spec/ruby | 1 - tasks/spec.rake | 10 ---------- 5 files changed, 19 deletions(-) delete mode 100644 .gitmodules delete mode 160000 spec/mspec delete mode 160000 spec/ruby delete mode 100644 tasks/spec.rake diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index f5477ea3..00000000 --- a/.gitmodules +++ /dev/null @@ -1,6 +0,0 @@ -[submodule "mspec"] - path = spec/mspec - url = git@github.com:ruby/mspec.git -[submodule "spec"] - path = spec/ruby - url = git@github.com:ruby/spec.git diff --git a/Rakefile b/Rakefile index aa8d29f6..22d7d1fe 100644 --- a/Rakefile +++ b/Rakefile @@ -8,7 +8,6 @@ Rake.add_rakelib "tasks" Rake::TestTask.new(:test) do |t| t.libs << "test" - t.libs << "test/suites" t.libs << "lib" t.test_files = FileList["test/**/*_test.rb"] end diff --git a/spec/mspec b/spec/mspec deleted file mode 160000 index 4877d58d..00000000 --- 
a/spec/mspec +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 4877d58dff577641bc1ecd1bf3d3c3daa93b423f diff --git a/spec/ruby b/spec/ruby deleted file mode 160000 index 71873ae4..00000000 --- a/spec/ruby +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 71873ae4421f5b551a5af0f3427e901414736835 diff --git a/tasks/spec.rake b/tasks/spec.rake deleted file mode 100644 index c361fe8e..00000000 --- a/tasks/spec.rake +++ /dev/null @@ -1,10 +0,0 @@ -# frozen_string_literal: true - -desc "Run mspec tests using YARV emulation" -task :spec do - specs = File.expand_path("../spec/ruby/language/**/*_spec.rb", __dir__) - - Dir[specs].each do |filepath| - sh "exe/yarv ./spec/mspec/bin/mspec-tag #{filepath}" - end -end From f44046d115b04b1c442634cad84be8a8c6e01afd Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 8 Feb 2023 15:49:39 -0500 Subject: [PATCH 35/58] Update rubocop version --- .rubocop.yml | 3 +++ Gemfile.lock | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.rubocop.yml b/.rubocop.yml index 62e78453..33636c44 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -30,6 +30,9 @@ Lint/AmbiguousRange: Lint/BooleanSymbol: Enabled: false +Lint/Debugger: + Enabled: false + Lint/DuplicateBranch: Enabled: false diff --git a/Gemfile.lock b/Gemfile.lock index 799bd891..46111ea4 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -19,7 +19,7 @@ GEM rake (13.0.6) regexp_parser (2.6.2) rexml (3.2.5) - rubocop (1.44.1) + rubocop (1.45.1) json (~> 2.3) parallel (~> 1.10) parser (>= 3.2.0.0) From 72c4f5c9c25d9e34b2e09b66c439c018bcf9a571 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 9 Feb 2023 11:14:30 -0500 Subject: [PATCH 36/58] Provide a reflection API --- bin/console | 1 + lib/syntax_tree/node.rb | 133 ++++++++++---------- lib/syntax_tree/reflection.rb | 224 ++++++++++++++++++++++++++++++++++ 3 files changed, 290 insertions(+), 68 deletions(-) create mode 100644 lib/syntax_tree/reflection.rb diff --git a/bin/console b/bin/console index 1c18bd62..6f35f1ec 100755 --- 
a/bin/console +++ b/bin/console @@ -3,6 +3,7 @@ require "bundler/setup" require "syntax_tree" +require "syntax_tree/reflection" require "irb" IRB.start(__FILE__) diff --git a/lib/syntax_tree/node.rb b/lib/syntax_tree/node.rb index 70fbdf4c..4ac5aa24 100644 --- a/lib/syntax_tree/node.rb +++ b/lib/syntax_tree/node.rb @@ -557,7 +557,7 @@ def var_alias? # collection[] # class ARef < Node - # [untyped] the value being indexed + # [Node] the value being indexed attr_reader :collection # [nil | Args] the value being passed within the brackets @@ -635,7 +635,7 @@ def ===(other) # collection[index] = value # class ARefField < Node - # [untyped] the value being indexed + # [Node] the value being indexed attr_reader :collection # [nil | Args] the value being passed within the brackets @@ -810,7 +810,7 @@ def trailing_comma? # method(first, second, third) # class Args < Node - # [Array[ untyped ]] the arguments that this node wraps + # [Array[ Node ]] the arguments that this node wraps attr_reader :parts # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -876,7 +876,7 @@ def arity # method(&expression) # class ArgBlock < Node - # [nil | untyped] the expression being turned into a block + # [nil | Node] the expression being turned into a block attr_reader :value # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -928,7 +928,7 @@ def ===(other) # method(*arguments) # class ArgStar < Node - # [nil | untyped] the expression being splatted + # [nil | Node] the expression being splatted attr_reader :value # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -1289,7 +1289,7 @@ def format(q) # [nil | VarRef] the optional constant wrapper attr_reader :constant - # [Array[ untyped ]] the regular positional arguments that this array + # [Array[ Node ]] the regular positional arguments that this array # pattern is matching against attr_reader :requireds @@ -1297,7 +1297,7 @@ def format(q) # positional arguments attr_reader :rest - # [Array[ 
untyped ]] the list of positional arguments occurring after the + # [Array[ Node ]] the list of positional arguments occurring after the # optional star if there is one attr_reader :posts @@ -1407,7 +1407,7 @@ class Assign < Node # to assign the result of the expression to attr_reader :target - # [untyped] the expression to be assigned + # [Node] the expression to be assigned attr_reader :value # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -1482,10 +1482,10 @@ def skip_indent? # # In the above example, the would be two Assoc nodes. class Assoc < Node - # [untyped] the key of this pair + # [Node] the key of this pair attr_reader :key - # [untyped] the value of this pair + # [Node] the value of this pair attr_reader :value # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -1560,7 +1560,7 @@ def format_contents(q) # { **pairs } # class AssocSplat < Node - # [nil | untyped] the expression that is being splatted + # [nil | Node] the expression that is being splatted attr_reader :value # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -1924,7 +1924,7 @@ def ===(other) # end # class PinnedBegin < Node - # [untyped] the expression being pinned + # [Node] the expression being pinned attr_reader :statement # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -2005,13 +2005,13 @@ def name } end - # [untyped] the left-hand side of the expression + # [Node] the left-hand side of the expression attr_reader :left # [Symbol] the operator used between the two expressions attr_reader :operator - # [untyped] the right-hand side of the expression + # [Node] the right-hand side of the expression attr_reader :right # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -2670,7 +2670,7 @@ def format(q) # Of course there are a lot of caveats to that, including trailing operators # when necessary, where comments are places, how blocks are aligned, etc. 
class CallChainFormatter - # [Call | MethodAddBlock] the top of the call chain + # [CallNode | MethodAddBlock] the top of the call chain attr_reader :node def initialize(node) @@ -2891,7 +2891,7 @@ def format_child( # receiver.message # class CallNode < Node - # [nil | untyped] the receiver of the method call + # [nil | Node] the receiver of the method call attr_reader :receiver # [nil | :"::" | Op | Period] the operator being used to send the message @@ -3067,7 +3067,7 @@ class Case < Node # [Kw] the keyword that opens this expression attr_reader :keyword - # [nil | untyped] optional value being switched on + # [nil | Node] optional value being switched on attr_reader :value # [In | When] the next clause in the chain @@ -3146,14 +3146,14 @@ def ===(other) # value => pattern # class RAssign < Node - # [untyped] the left-hand expression + # [Node] the left-hand expression attr_reader :value # [Kw | Op] the operator being used to match against the pattern, which is # either => or in attr_reader :operator - # [untyped] the pattern on the right-hand side of the expression + # [Node] the pattern on the right-hand side of the expression attr_reader :pattern # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -3264,7 +3264,7 @@ class ClassDeclaration < Node # defined attr_reader :constant - # [nil | untyped] the optional superclass declaration + # [nil | Node] the optional superclass declaration attr_reader :superclass # [BodyStmt] the expressions to execute within the context of the class @@ -3402,7 +3402,7 @@ class Command < Node # [Args] the arguments being sent with the message attr_reader :arguments - # [nil | Block] the optional block being passed to the method + # [nil | BlockNode] the optional block being passed to the method attr_reader :block # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -3508,7 +3508,7 @@ def align(q, node, &block) # object.method argument # class CommandCall < Node - # [untyped] the receiver of the message 
+ # [Node] the receiver of the message attr_reader :receiver # [:"::" | Op | Period] the operator used to send the message @@ -3520,7 +3520,7 @@ class CommandCall < Node # [nil | Args] the arguments going along with the message attr_reader :arguments - # [nil | Block] the block associated with this method call + # [nil | BlockNode] the block associated with this method call attr_reader :block # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -3806,7 +3806,7 @@ def ===(other) # object::Const = value # class ConstPathField < Node - # [untyped] the source of the constant + # [Node] the source of the constant attr_reader :parent # [Const] the constant itself @@ -3870,7 +3870,7 @@ def ===(other) # object::Const # class ConstPathRef < Node - # [untyped] the source of the constant + # [Node] the source of the constant attr_reader :parent # [Const] the constant itself @@ -4039,7 +4039,7 @@ def ===(other) # def object.method(param) result end # class DefNode < Node - # [nil | untyped] the target where the method is being defined + # [nil | Node] the target where the method is being defined attr_reader :target # [nil | Op | Period] the operator being used to declare the method @@ -4051,7 +4051,7 @@ class DefNode < Node # [nil | Params | Paren] the parameter declaration for the method attr_reader :params - # [BodyStmt | untyped] the expressions to be executed by the method + # [BodyStmt | Node] the expressions to be executed by the method attr_reader :bodystmt # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -4185,7 +4185,7 @@ def arity # defined?(variable) # class Defined < Node - # [untyped] the value being sent to the keyword + # [Node] the value being sent to the keyword attr_reader :value # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -4476,13 +4476,13 @@ def format_flat(q, flat_opening, flat_closing) # # One of the sides of the expression may be nil, but not both. 
class RangeNode < Node - # [nil | untyped] the left side of the expression + # [nil | Node] the left side of the expression attr_reader :left # [Op] the operator used for this range attr_reader :operator - # [nil | untyped] the right side of the expression + # [nil | Node] the right side of the expression attr_reader :right # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -4801,7 +4801,7 @@ def ===(other) # end # class Elsif < Node - # [untyped] the expression to be checked + # [Node] the expression to be checked attr_reader :predicate # [Statements] the expressions to be executed @@ -5227,7 +5227,7 @@ def ===(other) # object.variable = value # class Field < Node - # [untyped] the parent object that owns the field being assigned + # [Node] the parent object that owns the field being assigned attr_reader :parent # [:"::" | Op | Period] the operator being used for the assignment @@ -5353,13 +5353,13 @@ def ===(other) # end # class FndPtn < Node - # [nil | untyped] the optional constant wrapper + # [nil | Node] the optional constant wrapper attr_reader :constant # [VarField] the splat on the left-hand side attr_reader :left - # [Array[ untyped ]] the list of positional expressions in the pattern that + # [Array[ Node ]] the list of positional expressions in the pattern that # are being matched attr_reader :values @@ -5455,7 +5455,7 @@ class For < Node # pull values out of the object being enumerated attr_reader :index - # [untyped] the object being enumerated in the loop + # [Node] the object being enumerated in the loop attr_reader :collection # [Statements] the statements to be executed @@ -5934,7 +5934,7 @@ class KeywordFormatter # [Label] the keyword being used attr_reader :key - # [untyped] the optional value for the keyword + # [Node] the optional value for the keyword attr_reader :value def initialize(key, value) @@ -5975,10 +5975,10 @@ def format(q) end end - # [nil | untyped] the optional constant wrapper + # [nil | Node] the optional 
constant wrapper attr_reader :constant - # [Array[ [Label, untyped] ]] the set of tuples representing the keywords + # [Array[ [Label, Node] ]] the set of tuples representing the keywords # that should be matched against in the pattern attr_reader :keywords @@ -6404,7 +6404,7 @@ def contains_conditional? # end # class IfNode < Node - # [untyped] the expression to be checked + # [Node] the expression to be checked attr_reader :predicate # [Statements] the expressions to be executed @@ -6477,13 +6477,13 @@ def modifier? # predicate ? truthy : falsy # class IfOp < Node - # [untyped] the expression to be checked + # [Node] the expression to be checked attr_reader :predicate - # [untyped] the expression to be executed if the predicate is truthy + # [Node] the expression to be executed if the predicate is truthy attr_reader :truthy - # [untyped] the expression to be executed if the predicate is falsy + # [Node] the expression to be executed if the predicate is falsy attr_reader :falsy # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -6667,7 +6667,7 @@ def ===(other) # end # class In < Node - # [untyped] the pattern to check against + # [Node] the pattern to check against attr_reader :pattern # [Statements] the expressions to execute if the pattern matched @@ -7450,7 +7450,7 @@ class MAssign < Node # [MLHS | MLHSParen] the target of the multiple assignment attr_reader :target - # [untyped] the value being assigned + # [Node] the value being assigned attr_reader :value # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -7510,10 +7510,10 @@ def ===(other) # method {} # class MethodAddBlock < Node - # [Call | Command | CommandCall] the method call + # [CallNode | Command | CommandCall] the method call attr_reader :call - # [Block] the block being sent with the method call + # [BlockNode] the block being sent with the method call attr_reader :block # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -7585,7 +7585,7 @@ def 
format_contents(q) # first, second, third = value # class MLHS < Node - # Array[ARefField | ArgStar | Field | Ident | MLHSParen | VarField] the + # [Array[ARefField | ArgStar | Field | Ident | MLHSParen | VarField]] the # parts of the left-hand side of a multiple assignment attr_reader :parts @@ -7812,7 +7812,7 @@ def format_declaration(q) # values = first, second, third # class MRHS < Node - # Array[untyped] the parts that are being assigned + # [Array[Node]] the parts that are being assigned attr_reader :parts # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -7988,7 +7988,7 @@ class OpAssign < Node # [Op] the operator being used for the assignment attr_reader :operator - # [untyped] the expression to be assigned + # [Node] the expression to be assigned attr_reader :value # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -8145,7 +8145,7 @@ class OptionalFormatter # [Ident] the name of the parameter attr_reader :name - # [untyped] the value of the parameter + # [Node] the value of the parameter attr_reader :value def initialize(name, value) @@ -8170,7 +8170,7 @@ class KeywordFormatter # [Ident] the name of the parameter attr_reader :name - # [nil | untyped] the value of the parameter + # [nil | Node] the value of the parameter attr_reader :value def initialize(name, value) @@ -8214,7 +8214,7 @@ def format(q) # [Array[ Ident ]] any required parameters attr_reader :requireds - # [Array[ [ Ident, untyped ] ]] any optional parameters and their default + # [Array[ [ Ident, Node ] ]] any optional parameters and their default # values attr_reader :optionals @@ -8226,7 +8226,7 @@ def format(q) # parameter attr_reader :posts - # [Array[ [ Ident, nil | untyped ] ]] any keyword parameters and their + # [Array[ [ Ident, nil | Node ] ]] any keyword parameters and their # optional default values attr_reader :keywords @@ -8419,7 +8419,7 @@ class Paren < Node # [LParen] the left parenthesis that opened this statement attr_reader :lparen - # 
[nil | untyped] the expression inside the parentheses + # [nil | Node] the expression inside the parentheses attr_reader :contents # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -9268,7 +9268,7 @@ def ambiguous?(q) # end # class RescueEx < Node - # [untyped] the list of exceptions being rescued + # [Node] the list of exceptions being rescued attr_reader :exceptions # [nil | Field | VarField] the expression being used to capture the raised @@ -9466,10 +9466,10 @@ def ===(other) # expression rescue value # class RescueMod < Node - # [untyped] the expression to execute + # [Node] the expression to execute attr_reader :statement - # [untyped] the value to use if the executed expression raises an error + # [Node] the value to use if the executed expression raises an error attr_reader :value # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -9728,7 +9728,7 @@ def ===(other) # end # class SClass < Node - # [untyped] the target of the singleton class to enter + # [Node] the target of the singleton class to enter attr_reader :target # [BodyStmt] the expressions to be executed @@ -9802,10 +9802,10 @@ def ===(other) # propagate that onto void_stmt nodes inside the stmts in order to make sure # all comments get printed appropriately. 
class Statements < Node - # [SyntaxTree] the parser that is generating this node + # [Parser] the parser that is generating this node attr_reader :parser - # [Array[ untyped ]] the list of expressions contained within this node + # [Array[ Node ]] the list of expressions contained within this node attr_reader :body # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -10985,7 +10985,7 @@ def ===(other) # not value # class Not < Node - # [nil | untyped] the statement on which to operate + # [nil | Node] the statement on which to operate attr_reader :statement # [boolean] whether or not parentheses were used @@ -11072,7 +11072,7 @@ class Unary < Node # [String] the operator being used attr_reader :operator - # [untyped] the statement on which to operate + # [Node] the statement on which to operate attr_reader :statement # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -11216,7 +11216,7 @@ def ===(other) # end # class UnlessNode < Node - # [untyped] the expression to be checked + # [Node] the expression to be checked attr_reader :predicate # [Statements] the expressions to be executed @@ -11362,7 +11362,7 @@ def format_break(q) # end # class UntilNode < Node - # [untyped] the expression to be checked + # [Node] the expression to be checked attr_reader :predicate # [Statements] the expressions to be executed @@ -11683,9 +11683,6 @@ def arity # ;; # class VoidStmt < Node - # [Location] the location of this node - attr_reader :location - # [Array[ Comment | EmbDoc ]] the comments attached to this node attr_reader :comments @@ -11846,7 +11843,7 @@ def ===(other) # end # class WhileNode < Node - # [untyped] the expression to be checked + # [Node] the expression to be checked attr_reader :predicate # [Statements] the expressions to be executed diff --git a/lib/syntax_tree/reflection.rb b/lib/syntax_tree/reflection.rb new file mode 100644 index 00000000..2457fe49 --- /dev/null +++ b/lib/syntax_tree/reflection.rb @@ -0,0 +1,224 @@ +# 
frozen_string_literal: true + +module SyntaxTree + # This module is used to provide some reflection on the various types of nodes + # and their attributes. As soon as it is required it collects all of its + # information. + module Reflection + # This module represents the type of the values being passed to attributes + # of nodes. It is used as part of the documentation of the attributes. + module Type + CONSTANTS = SyntaxTree.constants.to_h { [_1, SyntaxTree.const_get(_1)] } + + # Represents an array type that holds another type. + class ArrayType + attr_reader :type + + def initialize(type) + @type = type + end + + def ===(value) + value.is_a?(Array) && value.all? { type === _1 } + end + end + + # Represents a tuple type that holds a number of types in order. + class TupleType + attr_reader :types + + def initialize(types) + @types = types + end + + def ===(value) + value.is_a?(Array) && value.length == types.length && + value.zip(types).all? { _2 === _1 } + end + end + + # Represents a union type that can be one of a number of types. + class UnionType + attr_reader :types + + def initialize(types) + @types = types + end + + def ===(value) + types.any? { _1 === value } + end + end + + class << self + def parse(comment) + unless comment.start_with?("[") + raise "Comment does not start with a bracket: #{comment.inspect}" + end + + count = 1 + found = + comment.chars[1..].find.with_index(1) do |char, index| + count += { "[" => 1, "]" => -1 }.fetch(char, 0) + break index if count == 0 + end + + # If we weren't able to find the end of the balanced brackets, then + # the comment is malformed. + if found.nil? 
+ raise "Comment does not have balanced brackets: #{comment.inspect}" + end + + parse_type(comment[1...found].strip) + end + + private + + def parse_type(value) + case value + when "Integer" + Integer + when "String" + String + when "Symbol" + Symbol + when "boolean" + UnionType.new([TrueClass, FalseClass]) + when "nil" + NilClass + when ":\"::\"" + :"::" + when ":call" + :call + when ":nil" + :nil + when /\AArray\[(.+)\]\z/ + ArrayType.new(parse_type($1.strip)) + when /\A\[(.+)\]\z/ + TupleType.new($1.strip.split(/\s*,\s*/).map { parse_type(_1) }) + else + if value.include?("|") + UnionType.new(value.split(/\s*\|\s*/).map { parse_type(_1) }) + else + CONSTANTS.fetch(value.to_sym) + end + end + end + end + end + + # This class represents one of the attributes on a node in the tree. + class Attribute + attr_reader :name, :comment, :type + + def initialize(name, comment) + @name = name + @comment = comment + @type = Type.parse(comment) + end + end + + # This class represents one of our nodes in the tree. We're going to use it + # as a placeholder for collecting all of the various places that nodes are + # used. + class Node + attr_reader :name, :comment, :attributes + + def initialize(name, comment, attributes) + @name = name + @comment = comment + @attributes = attributes + end + end + + class << self + # This is going to hold a hash of all of the nodes in the tree. The keys + # are the names of the nodes as symbols. + attr_reader :nodes + + # This expects a node name as a symbol and returns the node object for + # that node. + def node(name) + nodes.fetch(name) + end + + private + + def parse_comments(statements, index) + statements[0...index] + .reverse_each + .take_while { _1.is_a?(SyntaxTree::Comment) } + .reverse_each + .map { _1.value[2..] } + end + end + + @nodes = {} + + # For each node, we're going to parse out its attributes and other metadata. + # We'll use this as the basis for our report. 
+ program = + SyntaxTree.parse(SyntaxTree.read(File.expand_path("node.rb", __dir__))) + + main_statements = program.statements.body.last.bodystmt.statements.body + main_statements.each_with_index do |main_statement, main_statement_index| + # Ensure we are only looking at class declarations. + next unless main_statement.is_a?(SyntaxTree::ClassDeclaration) + + # Ensure we're looking at class declarations with superclasses. + next unless main_statement.superclass.is_a?(SyntaxTree::VarRef) + + # Ensure we're looking at class declarations that inherit from Node. + next unless main_statement.superclass.value.value == "Node" + + # All child nodes inherit the location attr_reader from Node, so we'll add + # that to the list of attributes first. + attributes = { + location: + Attribute.new(:location, "[Location] the location of this node") + } + + statements = main_statement.bodystmt.statements.body + statements.each_with_index do |statement, statement_index| + case statement + when SyntaxTree::Command + # We only use commands in node classes to define attributes. So, we + # can safely assume that we're looking at an attribute definition. + unless %w[attr_reader attr_accessor].include?(statement.message.value) + raise "Unexpected command: #{statement.message.value.inspect}" + end + + # The arguments to the command are the attributes that we're defining. + # We want to ensure that we're only defining one at a time. + if statement.arguments.parts.length != 1 + raise "Declaring more than one attribute at a time is not permitted" + end + + attribute = + Attribute.new( + statement.arguments.parts.first.value.value.to_sym, + parse_comments(statements, statement_index).join(" ") + ) + + # Ensure that we don't already have an attribute named the same as + # this one, and then add it to the list of attributes. 
+ if attributes.key?(attribute.name) + raise "Duplicate attribute: #{attribute.name}" + end + + attributes[attribute.name] = attribute + end + end + + # Finally, set it up in the hash of nodes so that we can use it later. + node = + Node.new( + main_statement.constant.constant.value.to_sym, + parse_comments(main_statements, main_statement_index).join("\n"), + attributes + ) + + @nodes[node.name] = node + end + end +end From 45d8c4c3dfb544b8ef5644bc3bab54377607f9b9 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 9 Feb 2023 12:40:53 -0500 Subject: [PATCH 37/58] Enforce types in the test suite --- lib/syntax_tree/node.rb | 86 +++++++++++++++++++-------- lib/syntax_tree/parser.rb | 67 +++++++++++++++++---- lib/syntax_tree/reflection.rb | 12 ++++ lib/syntax_tree/translation/parser.rb | 19 +++--- lib/syntax_tree/yarv/compiler.rb | 4 +- lib/syntax_tree/yarv/decompiler.rb | 2 +- test/formatting_test.rb | 1 + test/interface_test.rb | 72 ---------------------- test/test_helper.rb | 30 ++++++++++ 9 files changed, 173 insertions(+), 120 deletions(-) delete mode 100644 test/interface_test.rb diff --git a/lib/syntax_tree/node.rb b/lib/syntax_tree/node.rb index 4ac5aa24..4a98dae4 100644 --- a/lib/syntax_tree/node.rb +++ b/lib/syntax_tree/node.rb @@ -1131,7 +1131,8 @@ def format(q) end end - # [LBracket] the bracket that opens this array + # [nil | LBracket | QSymbolsBeg | QWordsBeg | SymbolsBeg | WordsBeg] the + # bracket that opens this array attr_reader :lbracket # [nil | Args] the contents of the array @@ -1485,7 +1486,7 @@ class Assoc < Node # [Node] the key of this pair attr_reader :key - # [Node] the value of this pair + # [nil | Node] the value of this pair attr_reader :value # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -3508,16 +3509,16 @@ def align(q, node, &block) # object.method argument # class CommandCall < Node - # [Node] the receiver of the message + # [nil | Node] the receiver of the message attr_reader :receiver - # [:"::" | Op | 
Period] the operator used to send the message + # [nil | :"::" | Op | Period] the operator used to send the message attr_reader :operator - # [Const | Ident | Op] the message being send + # [:call | Const | Ident | Op] the message being send attr_reader :message - # [nil | Args] the arguments going along with the message + # [nil | Args | ArgParen] the arguments going along with the message attr_reader :arguments # [nil | BlockNode] the block associated with this method call @@ -4603,7 +4604,7 @@ class DynaSymbol < Node # dynamic symbol attr_reader :parts - # [String] the quote used to delimit the dynamic symbol + # [nil | String] the quote used to delimit the dynamic symbol attr_reader :quote # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -5947,7 +5948,7 @@ def comments end def format(q) - q.format(key) + HashKeyFormatter::Labels.new.format_key(q, key) if value q.text(" ") @@ -5978,8 +5979,8 @@ def format(q) # [nil | Node] the optional constant wrapper attr_reader :constant - # [Array[ [Label, Node] ]] the set of tuples representing the keywords - # that should be matched against in the pattern + # [Array[ [DynaSymbol | Label, nil | Node] ]] the set of tuples + # representing the keywords that should be matched against in the pattern attr_reader :keywords # [nil | VarField] an optional parameter to gather up all remaining keywords @@ -7510,7 +7511,7 @@ def ===(other) # method {} # class MethodAddBlock < Node - # [CallNode | Command | CommandCall] the method call + # [ARef | CallNode | Command | CommandCall | Super | ZSuper] the method call attr_reader :call # [BlockNode] the block being sent with the method call @@ -7585,8 +7586,12 @@ def format_contents(q) # first, second, third = value # class MLHS < Node - # [Array[ARefField | ArgStar | Field | Ident | MLHSParen | VarField]] the - # parts of the left-hand side of a multiple assignment + # [ + # Array[ + # ARefField | ArgStar | ConstPathField | Field | Ident | MLHSParen | + # TopConstField | 
VarField + # ] + # ] the parts of the left-hand side of a multiple assignment attr_reader :parts # [boolean] whether or not there is a trailing comma at the end of this @@ -8211,7 +8216,7 @@ def format(q) end end - # [Array[ Ident ]] any required parameters + # [Array[ Ident | MLHSParen ]] any required parameters attr_reader :requireds # [Array[ [ Ident, Node ] ]] any optional parameters and their default @@ -8226,11 +8231,12 @@ def format(q) # parameter attr_reader :posts - # [Array[ [ Ident, nil | Node ] ]] any keyword parameters and their + # [Array[ [ Label, nil | Node ] ]] any keyword parameters and their # optional default values attr_reader :keywords - # [nil | :nil | KwRestParam] the optional keyword rest parameter + # [nil | :nil | ArgsForward | KwRestParam] the optional keyword rest + # parameter attr_reader :keyword_rest # [nil | BlockArg] the optional block parameter @@ -9268,7 +9274,7 @@ def ambiguous?(q) # end # class RescueEx < Node - # [Node] the list of exceptions being rescued + # [nil | Node] the list of exceptions being rescued attr_reader :exceptions # [nil | Field | VarField] the expression being used to capture the raised @@ -9346,7 +9352,7 @@ class Rescue < Node # [Kw] the rescue keyword attr_reader :keyword - # [RescueEx] the exceptions being rescued + # [nil | RescueEx] the exceptions being rescued attr_reader :exception # [Statements] the expressions to evaluate when an error is rescued @@ -9995,9 +10001,13 @@ class StringContent < Node # string attr_reader :parts + # [Array[ Comment | EmbDoc ]] the comments attached to this node + attr_reader :comments + def initialize(parts:, location:) @parts = parts @location = location + @comments = [] end def accept(visitor) @@ -10024,6 +10034,33 @@ def deconstruct_keys(_keys) def ===(other) other.is_a?(StringContent) && ArrayMatch.call(parts, other.parts) end + + def format(q) + q.text(q.quote) + q.group do + parts.each do |part| + if part.is_a?(TStringContent) + value = 
Quotes.normalize(part.value, q.quote) + first = true + + value.each_line(chomp: true) do |line| + if first + first = false + else + q.breakable_return + end + + q.text(line) + end + + q.breakable_return if value.end_with?("\n") + else + q.format(part) + end + end + end + q.text(q.quote) + end end # StringConcat represents concatenating two strings together using a backward @@ -10033,7 +10070,8 @@ def ===(other) # "second" # class StringConcat < Node - # [StringConcat | StringLiteral] the left side of the concatenation + # [Heredoc | StringConcat | StringLiteral] the left side of the + # concatenation attr_reader :left # [StringLiteral] the right side of the concatenation @@ -10230,7 +10268,7 @@ class StringLiteral < Node # string literal attr_reader :parts - # [String] which quote was used by the string literal + # [nil | String] which quote was used by the string literal attr_reader :quote # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -10475,8 +10513,8 @@ def ===(other) # :symbol # class SymbolLiteral < Node - # [Backtick | Const | CVar | GVar | Ident | IVar | Kw | Op] the value of the - # symbol + # [Backtick | Const | CVar | GVar | Ident | IVar | Kw | Op | TStringContent] + # the value of the symbol attr_reader :value # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -11430,7 +11468,7 @@ def modifier? # # In the example above, the VarField node represents the +variable+ token. class VarField < Node - # [nil | Const | CVar | GVar | Ident | IVar] the target of this node + # [nil | :nil | Const | CVar | GVar | Ident | IVar] the target of this node attr_reader :value # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -11569,7 +11607,7 @@ def pin(parent, pin) # This can be a plain local variable like the example above. It can also be a # a class variable, a global variable, or an instance variable. 
class PinnedVarRef < Node - # [VarRef] the value of this node + # [Const | CVar | GVar | Ident | IVar] the value of this node attr_reader :value # [Array[ Comment | EmbDoc ]] the comments attached to this node diff --git a/lib/syntax_tree/parser.rb b/lib/syntax_tree/parser.rb index 59128875..ca006c31 100644 --- a/lib/syntax_tree/parser.rb +++ b/lib/syntax_tree/parser.rb @@ -908,6 +908,13 @@ def on_blockarg(name) # (nil | Ensure) ensure_clause # ) -> BodyStmt def on_bodystmt(statements, rescue_clause, else_clause, ensure_clause) + # In certain versions of Ruby, the `statements` argument can be any node + # in the case that we're inside of an endless method definition. In this + # case we'll wrap it in a Statements node to be consistent. + unless statements.is_a?(Statements) + statements = Statements.new(self, body: [statements], location: statements.location) + end + parts = [statements, rescue_clause, else_clause, ensure_clause].compact BodyStmt.new( @@ -1157,13 +1164,23 @@ def on_const(value) end # :call-seq: - # on_const_path_field: (untyped parent, Const constant) -> ConstPathField + # on_const_path_field: (untyped parent, Const constant) -> + # ConstPathField | Field def on_const_path_field(parent, constant) - ConstPathField.new( - parent: parent, - constant: constant, - location: parent.location.to(constant.location) - ) + if constant.is_a?(Const) + ConstPathField.new( + parent: parent, + constant: constant, + location: parent.location.to(constant.location) + ) + else + Field.new( + parent: parent, + operator: consume_operator(:"::"), + name: constant, + location: parent.location.to(constant.location) + ) + end end # :call-seq: @@ -1866,10 +1883,40 @@ def on_heredoc_end(value) # :call-seq: # on_hshptn: ( # (nil | untyped) constant, - # Array[[Label, untyped]] keywords, + # Array[[Label | StringContent, untyped]] keywords, # (nil | VarField) keyword_rest # ) -> HshPtn def on_hshptn(constant, keywords, keyword_rest) + keywords = + (keywords || []).map do 
|(label, value)| + if label.is_a?(Label) + [label, value] + else + tstring_beg_index = + tokens.rindex do |token| + token.is_a?(TStringBeg) && token.location.start_char < label.location.start_char + end + + tstring_beg = tokens.delete_at(tstring_beg_index) + + label_end_index = + tokens.rindex do |token| + token.is_a?(LabelEnd) && token.location.start_char == label.location.end_char + end + + label_end = tokens.delete_at(label_end_index) + + [ + DynaSymbol.new( + parts: label.parts, + quote: label_end.value[0], + location: tstring_beg.location.to(label_end.location) + ), + value + ] + end + end + if keyword_rest # We're doing this to delete the token from the list so that it doesn't # confuse future patterns by thinking they have an extra ** on the end. @@ -1882,7 +1929,7 @@ def on_hshptn(constant, keywords, keyword_rest) keyword_rest = VarField.new(value: nil, location: token.location) end - parts = [constant, *keywords&.flatten(1), keyword_rest].compact + parts = [constant, *keywords.flatten(1), keyword_rest].compact # If there's no constant, there may be braces, so we're going to look for # those to get our bounds. @@ -1899,7 +1946,7 @@ def on_hshptn(constant, keywords, keyword_rest) HshPtn.new( constant: constant, - keywords: keywords || [], + keywords: keywords, keyword_rest: keyword_rest, location: parts[0].location.to(parts[-1].location) ) @@ -2379,7 +2426,7 @@ def on_method_add_block(call, block) location = call.location.to(block.location) case call - when Break, Next + when Break, Next, ReturnNode parts = call.arguments.parts node = parts.pop diff --git a/lib/syntax_tree/reflection.rb b/lib/syntax_tree/reflection.rb index 2457fe49..ec4345e1 100644 --- a/lib/syntax_tree/reflection.rb +++ b/lib/syntax_tree/reflection.rb @@ -21,6 +21,10 @@ def initialize(type) def ===(value) value.is_a?(Array) && value.all? { type === _1 } end + + def inspect + "Array<#{type.inspect}>" + end end # Represents a tuple type that holds a number of types in order. 
@@ -35,6 +39,10 @@ def ===(value) value.is_a?(Array) && value.length == types.length && value.zip(types).all? { _2 === _1 } end + + def inspect + "[#{types.map(&:inspect).join(", ")}]" + end end # Represents a union type that can be one of a number of types. @@ -48,6 +56,10 @@ def initialize(types) def ===(value) types.any? { _1 === value } end + + def inspect + types.map(&:inspect).join(" | ") + end end class << self diff --git a/lib/syntax_tree/translation/parser.rb b/lib/syntax_tree/translation/parser.rb index 4a4b6ade..65bf918d 100644 --- a/lib/syntax_tree/translation/parser.rb +++ b/lib/syntax_tree/translation/parser.rb @@ -1068,7 +1068,7 @@ def visit_field(node) case stack[-2] when Assign, MLHS Ident.new( - value: :"#{node.name.value}=", + value: "#{node.name.value}=", location: node.name.location ) else @@ -1295,11 +1295,11 @@ def visit_hshptn(node) next s(:pair, [visit(keyword), visit(value)], nil) if value case keyword - when Label - s(:match_var, [keyword.value.chomp(":").to_sym], nil) - when StringContent + when DynaSymbol raise if keyword.parts.length > 1 s(:match_var, [keyword.parts.first.value.to_sym], nil) + when Label + s(:match_var, [keyword.value.chomp(":").to_sym], nil) end end @@ -2364,13 +2364,10 @@ def visit_statements(node) # Visit a StringConcat node. def visit_string_concat(node) - visit_string_literal( - StringLiteral.new( - parts: [node.left, node.right], - quote: nil, - location: node.location - ) - ) + location = + source_map_collection(expression: source_range_node(node)) + + s(:dstr, [visit(node.left), visit(node.right)], location) end # Visit a StringContent node. 
diff --git a/lib/syntax_tree/yarv/compiler.rb b/lib/syntax_tree/yarv/compiler.rb index c1b4d6dd..1899140a 100644 --- a/lib/syntax_tree/yarv/compiler.rb +++ b/lib/syntax_tree/yarv/compiler.rb @@ -1050,11 +1050,11 @@ def visit_if_op(node) visit_if( IfNode.new( predicate: node.predicate, - statements: node.truthy, + statements: Statements.new(nil, body: [node.truthy], location: Location.default), consequent: Else.new( keyword: Kw.new(value: "else", location: Location.default), - statements: node.falsy, + statements: Statements.new(nil, body: [node.falsy], location: Location.default), location: Location.default ), location: Location.default diff --git a/lib/syntax_tree/yarv/decompiler.rb b/lib/syntax_tree/yarv/decompiler.rb index 753ba80a..4ea99e3a 100644 --- a/lib/syntax_tree/yarv/decompiler.rb +++ b/lib/syntax_tree/yarv/decompiler.rb @@ -151,7 +151,7 @@ def decompile(iseq) elsif argc == 1 && method.end_with?("=") receiver, argument = clause.pop(2) clause << Assign( - CallNode(receiver, Period("."), Ident(method[0..-2]), nil), + Field(receiver, Period("."), Ident(method[0..-2])), argument ) else diff --git a/test/formatting_test.rb b/test/formatting_test.rb index 37ca29e1..5e5f9e9f 100644 --- a/test/formatting_test.rb +++ b/test/formatting_test.rb @@ -7,6 +7,7 @@ class FormattingTest < Minitest::Test Fixtures.each_fixture do |fixture| define_method(:"test_formatted_#{fixture.name}") do assert_equal(fixture.formatted, SyntaxTree.format(fixture.source)) + assert_syntax_tree(SyntaxTree.parse(fixture.source)) end end diff --git a/test/interface_test.rb b/test/interface_test.rb deleted file mode 100644 index 5086680e..00000000 --- a/test/interface_test.rb +++ /dev/null @@ -1,72 +0,0 @@ -# frozen_string_literal: true - -require_relative "test_helper" - -module SyntaxTree - class InterfaceTest < Minitest::Test - ObjectSpace.each_object(Node.singleton_class) do |klass| - next if klass == Node - - define_method(:"test_instantiate_#{klass.name}") do - 
assert_syntax_tree(instantiate(klass)) - end - end - - Fixtures.each_fixture do |fixture| - define_method(:"test_#{fixture.name}") do - assert_syntax_tree(SyntaxTree.parse(fixture.source)) - end - end - - private - - # This method is supposed to instantiate a new instance of the given class. - # The class is always a descendant from SyntaxTree::Node, so we can make - # certain assumptions about the way the initialize method is set up. If it - # needs to be special-cased, it's done so at the end of this method. - def instantiate(klass) - params = {} - - # Set up all of the keyword parameters for the class. - klass - .instance_method(:initialize) - .parameters - .each { |(type, name)| params[name] = nil if type.start_with?("key") } - - # Set up any default values that have to be arrays. - %i[ - assocs - comments - elements - keywords - locals - optionals - parts - posts - requireds - symbols - values - ].each { |key| params[key] = [] if params.key?(key) } - - # Set up a default location for the node. 
- params[:location] = Location.fixed(line: 0, char: 0, column: 0) - - case klass.name - when "SyntaxTree::Binary" - klass.new(**params, operator: :+) - when "SyntaxTree::Kw" - klass.new(**params, value: "kw") - when "SyntaxTree::Label" - klass.new(**params, value: "label:") - when "SyntaxTree::Op" - klass.new(**params, value: "+") - when "SyntaxTree::RegexpLiteral" - klass.new(**params, ending: "/") - when "SyntaxTree::Statements" - klass.new(nil, **params, body: []) - else - klass.new(**params) - end - end - end -end diff --git a/test/test_helper.rb b/test/test_helper.rb index 77627e26..b307db3d 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -11,6 +11,36 @@ require "syntax_tree" require "syntax_tree/cli" +require "syntax_tree/reflection" + +SyntaxTree::Reflection.nodes.each do |name, node| + next if name == :Statements + + clazz = SyntaxTree.const_get(name) + parameters = clazz.instance_method(:initialize).parameters + + # First, verify that all of the parameters listed in the list of attributes. + # If there are any parameters that aren't listed in the attributes, then + # something went wrong with the parsing in the reflection module. + raise unless (parameters.map(&:last) - node.attributes.keys).empty? + + # Now we're going to use an alias chain to redefine the initialize method to + # include type checking. 
+ clazz.alias_method(:initialize_without_verify, :initialize) + clazz.define_method(:initialize) do |**kwargs| + kwargs.each do |kwarg, value| + attribute = node.attributes.fetch(kwarg) + + unless attribute.type === value + raise TypeError, "invalid type for #{name}##{kwarg}, expected " \ + "#{attribute.type.inspect}, got #{value.inspect}" + end + end + + initialize_without_verify(**kwargs) + end +end + require "json" require "tempfile" require "pp" From 42572ac17ad319b27cb63dc340f3e7354c83f1f6 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 9 Feb 2023 14:46:49 -0500 Subject: [PATCH 38/58] Generate sorbet types in a rake task --- Rakefile | 11 +- lib/syntax_tree/dsl.rb | 32 ++- lib/syntax_tree/parser.rb | 15 +- lib/syntax_tree/reflection.rb | 21 +- lib/syntax_tree/translation/parser.rb | 3 +- lib/syntax_tree/yarv/compiler.rb | 14 +- tasks/sorbet.rake | 277 ++++++++++++++++++++++++++ test/test_helper.rb | 5 +- 8 files changed, 349 insertions(+), 29 deletions(-) create mode 100644 tasks/sorbet.rake diff --git a/Rakefile b/Rakefile index 22d7d1fe..fb4f8847 100644 --- a/Rakefile +++ b/Rakefile @@ -16,7 +16,16 @@ task default: :test configure = ->(task) do task.source_files = - FileList[%w[Gemfile Rakefile syntax_tree.gemspec lib/**/*.rb test/*.rb]] + FileList[ + %w[ + Gemfile + Rakefile + syntax_tree.gemspec + lib/**/*.rb + tasks/*.rake + test/*.rb + ] + ] # Since Syntax Tree supports back to Ruby 2.7.0, we need to make sure that we # format our code such that it's compatible with that version. This actually diff --git a/lib/syntax_tree/dsl.rb b/lib/syntax_tree/dsl.rb index 860a1fe5..1af19644 100644 --- a/lib/syntax_tree/dsl.rb +++ b/lib/syntax_tree/dsl.rb @@ -210,12 +210,17 @@ def RAssign(value, operator, pattern) end # Create a new ClassDeclaration node. 
- def ClassDeclaration(constant, superclass, bodystmt) + def ClassDeclaration( + constant, + superclass, + bodystmt, + location = Location.default + ) ClassDeclaration.new( constant: constant, superclass: superclass, bodystmt: bodystmt, - location: Location.default + location: location ) end @@ -225,12 +230,12 @@ def Comma(value) end # Create a new Command node. - def Command(message, arguments, block) + def Command(message, arguments, block, location = Location.default) Command.new( message: message, arguments: arguments, block: block, - location: Location.default + location: location ) end @@ -247,8 +252,8 @@ def CommandCall(receiver, operator, message, arguments, block) end # Create a new Comment node. - def Comment(value, inline) - Comment.new(value: value, inline: inline, location: Location.default) + def Comment(value, inline, location = Location.default) + Comment.new(value: value, inline: inline, location: location) end # Create a new Const node. @@ -285,14 +290,21 @@ def CVar(value) end # Create a new DefNode node. - def DefNode(target, operator, name, params, bodystmt) + def DefNode( + target, + operator, + name, + params, + bodystmt, + location = Location.default + ) DefNode.new( target: target, operator: operator, name: name, params: params, bodystmt: bodystmt, - location: Location.default + location: location ) end @@ -565,8 +577,8 @@ def MAssign(target, value) end # Create a new MethodAddBlock node. - def MethodAddBlock(call, block) - MethodAddBlock.new(call: call, block: block, location: Location.default) + def MethodAddBlock(call, block, location = Location.default) + MethodAddBlock.new(call: call, block: block, location: location) end # Create a new MLHS node. 
diff --git a/lib/syntax_tree/parser.rb b/lib/syntax_tree/parser.rb index ca006c31..c15a0339 100644 --- a/lib/syntax_tree/parser.rb +++ b/lib/syntax_tree/parser.rb @@ -912,7 +912,12 @@ def on_bodystmt(statements, rescue_clause, else_clause, ensure_clause) # in the case that we're inside of an endless method definition. In this # case we'll wrap it in a Statements node to be consistent. unless statements.is_a?(Statements) - statements = Statements.new(self, body: [statements], location: statements.location) + statements = + Statements.new( + self, + body: [statements], + location: statements.location + ) end parts = [statements, rescue_clause, else_clause, ensure_clause].compact @@ -1894,14 +1899,16 @@ def on_hshptn(constant, keywords, keyword_rest) else tstring_beg_index = tokens.rindex do |token| - token.is_a?(TStringBeg) && token.location.start_char < label.location.start_char + token.is_a?(TStringBeg) && + token.location.start_char < label.location.start_char end tstring_beg = tokens.delete_at(tstring_beg_index) label_end_index = tokens.rindex do |token| - token.is_a?(LabelEnd) && token.location.start_char == label.location.end_char + token.is_a?(LabelEnd) && + token.location.start_char == label.location.end_char end label_end = tokens.delete_at(label_end_index) @@ -1913,7 +1920,7 @@ def on_hshptn(constant, keywords, keyword_rest) location: tstring_beg.location.to(label_end.location) ), value - ] + ] end end diff --git a/lib/syntax_tree/reflection.rb b/lib/syntax_tree/reflection.rb index ec4345e1..bf4b95f3 100644 --- a/lib/syntax_tree/reflection.rb +++ b/lib/syntax_tree/reflection.rb @@ -34,10 +34,10 @@ class TupleType def initialize(types) @types = types end - + def ===(value) value.is_a?(Array) && value.length == types.length && - value.zip(types).all? { _2 === _1 } + value.zip(types).all? 
{ |item, type| type === item } end def inspect @@ -64,16 +64,20 @@ def inspect class << self def parse(comment) + comment = comment.gsub(/\n/, " ") + unless comment.start_with?("[") raise "Comment does not start with a bracket: #{comment.inspect}" end count = 1 found = - comment.chars[1..].find.with_index(1) do |char, index| - count += { "[" => 1, "]" => -1 }.fetch(char, 0) - break index if count == 0 - end + comment.chars[1..] + .find + .with_index(1) do |char, index| + count += { "[" => 1, "]" => -1 }.fetch(char, 0) + break index if count == 0 + end # If we weren't able to find the end of the balanced brackets, then # the comment is malformed. @@ -209,7 +213,7 @@ def parse_comments(statements, index) attribute = Attribute.new( statement.arguments.parts.first.value.value.to_sym, - parse_comments(statements, statement_index).join(" ") + "#{parse_comments(statements, statement_index).join("\n")}\n" ) # Ensure that we don't already have an attribute named the same as @@ -223,10 +227,11 @@ def parse_comments(statements, index) end # Finally, set it up in the hash of nodes so that we can use it later. + comments = parse_comments(main_statements, main_statement_index) node = Node.new( main_statement.constant.constant.value.to_sym, - parse_comments(main_statements, main_statement_index).join("\n"), + "#{comments.join("\n")}\n", attributes ) diff --git a/lib/syntax_tree/translation/parser.rb b/lib/syntax_tree/translation/parser.rb index 65bf918d..184bb165 100644 --- a/lib/syntax_tree/translation/parser.rb +++ b/lib/syntax_tree/translation/parser.rb @@ -2364,8 +2364,7 @@ def visit_statements(node) # Visit a StringConcat node. 
def visit_string_concat(node) - location = - source_map_collection(expression: source_range_node(node)) + location = source_map_collection(expression: source_range_node(node)) s(:dstr, [visit(node.left), visit(node.right)], location) end diff --git a/lib/syntax_tree/yarv/compiler.rb b/lib/syntax_tree/yarv/compiler.rb index 1899140a..3aff3fe5 100644 --- a/lib/syntax_tree/yarv/compiler.rb +++ b/lib/syntax_tree/yarv/compiler.rb @@ -1050,11 +1050,21 @@ def visit_if_op(node) visit_if( IfNode.new( predicate: node.predicate, - statements: Statements.new(nil, body: [node.truthy], location: Location.default), + statements: + Statements.new( + nil, + body: [node.truthy], + location: Location.default + ), consequent: Else.new( keyword: Kw.new(value: "else", location: Location.default), - statements: Statements.new(nil, body: [node.falsy], location: Location.default), + statements: + Statements.new( + nil, + body: [node.falsy], + location: Location.default + ), location: Location.default ), location: Location.default diff --git a/tasks/sorbet.rake b/tasks/sorbet.rake new file mode 100644 index 00000000..e4152664 --- /dev/null +++ b/tasks/sorbet.rake @@ -0,0 +1,277 @@ +# frozen_string_literal: true + +module SyntaxTree + class RBI + include DSL + + attr_reader :body, :line + + def initialize + @body = [] + @line = 1 + end + + def generate + require "syntax_tree/reflection" + + body << Comment("# typed: strict", false, location) + @line += 2 + + generate_parent + Reflection.nodes.sort.each { |(_, node)| generate_node(node) } + + Formatter.format(nil, Program(Statements(body))) + end + + private + + def generate_comments(comment) + comment + .lines(chomp: true) + .map { |line| Comment("# #{line}", false, location).tap { @line += 1 } } + end + + def generate_parent + attribute = Reflection.nodes[:Program].attributes[:location] + class_location = location + + node_body = generate_comments(attribute.comment) + node_body << sig_block { sig_returns { sig_type_for(attribute.type) } } + 
@line += 1 + + node_body << Command( + Ident("attr_reader"), + Args([SymbolLiteral(Ident("location"))]), + nil, + location + ) + @line += 1 + + body << ClassDeclaration( + ConstPathRef(VarRef(Const("SyntaxTree")), Const("Node")), + nil, + BodyStmt(Statements(node_body), nil, nil, nil, nil), + class_location + ) + @line += 2 + end + + def generate_node(node) + body.concat(generate_comments(node.comment)) + class_location = location + @line += 2 + + body << ClassDeclaration( + ConstPathRef(VarRef(Const("SyntaxTree")), Const(node.name.to_s)), + ConstPathRef(VarRef(Const("SyntaxTree")), Const("Node")), + BodyStmt(Statements(generate_node_body(node)), nil, nil, nil, nil), + class_location + ) + + @line += 2 + end + + def generate_node_body(node) + node_body = [] + node.attributes.sort.each do |(name, attribute)| + next if name == :location + + node_body.concat(generate_comments(attribute.comment)) + node_body << sig_block { sig_returns { sig_type_for(attribute.type) } } + @line += 1 + + node_body << Command( + Ident("attr_reader"), + Args([SymbolLiteral(Ident(attribute.name.to_s))]), + nil, + location + ) + @line += 2 + end + + node_body.concat(generate_initialize(node)) + + node_body << sig_block do + CallNode( + sig_params do + BareAssocHash( + [Assoc(Label("visitor:"), sig_type_for(BasicVisitor))] + ) + end, + Period("."), + Ident("returns"), + ArgParen( + Args( + [CallNode(VarRef(Const("T")), Period("."), Ident("untyped"), nil)] + ) + ) + ) + end + @line += 1 + + node_body << generate_def_node( + "accept", + Paren( + LParen("("), + Params.new(requireds: [Ident("visitor")], location: location) + ) + ) + @line += 2 + + node_body << generate_child_nodes + @line += 1 + + node_body << generate_def_node("child_nodes", nil) + @line += 1 + + node_body + end + + def generate_initialize(node) + parameters = + SyntaxTree.const_get(node.name).instance_method(:initialize).parameters + + assocs = + parameters.map do |(_, name)| + Assoc(Label("#{name}:"), 
sig_type_for(node.attributes[name].type)) + end + + node_body = [] + node_body << sig_block do + CallNode( + sig_params { BareAssocHash(assocs) }, + Period("."), + Ident("void"), + nil + ) + end + @line += 1 + + params = Params.new(location: location) + parameters.each do |(type, name)| + case type + when :req + params.requireds << Ident(name.to_s) + when :keyreq + params.keywords << [Label("#{name}:"), nil] + when :key + params.keywords << [ + Label("#{name}:"), + CallNode( + VarRef(Const("T")), + Period("."), + Ident("unsafe"), + ArgParen(Args([VarRef(Kw("nil"))])) + ) + ] + else + raise + end + end + + node_body << generate_def_node("initialize", Paren(LParen("("), params)) + @line += 2 + + node_body + end + + def generate_child_nodes + type = + Reflection::Type::ArrayType.new( + Reflection::Type::UnionType.new([NilClass, Node]) + ) + + sig_block { sig_returns { sig_type_for(type) } } + end + + def generate_def_node(name, params) + DefNode( + nil, + nil, + Ident(name), + params, + BodyStmt(Statements([VoidStmt()]), nil, nil, nil, nil), + location + ) + end + + def sig_block + MethodAddBlock( + CallNode(nil, nil, Ident("sig"), nil), + BlockNode( + LBrace("{"), + nil, + BodyStmt(Statements([yield]), nil, nil, nil, nil) + ), + location + ) + end + + def sig_params + CallNode(nil, nil, Ident("params"), ArgParen(Args([yield]))) + end + + def sig_returns + CallNode(nil, nil, Ident("returns"), ArgParen(Args([yield]))) + end + + def sig_type_for(type) + case type + when Reflection::Type::ArrayType + ARef( + ConstPathRef(VarRef(Const("T")), Const("Array")), + sig_type_for(type.type) + ) + when Reflection::Type::TupleType + ArrayLiteral(LBracket("["), Args(type.types.map { sig_type_for(_1) })) + when Reflection::Type::UnionType + if type.types.include?(NilClass) + selected = type.types.reject { _1 == NilClass } + subtype = + if selected.size == 1 + selected.first + else + Reflection::Type::UnionType.new(selected) + end + + CallNode( + VarRef(Const("T")), + Period("."), + 
Ident("nilable"), + ArgParen(Args([sig_type_for(subtype)])) + ) + else + CallNode( + VarRef(Const("T")), + Period("."), + Ident("any"), + ArgParen(Args(type.types.map { sig_type_for(_1) })) + ) + end + when Symbol + ConstRef(Const("Symbol")) + else + *parents, constant = type.name.split("::").map { Const(_1) } + + if parents.empty? + ConstRef(constant) + else + [*parents[1..], constant].inject( + VarRef(parents.first) + ) { |accum, const| ConstPathRef(accum, const) } + end + end + end + + def location + Location.fixed(line: line, char: 0, column: 0) + end + end +end + +namespace :sorbet do + desc "Generate RBI files for Sorbet" + task :rbi do + puts SyntaxTree::RBI.new.generate + end +end diff --git a/test/test_helper.rb b/test/test_helper.rb index b307db3d..18159fab 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -32,8 +32,9 @@ attribute = node.attributes.fetch(kwarg) unless attribute.type === value - raise TypeError, "invalid type for #{name}##{kwarg}, expected " \ - "#{attribute.type.inspect}, got #{value.inspect}" + raise TypeError, + "invalid type for #{name}##{kwarg}, expected " \ + "#{attribute.type.inspect}, got #{value.inspect}" end end From e0be5793aeecc1d0c44a3ff118dd24c653a2e8af Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 9 Feb 2023 16:15:56 -0500 Subject: [PATCH 39/58] More documentation in the test helper --- test/test_helper.rb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/test_helper.rb b/test/test_helper.rb index 18159fab..e4452e3d 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -11,8 +11,10 @@ require "syntax_tree" require "syntax_tree/cli" +# Here we are going to establish type verification whenever a new node is +# created. We do this through the reflection module, which in turn parses the +# source code of the node classes. 
require "syntax_tree/reflection" - SyntaxTree::Reflection.nodes.each do |name, node| next if name == :Statements From da19f6a2dc787411e34e4ec90547b136467e7149 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 8 Feb 2023 11:01:27 -0500 Subject: [PATCH 40/58] Location information for parser nodes --- lib/syntax_tree/formatter.rb | 2 +- lib/syntax_tree/node.rb | 8 + lib/syntax_tree/parser.rb | 197 +++- lib/syntax_tree/translation/parser.rb | 1529 ++++++++++--------------- test/syntax_tree_test.rb | 2 +- test/translation/parser_test.rb | 2 +- 6 files changed, 774 insertions(+), 966 deletions(-) diff --git a/lib/syntax_tree/formatter.rb b/lib/syntax_tree/formatter.rb index c64cf7d1..60858bf2 100644 --- a/lib/syntax_tree/formatter.rb +++ b/lib/syntax_tree/formatter.rb @@ -138,7 +138,7 @@ def format(node, stackable: true) # going to just print out the node as it was seen in the source. doc = if last_leading&.ignore? - range = source[node.location.start_char...node.location.end_char] + range = source[node.start_char...node.end_char] first = true range.each_line(chomp: true) do |line| diff --git a/lib/syntax_tree/node.rb b/lib/syntax_tree/node.rb index 4a98dae4..627deab1 100644 --- a/lib/syntax_tree/node.rb +++ b/lib/syntax_tree/node.rb @@ -126,6 +126,14 @@ def format(q) raise NotImplementedError end + def start_char + location.start_char + end + + def end_char + location.end_char + end + def pretty_print(q) accept(Visitor::PrettyPrintVisitor.new(q)) end diff --git a/lib/syntax_tree/parser.rb b/lib/syntax_tree/parser.rb index c15a0339..cf3982f9 100644 --- a/lib/syntax_tree/parser.rb +++ b/lib/syntax_tree/parser.rb @@ -256,11 +256,37 @@ def find_token(type) tokens[index] if index end + def find_token_between(type, left, right) + bounds = left.location.end_char...right.location.start_char + index = + tokens.rindex do |token| + char = token.location.start_char + break if char < bounds.begin + + token.is_a?(type) && bounds.cover?(char) + end + + tokens[index] if index + 
end + def find_keyword(name) index = tokens.rindex { |token| token.is_a?(Kw) && (token.name == name) } tokens[index] if index end + def find_keyword_between(name, left, right) + bounds = left.location.end_char...right.location.start_char + index = + tokens.rindex do |token| + char = token.location.start_char + break if char < bounds.begin + + token.is_a?(Kw) && (token.name == name) && bounds.cover?(char) + end + + tokens[index] if index + end + def find_operator(name) index = tokens.rindex { |token| token.is_a?(Op) && (token.name == name) } tokens[index] if index @@ -645,7 +671,7 @@ def visit_var_ref(node) end def self.visit(node, tokens) - start_char = node.location.start_char + start_char = node.start_char allocated = [] tokens.reverse_each do |token| @@ -874,13 +900,34 @@ def on_binary(left, operator, right) # on_block_var: (Params params, (nil | Array[Ident]) locals) -> BlockVar def on_block_var(params, locals) index = - tokens.rindex do |node| - node.is_a?(Op) && %w[| ||].include?(node.value) && - node.location.start_char < params.location.start_char - end + tokens.rindex { |node| node.is_a?(Op) && %w[| ||].include?(node.value) } + + ending = tokens.delete_at(index) + beginning = ending.value == "||" ? ending : consume_operator(:|) + + # If there are no parameters, then we didn't have anything to base the + # location information of off. Now that we have an opening of the + # block, we can correct this. + if params.empty? 
+ start_line = params.location.start_line + start_char = + ( + if beginning.value == "||" + beginning.location.start_char + else + find_next_statement_start(beginning.location.end_char) + end + ) + + location = + Location.fixed( + line: start_line, + char: start_char, + column: start_char - line_counts[start_line - 1].start + ) - beginning = tokens[index] - ending = tokens[-1] + params = params.copy(location: location) + end BlockVar.new( params: params, @@ -1762,15 +1809,13 @@ def on_for(index, collection, statements) # Consume the do keyword if it exists so that it doesn't get confused for # some other block - keyword = find_keyword(:do) - if keyword && - keyword.location.start_char > collection.location.end_char && - keyword.location.end_char < ending.location.start_char + if (keyword = find_keyword_between(:do, collection, ending)) tokens.delete(keyword) end start_char = find_next_statement_start((keyword || collection).location.end_char) + statements.bind( start_char, start_char - @@ -1984,7 +2029,12 @@ def on_if(predicate, statements, consequent) beginning = consume_keyword(:if) ending = consequent || consume_keyword(:end) - start_char = find_next_statement_start(predicate.location.end_char) + if (keyword = find_keyword_between(:then, predicate, ending)) + tokens.delete(keyword) + end + + start_char = + find_next_statement_start((keyword || predicate).location.end_char) statements.bind( start_char, start_char - line_counts[predicate.location.end_line - 1].start, @@ -2068,7 +2118,8 @@ def on_in(pattern, statements, consequent) statements_start = token end - start_char = find_next_statement_start(statements_start.location.end_char) + start_char = + find_next_statement_start((token || statements_start).location.end_char) statements.bind( start_char, start_char - @@ -2194,12 +2245,19 @@ def on_lambda(params, statements) token.location.start_char > beginning.location.start_char end + if braces + opening = consume_token(TLamBeg) + closing = consume_token(RBrace) + 
else + opening = consume_keyword(:do) + closing = consume_keyword(:end) + end + # We need to do some special mapping here. Since ripper doesn't support - # capturing lambda var until 3.2, we need to normalize all of that here. + # capturing lambda vars, we need to normalize all of that here. params = - case params - when Paren - # In this case we've gotten to the <3.2 parentheses wrapping a set of + if params.is_a?(Paren) + # In this case we've gotten to the parentheses wrapping a set of # parameters case. Here we need to manually scan for lambda locals. range = (params.location.start_char + 1)...params.location.end_char locals = lambda_locals(source[range]) @@ -2221,25 +2279,28 @@ def on_lambda(params, statements) node.comments.concat(params.comments) node - when Params - # In this case we've gotten to the <3.2 plain set of parameters. In - # this case there cannot be lambda locals, so we will wrap the - # parameters into a lambda var that has no locals. + else + # If there are no parameters, then we didn't have anything to base the + # location information of off. Now that we have an opening of the + # block, we can correct this. + if params.empty? + opening_location = opening.location + location = + Location.fixed( + line: opening_location.start_line, + char: opening_location.start_char, + column: opening_location.start_column + ) + + params = params.copy(location: location) + end + + # In this case we've gotten to the plain set of parameters. In this + # case there cannot be lambda locals, so we will wrap the parameters + # into a lambda var that has no locals. LambdaVar.new(params: params, locals: [], location: params.location) - when LambdaVar - # In this case we've gotten to 3.2+ lambda var. In this case we don't - # need to do anything and can just the value as given. 
- params end - if braces - opening = consume_token(TLamBeg) - closing = consume_token(RBrace) - else - opening = consume_keyword(:do) - closing = consume_keyword(:end) - end - start_char = find_next_statement_start(opening.location.end_char) statements.bind( start_char, @@ -3134,7 +3195,7 @@ def on_rescue(exceptions, variable, statements, consequent) exceptions = exceptions[0] if exceptions.is_a?(Array) last_node = variable || exceptions || keyword - start_char = find_next_statement_start(last_node.location.end_char) + start_char = find_next_statement_start(last_node.end_char) statements.bind( start_char, start_char - line_counts[last_node.location.start_line - 1].start, @@ -3156,7 +3217,7 @@ def on_rescue(exceptions, variable, statements, consequent) start_char: keyword.location.end_char + 1, start_column: keyword.location.end_column + 1, end_line: last_node.location.end_line, - end_char: last_node.location.end_char, + end_char: last_node.end_char, end_column: last_node.location.end_column ) ) @@ -3267,9 +3328,27 @@ def on_sclass(target, bodystmt) ) end - # def on_semicolon(value) - # value - # end + class Semicolon + attr_reader :location + + def initialize(location:) + @location = location + end + end + + # :call-seq: + # on_semicolon: (String value) -> Semicolon + def on_semicolon(value) + tokens << Semicolon.new( + location: + Location.token( + line: lineno, + char: char_pos, + column: current_column, + size: value.size + ) + ) + end # def on_sp(value) # value @@ -3706,7 +3785,12 @@ def on_unless(predicate, statements, consequent) beginning = consume_keyword(:unless) ending = consequent || consume_keyword(:end) - start_char = find_next_statement_start(predicate.location.end_char) + if (keyword = find_keyword_between(:then, predicate, ending)) + tokens.delete(keyword) + end + + start_char = + find_next_statement_start((keyword || predicate).location.end_char) statements.bind( start_char, start_char - line_counts[predicate.location.end_line - 1].start, @@ 
-3742,16 +3826,16 @@ def on_until(predicate, statements) beginning = consume_keyword(:until) ending = consume_keyword(:end) - # Consume the do keyword if it exists so that it doesn't get confused for - # some other block - keyword = find_keyword(:do) - if keyword && keyword.location.start_char > predicate.location.end_char && - keyword.location.end_char < ending.location.start_char - tokens.delete(keyword) - end + delimiter = + find_keyword_between(:do, predicate, statements) || + find_token_between(Semicolon, predicate, statements) + + tokens.delete(delimiter) if delimiter # Update the Statements location information - start_char = find_next_statement_start(predicate.location.end_char) + start_char = + find_next_statement_start((delimiter || predicate).location.end_char) + statements.bind( start_char, start_char - line_counts[predicate.location.end_line - 1].start, @@ -3845,7 +3929,8 @@ def on_when(arguments, statements, consequent) statements_start = token end - start_char = find_next_statement_start(statements_start.location.end_char) + start_char = + find_next_statement_start((token || statements_start).location.end_char) statements.bind( start_char, @@ -3869,16 +3954,16 @@ def on_while(predicate, statements) beginning = consume_keyword(:while) ending = consume_keyword(:end) - # Consume the do keyword if it exists so that it doesn't get confused for - # some other block - keyword = find_keyword(:do) - if keyword && keyword.location.start_char > predicate.location.end_char && - keyword.location.end_char < ending.location.start_char - tokens.delete(keyword) - end + delimiter = + find_keyword_between(:do, predicate, statements) || + find_token_between(Semicolon, predicate, statements) + + tokens.delete(delimiter) if delimiter # Update the Statements location information - start_char = find_next_statement_start(predicate.location.end_char) + start_char = + find_next_statement_start((delimiter || predicate).location.end_char) + statements.bind( start_char, 
start_char - line_counts[predicate.location.end_line - 1].start, diff --git a/lib/syntax_tree/translation/parser.rb b/lib/syntax_tree/translation/parser.rb index 184bb165..b9e91e5f 100644 --- a/lib/syntax_tree/translation/parser.rb +++ b/lib/syntax_tree/translation/parser.rb @@ -27,9 +27,9 @@ def visit_alias(node) s( :alias, [visit(node.left), visit(node.right)], - source_map_keyword_bare( - source_range_length(node.location.start_char, 5), - source_range_node(node) + smap_keyword_bare( + srange_length(node.start_char, 5), + srange_node(node) ) ) end @@ -41,26 +41,20 @@ def visit_aref(node) s( :index, [visit(node.collection)], - source_map_index( - begin_token: - source_range_find( - node.collection.location.end_char, - node.location.end_char, - "[" - ), - end_token: source_range_length(node.location.end_char, -1), - expression: source_range_node(node) + smap_index( + srange_find(node.collection.end_char, node.end_char, "["), + srange_length(node.end_char, -1), + srange_node(node) ) ) else s( :index, [visit(node.collection)].concat(visit_all(node.index.parts)), - source_map_index( - begin_token: - source_range_find_between(node.collection, node.index, "["), - end_token: source_range_length(node.location.end_char, -1), - expression: source_range_node(node) + smap_index( + srange_find_between(node.collection, node.index, "["), + srange_length(node.end_char, -1), + srange_node(node) ) ) end @@ -69,31 +63,25 @@ def visit_aref(node) s( :send, [visit(node.collection), :[]], - source_map_send( - selector: - source_range_find( - node.collection.location.end_char, - node.location.end_char, - "[]" - ), - expression: source_range_node(node) + smap_send_bare( + srange_find(node.collection.end_char, node.end_char, "[]"), + srange_node(node) ) ) else s( :send, [visit(node.collection), :[], *visit_all(node.index.parts)], - source_map_send( - selector: - source_range( - source_range_find_between( - node.collection, - node.index, - "[" - ).begin_pos, - node.location.end_char - ), - 
expression: source_range_node(node) + smap_send_bare( + srange( + srange_find_between( + node.collection, + node.index, + "[" + ).begin_pos, + node.end_char + ), + srange_node(node) ) ) end @@ -107,26 +95,20 @@ def visit_aref_field(node) s( :indexasgn, [visit(node.collection)], - source_map_index( - begin_token: - source_range_find( - node.collection.location.end_char, - node.location.end_char, - "[" - ), - end_token: source_range_length(node.location.end_char, -1), - expression: source_range_node(node) + smap_index( + srange_find(node.collection.end_char, node.end_char, "["), + srange_length(node.end_char, -1), + srange_node(node) ) ) else s( :indexasgn, [visit(node.collection)].concat(visit_all(node.index.parts)), - source_map_index( - begin_token: - source_range_find_between(node.collection, node.index, "["), - end_token: source_range_length(node.location.end_char, -1), - expression: source_range_node(node) + smap_index( + srange_find_between(node.collection, node.index, "["), + srange_length(node.end_char, -1), + srange_node(node) ) ) end @@ -135,14 +117,9 @@ def visit_aref_field(node) s( :send, [visit(node.collection), :[]=], - source_map_send( - selector: - source_range_find( - node.collection.location.end_char, - node.location.end_char, - "[]" - ), - expression: source_range_node(node) + smap_send_bare( + srange_find(node.collection.end_char, node.end_char, "[]"), + srange_node(node) ) ) else @@ -151,17 +128,16 @@ def visit_aref_field(node) [visit(node.collection), :[]=].concat( visit_all(node.index.parts) ), - source_map_send( - selector: - source_range( - source_range_find_between( - node.collection, - node.index, - "[" - ).begin_pos, - node.location.end_char - ), - expression: source_range_node(node) + smap_send_bare( + srange( + srange_find_between( + node.collection, + node.index, + "[" + ).begin_pos, + node.end_char + ), + srange_node(node) ) ) end @@ -173,10 +149,7 @@ def visit_arg_block(node) s( :block_pass, [visit(node.value)], - 
source_map_operator( - source_range_length(node.location.start_char, 1), - source_range_node(node) - ) + smap_operator(srange_length(node.start_char, 1), srange_node(node)) ) end @@ -184,32 +157,26 @@ def visit_arg_block(node) def visit_arg_star(node) if stack[-3].is_a?(MLHSParen) && stack[-3].contents.is_a?(MLHS) if node.value.nil? - s(:restarg, [], source_map_variable(nil, source_range_node(node))) + s(:restarg, [], smap_variable(nil, srange_node(node))) else s( :restarg, [node.value.value.to_sym], - source_map_variable( - source_range_node(node.value), - source_range_node(node) - ) + smap_variable(srange_node(node.value), srange_node(node)) ) end else s( :splat, node.value.nil? ? [] : [visit(node.value)], - source_map_operator( - source_range_length(node.location.start_char, 1), - source_range_node(node) - ) + smap_operator(srange_length(node.start_char, 1), srange_node(node)) ) end end # Visit an ArgsForward node. def visit_args_forward(node) - s(:forwarded_args, [], source_map(expression: source_range_node(node))) + s(:forwarded_args, [], smap(srange_node(node))) end # Visit an ArrayLiteral node. @@ -218,12 +185,12 @@ def visit_array(node) :array, node.contents ? visit_all(node.contents.parts) : [], if node.lbracket.nil? - source_map_collection(expression: source_range_node(node)) + smap_collection_bare(srange_node(node)) else - source_map_collection( - begin_token: source_range_node(node.lbracket), - end_token: source_range_length(node.location.end_char, -1), - expression: source_range_node(node) + smap_collection( + srange_node(node.lbracket), + srange_length(node.end_char, -1), + srange_node(node) ) end ) @@ -237,8 +204,7 @@ def visit_aryptn(node) if node.rest.is_a?(VarField) if !node.rest.value.nil? children << s(:match_rest, [visit(node.rest)], nil) - elsif node.posts.empty? && - node.rest.location.start_char == node.rest.location.end_char + elsif node.posts.empty? 
&& node.rest.start_char == node.rest.end_char # Here we have an implicit rest, as in [foo,]. parser has a specific # type for these patterns. type = :array_pattern_with_tail @@ -255,34 +221,29 @@ def visit_aryptn(node) s( type, children + visit_all(node.posts), - source_map_collection( - expression: - source_range( - node.constant.location.end_char + 1, - node.location.end_char - 1 - ) + smap_collection_bare( + srange(node.constant.end_char + 1, node.end_char - 1) ) ) ], - source_map_collection( - begin_token: - source_range_length(node.constant.location.end_char, 1), - end_token: source_range_length(node.location.end_char, -1), - expression: source_range_node(node) + smap_collection( + srange_length(node.constant.end_char, 1), + srange_length(node.end_char, -1), + srange_node(node) ) ) else s( type, children + visit_all(node.posts), - if buffer.source[node.location.start_char] == "[" - source_map_collection( - begin_token: source_range_length(node.location.start_char, 1), - end_token: source_range_length(node.location.end_char, -1), - expression: source_range_node(node) + if buffer.source[node.start_char] == "[" + smap_collection( + srange_length(node.start_char, 1), + srange_length(node.end_char, -1), + srange_node(node) ) else - source_map_collection(expression: source_range_node(node)) + smap_collection_bare(srange_node(node)) end ) end @@ -294,10 +255,8 @@ def visit_assign(node) location = target .location - .with_operator( - source_range_find_between(node.target, node.value, "=") - ) - .with_expression(source_range_node(node)) + .with_operator(srange_find_between(node.target, node.value, "=")) + .with_expression(srange_node(node)) s(target.type, target.children + [visit(node.value)], location) end @@ -305,17 +264,13 @@ def visit_assign(node) # Visit an Assoc node. def visit_assoc(node) if node.value.nil? 
- expression = - source_range(node.location.start_char, node.location.end_char - 1) + expression = srange(node.start_char, node.end_char - 1) type, location = if node.key.value.start_with?(/[A-Z]/) - [:const, source_map_constant(nil, expression, expression)] + [:const, smap_constant(nil, expression, expression)] else - [ - :send, - source_map_send(selector: expression, expression: expression) - ] + [:send, smap_send_bare(expression, expression)] end s( @@ -324,19 +279,19 @@ def visit_assoc(node) visit(node.key), s(type, [nil, node.key.value.chomp(":").to_sym], location) ], - source_map_operator( - source_range_length(node.key.location.end_char, -1), - source_range_node(node) + smap_operator( + srange_length(node.key.end_char, -1), + srange_node(node) ) ) else s( :pair, [visit(node.key), visit(node.value)], - source_map_operator( - source_range_search_between(node.key, node.value, "=>") || - source_range_length(node.key.location.end_char, -1), - source_range_node(node) + smap_operator( + srange_search_between(node.key, node.value, "=>") || + srange_length(node.key.end_char, -1), + srange_node(node) ) ) end @@ -347,16 +302,13 @@ def visit_assoc_splat(node) s( :kwsplat, [visit(node.value)], - source_map_operator( - source_range_length(node.location.start_char, 2), - source_range_node(node) - ) + smap_operator(srange_length(node.start_char, 2), srange_node(node)) ) end # Visit a Backref node. 
def visit_backref(node) - location = source_map(expression: source_range_node(node)) + location = smap(srange_node(node)) if node.value.match?(/^\$\d+$/) s(:nth_ref, [node.value[1..].to_i], location) @@ -375,7 +327,7 @@ def visit_bare_assoc_hash(node) :hash end, visit_all(node.assocs), - source_map_collection(expression: source_range_node(node)) + smap_collection_bare(srange_node(node)) ) end @@ -384,15 +336,11 @@ def visit_BEGIN(node) s( :preexe, [visit(node.statements)], - source_map_keyword( - source_range_length(node.location.start_char, 5), - source_range_find( - node.location.start_char + 5, - node.statements.location.start_char, - "{" - ), - source_range_length(node.location.end_char, -1), - source_range_node(node) + smap_keyword( + srange_length(node.start_char, 5), + srange_find(node.start_char + 5, node.statements.start_char, "{"), + srange_length(node.end_char, -1), + srange_node(node) ) ) end @@ -400,10 +348,10 @@ def visit_BEGIN(node) # Visit a Begin node. def visit_begin(node) location = - source_map_collection( - begin_token: source_range_length(node.location.start_char, 5), - end_token: source_range_length(node.location.end_char, -3), - expression: source_range_node(node) + smap_collection( + srange_length(node.start_char, 5), + srange_length(node.end_char, -3), + srange_node(node) ) if node.bodystmt.empty? 
@@ -439,13 +387,9 @@ def visit_binary(node) node.operator ), [visit(node.left), visit(node.right)], - source_map_operator( - source_range_find_between( - node.left, - node.right, - node.operator.to_s - ), - source_range_node(node) + smap_operator( + srange_find_between(node.left, node.right, node.operator.to_s), + srange_node(node) ) ) when :=~ @@ -459,13 +403,9 @@ def visit_binary(node) s( :match_with_lvasgn, [visit(node.left), visit(node.right)], - source_map_operator( - source_range_find_between( - node.left, - node.right, - node.operator.to_s - ), - source_range_node(node) + smap_operator( + srange_find_between(node.left, node.right, node.operator.to_s), + srange_node(node) ) ) else @@ -479,15 +419,12 @@ def visit_binary(node) # Visit a BlockArg node. def visit_blockarg(node) if node.name.nil? - s(:blockarg, [nil], source_map_variable(nil, source_range_node(node))) + s(:blockarg, [nil], smap_variable(nil, srange_node(node))) else s( :blockarg, [node.name.value.to_sym], - source_map_variable( - source_range_node(node.name), - source_range_node(node) - ) + smap_variable(srange_node(node.name), srange_node(node)) ) end end @@ -499,10 +436,7 @@ def visit_block_var(node) s( :shadowarg, [local.value.to_sym], - source_map_variable( - source_range_node(local), - source_range_node(local) - ) + smap_variable(srange_node(local), srange_node(local)) ) end @@ -522,13 +456,13 @@ def visit_block_var(node) s( :arg, [required.value.to_sym], - source_map_variable( - source_range_node(required), - source_range_node(required) + smap_variable( + srange_node(required), + srange_node(required) ) ) ], - source_map_collection(expression: source_range_node(required)) + smap_collection_bare(srange_node(required)) ) else child = visit(required) @@ -543,10 +477,10 @@ def visit_block_var(node) s( :args, children + shadowargs, - source_map_collection( - begin_token: source_range_length(node.location.start_char, 1), - end_token: source_range_length(node.location.end_char, -1), - expression: 
source_range_node(node) + smap_collection( + srange_length(node.start_char, 1), + srange_length(node.end_char, -1), + srange_node(node) ) ) end @@ -566,17 +500,12 @@ def visit_bodystmt(node) children << visit(node.else_clause) location = - source_map_condition( - else_token: - source_range_length( - node.else_clause.location.start_char - 3, - -4 - ), - expression: - source_range( - location.expression.begin_pos, - node.else_clause.location.end_char - ) + smap_condition( + nil, + nil, + srange_length(node.else_clause.start_char - 3, -4), + nil, + srange(location.expression.begin_pos, node.else_clause.end_char) ) end @@ -608,9 +537,9 @@ def visit_break(node) s( :break, visit_all(node.arguments.parts), - source_map_keyword_bare( - source_range_length(node.location.start_char, 5), - source_range_node(node) + smap_keyword_bare( + srange_length(node.start_char, 5), + srange_node(node) ) ) end @@ -638,17 +567,18 @@ def visit_case(node) else_token = if clauses.last.is_a?(Else) - source_range_length(clauses.last.location.start_char, 4) + srange_length(clauses.last.start_char, 4) end s( node.consequent.is_a?(In) ? 
:case_match : :case, [visit(node.value)] + clauses.map { |clause| visit(clause) }, - source_map_condition( - keyword: source_range_length(node.location.start_char, 4), - else_token: else_token, - end_token: source_range_length(node.location.end_char, -3), - expression: source_range_node(node) + smap_condition( + srange_length(node.start_char, 4), + nil, + else_token, + srange_length(node.end_char, -3), + srange_node(node) ) ) end @@ -658,9 +588,10 @@ def visit_CHAR(node) s( :str, [node.value[1..]], - source_map_collection( - begin_token: source_range_length(node.location.start_char, 1), - expression: source_range_node(node) + smap_collection( + srange_length(node.start_char, 1), + nil, + srange_node(node) ) ) end @@ -669,18 +600,18 @@ def visit_CHAR(node) def visit_class(node) operator = if node.superclass - source_range_find_between(node.constant, node.superclass, "<") + srange_find_between(node.constant, node.superclass, "<") end s( :class, [visit(node.constant), visit(node.superclass), visit(node.bodystmt)], - source_map_definition( - keyword: source_range_length(node.location.start_char, 5), - operator: operator, - name: source_range_node(node.constant), - end_token: source_range_length(node.location.end_char, -3) - ).with_expression(source_range_node(node)) + smap_definition( + srange_length(node.start_char, 5), + operator, + srange_node(node.constant), + srange_length(node.end_char, -3) + ).with_expression(srange_node(node)) ) end @@ -721,18 +652,17 @@ def visit_command_call(node) children += visit_all(node.arguments.arguments.parts) end - begin_token = - source_range_length(node.arguments.location.start_char, 1) - end_token = source_range_length(node.arguments.location.end_char, -1) + begin_token = srange_length(node.arguments.start_char, 1) + end_token = srange_length(node.arguments.end_char, -1) end dot_bound = if node.arguments - node.arguments.location.start_char + node.arguments.start_char elsif node.block - node.block.location.start_char + 
node.block.start_char else - node.location.end_char + node.end_char end call = @@ -743,37 +673,31 @@ def visit_command_call(node) :send end, children, - source_map_send( - dot: - if node.operator == :"::" - source_range_find( - node.receiver.location.end_char, - if node.message == :call - dot_bound - else - node.message.location.start_char - end, - "::" - ) - elsif node.operator - source_range_node(node.operator) - end, - begin_token: begin_token, - end_token: end_token, - selector: - node.message == :call ? nil : source_range_node(node.message), - expression: - if node.arguments.is_a?(ArgParen) || - (node.arguments.is_a?(Args) && node.arguments.parts.any?) - source_range( - node.location.start_char, - node.arguments.location.end_char - ) - elsif node.block - source_range_node(node.message) - else - source_range_node(node) - end + smap_send( + if node.operator == :"::" + srange_find( + node.receiver.end_char, + if node.message == :call + dot_bound + else + node.message.start_char + end, + "::" + ) + elsif node.operator + srange_node(node.operator) + end, + node.message == :call ? nil : srange_node(node.message), + begin_token, + end_token, + if node.arguments.is_a?(ArgParen) || + (node.arguments.is_a?(Args) && node.arguments.parts.any?) + srange(node.start_char, node.arguments.end_char) + elsif node.block + srange_node(node.message) + else + srange_node(node) + end ) ) @@ -783,14 +707,13 @@ def visit_command_call(node) s( type, [call, arguments, visit(node.block.bodystmt)], - source_map_collection( - begin_token: source_range_node(node.block.opening), - end_token: - source_range_length( - node.location.end_char, - node.block.opening.is_a?(Kw) ? -3 : -1 - ), - expression: source_range_node(node) + smap_collection( + srange_node(node.block.opening), + srange_length( + node.end_char, + node.block.opening.is_a?(Kw) ? 
-3 : -1 + ), + srange_node(node) ) ) else @@ -803,11 +726,7 @@ def visit_const(node) s( :const, [nil, node.value.to_sym], - source_map_constant( - nil, - source_range_node(node), - source_range_node(node) - ) + smap_constant(nil, srange_node(node), srange_node(node)) ) end @@ -820,10 +739,10 @@ def visit_const_path_field(node) s( :casgn, [visit(node.parent), node.constant.value.to_sym], - source_map_constant( - source_range_find_between(node.parent, node.constant, "::"), - source_range_node(node.constant), - source_range_node(node) + smap_constant( + srange_find_between(node.parent, node.constant, "::"), + srange_node(node.constant), + srange_node(node) ) ) end @@ -834,10 +753,10 @@ def visit_const_path_ref(node) s( :const, [visit(node.parent), node.constant.value.to_sym], - source_map_constant( - source_range_find_between(node.parent, node.constant, "::"), - source_range_node(node.constant), - source_range_node(node) + smap_constant( + srange_find_between(node.parent, node.constant, "::"), + srange_node(node.constant), + srange_node(node) ) ) end @@ -847,11 +766,7 @@ def visit_const_ref(node) s( :const, [nil, node.constant.value.to_sym], - source_map_constant( - nil, - source_range_node(node.constant), - source_range_node(node) - ) + smap_constant(nil, srange_node(node.constant), srange_node(node)) ) end @@ -860,7 +775,7 @@ def visit_cvar(node) s( :cvar, [node.value.to_sym], - source_map_variable(source_range_node(node), source_range_node(node)) + smap_variable(srange_node(node), srange_node(node)) ) end @@ -875,7 +790,7 @@ def visit_def(node) s( child.type, child.children, - source_map_collection(expression: nil) + smap_collection_bare(child.location&.expression) ) when Paren child = visit(node.params.contents) @@ -883,37 +798,38 @@ def visit_def(node) s( child.type, child.children, - source_map_collection( - begin_token: - source_range_length(node.params.location.start_char, 1), - end_token: - source_range_length(node.params.location.end_char, -1), - expression: 
source_range_node(node.params) + smap_collection( + srange_length(node.params.start_char, 1), + srange_length(node.params.end_char, -1), + srange_node(node.params) ) ) else - s(:args, [], source_map_collection(expression: nil)) + s(:args, [], smap_collection_bare(nil)) end location = if node.endless? - source_map_method_definition( - keyword: source_range_length(node.location.start_char, 3), - assignment: - source_range_find_between( - (node.params || node.name), - node.bodystmt, - "=" - ), - name: source_range_node(node.name), - expression: source_range_node(node) + smap_method_definition( + srange_length(node.start_char, 3), + nil, + srange_node(node.name), + nil, + srange_find_between( + (node.params || node.name), + node.bodystmt, + "=" + ), + srange_node(node) ) else - source_map_method_definition( - keyword: source_range_length(node.location.start_char, 3), - name: source_range_node(node.name), - end_token: source_range_length(node.location.end_char, -3), - expression: source_range_node(node) + smap_method_definition( + srange_length(node.start_char, 3), + nil, + srange_node(node.name), + srange_length(node.end_char, -3), + nil, + srange_node(node) ) end @@ -923,13 +839,13 @@ def visit_def(node) s( :defs, [visit(target), name, args, visit(node.bodystmt)], - source_map_method_definition( - keyword: location.keyword, - assignment: location.assignment, - operator: source_range_node(node.operator), - name: location.name, - end_token: location.end, - expression: location.expression + smap_method_definition( + location.keyword, + srange_node(node.operator), + location.name, + location.end, + location.assignment, + location.expression ) ) else @@ -939,23 +855,23 @@ def visit_def(node) # Visit a Defined node. 
def visit_defined(node) - paren_range = (node.location.start_char + 8)...node.location.end_char + paren_range = (node.start_char + 8)...node.end_char begin_token, end_token = if buffer.source[paren_range].include?("(") [ - source_range_find(paren_range.begin, paren_range.end, "("), - source_range_length(node.location.end_char, -1) + srange_find(paren_range.begin, paren_range.end, "("), + srange_length(node.end_char, -1) ] end s( :defined?, [visit(node.value)], - source_map_keyword( - source_range_length(node.location.start_char, 8), + smap_keyword( + srange_length(node.start_char, 8), begin_token, end_token, - source_range_node(node) + srange_node(node) ) ) end @@ -964,17 +880,13 @@ def visit_defined(node) def visit_dyna_symbol(node) location = if node.quote - source_map_collection( - begin_token: - source_range_length( - node.location.start_char, - node.quote.length - ), - end_token: source_range_length(node.location.end_char, -1), - expression: source_range_node(node) + smap_collection( + srange_length(node.start_char, node.quote.length), + srange_length(node.end_char, -1), + srange_node(node) ) else - source_map_collection(expression: source_range_node(node)) + smap_collection_bare(srange_node(node)) end if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) @@ -998,16 +910,12 @@ def visit_elsif(node) else_token = case node.consequent when Elsif - source_range_length(node.consequent.location.start_char, 5) + srange_length(node.consequent.start_char, 5) when Else - source_range_length(node.consequent.location.start_char, 4) + srange_length(node.consequent.start_char, 4) end - expression = - source_range( - node.location.start_char, - node.statements.location.end_char - 1 - ) + expression = srange(node.start_char, node.statements.end_char - 1) s( :if, @@ -1016,10 +924,12 @@ def visit_elsif(node) visit(node.statements), visit(node.consequent) ], - source_map_condition( - keyword: source_range_length(node.location.start_char, 5), - else_token: 
else_token, - expression: expression + smap_condition( + srange_length(node.start_char, 5), + nil, + else_token, + nil, + expression ) ) end @@ -1029,35 +939,34 @@ def visit_END(node) s( :postexe, [visit(node.statements)], - source_map_keyword( - source_range_length(node.location.start_char, 3), - source_range_find( - node.location.start_char + 3, - node.statements.location.start_char, - "{" - ), - source_range_length(node.location.end_char, -1), - source_range_node(node) + smap_keyword( + srange_length(node.start_char, 3), + srange_find(node.start_char + 3, node.statements.start_char, "{"), + srange_length(node.end_char, -1), + srange_node(node) ) ) end # Visit an Ensure node. def visit_ensure(node) - start_char = node.location.start_char + start_char = node.start_char end_char = if node.statements.empty? start_char + 6 else - node.statements.body.last.location.end_char + node.statements.body.last.end_char end s( :ensure, [visit(node.statements)], - source_map_condition( - keyword: source_range_length(start_char, 6), - expression: source_range(start_char, end_char) + smap_condition( + srange_length(start_char, 6), + nil, + nil, + nil, + srange(start_char, end_char) ) ) end @@ -1090,15 +999,11 @@ def visit_field(node) # Visit a FloatLiteral node. def visit_float(node) operator = - if %w[+ -].include?(buffer.source[node.location.start_char]) - source_range_length(node.location.start_char, 1) + if %w[+ -].include?(buffer.source[node.start_char]) + srange_length(node.start_char, 1) end - s( - :float, - [node.value.to_f], - source_map_operator(operator, source_range_node(node)) - ) + s(:float, [node.value.to_f], smap_operator(operator, srange_node(node))) end # Visit a FndPtn node. 
@@ -1106,9 +1011,9 @@ def visit_fndptn(node) left, right = [node.left, node.right].map do |child| location = - source_map_operator( - source_range_length(child.location.start_char, 1), - source_range_node(child) + smap_operator( + srange_length(child.start_char, 1), + srange_node(child) ) if child.is_a?(VarField) && child.value.nil? @@ -1122,10 +1027,10 @@ def visit_fndptn(node) s( :find_pattern, [left, *visit_all(node.values), right], - source_map_collection( - begin_token: source_range_length(node.location.start_char, 1), - end_token: source_range_length(node.location.end_char, -1), - expression: source_range_node(node) + smap_collection( + srange_length(node.start_char, 1), + srange_length(node.end_char, -1), + srange_node(node) ) ) @@ -1141,12 +1046,12 @@ def visit_for(node) s( :for, [visit(node.index), visit(node.collection), visit(node.statements)], - source_map_for( - source_range_length(node.location.start_char, 3), - source_range_find_between(node.index, node.collection, "in"), - source_range_search_between(node.collection, node.statements, "do"), - source_range_length(node.location.end_char, -3), - source_range_node(node) + smap_for( + srange_length(node.start_char, 3), + srange_find_between(node.index, node.collection, "in"), + srange_search_between(node.collection, node.statements, "do"), + srange_length(node.end_char, -3), + srange_node(node) ) ) end @@ -1156,7 +1061,7 @@ def visit_gvar(node) s( :gvar, [node.value.to_sym], - source_map_variable(source_range_node(node), source_range_node(node)) + smap_variable(srange_node(node), srange_node(node)) ) end @@ -1165,10 +1070,10 @@ def visit_hash(node) s( :hash, visit_all(node.assocs), - source_map_collection( - begin_token: source_range_length(node.location.start_char, 1), - end_token: source_range_length(node.location.end_char, -1), - expression: source_range_node(node) + smap_collection( + srange_length(node.start_char, 1), + srange_length(node.end_char, -1), + srange_node(node) ) ) end @@ -1260,20 
+1165,17 @@ def visit_heredoc(node) heredoc_segments.trim! location = - source_map_heredoc( - source_range_node(node.beginning), - source_range( + smap_heredoc( + srange_node(node.beginning), + srange( if node.parts.empty? - node.beginning.location.end_char + node.beginning.end_char else - node.parts.first.location.start_char + node.parts.first.start_char end, - node.ending.location.start_char + node.ending.start_char ), - source_range( - node.ending.location.start_char, - node.ending.location.end_char - 1 - ) + srange(node.ending.start_char, node.ending.end_char - 1) ) if node.beginning.value.match?(/`\w+`\z/) @@ -1326,7 +1228,7 @@ def visit_ident(node) s( :lvar, [node.value.to_sym], - source_map_variable(source_range_node(node), source_range_node(node)) + smap_variable(srange_node(node), srange_node(node)) ) end @@ -1359,40 +1261,40 @@ def visit_if(node) :if, [predicate, visit(node.statements), visit(node.consequent)], if node.modifier? - source_map_keyword_bare( - source_range_find_between(node.statements, node.predicate, "if"), - source_range_node(node) + smap_keyword_bare( + srange_find_between(node.statements, node.predicate, "if"), + srange_node(node) ) else - begin_start = node.predicate.location.end_char + begin_start = node.predicate.end_char begin_end = if node.statements.empty? 
- node.statements.location.end_char + node.statements.end_char else - node.statements.body.first.location.start_char + node.statements.body.first.start_char end begin_token = if buffer.source[begin_start...begin_end].include?("then") - source_range_find(begin_start, begin_end, "then") + srange_find(begin_start, begin_end, "then") elsif buffer.source[begin_start...begin_end].include?(";") - source_range_find(begin_start, begin_end, ";") + srange_find(begin_start, begin_end, ";") end else_token = case node.consequent when Elsif - source_range_length(node.consequent.location.start_char, 5) + srange_length(node.consequent.start_char, 5) when Else - source_range_length(node.consequent.location.start_char, 4) + srange_length(node.consequent.start_char, 4) end - source_map_condition( - keyword: source_range_length(node.location.start_char, 2), - begin_token: begin_token, - else_token: else_token, - end_token: source_range_length(node.location.end_char, -3), - expression: source_range_node(node) + smap_condition( + srange_length(node.start_char, 2), + begin_token, + else_token, + srange_length(node.end_char, -3), + srange_node(node) ) end ) @@ -1403,7 +1305,11 @@ def visit_if_op(node) s( :if, [visit(node.predicate), visit(node.truthy), visit(node.falsy)], - nil + smap_ternary( + srange_find_between(node.predicate, node.truthy, "?"), + srange_find_between(node.truthy, node.falsy, ":"), + srange_node(node) + ) ) end @@ -1417,7 +1323,7 @@ def visit_imaginary(node) # case. Maybe there's an API for this but I can't find it. eval(node.value) ], - source_map_operator(nil, source_range_node(node)) + smap_operator(nil, srange_node(node)) ) end @@ -1446,23 +1352,23 @@ def visit_in(node) ) else begin_token = - source_range_search_between(node.pattern, node.statements, "then") + srange_search_between(node.pattern, node.statements, "then") end_char = if begin_token || node.statements.empty? 
- node.statements.location.end_char - 1 + node.statements.end_char - 1 else - node.statements.body.last.location.start_char + node.statements.body.last.start_char end s( :in_pattern, [visit(node.pattern), nil, visit(node.statements)], - source_map_keyword( - source_range_length(node.location.start_char, 2), + smap_keyword( + srange_length(node.start_char, 2), begin_token, nil, - source_range(node.location.start_char, end_char) + srange(node.start_char, end_char) ) ) end @@ -1471,15 +1377,11 @@ def visit_in(node) # Visit an Int node. def visit_int(node) operator = - if %w[+ -].include?(buffer.source[node.location.start_char]) - source_range_length(node.location.start_char, 1) + if %w[+ -].include?(buffer.source[node.start_char]) + srange_length(node.start_char, 1) end - s( - :int, - [node.value.to_i], - source_map_operator(operator, source_range_node(node)) - ) + s(:int, [node.value.to_i], smap_operator(operator, srange_node(node))) end # Visit an IVar node. @@ -1487,13 +1389,13 @@ def visit_ivar(node) s( :ivar, [node.value.to_sym], - source_map_variable(source_range_node(node), source_range_node(node)) + smap_variable(srange_node(node), srange_node(node)) ) end # Visit a Kw node. def visit_kw(node) - location = source_map(expression: source_range_node(node)) + location = smap(srange_node(node)) case node.value when "__FILE__" @@ -1514,15 +1416,12 @@ def visit_kw(node) # Visit a KwRestParam node. def visit_kwrest_param(node) if node.name.nil? 
- s(:kwrestarg, [], source_map_variable(nil, source_range_node(node))) + s(:kwrestarg, [], smap_variable(nil, srange_node(node))) else s( :kwrestarg, [node.name.value.to_sym], - source_map_variable( - source_range_node(node.name), - source_range_node(node) - ) + smap_variable(srange_node(node.name), srange_node(node)) ) end end @@ -1532,10 +1431,7 @@ def visit_label(node) s( :sym, [node.value.chomp(":").to_sym], - source_map_collection( - expression: - source_range(node.location.start_char, node.location.end_char - 1) - ) + smap_collection_bare(srange(node.start_char, node.end_char - 1)) ) end @@ -1550,42 +1446,30 @@ def visit_lambda(node) args_node = maximum end - begin_start = node.params.location.end_char begin_token, end_token = - if buffer.source[begin_start - 1] == "{" - [ - source_range_length(begin_start, -1), - source_range_length(node.location.end_char, -1) - ] + if (srange = srange_search_between(node.params, node.statements, "{")) + [srange, srange_length(node.end_char, -1)] else [ - source_range_length(begin_start, -2), - source_range_length(node.location.end_char, -3) + srange_find_between(node.params, node.statements, "do"), + srange_length(node.end_char, -3) ] end - selector = source_range_length(node.location.start_char, 2) + selector = srange_length(node.start_char, 2) s( type, [ if ::Parser::Builders::Default.emit_lambda - s(:lambda, [], source_map(expression: selector)) + s(:lambda, [], smap(selector)) else - s( - :send, - [nil, :lambda], - source_map_send(selector: selector, expression: selector) - ) + s(:send, [nil, :lambda], smap_send_bare(selector, selector)) end, args_node, visit(node.statements) ], - source_map_collection( - begin_token: begin_token, - end_token: end_token, - expression: source_range_node(node) - ) + smap_collection(begin_token, end_token, srange_node(node)) ) end @@ -1596,21 +1480,18 @@ def visit_lambda_var(node) s( :shadowarg, [local.value.to_sym], - source_map_variable( - source_range_node(local), - 
source_range_node(local) - ) + smap_variable(srange_node(local), srange_node(local)) ) end location = - if node.location.start_char == node.location.end_char - source_map_collection(expression: nil) + if node.start_char == node.end_char + smap_collection_bare(nil) else - source_map_collection( - begin_token: source_range_length(node.location.start_char, 1), - end_token: source_range_length(node.location.end_char, -1), - expression: source_range_node(node) + smap_collection( + srange_length(node.start_char, 1), + srange_length(node.end_char, -1), + srange_node(node) ) end @@ -1622,9 +1503,9 @@ def visit_massign(node) s( :masgn, [visit(node.target), visit(node.value)], - source_map_operator( - source_range_find_between(node.target, node.value, "="), - source_range_node(node) + smap_operator( + srange_find_between(node.target, node.value, "="), + srange_node(node) ) ) end @@ -1678,16 +1559,13 @@ def visit_mlhs(node) s( :arg, [part.value.to_sym], - source_map_variable( - source_range_node(part), - source_range_node(part) - ) + smap_variable(srange_node(part), srange_node(part)) ) else visit(part) end end, - source_map_collection(expression: source_range_node(node)) + smap_collection_bare(srange_node(node)) ) end @@ -1698,10 +1576,10 @@ def visit_mlhs_paren(node) s( child.type, child.children, - source_map_collection( - begin_token: source_range_length(node.location.start_char, 1), - end_token: source_range_length(node.location.end_char, -1), - expression: source_range_node(node) + smap_collection( + srange_length(node.start_char, 1), + srange_length(node.end_char, -1), + srange_node(node) ) ) end @@ -1711,11 +1589,12 @@ def visit_module(node) s( :module, [visit(node.constant), visit(node.bodystmt)], - source_map_definition( - keyword: source_range_length(node.location.start_char, 6), - name: source_range_node(node.constant), - end_token: source_range_length(node.location.end_char, -3) - ).with_expression(source_range_node(node)) + smap_definition( + 
srange_length(node.start_char, 6), + nil, + srange_node(node.constant), + srange_length(node.end_char, -3) + ).with_expression(srange_node(node)) ) end @@ -1735,9 +1614,9 @@ def visit_next(node) s( :next, visit_all(node.arguments.parts), - source_map_keyword_bare( - source_range_length(node.location.start_char, 4), - source_range_node(node) + smap_keyword_bare( + srange_length(node.start_char, 4), + srange_node(node) ) ) end @@ -1745,8 +1624,8 @@ def visit_next(node) # Visit a Not node. def visit_not(node) if node.statement.nil? - begin_token = source_range_find(node.location.start_char, nil, "(") - end_token = source_range_find(node.location.start_char, nil, ")") + begin_token = srange_find(node.start_char, nil, "(") + end_token = srange_find(node.start_char, nil, ")") s( :send, @@ -1754,40 +1633,38 @@ def visit_not(node) s( :begin, [], - source_map_collection( - begin_token: begin_token, - end_token: end_token, - expression: begin_token.join(end_token) + smap_collection( + begin_token, + end_token, + begin_token.join(end_token) ) ), :! ], - source_map_send( - selector: source_range_length(node.location.start_char, 3), - expression: source_range_node(node) - ) + smap_send_bare(srange_length(node.start_char, 3), srange_node(node)) ) else begin_token, end_token = if node.parentheses? [ - source_range_find( - node.location.start_char + 3, - node.statement.location.start_char, + srange_find( + node.start_char + 3, + node.statement.start_char, "(" ), - source_range_length(node.location.end_char, -1) + srange_length(node.end_char, -1) ] end s( :send, [visit(node.statement), :!], - source_map_send( - begin_token: begin_token, - end_token: end_token, - selector: source_range_length(node.location.start_char, 3), - expression: source_range_node(node) + smap_send( + nil, + srange_length(node.start_char, 3), + begin_token, + end_token, + srange_node(node) ) ) end @@ -1795,60 +1672,22 @@ def visit_not(node) # Visit an OpAssign node. 
def visit_opassign(node) + target = visit(node.target) location = - case node.target - when ARefField - source_map_index( - begin_token: - source_range_find( - node.target.collection.location.end_char, - if node.target.index - node.target.index.location.start_char - else - node.target.location.end_char - end, - "[" - ), - end_token: source_range_length(node.target.location.end_char, -1), - expression: source_range_node(node) - ) - when Field - source_map_send( - dot: - if node.target.operator == :"::" - source_range_find_between( - node.target.parent, - node.target.name, - "::" - ) - else - source_range_node(node.target.operator) - end, - selector: source_range_node(node.target.name), - expression: source_range_node(node) - ) - else - source_map_variable( - source_range_node(node.target), - source_range_node(node) - ) - end - - location = location.with_operator(source_range_node(node.operator)) + target + .location + .with_expression(srange_node(node)) + .with_operator(srange_node(node.operator)) case node.operator.value when "||=" - s(:or_asgn, [visit(node.target), visit(node.value)], location) + s(:or_asgn, [target, visit(node.value)], location) when "&&=" - s(:and_asgn, [visit(node.target), visit(node.value)], location) + s(:and_asgn, [target, visit(node.value)], location) else s( :op_asgn, - [ - visit(node.target), - node.operator.value.chomp("=").to_sym, - visit(node.value) - ], + [target, node.operator.value.chomp("=").to_sym, visit(node.value)], location ) end @@ -1867,10 +1706,7 @@ def visit_params(node) s( :arg, [required.value.to_sym], - source_map_variable( - source_range_node(required), - source_range_node(required) - ) + smap_variable(srange_node(required), srange_node(required)) ) end end @@ -1880,10 +1716,10 @@ def visit_params(node) s( :optarg, [name.value.to_sym, visit(value)], - source_map_variable( - source_range_node(name), - source_range_node(name).join(source_range_node(value)) - ).with_operator(source_range_find_between(name, value, "=")) + 
smap_variable( + srange_node(name), + srange_node(name).join(srange_node(value)) + ).with_operator(srange_find_between(name, value, "=")) ) end @@ -1896,10 +1732,7 @@ def visit_params(node) s( :arg, [post.value.to_sym], - source_map_variable( - source_range_node(post), - source_range_node(post) - ) + smap_variable(srange_node(post), srange_node(post)) ) end @@ -1911,24 +1744,18 @@ def visit_params(node) s( :kwoptarg, [key, visit(value)], - source_map_variable( - source_range( - name.location.start_char, - name.location.end_char - 1 - ), - source_range_node(name).join(source_range_node(value)) + smap_variable( + srange(name.start_char, name.end_char - 1), + srange_node(name).join(srange_node(value)) ) ) else s( :kwarg, [key], - source_map_variable( - source_range( - name.location.start_char, - name.location.end_char - 1 - ), - source_range_node(name) + smap_variable( + srange(name.start_char, name.end_char - 1), + srange_node(name) ) ) end @@ -1941,10 +1768,7 @@ def visit_params(node) children << s( :kwnilarg, [], - source_map_variable( - source_range_length(node.location.end_char, -3), - source_range_node(node) - ) + smap_variable(srange_length(node.end_char, -3), srange_node(node)) ) else children << visit(node.keyword_rest) @@ -1953,8 +1777,7 @@ def visit_params(node) children << visit(node.block) if node.block if node.keyword_rest.is_a?(ArgsForward) - location = - source_map(expression: source_range_node(node.keyword_rest)) + location = smap(srange_node(node.keyword_rest)) # If there are no other arguments and we have the emit_forward_arg # option enabled, then the entire argument list is represented by a @@ -1970,16 +1793,23 @@ def visit_params(node) children.insert(index, s(:forward_arg, [], location)) end - s(:args, children, nil) + location = + unless children.empty? + first = children.first.location.expression + last = children.last.location.expression + smap_collection_bare(first.join(last)) + end + + s(:args, children, location) end # Visit a Paren node. 
def visit_paren(node) location = - source_map_collection( - begin_token: source_range_length(node.location.start_char, 1), - end_token: source_range_length(node.location.end_char, -1), - expression: source_range_node(node) + smap_collection( + srange_length(node.start_char, 1), + srange_length(node.end_char, -1), + srange_node(node) ) if node.contents.nil? || @@ -1999,22 +1829,14 @@ def visit_pinned_begin(node) s( :begin, [visit(node.statement)], - source_map_collection( - begin_token: - source_range_length(node.location.start_char + 1, 1), - end_token: source_range_length(node.location.end_char, -1), - expression: - source_range( - node.location.start_char + 1, - node.location.end_char - ) + smap_collection( + srange_length(node.start_char + 1, 1), + srange_length(node.end_char, -1), + srange(node.start_char + 1, node.end_char) ) ) ], - source_map_send( - selector: source_range_length(node.location.start_char, 1), - expression: source_range_node(node) - ) + smap_send_bare(srange_length(node.start_char, 1), srange_node(node)) ) end @@ -2023,10 +1845,7 @@ def visit_pinned_var_ref(node) s( :pin, [visit(node.value)], - source_map_send( - selector: source_range_length(node.location.start_char, 1), - expression: source_range_node(node) - ) + smap_send_bare(srange_length(node.start_char, 1), srange_node(node)) ) end @@ -2067,10 +1886,7 @@ def visit_range(node) s( node.operator.value == ".." ? :irange : :erange, [visit(node.left), visit(node.right)], - source_map_operator( - source_range_node(node.operator), - source_range_node(node) - ) + smap_operator(srange_node(node.operator), srange_node(node)) ) end @@ -2079,32 +1895,18 @@ def visit_rassign(node) s( node.operator.value == "=>" ? :match_pattern : :match_pattern_p, [visit(node.value), visit(node.pattern)], - source_map_operator( - source_range_node(node.operator), - source_range_node(node) - ) + smap_operator(srange_node(node.operator), srange_node(node)) ) end # Visit a Rational node. 
def visit_rational(node) - s( - :rational, - [node.value.to_r], - source_map_operator(nil, source_range_node(node)) - ) + s(:rational, [node.value.to_r], smap_operator(nil, srange_node(node))) end # Visit a Redo node. def visit_redo(node) - s( - :redo, - [], - source_map_keyword_bare( - source_range_node(node), - source_range_node(node) - ) - ) + s(:redo, [], smap_keyword_bare(srange_node(node), srange_node(node))) end # Visit a RegexpLiteral node. @@ -2115,27 +1917,13 @@ def visit_regexp_literal(node) s( :regopt, node.ending.scan(/[a-z]/).sort.map(&:to_sym), - source_map( - expression: - source_range_length( - node.location.end_char, - -(node.ending.length - 1) - ) - ) + smap(srange_length(node.end_char, -(node.ending.length - 1))) ) ), - source_map_collection( - begin_token: - source_range_length( - node.location.start_char, - node.beginning.length - ), - end_token: - source_range_length( - node.location.end_char - node.ending.length, - 1 - ), - expression: source_range_node(node) + smap_collection( + srange_length(node.start_char, node.beginning.length), + srange_length(node.end_char - node.ending.length, 1), + srange_node(node) ) ) end @@ -2145,13 +1933,13 @@ def visit_rescue(node) # In the parser gem, there is a separation between the rescue node and # the rescue body. They have different bounds, so we have to calculate # those here. - start_char = node.location.start_char + start_char = node.start_char body_end_char = if node.statements.empty? start_char + 6 else - node.statements.body.last.location.end_char + node.statements.body.last.end_char end end_char = @@ -2162,16 +1950,16 @@ def visit_rescue(node) if end_node.statements.empty? start_char + 6 else - end_node.statements.body.last.location.end_char + end_node.statements.body.last.end_char end else body_end_char end # These locations are reused for multiple children. 
- keyword = source_range_length(start_char, 6) - body_expression = source_range(start_char, body_end_char) - expression = source_range(start_char, end_char) + keyword = srange_length(start_char, 6) + body_expression = srange(start_char, body_end_char) + expression = srange(start_char, end_char) exceptions = case node.exception&.exceptions @@ -2208,19 +1996,13 @@ def visit_rescue(node) s( :resbody, [nil, nil, visit(node.statements)], - source_map_rescue_body( - keyword: keyword, - expression: body_expression - ) + smap_rescue_body(keyword, nil, nil, body_expression) ) elsif node.exception.variable.nil? s( :resbody, [exceptions, nil, visit(node.statements)], - source_map_rescue_body( - keyword: keyword, - expression: body_expression - ) + smap_rescue_body(keyword, nil, nil, body_expression) ) else s( @@ -2230,15 +2012,15 @@ def visit_rescue(node) visit(node.exception.variable), visit(node.statements) ], - source_map_rescue_body( - keyword: keyword, - assoc: - source_range_find( - node.location.start_char + 6, - node.exception.variable.location.start_char, - "=>" - ), - expression: body_expression + smap_rescue_body( + keyword, + srange_find( + node.start_char + 6, + node.exception.variable.start_char, + "=>" + ), + nil, + body_expression ) ) end @@ -2250,13 +2032,12 @@ def visit_rescue(node) children << nil end - s(:rescue, children, source_map_condition(expression: expression)) + s(:rescue, children, smap_condition_bare(expression)) end # Visit a RescueMod node. 
def visit_rescue_mod(node) - keyword = - source_range_find_between(node.statement, node.value, "rescue") + keyword = srange_find_between(node.statement, node.value, "rescue") s( :rescue, @@ -2265,14 +2046,16 @@ def visit_rescue_mod(node) s( :resbody, [nil, nil, visit(node.value)], - source_map_rescue_body( - keyword: keyword, - expression: keyword.join(source_range_node(node.value)) + smap_rescue_body( + keyword, + nil, + nil, + keyword.join(srange_node(node.value)) ) ), nil ], - source_map_condition(expression: source_range_node(node)) + smap_condition_bare(srange_node(node)) ) end @@ -2282,26 +2065,16 @@ def visit_rest_param(node) s( :restarg, [node.name.value.to_sym], - source_map_variable( - source_range_node(node.name), - source_range_node(node) - ) + smap_variable(srange_node(node.name), srange_node(node)) ) else - s(:restarg, [], source_map_variable(nil, source_range_node(node))) + s(:restarg, [], smap_variable(nil, srange_node(node))) end end # Visit a Retry node. def visit_retry(node) - s( - :retry, - [], - source_map_keyword_bare( - source_range_node(node), - source_range_node(node) - ) - ) + s(:retry, [], smap_keyword_bare(srange_node(node), srange_node(node))) end # Visit a ReturnNode node. @@ -2309,9 +2082,9 @@ def visit_return(node) s( :return, node.arguments ? 
visit_all(node.arguments.parts) : [], - source_map_keyword_bare( - source_range_length(node.location.start_char, 6), - source_range_node(node) + smap_keyword_bare( + srange_length(node.start_char, 6), + srange_node(node) ) ) end @@ -2321,16 +2094,12 @@ def visit_sclass(node) s( :sclass, [visit(node.target), visit(node.bodystmt)], - source_map_definition( - keyword: source_range_length(node.location.start_char, 5), - operator: - source_range_find( - node.location.start_char + 5, - node.target.location.start_char, - "<<" - ), - end_token: source_range_length(node.location.end_char, -3) - ).with_expression(source_range_node(node)) + smap_definition( + srange_length(node.start_char, 5), + srange_find(node.start_char + 5, node.target.start_char, "<<"), + nil, + srange_length(node.end_char, -3) + ).with_expression(srange_node(node)) ) end @@ -2351,12 +2120,8 @@ def visit_statements(node) s( :begin, visit_all(children), - source_map_collection( - expression: - source_range( - children.first.location.start_char, - children.last.location.end_char - ) + smap_collection_bare( + srange(children.first.start_char, children.last.end_char) ) ) end @@ -2364,15 +2129,11 @@ def visit_statements(node) # Visit a StringConcat node. def visit_string_concat(node) - location = source_map_collection(expression: source_range_node(node)) - - s(:dstr, [visit(node.left), visit(node.right)], location) - end - - # Visit a StringContent node. - def visit_string_content(node) - # Can get here if you're inside a hash pattern, e.g., in "a": 1 - s(:sym, [node.parts.first.value.to_sym], nil) + s( + :dstr, + [visit(node.left), visit(node.right)], + smap_collection_bare(srange_node(node)) + ) end # Visit a StringDVar node. @@ -2385,10 +2146,10 @@ def visit_string_embexpr(node) s( :begin, visit(node.statements).then { |child| child ? 
[child] : [] }, - source_map_collection( - begin_token: source_range_length(node.location.start_char, 2), - end_token: source_range_length(node.location.end_char, -1), - expression: source_range_node(node) + smap_collection( + srange_length(node.start_char, 2), + srange_length(node.end_char, -1), + srange_node(node) ) ) end @@ -2397,17 +2158,13 @@ def visit_string_embexpr(node) def visit_string_literal(node) location = if node.quote - source_map_collection( - begin_token: - source_range_length( - node.location.start_char, - node.quote.length - ), - end_token: source_range_length(node.location.end_char, -1), - expression: source_range_node(node) + smap_collection( + srange_length(node.start_char, node.quote.length), + srange_length(node.end_char, -1), + srange_node(node) ) else - source_map_collection(expression: source_range_node(node)) + smap_collection_bare(srange_node(node)) end if node.parts.empty? @@ -2426,9 +2183,9 @@ def visit_super(node) s( :super, visit_all(node.arguments.parts), - source_map_keyword_bare( - source_range_length(node.location.start_char, 5), - source_range_node(node) + smap_keyword_bare( + srange_length(node.start_char, 5), + srange_node(node) ) ) else @@ -2437,15 +2194,11 @@ def visit_super(node) s( :super, [], - source_map_keyword( - source_range_length(node.location.start_char, 5), - source_range_find( - node.location.start_char + 5, - node.location.end_char, - "(" - ), - source_range_length(node.location.end_char, -1), - source_range_node(node) + smap_keyword( + srange_length(node.start_char, 5), + srange_find(node.start_char + 5, node.end_char, "("), + srange_length(node.end_char, -1), + srange_node(node) ) ) when ArgsForward @@ -2454,15 +2207,11 @@ def visit_super(node) s( :super, visit_all(node.arguments.arguments.parts), - source_map_keyword( - source_range_length(node.location.start_char, 5), - source_range_find( - node.location.start_char + 5, - node.location.end_char, - "(" - ), - source_range_length(node.location.end_char, -1), 
- source_range_node(node) + smap_keyword( + srange_length(node.start_char, 5), + srange_find(node.start_char + 5, node.end_char, "("), + srange_length(node.end_char, -1), + srange_node(node) ) ) end @@ -2472,17 +2221,14 @@ def visit_super(node) # Visit a SymbolLiteral node. def visit_symbol_literal(node) begin_token = - if buffer.source[node.location.start_char] == ":" - source_range_length(node.location.start_char, 1) + if buffer.source[node.start_char] == ":" + srange_length(node.start_char, 1) end s( :sym, [node.value.value.to_sym], - source_map_collection( - begin_token: begin_token, - expression: source_range_node(node) - ) + smap_collection(begin_token, nil, srange_node(node)) ) end @@ -2517,19 +2263,13 @@ def visit_top_const_field(node) s( :casgn, [ - s( - :cbase, - [], - source_map( - expression: source_range_length(node.location.start_char, 2) - ) - ), + s(:cbase, [], smap(srange_length(node.start_char, 2))), node.constant.value.to_sym ], - source_map_constant( - source_range_length(node.location.start_char, 2), - source_range_node(node.constant), - source_range_node(node) + smap_constant( + srange_length(node.start_char, 2), + srange_node(node.constant), + srange_node(node) ) ) end @@ -2539,19 +2279,13 @@ def visit_top_const_ref(node) s( :const, [ - s( - :cbase, - [], - source_map( - expression: source_range_length(node.location.start_char, 2) - ) - ), + s(:cbase, [], smap(srange_length(node.start_char, 2))), node.constant.value.to_sym ], - source_map_constant( - source_range_length(node.location.start_char, 2), - source_range_node(node.constant), - source_range_node(node) + smap_constant( + srange_length(node.start_char, 2), + srange_node(node.constant), + srange_node(node) ) ) end @@ -2563,7 +2297,7 @@ def visit_tstring_content(node) s( :str, ["\"#{dumped}\"".undump], - source_map_collection(expression: source_range_node(node)) + smap_collection_bare(srange_node(node)) ) end @@ -2593,9 +2327,9 @@ def visit_undef(node) s( :undef, visit_all(node.symbols), 
- source_map_keyword_bare( - source_range_length(node.location.start_char, 5), - source_range_node(node) + smap_keyword_bare( + srange_length(node.start_char, 5), + srange_node(node) ) ) end @@ -2625,19 +2359,17 @@ def visit_unless(node) :if, [predicate, visit(node.consequent), visit(node.statements)], if node.modifier? - source_map_keyword_bare( - source_range_find_between( - node.statements, - node.predicate, - "unless" - ), - source_range_node(node) + smap_keyword_bare( + srange_find_between(node.statements, node.predicate, "unless"), + srange_node(node) ) else - source_map_condition( - keyword: source_range_length(node.location.start_char, 6), - end_token: source_range_length(node.location.end_char, -3), - expression: source_range_node(node) + smap_condition( + srange_length(node.start_char, 6), + srange_search_between(node.predicate, node.statements, "then"), + nil, + srange_length(node.end_char, -3), + srange_node(node) ) end ) @@ -2649,20 +2381,17 @@ def visit_until(node) loop_post?(node) ? :until_post : :until, [visit(node.predicate), visit(node.statements)], if node.modifier? 
- source_map_keyword_bare( - source_range_find_between( - node.statements, - node.predicate, - "until" - ), - source_range_node(node) + smap_keyword_bare( + srange_find_between(node.statements, node.predicate, "until"), + srange_node(node) ) else - source_map_keyword( - source_range_length(node.location.start_char, 5), - nil, - source_range_length(node.location.end_char, -3), - source_range_node(node) + smap_keyword( + srange_length(node.start_char, 5), + srange_search_between(node.predicate, node.statements, "do") || + srange_search_between(node.predicate, node.statements, ";"), + srange_length(node.end_char, -3), + srange_node(node) ) end ) @@ -2687,27 +2416,16 @@ def visit_var_field(node) s( :match_var, [name], - source_map_variable( - source_range_node(node.value), - source_range_node(node.value) - ) + smap_variable(srange_node(node.value), srange_node(node.value)) ) elsif node.value.is_a?(Const) s( :casgn, [nil, name], - source_map_constant( - nil, - source_range_node(node.value), - source_range_node(node) - ) + smap_constant(nil, srange_node(node.value), srange_node(node)) ) else - location = - source_map_variable( - source_range_node(node), - source_range_node(node) - ) + location = smap_variable(srange_node(node), srange_node(node)) case node.value when CVar @@ -2747,27 +2465,27 @@ def visit_vcall(node) # Visit a When node. def visit_when(node) - keyword = source_range_length(node.location.start_char, 4) + keyword = srange_length(node.start_char, 4) begin_token = - if buffer.source[node.statements.location.start_char] == ";" - source_range_length(node.statements.location.start_char, 1) + if buffer.source[node.statements.start_char] == ";" + srange_length(node.statements.start_char, 1) end end_char = if node.statements.body.empty? 
- node.statements.location.end_char + node.statements.end_char else - node.statements.body.last.location.end_char + node.statements.body.last.end_char end s( :when, visit_all(node.arguments.parts) + [visit(node.statements)], - source_map_keyword( + smap_keyword( keyword, begin_token, nil, - source_range(keyword.begin_pos, end_char) + srange(keyword.begin_pos, end_char) ) ) end @@ -2778,20 +2496,17 @@ def visit_while(node) loop_post?(node) ? :while_post : :while, [visit(node.predicate), visit(node.statements)], if node.modifier? - source_map_keyword_bare( - source_range_find_between( - node.statements, - node.predicate, - "while" - ), - source_range_node(node) + smap_keyword_bare( + srange_find_between(node.statements, node.predicate, "while"), + srange_node(node) ) else - source_map_keyword( - source_range_length(node.location.start_char, 5), - nil, - source_range_length(node.location.end_char, -3), - source_range_node(node) + smap_keyword( + srange_length(node.start_char, 5), + srange_search_between(node.predicate, node.statements, "do") || + srange_search_between(node.predicate, node.statements, ";"), + srange_length(node.end_char, -3), + srange_node(node) ) end ) @@ -2824,10 +2539,13 @@ def visit_xstring_literal(node) s( :xstr, visit_all(node.parts), - source_map_collection( - begin_token: source_range_length(node.location.start_char, 1), - end_token: source_range_length(node.location.end_char, -1), - expression: source_range_node(node) + smap_collection( + srange_length( + node.start_char, + buffer.source[node.start_char] == "%" ? 
3 : 1 + ), + srange_length(node.end_char, -1), + srange_node(node) ) ) end @@ -2838,29 +2556,29 @@ def visit_yield(node) s( :yield, [], - source_map_keyword_bare( - source_range_length(node.location.start_char, 5), - source_range_node(node) + smap_keyword_bare( + srange_length(node.start_char, 5), + srange_node(node) ) ) when Args s( :yield, visit_all(node.arguments.parts), - source_map_keyword_bare( - source_range_length(node.location.start_char, 5), - source_range_node(node) + smap_keyword_bare( + srange_length(node.start_char, 5), + srange_node(node) ) ) else s( :yield, visit_all(node.arguments.contents.parts), - source_map_keyword( - source_range_length(node.location.start_char, 5), - source_range_length(node.arguments.location.start_char, 1), - source_range_length(node.location.end_char, -1), - source_range_node(node) + smap_keyword( + srange_length(node.start_char, 5), + srange_length(node.arguments.start_char, 1), + srange_length(node.end_char, -1), + srange_node(node) ) ) end @@ -2871,9 +2589,9 @@ def visit_zsuper(node) s( :zsuper, [], - source_map_keyword_bare( - source_range_length(node.location.start_char, 5), - source_range_node(node) + smap_keyword_bare( + srange_length(node.start_char, 5), + srange_node(node) ) ) end @@ -2885,7 +2603,7 @@ def block_children(node) if node.block_var visit(node.block_var) else - s(:args, [], source_map_collection(expression: nil)) + s(:args, [], smap_collection_bare(nil)) end type = :block @@ -2923,10 +2641,10 @@ def canonical_unary(node) location: Location.new( start_line: node.location.start_line, - start_char: node.location.start_char, + start_char: node.start_char, start_column: node.location.start_column, end_line: node.location.start_line, - end_char: node.location.start_char + length, + end_char: node.start_char + length, end_column: node.location.start_column + length ) ), @@ -2940,8 +2658,8 @@ def canonical_unary(node) def canonical_binary(node) operator = node.operator.to_s - start_char = 
node.left.location.end_char - end_char = node.right.location.start_char + start_char = node.left.end_char + end_char = node.right.start_char index = buffer.source[start_char...end_char].index(operator) start_line = @@ -3007,12 +2725,12 @@ def s(type, children, location) end # Constructs a plain source map just for an expression. - def source_map(expression:) + def smap(expression) ::Parser::Source::Map.new(expression) end # Constructs a new source map for a collection. - def source_map_collection(begin_token: nil, end_token: nil, expression:) + def smap_collection(begin_token, end_token, expression) ::Parser::Source::Map::Collection.new( begin_token, end_token, @@ -3020,13 +2738,18 @@ def source_map_collection(begin_token: nil, end_token: nil, expression:) ) end + # Constructs a new source map for a collection without a begin or end. + def smap_collection_bare(expression) + smap_collection(nil, nil, expression) + end + # Constructs a new source map for a conditional expression. - def source_map_condition( - keyword: nil, - begin_token: nil, - else_token: nil, - end_token: nil, - expression: + def smap_condition( + keyword, + begin_token, + else_token, + end_token, + expression ) ::Parser::Source::Map::Condition.new( keyword, @@ -3037,18 +2760,19 @@ def source_map_condition( ) end + # Constructs a new source map for a conditional expression with no begin + # or end. + def smap_condition_bare(expression) + smap_condition(nil, nil, nil, nil, expression) + end + # Constructs a new source map for a constant reference. - def source_map_constant(double_colon, name, expression) + def smap_constant(double_colon, name, expression) ::Parser::Source::Map::Constant.new(double_colon, name, expression) end # Constructs a new source map for a class definition. 
- def source_map_definition( - keyword: nil, - operator: nil, - name: nil, - end_token: nil - ) + def smap_definition(keyword, operator, name, end_token) ::Parser::Source::Map::Definition.new( keyword, operator, @@ -3058,7 +2782,7 @@ def source_map_definition( end # Constructs a new source map for a for loop. - def source_map_for(keyword, in_token, begin_token, end_token, expression) + def smap_for(keyword, in_token, begin_token, end_token, expression) ::Parser::Source::Map::For.new( keyword, in_token, @@ -3069,7 +2793,7 @@ def source_map_for(keyword, in_token, begin_token, end_token, expression) end # Constructs a new source map for a heredoc. - def source_map_heredoc(expression, heredoc_body, heredoc_end) + def smap_heredoc(expression, heredoc_body, heredoc_end) ::Parser::Source::Map::Heredoc.new( expression, heredoc_body, @@ -3078,12 +2802,12 @@ def source_map_heredoc(expression, heredoc_body, heredoc_end) end # Construct a source map for an index operation. - def source_map_index(begin_token: nil, end_token: nil, expression:) + def smap_index(begin_token, end_token, expression) ::Parser::Source::Map::Index.new(begin_token, end_token, expression) end # Constructs a new source map for the use of a keyword. - def source_map_keyword(keyword, begin_token, end_token, expression) + def smap_keyword(keyword, begin_token, end_token, expression) ::Parser::Source::Map::Keyword.new( keyword, begin_token, @@ -3094,18 +2818,18 @@ def source_map_keyword(keyword, begin_token, end_token, expression) # Constructs a new source map for the use of a keyword without a begin or # end token. - def source_map_keyword_bare(keyword, expression) - source_map_keyword(keyword, nil, nil, expression) + def smap_keyword_bare(keyword, expression) + smap_keyword(keyword, nil, nil, expression) end # Constructs a new source map for a method definition. 
- def source_map_method_definition( - keyword: nil, - operator: nil, - name: nil, - end_token: nil, - assignment: nil, - expression: + def smap_method_definition( + keyword, + operator, + name, + end_token, + assignment, + expression ) ::Parser::Source::Map::MethodDefinition.new( keyword, @@ -3118,17 +2842,12 @@ def source_map_method_definition( end # Constructs a new source map for an operator. - def source_map_operator(operator, expression) + def smap_operator(operator, expression) ::Parser::Source::Map::Operator.new(operator, expression) end # Constructs a source map for the body of a rescue clause. - def source_map_rescue_body( - keyword: nil, - assoc: nil, - begin_token: nil, - expression: - ) + def smap_rescue_body(keyword, assoc, begin_token, expression) ::Parser::Source::Map::RescueBody.new( keyword, assoc, @@ -3138,13 +2857,7 @@ def source_map_rescue_body( end # Constructs a new source map for a method call. - def source_map_send( - dot: nil, - selector: nil, - begin_token: nil, - end_token: nil, - expression: - ) + def smap_send(dot, selector, begin_token, end_token, expression) ::Parser::Source::Map::Send.new( dot, selector, @@ -3154,74 +2867,76 @@ def source_map_send( ) end + # Constructs a new source map for a method call without a begin or end. + def smap_send_bare(selector, expression) + smap_send(nil, selector, nil, nil, expression) + end + + # Constructs a new source map for a ternary expression. + def smap_ternary(question, colon, expression) + ::Parser::Source::Map::Ternary.new(question, colon, expression) + end + # Constructs a new source map for a variable. - def source_map_variable(name, expression) + def smap_variable(name, expression) ::Parser::Source::Map::Variable.new(name, expression) end # Constructs a new source range from the given start and end offsets. 
- def source_range(start_char, end_char) + def srange(start_char, end_char) ::Parser::Source::Range.new(buffer, start_char, end_char) end # Constructs a new source range by finding the given needle in the given # range of the source. If the needle is not found, returns nil. - def source_range_search(start_char, end_char, needle) + def srange_search(start_char, end_char, needle) index = buffer.source[start_char...end_char].index(needle) return unless index offset = start_char + index - source_range(offset, offset + needle.length) + srange(offset, offset + needle.length) end # Constructs a new source range by searching for the given needle between # the end location of the start node and the start location of the end # node. If the needle is not found, returns nil. - def source_range_search_between(start_node, end_node, needle) - source_range_search( - start_node.location.end_char, - end_node.location.start_char, - needle - ) + def srange_search_between(start_node, end_node, needle) + srange_search(start_node.end_char, end_node.start_char, needle) end # Constructs a new source range by finding the given needle in the given # range of the source. If it needle is not found, raises an error. - def source_range_find(start_char, end_char, needle) - source_range = source_range_search(start_char, end_char, needle) + def srange_find(start_char, end_char, needle) + srange = srange_search(start_char, end_char, needle) - unless source_range + unless srange slice = buffer.source[start_char...end_char].inspect raise "Could not find #{needle.inspect} in #{slice}" end - source_range + srange end # Constructs a new source range by finding the given needle between the # end location of the start node and the start location of the end node. # If the needle is not found, returns raises an error. 
- def source_range_find_between(start_node, end_node, needle) - source_range_find( - start_node.location.end_char, - end_node.location.start_char, - needle - ) + def srange_find_between(start_node, end_node, needle) + srange_find(start_node.end_char, end_node.start_char, needle) end # Constructs a new source range from the given start offset and length. - def source_range_length(start_char, length) + def srange_length(start_char, length) if length > 0 - source_range(start_char, start_char + length) + srange(start_char, start_char + length) else - source_range(start_char + length, start_char) + srange(start_char + length, start_char) end end # Constructs a new source range using the given node's location. - def source_range_node(node) + def srange_node(node) location = node.location - source_range(location.start_char, location.end_char) + srange(location.start_char, location.end_char) end end end diff --git a/test/syntax_tree_test.rb b/test/syntax_tree_test.rb index 05242d94..f12065b8 100644 --- a/test/syntax_tree_test.rb +++ b/test/syntax_tree_test.rb @@ -22,7 +22,7 @@ def method # comment SOURCE bodystmt = SyntaxTree.parse(source).statements.body.first.bodystmt - assert_equal(20, bodystmt.location.start_char) + assert_equal(20, bodystmt.start_char) end def test_parse_error diff --git a/test/translation/parser_test.rb b/test/translation/parser_test.rb index 576d4ac1..ad87d8c6 100644 --- a/test/translation/parser_test.rb +++ b/test/translation/parser_test.rb @@ -113,7 +113,7 @@ class ParserTest < Minitest::Test name = prefix[4..] next if all_failures.any? 
{ |pattern| File.fnmatch?(pattern, name) } - define_method(name) { assert_parses(lines.join("\n")) } + define_method(name) { assert_parses("#{lines.join("\n")}\n") } end private From 52f44038ca66a4542d97aff05b85e1e6e84b002a Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 9 Feb 2023 16:25:29 -0500 Subject: [PATCH 41/58] Add a rubocop ast translator --- lib/syntax_tree/parser.rb | 34 ++-- lib/syntax_tree/translation.rb | 11 ++ lib/syntax_tree/translation/parser.rb | 213 ++++++++++++--------- lib/syntax_tree/translation/rubocop_ast.rb | 21 ++ 4 files changed, 169 insertions(+), 110 deletions(-) create mode 100644 lib/syntax_tree/translation/rubocop_ast.rb diff --git a/lib/syntax_tree/parser.rb b/lib/syntax_tree/parser.rb index cf3982f9..be6265d1 100644 --- a/lib/syntax_tree/parser.rb +++ b/lib/syntax_tree/parser.rb @@ -275,7 +275,7 @@ def find_keyword(name) end def find_keyword_between(name, left, right) - bounds = left.location.end_char...right.location.start_char + bounds = left.end_char...right.start_char index = tokens.rindex do |token| char = token.location.start_char @@ -1807,19 +1807,19 @@ def on_for(index, collection, statements) in_keyword = consume_keyword(:in) ending = consume_keyword(:end) - # Consume the do keyword if it exists so that it doesn't get confused for - # some other block - if (keyword = find_keyword_between(:do, collection, ending)) - tokens.delete(keyword) - end + delimiter = + find_keyword_between(:do, collection, ending) || + find_token_between(Semicolon, collection, ending) + + tokens.delete(delimiter) if delimiter start_char = - find_next_statement_start((keyword || collection).location.end_char) + find_next_statement_start((delimiter || collection).location.end_char) statements.bind( start_char, start_char - - line_counts[(keyword || collection).location.end_line - 1].start, + line_counts[(delimiter || collection).location.end_line - 1].start, ending.location.start_char, ending.location.start_column ) @@ -3328,10 +3328,13 @@ def 
on_sclass(target, bodystmt) ) end + # Semicolons are tokens that get added to the token list but never get + # attached to the AST. Because of this they only need to track their + # associated location so they can be used for computing bounds. class Semicolon attr_reader :location - def initialize(location:) + def initialize(location) @location = location end end @@ -3340,13 +3343,12 @@ def initialize(location:) # on_semicolon: (String value) -> Semicolon def on_semicolon(value) tokens << Semicolon.new( - location: - Location.token( - line: lineno, - char: char_pos, - column: current_column, - size: value.size - ) + Location.token( + line: lineno, + char: char_pos, + column: current_column, + size: value.size + ) ) end diff --git a/lib/syntax_tree/translation.rb b/lib/syntax_tree/translation.rb index d3f2e56f..6fc96f00 100644 --- a/lib/syntax_tree/translation.rb +++ b/lib/syntax_tree/translation.rb @@ -13,5 +13,16 @@ def self.to_parser(node, buffer) node.accept(Parser.new(buffer)) end + + # This method translates the given node into the representation defined by + # the rubocop/rubocop-ast gem. We don't explicitly list it as a dependency + # because it's not required for the core functionality of Syntax Tree. + def self.to_rubocop_ast(node, buffer) + require "rubocop/ast" + require_relative "translation/parser" + require_relative "translation/rubocop_ast" + + node.accept(RuboCopAST.new(buffer)) + end end end diff --git a/lib/syntax_tree/translation/parser.rb b/lib/syntax_tree/translation/parser.rb index b9e91e5f..70c98336 100644 --- a/lib/syntax_tree/translation/parser.rb +++ b/lib/syntax_tree/translation/parser.rb @@ -5,6 +5,73 @@ module Translation # This visitor is responsible for converting the syntax tree produced by # Syntax Tree into the syntax tree produced by the whitequark/parser gem. class Parser < BasicVisitor + # Heredocs are represented _very_ differently in the parser gem from how + # they are represented in the Syntax Tree AST. 
This class is responsible + # for handling the translation. + class HeredocBuilder + Line = Struct.new(:value, :segments) + + attr_reader :node, :segments + + def initialize(node) + @node = node + @segments = [] + end + + def <<(segment) + if segment.type == :str && segments.last && + segments.last.type == :str && + !segments.last.children.first.end_with?("\n") + segments.last.children.first << segment.children.first + else + segments << segment + end + end + + def trim! + return unless node.beginning.value[2] == "~" + lines = [Line.new(+"", [])] + + segments.each do |segment| + lines.last.segments << segment + + if segment.type == :str + lines.last.value << segment.children.first + lines << Line.new(+"", []) if lines.last.value.end_with?("\n") + end + end + + lines.pop if lines.last.value.empty? + return if lines.empty? + + segments.clear + lines.each do |line| + remaining = node.dedent + + line.segments.each do |segment| + if segment.type == :str + if remaining > 0 + whitespace = segment.children.first[/^\s{0,#{remaining}}/] + segment.children.first.sub!(/^#{whitespace}/, "") + remaining -= whitespace.length + end + + if node.beginning.value[3] != "'" && segments.any? && + segments.last.type == :str && + segments.last.children.first.end_with?("\\\n") + segments.last.children.first.gsub!(/\\\n\z/, "") + segments.last.children.first.concat(segment.children.first) + elsif !segment.children.first.empty? + segments << segment + end + else + segments << segment + end + end + end + end + end + attr_reader :buffer, :stack def initialize(buffer) @@ -665,6 +732,25 @@ def visit_command_call(node) node.end_char end + expression = + if node.arguments.is_a?(ArgParen) + srange(node.start_char, node.arguments.end_char) + elsif node.arguments.is_a?(Args) && node.arguments.parts.any? 
+ last_part = node.arguments.parts.last + end_char = + if last_part.is_a?(Heredoc) + last_part.beginning.end_char + else + last_part.end_char + end + + srange(node.start_char, end_char) + elsif node.block + srange_node(node.message) + else + srange_node(node) + end + call = s( if node.operator.is_a?(Op) && node.operator.value == "&." @@ -690,14 +776,7 @@ def visit_command_call(node) node.message == :call ? nil : srange_node(node.message), begin_token, end_token, - if node.arguments.is_a?(ArgParen) || - (node.arguments.is_a?(Args) && node.arguments.parts.any?) - srange(node.start_char, node.arguments.end_char) - elsif node.block - srange_node(node.message) - else - srange_node(node) - end + expression ) ) @@ -1049,7 +1128,8 @@ def visit_for(node) smap_for( srange_length(node.start_char, 3), srange_find_between(node.index, node.collection, "in"), - srange_search_between(node.collection, node.statements, "do"), + srange_search_between(node.collection, node.statements, "do") || + srange_search_between(node.collection, node.statements, ";"), srange_length(node.end_char, -3), srange_node(node) ) @@ -1078,98 +1158,43 @@ def visit_hash(node) ) end - # Heredocs are represented _very_ differently in the parser gem from how - # they are represented in the Syntax Tree AST. This class is responsible - # for handling the translation. - class HeredocSegments - HeredocLine = Struct.new(:value, :segments) - - attr_reader :node, :segments - - def initialize(node) - @node = node - @segments = [] - end - - def <<(segment) - if segment.type == :str && segments.last && - segments.last.type == :str && - !segments.last.children.first.end_with?("\n") - segments.last.children.first << segment.children.first - else - segments << segment - end - end - - def trim! 
- return unless node.beginning.value[2] == "~" - lines = [HeredocLine.new(+"", [])] - - segments.each do |segment| - lines.last.segments << segment - - if segment.type == :str - lines.last.value << segment.children.first - - if lines.last.value.end_with?("\n") - lines << HeredocLine.new(+"", []) - end - end - end - - lines.pop if lines.last.value.empty? - return if lines.empty? - - segments.clear - lines.each do |line| - remaining = node.dedent - - line.segments.each do |segment| - if segment.type == :str - if remaining > 0 - whitespace = segment.children.first[/^\s{0,#{remaining}}/] - segment.children.first.sub!(/^#{whitespace}/, "") - remaining -= whitespace.length - end - - if node.beginning.value[3] != "'" && segments.any? && - segments.last.type == :str && - segments.last.children.first.end_with?("\\\n") - segments.last.children.first.gsub!(/\\\n\z/, "") - segments.last.children.first.concat(segment.children.first) - elsif !segment.children.first.empty? - segments << segment - end - else - segments << segment - end - end - end - end - end - # Visit a Heredoc node. def visit_heredoc(node) - heredoc_segments = HeredocSegments.new(node) + heredoc = HeredocBuilder.new(node) + # For each part of the heredoc, if it's a string content node, split it + # into multiple string content nodes, one for each line. Otherwise, + # visit the node as normal. node.parts.each do |part| if part.is_a?(TStringContent) && part.value.count("\n") > 1 - part - .value - .split("\n") - .each { |line| heredoc_segments << s(:str, ["#{line}\n"], nil) } + index = part.start_char + lines = part.value.split("\n") + + lines.each do |line| + length = line.length + 1 + location = smap_collection_bare(srange_length(index, length)) + + heredoc << s(:str, ["#{line}\n"], location) + index += length + end else - heredoc_segments << visit(part) + heredoc << visit(part) end end - heredoc_segments.trim! 
+ # Now that we have all of the pieces on the heredoc, we can trim it if + # it is a heredoc that supports trimming (i.e., it has a ~ on the + # declaration). + heredoc.trim! + + # Generate the location for the heredoc, which goes from the declaration + # to the ending delimiter. location = smap_heredoc( srange_node(node.beginning), srange( if node.parts.empty? - node.beginning.end_char + node.beginning.end_char + 1 else node.parts.first.start_char end, @@ -1178,15 +1203,15 @@ def visit_heredoc(node) srange(node.ending.start_char, node.ending.end_char - 1) ) + # Finally, decide which kind of heredoc node to generate based on its + # declaration and contents. if node.beginning.value.match?(/`\w+`\z/) - s(:xstr, heredoc_segments.segments, location) - elsif heredoc_segments.segments.length > 1 - s(:dstr, heredoc_segments.segments, location) - elsif heredoc_segments.segments.empty? - s(:dstr, [], location) - else - segment = heredoc_segments.segments.first + s(:xstr, heredoc.segments, location) + elsif heredoc.segments.length == 1 + segment = heredoc.segments.first s(segment.type, segment.children, location) + else + s(:dstr, heredoc.segments, location) end end diff --git a/lib/syntax_tree/translation/rubocop_ast.rb b/lib/syntax_tree/translation/rubocop_ast.rb new file mode 100644 index 00000000..53c6737b --- /dev/null +++ b/lib/syntax_tree/translation/rubocop_ast.rb @@ -0,0 +1,21 @@ +# frozen_string_literal: true + +module SyntaxTree + module Translation + # This visitor is responsible for converting the syntax tree produced by + # Syntax Tree into the syntax tree produced by the rubocop/rubocop-ast gem. + class RuboCopAST < Parser + private + + # This method is effectively the same thing as the parser gem except that + # it uses the rubocop-ast specializations of the nodes. 
+ def s(type, children, location) + ::RuboCop::AST::Builder::NODE_MAP.fetch(type, ::RuboCop::AST::Node).new( + type, + children, + location: location + ) + end + end + end +end From cd882e8f621a37887d8c16540f1491a5591c70fe Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 9 Feb 2023 17:15:51 -0500 Subject: [PATCH 42/58] Remove the parser from the statements node --- lib/syntax_tree/dsl.rb | 2 +- lib/syntax_tree/node.rb | 18 +++++-------- lib/syntax_tree/parser.rb | 46 +++++++++++++++++++++----------- lib/syntax_tree/yarv/compiler.rb | 7 +---- 4 files changed, 39 insertions(+), 34 deletions(-) diff --git a/lib/syntax_tree/dsl.rb b/lib/syntax_tree/dsl.rb index 1af19644..4506aa04 100644 --- a/lib/syntax_tree/dsl.rb +++ b/lib/syntax_tree/dsl.rb @@ -791,7 +791,7 @@ def SClass(target, bodystmt) # Create a new Statements node. def Statements(body) - Statements.new(nil, body: body, location: Location.default) + Statements.new(body: body, location: Location.default) end # Create a new StringContent node. diff --git a/lib/syntax_tree/node.rb b/lib/syntax_tree/node.rb index 627deab1..0a495890 100644 --- a/lib/syntax_tree/node.rb +++ b/lib/syntax_tree/node.rb @@ -2275,7 +2275,7 @@ def initialize( @comments = [] end - def bind(start_char, start_column, end_char, end_column) + def bind(parser, start_char, start_column, end_char, end_column) @location = Location.new( start_line: location.start_line, @@ -2289,6 +2289,7 @@ def bind(start_char, start_column, end_char, end_column) # Here we're going to determine the bounds for the statements consequent = rescue_clause || else_clause || ensure_clause statements.bind( + parser, start_char, start_column, consequent ? consequent.location.start_char : end_char, @@ -9816,23 +9817,19 @@ def ===(other) # propagate that onto void_stmt nodes inside the stmts in order to make sure # all comments get printed appropriately. 
class Statements < Node - # [Parser] the parser that is generating this node - attr_reader :parser - # [Array[ Node ]] the list of expressions contained within this node attr_reader :body # [Array[ Comment | EmbDoc ]] the comments attached to this node attr_reader :comments - def initialize(parser, body:, location:) - @parser = parser + def initialize(body:, location:) @body = body @location = location @comments = [] end - def bind(start_char, start_column, end_char, end_column) + def bind(parser, start_char, start_column, end_char, end_column) @location = Location.new( start_line: location.start_line, @@ -9858,7 +9855,7 @@ def bind(start_char, start_column, end_char, end_column) body[0] = VoidStmt.new(location: location) end - attach_comments(start_char, end_char) + attach_comments(parser, start_char, end_char) end def bind_end(end_char, end_column) @@ -9890,7 +9887,6 @@ def child_nodes def copy(body: nil, location: nil) node = Statements.new( - parser, body: body || self.body, location: location || self.location ) @@ -9902,7 +9898,7 @@ def copy(body: nil, location: nil) alias deconstruct child_nodes def deconstruct_keys(_keys) - { parser: parser, body: body, location: location, comments: comments } + { body: body, location: location, comments: comments } end def format(q) @@ -9962,7 +9958,7 @@ def ===(other) # As efficiently as possible, gather up all of the comments that have been # found while this statements list was being parsed and add them into the # body. 
- def attach_comments(start_char, end_char) + def attach_comments(parser, start_char, end_char) parser_comments = parser.comments comment_index = 0 diff --git a/lib/syntax_tree/parser.rb b/lib/syntax_tree/parser.rb index be6265d1..8059b18c 100644 --- a/lib/syntax_tree/parser.rb +++ b/lib/syntax_tree/parser.rb @@ -374,6 +374,7 @@ def on_BEGIN(statements) start_char = find_next_statement_start(lbrace.location.end_char) statements.bind( + self, start_char, start_char - line_counts[lbrace.location.start_line - 1].start, rbrace.location.start_char, @@ -412,6 +413,7 @@ def on_END(statements) start_char = find_next_statement_start(lbrace.location.end_char) statements.bind( + self, start_char, start_char - line_counts[lbrace.location.start_line - 1].start, rbrace.location.start_char, @@ -849,6 +851,7 @@ def on_begin(bodystmt) end bodystmt.bind( + self, find_next_statement_start(keyword.location.end_char), keyword.location.end_column, end_location.end_char, @@ -960,11 +963,7 @@ def on_bodystmt(statements, rescue_clause, else_clause, ensure_clause) # case we'll wrap it in a Statements node to be consistent. 
unless statements.is_a?(Statements) statements = - Statements.new( - self, - body: [statements], - location: statements.location - ) + Statements.new(body: [statements], location: statements.location) end parts = [statements, rescue_clause, else_clause, ensure_clause].compact @@ -991,6 +990,7 @@ def on_brace_block(block_var, statements) start_char = find_next_statement_start(location.end_char) statements.bind( + self, start_char, start_char - line_counts[location.start_line - 1].start, rbrace.location.start_char, @@ -1098,6 +1098,7 @@ def on_class(constant, superclass, bodystmt) start_char = find_next_statement_start(location.end_char) bodystmt.bind( + self, start_char, start_char - line_counts[location.start_line - 1].start, ending.location.start_char, @@ -1307,6 +1308,7 @@ def on_def(name, params, bodystmt) start_char = find_next_statement_start(params.location.end_char) bodystmt.bind( + self, start_char, start_char - line_counts[params.location.start_line - 1].start, ending.location.start_char, @@ -1395,6 +1397,7 @@ def on_defs(target, operator, name, params, bodystmt) start_char = find_next_statement_start(params.location.end_char) bodystmt.bind( + self, start_char, start_char - line_counts[params.location.start_line - 1].start, ending.location.start_char, @@ -1434,6 +1437,7 @@ def on_do_block(block_var, bodystmt) start_char = find_next_statement_start(location.end_char) bodystmt.bind( + self, start_char, start_char - line_counts[location.start_line - 1].start, ending.location.start_char, @@ -1529,6 +1533,7 @@ def on_else(statements) start_char = find_next_statement_start(keyword.location.end_char) statements.bind( + self, start_char, start_char - line_counts[keyword.location.start_line - 1].start, ending.location.start_char, @@ -1554,6 +1559,7 @@ def on_elsif(predicate, statements, consequent) start_char = find_next_statement_start(predicate.location.end_char) statements.bind( + self, start_char, start_char - line_counts[predicate.location.start_line - 
1].start, ending.location.start_char, @@ -1677,6 +1683,7 @@ def on_ensure(statements) ending = find_keyword(:end) start_char = find_next_statement_start(keyword.location.end_char) statements.bind( + self, start_char, start_char - line_counts[keyword.location.start_line - 1].start, ending.location.start_char, @@ -1817,6 +1824,7 @@ def on_for(index, collection, statements) find_next_statement_start((delimiter || collection).location.end_char) statements.bind( + self, start_char, start_char - line_counts[(delimiter || collection).location.end_line - 1].start, @@ -2036,6 +2044,7 @@ def on_if(predicate, statements, consequent) start_char = find_next_statement_start((keyword || predicate).location.end_char) statements.bind( + self, start_char, start_char - line_counts[predicate.location.end_line - 1].start, ending.location.start_char, @@ -2069,7 +2078,7 @@ def on_if_mod(predicate, statement) IfNode.new( predicate: predicate, statements: - Statements.new(self, body: [statement], location: statement.location), + Statements.new(body: [statement], location: statement.location), consequent: nil, location: statement.location.to(predicate.location) ) @@ -2121,6 +2130,7 @@ def on_in(pattern, statements, consequent) start_char = find_next_statement_start((token || statements_start).location.end_char) statements.bind( + self, start_char, start_char - line_counts[statements_start.location.start_line - 1].start, @@ -2303,6 +2313,7 @@ def on_lambda(params, statements) start_char = find_next_statement_start(opening.location.end_char) statements.bind( + self, start_char, start_char - line_counts[opening.location.end_line - 1].start, closing.location.start_char, @@ -2587,6 +2598,7 @@ def on_module(constant, bodystmt) start_char = find_next_statement_start(constant.location.end_char) bodystmt.bind( + self, start_char, start_char - line_counts[constant.location.start_line - 1].start, ending.location.start_char, @@ -2863,7 +2875,7 @@ def on_program(statements) ) statements.body << @__end__ 
if @__end__ - statements.bind(0, 0, source.length, last_column) + statements.bind(self, 0, 0, source.length, last_column) program = Program.new(statements: statements, location: location) attach_comments(program, @comments) @@ -3197,6 +3209,7 @@ def on_rescue(exceptions, variable, statements, consequent) last_node = variable || exceptions || keyword start_char = find_next_statement_start(last_node.end_char) statements.bind( + self, start_char, start_char - line_counts[last_node.location.start_line - 1].start, char_pos, @@ -3315,6 +3328,7 @@ def on_sclass(target, bodystmt) start_char = find_next_statement_start(target.location.end_char) bodystmt.bind( + self, start_char, start_char - line_counts[target.location.start_line - 1].start, ending.location.start_char, @@ -3368,18 +3382,13 @@ def on_stmts_add(statements, statement) statements.location.to(statement.location) end - Statements.new( - self, - body: statements.body << statement, - location: location - ) + Statements.new(body: statements.body << statement, location: location) end # :call-seq: # on_stmts_new: () -> Statements def on_stmts_new Statements.new( - self, body: [], location: Location.fixed(line: lineno, char: char_pos, column: current_column) @@ -3444,6 +3453,7 @@ def on_string_embexpr(statements) embexpr_end = consume_token(EmbExprEnd) statements.bind( + self, embexpr_beg.location.end_char, embexpr_beg.location.end_column, embexpr_end.location.start_char, @@ -3794,6 +3804,7 @@ def on_unless(predicate, statements, consequent) start_char = find_next_statement_start((keyword || predicate).location.end_char) statements.bind( + self, start_char, start_char - line_counts[predicate.location.end_line - 1].start, ending.location.start_char, @@ -3816,7 +3827,7 @@ def on_unless_mod(predicate, statement) UnlessNode.new( predicate: predicate, statements: - Statements.new(self, body: [statement], location: statement.location), + Statements.new(body: [statement], location: statement.location), consequent: nil, 
location: statement.location.to(predicate.location) ) @@ -3839,6 +3850,7 @@ def on_until(predicate, statements) find_next_statement_start((delimiter || predicate).location.end_char) statements.bind( + self, start_char, start_char - line_counts[predicate.location.end_line - 1].start, ending.location.start_char, @@ -3860,7 +3872,7 @@ def on_until_mod(predicate, statement) UntilNode.new( predicate: predicate, statements: - Statements.new(self, body: [statement], location: statement.location), + Statements.new(body: [statement], location: statement.location), location: statement.location.to(predicate.location) ) end @@ -3935,6 +3947,7 @@ def on_when(arguments, statements, consequent) find_next_statement_start((token || statements_start).location.end_char) statements.bind( + self, start_char, start_char - line_counts[statements_start.location.start_line - 1].start, @@ -3967,6 +3980,7 @@ def on_while(predicate, statements) find_next_statement_start((delimiter || predicate).location.end_char) statements.bind( + self, start_char, start_char - line_counts[predicate.location.end_line - 1].start, ending.location.start_char, @@ -3988,7 +4002,7 @@ def on_while_mod(predicate, statement) WhileNode.new( predicate: predicate, statements: - Statements.new(self, body: [statement], location: statement.location), + Statements.new(body: [statement], location: statement.location), location: statement.location.to(predicate.location) ) end diff --git a/lib/syntax_tree/yarv/compiler.rb b/lib/syntax_tree/yarv/compiler.rb index 3aff3fe5..e1a8544a 100644 --- a/lib/syntax_tree/yarv/compiler.rb +++ b/lib/syntax_tree/yarv/compiler.rb @@ -1051,17 +1051,12 @@ def visit_if_op(node) IfNode.new( predicate: node.predicate, statements: - Statements.new( - nil, - body: [node.truthy], - location: Location.default - ), + Statements.new(body: [node.truthy], location: Location.default), consequent: Else.new( keyword: Kw.new(value: "else", location: Location.default), statements: Statements.new( - nil, body: 
[node.falsy], location: Location.default ), From 05401daab1fc49fc7a940c293e45b858851c9176 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 9 Feb 2023 17:26:36 -0500 Subject: [PATCH 43/58] Test that the syntax tree is marshalable --- test/syntax_tree_test.rb | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/syntax_tree_test.rb b/test/syntax_tree_test.rb index f12065b8..27aa6851 100644 --- a/test/syntax_tree_test.rb +++ b/test/syntax_tree_test.rb @@ -29,6 +29,11 @@ def test_parse_error assert_raises(Parser::ParseError) { SyntaxTree.parse("<>") } end + def test_marshalable + node = SyntaxTree.parse("1 + 2") + assert_operator(node, :===, Marshal.load(Marshal.dump(node))) + end + def test_maxwidth_format assert_equal("foo +\n bar\n", SyntaxTree.format("foo + bar", 5)) end From 7f4fe77b58e930106d391e4e91f055e7e0bf0e74 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 10 Feb 2023 10:11:40 -0500 Subject: [PATCH 44/58] Move mermaid rendering into its own file --- lib/syntax_tree.rb | 60 ++++++++------- lib/syntax_tree/mermaid.rb | 85 ++++++++++++++++++++++ lib/syntax_tree/visitor/mermaid_visitor.rb | 37 ++++------ 3 files changed, 130 insertions(+), 52 deletions(-) create mode 100644 lib/syntax_tree/mermaid.rb diff --git a/lib/syntax_tree.rb b/lib/syntax_tree.rb index e5bc5ab5..edf7688e 100644 --- a/lib/syntax_tree.rb +++ b/lib/syntax_tree.rb @@ -1,6 +1,5 @@ # frozen_string_literal: true -require "cgi" require "etc" require "json" require "pp" @@ -71,19 +70,6 @@ module SyntaxTree # that Syntax Tree can format arbitrary parts of a document. DEFAULT_INDENTATION = 0 - # This is a hook provided so that plugins can register themselves as the - # handler for a particular file type. - def self.register_handler(extension, handler) - HANDLERS[extension] = handler - end - - # Parses the given source and returns the syntax tree. - def self.parse(source) - parser = Parser.new(source) - response = parser.parse - response unless parser.error? 
- end - # Parses the given source and returns the formatted source. def self.format( source, @@ -98,6 +84,20 @@ def self.format( formatter.output.join end + # Indexes the given source code to return a list of all class, module, and + # method definitions. Used to quickly provide indexing capability for IDEs or + # documentation generation. + def self.index(source) + Index.index(source) + end + + # Indexes the given file to return a list of all class, module, and method + # definitions. Used to quickly provide indexing capability for IDEs or + # documentation generation. + def self.index_file(filepath) + Index.index_file(filepath) + end + # A convenience method for creating a new mutation visitor. def self.mutation visitor = Visitor::MutationVisitor.new @@ -105,6 +105,18 @@ def self.mutation visitor end + # Parses the given source and returns the syntax tree. + def self.parse(source) + parser = Parser.new(source) + response = parser.parse + response unless parser.error? + end + + # Parses the given file and returns the syntax tree. + def self.parse_file(filepath) + parse(read(filepath)) + end + # Returns the source from the given filepath taking into account any potential # magic encoding comments. def self.read(filepath) @@ -120,23 +132,15 @@ def self.read(filepath) File.read(filepath, encoding: encoding) end + # This is a hook provided so that plugins can register themselves as the + # handler for a particular file type. + def self.register_handler(extension, handler) + HANDLERS[extension] = handler + end + # Searches through the given source using the given pattern and yields each # node in the tree that matches the pattern to the given block. def self.search(source, query, &block) Search.new(Pattern.new(query).compile).scan(parse(source), &block) end - - # Indexes the given source code to return a list of all class, module, and - # method definitions. Used to quickly provide indexing capability for IDEs or - # documentation generation. 
- def self.index(source) - Index.index(source) - end - - # Indexes the given file to return a list of all class, module, and method - # definitions. Used to quickly provide indexing capability for IDEs or - # documentation generation. - def self.index_file(filepath) - Index.index_file(filepath) - end end diff --git a/lib/syntax_tree/mermaid.rb b/lib/syntax_tree/mermaid.rb new file mode 100644 index 00000000..fa923876 --- /dev/null +++ b/lib/syntax_tree/mermaid.rb @@ -0,0 +1,85 @@ +# frozen_string_literal: true + +require "cgi" + +module SyntaxTree + # This module is responsible for rendering mermaid flow charts. + module Mermaid + class Node + SHAPES = %i[circle rectangle stadium].freeze + + attr_reader :id, :label, :shape + + def initialize(id, label, shape) + raise unless SHAPES.include?(shape) + + @id = id + @label = label + @shape = shape + end + + def render + left_bound, right_bound = + case shape + when :circle + ["((", "))"] + when :rectangle + ["[", "]"] + when :stadium + ["([", "])"] + end + + " #{id}#{left_bound}\"#{CGI.escapeHTML(label)}\"#{right_bound}" + end + end + + class Edge + TYPES = %i[directed].freeze + + attr_reader :from, :to, :label, :type + + def initialize(from, to, label, type) + raise unless TYPES.include?(type) + + @from = from + @to = to + @label = label + @type = type + end + + def render + case type + when :directed + " #{from.id} -- \"#{CGI.escapeHTML(label)}\" --> #{to.id}" + end + end + end + + class FlowChart + attr_reader :nodes, :edges + + def initialize + @nodes = {} + @edges = [] + end + + def edge(from, to, label, type = :directed) + edges << Edge.new(from, to, label, type) + end + + def node(id, label, shape = :rectangle) + nodes[id] = Node.new(id, label, shape) + end + + def render + output = StringIO.new + output.puts("flowchart TD") + + nodes.each_value { |node| output.puts(node.render) } + edges.each { |edge| output.puts(edge.render) } + + output.string + end + end + end +end diff --git 
a/lib/syntax_tree/visitor/mermaid_visitor.rb b/lib/syntax_tree/visitor/mermaid_visitor.rb index 2b06049a..e63ee2a6 100644 --- a/lib/syntax_tree/visitor/mermaid_visitor.rb +++ b/lib/syntax_tree/visitor/mermaid_visitor.rb @@ -4,18 +4,16 @@ module SyntaxTree class Visitor # This visitor transforms the AST into a mermaid flow chart. class MermaidVisitor < FieldVisitor - attr_reader :output, :target + attr_reader :flowchart, :target def initialize - @output = StringIO.new - @output.puts("flowchart TD") - + @flowchart = Mermaid::FlowChart.new @target = nil end def visit_program(node) super - output.string + flowchart.render end private @@ -26,19 +24,13 @@ def comments(node) def field(name, value) case value - when Node - node_id = visit(value) - output.puts(" #{target} -- \"#{name}\" --> #{node_id}") - when String - node_id = "#{target}_#{name}" - output.puts(" #{node_id}([#{CGI.escapeHTML(value.inspect)}])") - output.puts(" #{target} -- \"#{name}\" --> #{node_id}") when nil # skip + when Node + flowchart.edge(target, visit(value), name) else - node_id = "#{target}_#{name}" - output.puts(" #{node_id}([\"#{CGI.escapeHTML(value.inspect)}\"])") - output.puts(" #{target} -- \"#{name}\" --> #{node_id}") + to = flowchart.node("#{target.id}_#{name}", value.inspect, :stadium) + flowchart.edge(target, to, name) end end @@ -52,11 +44,8 @@ def node(node, type) previous_target = target begin - @target = "node_#{node.object_id}" - + @target = flowchart.node("node_#{node.object_id}", type) yield - - output.puts(" #{@target}[\"#{type}\"]") @target ensure @target = previous_target @@ -65,11 +54,11 @@ def node(node, type) def pairs(name, values) values.each_with_index do |(key, value), index| - node_id = "#{target}_#{name}_#{index}" - output.puts(" #{node_id}((\" \"))") - output.puts(" #{target} -- \"#{name}[#{index}]\" --> #{node_id}") - output.puts(" #{node_id} -- \"[0]\" --> #{visit(key)}") - output.puts(" #{node_id} -- \"[1]\" --> #{visit(value)}") if value + to = 
flowchart.node("#{target.id}_#{name}_#{index}", " ", :circle) + + flowchart.edge(target, to, "#{name}[#{index}]") + flowchart.edge(to, visit(key), "[0]") + flowchart.edge(to, visit(value), "[1]") if value end end From 103236bb822f7cb7a449a559321e82f0bef75e4c Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 10 Feb 2023 10:26:34 -0500 Subject: [PATCH 45/58] Render CFG using new mermaid code --- lib/syntax_tree.rb | 1 + lib/syntax_tree/mermaid.rb | 75 +++++++++++++++------- lib/syntax_tree/visitor/mermaid_visitor.rb | 4 +- lib/syntax_tree/yarv/control_flow_graph.rb | 37 +++++------ 4 files changed, 74 insertions(+), 43 deletions(-) diff --git a/lib/syntax_tree.rb b/lib/syntax_tree.rb index edf7688e..9cbd49c7 100644 --- a/lib/syntax_tree.rb +++ b/lib/syntax_tree.rb @@ -23,6 +23,7 @@ require_relative "syntax_tree/visitor/environment" require_relative "syntax_tree/visitor/with_environment" +require_relative "syntax_tree/mermaid" require_relative "syntax_tree/parser" require_relative "syntax_tree/pattern" require_relative "syntax_tree/search" diff --git a/lib/syntax_tree/mermaid.rb b/lib/syntax_tree/mermaid.rb index fa923876..f5c85f2f 100644 --- a/lib/syntax_tree/mermaid.rb +++ b/lib/syntax_tree/mermaid.rb @@ -6,7 +6,7 @@ module SyntaxTree # This module is responsible for rendering mermaid flow charts. 
module Mermaid class Node - SHAPES = %i[circle rectangle stadium].freeze + SHAPES = %i[circle rectangle rounded stadium].freeze attr_reader :id, :label, :shape @@ -19,17 +19,23 @@ def initialize(id, label, shape) end def render - left_bound, right_bound = - case shape - when :circle - ["((", "))"] - when :rectangle - ["[", "]"] - when :stadium - ["([", "])"] - end + left_bound, right_bound = bounds + "#{id}#{left_bound}\"#{CGI.escapeHTML(label)}\"#{right_bound}" + end - " #{id}#{left_bound}\"#{CGI.escapeHTML(label)}\"#{right_bound}" + private + + def bounds + case shape + when :circle + ["((", "))"] + when :rectangle + ["[", "]"] + when :rounded + ["(", ")"] + when :stadium + ["([", "])"] + end end end @@ -50,34 +56,57 @@ def initialize(from, to, label, type) def render case type when :directed - " #{from.id} -- \"#{CGI.escapeHTML(label)}\" --> #{to.id}" + if label + "#{from.id} -- \"#{CGI.escapeHTML(label)}\" --> #{to.id}" + else + "#{from.id} --> #{to.id}" + end end end end class FlowChart - attr_reader :nodes, :edges + attr_reader :output, :prefix, :nodes def initialize + @output = StringIO.new + @output.puts("flowchart TD") + @prefix = " " @nodes = {} - @edges = [] end - def edge(from, to, label, type = :directed) - edges << Edge.new(from, to, label, type) + def edge(from, to, label = nil, type: :directed) + edge = Edge.new(from, to, label, type) + output.puts("#{prefix}#{edge.render}") end - def node(id, label, shape = :rectangle) - nodes[id] = Node.new(id, label, shape) + def fetch(id) + nodes.fetch(id) end - def render - output = StringIO.new - output.puts("flowchart TD") + def node(id, label, shape: :rectangle) + node = Node.new(id, label, shape) + nodes[id] = node + + output.puts("#{prefix}#{nodes[id].render}") + node + end + + def subgraph(id) + output.puts("#{prefix}subgraph #{id}") + + previous = prefix + @prefix = "#{prefix} " - nodes.each_value { |node| output.puts(node.render) } - edges.each { |edge| output.puts(edge.render) } + begin + yield + 
ensure + @prefix = previous + output.puts("#{prefix}end") + end + end + def render output.string end end diff --git a/lib/syntax_tree/visitor/mermaid_visitor.rb b/lib/syntax_tree/visitor/mermaid_visitor.rb index e63ee2a6..1694952d 100644 --- a/lib/syntax_tree/visitor/mermaid_visitor.rb +++ b/lib/syntax_tree/visitor/mermaid_visitor.rb @@ -29,7 +29,7 @@ def field(name, value) when Node flowchart.edge(target, visit(value), name) else - to = flowchart.node("#{target.id}_#{name}", value.inspect, :stadium) + to = flowchart.node("#{target.id}_#{name}", value.inspect, shape: :stadium) flowchart.edge(target, to, name) end end @@ -54,7 +54,7 @@ def node(node, type) def pairs(name, values) values.each_with_index do |(key, value), index| - to = flowchart.node("#{target.id}_#{name}_#{index}", " ", :circle) + to = flowchart.node("#{target.id}_#{name}_#{index}", " ", shape: :circle) flowchart.edge(target, to, "#{name}[#{index}]") flowchart.edge(to, visit(key), "[0]") diff --git a/lib/syntax_tree/yarv/control_flow_graph.rb b/lib/syntax_tree/yarv/control_flow_graph.rb index 73d30208..927f535a 100644 --- a/lib/syntax_tree/yarv/control_flow_graph.rb +++ b/lib/syntax_tree/yarv/control_flow_graph.rb @@ -208,25 +208,24 @@ def to_son end def to_mermaid - output = StringIO.new - output.puts("flowchart TD") + flowchart = Mermaid::FlowChart.new + disasm = Disassembler::Mermaid.new - fmt = Disassembler::Mermaid.new blocks.each do |block| - output.puts(" subgraph #{block.id}") - previous = nil - - block.each_with_length do |insn, length| - node_id = "node_#{length}" - label = "%04d %s" % [length, insn.disasm(fmt)] - - output.puts(" #{node_id}(\"#{CGI.escapeHTML(label)}\")") - output.puts(" #{previous} --> #{node_id}") if previous - - previous = node_id + flowchart.subgraph(block.id) do + previous = nil + + block.each_with_length do |insn, length| + node = + flowchart.node( + "node_#{length}", + "%04d %s" % [length, insn.disasm(disasm)] + ) + + flowchart.edge(previous, node) if previous + 
previous = node + end end - - output.puts(" end") end blocks.each do |block| @@ -235,11 +234,13 @@ def to_mermaid block.block_start + block.insns.sum(&:length) - block.insns.last.length - output.puts(" node_#{offset} --> node_#{outgoing.block_start}") + from = flowchart.fetch("node_#{offset}") + to = flowchart.fetch("node_#{outgoing.block_start}") + flowchart.edge(from, to) end end - output.string + flowchart.render end # This method is used to verify that the control flow graph is well From 6dbe713baf4dd6fd87183d77dfc38340d7bbbf6f Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 10 Feb 2023 10:29:21 -0500 Subject: [PATCH 46/58] Fix up data flow mermaid rendering --- lib/syntax_tree/yarv/data_flow_graph.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/syntax_tree/yarv/data_flow_graph.rb b/lib/syntax_tree/yarv/data_flow_graph.rb index ace40296..185eeee5 100644 --- a/lib/syntax_tree/yarv/data_flow_graph.rb +++ b/lib/syntax_tree/yarv/data_flow_graph.rb @@ -155,8 +155,8 @@ def to_mermaid end insn_flows[length].in.each do |input| - if input.is_a?(Integer) - output.puts(" node_#{input} --> #{node_id}") + if input.is_a?(LocalArgument) + output.puts(" node_#{input.length} --> #{node_id}") links << "green" end end From 72619fb4469786b62a3e97d63c30d62c404f31b3 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 10 Feb 2023 10:38:17 -0500 Subject: [PATCH 47/58] Render DFG with new mermaid renderer --- lib/syntax_tree/mermaid.rb | 88 +++++++++++++--------- lib/syntax_tree/visitor/mermaid_visitor.rb | 10 +-- lib/syntax_tree/yarv/control_flow_graph.rb | 4 +- lib/syntax_tree/yarv/data_flow_graph.rb | 57 ++++++-------- 4 files changed, 84 insertions(+), 75 deletions(-) diff --git a/lib/syntax_tree/mermaid.rb b/lib/syntax_tree/mermaid.rb index f5c85f2f..28cc095a 100644 --- a/lib/syntax_tree/mermaid.rb +++ b/lib/syntax_tree/mermaid.rb @@ -5,6 +5,39 @@ module SyntaxTree # This module is responsible for rendering mermaid flow charts. 
module Mermaid + def self.escape(label) + "\"#{CGI.escapeHTML(label)}\"" + end + + class Link + TYPES = %i[directed].freeze + COLORS = %i[green red].freeze + + attr_reader :from, :to, :label, :type, :color + + def initialize(from, to, label, type, color) + raise if !TYPES.include?(type) + raise if color && !COLORS.include?(color) + + @from = from + @to = to + @label = label + @type = type + @color = color + end + + def render + case type + when :directed + if label + "#{from.id} -- #{Mermaid.escape(label)} --> #{to.id}" + else + "#{from.id} --> #{to.id}" + end + end + end + end + class Node SHAPES = %i[circle rectangle rounded stadium].freeze @@ -20,7 +53,7 @@ def initialize(id, label, shape) def render left_bound, right_bound = bounds - "#{id}#{left_bound}\"#{CGI.escapeHTML(label)}\"#{right_bound}" + "#{id}#{left_bound}#{Mermaid.escape(label)}#{right_bound}" end private @@ -39,51 +72,30 @@ def bounds end end - class Edge - TYPES = %i[directed].freeze - - attr_reader :from, :to, :label, :type - - def initialize(from, to, label, type) - raise unless TYPES.include?(type) - - @from = from - @to = to - @label = label - @type = type - end - - def render - case type - when :directed - if label - "#{from.id} -- \"#{CGI.escapeHTML(label)}\" --> #{to.id}" - else - "#{from.id} --> #{to.id}" - end - end - end - end - class FlowChart - attr_reader :output, :prefix, :nodes + attr_reader :output, :prefix, :nodes, :links def initialize @output = StringIO.new @output.puts("flowchart TD") @prefix = " " - @nodes = {} - end - def edge(from, to, label = nil, type: :directed) - edge = Edge.new(from, to, label, type) - output.puts("#{prefix}#{edge.render}") + @nodes = {} + @links = [] end def fetch(id) nodes.fetch(id) end + def link(from, to, label = nil, type: :directed, color: nil) + link = Link.new(from, to, label, type, color) + links << link + + output.puts("#{prefix}#{link.render}") + link + end + def node(id, label, shape: :rectangle) node = Node.new(id, label, shape) nodes[id] = 
node @@ -92,8 +104,8 @@ def node(id, label, shape: :rectangle) node end - def subgraph(id) - output.puts("#{prefix}subgraph #{id}") + def subgraph(label) + output.puts("#{prefix}subgraph #{Mermaid.escape(label)}") previous = prefix @prefix = "#{prefix} " @@ -107,6 +119,12 @@ def subgraph(id) end def render + links.each_with_index do |link, index| + if link.color + output.puts("#{prefix}linkStyle #{index} stroke:#{link.color}") + end + end + output.string end end diff --git a/lib/syntax_tree/visitor/mermaid_visitor.rb b/lib/syntax_tree/visitor/mermaid_visitor.rb index 1694952d..542fe192 100644 --- a/lib/syntax_tree/visitor/mermaid_visitor.rb +++ b/lib/syntax_tree/visitor/mermaid_visitor.rb @@ -27,10 +27,10 @@ def field(name, value) when nil # skip when Node - flowchart.edge(target, visit(value), name) + flowchart.link(target, visit(value), name) else to = flowchart.node("#{target.id}_#{name}", value.inspect, shape: :stadium) - flowchart.edge(target, to, name) + flowchart.link(target, to, name) end end @@ -56,9 +56,9 @@ def pairs(name, values) values.each_with_index do |(key, value), index| to = flowchart.node("#{target.id}_#{name}_#{index}", " ", shape: :circle) - flowchart.edge(target, to, "#{name}[#{index}]") - flowchart.edge(to, visit(key), "[0]") - flowchart.edge(to, visit(value), "[1]") if value + flowchart.link(target, to, "#{name}[#{index}]") + flowchart.link(to, visit(key), "[0]") + flowchart.link(to, visit(value), "[1]") if value end end diff --git a/lib/syntax_tree/yarv/control_flow_graph.rb b/lib/syntax_tree/yarv/control_flow_graph.rb index 927f535a..5da2cc14 100644 --- a/lib/syntax_tree/yarv/control_flow_graph.rb +++ b/lib/syntax_tree/yarv/control_flow_graph.rb @@ -222,7 +222,7 @@ def to_mermaid "%04d %s" % [length, insn.disasm(disasm)] ) - flowchart.edge(previous, node) if previous + flowchart.link(previous, node) if previous previous = node end end @@ -236,7 +236,7 @@ def to_mermaid from = flowchart.fetch("node_#{offset}") to = 
flowchart.fetch("node_#{outgoing.block_start}") - flowchart.edge(from, to) + flowchart.link(from, to) end end diff --git a/lib/syntax_tree/yarv/data_flow_graph.rb b/lib/syntax_tree/yarv/data_flow_graph.rb index 185eeee5..4adf2bcf 100644 --- a/lib/syntax_tree/yarv/data_flow_graph.rb +++ b/lib/syntax_tree/yarv/data_flow_graph.rb @@ -125,11 +125,8 @@ def to_son end def to_mermaid - output = StringIO.new - output.puts("flowchart TD") - - fmt = Disassembler::Mermaid.new - links = [] + flowchart = Mermaid::FlowChart.new + disasm = Disassembler::Mermaid.new blocks.each do |block| block_flow = block_flows.fetch(block.id) @@ -140,31 +137,28 @@ def to_mermaid block.id end - output.puts(" subgraph \"#{CGI.escapeHTML(graph_name)}\"") - previous = nil - - block.each_with_length do |insn, length| - node_id = "node_#{length}" - label = "%04d %s" % [length, insn.disasm(fmt)] - - output.puts(" #{node_id}(\"#{CGI.escapeHTML(label)}\")") + flowchart.subgraph(graph_name) do + previous = nil - if previous - output.puts(" #{previous} --> #{node_id}") - links << "red" - end - - insn_flows[length].in.each do |input| - if input.is_a?(LocalArgument) - output.puts(" node_#{input.length} --> #{node_id}") - links << "green" + block.each_with_length do |insn, length| + node = + flowchart.node( + "node_#{length}", + "%04d %s" % [length, insn.disasm(disasm)], + shape: :rounded + ) + + flowchart.link(previous, node, color: :red) if previous + insn_flows[length].in.each do |input| + if input.is_a?(LocalArgument) + from = flowchart.fetch("node_#{input.length}") + flowchart.link(from, node, color: :green) + end end - end - previous = node_id + previous = node + end end - - output.puts(" end") end blocks.each do |block| @@ -173,16 +167,13 @@ def to_mermaid block.block_start + block.insns.sum(&:length) - block.insns.last.length - output.puts(" node_#{offset} --> node_#{outgoing.block_start}") - links << "red" + from = flowchart.fetch("node_#{offset}") + to = 
flowchart.fetch("node_#{outgoing.block_start}") + flowchart.link(from, to, color: :red) end end - links.each_with_index do |color, index| - output.puts(" linkStyle #{index} stroke:#{color}") - end - - output.string + flowchart.render end # Verify that we constructed the data flow graph correctly. From a8fd78b0c6e4070fdf92d17bb4de834946e154df Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 10 Feb 2023 10:47:00 -0500 Subject: [PATCH 48/58] Render sea of nodes to mermaid using new API --- .rubocop.yml | 3 + lib/syntax_tree/mermaid.rb | 170 +++++++++++++-------- lib/syntax_tree/visitor/mermaid_visitor.rb | 11 +- lib/syntax_tree/yarv/control_flow_graph.rb | 55 ++++--- lib/syntax_tree/yarv/data_flow_graph.rb | 79 +++++----- lib/syntax_tree/yarv/disassembler.rb | 6 +- lib/syntax_tree/yarv/sea_of_nodes.rb | 67 +++----- test/yarv_test.rb | 100 ++++++------ 8 files changed, 261 insertions(+), 230 deletions(-) diff --git a/.rubocop.yml b/.rubocop.yml index 33636c44..21beca1b 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -117,6 +117,9 @@ Style/FormatStringToken: Style/GuardClause: Enabled: false +Style/HashLikeCase: + Enabled: false + Style/IdenticalConditionalBranches: Enabled: false diff --git a/lib/syntax_tree/mermaid.rb b/lib/syntax_tree/mermaid.rb index 28cc095a..70cbc054 100644 --- a/lib/syntax_tree/mermaid.rb +++ b/lib/syntax_tree/mermaid.rb @@ -3,20 +3,85 @@ require "cgi" module SyntaxTree - # This module is responsible for rendering mermaid flow charts. + # This module is responsible for rendering mermaid (https://mermaid.js.org/) + # flow charts. module Mermaid - def self.escape(label) - "\"#{CGI.escapeHTML(label)}\"" + # This is the main class that handles rendering a flowchart. It keeps track + # of its nodes and links and renders them according to the mermaid syntax. 
+ class FlowChart + attr_reader :output, :prefix, :nodes, :links + + def initialize + @output = StringIO.new + @output.puts("flowchart TD") + @prefix = " " + + @nodes = {} + @links = [] + end + + # Retrieve a node that has already been added to the flowchart by its id. + def fetch(id) + nodes.fetch(id) + end + + # Add a link to the flowchart between two nodes with an optional label. + def link(from, to, label = nil, type: :directed, color: nil) + link = Link.new(from, to, label, type, color) + links << link + + output.puts("#{prefix}#{link.render}") + link + end + + # Add a node to the flowchart with an optional label. + def node(id, label = " ", shape: :rectangle) + node = Node.new(id, label, shape) + nodes[id] = node + + output.puts("#{prefix}#{nodes[id].render}") + node + end + + # Add a subgraph to the flowchart. Within the given block, all of the + # nodes will be rendered within the subgraph. + def subgraph(label) + output.puts("#{prefix}subgraph #{Mermaid.escape(label)}") + + previous = prefix + @prefix = "#{prefix} " + + begin + yield + ensure + @prefix = previous + output.puts("#{prefix}end") + end + end + + # Return the rendered flowchart. + def render + links.each_with_index do |link, index| + if link.color + output.puts("#{prefix}linkStyle #{index} stroke:#{link.color}") + end + end + + output.string + end end + # This class represents a link between two nodes in a flowchart. It is not + # meant to be interacted with directly, but rather used as a data structure + # by the FlowChart class. 
class Link - TYPES = %i[directed].freeze + TYPES = %i[directed dotted].freeze COLORS = %i[green red].freeze attr_reader :from, :to, :label, :type, :color def initialize(from, to, label, type, color) - raise if !TYPES.include?(type) + raise unless TYPES.include?(type) raise if color && !COLORS.include?(color) @from = from @@ -27,17 +92,31 @@ def initialize(from, to, label, type, color) end def render + left_side, right_side, full_side = sides + + if label + escaped = Mermaid.escape(label) + "#{from.id} #{left_side} #{escaped} #{right_side} #{to.id}" + else + "#{from.id} #{full_side} #{to.id}" + end + end + + private + + def sides case type when :directed - if label - "#{from.id} -- #{Mermaid.escape(label)} --> #{to.id}" - else - "#{from.id} --> #{to.id}" - end + %w[-- --> -->] + when :dotted + %w[-. .-> -.->] end end end + # This class represents a node in a flowchart. Unlike the Link class, it can + # be used directly. It is the return value of the #node method, and is meant + # to be passed around to #link methods to create links between nodes. class Node SHAPES = %i[circle rectangle rounded stadium].freeze @@ -61,72 +140,37 @@ def render def bounds case shape when :circle - ["((", "))"] + %w[(( ))] when :rectangle ["[", "]"] when :rounded - ["(", ")"] + %w[( )] when :stadium ["([", "])"] end end end - class FlowChart - attr_reader :output, :prefix, :nodes, :links - - def initialize - @output = StringIO.new - @output.puts("flowchart TD") - @prefix = " " - - @nodes = {} - @links = [] - end - - def fetch(id) - nodes.fetch(id) - end - - def link(from, to, label = nil, type: :directed, color: nil) - link = Link.new(from, to, label, type, color) - links << link - - output.puts("#{prefix}#{link.render}") - link + class << self + # Escape a label to be used in the mermaid syntax. This is used to escape + # HTML entities such that they render properly within the quotes. 
+ def escape(label) + "\"#{CGI.escapeHTML(label)}\"" end - def node(id, label, shape: :rectangle) - node = Node.new(id, label, shape) - nodes[id] = node - - output.puts("#{prefix}#{nodes[id].render}") - node - end - - def subgraph(label) - output.puts("#{prefix}subgraph #{Mermaid.escape(label)}") - - previous = prefix - @prefix = "#{prefix} " - - begin - yield - ensure - @prefix = previous - output.puts("#{prefix}end") + # Create a new flowchart. If a block is given, it will be yielded to and + # the flowchart will be rendered. Otherwise, the flowchart will be + # returned. + def flowchart + flowchart = FlowChart.new + + if block_given? + yield flowchart + flowchart.render + else + flowchart end end - - def render - links.each_with_index do |link, index| - if link.color - output.puts("#{prefix}linkStyle #{index} stroke:#{link.color}") - end - end - - output.string - end end end end diff --git a/lib/syntax_tree/visitor/mermaid_visitor.rb b/lib/syntax_tree/visitor/mermaid_visitor.rb index 542fe192..504e2fb0 100644 --- a/lib/syntax_tree/visitor/mermaid_visitor.rb +++ b/lib/syntax_tree/visitor/mermaid_visitor.rb @@ -7,7 +7,7 @@ class MermaidVisitor < FieldVisitor attr_reader :flowchart, :target def initialize - @flowchart = Mermaid::FlowChart.new + @flowchart = Mermaid.flowchart @target = nil end @@ -29,7 +29,12 @@ def field(name, value) when Node flowchart.link(target, visit(value), name) else - to = flowchart.node("#{target.id}_#{name}", value.inspect, shape: :stadium) + to = + flowchart.node( + "#{target.id}_#{name}", + value.inspect, + shape: :stadium + ) flowchart.link(target, to, name) end end @@ -54,7 +59,7 @@ def node(node, type) def pairs(name, values) values.each_with_index do |(key, value), index| - to = flowchart.node("#{target.id}_#{name}_#{index}", " ", shape: :circle) + to = flowchart.node("#{target.id}_#{name}_#{index}", shape: :circle) flowchart.link(target, to, "#{name}[#{index}]") flowchart.link(to, visit(key), "[0]") diff --git 
a/lib/syntax_tree/yarv/control_flow_graph.rb b/lib/syntax_tree/yarv/control_flow_graph.rb index 5da2cc14..2829bb21 100644 --- a/lib/syntax_tree/yarv/control_flow_graph.rb +++ b/lib/syntax_tree/yarv/control_flow_graph.rb @@ -208,39 +208,38 @@ def to_son end def to_mermaid - flowchart = Mermaid::FlowChart.new - disasm = Disassembler::Mermaid.new - - blocks.each do |block| - flowchart.subgraph(block.id) do - previous = nil - - block.each_with_length do |insn, length| - node = - flowchart.node( - "node_#{length}", - "%04d %s" % [length, insn.disasm(disasm)] - ) - - flowchart.link(previous, node) if previous - previous = node + Mermaid.flowchart do |flowchart| + disasm = Disassembler::Squished.new + + blocks.each do |block| + flowchart.subgraph(block.id) do + previous = nil + + block.each_with_length do |insn, length| + node = + flowchart.node( + "node_#{length}", + "%04d %s" % [length, insn.disasm(disasm)] + ) + + flowchart.link(previous, node) if previous + previous = node + end end end - end - blocks.each do |block| - block.outgoing_blocks.each do |outgoing| - offset = - block.block_start + block.insns.sum(&:length) - - block.insns.last.length - - from = flowchart.fetch("node_#{offset}") - to = flowchart.fetch("node_#{outgoing.block_start}") - flowchart.link(from, to) + blocks.each do |block| + block.outgoing_blocks.each do |outgoing| + offset = + block.block_start + block.insns.sum(&:length) - + block.insns.last.length + + from = flowchart.fetch("node_#{offset}") + to = flowchart.fetch("node_#{outgoing.block_start}") + flowchart.link(from, to) + end end end - - flowchart.render end # This method is used to verify that the control flow graph is well diff --git a/lib/syntax_tree/yarv/data_flow_graph.rb b/lib/syntax_tree/yarv/data_flow_graph.rb index 4adf2bcf..aedee9ba 100644 --- a/lib/syntax_tree/yarv/data_flow_graph.rb +++ b/lib/syntax_tree/yarv/data_flow_graph.rb @@ -125,55 +125,54 @@ def to_son end def to_mermaid - flowchart = Mermaid::FlowChart.new - disasm = 
Disassembler::Mermaid.new + Mermaid.flowchart do |flowchart| + disasm = Disassembler::Squished.new - blocks.each do |block| - block_flow = block_flows.fetch(block.id) - graph_name = - if block_flow.in.any? - "#{block.id} #{block_flows[block.id].in.join(", ")}" - else - block.id - end - - flowchart.subgraph(graph_name) do - previous = nil + blocks.each do |block| + block_flow = block_flows.fetch(block.id) + graph_name = + if block_flow.in.any? + "#{block.id} #{block_flows[block.id].in.join(", ")}" + else + block.id + end - block.each_with_length do |insn, length| - node = - flowchart.node( - "node_#{length}", - "%04d %s" % [length, insn.disasm(disasm)], - shape: :rounded - ) - - flowchart.link(previous, node, color: :red) if previous - insn_flows[length].in.each do |input| - if input.is_a?(LocalArgument) - from = flowchart.fetch("node_#{input.length}") - flowchart.link(from, node, color: :green) + flowchart.subgraph(graph_name) do + previous = nil + + block.each_with_length do |insn, length| + node = + flowchart.node( + "node_#{length}", + "%04d %s" % [length, insn.disasm(disasm)], + shape: :rounded + ) + + flowchart.link(previous, node, color: :red) if previous + insn_flows[length].in.each do |input| + if input.is_a?(LocalArgument) + from = flowchart.fetch("node_#{input.length}") + flowchart.link(from, node, color: :green) + end end - end - previous = node + previous = node + end end end - end - blocks.each do |block| - block.outgoing_blocks.each do |outgoing| - offset = - block.block_start + block.insns.sum(&:length) - - block.insns.last.length - - from = flowchart.fetch("node_#{offset}") - to = flowchart.fetch("node_#{outgoing.block_start}") - flowchart.link(from, to, color: :red) + blocks.each do |block| + block.outgoing_blocks.each do |outgoing| + offset = + block.block_start + block.insns.sum(&:length) - + block.insns.last.length + + from = flowchart.fetch("node_#{offset}") + to = flowchart.fetch("node_#{outgoing.block_start}") + flowchart.link(from, to, 
color: :red) + end end end - - flowchart.render end # Verify that we constructed the data flow graph correctly. diff --git a/lib/syntax_tree/yarv/disassembler.rb b/lib/syntax_tree/yarv/disassembler.rb index f60af0fd..dac220fd 100644 --- a/lib/syntax_tree/yarv/disassembler.rb +++ b/lib/syntax_tree/yarv/disassembler.rb @@ -4,9 +4,9 @@ module SyntaxTree module YARV class Disassembler # This class is another object that handles disassembling a YARV - # instruction sequence but it does so in order to provide a label for a - # mermaid diagram. - class Mermaid + # instruction sequence but it renders it without any of the extra spacing + # or alignment. + class Squished def calldata(value) value.inspect end diff --git a/lib/syntax_tree/yarv/sea_of_nodes.rb b/lib/syntax_tree/yarv/sea_of_nodes.rb index 181d729c..33ef14f7 100644 --- a/lib/syntax_tree/yarv/sea_of_nodes.rb +++ b/lib/syntax_tree/yarv/sea_of_nodes.rb @@ -27,7 +27,7 @@ def id end def label - "%04d %s" % [offset, insn.disasm(Disassembler::Mermaid.new)] + "%04d %s" % [offset, insn.disasm(Disassembler::Squished.new)] end end @@ -466,53 +466,34 @@ def initialize(dfg, nodes, local_graphs) end def to_mermaid - output = StringIO.new - output.puts("flowchart TD") - - nodes.each do |node| - escaped = "\"#{CGI.escapeHTML(node.label)}\"" - output.puts(" node_#{node.id}(#{escaped})") - end - - link_counter = 0 - nodes.each do |producer| - producer.outputs.each do |consumer_edge| - case consumer_edge.type - when :data - edge = "-->" - edge_style = "stroke:green;" - when :control - edge = "-->" - edge_style = "stroke:red;" - when :info - edge = "-.->" - else - raise - end - - label = - if !consumer_edge.label - "" - elsif consumer_edge.to.is_a?(PhiNode) - # Edges into phi nodes are labelled by the offset of the - # instruction going into the merge. 
- "|%04d| " % consumer_edge.label - else - "|#{consumer_edge.label}| " - end + Mermaid.flowchart do |flowchart| + nodes.each do |node| + flowchart.node("node_#{node.id}", node.label, shape: :rounded) + end - to_id = "node_#{consumer_edge.to.id}" - output.puts(" node_#{producer.id} #{edge} #{label}#{to_id}") + nodes.each do |producer| + producer.outputs.each do |consumer_edge| + label = + if !consumer_edge.label + # No label. + elsif consumer_edge.to.is_a?(PhiNode) + # Edges into phi nodes are labelled by the offset of the + # instruction going into the merge. + "%04d" % consumer_edge.label + else + consumer_edge.label.to_s + end - if edge_style - output.puts(" linkStyle #{link_counter} #{edge_style}") + flowchart.link( + flowchart.fetch("node_#{producer.id}"), + flowchart.fetch("node_#{consumer_edge.to.id}"), + label, + type: consumer_edge.type == :info ? :dotted : :directed, + color: { data: :green, control: :red }[consumer_edge.type] + ) end - - link_counter += 1 end end - - output.string end def verify diff --git a/test/yarv_test.rb b/test/yarv_test.rb index a1e89568..78622434 100644 --- a/test/yarv_test.rb +++ b/test/yarv_test.rb @@ -386,35 +386,35 @@ def test_son node_16("0016 leave") node_1000("1000 ψ") node_1001("1001 φ") - node_0 --> |0| node_3 - linkStyle 0 stroke:green; - node_2 --> |1| node_3 - linkStyle 1 stroke:green; + node_0 -- "0" --> node_3 + node_2 -- "1" --> node_3 node_3 --> node_5 - linkStyle 2 stroke:red; - node_3 --> |0| node_5 - linkStyle 3 stroke:green; - node_5 --> |branch0| node_11 - linkStyle 4 stroke:red; - node_5 --> |fallthrough| node_1000 - linkStyle 5 stroke:red; - node_7 --> |0009| node_1001 - linkStyle 6 stroke:green; - node_11 --> |branch0| node_1000 - linkStyle 7 stroke:red; - node_11 --> |0011| node_1001 - linkStyle 8 stroke:green; - node_12 --> |1| node_14 - linkStyle 9 stroke:green; + node_3 -- "0" --> node_5 + node_5 -- "branch0" --> node_11 + node_5 -- "fallthrough" --> node_1000 + node_7 -- "0009" --> node_1001 + node_11 
-- "branch0" --> node_1000 + node_11 -- "0011" --> node_1001 + node_12 -- "1" --> node_14 node_14 --> node_16 - linkStyle 10 stroke:red; - node_14 --> |0| node_16 - linkStyle 11 stroke:green; + node_14 -- "0" --> node_16 node_1000 --> node_14 - linkStyle 12 stroke:red; node_1001 -.-> node_1000 - node_1001 --> |0| node_14 - linkStyle 14 stroke:green; + node_1001 -- "0" --> node_14 + linkStyle 0 stroke:green + linkStyle 1 stroke:green + linkStyle 2 stroke:red + linkStyle 3 stroke:green + linkStyle 4 stroke:red + linkStyle 5 stroke:red + linkStyle 6 stroke:green + linkStyle 7 stroke:red + linkStyle 8 stroke:green + linkStyle 9 stroke:green + linkStyle 10 stroke:red + linkStyle 11 stroke:green + linkStyle 12 stroke:red + linkStyle 14 stroke:green MERMAID end @@ -438,35 +438,35 @@ def test_son_indirect_basic_block_argument node_16("0016 leave") node_1002("1002 ψ") node_1004("1004 φ") - node_0 --> |0| node_14 - linkStyle 0 stroke:green; - node_2 --> |0| node_5 - linkStyle 1 stroke:green; - node_4 --> |1| node_5 - linkStyle 2 stroke:green; + node_0 -- "0" --> node_14 + node_2 -- "0" --> node_5 + node_4 -- "1" --> node_5 node_5 --> node_7 - linkStyle 3 stroke:red; - node_5 --> |0| node_7 - linkStyle 4 stroke:green; - node_7 --> |branch0| node_13 - linkStyle 5 stroke:red; - node_7 --> |fallthrough| node_1002 - linkStyle 6 stroke:red; - node_9 --> |0011| node_1004 - linkStyle 7 stroke:green; - node_13 --> |branch0| node_1002 - linkStyle 8 stroke:red; - node_13 --> |0013| node_1004 - linkStyle 9 stroke:green; + node_5 -- "0" --> node_7 + node_7 -- "branch0" --> node_13 + node_7 -- "fallthrough" --> node_1002 + node_9 -- "0011" --> node_1004 + node_13 -- "branch0" --> node_1002 + node_13 -- "0013" --> node_1004 node_14 --> node_16 - linkStyle 10 stroke:red; - node_14 --> |0| node_16 - linkStyle 11 stroke:green; + node_14 -- "0" --> node_16 node_1002 --> node_14 - linkStyle 12 stroke:red; node_1004 -.-> node_1002 - node_1004 --> |1| node_14 - linkStyle 14 stroke:green; + 
node_1004 -- "1" --> node_14 + linkStyle 0 stroke:green + linkStyle 1 stroke:green + linkStyle 2 stroke:green + linkStyle 3 stroke:red + linkStyle 4 stroke:green + linkStyle 5 stroke:red + linkStyle 6 stroke:red + linkStyle 7 stroke:green + linkStyle 8 stroke:red + linkStyle 9 stroke:green + linkStyle 10 stroke:red + linkStyle 11 stroke:green + linkStyle 12 stroke:red + linkStyle 14 stroke:green MERMAID end From db06d7ebe75f4fb68202435c06f81a56c82526b3 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 10 Feb 2023 11:06:53 -0500 Subject: [PATCH 49/58] Start autoloading more things --- lib/syntax_tree.rb | 28 ++++++++-------------------- lib/syntax_tree/yarv.rb | 16 ++++++++++++++++ 2 files changed, 24 insertions(+), 20 deletions(-) diff --git a/lib/syntax_tree.rb b/lib/syntax_tree.rb index 9cbd49c7..220389cb 100644 --- a/lib/syntax_tree.rb +++ b/lib/syntax_tree.rb @@ -9,7 +9,6 @@ require_relative "syntax_tree/formatter" require_relative "syntax_tree/node" -require_relative "syntax_tree/dsl" require_relative "syntax_tree/version" require_relative "syntax_tree/basic_visitor" @@ -23,29 +22,10 @@ require_relative "syntax_tree/visitor/environment" require_relative "syntax_tree/visitor/with_environment" -require_relative "syntax_tree/mermaid" require_relative "syntax_tree/parser" require_relative "syntax_tree/pattern" require_relative "syntax_tree/search" require_relative "syntax_tree/index" - -require_relative "syntax_tree/yarv" -require_relative "syntax_tree/yarv/basic_block" -require_relative "syntax_tree/yarv/bf" -require_relative "syntax_tree/yarv/calldata" -require_relative "syntax_tree/yarv/compiler" -require_relative "syntax_tree/yarv/control_flow_graph" -require_relative "syntax_tree/yarv/data_flow_graph" -require_relative "syntax_tree/yarv/decompiler" -require_relative "syntax_tree/yarv/disassembler" -require_relative "syntax_tree/yarv/instruction_sequence" -require_relative "syntax_tree/yarv/instructions" -require_relative "syntax_tree/yarv/legacy" 
-require_relative "syntax_tree/yarv/local_table" -require_relative "syntax_tree/yarv/sea_of_nodes" -require_relative "syntax_tree/yarv/assembler" -require_relative "syntax_tree/yarv/vm" - require_relative "syntax_tree/translation" # Syntax Tree is a suite of tools built on top of the internal CRuby parser. It @@ -53,6 +33,14 @@ # tools necessary to inspect and manipulate that syntax tree. It can be used to # build formatters, linters, language servers, and more. module SyntaxTree + # Syntax Tree the library has many features that aren't always used by the + # CLI. Requiring those features takes time, so we autoload as many constants + # as possible in order to keep the CLI as fast as possible. + + autoload :DSL, "syntax_tree/dsl" + autoload :Mermaid, "syntax_tree/mermaid" + autoload :YARV, "syntax_tree/yarv" + # This holds references to objects that respond to both #parse and #format # so that we can use them in the CLI. HANDLERS = {} diff --git a/lib/syntax_tree/yarv.rb b/lib/syntax_tree/yarv.rb index 7e4da7bb..ff8d3801 100644 --- a/lib/syntax_tree/yarv.rb +++ b/lib/syntax_tree/yarv.rb @@ -1,5 +1,21 @@ # frozen_string_literal: true +require_relative "yarv/basic_block" +require_relative "yarv/bf" +require_relative "yarv/calldata" +require_relative "yarv/compiler" +require_relative "yarv/control_flow_graph" +require_relative "yarv/data_flow_graph" +require_relative "yarv/decompiler" +require_relative "yarv/disassembler" +require_relative "yarv/instruction_sequence" +require_relative "yarv/instructions" +require_relative "yarv/legacy" +require_relative "yarv/local_table" +require_relative "yarv/sea_of_nodes" +require_relative "yarv/assembler" +require_relative "yarv/vm" + module SyntaxTree # This module provides an object representation of the YARV bytecode. 
module YARV From 0cf3e858b2dc3cee1af05a6ee3c0913d261727be Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 10 Feb 2023 11:26:23 -0500 Subject: [PATCH 50/58] Autoload a bunch of stuff --- README.md | 4 +- lib/syntax_tree.rb | 40 +- lib/syntax_tree/cli.rb | 4 +- lib/syntax_tree/field_visitor.rb | 1028 ++++++++++++++++ lib/syntax_tree/json_visitor.rb | 55 + lib/syntax_tree/language_server.rb | 157 ++- .../language_server/inlay_hints.rb | 159 --- lib/syntax_tree/match_visitor.rb | 120 ++ lib/syntax_tree/mermaid.rb | 1 + lib/syntax_tree/mermaid_visitor.rb | 73 ++ lib/syntax_tree/mutation_visitor.rb | 922 +++++++++++++++ lib/syntax_tree/node.rb | 8 +- lib/syntax_tree/pretty_print_visitor.rb | 83 ++ lib/syntax_tree/visitor/environment.rb | 84 -- lib/syntax_tree/visitor/field_visitor.rb | 1031 ----------------- lib/syntax_tree/visitor/json_visitor.rb | 55 - lib/syntax_tree/visitor/match_visitor.rb | 122 -- lib/syntax_tree/visitor/mermaid_visitor.rb | 75 -- lib/syntax_tree/visitor/mutation_visitor.rb | 924 --------------- .../visitor/pretty_print_visitor.rb | 85 -- .../{visitor => }/with_environment.rb | 81 ++ lib/syntax_tree/yarv.rb | 2 + lib/syntax_tree/yarv/compiler.rb | 2 +- test/test_helper.rb | 2 +- 24 files changed, 2549 insertions(+), 2568 deletions(-) create mode 100644 lib/syntax_tree/field_visitor.rb create mode 100644 lib/syntax_tree/json_visitor.rb delete mode 100644 lib/syntax_tree/language_server/inlay_hints.rb create mode 100644 lib/syntax_tree/match_visitor.rb create mode 100644 lib/syntax_tree/mermaid_visitor.rb create mode 100644 lib/syntax_tree/mutation_visitor.rb create mode 100644 lib/syntax_tree/pretty_print_visitor.rb delete mode 100644 lib/syntax_tree/visitor/environment.rb delete mode 100644 lib/syntax_tree/visitor/field_visitor.rb delete mode 100644 lib/syntax_tree/visitor/json_visitor.rb delete mode 100644 lib/syntax_tree/visitor/match_visitor.rb delete mode 100644 lib/syntax_tree/visitor/mermaid_visitor.rb delete mode 100644 
lib/syntax_tree/visitor/mutation_visitor.rb delete mode 100644 lib/syntax_tree/visitor/pretty_print_visitor.rb rename lib/syntax_tree/{visitor => }/with_environment.rb (58%) diff --git a/README.md b/README.md index 6ca9b01a..5f447ad8 100644 --- a/README.md +++ b/README.md @@ -341,7 +341,7 @@ This function takes an input string containing Ruby code, parses it into its und ### SyntaxTree.mutation(&block) -This function yields a new mutation visitor to the block, and then returns the initialized visitor. It's effectively a shortcut for creating a `SyntaxTree::Visitor::MutationVisitor` without having to remember the class name. For more information on that visitor, see the definition below. +This function yields a new mutation visitor to the block, and then returns the initialized visitor. It's effectively a shortcut for creating a `SyntaxTree::MutationVisitor` without having to remember the class name. For more information on that visitor, see the definition below. ### SyntaxTree.search(source, query, &block) @@ -558,7 +558,7 @@ The `MutationVisitor` is a visitor that can be used to mutate the tree. 
It works ```ruby # Create a new visitor -visitor = SyntaxTree::Visitor::MutationVisitor.new +visitor = SyntaxTree::MutationVisitor.new # Specify that it should mutate If nodes with assignments in their predicates visitor.mutate("IfNode[predicate: Assign | OpAssign]") do |node| diff --git a/lib/syntax_tree.rb b/lib/syntax_tree.rb index 220389cb..0bdc4827 100644 --- a/lib/syntax_tree.rb +++ b/lib/syntax_tree.rb @@ -1,32 +1,15 @@ # frozen_string_literal: true -require "etc" -require "json" -require "pp" require "prettier_print" require "ripper" -require "stringio" -require_relative "syntax_tree/formatter" require_relative "syntax_tree/node" -require_relative "syntax_tree/version" - require_relative "syntax_tree/basic_visitor" require_relative "syntax_tree/visitor" -require_relative "syntax_tree/visitor/field_visitor" -require_relative "syntax_tree/visitor/json_visitor" -require_relative "syntax_tree/visitor/match_visitor" -require_relative "syntax_tree/visitor/mermaid_visitor" -require_relative "syntax_tree/visitor/mutation_visitor" -require_relative "syntax_tree/visitor/pretty_print_visitor" -require_relative "syntax_tree/visitor/environment" -require_relative "syntax_tree/visitor/with_environment" +require_relative "syntax_tree/formatter" require_relative "syntax_tree/parser" -require_relative "syntax_tree/pattern" -require_relative "syntax_tree/search" -require_relative "syntax_tree/index" -require_relative "syntax_tree/translation" +require_relative "syntax_tree/version" # Syntax Tree is a suite of tools built on top of the internal CRuby parser. It # provides the ability to generate a syntax tree from source, as well as the @@ -38,7 +21,19 @@ module SyntaxTree # as possible in order to keep the CLI as fast as possible. 
autoload :DSL, "syntax_tree/dsl" + autoload :FieldVisitor, "syntax_tree/field_visitor" + autoload :Index, "syntax_tree/index" + autoload :JSONVisitor, "syntax_tree/json_visitor" + autoload :LanguageServer, "syntax_tree/language_server" + autoload :MatchVisitor, "syntax_tree/match_visitor" autoload :Mermaid, "syntax_tree/mermaid" + autoload :MermaidVisitor, "syntax_tree/mermaid_visitor" + autoload :MutationVisitor, "syntax_tree/mutation_visitor" + autoload :Pattern, "syntax_tree/pattern" + autoload :PrettyPrintVisitor, "syntax_tree/pretty_print_visitor" + autoload :Search, "syntax_tree/search" + autoload :Translation, "syntax_tree/translation" + autoload :WithEnvironment, "syntax_tree/with_environment" autoload :YARV, "syntax_tree/yarv" # This holds references to objects that respond to both #parse and #format @@ -89,7 +84,7 @@ def self.index_file(filepath) # A convenience method for creating a new mutation visitor. def self.mutation - visitor = Visitor::MutationVisitor.new + visitor = MutationVisitor.new yield visitor visitor end @@ -130,6 +125,9 @@ def self.register_handler(extension, handler) # Searches through the given source using the given pattern and yields each # node in the tree that matches the pattern to the given block. def self.search(source, query, &block) - Search.new(Pattern.new(query).compile).scan(parse(source), &block) + pattern = Pattern.new(query).compile + program = parse(source) + + Search.new(pattern).scan(program, &block) end end diff --git a/lib/syntax_tree/cli.rb b/lib/syntax_tree/cli.rb index 7e6f4067..cbe10446 100644 --- a/lib/syntax_tree/cli.rb +++ b/lib/syntax_tree/cli.rb @@ -1,5 +1,6 @@ # frozen_string_literal: true +require "etc" require "optparse" module SyntaxTree @@ -238,7 +239,7 @@ def run(item) # representation. 
class Json < Action def run(item) - object = Visitor::JSONVisitor.new.visit(item.handler.parse(item.source)) + object = item.handler.parse(item.source).accept(JSONVisitor.new) puts JSON.pretty_generate(object) end end @@ -501,7 +502,6 @@ def run(argv) when "j", "json" Json.new(options) when "lsp" - require "syntax_tree/language_server" LanguageServer.new(print_width: options.print_width).run return 0 when "m", "match" diff --git a/lib/syntax_tree/field_visitor.rb b/lib/syntax_tree/field_visitor.rb new file mode 100644 index 00000000..f4fc00e3 --- /dev/null +++ b/lib/syntax_tree/field_visitor.rb @@ -0,0 +1,1028 @@ +# frozen_string_literal: true + +module SyntaxTree + # This is the parent class of a lot of built-in visitors for Syntax Tree. It + # reflects visiting each of the fields on every node in turn. It itself does + # not do anything with these fields, it leaves that behavior up to the + # subclass to implement. + # + # In order to properly use this class, you will need to subclass it and + # implement #comments, #field, #list, #node, #pairs, and #text. Those are + # documented here. + # + # == comments(node) + # + # This accepts the node that is being visited and does something depending on + # the comments attached to the node. + # + # == field(name, value) + # + # This accepts the name of the field being visited as a string (like "value") + # and the actual value of that field. The value can be a subclass of Node or + # any other type that can be held within the tree. + # + # == list(name, values) + # + # This accepts the name of the field being visited as well as a list of + # values. This is used, for example, when visiting something like the body of + # a Statements node. + # + # == node(node, type) + # + # This is the parent serialization method for each node. It is called with the + # node itself, as well as the type of the node as a string.
The type is an + # internally used value that usually resembles the name of the ripper event + # that generated the node. The method should yield to the given block which + # then calls through to visit each of the fields on the node. + # + # == text(name, value) + # + # This accepts the name of the field being visited as well as a string value + # representing the value of the field. + # + # == pairs(name, values) + # + # This accepts the name of the field being visited as well as a list of pairs + # that represent the value of the field. It is used only in a couple of + # circumstances, like when visiting the list of optional parameters defined on + # a method. + # + class FieldVisitor < BasicVisitor + def visit_aref(node) + node(node, "aref") do + field("collection", node.collection) + field("index", node.index) + comments(node) + end + end + + def visit_aref_field(node) + node(node, "aref_field") do + field("collection", node.collection) + field("index", node.index) + comments(node) + end + end + + def visit_alias(node) + node(node, "alias") do + field("left", node.left) + field("right", node.right) + comments(node) + end + end + + def visit_arg_block(node) + node(node, "arg_block") do + field("value", node.value) if node.value + comments(node) + end + end + + def visit_arg_paren(node) + node(node, "arg_paren") do + field("arguments", node.arguments) + comments(node) + end + end + + def visit_arg_star(node) + node(node, "arg_star") do + field("value", node.value) + comments(node) + end + end + + def visit_args(node) + node(node, "args") do + list("parts", node.parts) + comments(node) + end + end + + def visit_args_forward(node) + node(node, "args_forward") { comments(node) } + end + + def visit_array(node) + node(node, "array") do + field("contents", node.contents) + comments(node) + end + end + + def visit_aryptn(node) + node(node, "aryptn") do + field("constant", node.constant) if node.constant + list("requireds", node.requireds) if node.requireds.any? 
+ field("rest", node.rest) if node.rest + list("posts", node.posts) if node.posts.any? + comments(node) + end + end + + def visit_assign(node) + node(node, "assign") do + field("target", node.target) + field("value", node.value) + comments(node) + end + end + + def visit_assoc(node) + node(node, "assoc") do + field("key", node.key) + field("value", node.value) if node.value + comments(node) + end + end + + def visit_assoc_splat(node) + node(node, "assoc_splat") do + field("value", node.value) + comments(node) + end + end + + def visit_backref(node) + visit_token(node, "backref") + end + + def visit_backtick(node) + visit_token(node, "backtick") + end + + def visit_bare_assoc_hash(node) + node(node, "bare_assoc_hash") do + list("assocs", node.assocs) + comments(node) + end + end + + def visit_BEGIN(node) + node(node, "BEGIN") do + field("statements", node.statements) + comments(node) + end + end + + def visit_begin(node) + node(node, "begin") do + field("bodystmt", node.bodystmt) + comments(node) + end + end + + def visit_binary(node) + node(node, "binary") do + field("left", node.left) + text("operator", node.operator) + field("right", node.right) + comments(node) + end + end + + def visit_block(node) + node(node, "block") do + field("block_var", node.block_var) if node.block_var + field("bodystmt", node.bodystmt) + comments(node) + end + end + + def visit_blockarg(node) + node(node, "blockarg") do + field("name", node.name) if node.name + comments(node) + end + end + + def visit_block_var(node) + node(node, "block_var") do + field("params", node.params) + list("locals", node.locals) if node.locals.any? 
+ comments(node) + end + end + + def visit_bodystmt(node) + node(node, "bodystmt") do + field("statements", node.statements) + field("rescue_clause", node.rescue_clause) if node.rescue_clause + field("else_clause", node.else_clause) if node.else_clause + field("ensure_clause", node.ensure_clause) if node.ensure_clause + comments(node) + end + end + + def visit_break(node) + node(node, "break") do + field("arguments", node.arguments) + comments(node) + end + end + + def visit_call(node) + node(node, "call") do + field("receiver", node.receiver) + field("operator", node.operator) + field("message", node.message) + field("arguments", node.arguments) if node.arguments + comments(node) + end + end + + def visit_case(node) + node(node, "case") do + field("keyword", node.keyword) + field("value", node.value) if node.value + field("consequent", node.consequent) + comments(node) + end + end + + def visit_CHAR(node) + visit_token(node, "CHAR") + end + + def visit_class(node) + node(node, "class") do + field("constant", node.constant) + field("superclass", node.superclass) if node.superclass + field("bodystmt", node.bodystmt) + comments(node) + end + end + + def visit_comma(node) + node(node, "comma") { field("value", node.value) } + end + + def visit_command(node) + node(node, "command") do + field("message", node.message) + field("arguments", node.arguments) + comments(node) + end + end + + def visit_command_call(node) + node(node, "command_call") do + field("receiver", node.receiver) + field("operator", node.operator) + field("message", node.message) + field("arguments", node.arguments) if node.arguments + comments(node) + end + end + + def visit_comment(node) + node(node, "comment") { field("value", node.value) } + end + + def visit_const(node) + visit_token(node, "const") + end + + def visit_const_path_field(node) + node(node, "const_path_field") do + field("parent", node.parent) + field("constant", node.constant) + comments(node) + end + end + + def 
visit_const_path_ref(node) + node(node, "const_path_ref") do + field("parent", node.parent) + field("constant", node.constant) + comments(node) + end + end + + def visit_const_ref(node) + node(node, "const_ref") do + field("constant", node.constant) + comments(node) + end + end + + def visit_cvar(node) + visit_token(node, "cvar") + end + + def visit_def(node) + node(node, "def") do + field("target", node.target) + field("operator", node.operator) + field("name", node.name) + field("params", node.params) + field("bodystmt", node.bodystmt) + comments(node) + end + end + + def visit_defined(node) + node(node, "defined") do + field("value", node.value) + comments(node) + end + end + + def visit_dyna_symbol(node) + node(node, "dyna_symbol") do + list("parts", node.parts) + comments(node) + end + end + + def visit_END(node) + node(node, "END") do + field("statements", node.statements) + comments(node) + end + end + + def visit_else(node) + node(node, "else") do + field("statements", node.statements) + comments(node) + end + end + + def visit_elsif(node) + node(node, "elsif") do + field("predicate", node.predicate) + field("statements", node.statements) + field("consequent", node.consequent) if node.consequent + comments(node) + end + end + + def visit_embdoc(node) + node(node, "embdoc") { field("value", node.value) } + end + + def visit_embexpr_beg(node) + node(node, "embexpr_beg") { field("value", node.value) } + end + + def visit_embexpr_end(node) + node(node, "embexpr_end") { field("value", node.value) } + end + + def visit_embvar(node) + node(node, "embvar") { field("value", node.value) } + end + + def visit_ensure(node) + node(node, "ensure") do + field("statements", node.statements) + comments(node) + end + end + + def visit_excessed_comma(node) + visit_token(node, "excessed_comma") + end + + def visit_field(node) + node(node, "field") do + field("parent", node.parent) + field("operator", node.operator) + field("name", node.name) + comments(node) + end + end + + 
def visit_float(node) + visit_token(node, "float") + end + + def visit_fndptn(node) + node(node, "fndptn") do + field("constant", node.constant) if node.constant + field("left", node.left) + list("values", node.values) + field("right", node.right) + comments(node) + end + end + + def visit_for(node) + node(node, "for") do + field("index", node.index) + field("collection", node.collection) + field("statements", node.statements) + comments(node) + end + end + + def visit_gvar(node) + visit_token(node, "gvar") + end + + def visit_hash(node) + node(node, "hash") do + list("assocs", node.assocs) if node.assocs.any? + comments(node) + end + end + + def visit_heredoc(node) + node(node, "heredoc") do + list("parts", node.parts) + comments(node) + end + end + + def visit_heredoc_beg(node) + visit_token(node, "heredoc_beg") + end + + def visit_heredoc_end(node) + visit_token(node, "heredoc_end") + end + + def visit_hshptn(node) + node(node, "hshptn") do + field("constant", node.constant) if node.constant + pairs("keywords", node.keywords) if node.keywords.any? 
+ field("keyword_rest", node.keyword_rest) if node.keyword_rest + comments(node) + end + end + + def visit_ident(node) + visit_token(node, "ident") + end + + def visit_if(node) + node(node, "if") do + field("predicate", node.predicate) + field("statements", node.statements) + field("consequent", node.consequent) if node.consequent + comments(node) + end + end + + def visit_if_op(node) + node(node, "if_op") do + field("predicate", node.predicate) + field("truthy", node.truthy) + field("falsy", node.falsy) + comments(node) + end + end + + def visit_imaginary(node) + visit_token(node, "imaginary") + end + + def visit_in(node) + node(node, "in") do + field("pattern", node.pattern) + field("statements", node.statements) + field("consequent", node.consequent) if node.consequent + comments(node) + end + end + + def visit_int(node) + visit_token(node, "int") + end + + def visit_ivar(node) + visit_token(node, "ivar") + end + + def visit_kw(node) + visit_token(node, "kw") + end + + def visit_kwrest_param(node) + node(node, "kwrest_param") do + field("name", node.name) + comments(node) + end + end + + def visit_label(node) + visit_token(node, "label") + end + + def visit_label_end(node) + node(node, "label_end") { field("value", node.value) } + end + + def visit_lambda(node) + node(node, "lambda") do + field("params", node.params) + field("statements", node.statements) + comments(node) + end + end + + def visit_lambda_var(node) + node(node, "lambda_var") do + field("params", node.params) + list("locals", node.locals) if node.locals.any? 
+ comments(node) + end + end + + def visit_lbrace(node) + visit_token(node, "lbrace") + end + + def visit_lbracket(node) + visit_token(node, "lbracket") + end + + def visit_lparen(node) + visit_token(node, "lparen") + end + + def visit_massign(node) + node(node, "massign") do + field("target", node.target) + field("value", node.value) + comments(node) + end + end + + def visit_method_add_block(node) + node(node, "method_add_block") do + field("call", node.call) + field("block", node.block) + comments(node) + end + end + + def visit_mlhs(node) + node(node, "mlhs") do + list("parts", node.parts) + comments(node) + end + end + + def visit_mlhs_paren(node) + node(node, "mlhs_paren") do + field("contents", node.contents) + comments(node) + end + end + + def visit_module(node) + node(node, "module") do + field("constant", node.constant) + field("bodystmt", node.bodystmt) + comments(node) + end + end + + def visit_mrhs(node) + node(node, "mrhs") do + list("parts", node.parts) + comments(node) + end + end + + def visit_next(node) + node(node, "next") do + field("arguments", node.arguments) + comments(node) + end + end + + def visit_not(node) + node(node, "not") do + field("statement", node.statement) + comments(node) + end + end + + def visit_op(node) + visit_token(node, "op") + end + + def visit_opassign(node) + node(node, "opassign") do + field("target", node.target) + field("operator", node.operator) + field("value", node.value) + comments(node) + end + end + + def visit_params(node) + node(node, "params") do + list("requireds", node.requireds) if node.requireds.any? + pairs("optionals", node.optionals) if node.optionals.any? + field("rest", node.rest) if node.rest + list("posts", node.posts) if node.posts.any? + pairs("keywords", node.keywords) if node.keywords.any? 
+ field("keyword_rest", node.keyword_rest) if node.keyword_rest + field("block", node.block) if node.block + comments(node) + end + end + + def visit_paren(node) + node(node, "paren") do + field("contents", node.contents) + comments(node) + end + end + + def visit_period(node) + visit_token(node, "period") + end + + def visit_pinned_begin(node) + node(node, "pinned_begin") do + field("statement", node.statement) + comments(node) + end + end + + def visit_pinned_var_ref(node) + node(node, "pinned_var_ref") do + field("value", node.value) + comments(node) + end + end + + def visit_program(node) + node(node, "program") do + field("statements", node.statements) + comments(node) + end + end + + def visit_qsymbols(node) + node(node, "qsymbols") do + list("elements", node.elements) + comments(node) + end + end + + def visit_qsymbols_beg(node) + node(node, "qsymbols_beg") { field("value", node.value) } + end + + def visit_qwords(node) + node(node, "qwords") do + list("elements", node.elements) + comments(node) + end + end + + def visit_qwords_beg(node) + node(node, "qwords_beg") { field("value", node.value) } + end + + def visit_range(node) + node(node, "range") do + field("left", node.left) if node.left + field("operator", node.operator) + field("right", node.right) if node.right + comments(node) + end + end + + def visit_rassign(node) + node(node, "rassign") do + field("value", node.value) + field("operator", node.operator) + field("pattern", node.pattern) + comments(node) + end + end + + def visit_rational(node) + visit_token(node, "rational") + end + + def visit_rbrace(node) + node(node, "rbrace") { field("value", node.value) } + end + + def visit_rbracket(node) + node(node, "rbracket") { field("value", node.value) } + end + + def visit_redo(node) + node(node, "redo") { comments(node) } + end + + def visit_regexp_beg(node) + node(node, "regexp_beg") { field("value", node.value) } + end + + def visit_regexp_content(node) + node(node, "regexp_content") { list("parts", 
node.parts) } + end + + def visit_regexp_end(node) + node(node, "regexp_end") { field("value", node.value) } + end + + def visit_regexp_literal(node) + node(node, "regexp_literal") do + list("parts", node.parts) + field("options", node.options) + comments(node) + end + end + + def visit_rescue(node) + node(node, "rescue") do + field("exception", node.exception) if node.exception + field("statements", node.statements) + field("consequent", node.consequent) if node.consequent + comments(node) + end + end + + def visit_rescue_ex(node) + node(node, "rescue_ex") do + field("exceptions", node.exceptions) + field("variable", node.variable) + comments(node) + end + end + + def visit_rescue_mod(node) + node(node, "rescue_mod") do + field("statement", node.statement) + field("value", node.value) + comments(node) + end + end + + def visit_rest_param(node) + node(node, "rest_param") do + field("name", node.name) + comments(node) + end + end + + def visit_retry(node) + node(node, "retry") { comments(node) } + end + + def visit_return(node) + node(node, "return") do + field("arguments", node.arguments) + comments(node) + end + end + + def visit_rparen(node) + node(node, "rparen") { field("value", node.value) } + end + + def visit_sclass(node) + node(node, "sclass") do + field("target", node.target) + field("bodystmt", node.bodystmt) + comments(node) + end + end + + def visit_statements(node) + node(node, "statements") do + list("body", node.body) + comments(node) + end + end + + def visit_string_concat(node) + node(node, "string_concat") do + field("left", node.left) + field("right", node.right) + comments(node) + end + end + + def visit_string_content(node) + node(node, "string_content") { list("parts", node.parts) } + end + + def visit_string_dvar(node) + node(node, "string_dvar") do + field("variable", node.variable) + comments(node) + end + end + + def visit_string_embexpr(node) + node(node, "string_embexpr") do + field("statements", node.statements) + comments(node) + end + 
end + + def visit_string_literal(node) + node(node, "string_literal") do + list("parts", node.parts) + comments(node) + end + end + + def visit_super(node) + node(node, "super") do + field("arguments", node.arguments) + comments(node) + end + end + + def visit_symbeg(node) + node(node, "symbeg") { field("value", node.value) } + end + + def visit_symbol_content(node) + node(node, "symbol_content") { field("value", node.value) } + end + + def visit_symbol_literal(node) + node(node, "symbol_literal") do + field("value", node.value) + comments(node) + end + end + + def visit_symbols(node) + node(node, "symbols") do + list("elements", node.elements) + comments(node) + end + end + + def visit_symbols_beg(node) + node(node, "symbols_beg") { field("value", node.value) } + end + + def visit_tlambda(node) + node(node, "tlambda") { field("value", node.value) } + end + + def visit_tlambeg(node) + node(node, "tlambeg") { field("value", node.value) } + end + + def visit_top_const_field(node) + node(node, "top_const_field") do + field("constant", node.constant) + comments(node) + end + end + + def visit_top_const_ref(node) + node(node, "top_const_ref") do + field("constant", node.constant) + comments(node) + end + end + + def visit_tstring_beg(node) + node(node, "tstring_beg") { field("value", node.value) } + end + + def visit_tstring_content(node) + visit_token(node, "tstring_content") + end + + def visit_tstring_end(node) + node(node, "tstring_end") { field("value", node.value) } + end + + def visit_unary(node) + node(node, "unary") do + field("operator", node.operator) + field("statement", node.statement) + comments(node) + end + end + + def visit_undef(node) + node(node, "undef") do + list("symbols", node.symbols) + comments(node) + end + end + + def visit_unless(node) + node(node, "unless") do + field("predicate", node.predicate) + field("statements", node.statements) + field("consequent", node.consequent) if node.consequent + comments(node) + end + end + + def 
visit_until(node) + node(node, "until") do + field("predicate", node.predicate) + field("statements", node.statements) + comments(node) + end + end + + def visit_var_field(node) + node(node, "var_field") do + field("value", node.value) + comments(node) + end + end + + def visit_var_ref(node) + node(node, "var_ref") do + field("value", node.value) + comments(node) + end + end + + def visit_vcall(node) + node(node, "vcall") do + field("value", node.value) + comments(node) + end + end + + def visit_void_stmt(node) + node(node, "void_stmt") { comments(node) } + end + + def visit_when(node) + node(node, "when") do + field("arguments", node.arguments) + field("statements", node.statements) + field("consequent", node.consequent) if node.consequent + comments(node) + end + end + + def visit_while(node) + node(node, "while") do + field("predicate", node.predicate) + field("statements", node.statements) + comments(node) + end + end + + def visit_word(node) + node(node, "word") do + list("parts", node.parts) + comments(node) + end + end + + def visit_words(node) + node(node, "words") do + list("elements", node.elements) + comments(node) + end + end + + def visit_words_beg(node) + node(node, "words_beg") { field("value", node.value) } + end + + def visit_xstring(node) + node(node, "xstring") { list("parts", node.parts) } + end + + def visit_xstring_literal(node) + node(node, "xstring_literal") do + list("parts", node.parts) + comments(node) + end + end + + def visit_yield(node) + node(node, "yield") do + field("arguments", node.arguments) + comments(node) + end + end + + def visit_zsuper(node) + node(node, "zsuper") { comments(node) } + end + + def visit___end__(node) + visit_token(node, "__end__") + end + + private + + def visit_token(node, type) + node(node, type) do + field("value", node.value) + comments(node) + end + end + end +end diff --git a/lib/syntax_tree/json_visitor.rb b/lib/syntax_tree/json_visitor.rb new file mode 100644 index 00000000..7ad3fba0 --- /dev/null +++ 
b/lib/syntax_tree/json_visitor.rb @@ -0,0 +1,55 @@ +# frozen_string_literal: true + +require "json" + +module SyntaxTree + # This visitor transforms the AST into a hash that contains only primitives + # that can be easily serialized into JSON. + class JSONVisitor < FieldVisitor + attr_reader :target + + def initialize + @target = nil + end + + private + + def comments(node) + target[:comments] = visit_all(node.comments) + end + + def field(name, value) + target[name] = value.is_a?(Node) ? visit(value) : value + end + + def list(name, values) + target[name] = visit_all(values) + end + + def node(node, type) + previous = @target + @target = { type: type, location: visit_location(node.location) } + yield + @target + ensure + @target = previous + end + + def pairs(name, values) + target[name] = values.map { |(key, value)| [visit(key), visit(value)] } + end + + def text(name, value) + target[name] = value + end + + def visit_location(location) + [ + location.start_line, + location.start_char, + location.end_line, + location.end_char + ] + end + end +end diff --git a/lib/syntax_tree/language_server.rb b/lib/syntax_tree/language_server.rb index a7b23664..afb1540e 100644 --- a/lib/syntax_tree/language_server.rb +++ b/lib/syntax_tree/language_server.rb @@ -2,10 +2,9 @@ require "cgi" require "json" +require "pp" require "uri" -require_relative "language_server/inlay_hints" - module SyntaxTree # Syntax Tree additionally ships with a language server conforming to the # language server protocol. It can be invoked through the CLI by running: @@ -13,6 +12,160 @@ module SyntaxTree # stree lsp # class LanguageServer + # This class provides inlay hints for the language server. For more + # information, see the spec here: + # https://github.com/microsoft/language-server-protocol/issues/956. + class InlayHints < Visitor + # This represents a hint that is going to be displayed in the editor. 
+ class Hint + attr_reader :line, :character, :label + + def initialize(line:, character:, label:) + @line = line + @character = character + @label = label + end + + # This is the shape that the LSP expects. + def to_json(*opts) + { + position: { + line: line, + character: character + }, + label: label + }.to_json(*opts) + end + end + + attr_reader :stack, :hints + + def initialize + @stack = [] + @hints = [] + end + + def visit(node) + stack << node + result = super + stack.pop + result + end + + # Adds parentheses around assignments contained within the default values + # of parameters. For example, + # + # def foo(a = b = c) + # end + # + # becomes + # + # def foo(a = ₍b = c₎) + # end + # + def visit_assign(node) + parentheses(node.location) if stack[-2].is_a?(Params) + super + end + + # Adds parentheses around binary expressions to make it clear which + # subexpression will be evaluated first. For example, + # + # a + b * c + # + # becomes + # + # a + ₍b * c₎ + # + def visit_binary(node) + case stack[-2] + when Assign, OpAssign + parentheses(node.location) + when Binary + parentheses(node.location) if stack[-2].operator != node.operator + end + + super + end + + # Adds parentheses around ternary operators contained within certain + # expressions where it could be confusing which subexpression will get + # evaluated first. For example, + # + # a ? b : c ? d : e + # + # becomes + # + # a ? b : ₍c ? d : e₎ + # + def visit_if_op(node) + case stack[-2] + when Assign, Binary, IfOp, OpAssign + parentheses(node.location) + end + + super + end + + # Adds the implicitly rescued StandardError into a bare rescue clause. For + # example, + # + # begin + # rescue + # end + # + # becomes + # + # begin + # rescue StandardError + # end + # + def visit_rescue(node) + if node.exception.nil? 
+ hints << Hint.new( + line: node.location.start_line - 1, + character: node.location.start_column + "rescue".length, + label: " StandardError" + ) + end + + super + end + + # Adds parentheses around unary statements using the - operator that are + # contained within Binary nodes. For example, + # + # -a + b + # + # becomes + # + # ₍-a₎ + b + # + def visit_unary(node) + if stack[-2].is_a?(Binary) && (node.operator == "-") + parentheses(node.location) + end + + super + end + + private + + def parentheses(location) + hints << Hint.new( + line: location.start_line - 1, + character: location.start_column, + label: "₍" + ) + + hints << Hint.new( + line: location.end_line - 1, + character: location.end_column, + label: "₎" + ) + end + end + # This is a small module that effectively mirrors pattern matching. We're # using it so that we can support truffleruby without having to ignore the # language server. diff --git a/lib/syntax_tree/language_server/inlay_hints.rb b/lib/syntax_tree/language_server/inlay_hints.rb deleted file mode 100644 index dfd63b8d..00000000 --- a/lib/syntax_tree/language_server/inlay_hints.rb +++ /dev/null @@ -1,159 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - class LanguageServer - # This class provides inlay hints for the language server. For more - # information, see the spec here: - # https://github.com/microsoft/language-server-protocol/issues/956. - class InlayHints < Visitor - # This represents a hint that is going to be displayed in the editor. - class Hint - attr_reader :line, :character, :label - - def initialize(line:, character:, label:) - @line = line - @character = character - @label = label - end - - # This is the shape that the LSP expects. 
- def to_json(*opts) - { - position: { - line: line, - character: character - }, - label: label - }.to_json(*opts) - end - end - - attr_reader :stack, :hints - - def initialize - @stack = [] - @hints = [] - end - - def visit(node) - stack << node - result = super - stack.pop - result - end - - # Adds parentheses around assignments contained within the default values - # of parameters. For example, - # - # def foo(a = b = c) - # end - # - # becomes - # - # def foo(a = ₍b = c₎) - # end - # - def visit_assign(node) - parentheses(node.location) if stack[-2].is_a?(Params) - super - end - - # Adds parentheses around binary expressions to make it clear which - # subexpression will be evaluated first. For example, - # - # a + b * c - # - # becomes - # - # a + ₍b * c₎ - # - def visit_binary(node) - case stack[-2] - when Assign, OpAssign - parentheses(node.location) - when Binary - parentheses(node.location) if stack[-2].operator != node.operator - end - - super - end - - # Adds parentheses around ternary operators contained within certain - # expressions where it could be confusing which subexpression will get - # evaluated first. For example, - # - # a ? b : c ? d : e - # - # becomes - # - # a ? b : ₍c ? d : e₎ - # - def visit_if_op(node) - case stack[-2] - when Assign, Binary, IfOp, OpAssign - parentheses(node.location) - end - - super - end - - # Adds the implicitly rescued StandardError into a bare rescue clause. For - # example, - # - # begin - # rescue - # end - # - # becomes - # - # begin - # rescue StandardError - # end - # - def visit_rescue(node) - if node.exception.nil? - hints << Hint.new( - line: node.location.start_line - 1, - character: node.location.start_column + "rescue".length, - label: " StandardError" - ) - end - - super - end - - # Adds parentheses around unary statements using the - operator that are - # contained within Binary nodes. 
For example, - # - # -a + b - # - # becomes - # - # ₍-a₎ + b - # - def visit_unary(node) - if stack[-2].is_a?(Binary) && (node.operator == "-") - parentheses(node.location) - end - - super - end - - private - - def parentheses(location) - hints << Hint.new( - line: location.start_line - 1, - character: location.start_column, - label: "₍" - ) - - hints << Hint.new( - line: location.end_line - 1, - character: location.end_column, - label: "₎" - ) - end - end - end -end diff --git a/lib/syntax_tree/match_visitor.rb b/lib/syntax_tree/match_visitor.rb new file mode 100644 index 00000000..ca5bf234 --- /dev/null +++ b/lib/syntax_tree/match_visitor.rb @@ -0,0 +1,120 @@ +# frozen_string_literal: true + +module SyntaxTree + # This visitor transforms the AST into a Ruby pattern matching expression that + # would match correctly against the AST. + class MatchVisitor < FieldVisitor + attr_reader :q + + def initialize(q) + @q = q + end + + def visit(node) + case node + when Node + super + when String + # pp will split up a string on newlines and concat them together using a + # "+" operator. This breaks the pattern matching expression. So instead + # we're going to check here for strings and manually put the entire + # value into the output buffer. + q.text(node.inspect) + else + node.pretty_print(q) + end + end + + private + + def comments(node) + return if node.comments.empty? + + q.nest(0) do + q.text("comments: [") + q.indent do + q.breakable("") + q.seplist(node.comments) { |comment| visit(comment) } + end + q.breakable("") + q.text("]") + end + end + + def field(name, value) + q.nest(0) do + q.text(name) + q.text(": ") + visit(value) + end + end + + def list(name, values) + q.group do + q.text(name) + q.text(": [") + q.indent do + q.breakable("") + q.seplist(values) { |value| visit(value) } + end + q.breakable("") + q.text("]") + end + end + + def node(node, _type) + items = [] + q.with_target(items) { yield } + + if items.empty? 
+ q.text(node.class.name) + return + end + + q.group do + q.text(node.class.name) + q.text("[") + q.indent do + q.breakable("") + q.seplist(items) { |item| q.target << item } + end + q.breakable("") + q.text("]") + end + end + + def pairs(name, values) + q.group do + q.text(name) + q.text(": [") + q.indent do + q.breakable("") + q.seplist(values) do |(key, value)| + q.group do + q.text("[") + q.indent do + q.breakable("") + visit(key) + q.text(",") + q.breakable + visit(value || nil) + end + q.breakable("") + q.text("]") + end + end + end + q.breakable("") + q.text("]") + end + end + + def text(name, value) + q.nest(0) do + q.text(name) + q.text(": ") + value.pretty_print(q) + end + end + end +end diff --git a/lib/syntax_tree/mermaid.rb b/lib/syntax_tree/mermaid.rb index 70cbc054..68ea4734 100644 --- a/lib/syntax_tree/mermaid.rb +++ b/lib/syntax_tree/mermaid.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true require "cgi" +require "stringio" module SyntaxTree # This module is responsible for rendering mermaid (https://mermaid.js.org/) diff --git a/lib/syntax_tree/mermaid_visitor.rb b/lib/syntax_tree/mermaid_visitor.rb new file mode 100644 index 00000000..52d1b5c6 --- /dev/null +++ b/lib/syntax_tree/mermaid_visitor.rb @@ -0,0 +1,73 @@ +# frozen_string_literal: true + +module SyntaxTree + # This visitor transforms the AST into a mermaid flow chart. 
+ class MermaidVisitor < FieldVisitor + attr_reader :flowchart, :target + + def initialize + @flowchart = Mermaid.flowchart + @target = nil + end + + def visit_program(node) + super + flowchart.render + end + + private + + def comments(node) + # Ignore + end + + def field(name, value) + case value + when nil + # skip + when Node + flowchart.link(target, visit(value), name) + else + to = + flowchart.node( + "#{target.id}_#{name}", + value.inspect, + shape: :stadium + ) + flowchart.link(target, to, name) + end + end + + def list(name, values) + values.each_with_index do |value, index| + field("#{name}[#{index}]", value) + end + end + + def node(node, type) + previous_target = target + + begin + @target = flowchart.node("node_#{node.object_id}", type) + yield + @target + ensure + @target = previous_target + end + end + + def pairs(name, values) + values.each_with_index do |(key, value), index| + to = flowchart.node("#{target.id}_#{name}_#{index}", shape: :circle) + + flowchart.link(target, to, "#{name}[#{index}]") + flowchart.link(to, visit(key), "[0]") + flowchart.link(to, visit(value), "[1]") if value + end + end + + def text(name, value) + field(name, value) + end + end +end diff --git a/lib/syntax_tree/mutation_visitor.rb b/lib/syntax_tree/mutation_visitor.rb new file mode 100644 index 00000000..2d96620d --- /dev/null +++ b/lib/syntax_tree/mutation_visitor.rb @@ -0,0 +1,922 @@ +# frozen_string_literal: true + +module SyntaxTree + # This visitor walks through the tree and copies each node as it is being + # visited. This is useful for mutating the tree before it is formatted. + class MutationVisitor < BasicVisitor + attr_reader :mutations + + def initialize + @mutations = [] + end + + # Create a new mutation based on the given query that will mutate the node + # using the given block. The block should return a new node that will take + # the place of the given node in the tree. 
These blocks frequently make use + # of the `copy` method on nodes to create a new node with the same + # properties as the original node. + def mutate(query, &block) + mutations << [Pattern.new(query).compile, block] + end + + # This is the base visit method for each node in the tree. It first creates + # a copy of the node using the visit_* methods defined below. Then it checks + # each mutation in sequence and calls it if it finds a match. + def visit(node) + return unless node + result = node.accept(self) + + mutations.each do |(pattern, mutation)| + result = mutation.call(result) if pattern.call(result) + end + + result + end + + # Visit a BEGINBlock node. + def visit_BEGIN(node) + node.copy( + lbrace: visit(node.lbrace), + statements: visit(node.statements) + ) + end + + # Visit a CHAR node. + def visit_CHAR(node) + node.copy + end + + # Visit a ENDBlock node. + def visit_END(node) + node.copy( + lbrace: visit(node.lbrace), + statements: visit(node.statements) + ) + end + + # Visit a EndContent node. + def visit___end__(node) + node.copy + end + + # Visit a AliasNode node. + def visit_alias(node) + node.copy(left: visit(node.left), right: visit(node.right)) + end + + # Visit a ARef node. + def visit_aref(node) + node.copy(index: visit(node.index)) + end + + # Visit a ARefField node. + def visit_aref_field(node) + node.copy(index: visit(node.index)) + end + + # Visit a ArgParen node. + def visit_arg_paren(node) + node.copy(arguments: visit(node.arguments)) + end + + # Visit a Args node. + def visit_args(node) + node.copy(parts: visit_all(node.parts)) + end + + # Visit a ArgBlock node. + def visit_arg_block(node) + node.copy(value: visit(node.value)) + end + + # Visit a ArgStar node. + def visit_arg_star(node) + node.copy(value: visit(node.value)) + end + + # Visit a ArgsForward node. + def visit_args_forward(node) + node.copy + end + + # Visit a ArrayLiteral node. 
+ def visit_array(node) + node.copy( + lbracket: visit(node.lbracket), + contents: visit(node.contents) + ) + end + + # Visit a AryPtn node. + def visit_aryptn(node) + node.copy( + constant: visit(node.constant), + requireds: visit_all(node.requireds), + rest: visit(node.rest), + posts: visit_all(node.posts) + ) + end + + # Visit a Assign node. + def visit_assign(node) + node.copy(target: visit(node.target)) + end + + # Visit a Assoc node. + def visit_assoc(node) + node.copy + end + + # Visit a AssocSplat node. + def visit_assoc_splat(node) + node.copy + end + + # Visit a Backref node. + def visit_backref(node) + node.copy + end + + # Visit a Backtick node. + def visit_backtick(node) + node.copy + end + + # Visit a BareAssocHash node. + def visit_bare_assoc_hash(node) + node.copy(assocs: visit_all(node.assocs)) + end + + # Visit a Begin node. + def visit_begin(node) + node.copy(bodystmt: visit(node.bodystmt)) + end + + # Visit a PinnedBegin node. + def visit_pinned_begin(node) + node.copy + end + + # Visit a Binary node. + def visit_binary(node) + node.copy + end + + # Visit a BlockVar node. + def visit_block_var(node) + node.copy(params: visit(node.params), locals: visit_all(node.locals)) + end + + # Visit a BlockArg node. + def visit_blockarg(node) + node.copy(name: visit(node.name)) + end + + # Visit a BodyStmt node. + def visit_bodystmt(node) + node.copy( + statements: visit(node.statements), + rescue_clause: visit(node.rescue_clause), + else_clause: visit(node.else_clause), + ensure_clause: visit(node.ensure_clause) + ) + end + + # Visit a Break node. + def visit_break(node) + node.copy(arguments: visit(node.arguments)) + end + + # Visit a Call node. + def visit_call(node) + node.copy( + receiver: visit(node.receiver), + operator: node.operator == :"::" ? :"::" : visit(node.operator), + message: node.message == :call ? :call : visit(node.message), + arguments: visit(node.arguments) + ) + end + + # Visit a Case node. 
+ def visit_case(node) + node.copy( + keyword: visit(node.keyword), + value: visit(node.value), + consequent: visit(node.consequent) + ) + end + + # Visit a RAssign node. + def visit_rassign(node) + node.copy(operator: visit(node.operator)) + end + + # Visit a ClassDeclaration node. + def visit_class(node) + node.copy( + constant: visit(node.constant), + superclass: visit(node.superclass), + bodystmt: visit(node.bodystmt) + ) + end + + # Visit a Comma node. + def visit_comma(node) + node.copy + end + + # Visit a Command node. + def visit_command(node) + node.copy( + message: visit(node.message), + arguments: visit(node.arguments), + block: visit(node.block) + ) + end + + # Visit a CommandCall node. + def visit_command_call(node) + node.copy( + operator: node.operator == :"::" ? :"::" : visit(node.operator), + message: visit(node.message), + arguments: visit(node.arguments), + block: visit(node.block) + ) + end + + # Visit a Comment node. + def visit_comment(node) + node.copy + end + + # Visit a Const node. + def visit_const(node) + node.copy + end + + # Visit a ConstPathField node. + def visit_const_path_field(node) + node.copy(constant: visit(node.constant)) + end + + # Visit a ConstPathRef node. + def visit_const_path_ref(node) + node.copy(constant: visit(node.constant)) + end + + # Visit a ConstRef node. + def visit_const_ref(node) + node.copy(constant: visit(node.constant)) + end + + # Visit a CVar node. + def visit_cvar(node) + node.copy + end + + # Visit a Def node. + def visit_def(node) + node.copy( + target: visit(node.target), + operator: visit(node.operator), + name: visit(node.name), + params: visit(node.params), + bodystmt: visit(node.bodystmt) + ) + end + + # Visit a Defined node. + def visit_defined(node) + node.copy + end + + # Visit a Block node. + def visit_block(node) + node.copy( + opening: visit(node.opening), + block_var: visit(node.block_var), + bodystmt: visit(node.bodystmt) + ) + end + + # Visit a RangeNode node. 
+ def visit_range(node) + node.copy( + left: visit(node.left), + operator: visit(node.operator), + right: visit(node.right) + ) + end + + # Visit a DynaSymbol node. + def visit_dyna_symbol(node) + node.copy(parts: visit_all(node.parts)) + end + + # Visit a Else node. + def visit_else(node) + node.copy( + keyword: visit(node.keyword), + statements: visit(node.statements) + ) + end + + # Visit a Elsif node. + def visit_elsif(node) + node.copy( + statements: visit(node.statements), + consequent: visit(node.consequent) + ) + end + + # Visit a EmbDoc node. + def visit_embdoc(node) + node.copy + end + + # Visit a EmbExprBeg node. + def visit_embexpr_beg(node) + node.copy + end + + # Visit a EmbExprEnd node. + def visit_embexpr_end(node) + node.copy + end + + # Visit a EmbVar node. + def visit_embvar(node) + node.copy + end + + # Visit a Ensure node. + def visit_ensure(node) + node.copy( + keyword: visit(node.keyword), + statements: visit(node.statements) + ) + end + + # Visit a ExcessedComma node. + def visit_excessed_comma(node) + node.copy + end + + # Visit a Field node. + def visit_field(node) + node.copy( + operator: node.operator == :"::" ? :"::" : visit(node.operator), + name: visit(node.name) + ) + end + + # Visit a FloatLiteral node. + def visit_float(node) + node.copy + end + + # Visit a FndPtn node. + def visit_fndptn(node) + node.copy( + constant: visit(node.constant), + left: visit(node.left), + values: visit_all(node.values), + right: visit(node.right) + ) + end + + # Visit a For node. + def visit_for(node) + node.copy(index: visit(node.index), statements: visit(node.statements)) + end + + # Visit a GVar node. + def visit_gvar(node) + node.copy + end + + # Visit a HashLiteral node. + def visit_hash(node) + node.copy(lbrace: visit(node.lbrace), assocs: visit_all(node.assocs)) + end + + # Visit a Heredoc node. 
+ def visit_heredoc(node) + node.copy( + beginning: visit(node.beginning), + ending: visit(node.ending), + parts: visit_all(node.parts) + ) + end + + # Visit a HeredocBeg node. + def visit_heredoc_beg(node) + node.copy + end + + # Visit a HeredocEnd node. + def visit_heredoc_end(node) + node.copy + end + + # Visit a HshPtn node. + def visit_hshptn(node) + node.copy( + constant: visit(node.constant), + keywords: + node.keywords.map { |label, value| [visit(label), visit(value)] }, + keyword_rest: visit(node.keyword_rest) + ) + end + + # Visit a Ident node. + def visit_ident(node) + node.copy + end + + # Visit a IfNode node. + def visit_if(node) + node.copy( + predicate: visit(node.predicate), + statements: visit(node.statements), + consequent: visit(node.consequent) + ) + end + + # Visit a IfOp node. + def visit_if_op(node) + node.copy + end + + # Visit a Imaginary node. + def visit_imaginary(node) + node.copy + end + + # Visit a In node. + def visit_in(node) + node.copy( + statements: visit(node.statements), + consequent: visit(node.consequent) + ) + end + + # Visit a Int node. + def visit_int(node) + node.copy + end + + # Visit a IVar node. + def visit_ivar(node) + node.copy + end + + # Visit a Kw node. + def visit_kw(node) + node.copy + end + + # Visit a KwRestParam node. + def visit_kwrest_param(node) + node.copy(name: visit(node.name)) + end + + # Visit a Label node. + def visit_label(node) + node.copy + end + + # Visit a LabelEnd node. + def visit_label_end(node) + node.copy + end + + # Visit a Lambda node. + def visit_lambda(node) + node.copy( + params: visit(node.params), + statements: visit(node.statements) + ) + end + + # Visit a LambdaVar node. + def visit_lambda_var(node) + node.copy(params: visit(node.params), locals: visit_all(node.locals)) + end + + # Visit a LBrace node. + def visit_lbrace(node) + node.copy + end + + # Visit a LBracket node. + def visit_lbracket(node) + node.copy + end + + # Visit a LParen node. 
+ def visit_lparen(node) + node.copy + end + + # Visit a MAssign node. + def visit_massign(node) + node.copy(target: visit(node.target)) + end + + # Visit a MethodAddBlock node. + def visit_method_add_block(node) + node.copy(call: visit(node.call), block: visit(node.block)) + end + + # Visit a MLHS node. + def visit_mlhs(node) + node.copy(parts: visit_all(node.parts)) + end + + # Visit a MLHSParen node. + def visit_mlhs_paren(node) + node.copy(contents: visit(node.contents)) + end + + # Visit a ModuleDeclaration node. + def visit_module(node) + node.copy( + constant: visit(node.constant), + bodystmt: visit(node.bodystmt) + ) + end + + # Visit a MRHS node. + def visit_mrhs(node) + node.copy(parts: visit_all(node.parts)) + end + + # Visit a Next node. + def visit_next(node) + node.copy(arguments: visit(node.arguments)) + end + + # Visit a Op node. + def visit_op(node) + node.copy + end + + # Visit a OpAssign node. + def visit_opassign(node) + node.copy(target: visit(node.target), operator: visit(node.operator)) + end + + # Visit a Params node. + def visit_params(node) + node.copy( + requireds: visit_all(node.requireds), + optionals: + node.optionals.map { |ident, value| [visit(ident), visit(value)] }, + rest: visit(node.rest), + posts: visit_all(node.posts), + keywords: + node.keywords.map { |ident, value| [visit(ident), visit(value)] }, + keyword_rest: + node.keyword_rest == :nil ? :nil : visit(node.keyword_rest), + block: visit(node.block) + ) + end + + # Visit a Paren node. + def visit_paren(node) + node.copy(lparen: visit(node.lparen), contents: visit(node.contents)) + end + + # Visit a Period node. + def visit_period(node) + node.copy + end + + # Visit a Program node. + def visit_program(node) + node.copy(statements: visit(node.statements)) + end + + # Visit a QSymbols node. + def visit_qsymbols(node) + node.copy( + beginning: visit(node.beginning), + elements: visit_all(node.elements) + ) + end + + # Visit a QSymbolsBeg node. 
+ def visit_qsymbols_beg(node) + node.copy + end + + # Visit a QWords node. + def visit_qwords(node) + node.copy( + beginning: visit(node.beginning), + elements: visit_all(node.elements) + ) + end + + # Visit a QWordsBeg node. + def visit_qwords_beg(node) + node.copy + end + + # Visit a RationalLiteral node. + def visit_rational(node) + node.copy + end + + # Visit a RBrace node. + def visit_rbrace(node) + node.copy + end + + # Visit a RBracket node. + def visit_rbracket(node) + node.copy + end + + # Visit a Redo node. + def visit_redo(node) + node.copy + end + + # Visit a RegexpContent node. + def visit_regexp_content(node) + node.copy(parts: visit_all(node.parts)) + end + + # Visit a RegexpBeg node. + def visit_regexp_beg(node) + node.copy + end + + # Visit a RegexpEnd node. + def visit_regexp_end(node) + node.copy + end + + # Visit a RegexpLiteral node. + def visit_regexp_literal(node) + node.copy(parts: visit_all(node.parts)) + end + + # Visit a RescueEx node. + def visit_rescue_ex(node) + node.copy(variable: visit(node.variable)) + end + + # Visit a Rescue node. + def visit_rescue(node) + node.copy( + keyword: visit(node.keyword), + exception: visit(node.exception), + statements: visit(node.statements), + consequent: visit(node.consequent) + ) + end + + # Visit a RescueMod node. + def visit_rescue_mod(node) + node.copy + end + + # Visit a RestParam node. + def visit_rest_param(node) + node.copy(name: visit(node.name)) + end + + # Visit a Retry node. + def visit_retry(node) + node.copy + end + + # Visit a Return node. + def visit_return(node) + node.copy(arguments: visit(node.arguments)) + end + + # Visit a RParen node. + def visit_rparen(node) + node.copy + end + + # Visit a SClass node. + def visit_sclass(node) + node.copy(bodystmt: visit(node.bodystmt)) + end + + # Visit a Statements node. + def visit_statements(node) + node.copy(body: visit_all(node.body)) + end + + # Visit a StringContent node. 
+ def visit_string_content(node) + node.copy(parts: visit_all(node.parts)) + end + + # Visit a StringConcat node. + def visit_string_concat(node) + node.copy(left: visit(node.left), right: visit(node.right)) + end + + # Visit a StringDVar node. + def visit_string_dvar(node) + node.copy(variable: visit(node.variable)) + end + + # Visit a StringEmbExpr node. + def visit_string_embexpr(node) + node.copy(statements: visit(node.statements)) + end + + # Visit a StringLiteral node. + def visit_string_literal(node) + node.copy(parts: visit_all(node.parts)) + end + + # Visit a Super node. + def visit_super(node) + node.copy(arguments: visit(node.arguments)) + end + + # Visit a SymBeg node. + def visit_symbeg(node) + node.copy + end + + # Visit a SymbolContent node. + def visit_symbol_content(node) + node.copy(value: visit(node.value)) + end + + # Visit a SymbolLiteral node. + def visit_symbol_literal(node) + node.copy(value: visit(node.value)) + end + + # Visit a Symbols node. + def visit_symbols(node) + node.copy( + beginning: visit(node.beginning), + elements: visit_all(node.elements) + ) + end + + # Visit a SymbolsBeg node. + def visit_symbols_beg(node) + node.copy + end + + # Visit a TLambda node. + def visit_tlambda(node) + node.copy + end + + # Visit a TLamBeg node. + def visit_tlambeg(node) + node.copy + end + + # Visit a TopConstField node. + def visit_top_const_field(node) + node.copy(constant: visit(node.constant)) + end + + # Visit a TopConstRef node. + def visit_top_const_ref(node) + node.copy(constant: visit(node.constant)) + end + + # Visit a TStringBeg node. + def visit_tstring_beg(node) + node.copy + end + + # Visit a TStringContent node. + def visit_tstring_content(node) + node.copy + end + + # Visit a TStringEnd node. + def visit_tstring_end(node) + node.copy + end + + # Visit a Not node. + def visit_not(node) + node.copy(statement: visit(node.statement)) + end + + # Visit a Unary node. + def visit_unary(node) + node.copy + end + + # Visit a Undef node. 
+ def visit_undef(node) + node.copy(symbols: visit_all(node.symbols)) + end + + # Visit a UnlessNode node. + def visit_unless(node) + node.copy( + predicate: visit(node.predicate), + statements: visit(node.statements), + consequent: visit(node.consequent) + ) + end + + # Visit a UntilNode node. + def visit_until(node) + node.copy( + predicate: visit(node.predicate), + statements: visit(node.statements) + ) + end + + # Visit a VarField node. + def visit_var_field(node) + node.copy(value: visit(node.value)) + end + + # Visit a VarRef node. + def visit_var_ref(node) + node.copy(value: visit(node.value)) + end + + # Visit a PinnedVarRef node. + def visit_pinned_var_ref(node) + node.copy(value: visit(node.value)) + end + + # Visit a VCall node. + def visit_vcall(node) + node.copy(value: visit(node.value)) + end + + # Visit a VoidStmt node. + def visit_void_stmt(node) + node.copy + end + + # Visit a When node. + def visit_when(node) + node.copy( + arguments: visit(node.arguments), + statements: visit(node.statements), + consequent: visit(node.consequent) + ) + end + + # Visit a WhileNode node. + def visit_while(node) + node.copy( + predicate: visit(node.predicate), + statements: visit(node.statements) + ) + end + + # Visit a Word node. + def visit_word(node) + node.copy(parts: visit_all(node.parts)) + end + + # Visit a Words node. + def visit_words(node) + node.copy( + beginning: visit(node.beginning), + elements: visit_all(node.elements) + ) + end + + # Visit a WordsBeg node. + def visit_words_beg(node) + node.copy + end + + # Visit a XString node. + def visit_xstring(node) + node.copy(parts: visit_all(node.parts)) + end + + # Visit a XStringLiteral node. + def visit_xstring_literal(node) + node.copy(parts: visit_all(node.parts)) + end + + # Visit a YieldNode node. + def visit_yield(node) + node.copy(arguments: visit(node.arguments)) + end + + # Visit a ZSuper node. 
+ def visit_zsuper(node) + node.copy + end + end +end diff --git a/lib/syntax_tree/node.rb b/lib/syntax_tree/node.rb index 0a495890..567ec0c8 100644 --- a/lib/syntax_tree/node.rb +++ b/lib/syntax_tree/node.rb @@ -135,19 +135,19 @@ def end_char end def pretty_print(q) - accept(Visitor::PrettyPrintVisitor.new(q)) + accept(PrettyPrintVisitor.new(q)) end def to_json(*opts) - accept(Visitor::JSONVisitor.new).to_json(*opts) + accept(JSONVisitor.new).to_json(*opts) end def to_mermaid - accept(Visitor::MermaidVisitor.new) + accept(MermaidVisitor.new) end def construct_keys - PrettierPrint.format(+"") { |q| accept(Visitor::MatchVisitor.new(q)) } + PrettierPrint.format(+"") { |q| accept(MatchVisitor.new(q)) } end end diff --git a/lib/syntax_tree/pretty_print_visitor.rb b/lib/syntax_tree/pretty_print_visitor.rb new file mode 100644 index 00000000..894e0cf4 --- /dev/null +++ b/lib/syntax_tree/pretty_print_visitor.rb @@ -0,0 +1,83 @@ +# frozen_string_literal: true + +module SyntaxTree + # This visitor pretty-prints the AST into an equivalent s-expression. + class PrettyPrintVisitor < FieldVisitor + attr_reader :q + + def initialize(q) + @q = q + end + + # This is here because we need to make sure the operator is cast to a string + # before we print it out. + def visit_binary(node) + node(node, "binary") do + field("left", node.left) + text("operator", node.operator.to_s) + field("right", node.right) + comments(node) + end + end + + # This is here to make it a little nicer to look at labels since they + # typically have their : at the end of the value. + def visit_label(node) + node(node, "label") do + q.breakable + q.text(":") + q.text(node.value[0...-1]) + comments(node) + end + end + + private + + def comments(node) + return if node.comments.empty? 
+ + q.breakable + q.group(2, "(", ")") do + q.seplist(node.comments) { |comment| q.pp(comment) } + end + end + + def field(_name, value) + q.breakable + q.pp(value) + end + + def list(_name, values) + q.breakable + q.group(2, "(", ")") { q.seplist(values) { |value| q.pp(value) } } + end + + def node(_node, type) + q.group(2, "(", ")") do + q.text(type) + yield + end + end + + def pairs(_name, values) + q.group(2, "(", ")") do + q.seplist(values) do |(key, value)| + q.pp(key) + + if value + q.text("=") + q.group(2) do + q.breakable("") + q.pp(value) + end + end + end + end + end + + def text(_name, value) + q.breakable + q.text(value) + end + end +end diff --git a/lib/syntax_tree/visitor/environment.rb b/lib/syntax_tree/visitor/environment.rb deleted file mode 100644 index b07a5203..00000000 --- a/lib/syntax_tree/visitor/environment.rb +++ /dev/null @@ -1,84 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - # The environment class is used to keep track of local variables and arguments - # inside a particular scope - class Environment - # This class tracks the occurrences of a local variable or argument - class Local - # [Symbol] The type of the local (e.g. 
:argument, :variable) - attr_reader :type - - # [Array[Location]] The locations of all definitions and assignments of - # this local - attr_reader :definitions - - # [Array[Location]] The locations of all usages of this local - attr_reader :usages - - # initialize: (Symbol type) -> void - def initialize(type) - @type = type - @definitions = [] - @usages = [] - end - - # add_definition: (Location location) -> void - def add_definition(location) - @definitions << location - end - - # add_usage: (Location location) -> void - def add_usage(location) - @usages << location - end - end - - # [Array[Local]] The local variables and arguments defined in this - # environment - attr_reader :locals - - # [Environment | nil] The parent environment - attr_reader :parent - - # initialize: (Environment | nil parent) -> void - def initialize(parent = nil) - @locals = {} - @parent = parent - end - - # Adding a local definition will either insert a new entry in the locals - # hash or append a new definition location to an existing local. Notice that - # it's not possible to change the type of a local after it has been - # registered - # add_local_definition: (Ident | Label identifier, Symbol type) -> void - def add_local_definition(identifier, type) - name = identifier.value.delete_suffix(":") - - @locals[name] ||= Local.new(type) - @locals[name].add_definition(identifier.location) - end - - # Adding a local usage will either insert a new entry in the locals - # hash or append a new usage location to an existing local. 
Notice that - # it's not possible to change the type of a local after it has been - # registered - # add_local_usage: (Ident | Label identifier, Symbol type) -> void - def add_local_usage(identifier, type) - name = identifier.value.delete_suffix(":") - - @locals[name] ||= Local.new(type) - @locals[name].add_usage(identifier.location) - end - - # Try to find the local given its name in this environment or any of its - # parents - # find_local: (String name) -> Local | nil - def find_local(name) - local = @locals[name] - return local unless local.nil? - - @parent&.find_local(name) - end - end -end diff --git a/lib/syntax_tree/visitor/field_visitor.rb b/lib/syntax_tree/visitor/field_visitor.rb deleted file mode 100644 index 6e643e09..00000000 --- a/lib/syntax_tree/visitor/field_visitor.rb +++ /dev/null @@ -1,1031 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - class Visitor - # This is the parent class of a lot of built-in visitors for Syntax Tree. It - # reflects visiting each of the fields on every node in turn. It itself does - # not do anything with these fields, it leaves that behavior up to the - # subclass to implement. - # - # In order to properly use this class, you will need to subclass it and - # implement #comments, #field, #list, #node, #pairs, and #text. Those are - # documented here. - # - # == comments(node) - # - # This accepts the node that is being visited and does something depending - # on the comments attached to the node. - # - # == field(name, value) - # - # This accepts the name of the field being visited as a string (like - # "value") and the actual value of that field. The value can be a subclass - # of Node or any other type that can be held within the tree. - # - # == list(name, values) - # - # This accepts the name of the field being visited as well as a list of - # values. This is used, for example, when visiting something like the body - # of a Statements node. 
- # - # == node(name, node) - # - # This is the parent serialization method for each node. It is called with - # the node itself, as well as the type of the node as a string. The type - # is an internally used value that usually resembles the name of the - # ripper event that generated the node. The method should yield to the - # given block which then calls through to visit each of the fields on the - # node. - # - # == text(name, value) - # - # This accepts the name of the field being visited as well as a string - # value representing the value of the field. - # - # == pairs(name, values) - # - # This accepts the name of the field being visited as well as a list of - # pairs that represent the value of the field. It is used only in a couple - # of circumstances, like when visiting the list of optional parameters - # defined on a method. - # - class FieldVisitor < BasicVisitor - def visit_aref(node) - node(node, "aref") do - field("collection", node.collection) - field("index", node.index) - comments(node) - end - end - - def visit_aref_field(node) - node(node, "aref_field") do - field("collection", node.collection) - field("index", node.index) - comments(node) - end - end - - def visit_alias(node) - node(node, "alias") do - field("left", node.left) - field("right", node.right) - comments(node) - end - end - - def visit_arg_block(node) - node(node, "arg_block") do - field("value", node.value) if node.value - comments(node) - end - end - - def visit_arg_paren(node) - node(node, "arg_paren") do - field("arguments", node.arguments) - comments(node) - end - end - - def visit_arg_star(node) - node(node, "arg_star") do - field("value", node.value) - comments(node) - end - end - - def visit_args(node) - node(node, "args") do - list("parts", node.parts) - comments(node) - end - end - - def visit_args_forward(node) - node(node, "args_forward") { comments(node) } - end - - def visit_array(node) - node(node, "array") do - field("contents", node.contents) - comments(node) - 
end - end - - def visit_aryptn(node) - node(node, "aryptn") do - field("constant", node.constant) if node.constant - list("requireds", node.requireds) if node.requireds.any? - field("rest", node.rest) if node.rest - list("posts", node.posts) if node.posts.any? - comments(node) - end - end - - def visit_assign(node) - node(node, "assign") do - field("target", node.target) - field("value", node.value) - comments(node) - end - end - - def visit_assoc(node) - node(node, "assoc") do - field("key", node.key) - field("value", node.value) if node.value - comments(node) - end - end - - def visit_assoc_splat(node) - node(node, "assoc_splat") do - field("value", node.value) - comments(node) - end - end - - def visit_backref(node) - visit_token(node, "backref") - end - - def visit_backtick(node) - visit_token(node, "backtick") - end - - def visit_bare_assoc_hash(node) - node(node, "bare_assoc_hash") do - list("assocs", node.assocs) - comments(node) - end - end - - def visit_BEGIN(node) - node(node, "BEGIN") do - field("statements", node.statements) - comments(node) - end - end - - def visit_begin(node) - node(node, "begin") do - field("bodystmt", node.bodystmt) - comments(node) - end - end - - def visit_binary(node) - node(node, "binary") do - field("left", node.left) - text("operator", node.operator) - field("right", node.right) - comments(node) - end - end - - def visit_block(node) - node(node, "block") do - field("block_var", node.block_var) if node.block_var - field("bodystmt", node.bodystmt) - comments(node) - end - end - - def visit_blockarg(node) - node(node, "blockarg") do - field("name", node.name) if node.name - comments(node) - end - end - - def visit_block_var(node) - node(node, "block_var") do - field("params", node.params) - list("locals", node.locals) if node.locals.any? 
- comments(node) - end - end - - def visit_bodystmt(node) - node(node, "bodystmt") do - field("statements", node.statements) - field("rescue_clause", node.rescue_clause) if node.rescue_clause - field("else_clause", node.else_clause) if node.else_clause - field("ensure_clause", node.ensure_clause) if node.ensure_clause - comments(node) - end - end - - def visit_break(node) - node(node, "break") do - field("arguments", node.arguments) - comments(node) - end - end - - def visit_call(node) - node(node, "call") do - field("receiver", node.receiver) - field("operator", node.operator) - field("message", node.message) - field("arguments", node.arguments) if node.arguments - comments(node) - end - end - - def visit_case(node) - node(node, "case") do - field("keyword", node.keyword) - field("value", node.value) if node.value - field("consequent", node.consequent) - comments(node) - end - end - - def visit_CHAR(node) - visit_token(node, "CHAR") - end - - def visit_class(node) - node(node, "class") do - field("constant", node.constant) - field("superclass", node.superclass) if node.superclass - field("bodystmt", node.bodystmt) - comments(node) - end - end - - def visit_comma(node) - node(node, "comma") { field("value", node.value) } - end - - def visit_command(node) - node(node, "command") do - field("message", node.message) - field("arguments", node.arguments) - comments(node) - end - end - - def visit_command_call(node) - node(node, "command_call") do - field("receiver", node.receiver) - field("operator", node.operator) - field("message", node.message) - field("arguments", node.arguments) if node.arguments - comments(node) - end - end - - def visit_comment(node) - node(node, "comment") { field("value", node.value) } - end - - def visit_const(node) - visit_token(node, "const") - end - - def visit_const_path_field(node) - node(node, "const_path_field") do - field("parent", node.parent) - field("constant", node.constant) - comments(node) - end - end - - def 
visit_const_path_ref(node) - node(node, "const_path_ref") do - field("parent", node.parent) - field("constant", node.constant) - comments(node) - end - end - - def visit_const_ref(node) - node(node, "const_ref") do - field("constant", node.constant) - comments(node) - end - end - - def visit_cvar(node) - visit_token(node, "cvar") - end - - def visit_def(node) - node(node, "def") do - field("target", node.target) - field("operator", node.operator) - field("name", node.name) - field("params", node.params) - field("bodystmt", node.bodystmt) - comments(node) - end - end - - def visit_defined(node) - node(node, "defined") do - field("value", node.value) - comments(node) - end - end - - def visit_dyna_symbol(node) - node(node, "dyna_symbol") do - list("parts", node.parts) - comments(node) - end - end - - def visit_END(node) - node(node, "END") do - field("statements", node.statements) - comments(node) - end - end - - def visit_else(node) - node(node, "else") do - field("statements", node.statements) - comments(node) - end - end - - def visit_elsif(node) - node(node, "elsif") do - field("predicate", node.predicate) - field("statements", node.statements) - field("consequent", node.consequent) if node.consequent - comments(node) - end - end - - def visit_embdoc(node) - node(node, "embdoc") { field("value", node.value) } - end - - def visit_embexpr_beg(node) - node(node, "embexpr_beg") { field("value", node.value) } - end - - def visit_embexpr_end(node) - node(node, "embexpr_end") { field("value", node.value) } - end - - def visit_embvar(node) - node(node, "embvar") { field("value", node.value) } - end - - def visit_ensure(node) - node(node, "ensure") do - field("statements", node.statements) - comments(node) - end - end - - def visit_excessed_comma(node) - visit_token(node, "excessed_comma") - end - - def visit_field(node) - node(node, "field") do - field("parent", node.parent) - field("operator", node.operator) - field("name", node.name) - comments(node) - end - end - - 
def visit_float(node) - visit_token(node, "float") - end - - def visit_fndptn(node) - node(node, "fndptn") do - field("constant", node.constant) if node.constant - field("left", node.left) - list("values", node.values) - field("right", node.right) - comments(node) - end - end - - def visit_for(node) - node(node, "for") do - field("index", node.index) - field("collection", node.collection) - field("statements", node.statements) - comments(node) - end - end - - def visit_gvar(node) - visit_token(node, "gvar") - end - - def visit_hash(node) - node(node, "hash") do - list("assocs", node.assocs) if node.assocs.any? - comments(node) - end - end - - def visit_heredoc(node) - node(node, "heredoc") do - list("parts", node.parts) - comments(node) - end - end - - def visit_heredoc_beg(node) - visit_token(node, "heredoc_beg") - end - - def visit_heredoc_end(node) - visit_token(node, "heredoc_end") - end - - def visit_hshptn(node) - node(node, "hshptn") do - field("constant", node.constant) if node.constant - pairs("keywords", node.keywords) if node.keywords.any? 
- field("keyword_rest", node.keyword_rest) if node.keyword_rest - comments(node) - end - end - - def visit_ident(node) - visit_token(node, "ident") - end - - def visit_if(node) - node(node, "if") do - field("predicate", node.predicate) - field("statements", node.statements) - field("consequent", node.consequent) if node.consequent - comments(node) - end - end - - def visit_if_op(node) - node(node, "if_op") do - field("predicate", node.predicate) - field("truthy", node.truthy) - field("falsy", node.falsy) - comments(node) - end - end - - def visit_imaginary(node) - visit_token(node, "imaginary") - end - - def visit_in(node) - node(node, "in") do - field("pattern", node.pattern) - field("statements", node.statements) - field("consequent", node.consequent) if node.consequent - comments(node) - end - end - - def visit_int(node) - visit_token(node, "int") - end - - def visit_ivar(node) - visit_token(node, "ivar") - end - - def visit_kw(node) - visit_token(node, "kw") - end - - def visit_kwrest_param(node) - node(node, "kwrest_param") do - field("name", node.name) - comments(node) - end - end - - def visit_label(node) - visit_token(node, "label") - end - - def visit_label_end(node) - node(node, "label_end") { field("value", node.value) } - end - - def visit_lambda(node) - node(node, "lambda") do - field("params", node.params) - field("statements", node.statements) - comments(node) - end - end - - def visit_lambda_var(node) - node(node, "lambda_var") do - field("params", node.params) - list("locals", node.locals) if node.locals.any? 
- comments(node) - end - end - - def visit_lbrace(node) - visit_token(node, "lbrace") - end - - def visit_lbracket(node) - visit_token(node, "lbracket") - end - - def visit_lparen(node) - visit_token(node, "lparen") - end - - def visit_massign(node) - node(node, "massign") do - field("target", node.target) - field("value", node.value) - comments(node) - end - end - - def visit_method_add_block(node) - node(node, "method_add_block") do - field("call", node.call) - field("block", node.block) - comments(node) - end - end - - def visit_mlhs(node) - node(node, "mlhs") do - list("parts", node.parts) - comments(node) - end - end - - def visit_mlhs_paren(node) - node(node, "mlhs_paren") do - field("contents", node.contents) - comments(node) - end - end - - def visit_module(node) - node(node, "module") do - field("constant", node.constant) - field("bodystmt", node.bodystmt) - comments(node) - end - end - - def visit_mrhs(node) - node(node, "mrhs") do - list("parts", node.parts) - comments(node) - end - end - - def visit_next(node) - node(node, "next") do - field("arguments", node.arguments) - comments(node) - end - end - - def visit_not(node) - node(node, "not") do - field("statement", node.statement) - comments(node) - end - end - - def visit_op(node) - visit_token(node, "op") - end - - def visit_opassign(node) - node(node, "opassign") do - field("target", node.target) - field("operator", node.operator) - field("value", node.value) - comments(node) - end - end - - def visit_params(node) - node(node, "params") do - list("requireds", node.requireds) if node.requireds.any? - pairs("optionals", node.optionals) if node.optionals.any? - field("rest", node.rest) if node.rest - list("posts", node.posts) if node.posts.any? - pairs("keywords", node.keywords) if node.keywords.any? 
- field("keyword_rest", node.keyword_rest) if node.keyword_rest - field("block", node.block) if node.block - comments(node) - end - end - - def visit_paren(node) - node(node, "paren") do - field("contents", node.contents) - comments(node) - end - end - - def visit_period(node) - visit_token(node, "period") - end - - def visit_pinned_begin(node) - node(node, "pinned_begin") do - field("statement", node.statement) - comments(node) - end - end - - def visit_pinned_var_ref(node) - node(node, "pinned_var_ref") do - field("value", node.value) - comments(node) - end - end - - def visit_program(node) - node(node, "program") do - field("statements", node.statements) - comments(node) - end - end - - def visit_qsymbols(node) - node(node, "qsymbols") do - list("elements", node.elements) - comments(node) - end - end - - def visit_qsymbols_beg(node) - node(node, "qsymbols_beg") { field("value", node.value) } - end - - def visit_qwords(node) - node(node, "qwords") do - list("elements", node.elements) - comments(node) - end - end - - def visit_qwords_beg(node) - node(node, "qwords_beg") { field("value", node.value) } - end - - def visit_range(node) - node(node, "range") do - field("left", node.left) if node.left - field("operator", node.operator) - field("right", node.right) if node.right - comments(node) - end - end - - def visit_rassign(node) - node(node, "rassign") do - field("value", node.value) - field("operator", node.operator) - field("pattern", node.pattern) - comments(node) - end - end - - def visit_rational(node) - visit_token(node, "rational") - end - - def visit_rbrace(node) - node(node, "rbrace") { field("value", node.value) } - end - - def visit_rbracket(node) - node(node, "rbracket") { field("value", node.value) } - end - - def visit_redo(node) - node(node, "redo") { comments(node) } - end - - def visit_regexp_beg(node) - node(node, "regexp_beg") { field("value", node.value) } - end - - def visit_regexp_content(node) - node(node, "regexp_content") { list("parts", 
node.parts) } - end - - def visit_regexp_end(node) - node(node, "regexp_end") { field("value", node.value) } - end - - def visit_regexp_literal(node) - node(node, "regexp_literal") do - list("parts", node.parts) - field("options", node.options) - comments(node) - end - end - - def visit_rescue(node) - node(node, "rescue") do - field("exception", node.exception) if node.exception - field("statements", node.statements) - field("consequent", node.consequent) if node.consequent - comments(node) - end - end - - def visit_rescue_ex(node) - node(node, "rescue_ex") do - field("exceptions", node.exceptions) - field("variable", node.variable) - comments(node) - end - end - - def visit_rescue_mod(node) - node(node, "rescue_mod") do - field("statement", node.statement) - field("value", node.value) - comments(node) - end - end - - def visit_rest_param(node) - node(node, "rest_param") do - field("name", node.name) - comments(node) - end - end - - def visit_retry(node) - node(node, "retry") { comments(node) } - end - - def visit_return(node) - node(node, "return") do - field("arguments", node.arguments) - comments(node) - end - end - - def visit_rparen(node) - node(node, "rparen") { field("value", node.value) } - end - - def visit_sclass(node) - node(node, "sclass") do - field("target", node.target) - field("bodystmt", node.bodystmt) - comments(node) - end - end - - def visit_statements(node) - node(node, "statements") do - list("body", node.body) - comments(node) - end - end - - def visit_string_concat(node) - node(node, "string_concat") do - field("left", node.left) - field("right", node.right) - comments(node) - end - end - - def visit_string_content(node) - node(node, "string_content") { list("parts", node.parts) } - end - - def visit_string_dvar(node) - node(node, "string_dvar") do - field("variable", node.variable) - comments(node) - end - end - - def visit_string_embexpr(node) - node(node, "string_embexpr") do - field("statements", node.statements) - comments(node) - end - 
end - - def visit_string_literal(node) - node(node, "string_literal") do - list("parts", node.parts) - comments(node) - end - end - - def visit_super(node) - node(node, "super") do - field("arguments", node.arguments) - comments(node) - end - end - - def visit_symbeg(node) - node(node, "symbeg") { field("value", node.value) } - end - - def visit_symbol_content(node) - node(node, "symbol_content") { field("value", node.value) } - end - - def visit_symbol_literal(node) - node(node, "symbol_literal") do - field("value", node.value) - comments(node) - end - end - - def visit_symbols(node) - node(node, "symbols") do - list("elements", node.elements) - comments(node) - end - end - - def visit_symbols_beg(node) - node(node, "symbols_beg") { field("value", node.value) } - end - - def visit_tlambda(node) - node(node, "tlambda") { field("value", node.value) } - end - - def visit_tlambeg(node) - node(node, "tlambeg") { field("value", node.value) } - end - - def visit_top_const_field(node) - node(node, "top_const_field") do - field("constant", node.constant) - comments(node) - end - end - - def visit_top_const_ref(node) - node(node, "top_const_ref") do - field("constant", node.constant) - comments(node) - end - end - - def visit_tstring_beg(node) - node(node, "tstring_beg") { field("value", node.value) } - end - - def visit_tstring_content(node) - visit_token(node, "tstring_content") - end - - def visit_tstring_end(node) - node(node, "tstring_end") { field("value", node.value) } - end - - def visit_unary(node) - node(node, "unary") do - field("operator", node.operator) - field("statement", node.statement) - comments(node) - end - end - - def visit_undef(node) - node(node, "undef") do - list("symbols", node.symbols) - comments(node) - end - end - - def visit_unless(node) - node(node, "unless") do - field("predicate", node.predicate) - field("statements", node.statements) - field("consequent", node.consequent) if node.consequent - comments(node) - end - end - - def 
visit_until(node) - node(node, "until") do - field("predicate", node.predicate) - field("statements", node.statements) - comments(node) - end - end - - def visit_var_field(node) - node(node, "var_field") do - field("value", node.value) - comments(node) - end - end - - def visit_var_ref(node) - node(node, "var_ref") do - field("value", node.value) - comments(node) - end - end - - def visit_vcall(node) - node(node, "vcall") do - field("value", node.value) - comments(node) - end - end - - def visit_void_stmt(node) - node(node, "void_stmt") { comments(node) } - end - - def visit_when(node) - node(node, "when") do - field("arguments", node.arguments) - field("statements", node.statements) - field("consequent", node.consequent) if node.consequent - comments(node) - end - end - - def visit_while(node) - node(node, "while") do - field("predicate", node.predicate) - field("statements", node.statements) - comments(node) - end - end - - def visit_word(node) - node(node, "word") do - list("parts", node.parts) - comments(node) - end - end - - def visit_words(node) - node(node, "words") do - list("elements", node.elements) - comments(node) - end - end - - def visit_words_beg(node) - node(node, "words_beg") { field("value", node.value) } - end - - def visit_xstring(node) - node(node, "xstring") { list("parts", node.parts) } - end - - def visit_xstring_literal(node) - node(node, "xstring_literal") do - list("parts", node.parts) - comments(node) - end - end - - def visit_yield(node) - node(node, "yield") do - field("arguments", node.arguments) - comments(node) - end - end - - def visit_zsuper(node) - node(node, "zsuper") { comments(node) } - end - - def visit___end__(node) - visit_token(node, "__end__") - end - - private - - def visit_token(node, type) - node(node, type) do - field("value", node.value) - comments(node) - end - end - end - end -end diff --git a/lib/syntax_tree/visitor/json_visitor.rb b/lib/syntax_tree/visitor/json_visitor.rb deleted file mode 100644 index 
b516980c..00000000 --- a/lib/syntax_tree/visitor/json_visitor.rb +++ /dev/null @@ -1,55 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - class Visitor - # This visitor transforms the AST into a hash that contains only primitives - # that can be easily serialized into JSON. - class JSONVisitor < FieldVisitor - attr_reader :target - - def initialize - @target = nil - end - - private - - def comments(node) - target[:comments] = visit_all(node.comments) - end - - def field(name, value) - target[name] = value.is_a?(Node) ? visit(value) : value - end - - def list(name, values) - target[name] = visit_all(values) - end - - def node(node, type) - previous = @target - @target = { type: type, location: visit_location(node.location) } - yield - @target - ensure - @target = previous - end - - def pairs(name, values) - target[name] = values.map { |(key, value)| [visit(key), visit(value)] } - end - - def text(name, value) - target[name] = value - end - - def visit_location(location) - [ - location.start_line, - location.start_char, - location.end_line, - location.end_char - ] - end - end - end -end diff --git a/lib/syntax_tree/visitor/match_visitor.rb b/lib/syntax_tree/visitor/match_visitor.rb deleted file mode 100644 index e0bdaf08..00000000 --- a/lib/syntax_tree/visitor/match_visitor.rb +++ /dev/null @@ -1,122 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - class Visitor - # This visitor transforms the AST into a Ruby pattern matching expression - # that would match correctly against the AST. - class MatchVisitor < FieldVisitor - attr_reader :q - - def initialize(q) - @q = q - end - - def visit(node) - case node - when Node - super - when String - # pp will split up a string on newlines and concat them together using - # a "+" operator. This breaks the pattern matching expression. So - # instead we're going to check here for strings and manually put the - # entire value into the output buffer. 
- q.text(node.inspect) - else - node.pretty_print(q) - end - end - - private - - def comments(node) - return if node.comments.empty? - - q.nest(0) do - q.text("comments: [") - q.indent do - q.breakable("") - q.seplist(node.comments) { |comment| visit(comment) } - end - q.breakable("") - q.text("]") - end - end - - def field(name, value) - q.nest(0) do - q.text(name) - q.text(": ") - visit(value) - end - end - - def list(name, values) - q.group do - q.text(name) - q.text(": [") - q.indent do - q.breakable("") - q.seplist(values) { |value| visit(value) } - end - q.breakable("") - q.text("]") - end - end - - def node(node, _type) - items = [] - q.with_target(items) { yield } - - if items.empty? - q.text(node.class.name) - return - end - - q.group do - q.text(node.class.name) - q.text("[") - q.indent do - q.breakable("") - q.seplist(items) { |item| q.target << item } - end - q.breakable("") - q.text("]") - end - end - - def pairs(name, values) - q.group do - q.text(name) - q.text(": [") - q.indent do - q.breakable("") - q.seplist(values) do |(key, value)| - q.group do - q.text("[") - q.indent do - q.breakable("") - visit(key) - q.text(",") - q.breakable - visit(value || nil) - end - q.breakable("") - q.text("]") - end - end - end - q.breakable("") - q.text("]") - end - end - - def text(name, value) - q.nest(0) do - q.text(name) - q.text(": ") - value.pretty_print(q) - end - end - end - end -end diff --git a/lib/syntax_tree/visitor/mermaid_visitor.rb b/lib/syntax_tree/visitor/mermaid_visitor.rb deleted file mode 100644 index 504e2fb0..00000000 --- a/lib/syntax_tree/visitor/mermaid_visitor.rb +++ /dev/null @@ -1,75 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - class Visitor - # This visitor transforms the AST into a mermaid flow chart. 
- class MermaidVisitor < FieldVisitor - attr_reader :flowchart, :target - - def initialize - @flowchart = Mermaid.flowchart - @target = nil - end - - def visit_program(node) - super - flowchart.render - end - - private - - def comments(node) - # Ignore - end - - def field(name, value) - case value - when nil - # skip - when Node - flowchart.link(target, visit(value), name) - else - to = - flowchart.node( - "#{target.id}_#{name}", - value.inspect, - shape: :stadium - ) - flowchart.link(target, to, name) - end - end - - def list(name, values) - values.each_with_index do |value, index| - field("#{name}[#{index}]", value) - end - end - - def node(node, type) - previous_target = target - - begin - @target = flowchart.node("node_#{node.object_id}", type) - yield - @target - ensure - @target = previous_target - end - end - - def pairs(name, values) - values.each_with_index do |(key, value), index| - to = flowchart.node("#{target.id}_#{name}_#{index}", shape: :circle) - - flowchart.link(target, to, "#{name}[#{index}]") - flowchart.link(to, visit(key), "[0]") - flowchart.link(to, visit(value), "[1]") if value - end - end - - def text(name, value) - field(name, value) - end - end - end -end diff --git a/lib/syntax_tree/visitor/mutation_visitor.rb b/lib/syntax_tree/visitor/mutation_visitor.rb deleted file mode 100644 index 65f8c5ba..00000000 --- a/lib/syntax_tree/visitor/mutation_visitor.rb +++ /dev/null @@ -1,924 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - class Visitor - # This visitor walks through the tree and copies each node as it is being - # visited. This is useful for mutating the tree before it is formatted. - class MutationVisitor < BasicVisitor - attr_reader :mutations - - def initialize - @mutations = [] - end - - # Create a new mutation based on the given query that will mutate the node - # using the given block. The block should return a new node that will take - # the place of the given node in the tree. 
These blocks frequently make - # use of the `copy` method on nodes to create a new node with the same - # properties as the original node. - def mutate(query, &block) - mutations << [Pattern.new(query).compile, block] - end - - # This is the base visit method for each node in the tree. It first - # creates a copy of the node using the visit_* methods defined below. Then - # it checks each mutation in sequence and calls it if it finds a match. - def visit(node) - return unless node - result = node.accept(self) - - mutations.each do |(pattern, mutation)| - result = mutation.call(result) if pattern.call(result) - end - - result - end - - # Visit a BEGINBlock node. - def visit_BEGIN(node) - node.copy( - lbrace: visit(node.lbrace), - statements: visit(node.statements) - ) - end - - # Visit a CHAR node. - def visit_CHAR(node) - node.copy - end - - # Visit a ENDBlock node. - def visit_END(node) - node.copy( - lbrace: visit(node.lbrace), - statements: visit(node.statements) - ) - end - - # Visit a EndContent node. - def visit___end__(node) - node.copy - end - - # Visit a AliasNode node. - def visit_alias(node) - node.copy(left: visit(node.left), right: visit(node.right)) - end - - # Visit a ARef node. - def visit_aref(node) - node.copy(index: visit(node.index)) - end - - # Visit a ARefField node. - def visit_aref_field(node) - node.copy(index: visit(node.index)) - end - - # Visit a ArgParen node. - def visit_arg_paren(node) - node.copy(arguments: visit(node.arguments)) - end - - # Visit a Args node. - def visit_args(node) - node.copy(parts: visit_all(node.parts)) - end - - # Visit a ArgBlock node. - def visit_arg_block(node) - node.copy(value: visit(node.value)) - end - - # Visit a ArgStar node. - def visit_arg_star(node) - node.copy(value: visit(node.value)) - end - - # Visit a ArgsForward node. - def visit_args_forward(node) - node.copy - end - - # Visit a ArrayLiteral node. 
- def visit_array(node) - node.copy( - lbracket: visit(node.lbracket), - contents: visit(node.contents) - ) - end - - # Visit a AryPtn node. - def visit_aryptn(node) - node.copy( - constant: visit(node.constant), - requireds: visit_all(node.requireds), - rest: visit(node.rest), - posts: visit_all(node.posts) - ) - end - - # Visit a Assign node. - def visit_assign(node) - node.copy(target: visit(node.target)) - end - - # Visit a Assoc node. - def visit_assoc(node) - node.copy - end - - # Visit a AssocSplat node. - def visit_assoc_splat(node) - node.copy - end - - # Visit a Backref node. - def visit_backref(node) - node.copy - end - - # Visit a Backtick node. - def visit_backtick(node) - node.copy - end - - # Visit a BareAssocHash node. - def visit_bare_assoc_hash(node) - node.copy(assocs: visit_all(node.assocs)) - end - - # Visit a Begin node. - def visit_begin(node) - node.copy(bodystmt: visit(node.bodystmt)) - end - - # Visit a PinnedBegin node. - def visit_pinned_begin(node) - node.copy - end - - # Visit a Binary node. - def visit_binary(node) - node.copy - end - - # Visit a BlockVar node. - def visit_block_var(node) - node.copy(params: visit(node.params), locals: visit_all(node.locals)) - end - - # Visit a BlockArg node. - def visit_blockarg(node) - node.copy(name: visit(node.name)) - end - - # Visit a BodyStmt node. - def visit_bodystmt(node) - node.copy( - statements: visit(node.statements), - rescue_clause: visit(node.rescue_clause), - else_clause: visit(node.else_clause), - ensure_clause: visit(node.ensure_clause) - ) - end - - # Visit a Break node. - def visit_break(node) - node.copy(arguments: visit(node.arguments)) - end - - # Visit a Call node. - def visit_call(node) - node.copy( - receiver: visit(node.receiver), - operator: node.operator == :"::" ? :"::" : visit(node.operator), - message: node.message == :call ? :call : visit(node.message), - arguments: visit(node.arguments) - ) - end - - # Visit a Case node. 
- def visit_case(node) - node.copy( - keyword: visit(node.keyword), - value: visit(node.value), - consequent: visit(node.consequent) - ) - end - - # Visit a RAssign node. - def visit_rassign(node) - node.copy(operator: visit(node.operator)) - end - - # Visit a ClassDeclaration node. - def visit_class(node) - node.copy( - constant: visit(node.constant), - superclass: visit(node.superclass), - bodystmt: visit(node.bodystmt) - ) - end - - # Visit a Comma node. - def visit_comma(node) - node.copy - end - - # Visit a Command node. - def visit_command(node) - node.copy( - message: visit(node.message), - arguments: visit(node.arguments), - block: visit(node.block) - ) - end - - # Visit a CommandCall node. - def visit_command_call(node) - node.copy( - operator: node.operator == :"::" ? :"::" : visit(node.operator), - message: visit(node.message), - arguments: visit(node.arguments), - block: visit(node.block) - ) - end - - # Visit a Comment node. - def visit_comment(node) - node.copy - end - - # Visit a Const node. - def visit_const(node) - node.copy - end - - # Visit a ConstPathField node. - def visit_const_path_field(node) - node.copy(constant: visit(node.constant)) - end - - # Visit a ConstPathRef node. - def visit_const_path_ref(node) - node.copy(constant: visit(node.constant)) - end - - # Visit a ConstRef node. - def visit_const_ref(node) - node.copy(constant: visit(node.constant)) - end - - # Visit a CVar node. - def visit_cvar(node) - node.copy - end - - # Visit a Def node. - def visit_def(node) - node.copy( - target: visit(node.target), - operator: visit(node.operator), - name: visit(node.name), - params: visit(node.params), - bodystmt: visit(node.bodystmt) - ) - end - - # Visit a Defined node. - def visit_defined(node) - node.copy - end - - # Visit a Block node. - def visit_block(node) - node.copy( - opening: visit(node.opening), - block_var: visit(node.block_var), - bodystmt: visit(node.bodystmt) - ) - end - - # Visit a RangeNode node. 
- def visit_range(node) - node.copy( - left: visit(node.left), - operator: visit(node.operator), - right: visit(node.right) - ) - end - - # Visit a DynaSymbol node. - def visit_dyna_symbol(node) - node.copy(parts: visit_all(node.parts)) - end - - # Visit a Else node. - def visit_else(node) - node.copy( - keyword: visit(node.keyword), - statements: visit(node.statements) - ) - end - - # Visit a Elsif node. - def visit_elsif(node) - node.copy( - statements: visit(node.statements), - consequent: visit(node.consequent) - ) - end - - # Visit a EmbDoc node. - def visit_embdoc(node) - node.copy - end - - # Visit a EmbExprBeg node. - def visit_embexpr_beg(node) - node.copy - end - - # Visit a EmbExprEnd node. - def visit_embexpr_end(node) - node.copy - end - - # Visit a EmbVar node. - def visit_embvar(node) - node.copy - end - - # Visit a Ensure node. - def visit_ensure(node) - node.copy( - keyword: visit(node.keyword), - statements: visit(node.statements) - ) - end - - # Visit a ExcessedComma node. - def visit_excessed_comma(node) - node.copy - end - - # Visit a Field node. - def visit_field(node) - node.copy( - operator: node.operator == :"::" ? :"::" : visit(node.operator), - name: visit(node.name) - ) - end - - # Visit a FloatLiteral node. - def visit_float(node) - node.copy - end - - # Visit a FndPtn node. - def visit_fndptn(node) - node.copy( - constant: visit(node.constant), - left: visit(node.left), - values: visit_all(node.values), - right: visit(node.right) - ) - end - - # Visit a For node. - def visit_for(node) - node.copy(index: visit(node.index), statements: visit(node.statements)) - end - - # Visit a GVar node. - def visit_gvar(node) - node.copy - end - - # Visit a HashLiteral node. - def visit_hash(node) - node.copy(lbrace: visit(node.lbrace), assocs: visit_all(node.assocs)) - end - - # Visit a Heredoc node. 
- def visit_heredoc(node) - node.copy( - beginning: visit(node.beginning), - ending: visit(node.ending), - parts: visit_all(node.parts) - ) - end - - # Visit a HeredocBeg node. - def visit_heredoc_beg(node) - node.copy - end - - # Visit a HeredocEnd node. - def visit_heredoc_end(node) - node.copy - end - - # Visit a HshPtn node. - def visit_hshptn(node) - node.copy( - constant: visit(node.constant), - keywords: - node.keywords.map { |label, value| [visit(label), visit(value)] }, - keyword_rest: visit(node.keyword_rest) - ) - end - - # Visit a Ident node. - def visit_ident(node) - node.copy - end - - # Visit a IfNode node. - def visit_if(node) - node.copy( - predicate: visit(node.predicate), - statements: visit(node.statements), - consequent: visit(node.consequent) - ) - end - - # Visit a IfOp node. - def visit_if_op(node) - node.copy - end - - # Visit a Imaginary node. - def visit_imaginary(node) - node.copy - end - - # Visit a In node. - def visit_in(node) - node.copy( - statements: visit(node.statements), - consequent: visit(node.consequent) - ) - end - - # Visit a Int node. - def visit_int(node) - node.copy - end - - # Visit a IVar node. - def visit_ivar(node) - node.copy - end - - # Visit a Kw node. - def visit_kw(node) - node.copy - end - - # Visit a KwRestParam node. - def visit_kwrest_param(node) - node.copy(name: visit(node.name)) - end - - # Visit a Label node. - def visit_label(node) - node.copy - end - - # Visit a LabelEnd node. - def visit_label_end(node) - node.copy - end - - # Visit a Lambda node. - def visit_lambda(node) - node.copy( - params: visit(node.params), - statements: visit(node.statements) - ) - end - - # Visit a LambdaVar node. - def visit_lambda_var(node) - node.copy(params: visit(node.params), locals: visit_all(node.locals)) - end - - # Visit a LBrace node. - def visit_lbrace(node) - node.copy - end - - # Visit a LBracket node. - def visit_lbracket(node) - node.copy - end - - # Visit a LParen node. 
- def visit_lparen(node) - node.copy - end - - # Visit a MAssign node. - def visit_massign(node) - node.copy(target: visit(node.target)) - end - - # Visit a MethodAddBlock node. - def visit_method_add_block(node) - node.copy(call: visit(node.call), block: visit(node.block)) - end - - # Visit a MLHS node. - def visit_mlhs(node) - node.copy(parts: visit_all(node.parts)) - end - - # Visit a MLHSParen node. - def visit_mlhs_paren(node) - node.copy(contents: visit(node.contents)) - end - - # Visit a ModuleDeclaration node. - def visit_module(node) - node.copy( - constant: visit(node.constant), - bodystmt: visit(node.bodystmt) - ) - end - - # Visit a MRHS node. - def visit_mrhs(node) - node.copy(parts: visit_all(node.parts)) - end - - # Visit a Next node. - def visit_next(node) - node.copy(arguments: visit(node.arguments)) - end - - # Visit a Op node. - def visit_op(node) - node.copy - end - - # Visit a OpAssign node. - def visit_opassign(node) - node.copy(target: visit(node.target), operator: visit(node.operator)) - end - - # Visit a Params node. - def visit_params(node) - node.copy( - requireds: visit_all(node.requireds), - optionals: - node.optionals.map { |ident, value| [visit(ident), visit(value)] }, - rest: visit(node.rest), - posts: visit_all(node.posts), - keywords: - node.keywords.map { |ident, value| [visit(ident), visit(value)] }, - keyword_rest: - node.keyword_rest == :nil ? :nil : visit(node.keyword_rest), - block: visit(node.block) - ) - end - - # Visit a Paren node. - def visit_paren(node) - node.copy(lparen: visit(node.lparen), contents: visit(node.contents)) - end - - # Visit a Period node. - def visit_period(node) - node.copy - end - - # Visit a Program node. - def visit_program(node) - node.copy(statements: visit(node.statements)) - end - - # Visit a QSymbols node. - def visit_qsymbols(node) - node.copy( - beginning: visit(node.beginning), - elements: visit_all(node.elements) - ) - end - - # Visit a QSymbolsBeg node. 
- def visit_qsymbols_beg(node) - node.copy - end - - # Visit a QWords node. - def visit_qwords(node) - node.copy( - beginning: visit(node.beginning), - elements: visit_all(node.elements) - ) - end - - # Visit a QWordsBeg node. - def visit_qwords_beg(node) - node.copy - end - - # Visit a RationalLiteral node. - def visit_rational(node) - node.copy - end - - # Visit a RBrace node. - def visit_rbrace(node) - node.copy - end - - # Visit a RBracket node. - def visit_rbracket(node) - node.copy - end - - # Visit a Redo node. - def visit_redo(node) - node.copy - end - - # Visit a RegexpContent node. - def visit_regexp_content(node) - node.copy(parts: visit_all(node.parts)) - end - - # Visit a RegexpBeg node. - def visit_regexp_beg(node) - node.copy - end - - # Visit a RegexpEnd node. - def visit_regexp_end(node) - node.copy - end - - # Visit a RegexpLiteral node. - def visit_regexp_literal(node) - node.copy(parts: visit_all(node.parts)) - end - - # Visit a RescueEx node. - def visit_rescue_ex(node) - node.copy(variable: visit(node.variable)) - end - - # Visit a Rescue node. - def visit_rescue(node) - node.copy( - keyword: visit(node.keyword), - exception: visit(node.exception), - statements: visit(node.statements), - consequent: visit(node.consequent) - ) - end - - # Visit a RescueMod node. - def visit_rescue_mod(node) - node.copy - end - - # Visit a RestParam node. - def visit_rest_param(node) - node.copy(name: visit(node.name)) - end - - # Visit a Retry node. - def visit_retry(node) - node.copy - end - - # Visit a Return node. - def visit_return(node) - node.copy(arguments: visit(node.arguments)) - end - - # Visit a RParen node. - def visit_rparen(node) - node.copy - end - - # Visit a SClass node. - def visit_sclass(node) - node.copy(bodystmt: visit(node.bodystmt)) - end - - # Visit a Statements node. - def visit_statements(node) - node.copy(body: visit_all(node.body)) - end - - # Visit a StringContent node. 
- def visit_string_content(node) - node.copy(parts: visit_all(node.parts)) - end - - # Visit a StringConcat node. - def visit_string_concat(node) - node.copy(left: visit(node.left), right: visit(node.right)) - end - - # Visit a StringDVar node. - def visit_string_dvar(node) - node.copy(variable: visit(node.variable)) - end - - # Visit a StringEmbExpr node. - def visit_string_embexpr(node) - node.copy(statements: visit(node.statements)) - end - - # Visit a StringLiteral node. - def visit_string_literal(node) - node.copy(parts: visit_all(node.parts)) - end - - # Visit a Super node. - def visit_super(node) - node.copy(arguments: visit(node.arguments)) - end - - # Visit a SymBeg node. - def visit_symbeg(node) - node.copy - end - - # Visit a SymbolContent node. - def visit_symbol_content(node) - node.copy(value: visit(node.value)) - end - - # Visit a SymbolLiteral node. - def visit_symbol_literal(node) - node.copy(value: visit(node.value)) - end - - # Visit a Symbols node. - def visit_symbols(node) - node.copy( - beginning: visit(node.beginning), - elements: visit_all(node.elements) - ) - end - - # Visit a SymbolsBeg node. - def visit_symbols_beg(node) - node.copy - end - - # Visit a TLambda node. - def visit_tlambda(node) - node.copy - end - - # Visit a TLamBeg node. - def visit_tlambeg(node) - node.copy - end - - # Visit a TopConstField node. - def visit_top_const_field(node) - node.copy(constant: visit(node.constant)) - end - - # Visit a TopConstRef node. - def visit_top_const_ref(node) - node.copy(constant: visit(node.constant)) - end - - # Visit a TStringBeg node. - def visit_tstring_beg(node) - node.copy - end - - # Visit a TStringContent node. - def visit_tstring_content(node) - node.copy - end - - # Visit a TStringEnd node. - def visit_tstring_end(node) - node.copy - end - - # Visit a Not node. - def visit_not(node) - node.copy(statement: visit(node.statement)) - end - - # Visit a Unary node. - def visit_unary(node) - node.copy - end - - # Visit a Undef node. 
- def visit_undef(node) - node.copy(symbols: visit_all(node.symbols)) - end - - # Visit a UnlessNode node. - def visit_unless(node) - node.copy( - predicate: visit(node.predicate), - statements: visit(node.statements), - consequent: visit(node.consequent) - ) - end - - # Visit a UntilNode node. - def visit_until(node) - node.copy( - predicate: visit(node.predicate), - statements: visit(node.statements) - ) - end - - # Visit a VarField node. - def visit_var_field(node) - node.copy(value: visit(node.value)) - end - - # Visit a VarRef node. - def visit_var_ref(node) - node.copy(value: visit(node.value)) - end - - # Visit a PinnedVarRef node. - def visit_pinned_var_ref(node) - node.copy(value: visit(node.value)) - end - - # Visit a VCall node. - def visit_vcall(node) - node.copy(value: visit(node.value)) - end - - # Visit a VoidStmt node. - def visit_void_stmt(node) - node.copy - end - - # Visit a When node. - def visit_when(node) - node.copy( - arguments: visit(node.arguments), - statements: visit(node.statements), - consequent: visit(node.consequent) - ) - end - - # Visit a WhileNode node. - def visit_while(node) - node.copy( - predicate: visit(node.predicate), - statements: visit(node.statements) - ) - end - - # Visit a Word node. - def visit_word(node) - node.copy(parts: visit_all(node.parts)) - end - - # Visit a Words node. - def visit_words(node) - node.copy( - beginning: visit(node.beginning), - elements: visit_all(node.elements) - ) - end - - # Visit a WordsBeg node. - def visit_words_beg(node) - node.copy - end - - # Visit a XString node. - def visit_xstring(node) - node.copy(parts: visit_all(node.parts)) - end - - # Visit a XStringLiteral node. - def visit_xstring_literal(node) - node.copy(parts: visit_all(node.parts)) - end - - # Visit a YieldNode node. - def visit_yield(node) - node.copy(arguments: visit(node.arguments)) - end - - # Visit a ZSuper node. 
- def visit_zsuper(node) - node.copy - end - end - end -end diff --git a/lib/syntax_tree/visitor/pretty_print_visitor.rb b/lib/syntax_tree/visitor/pretty_print_visitor.rb deleted file mode 100644 index 674e3aac..00000000 --- a/lib/syntax_tree/visitor/pretty_print_visitor.rb +++ /dev/null @@ -1,85 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - class Visitor - # This visitor pretty-prints the AST into an equivalent s-expression. - class PrettyPrintVisitor < FieldVisitor - attr_reader :q - - def initialize(q) - @q = q - end - - # This is here because we need to make sure the operator is cast to a - # string before we print it out. - def visit_binary(node) - node(node, "binary") do - field("left", node.left) - text("operator", node.operator.to_s) - field("right", node.right) - comments(node) - end - end - - # This is here to make it a little nicer to look at labels since they - # typically have their : at the end of the value. - def visit_label(node) - node(node, "label") do - q.breakable - q.text(":") - q.text(node.value[0...-1]) - comments(node) - end - end - - private - - def comments(node) - return if node.comments.empty? 
- - q.breakable - q.group(2, "(", ")") do - q.seplist(node.comments) { |comment| q.pp(comment) } - end - end - - def field(_name, value) - q.breakable - q.pp(value) - end - - def list(_name, values) - q.breakable - q.group(2, "(", ")") { q.seplist(values) { |value| q.pp(value) } } - end - - def node(_node, type) - q.group(2, "(", ")") do - q.text(type) - yield - end - end - - def pairs(_name, values) - q.group(2, "(", ")") do - q.seplist(values) do |(key, value)| - q.pp(key) - - if value - q.text("=") - q.group(2) do - q.breakable("") - q.pp(value) - end - end - end - end - end - - def text(_name, value) - q.breakable - q.text(value) - end - end - end -end diff --git a/lib/syntax_tree/visitor/with_environment.rb b/lib/syntax_tree/with_environment.rb similarity index 58% rename from lib/syntax_tree/visitor/with_environment.rb rename to lib/syntax_tree/with_environment.rb index 59033d50..60301390 100644 --- a/lib/syntax_tree/visitor/with_environment.rb +++ b/lib/syntax_tree/with_environment.rb @@ -22,6 +22,87 @@ module SyntaxTree # end # end module WithEnvironment + # The environment class is used to keep track of local variables and + # arguments inside a particular scope + class Environment + # This class tracks the occurrences of a local variable or argument + class Local + # [Symbol] The type of the local (e.g. 
:argument, :variable) + attr_reader :type + + # [Array[Location]] The locations of all definitions and assignments of + # this local + attr_reader :definitions + + # [Array[Location]] The locations of all usages of this local + attr_reader :usages + + # initialize: (Symbol type) -> void + def initialize(type) + @type = type + @definitions = [] + @usages = [] + end + + # add_definition: (Location location) -> void + def add_definition(location) + @definitions << location + end + + # add_usage: (Location location) -> void + def add_usage(location) + @usages << location + end + end + + # [Array[Local]] The local variables and arguments defined in this + # environment + attr_reader :locals + + # [Environment | nil] The parent environment + attr_reader :parent + + # initialize: (Environment | nil parent) -> void + def initialize(parent = nil) + @locals = {} + @parent = parent + end + + # Adding a local definition will either insert a new entry in the locals + # hash or append a new definition location to an existing local. Notice that + # it's not possible to change the type of a local after it has been + # registered + # add_local_definition: (Ident | Label identifier, Symbol type) -> void + def add_local_definition(identifier, type) + name = identifier.value.delete_suffix(":") + + @locals[name] ||= Local.new(type) + @locals[name].add_definition(identifier.location) + end + + # Adding a local usage will either insert a new entry in the locals + # hash or append a new usage location to an existing local. 
Notice that + # it's not possible to change the type of a local after it has been + # registered + # add_local_usage: (Ident | Label identifier, Symbol type) -> void + def add_local_usage(identifier, type) + name = identifier.value.delete_suffix(":") + + @locals[name] ||= Local.new(type) + @locals[name].add_usage(identifier.location) + end + + # Try to find the local given its name in this environment or any of its + # parents + # find_local: (String name) -> Local | nil + def find_local(name) + local = @locals[name] + return local unless local.nil? + + @parent&.find_local(name) + end + end + def current_environment @current_environment ||= Environment.new end diff --git a/lib/syntax_tree/yarv.rb b/lib/syntax_tree/yarv.rb index ff8d3801..bd5c54b9 100644 --- a/lib/syntax_tree/yarv.rb +++ b/lib/syntax_tree/yarv.rb @@ -1,5 +1,7 @@ # frozen_string_literal: true +require "stringio" + require_relative "yarv/basic_block" require_relative "yarv/bf" require_relative "yarv/calldata" diff --git a/lib/syntax_tree/yarv/compiler.rb b/lib/syntax_tree/yarv/compiler.rb index e1a8544a..a8044faf 100644 --- a/lib/syntax_tree/yarv/compiler.rb +++ b/lib/syntax_tree/yarv/compiler.rb @@ -8,7 +8,7 @@ module YARV # # You use this as with any other visitor. First you parse code into a tree, # then you visit it with this compiler. Visiting the root node of the tree - # will return a SyntaxTree::Visitor::Compiler::InstructionSequence object. + # will return a SyntaxTree::YARV::Compiler::InstructionSequence object. # With that object you can call #to_a on it, which will return a serialized # form of the instruction sequence as an array. This array _should_ mirror # the array given by RubyVM::InstructionSequence#to_a. 
diff --git a/test/test_helper.rb b/test/test_helper.rb index e4452e3d..2c8f6466 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -94,7 +94,7 @@ def assert_syntax_tree(node) assert_includes(pretty, type) # Assert that we can get back a new tree by using the mutation visitor. - assert_operator node, :===, node.accept(Visitor::MutationVisitor.new) + assert_operator node, :===, node.accept(MutationVisitor.new) # Serialize the node to JSON, parse it back out, and assert that we have # found the expected type. From 0dd027671e860975d85fd8af3cf8e2e2c117a59a Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 10 Feb 2023 11:37:12 -0500 Subject: [PATCH 51/58] More utility functions --- lib/syntax_tree.rb | 35 ++++++++++++++++++- lib/syntax_tree/mermaid_visitor.rb | 6 +--- lib/syntax_tree/mutation_visitor.rb | 25 +++----------- lib/syntax_tree/with_environment.rb | 52 +++++++++++++---------------- 4 files changed, 64 insertions(+), 54 deletions(-) diff --git a/lib/syntax_tree.rb b/lib/syntax_tree.rb index 0bdc4827..70126b14 100644 --- a/lib/syntax_tree.rb +++ b/lib/syntax_tree.rb @@ -60,9 +60,36 @@ def self.format( maxwidth = DEFAULT_PRINT_WIDTH, base_indentation = DEFAULT_INDENTATION, options: Formatter::Options.new + ) + format_node( + source, + parse(source), + maxwidth, + base_indentation, + options: options + ) + end + + # Parses the given file and returns the formatted source. + def self.format_file( + filepath, + maxwidth = DEFAULT_PRINT_WIDTH, + base_indentation = DEFAULT_INDENTATION, + options: Formatter::Options.new + ) + format(read(filepath), maxwidth, base_indentation, options: options) + end + + # Accepts a node in the tree and returns the formatted source. 
+ def self.format_node( + source, + node, + maxwidth = DEFAULT_PRINT_WIDTH, + base_indentation = DEFAULT_INDENTATION, + options: Formatter::Options.new ) formatter = Formatter.new(source, [], maxwidth, options: options) - parse(source).format(formatter) + node.format(formatter) formatter.flush(base_indentation) formatter.output.join @@ -130,4 +157,10 @@ def self.search(source, query, &block) Search.new(pattern).scan(program, &block) end + + # Searches through the given file using the given pattern and yields each + # node in the tree that matches the pattern to the given block. + def self.search_file(filepath, query, &block) + search(read(filepath), query, &block) + end end diff --git a/lib/syntax_tree/mermaid_visitor.rb b/lib/syntax_tree/mermaid_visitor.rb index 52d1b5c6..fc9f6706 100644 --- a/lib/syntax_tree/mermaid_visitor.rb +++ b/lib/syntax_tree/mermaid_visitor.rb @@ -29,11 +29,7 @@ def field(name, value) flowchart.link(target, visit(value), name) else to = - flowchart.node( - "#{target.id}_#{name}", - value.inspect, - shape: :stadium - ) + flowchart.node("#{target.id}_#{name}", value.inspect, shape: :stadium) flowchart.link(target, to, name) end end diff --git a/lib/syntax_tree/mutation_visitor.rb b/lib/syntax_tree/mutation_visitor.rb index 2d96620d..f96e442f 100644 --- a/lib/syntax_tree/mutation_visitor.rb +++ b/lib/syntax_tree/mutation_visitor.rb @@ -35,10 +35,7 @@ def visit(node) # Visit a BEGINBlock node. def visit_BEGIN(node) - node.copy( - lbrace: visit(node.lbrace), - statements: visit(node.statements) - ) + node.copy(lbrace: visit(node.lbrace), statements: visit(node.statements)) end # Visit a CHAR node. @@ -48,10 +45,7 @@ def visit_CHAR(node) # Visit a ENDBlock node. def visit_END(node) - node.copy( - lbrace: visit(node.lbrace), - statements: visit(node.statements) - ) + node.copy(lbrace: visit(node.lbrace), statements: visit(node.statements)) end # Visit a EndContent node. @@ -101,10 +95,7 @@ def visit_args_forward(node) # Visit a ArrayLiteral node. 
def visit_array(node) - node.copy( - lbracket: visit(node.lbracket), - contents: visit(node.contents) - ) + node.copy(lbracket: visit(node.lbracket), contents: visit(node.contents)) end # Visit a AryPtn node. @@ -493,10 +484,7 @@ def visit_label_end(node) # Visit a Lambda node. def visit_lambda(node) - node.copy( - params: visit(node.params), - statements: visit(node.statements) - ) + node.copy(params: visit(node.params), statements: visit(node.statements)) end # Visit a LambdaVar node. @@ -541,10 +529,7 @@ def visit_mlhs_paren(node) # Visit a ModuleDeclaration node. def visit_module(node) - node.copy( - constant: visit(node.constant), - bodystmt: visit(node.bodystmt) - ) + node.copy(constant: visit(node.constant), bodystmt: visit(node.bodystmt)) end # Visit a MRHS node. diff --git a/lib/syntax_tree/with_environment.rb b/lib/syntax_tree/with_environment.rb index 60301390..13f5e080 100644 --- a/lib/syntax_tree/with_environment.rb +++ b/lib/syntax_tree/with_environment.rb @@ -5,22 +5,25 @@ module SyntaxTree # from Visitor. The module overrides a few visit methods to automatically keep # track of local variables and arguments defined in the current environment. 
# Example usage: - # class MyVisitor < Visitor - # include WithEnvironment # - # def visit_ident(node) - # # Check if we're visiting an identifier for an argument, a local - # variable or something else - # local = current_environment.find_local(node) + # class MyVisitor < Visitor + # include WithEnvironment # - # if local.type == :argument - # # handle identifiers for arguments - # elsif local.type == :variable - # # handle identifiers for variables - # else - # # handle other identifiers, such as method names + # def visit_ident(node) + # # Check if we're visiting an identifier for an argument, a local + # # variable or something else + # local = current_environment.find_local(node) + # + # if local.type == :argument + # # handle identifiers for arguments + # elsif local.type == :variable + # # handle identifiers for variables + # else + # # handle other identifiers, such as method names + # end # end - # end + # end + # module WithEnvironment # The environment class is used to keep track of local variables and # arguments inside a particular scope @@ -37,19 +40,16 @@ class Local # [Array[Location]] The locations of all usages of this local attr_reader :usages - # initialize: (Symbol type) -> void def initialize(type) @type = type @definitions = [] @usages = [] end - # add_definition: (Location location) -> void def add_definition(location) @definitions << location end - # add_usage: (Location location) -> void def add_usage(location) @usages << location end @@ -62,17 +62,15 @@ def add_usage(location) # [Environment | nil] The parent environment attr_reader :parent - # initialize: (Environment | nil parent) -> void def initialize(parent = nil) @locals = {} @parent = parent end # Adding a local definition will either insert a new entry in the locals - # hash or append a new definition location to an existing local. 
Notice that - # it's not possible to change the type of a local after it has been - # registered - # add_local_definition: (Ident | Label identifier, Symbol type) -> void + # hash or append a new definition location to an existing local. Notice + # that it's not possible to change the type of a local after it has been + # registered. def add_local_definition(identifier, type) name = identifier.value.delete_suffix(":") @@ -83,8 +81,7 @@ def add_local_definition(identifier, type) # Adding a local usage will either insert a new entry in the locals # hash or append a new usage location to an existing local. Notice that # it's not possible to change the type of a local after it has been - # registered - # add_local_usage: (Ident | Label identifier, Symbol type) -> void + # registered. def add_local_usage(identifier, type) name = identifier.value.delete_suffix(":") @@ -93,8 +90,7 @@ def add_local_usage(identifier, type) end # Try to find the local given its name in this environment or any of its - # parents - # find_local: (String name) -> Local | nil + # parents. def find_local(name) local = @locals[name] return local unless local.nil? @@ -116,7 +112,7 @@ def with_new_environment end # Visits for nodes that create new environments, such as classes, modules - # and method definitions + # and method definitions. def visit_class(node) with_new_environment { super } end @@ -127,7 +123,7 @@ def visit_module(node) # When we find a method invocation with a block, only the code that happens # inside of the block needs a fresh environment. The method invocation - # itself happens in the same environment + # itself happens in the same environment. def visit_method_add_block(node) visit(node.call) with_new_environment { visit(node.block) } @@ -138,7 +134,7 @@ def visit_def(node) end # Visit for keeping track of local arguments, such as method and block - # arguments + # arguments. 
def visit_params(node) add_argument_definitions(node.requireds) From 1a202316e4919eef70ed6f2945d0135686982ad9 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 10 Feb 2023 11:56:41 -0500 Subject: [PATCH 52/58] Use visit_methods {} --- .rubocop.yml | 3 + lib/syntax_tree/field_visitor.rb | 1444 ++++----- lib/syntax_tree/index.rb | 110 +- lib/syntax_tree/language_server.rb | 170 +- lib/syntax_tree/mutation_visitor.rb | 1457 ++++----- lib/syntax_tree/parser.rb | 6 +- lib/syntax_tree/translation/parser.rb | 4231 +++++++++++++------------ lib/syntax_tree/with_environment.rb | 6 +- lib/syntax_tree/yarv/compiler.rb | 199 +- test/visitor_test.rb | 14 +- test/visitor_with_environment_test.rb | 50 +- 11 files changed, 3890 insertions(+), 3800 deletions(-) diff --git a/.rubocop.yml b/.rubocop.yml index 21beca1b..e5a3fe96 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -84,6 +84,9 @@ Security/Eval: Style/AccessorGrouping: Enabled: false +Style/Alias: + Enabled: false + Style/CaseEquality: Enabled: false diff --git a/lib/syntax_tree/field_visitor.rb b/lib/syntax_tree/field_visitor.rb index f4fc00e3..ca1df55b 100644 --- a/lib/syntax_tree/field_visitor.rb +++ b/lib/syntax_tree/field_visitor.rb @@ -48,972 +48,974 @@ module SyntaxTree # a method. 
# class FieldVisitor < BasicVisitor - def visit_aref(node) - node(node, "aref") do - field("collection", node.collection) - field("index", node.index) - comments(node) + visit_methods do + def visit_aref(node) + node(node, "aref") do + field("collection", node.collection) + field("index", node.index) + comments(node) + end end - end - def visit_aref_field(node) - node(node, "aref_field") do - field("collection", node.collection) - field("index", node.index) - comments(node) + def visit_aref_field(node) + node(node, "aref_field") do + field("collection", node.collection) + field("index", node.index) + comments(node) + end end - end - def visit_alias(node) - node(node, "alias") do - field("left", node.left) - field("right", node.right) - comments(node) + def visit_alias(node) + node(node, "alias") do + field("left", node.left) + field("right", node.right) + comments(node) + end end - end - def visit_arg_block(node) - node(node, "arg_block") do - field("value", node.value) if node.value - comments(node) + def visit_arg_block(node) + node(node, "arg_block") do + field("value", node.value) if node.value + comments(node) + end end - end - def visit_arg_paren(node) - node(node, "arg_paren") do - field("arguments", node.arguments) - comments(node) + def visit_arg_paren(node) + node(node, "arg_paren") do + field("arguments", node.arguments) + comments(node) + end end - end - def visit_arg_star(node) - node(node, "arg_star") do - field("value", node.value) - comments(node) + def visit_arg_star(node) + node(node, "arg_star") do + field("value", node.value) + comments(node) + end end - end - def visit_args(node) - node(node, "args") do - list("parts", node.parts) - comments(node) + def visit_args(node) + node(node, "args") do + list("parts", node.parts) + comments(node) + end end - end - def visit_args_forward(node) - node(node, "args_forward") { comments(node) } - end + def visit_args_forward(node) + node(node, "args_forward") { comments(node) } + end - def visit_array(node) 
- node(node, "array") do - field("contents", node.contents) - comments(node) + def visit_array(node) + node(node, "array") do + field("contents", node.contents) + comments(node) + end end - end - def visit_aryptn(node) - node(node, "aryptn") do - field("constant", node.constant) if node.constant - list("requireds", node.requireds) if node.requireds.any? - field("rest", node.rest) if node.rest - list("posts", node.posts) if node.posts.any? - comments(node) + def visit_aryptn(node) + node(node, "aryptn") do + field("constant", node.constant) if node.constant + list("requireds", node.requireds) if node.requireds.any? + field("rest", node.rest) if node.rest + list("posts", node.posts) if node.posts.any? + comments(node) + end end - end - def visit_assign(node) - node(node, "assign") do - field("target", node.target) - field("value", node.value) - comments(node) + def visit_assign(node) + node(node, "assign") do + field("target", node.target) + field("value", node.value) + comments(node) + end end - end - def visit_assoc(node) - node(node, "assoc") do - field("key", node.key) - field("value", node.value) if node.value - comments(node) + def visit_assoc(node) + node(node, "assoc") do + field("key", node.key) + field("value", node.value) if node.value + comments(node) + end end - end - def visit_assoc_splat(node) - node(node, "assoc_splat") do - field("value", node.value) - comments(node) + def visit_assoc_splat(node) + node(node, "assoc_splat") do + field("value", node.value) + comments(node) + end end - end - def visit_backref(node) - visit_token(node, "backref") - end + def visit_backref(node) + visit_token(node, "backref") + end - def visit_backtick(node) - visit_token(node, "backtick") - end + def visit_backtick(node) + visit_token(node, "backtick") + end - def visit_bare_assoc_hash(node) - node(node, "bare_assoc_hash") do - list("assocs", node.assocs) - comments(node) + def visit_bare_assoc_hash(node) + node(node, "bare_assoc_hash") do + list("assocs", node.assocs) 
+ comments(node) + end end - end - def visit_BEGIN(node) - node(node, "BEGIN") do - field("statements", node.statements) - comments(node) + def visit_BEGIN(node) + node(node, "BEGIN") do + field("statements", node.statements) + comments(node) + end end - end - def visit_begin(node) - node(node, "begin") do - field("bodystmt", node.bodystmt) - comments(node) + def visit_begin(node) + node(node, "begin") do + field("bodystmt", node.bodystmt) + comments(node) + end end - end - def visit_binary(node) - node(node, "binary") do - field("left", node.left) - text("operator", node.operator) - field("right", node.right) - comments(node) + def visit_binary(node) + node(node, "binary") do + field("left", node.left) + text("operator", node.operator) + field("right", node.right) + comments(node) + end end - end - def visit_block(node) - node(node, "block") do - field("block_var", node.block_var) if node.block_var - field("bodystmt", node.bodystmt) - comments(node) + def visit_block(node) + node(node, "block") do + field("block_var", node.block_var) if node.block_var + field("bodystmt", node.bodystmt) + comments(node) + end end - end - def visit_blockarg(node) - node(node, "blockarg") do - field("name", node.name) if node.name - comments(node) + def visit_blockarg(node) + node(node, "blockarg") do + field("name", node.name) if node.name + comments(node) + end end - end - def visit_block_var(node) - node(node, "block_var") do - field("params", node.params) - list("locals", node.locals) if node.locals.any? - comments(node) + def visit_block_var(node) + node(node, "block_var") do + field("params", node.params) + list("locals", node.locals) if node.locals.any? 
+ comments(node) + end end - end - def visit_bodystmt(node) - node(node, "bodystmt") do - field("statements", node.statements) - field("rescue_clause", node.rescue_clause) if node.rescue_clause - field("else_clause", node.else_clause) if node.else_clause - field("ensure_clause", node.ensure_clause) if node.ensure_clause - comments(node) + def visit_bodystmt(node) + node(node, "bodystmt") do + field("statements", node.statements) + field("rescue_clause", node.rescue_clause) if node.rescue_clause + field("else_clause", node.else_clause) if node.else_clause + field("ensure_clause", node.ensure_clause) if node.ensure_clause + comments(node) + end end - end - def visit_break(node) - node(node, "break") do - field("arguments", node.arguments) - comments(node) + def visit_break(node) + node(node, "break") do + field("arguments", node.arguments) + comments(node) + end end - end - def visit_call(node) - node(node, "call") do - field("receiver", node.receiver) - field("operator", node.operator) - field("message", node.message) - field("arguments", node.arguments) if node.arguments - comments(node) + def visit_call(node) + node(node, "call") do + field("receiver", node.receiver) + field("operator", node.operator) + field("message", node.message) + field("arguments", node.arguments) if node.arguments + comments(node) + end end - end - def visit_case(node) - node(node, "case") do - field("keyword", node.keyword) - field("value", node.value) if node.value - field("consequent", node.consequent) - comments(node) + def visit_case(node) + node(node, "case") do + field("keyword", node.keyword) + field("value", node.value) if node.value + field("consequent", node.consequent) + comments(node) + end end - end - def visit_CHAR(node) - visit_token(node, "CHAR") - end + def visit_CHAR(node) + visit_token(node, "CHAR") + end - def visit_class(node) - node(node, "class") do - field("constant", node.constant) - field("superclass", node.superclass) if node.superclass - field("bodystmt", 
node.bodystmt) - comments(node) + def visit_class(node) + node(node, "class") do + field("constant", node.constant) + field("superclass", node.superclass) if node.superclass + field("bodystmt", node.bodystmt) + comments(node) + end end - end - def visit_comma(node) - node(node, "comma") { field("value", node.value) } - end + def visit_comma(node) + node(node, "comma") { field("value", node.value) } + end - def visit_command(node) - node(node, "command") do - field("message", node.message) - field("arguments", node.arguments) - comments(node) + def visit_command(node) + node(node, "command") do + field("message", node.message) + field("arguments", node.arguments) + comments(node) + end end - end - def visit_command_call(node) - node(node, "command_call") do - field("receiver", node.receiver) - field("operator", node.operator) - field("message", node.message) - field("arguments", node.arguments) if node.arguments - comments(node) + def visit_command_call(node) + node(node, "command_call") do + field("receiver", node.receiver) + field("operator", node.operator) + field("message", node.message) + field("arguments", node.arguments) if node.arguments + comments(node) + end end - end - def visit_comment(node) - node(node, "comment") { field("value", node.value) } - end + def visit_comment(node) + node(node, "comment") { field("value", node.value) } + end - def visit_const(node) - visit_token(node, "const") - end + def visit_const(node) + visit_token(node, "const") + end - def visit_const_path_field(node) - node(node, "const_path_field") do - field("parent", node.parent) - field("constant", node.constant) - comments(node) + def visit_const_path_field(node) + node(node, "const_path_field") do + field("parent", node.parent) + field("constant", node.constant) + comments(node) + end end - end - def visit_const_path_ref(node) - node(node, "const_path_ref") do - field("parent", node.parent) - field("constant", node.constant) - comments(node) + def visit_const_path_ref(node) + 
node(node, "const_path_ref") do + field("parent", node.parent) + field("constant", node.constant) + comments(node) + end end - end - def visit_const_ref(node) - node(node, "const_ref") do - field("constant", node.constant) - comments(node) + def visit_const_ref(node) + node(node, "const_ref") do + field("constant", node.constant) + comments(node) + end end - end - def visit_cvar(node) - visit_token(node, "cvar") - end + def visit_cvar(node) + visit_token(node, "cvar") + end - def visit_def(node) - node(node, "def") do - field("target", node.target) - field("operator", node.operator) - field("name", node.name) - field("params", node.params) - field("bodystmt", node.bodystmt) - comments(node) + def visit_def(node) + node(node, "def") do + field("target", node.target) + field("operator", node.operator) + field("name", node.name) + field("params", node.params) + field("bodystmt", node.bodystmt) + comments(node) + end end - end - def visit_defined(node) - node(node, "defined") do - field("value", node.value) - comments(node) + def visit_defined(node) + node(node, "defined") do + field("value", node.value) + comments(node) + end end - end - def visit_dyna_symbol(node) - node(node, "dyna_symbol") do - list("parts", node.parts) - comments(node) + def visit_dyna_symbol(node) + node(node, "dyna_symbol") do + list("parts", node.parts) + comments(node) + end end - end - def visit_END(node) - node(node, "END") do - field("statements", node.statements) - comments(node) + def visit_END(node) + node(node, "END") do + field("statements", node.statements) + comments(node) + end end - end - def visit_else(node) - node(node, "else") do - field("statements", node.statements) - comments(node) + def visit_else(node) + node(node, "else") do + field("statements", node.statements) + comments(node) + end end - end - def visit_elsif(node) - node(node, "elsif") do - field("predicate", node.predicate) - field("statements", node.statements) - field("consequent", node.consequent) if 
node.consequent - comments(node) + def visit_elsif(node) + node(node, "elsif") do + field("predicate", node.predicate) + field("statements", node.statements) + field("consequent", node.consequent) if node.consequent + comments(node) + end end - end - def visit_embdoc(node) - node(node, "embdoc") { field("value", node.value) } - end + def visit_embdoc(node) + node(node, "embdoc") { field("value", node.value) } + end - def visit_embexpr_beg(node) - node(node, "embexpr_beg") { field("value", node.value) } - end + def visit_embexpr_beg(node) + node(node, "embexpr_beg") { field("value", node.value) } + end - def visit_embexpr_end(node) - node(node, "embexpr_end") { field("value", node.value) } - end + def visit_embexpr_end(node) + node(node, "embexpr_end") { field("value", node.value) } + end - def visit_embvar(node) - node(node, "embvar") { field("value", node.value) } - end + def visit_embvar(node) + node(node, "embvar") { field("value", node.value) } + end - def visit_ensure(node) - node(node, "ensure") do - field("statements", node.statements) - comments(node) + def visit_ensure(node) + node(node, "ensure") do + field("statements", node.statements) + comments(node) + end end - end - def visit_excessed_comma(node) - visit_token(node, "excessed_comma") - end + def visit_excessed_comma(node) + visit_token(node, "excessed_comma") + end - def visit_field(node) - node(node, "field") do - field("parent", node.parent) - field("operator", node.operator) - field("name", node.name) - comments(node) + def visit_field(node) + node(node, "field") do + field("parent", node.parent) + field("operator", node.operator) + field("name", node.name) + comments(node) + end end - end - def visit_float(node) - visit_token(node, "float") - end + def visit_float(node) + visit_token(node, "float") + end - def visit_fndptn(node) - node(node, "fndptn") do - field("constant", node.constant) if node.constant - field("left", node.left) - list("values", node.values) - field("right", node.right) - 
comments(node) + def visit_fndptn(node) + node(node, "fndptn") do + field("constant", node.constant) if node.constant + field("left", node.left) + list("values", node.values) + field("right", node.right) + comments(node) + end end - end - def visit_for(node) - node(node, "for") do - field("index", node.index) - field("collection", node.collection) - field("statements", node.statements) - comments(node) + def visit_for(node) + node(node, "for") do + field("index", node.index) + field("collection", node.collection) + field("statements", node.statements) + comments(node) + end end - end - def visit_gvar(node) - visit_token(node, "gvar") - end + def visit_gvar(node) + visit_token(node, "gvar") + end - def visit_hash(node) - node(node, "hash") do - list("assocs", node.assocs) if node.assocs.any? - comments(node) + def visit_hash(node) + node(node, "hash") do + list("assocs", node.assocs) if node.assocs.any? + comments(node) + end end - end - def visit_heredoc(node) - node(node, "heredoc") do - list("parts", node.parts) - comments(node) + def visit_heredoc(node) + node(node, "heredoc") do + list("parts", node.parts) + comments(node) + end end - end - def visit_heredoc_beg(node) - visit_token(node, "heredoc_beg") - end + def visit_heredoc_beg(node) + visit_token(node, "heredoc_beg") + end - def visit_heredoc_end(node) - visit_token(node, "heredoc_end") - end + def visit_heredoc_end(node) + visit_token(node, "heredoc_end") + end - def visit_hshptn(node) - node(node, "hshptn") do - field("constant", node.constant) if node.constant - pairs("keywords", node.keywords) if node.keywords.any? - field("keyword_rest", node.keyword_rest) if node.keyword_rest - comments(node) + def visit_hshptn(node) + node(node, "hshptn") do + field("constant", node.constant) if node.constant + pairs("keywords", node.keywords) if node.keywords.any? 
+ field("keyword_rest", node.keyword_rest) if node.keyword_rest + comments(node) + end end - end - def visit_ident(node) - visit_token(node, "ident") - end + def visit_ident(node) + visit_token(node, "ident") + end - def visit_if(node) - node(node, "if") do - field("predicate", node.predicate) - field("statements", node.statements) - field("consequent", node.consequent) if node.consequent - comments(node) + def visit_if(node) + node(node, "if") do + field("predicate", node.predicate) + field("statements", node.statements) + field("consequent", node.consequent) if node.consequent + comments(node) + end end - end - def visit_if_op(node) - node(node, "if_op") do - field("predicate", node.predicate) - field("truthy", node.truthy) - field("falsy", node.falsy) - comments(node) + def visit_if_op(node) + node(node, "if_op") do + field("predicate", node.predicate) + field("truthy", node.truthy) + field("falsy", node.falsy) + comments(node) + end end - end - def visit_imaginary(node) - visit_token(node, "imaginary") - end + def visit_imaginary(node) + visit_token(node, "imaginary") + end - def visit_in(node) - node(node, "in") do - field("pattern", node.pattern) - field("statements", node.statements) - field("consequent", node.consequent) if node.consequent - comments(node) + def visit_in(node) + node(node, "in") do + field("pattern", node.pattern) + field("statements", node.statements) + field("consequent", node.consequent) if node.consequent + comments(node) + end end - end - def visit_int(node) - visit_token(node, "int") - end + def visit_int(node) + visit_token(node, "int") + end - def visit_ivar(node) - visit_token(node, "ivar") - end + def visit_ivar(node) + visit_token(node, "ivar") + end - def visit_kw(node) - visit_token(node, "kw") - end + def visit_kw(node) + visit_token(node, "kw") + end - def visit_kwrest_param(node) - node(node, "kwrest_param") do - field("name", node.name) - comments(node) + def visit_kwrest_param(node) + node(node, "kwrest_param") do + 
field("name", node.name) + comments(node) + end end - end - def visit_label(node) - visit_token(node, "label") - end + def visit_label(node) + visit_token(node, "label") + end - def visit_label_end(node) - node(node, "label_end") { field("value", node.value) } - end + def visit_label_end(node) + node(node, "label_end") { field("value", node.value) } + end - def visit_lambda(node) - node(node, "lambda") do - field("params", node.params) - field("statements", node.statements) - comments(node) + def visit_lambda(node) + node(node, "lambda") do + field("params", node.params) + field("statements", node.statements) + comments(node) + end end - end - def visit_lambda_var(node) - node(node, "lambda_var") do - field("params", node.params) - list("locals", node.locals) if node.locals.any? - comments(node) + def visit_lambda_var(node) + node(node, "lambda_var") do + field("params", node.params) + list("locals", node.locals) if node.locals.any? + comments(node) + end end - end - def visit_lbrace(node) - visit_token(node, "lbrace") - end + def visit_lbrace(node) + visit_token(node, "lbrace") + end - def visit_lbracket(node) - visit_token(node, "lbracket") - end + def visit_lbracket(node) + visit_token(node, "lbracket") + end - def visit_lparen(node) - visit_token(node, "lparen") - end + def visit_lparen(node) + visit_token(node, "lparen") + end - def visit_massign(node) - node(node, "massign") do - field("target", node.target) - field("value", node.value) - comments(node) + def visit_massign(node) + node(node, "massign") do + field("target", node.target) + field("value", node.value) + comments(node) + end end - end - def visit_method_add_block(node) - node(node, "method_add_block") do - field("call", node.call) - field("block", node.block) - comments(node) + def visit_method_add_block(node) + node(node, "method_add_block") do + field("call", node.call) + field("block", node.block) + comments(node) + end end - end - def visit_mlhs(node) - node(node, "mlhs") do - list("parts", 
node.parts) - comments(node) + def visit_mlhs(node) + node(node, "mlhs") do + list("parts", node.parts) + comments(node) + end end - end - def visit_mlhs_paren(node) - node(node, "mlhs_paren") do - field("contents", node.contents) - comments(node) + def visit_mlhs_paren(node) + node(node, "mlhs_paren") do + field("contents", node.contents) + comments(node) + end end - end - def visit_module(node) - node(node, "module") do - field("constant", node.constant) - field("bodystmt", node.bodystmt) - comments(node) + def visit_module(node) + node(node, "module") do + field("constant", node.constant) + field("bodystmt", node.bodystmt) + comments(node) + end end - end - def visit_mrhs(node) - node(node, "mrhs") do - list("parts", node.parts) - comments(node) + def visit_mrhs(node) + node(node, "mrhs") do + list("parts", node.parts) + comments(node) + end end - end - def visit_next(node) - node(node, "next") do - field("arguments", node.arguments) - comments(node) + def visit_next(node) + node(node, "next") do + field("arguments", node.arguments) + comments(node) + end end - end - def visit_not(node) - node(node, "not") do - field("statement", node.statement) - comments(node) + def visit_not(node) + node(node, "not") do + field("statement", node.statement) + comments(node) + end end - end - def visit_op(node) - visit_token(node, "op") - end + def visit_op(node) + visit_token(node, "op") + end - def visit_opassign(node) - node(node, "opassign") do - field("target", node.target) - field("operator", node.operator) - field("value", node.value) - comments(node) + def visit_opassign(node) + node(node, "opassign") do + field("target", node.target) + field("operator", node.operator) + field("value", node.value) + comments(node) + end end - end - def visit_params(node) - node(node, "params") do - list("requireds", node.requireds) if node.requireds.any? - pairs("optionals", node.optionals) if node.optionals.any? 
- field("rest", node.rest) if node.rest - list("posts", node.posts) if node.posts.any? - pairs("keywords", node.keywords) if node.keywords.any? - field("keyword_rest", node.keyword_rest) if node.keyword_rest - field("block", node.block) if node.block - comments(node) + def visit_params(node) + node(node, "params") do + list("requireds", node.requireds) if node.requireds.any? + pairs("optionals", node.optionals) if node.optionals.any? + field("rest", node.rest) if node.rest + list("posts", node.posts) if node.posts.any? + pairs("keywords", node.keywords) if node.keywords.any? + field("keyword_rest", node.keyword_rest) if node.keyword_rest + field("block", node.block) if node.block + comments(node) + end end - end - def visit_paren(node) - node(node, "paren") do - field("contents", node.contents) - comments(node) + def visit_paren(node) + node(node, "paren") do + field("contents", node.contents) + comments(node) + end end - end - def visit_period(node) - visit_token(node, "period") - end + def visit_period(node) + visit_token(node, "period") + end - def visit_pinned_begin(node) - node(node, "pinned_begin") do - field("statement", node.statement) - comments(node) + def visit_pinned_begin(node) + node(node, "pinned_begin") do + field("statement", node.statement) + comments(node) + end end - end - def visit_pinned_var_ref(node) - node(node, "pinned_var_ref") do - field("value", node.value) - comments(node) + def visit_pinned_var_ref(node) + node(node, "pinned_var_ref") do + field("value", node.value) + comments(node) + end end - end - def visit_program(node) - node(node, "program") do - field("statements", node.statements) - comments(node) + def visit_program(node) + node(node, "program") do + field("statements", node.statements) + comments(node) + end end - end - def visit_qsymbols(node) - node(node, "qsymbols") do - list("elements", node.elements) - comments(node) + def visit_qsymbols(node) + node(node, "qsymbols") do + list("elements", node.elements) + comments(node) 
+ end end - end - def visit_qsymbols_beg(node) - node(node, "qsymbols_beg") { field("value", node.value) } - end + def visit_qsymbols_beg(node) + node(node, "qsymbols_beg") { field("value", node.value) } + end - def visit_qwords(node) - node(node, "qwords") do - list("elements", node.elements) - comments(node) + def visit_qwords(node) + node(node, "qwords") do + list("elements", node.elements) + comments(node) + end end - end - def visit_qwords_beg(node) - node(node, "qwords_beg") { field("value", node.value) } - end + def visit_qwords_beg(node) + node(node, "qwords_beg") { field("value", node.value) } + end - def visit_range(node) - node(node, "range") do - field("left", node.left) if node.left - field("operator", node.operator) - field("right", node.right) if node.right - comments(node) + def visit_range(node) + node(node, "range") do + field("left", node.left) if node.left + field("operator", node.operator) + field("right", node.right) if node.right + comments(node) + end end - end - def visit_rassign(node) - node(node, "rassign") do - field("value", node.value) - field("operator", node.operator) - field("pattern", node.pattern) - comments(node) + def visit_rassign(node) + node(node, "rassign") do + field("value", node.value) + field("operator", node.operator) + field("pattern", node.pattern) + comments(node) + end end - end - def visit_rational(node) - visit_token(node, "rational") - end + def visit_rational(node) + visit_token(node, "rational") + end - def visit_rbrace(node) - node(node, "rbrace") { field("value", node.value) } - end + def visit_rbrace(node) + node(node, "rbrace") { field("value", node.value) } + end - def visit_rbracket(node) - node(node, "rbracket") { field("value", node.value) } - end + def visit_rbracket(node) + node(node, "rbracket") { field("value", node.value) } + end - def visit_redo(node) - node(node, "redo") { comments(node) } - end + def visit_redo(node) + node(node, "redo") { comments(node) } + end - def visit_regexp_beg(node) - 
node(node, "regexp_beg") { field("value", node.value) } - end + def visit_regexp_beg(node) + node(node, "regexp_beg") { field("value", node.value) } + end - def visit_regexp_content(node) - node(node, "regexp_content") { list("parts", node.parts) } - end + def visit_regexp_content(node) + node(node, "regexp_content") { list("parts", node.parts) } + end - def visit_regexp_end(node) - node(node, "regexp_end") { field("value", node.value) } - end + def visit_regexp_end(node) + node(node, "regexp_end") { field("value", node.value) } + end - def visit_regexp_literal(node) - node(node, "regexp_literal") do - list("parts", node.parts) - field("options", node.options) - comments(node) + def visit_regexp_literal(node) + node(node, "regexp_literal") do + list("parts", node.parts) + field("options", node.options) + comments(node) + end end - end - def visit_rescue(node) - node(node, "rescue") do - field("exception", node.exception) if node.exception - field("statements", node.statements) - field("consequent", node.consequent) if node.consequent - comments(node) + def visit_rescue(node) + node(node, "rescue") do + field("exception", node.exception) if node.exception + field("statements", node.statements) + field("consequent", node.consequent) if node.consequent + comments(node) + end end - end - def visit_rescue_ex(node) - node(node, "rescue_ex") do - field("exceptions", node.exceptions) - field("variable", node.variable) - comments(node) + def visit_rescue_ex(node) + node(node, "rescue_ex") do + field("exceptions", node.exceptions) + field("variable", node.variable) + comments(node) + end end - end - def visit_rescue_mod(node) - node(node, "rescue_mod") do - field("statement", node.statement) - field("value", node.value) - comments(node) + def visit_rescue_mod(node) + node(node, "rescue_mod") do + field("statement", node.statement) + field("value", node.value) + comments(node) + end end - end - def visit_rest_param(node) - node(node, "rest_param") do - field("name", 
node.name) - comments(node) + def visit_rest_param(node) + node(node, "rest_param") do + field("name", node.name) + comments(node) + end end - end - def visit_retry(node) - node(node, "retry") { comments(node) } - end + def visit_retry(node) + node(node, "retry") { comments(node) } + end - def visit_return(node) - node(node, "return") do - field("arguments", node.arguments) - comments(node) + def visit_return(node) + node(node, "return") do + field("arguments", node.arguments) + comments(node) + end end - end - def visit_rparen(node) - node(node, "rparen") { field("value", node.value) } - end + def visit_rparen(node) + node(node, "rparen") { field("value", node.value) } + end - def visit_sclass(node) - node(node, "sclass") do - field("target", node.target) - field("bodystmt", node.bodystmt) - comments(node) + def visit_sclass(node) + node(node, "sclass") do + field("target", node.target) + field("bodystmt", node.bodystmt) + comments(node) + end end - end - def visit_statements(node) - node(node, "statements") do - list("body", node.body) - comments(node) + def visit_statements(node) + node(node, "statements") do + list("body", node.body) + comments(node) + end end - end - def visit_string_concat(node) - node(node, "string_concat") do - field("left", node.left) - field("right", node.right) - comments(node) + def visit_string_concat(node) + node(node, "string_concat") do + field("left", node.left) + field("right", node.right) + comments(node) + end end - end - def visit_string_content(node) - node(node, "string_content") { list("parts", node.parts) } - end + def visit_string_content(node) + node(node, "string_content") { list("parts", node.parts) } + end - def visit_string_dvar(node) - node(node, "string_dvar") do - field("variable", node.variable) - comments(node) + def visit_string_dvar(node) + node(node, "string_dvar") do + field("variable", node.variable) + comments(node) + end end - end - def visit_string_embexpr(node) - node(node, "string_embexpr") do - 
field("statements", node.statements) - comments(node) + def visit_string_embexpr(node) + node(node, "string_embexpr") do + field("statements", node.statements) + comments(node) + end end - end - def visit_string_literal(node) - node(node, "string_literal") do - list("parts", node.parts) - comments(node) + def visit_string_literal(node) + node(node, "string_literal") do + list("parts", node.parts) + comments(node) + end end - end - def visit_super(node) - node(node, "super") do - field("arguments", node.arguments) - comments(node) + def visit_super(node) + node(node, "super") do + field("arguments", node.arguments) + comments(node) + end end - end - def visit_symbeg(node) - node(node, "symbeg") { field("value", node.value) } - end + def visit_symbeg(node) + node(node, "symbeg") { field("value", node.value) } + end - def visit_symbol_content(node) - node(node, "symbol_content") { field("value", node.value) } - end + def visit_symbol_content(node) + node(node, "symbol_content") { field("value", node.value) } + end - def visit_symbol_literal(node) - node(node, "symbol_literal") do - field("value", node.value) - comments(node) + def visit_symbol_literal(node) + node(node, "symbol_literal") do + field("value", node.value) + comments(node) + end end - end - def visit_symbols(node) - node(node, "symbols") do - list("elements", node.elements) - comments(node) + def visit_symbols(node) + node(node, "symbols") do + list("elements", node.elements) + comments(node) + end end - end - def visit_symbols_beg(node) - node(node, "symbols_beg") { field("value", node.value) } - end + def visit_symbols_beg(node) + node(node, "symbols_beg") { field("value", node.value) } + end - def visit_tlambda(node) - node(node, "tlambda") { field("value", node.value) } - end + def visit_tlambda(node) + node(node, "tlambda") { field("value", node.value) } + end - def visit_tlambeg(node) - node(node, "tlambeg") { field("value", node.value) } - end + def visit_tlambeg(node) + node(node, "tlambeg") { 
field("value", node.value) } + end - def visit_top_const_field(node) - node(node, "top_const_field") do - field("constant", node.constant) - comments(node) + def visit_top_const_field(node) + node(node, "top_const_field") do + field("constant", node.constant) + comments(node) + end end - end - def visit_top_const_ref(node) - node(node, "top_const_ref") do - field("constant", node.constant) - comments(node) + def visit_top_const_ref(node) + node(node, "top_const_ref") do + field("constant", node.constant) + comments(node) + end end - end - def visit_tstring_beg(node) - node(node, "tstring_beg") { field("value", node.value) } - end + def visit_tstring_beg(node) + node(node, "tstring_beg") { field("value", node.value) } + end - def visit_tstring_content(node) - visit_token(node, "tstring_content") - end + def visit_tstring_content(node) + visit_token(node, "tstring_content") + end - def visit_tstring_end(node) - node(node, "tstring_end") { field("value", node.value) } - end + def visit_tstring_end(node) + node(node, "tstring_end") { field("value", node.value) } + end - def visit_unary(node) - node(node, "unary") do - field("operator", node.operator) - field("statement", node.statement) - comments(node) + def visit_unary(node) + node(node, "unary") do + field("operator", node.operator) + field("statement", node.statement) + comments(node) + end end - end - def visit_undef(node) - node(node, "undef") do - list("symbols", node.symbols) - comments(node) + def visit_undef(node) + node(node, "undef") do + list("symbols", node.symbols) + comments(node) + end end - end - def visit_unless(node) - node(node, "unless") do - field("predicate", node.predicate) - field("statements", node.statements) - field("consequent", node.consequent) if node.consequent - comments(node) + def visit_unless(node) + node(node, "unless") do + field("predicate", node.predicate) + field("statements", node.statements) + field("consequent", node.consequent) if node.consequent + comments(node) + end end 
- end - def visit_until(node) - node(node, "until") do - field("predicate", node.predicate) - field("statements", node.statements) - comments(node) + def visit_until(node) + node(node, "until") do + field("predicate", node.predicate) + field("statements", node.statements) + comments(node) + end end - end - def visit_var_field(node) - node(node, "var_field") do - field("value", node.value) - comments(node) + def visit_var_field(node) + node(node, "var_field") do + field("value", node.value) + comments(node) + end end - end - def visit_var_ref(node) - node(node, "var_ref") do - field("value", node.value) - comments(node) + def visit_var_ref(node) + node(node, "var_ref") do + field("value", node.value) + comments(node) + end end - end - def visit_vcall(node) - node(node, "vcall") do - field("value", node.value) - comments(node) + def visit_vcall(node) + node(node, "vcall") do + field("value", node.value) + comments(node) + end end - end - def visit_void_stmt(node) - node(node, "void_stmt") { comments(node) } - end + def visit_void_stmt(node) + node(node, "void_stmt") { comments(node) } + end - def visit_when(node) - node(node, "when") do - field("arguments", node.arguments) - field("statements", node.statements) - field("consequent", node.consequent) if node.consequent - comments(node) + def visit_when(node) + node(node, "when") do + field("arguments", node.arguments) + field("statements", node.statements) + field("consequent", node.consequent) if node.consequent + comments(node) + end end - end - def visit_while(node) - node(node, "while") do - field("predicate", node.predicate) - field("statements", node.statements) - comments(node) + def visit_while(node) + node(node, "while") do + field("predicate", node.predicate) + field("statements", node.statements) + comments(node) + end end - end - def visit_word(node) - node(node, "word") do - list("parts", node.parts) - comments(node) + def visit_word(node) + node(node, "word") do + list("parts", node.parts) + 
comments(node) + end end - end - def visit_words(node) - node(node, "words") do - list("elements", node.elements) - comments(node) + def visit_words(node) + node(node, "words") do + list("elements", node.elements) + comments(node) + end end - end - def visit_words_beg(node) - node(node, "words_beg") { field("value", node.value) } - end + def visit_words_beg(node) + node(node, "words_beg") { field("value", node.value) } + end - def visit_xstring(node) - node(node, "xstring") { list("parts", node.parts) } - end + def visit_xstring(node) + node(node, "xstring") { list("parts", node.parts) } + end - def visit_xstring_literal(node) - node(node, "xstring_literal") do - list("parts", node.parts) - comments(node) + def visit_xstring_literal(node) + node(node, "xstring_literal") do + list("parts", node.parts) + comments(node) + end end - end - def visit_yield(node) - node(node, "yield") do - field("arguments", node.arguments) - comments(node) + def visit_yield(node) + node(node, "yield") do + field("arguments", node.arguments) + comments(node) + end end - end - def visit_zsuper(node) - node(node, "zsuper") { comments(node) } - end + def visit_zsuper(node) + node(node, "zsuper") { comments(node) } + end - def visit___end__(node) - visit_token(node, "__end__") + def visit___end__(node) + visit_token(node, "__end__") + end end private diff --git a/lib/syntax_tree/index.rb b/lib/syntax_tree/index.rb index 8b33f785..ab2460dd 100644 --- a/lib/syntax_tree/index.rb +++ b/lib/syntax_tree/index.rb @@ -257,74 +257,76 @@ def initialize @statements = nil end - def visit_class(node) - name = visit(node.constant).to_sym - location = - Location.new(node.location.start_line, node.location.start_column) - - results << ClassDefinition.new( - nesting.dup, - name, - location, - comments_for(node) - ) - - nesting << name - super - nesting.pop - end - - def visit_const_ref(node) - node.constant.value - end + visit_methods do + def visit_class(node) + name = visit(node.constant).to_sym + location 
= + Location.new(node.location.start_line, node.location.start_column) - def visit_def(node) - name = node.name.value.to_sym - location = - Location.new(node.location.start_line, node.location.start_column) - - results << if node.target.nil? - MethodDefinition.new( + results << ClassDefinition.new( nesting.dup, name, location, comments_for(node) ) - else - SingletonMethodDefinition.new( + + nesting << name + super + nesting.pop + end + + def visit_const_ref(node) + node.constant.value + end + + def visit_def(node) + name = node.name.value.to_sym + location = + Location.new(node.location.start_line, node.location.start_column) + + results << if node.target.nil? + MethodDefinition.new( + nesting.dup, + name, + location, + comments_for(node) + ) + else + SingletonMethodDefinition.new( + nesting.dup, + name, + location, + comments_for(node) + ) + end + end + + def visit_module(node) + name = visit(node.constant).to_sym + location = + Location.new(node.location.start_line, node.location.start_column) + + results << ModuleDefinition.new( nesting.dup, name, location, comments_for(node) ) - end - end - - def visit_module(node) - name = visit(node.constant).to_sym - location = - Location.new(node.location.start_line, node.location.start_column) - results << ModuleDefinition.new( - nesting.dup, - name, - location, - comments_for(node) - ) - - nesting << name - super - nesting.pop - end + nesting << name + super + nesting.pop + end - def visit_program(node) - super - results - end + def visit_program(node) + super + results + end - def visit_statements(node) - @statements = node - super + def visit_statements(node) + @statements = node + super + end end private diff --git a/lib/syntax_tree/language_server.rb b/lib/syntax_tree/language_server.rb index afb1540e..6ec81030 100644 --- a/lib/syntax_tree/language_server.rb +++ b/lib/syntax_tree/language_server.rb @@ -52,101 +52,103 @@ def visit(node) result end - # Adds parentheses around assignments contained within the default 
values - # of parameters. For example, - # - # def foo(a = b = c) - # end - # - # becomes - # - # def foo(a = ₍b = c₎) - # end - # - def visit_assign(node) - parentheses(node.location) if stack[-2].is_a?(Params) - super - end - - # Adds parentheses around binary expressions to make it clear which - # subexpression will be evaluated first. For example, - # - # a + b * c - # - # becomes - # - # a + ₍b * c₎ - # - def visit_binary(node) - case stack[-2] - when Assign, OpAssign - parentheses(node.location) - when Binary - parentheses(node.location) if stack[-2].operator != node.operator + visit_methods do + # Adds parentheses around assignments contained within the default + # values of parameters. For example, + # + # def foo(a = b = c) + # end + # + # becomes + # + # def foo(a = ₍b = c₎) + # end + # + def visit_assign(node) + parentheses(node.location) if stack[-2].is_a?(Params) + super end - super - end + # Adds parentheses around binary expressions to make it clear which + # subexpression will be evaluated first. For example, + # + # a + b * c + # + # becomes + # + # a + ₍b * c₎ + # + def visit_binary(node) + case stack[-2] + when Assign, OpAssign + parentheses(node.location) + when Binary + parentheses(node.location) if stack[-2].operator != node.operator + end - # Adds parentheses around ternary operators contained within certain - # expressions where it could be confusing which subexpression will get - # evaluated first. For example, - # - # a ? b : c ? d : e - # - # becomes - # - # a ? b : ₍c ? d : e₎ - # - def visit_if_op(node) - case stack[-2] - when Assign, Binary, IfOp, OpAssign - parentheses(node.location) + super end - super - end + # Adds parentheses around ternary operators contained within certain + # expressions where it could be confusing which subexpression will get + # evaluated first. For example, + # + # a ? b : c ? d : e + # + # becomes + # + # a ? b : ₍c ? 
d : e₎ + # + def visit_if_op(node) + case stack[-2] + when Assign, Binary, IfOp, OpAssign + parentheses(node.location) + end - # Adds the implicitly rescued StandardError into a bare rescue clause. For - # example, - # - # begin - # rescue - # end - # - # becomes - # - # begin - # rescue StandardError - # end - # - def visit_rescue(node) - if node.exception.nil? - hints << Hint.new( - line: node.location.start_line - 1, - character: node.location.start_column + "rescue".length, - label: " StandardError" - ) + super end - super - end + # Adds the implicitly rescued StandardError into a bare rescue clause. + # For example, + # + # begin + # rescue + # end + # + # becomes + # + # begin + # rescue StandardError + # end + # + def visit_rescue(node) + if node.exception.nil? + hints << Hint.new( + line: node.location.start_line - 1, + character: node.location.start_column + "rescue".length, + label: " StandardError" + ) + end - # Adds parentheses around unary statements using the - operator that are - # contained within Binary nodes. For example, - # - # -a + b - # - # becomes - # - # ₍-a₎ + b - # - def visit_unary(node) - if stack[-2].is_a?(Binary) && (node.operator == "-") - parentheses(node.location) + super end - super + # Adds parentheses around unary statements using the - operator that are + # contained within Binary nodes. For example, + # + # -a + b + # + # becomes + # + # ₍-a₎ + b + # + def visit_unary(node) + if stack[-2].is_a?(Binary) && (node.operator == "-") + parentheses(node.location) + end + + super + end end private diff --git a/lib/syntax_tree/mutation_visitor.rb b/lib/syntax_tree/mutation_visitor.rb index f96e442f..0b4b9357 100644 --- a/lib/syntax_tree/mutation_visitor.rb +++ b/lib/syntax_tree/mutation_visitor.rb @@ -33,875 +33,892 @@ def visit(node) result end - # Visit a BEGINBlock node. - def visit_BEGIN(node) - node.copy(lbrace: visit(node.lbrace), statements: visit(node.statements)) - end + visit_methods do + # Visit a BEGINBlock node. 
+ def visit_BEGIN(node) + node.copy( + lbrace: visit(node.lbrace), + statements: visit(node.statements) + ) + end - # Visit a CHAR node. - def visit_CHAR(node) - node.copy - end + # Visit a CHAR node. + def visit_CHAR(node) + node.copy + end - # Visit a ENDBlock node. - def visit_END(node) - node.copy(lbrace: visit(node.lbrace), statements: visit(node.statements)) - end + # Visit a ENDBlock node. + def visit_END(node) + node.copy( + lbrace: visit(node.lbrace), + statements: visit(node.statements) + ) + end - # Visit a EndContent node. - def visit___end__(node) - node.copy - end + # Visit a EndContent node. + def visit___end__(node) + node.copy + end - # Visit a AliasNode node. - def visit_alias(node) - node.copy(left: visit(node.left), right: visit(node.right)) - end + # Visit a AliasNode node. + def visit_alias(node) + node.copy(left: visit(node.left), right: visit(node.right)) + end - # Visit a ARef node. - def visit_aref(node) - node.copy(index: visit(node.index)) - end + # Visit a ARef node. + def visit_aref(node) + node.copy(index: visit(node.index)) + end - # Visit a ARefField node. - def visit_aref_field(node) - node.copy(index: visit(node.index)) - end + # Visit a ARefField node. + def visit_aref_field(node) + node.copy(index: visit(node.index)) + end - # Visit a ArgParen node. - def visit_arg_paren(node) - node.copy(arguments: visit(node.arguments)) - end + # Visit a ArgParen node. + def visit_arg_paren(node) + node.copy(arguments: visit(node.arguments)) + end - # Visit a Args node. - def visit_args(node) - node.copy(parts: visit_all(node.parts)) - end + # Visit a Args node. + def visit_args(node) + node.copy(parts: visit_all(node.parts)) + end - # Visit a ArgBlock node. - def visit_arg_block(node) - node.copy(value: visit(node.value)) - end + # Visit a ArgBlock node. + def visit_arg_block(node) + node.copy(value: visit(node.value)) + end - # Visit a ArgStar node. 
- def visit_arg_star(node) - node.copy(value: visit(node.value)) - end + # Visit a ArgStar node. + def visit_arg_star(node) + node.copy(value: visit(node.value)) + end - # Visit a ArgsForward node. - def visit_args_forward(node) - node.copy - end + # Visit a ArgsForward node. + def visit_args_forward(node) + node.copy + end - # Visit a ArrayLiteral node. - def visit_array(node) - node.copy(lbracket: visit(node.lbracket), contents: visit(node.contents)) - end + # Visit a ArrayLiteral node. + def visit_array(node) + node.copy( + lbracket: visit(node.lbracket), + contents: visit(node.contents) + ) + end - # Visit a AryPtn node. - def visit_aryptn(node) - node.copy( - constant: visit(node.constant), - requireds: visit_all(node.requireds), - rest: visit(node.rest), - posts: visit_all(node.posts) - ) - end + # Visit a AryPtn node. + def visit_aryptn(node) + node.copy( + constant: visit(node.constant), + requireds: visit_all(node.requireds), + rest: visit(node.rest), + posts: visit_all(node.posts) + ) + end - # Visit a Assign node. - def visit_assign(node) - node.copy(target: visit(node.target)) - end + # Visit a Assign node. + def visit_assign(node) + node.copy(target: visit(node.target)) + end - # Visit a Assoc node. - def visit_assoc(node) - node.copy - end + # Visit a Assoc node. + def visit_assoc(node) + node.copy + end - # Visit a AssocSplat node. - def visit_assoc_splat(node) - node.copy - end + # Visit a AssocSplat node. + def visit_assoc_splat(node) + node.copy + end - # Visit a Backref node. - def visit_backref(node) - node.copy - end + # Visit a Backref node. + def visit_backref(node) + node.copy + end - # Visit a Backtick node. - def visit_backtick(node) - node.copy - end + # Visit a Backtick node. + def visit_backtick(node) + node.copy + end - # Visit a BareAssocHash node. - def visit_bare_assoc_hash(node) - node.copy(assocs: visit_all(node.assocs)) - end + # Visit a BareAssocHash node. 
+ def visit_bare_assoc_hash(node) + node.copy(assocs: visit_all(node.assocs)) + end - # Visit a Begin node. - def visit_begin(node) - node.copy(bodystmt: visit(node.bodystmt)) - end + # Visit a Begin node. + def visit_begin(node) + node.copy(bodystmt: visit(node.bodystmt)) + end - # Visit a PinnedBegin node. - def visit_pinned_begin(node) - node.copy - end + # Visit a PinnedBegin node. + def visit_pinned_begin(node) + node.copy + end - # Visit a Binary node. - def visit_binary(node) - node.copy - end + # Visit a Binary node. + def visit_binary(node) + node.copy + end - # Visit a BlockVar node. - def visit_block_var(node) - node.copy(params: visit(node.params), locals: visit_all(node.locals)) - end + # Visit a BlockVar node. + def visit_block_var(node) + node.copy(params: visit(node.params), locals: visit_all(node.locals)) + end - # Visit a BlockArg node. - def visit_blockarg(node) - node.copy(name: visit(node.name)) - end + # Visit a BlockArg node. + def visit_blockarg(node) + node.copy(name: visit(node.name)) + end - # Visit a BodyStmt node. - def visit_bodystmt(node) - node.copy( - statements: visit(node.statements), - rescue_clause: visit(node.rescue_clause), - else_clause: visit(node.else_clause), - ensure_clause: visit(node.ensure_clause) - ) - end + # Visit a BodyStmt node. + def visit_bodystmt(node) + node.copy( + statements: visit(node.statements), + rescue_clause: visit(node.rescue_clause), + else_clause: visit(node.else_clause), + ensure_clause: visit(node.ensure_clause) + ) + end - # Visit a Break node. - def visit_break(node) - node.copy(arguments: visit(node.arguments)) - end + # Visit a Break node. + def visit_break(node) + node.copy(arguments: visit(node.arguments)) + end - # Visit a Call node. - def visit_call(node) - node.copy( - receiver: visit(node.receiver), - operator: node.operator == :"::" ? :"::" : visit(node.operator), - message: node.message == :call ? 
:call : visit(node.message), - arguments: visit(node.arguments) - ) - end + # Visit a Call node. + def visit_call(node) + node.copy( + receiver: visit(node.receiver), + operator: node.operator == :"::" ? :"::" : visit(node.operator), + message: node.message == :call ? :call : visit(node.message), + arguments: visit(node.arguments) + ) + end - # Visit a Case node. - def visit_case(node) - node.copy( - keyword: visit(node.keyword), - value: visit(node.value), - consequent: visit(node.consequent) - ) - end + # Visit a Case node. + def visit_case(node) + node.copy( + keyword: visit(node.keyword), + value: visit(node.value), + consequent: visit(node.consequent) + ) + end - # Visit a RAssign node. - def visit_rassign(node) - node.copy(operator: visit(node.operator)) - end + # Visit a RAssign node. + def visit_rassign(node) + node.copy(operator: visit(node.operator)) + end - # Visit a ClassDeclaration node. - def visit_class(node) - node.copy( - constant: visit(node.constant), - superclass: visit(node.superclass), - bodystmt: visit(node.bodystmt) - ) - end + # Visit a ClassDeclaration node. + def visit_class(node) + node.copy( + constant: visit(node.constant), + superclass: visit(node.superclass), + bodystmt: visit(node.bodystmt) + ) + end - # Visit a Comma node. - def visit_comma(node) - node.copy - end + # Visit a Comma node. + def visit_comma(node) + node.copy + end - # Visit a Command node. - def visit_command(node) - node.copy( - message: visit(node.message), - arguments: visit(node.arguments), - block: visit(node.block) - ) - end + # Visit a Command node. + def visit_command(node) + node.copy( + message: visit(node.message), + arguments: visit(node.arguments), + block: visit(node.block) + ) + end - # Visit a CommandCall node. - def visit_command_call(node) - node.copy( - operator: node.operator == :"::" ? 
:"::" : visit(node.operator), - message: visit(node.message), - arguments: visit(node.arguments), - block: visit(node.block) - ) - end + # Visit a CommandCall node. + def visit_command_call(node) + node.copy( + operator: node.operator == :"::" ? :"::" : visit(node.operator), + message: visit(node.message), + arguments: visit(node.arguments), + block: visit(node.block) + ) + end - # Visit a Comment node. - def visit_comment(node) - node.copy - end + # Visit a Comment node. + def visit_comment(node) + node.copy + end - # Visit a Const node. - def visit_const(node) - node.copy - end + # Visit a Const node. + def visit_const(node) + node.copy + end - # Visit a ConstPathField node. - def visit_const_path_field(node) - node.copy(constant: visit(node.constant)) - end + # Visit a ConstPathField node. + def visit_const_path_field(node) + node.copy(constant: visit(node.constant)) + end - # Visit a ConstPathRef node. - def visit_const_path_ref(node) - node.copy(constant: visit(node.constant)) - end + # Visit a ConstPathRef node. + def visit_const_path_ref(node) + node.copy(constant: visit(node.constant)) + end - # Visit a ConstRef node. - def visit_const_ref(node) - node.copy(constant: visit(node.constant)) - end + # Visit a ConstRef node. + def visit_const_ref(node) + node.copy(constant: visit(node.constant)) + end - # Visit a CVar node. - def visit_cvar(node) - node.copy - end + # Visit a CVar node. + def visit_cvar(node) + node.copy + end - # Visit a Def node. - def visit_def(node) - node.copy( - target: visit(node.target), - operator: visit(node.operator), - name: visit(node.name), - params: visit(node.params), - bodystmt: visit(node.bodystmt) - ) - end + # Visit a Def node. + def visit_def(node) + node.copy( + target: visit(node.target), + operator: visit(node.operator), + name: visit(node.name), + params: visit(node.params), + bodystmt: visit(node.bodystmt) + ) + end - # Visit a Defined node. - def visit_defined(node) - node.copy - end + # Visit a Defined node. 
+ def visit_defined(node) + node.copy + end - # Visit a Block node. - def visit_block(node) - node.copy( - opening: visit(node.opening), - block_var: visit(node.block_var), - bodystmt: visit(node.bodystmt) - ) - end + # Visit a Block node. + def visit_block(node) + node.copy( + opening: visit(node.opening), + block_var: visit(node.block_var), + bodystmt: visit(node.bodystmt) + ) + end - # Visit a RangeNode node. - def visit_range(node) - node.copy( - left: visit(node.left), - operator: visit(node.operator), - right: visit(node.right) - ) - end + # Visit a RangeNode node. + def visit_range(node) + node.copy( + left: visit(node.left), + operator: visit(node.operator), + right: visit(node.right) + ) + end - # Visit a DynaSymbol node. - def visit_dyna_symbol(node) - node.copy(parts: visit_all(node.parts)) - end + # Visit a DynaSymbol node. + def visit_dyna_symbol(node) + node.copy(parts: visit_all(node.parts)) + end - # Visit a Else node. - def visit_else(node) - node.copy( - keyword: visit(node.keyword), - statements: visit(node.statements) - ) - end + # Visit a Else node. + def visit_else(node) + node.copy( + keyword: visit(node.keyword), + statements: visit(node.statements) + ) + end - # Visit a Elsif node. - def visit_elsif(node) - node.copy( - statements: visit(node.statements), - consequent: visit(node.consequent) - ) - end + # Visit a Elsif node. + def visit_elsif(node) + node.copy( + statements: visit(node.statements), + consequent: visit(node.consequent) + ) + end - # Visit a EmbDoc node. - def visit_embdoc(node) - node.copy - end + # Visit a EmbDoc node. + def visit_embdoc(node) + node.copy + end - # Visit a EmbExprBeg node. - def visit_embexpr_beg(node) - node.copy - end + # Visit a EmbExprBeg node. + def visit_embexpr_beg(node) + node.copy + end - # Visit a EmbExprEnd node. - def visit_embexpr_end(node) - node.copy - end + # Visit a EmbExprEnd node. + def visit_embexpr_end(node) + node.copy + end - # Visit a EmbVar node. 
- def visit_embvar(node) - node.copy - end + # Visit a EmbVar node. + def visit_embvar(node) + node.copy + end - # Visit a Ensure node. - def visit_ensure(node) - node.copy( - keyword: visit(node.keyword), - statements: visit(node.statements) - ) - end + # Visit a Ensure node. + def visit_ensure(node) + node.copy( + keyword: visit(node.keyword), + statements: visit(node.statements) + ) + end - # Visit a ExcessedComma node. - def visit_excessed_comma(node) - node.copy - end + # Visit a ExcessedComma node. + def visit_excessed_comma(node) + node.copy + end - # Visit a Field node. - def visit_field(node) - node.copy( - operator: node.operator == :"::" ? :"::" : visit(node.operator), - name: visit(node.name) - ) - end + # Visit a Field node. + def visit_field(node) + node.copy( + operator: node.operator == :"::" ? :"::" : visit(node.operator), + name: visit(node.name) + ) + end - # Visit a FloatLiteral node. - def visit_float(node) - node.copy - end + # Visit a FloatLiteral node. + def visit_float(node) + node.copy + end - # Visit a FndPtn node. - def visit_fndptn(node) - node.copy( - constant: visit(node.constant), - left: visit(node.left), - values: visit_all(node.values), - right: visit(node.right) - ) - end + # Visit a FndPtn node. + def visit_fndptn(node) + node.copy( + constant: visit(node.constant), + left: visit(node.left), + values: visit_all(node.values), + right: visit(node.right) + ) + end - # Visit a For node. - def visit_for(node) - node.copy(index: visit(node.index), statements: visit(node.statements)) - end + # Visit a For node. + def visit_for(node) + node.copy(index: visit(node.index), statements: visit(node.statements)) + end - # Visit a GVar node. - def visit_gvar(node) - node.copy - end + # Visit a GVar node. + def visit_gvar(node) + node.copy + end - # Visit a HashLiteral node. - def visit_hash(node) - node.copy(lbrace: visit(node.lbrace), assocs: visit_all(node.assocs)) - end + # Visit a HashLiteral node. 
+ def visit_hash(node) + node.copy(lbrace: visit(node.lbrace), assocs: visit_all(node.assocs)) + end - # Visit a Heredoc node. - def visit_heredoc(node) - node.copy( - beginning: visit(node.beginning), - ending: visit(node.ending), - parts: visit_all(node.parts) - ) - end + # Visit a Heredoc node. + def visit_heredoc(node) + node.copy( + beginning: visit(node.beginning), + ending: visit(node.ending), + parts: visit_all(node.parts) + ) + end - # Visit a HeredocBeg node. - def visit_heredoc_beg(node) - node.copy - end + # Visit a HeredocBeg node. + def visit_heredoc_beg(node) + node.copy + end - # Visit a HeredocEnd node. - def visit_heredoc_end(node) - node.copy - end + # Visit a HeredocEnd node. + def visit_heredoc_end(node) + node.copy + end - # Visit a HshPtn node. - def visit_hshptn(node) - node.copy( - constant: visit(node.constant), - keywords: - node.keywords.map { |label, value| [visit(label), visit(value)] }, - keyword_rest: visit(node.keyword_rest) - ) - end + # Visit a HshPtn node. + def visit_hshptn(node) + node.copy( + constant: visit(node.constant), + keywords: + node.keywords.map { |label, value| [visit(label), visit(value)] }, + keyword_rest: visit(node.keyword_rest) + ) + end - # Visit a Ident node. - def visit_ident(node) - node.copy - end + # Visit a Ident node. + def visit_ident(node) + node.copy + end - # Visit a IfNode node. - def visit_if(node) - node.copy( - predicate: visit(node.predicate), - statements: visit(node.statements), - consequent: visit(node.consequent) - ) - end + # Visit a IfNode node. + def visit_if(node) + node.copy( + predicate: visit(node.predicate), + statements: visit(node.statements), + consequent: visit(node.consequent) + ) + end - # Visit a IfOp node. - def visit_if_op(node) - node.copy - end + # Visit a IfOp node. + def visit_if_op(node) + node.copy + end - # Visit a Imaginary node. - def visit_imaginary(node) - node.copy - end + # Visit a Imaginary node. 
+ def visit_imaginary(node) + node.copy + end - # Visit a In node. - def visit_in(node) - node.copy( - statements: visit(node.statements), - consequent: visit(node.consequent) - ) - end + # Visit a In node. + def visit_in(node) + node.copy( + statements: visit(node.statements), + consequent: visit(node.consequent) + ) + end - # Visit a Int node. - def visit_int(node) - node.copy - end + # Visit a Int node. + def visit_int(node) + node.copy + end - # Visit a IVar node. - def visit_ivar(node) - node.copy - end + # Visit a IVar node. + def visit_ivar(node) + node.copy + end - # Visit a Kw node. - def visit_kw(node) - node.copy - end + # Visit a Kw node. + def visit_kw(node) + node.copy + end - # Visit a KwRestParam node. - def visit_kwrest_param(node) - node.copy(name: visit(node.name)) - end + # Visit a KwRestParam node. + def visit_kwrest_param(node) + node.copy(name: visit(node.name)) + end - # Visit a Label node. - def visit_label(node) - node.copy - end + # Visit a Label node. + def visit_label(node) + node.copy + end - # Visit a LabelEnd node. - def visit_label_end(node) - node.copy - end + # Visit a LabelEnd node. + def visit_label_end(node) + node.copy + end - # Visit a Lambda node. - def visit_lambda(node) - node.copy(params: visit(node.params), statements: visit(node.statements)) - end + # Visit a Lambda node. + def visit_lambda(node) + node.copy( + params: visit(node.params), + statements: visit(node.statements) + ) + end - # Visit a LambdaVar node. - def visit_lambda_var(node) - node.copy(params: visit(node.params), locals: visit_all(node.locals)) - end + # Visit a LambdaVar node. + def visit_lambda_var(node) + node.copy(params: visit(node.params), locals: visit_all(node.locals)) + end - # Visit a LBrace node. - def visit_lbrace(node) - node.copy - end + # Visit a LBrace node. + def visit_lbrace(node) + node.copy + end - # Visit a LBracket node. - def visit_lbracket(node) - node.copy - end + # Visit a LBracket node. 
+ def visit_lbracket(node) + node.copy + end - # Visit a LParen node. - def visit_lparen(node) - node.copy - end + # Visit a LParen node. + def visit_lparen(node) + node.copy + end - # Visit a MAssign node. - def visit_massign(node) - node.copy(target: visit(node.target)) - end + # Visit a MAssign node. + def visit_massign(node) + node.copy(target: visit(node.target)) + end - # Visit a MethodAddBlock node. - def visit_method_add_block(node) - node.copy(call: visit(node.call), block: visit(node.block)) - end + # Visit a MethodAddBlock node. + def visit_method_add_block(node) + node.copy(call: visit(node.call), block: visit(node.block)) + end - # Visit a MLHS node. - def visit_mlhs(node) - node.copy(parts: visit_all(node.parts)) - end + # Visit a MLHS node. + def visit_mlhs(node) + node.copy(parts: visit_all(node.parts)) + end - # Visit a MLHSParen node. - def visit_mlhs_paren(node) - node.copy(contents: visit(node.contents)) - end + # Visit a MLHSParen node. + def visit_mlhs_paren(node) + node.copy(contents: visit(node.contents)) + end - # Visit a ModuleDeclaration node. - def visit_module(node) - node.copy(constant: visit(node.constant), bodystmt: visit(node.bodystmt)) - end + # Visit a ModuleDeclaration node. + def visit_module(node) + node.copy( + constant: visit(node.constant), + bodystmt: visit(node.bodystmt) + ) + end - # Visit a MRHS node. - def visit_mrhs(node) - node.copy(parts: visit_all(node.parts)) - end + # Visit a MRHS node. + def visit_mrhs(node) + node.copy(parts: visit_all(node.parts)) + end - # Visit a Next node. - def visit_next(node) - node.copy(arguments: visit(node.arguments)) - end + # Visit a Next node. + def visit_next(node) + node.copy(arguments: visit(node.arguments)) + end - # Visit a Op node. - def visit_op(node) - node.copy - end + # Visit a Op node. + def visit_op(node) + node.copy + end - # Visit a OpAssign node. 
- def visit_opassign(node) - node.copy(target: visit(node.target), operator: visit(node.operator)) - end + # Visit a OpAssign node. + def visit_opassign(node) + node.copy(target: visit(node.target), operator: visit(node.operator)) + end - # Visit a Params node. - def visit_params(node) - node.copy( - requireds: visit_all(node.requireds), - optionals: - node.optionals.map { |ident, value| [visit(ident), visit(value)] }, - rest: visit(node.rest), - posts: visit_all(node.posts), - keywords: - node.keywords.map { |ident, value| [visit(ident), visit(value)] }, - keyword_rest: - node.keyword_rest == :nil ? :nil : visit(node.keyword_rest), - block: visit(node.block) - ) - end + # Visit a Params node. + def visit_params(node) + node.copy( + requireds: visit_all(node.requireds), + optionals: + node.optionals.map { |ident, value| [visit(ident), visit(value)] }, + rest: visit(node.rest), + posts: visit_all(node.posts), + keywords: + node.keywords.map { |ident, value| [visit(ident), visit(value)] }, + keyword_rest: + node.keyword_rest == :nil ? :nil : visit(node.keyword_rest), + block: visit(node.block) + ) + end - # Visit a Paren node. - def visit_paren(node) - node.copy(lparen: visit(node.lparen), contents: visit(node.contents)) - end + # Visit a Paren node. + def visit_paren(node) + node.copy(lparen: visit(node.lparen), contents: visit(node.contents)) + end - # Visit a Period node. - def visit_period(node) - node.copy - end + # Visit a Period node. + def visit_period(node) + node.copy + end - # Visit a Program node. - def visit_program(node) - node.copy(statements: visit(node.statements)) - end + # Visit a Program node. + def visit_program(node) + node.copy(statements: visit(node.statements)) + end - # Visit a QSymbols node. - def visit_qsymbols(node) - node.copy( - beginning: visit(node.beginning), - elements: visit_all(node.elements) - ) - end + # Visit a QSymbols node. 
+ def visit_qsymbols(node) + node.copy( + beginning: visit(node.beginning), + elements: visit_all(node.elements) + ) + end - # Visit a QSymbolsBeg node. - def visit_qsymbols_beg(node) - node.copy - end + # Visit a QSymbolsBeg node. + def visit_qsymbols_beg(node) + node.copy + end - # Visit a QWords node. - def visit_qwords(node) - node.copy( - beginning: visit(node.beginning), - elements: visit_all(node.elements) - ) - end + # Visit a QWords node. + def visit_qwords(node) + node.copy( + beginning: visit(node.beginning), + elements: visit_all(node.elements) + ) + end - # Visit a QWordsBeg node. - def visit_qwords_beg(node) - node.copy - end + # Visit a QWordsBeg node. + def visit_qwords_beg(node) + node.copy + end - # Visit a RationalLiteral node. - def visit_rational(node) - node.copy - end + # Visit a RationalLiteral node. + def visit_rational(node) + node.copy + end - # Visit a RBrace node. - def visit_rbrace(node) - node.copy - end + # Visit a RBrace node. + def visit_rbrace(node) + node.copy + end - # Visit a RBracket node. - def visit_rbracket(node) - node.copy - end + # Visit a RBracket node. + def visit_rbracket(node) + node.copy + end - # Visit a Redo node. - def visit_redo(node) - node.copy - end + # Visit a Redo node. + def visit_redo(node) + node.copy + end - # Visit a RegexpContent node. - def visit_regexp_content(node) - node.copy(parts: visit_all(node.parts)) - end + # Visit a RegexpContent node. + def visit_regexp_content(node) + node.copy(parts: visit_all(node.parts)) + end - # Visit a RegexpBeg node. - def visit_regexp_beg(node) - node.copy - end + # Visit a RegexpBeg node. + def visit_regexp_beg(node) + node.copy + end - # Visit a RegexpEnd node. - def visit_regexp_end(node) - node.copy - end + # Visit a RegexpEnd node. + def visit_regexp_end(node) + node.copy + end - # Visit a RegexpLiteral node. - def visit_regexp_literal(node) - node.copy(parts: visit_all(node.parts)) - end + # Visit a RegexpLiteral node. 
+ def visit_regexp_literal(node) + node.copy(parts: visit_all(node.parts)) + end - # Visit a RescueEx node. - def visit_rescue_ex(node) - node.copy(variable: visit(node.variable)) - end + # Visit a RescueEx node. + def visit_rescue_ex(node) + node.copy(variable: visit(node.variable)) + end - # Visit a Rescue node. - def visit_rescue(node) - node.copy( - keyword: visit(node.keyword), - exception: visit(node.exception), - statements: visit(node.statements), - consequent: visit(node.consequent) - ) - end + # Visit a Rescue node. + def visit_rescue(node) + node.copy( + keyword: visit(node.keyword), + exception: visit(node.exception), + statements: visit(node.statements), + consequent: visit(node.consequent) + ) + end - # Visit a RescueMod node. - def visit_rescue_mod(node) - node.copy - end + # Visit a RescueMod node. + def visit_rescue_mod(node) + node.copy + end - # Visit a RestParam node. - def visit_rest_param(node) - node.copy(name: visit(node.name)) - end + # Visit a RestParam node. + def visit_rest_param(node) + node.copy(name: visit(node.name)) + end - # Visit a Retry node. - def visit_retry(node) - node.copy - end + # Visit a Retry node. + def visit_retry(node) + node.copy + end - # Visit a Return node. - def visit_return(node) - node.copy(arguments: visit(node.arguments)) - end + # Visit a Return node. + def visit_return(node) + node.copy(arguments: visit(node.arguments)) + end - # Visit a RParen node. - def visit_rparen(node) - node.copy - end + # Visit a RParen node. + def visit_rparen(node) + node.copy + end - # Visit a SClass node. - def visit_sclass(node) - node.copy(bodystmt: visit(node.bodystmt)) - end + # Visit a SClass node. + def visit_sclass(node) + node.copy(bodystmt: visit(node.bodystmt)) + end - # Visit a Statements node. - def visit_statements(node) - node.copy(body: visit_all(node.body)) - end + # Visit a Statements node. + def visit_statements(node) + node.copy(body: visit_all(node.body)) + end - # Visit a StringContent node. 
- def visit_string_content(node) - node.copy(parts: visit_all(node.parts)) - end + # Visit a StringContent node. + def visit_string_content(node) + node.copy(parts: visit_all(node.parts)) + end - # Visit a StringConcat node. - def visit_string_concat(node) - node.copy(left: visit(node.left), right: visit(node.right)) - end + # Visit a StringConcat node. + def visit_string_concat(node) + node.copy(left: visit(node.left), right: visit(node.right)) + end - # Visit a StringDVar node. - def visit_string_dvar(node) - node.copy(variable: visit(node.variable)) - end + # Visit a StringDVar node. + def visit_string_dvar(node) + node.copy(variable: visit(node.variable)) + end - # Visit a StringEmbExpr node. - def visit_string_embexpr(node) - node.copy(statements: visit(node.statements)) - end + # Visit a StringEmbExpr node. + def visit_string_embexpr(node) + node.copy(statements: visit(node.statements)) + end - # Visit a StringLiteral node. - def visit_string_literal(node) - node.copy(parts: visit_all(node.parts)) - end + # Visit a StringLiteral node. + def visit_string_literal(node) + node.copy(parts: visit_all(node.parts)) + end - # Visit a Super node. - def visit_super(node) - node.copy(arguments: visit(node.arguments)) - end + # Visit a Super node. + def visit_super(node) + node.copy(arguments: visit(node.arguments)) + end - # Visit a SymBeg node. - def visit_symbeg(node) - node.copy - end + # Visit a SymBeg node. + def visit_symbeg(node) + node.copy + end - # Visit a SymbolContent node. - def visit_symbol_content(node) - node.copy(value: visit(node.value)) - end + # Visit a SymbolContent node. + def visit_symbol_content(node) + node.copy(value: visit(node.value)) + end - # Visit a SymbolLiteral node. - def visit_symbol_literal(node) - node.copy(value: visit(node.value)) - end + # Visit a SymbolLiteral node. + def visit_symbol_literal(node) + node.copy(value: visit(node.value)) + end - # Visit a Symbols node. 
- def visit_symbols(node) - node.copy( - beginning: visit(node.beginning), - elements: visit_all(node.elements) - ) - end + # Visit a Symbols node. + def visit_symbols(node) + node.copy( + beginning: visit(node.beginning), + elements: visit_all(node.elements) + ) + end - # Visit a SymbolsBeg node. - def visit_symbols_beg(node) - node.copy - end + # Visit a SymbolsBeg node. + def visit_symbols_beg(node) + node.copy + end - # Visit a TLambda node. - def visit_tlambda(node) - node.copy - end + # Visit a TLambda node. + def visit_tlambda(node) + node.copy + end - # Visit a TLamBeg node. - def visit_tlambeg(node) - node.copy - end + # Visit a TLamBeg node. + def visit_tlambeg(node) + node.copy + end - # Visit a TopConstField node. - def visit_top_const_field(node) - node.copy(constant: visit(node.constant)) - end + # Visit a TopConstField node. + def visit_top_const_field(node) + node.copy(constant: visit(node.constant)) + end - # Visit a TopConstRef node. - def visit_top_const_ref(node) - node.copy(constant: visit(node.constant)) - end + # Visit a TopConstRef node. + def visit_top_const_ref(node) + node.copy(constant: visit(node.constant)) + end - # Visit a TStringBeg node. - def visit_tstring_beg(node) - node.copy - end + # Visit a TStringBeg node. + def visit_tstring_beg(node) + node.copy + end - # Visit a TStringContent node. - def visit_tstring_content(node) - node.copy - end + # Visit a TStringContent node. + def visit_tstring_content(node) + node.copy + end - # Visit a TStringEnd node. - def visit_tstring_end(node) - node.copy - end + # Visit a TStringEnd node. + def visit_tstring_end(node) + node.copy + end - # Visit a Not node. - def visit_not(node) - node.copy(statement: visit(node.statement)) - end + # Visit a Not node. + def visit_not(node) + node.copy(statement: visit(node.statement)) + end - # Visit a Unary node. - def visit_unary(node) - node.copy - end + # Visit a Unary node. + def visit_unary(node) + node.copy + end - # Visit a Undef node. 
- def visit_undef(node) - node.copy(symbols: visit_all(node.symbols)) - end + # Visit a Undef node. + def visit_undef(node) + node.copy(symbols: visit_all(node.symbols)) + end - # Visit a UnlessNode node. - def visit_unless(node) - node.copy( - predicate: visit(node.predicate), - statements: visit(node.statements), - consequent: visit(node.consequent) - ) - end + # Visit a UnlessNode node. + def visit_unless(node) + node.copy( + predicate: visit(node.predicate), + statements: visit(node.statements), + consequent: visit(node.consequent) + ) + end - # Visit a UntilNode node. - def visit_until(node) - node.copy( - predicate: visit(node.predicate), - statements: visit(node.statements) - ) - end + # Visit a UntilNode node. + def visit_until(node) + node.copy( + predicate: visit(node.predicate), + statements: visit(node.statements) + ) + end - # Visit a VarField node. - def visit_var_field(node) - node.copy(value: visit(node.value)) - end + # Visit a VarField node. + def visit_var_field(node) + node.copy(value: visit(node.value)) + end - # Visit a VarRef node. - def visit_var_ref(node) - node.copy(value: visit(node.value)) - end + # Visit a VarRef node. + def visit_var_ref(node) + node.copy(value: visit(node.value)) + end - # Visit a PinnedVarRef node. - def visit_pinned_var_ref(node) - node.copy(value: visit(node.value)) - end + # Visit a PinnedVarRef node. + def visit_pinned_var_ref(node) + node.copy(value: visit(node.value)) + end - # Visit a VCall node. - def visit_vcall(node) - node.copy(value: visit(node.value)) - end + # Visit a VCall node. + def visit_vcall(node) + node.copy(value: visit(node.value)) + end - # Visit a VoidStmt node. - def visit_void_stmt(node) - node.copy - end + # Visit a VoidStmt node. + def visit_void_stmt(node) + node.copy + end - # Visit a When node. - def visit_when(node) - node.copy( - arguments: visit(node.arguments), - statements: visit(node.statements), - consequent: visit(node.consequent) - ) - end + # Visit a When node. 
+ def visit_when(node) + node.copy( + arguments: visit(node.arguments), + statements: visit(node.statements), + consequent: visit(node.consequent) + ) + end - # Visit a WhileNode node. - def visit_while(node) - node.copy( - predicate: visit(node.predicate), - statements: visit(node.statements) - ) - end + # Visit a WhileNode node. + def visit_while(node) + node.copy( + predicate: visit(node.predicate), + statements: visit(node.statements) + ) + end - # Visit a Word node. - def visit_word(node) - node.copy(parts: visit_all(node.parts)) - end + # Visit a Word node. + def visit_word(node) + node.copy(parts: visit_all(node.parts)) + end - # Visit a Words node. - def visit_words(node) - node.copy( - beginning: visit(node.beginning), - elements: visit_all(node.elements) - ) - end + # Visit a Words node. + def visit_words(node) + node.copy( + beginning: visit(node.beginning), + elements: visit_all(node.elements) + ) + end - # Visit a WordsBeg node. - def visit_words_beg(node) - node.copy - end + # Visit a WordsBeg node. + def visit_words_beg(node) + node.copy + end - # Visit a XString node. - def visit_xstring(node) - node.copy(parts: visit_all(node.parts)) - end + # Visit a XString node. + def visit_xstring(node) + node.copy(parts: visit_all(node.parts)) + end - # Visit a XStringLiteral node. - def visit_xstring_literal(node) - node.copy(parts: visit_all(node.parts)) - end + # Visit a XStringLiteral node. + def visit_xstring_literal(node) + node.copy(parts: visit_all(node.parts)) + end - # Visit a YieldNode node. - def visit_yield(node) - node.copy(arguments: visit(node.arguments)) - end + # Visit a YieldNode node. + def visit_yield(node) + node.copy(arguments: visit(node.arguments)) + end - # Visit a ZSuper node. - def visit_zsuper(node) - node.copy + # Visit a ZSuper node. 
+ def visit_zsuper(node) + node.copy + end end end end diff --git a/lib/syntax_tree/parser.rb b/lib/syntax_tree/parser.rb index 8059b18c..426bd945 100644 --- a/lib/syntax_tree/parser.rb +++ b/lib/syntax_tree/parser.rb @@ -668,8 +668,10 @@ def visit(node) stack.pop end - def visit_var_ref(node) - node.pin(stack[-2], pins.shift) + visit_methods do + def visit_var_ref(node) + node.pin(stack[-2], pins.shift) + end end def self.visit(node, tokens) diff --git a/lib/syntax_tree/translation/parser.rb b/lib/syntax_tree/translation/parser.rb index 70c98336..ad889478 100644 --- a/lib/syntax_tree/translation/parser.rb +++ b/lib/syntax_tree/translation/parser.rb @@ -89,2538 +89,2589 @@ def visit(node) result end - # Visit an AliasNode node. - def visit_alias(node) - s( - :alias, - [visit(node.left), visit(node.right)], - smap_keyword_bare( - srange_length(node.start_char, 5), - srange_node(node) + visit_methods do + # Visit an AliasNode node. + def visit_alias(node) + s( + :alias, + [visit(node.left), visit(node.right)], + smap_keyword_bare( + srange_length(node.start_char, 5), + srange_node(node) + ) ) - ) - end + end - # Visit an ARefNode. - def visit_aref(node) - if ::Parser::Builders::Default.emit_index - if node.index.nil? - s( - :index, - [visit(node.collection)], - smap_index( - srange_find(node.collection.end_char, node.end_char, "["), - srange_length(node.end_char, -1), - srange_node(node) + # Visit an ARefNode. + def visit_aref(node) + if ::Parser::Builders::Default.emit_index + if node.index.nil? 
+ s( + :index, + [visit(node.collection)], + smap_index( + srange_find(node.collection.end_char, node.end_char, "["), + srange_length(node.end_char, -1), + srange_node(node) + ) ) - ) + else + s( + :index, + [visit(node.collection)].concat(visit_all(node.index.parts)), + smap_index( + srange_find_between(node.collection, node.index, "["), + srange_length(node.end_char, -1), + srange_node(node) + ) + ) + end else - s( - :index, - [visit(node.collection)].concat(visit_all(node.index.parts)), - smap_index( - srange_find_between(node.collection, node.index, "["), - srange_length(node.end_char, -1), - srange_node(node) + if node.index.nil? + s( + :send, + [visit(node.collection), :[]], + smap_send_bare( + srange_find(node.collection.end_char, node.end_char, "[]"), + srange_node(node) + ) ) - ) + else + s( + :send, + [visit(node.collection), :[], *visit_all(node.index.parts)], + smap_send_bare( + srange( + srange_find_between( + node.collection, + node.index, + "[" + ).begin_pos, + node.end_char + ), + srange_node(node) + ) + ) + end end - else - if node.index.nil? - s( - :send, - [visit(node.collection), :[]], - smap_send_bare( - srange_find(node.collection.end_char, node.end_char, "[]"), - srange_node(node) + end + + # Visit an ARefField node. + def visit_aref_field(node) + if ::Parser::Builders::Default.emit_index + if node.index.nil? + s( + :indexasgn, + [visit(node.collection)], + smap_index( + srange_find(node.collection.end_char, node.end_char, "["), + srange_length(node.end_char, -1), + srange_node(node) + ) ) - ) + else + s( + :indexasgn, + [visit(node.collection)].concat(visit_all(node.index.parts)), + smap_index( + srange_find_between(node.collection, node.index, "["), + srange_length(node.end_char, -1), + srange_node(node) + ) + ) + end else - s( - :send, - [visit(node.collection), :[], *visit_all(node.index.parts)], - smap_send_bare( - srange( - srange_find_between( - node.collection, - node.index, - "[" - ).begin_pos, - node.end_char + if node.index.nil? 
+ s( + :send, + [visit(node.collection), :[]=], + smap_send_bare( + srange_find(node.collection.end_char, node.end_char, "[]"), + srange_node(node) + ) + ) + else + s( + :send, + [visit(node.collection), :[]=].concat( + visit_all(node.index.parts) ), + smap_send_bare( + srange( + srange_find_between( + node.collection, + node.index, + "[" + ).begin_pos, + node.end_char + ), + srange_node(node) + ) + ) + end + end + end + + # Visit an ArgBlock node. + def visit_arg_block(node) + s( + :block_pass, + [visit(node.value)], + smap_operator(srange_length(node.start_char, 1), srange_node(node)) + ) + end + + # Visit an ArgStar node. + def visit_arg_star(node) + if stack[-3].is_a?(MLHSParen) && stack[-3].contents.is_a?(MLHS) + if node.value.nil? + s(:restarg, [], smap_variable(nil, srange_node(node))) + else + s( + :restarg, + [node.value.value.to_sym], + smap_variable(srange_node(node.value), srange_node(node)) + ) + end + else + s( + :splat, + node.value.nil? ? [] : [visit(node.value)], + smap_operator( + srange_length(node.start_char, 1), srange_node(node) ) ) end end - end - # Visit an ARefField node. - def visit_aref_field(node) - if ::Parser::Builders::Default.emit_index - if node.index.nil? - s( - :indexasgn, - [visit(node.collection)], - smap_index( - srange_find(node.collection.end_char, node.end_char, "["), + # Visit an ArgsForward node. + def visit_args_forward(node) + s(:forwarded_args, [], smap(srange_node(node))) + end + + # Visit an ArrayLiteral node. + def visit_array(node) + s( + :array, + node.contents ? visit_all(node.contents.parts) : [], + if node.lbracket.nil? + smap_collection_bare(srange_node(node)) + else + smap_collection( + srange_node(node.lbracket), srange_length(node.end_char, -1), srange_node(node) ) - ) - else + end + ) + end + + # Visit an AryPtn node. + def visit_aryptn(node) + type = :array_pattern + children = visit_all(node.requireds) + + if node.rest.is_a?(VarField) + if !node.rest.value.nil? 
+ children << s(:match_rest, [visit(node.rest)], nil) + elsif node.posts.empty? && + node.rest.start_char == node.rest.end_char + # Here we have an implicit rest, as in [foo,]. parser has a + # specific type for these patterns. + type = :array_pattern_with_tail + else + children << s(:match_rest, [], nil) + end + end + + if node.constant s( - :indexasgn, - [visit(node.collection)].concat(visit_all(node.index.parts)), - smap_index( - srange_find_between(node.collection, node.index, "["), + :const_pattern, + [ + visit(node.constant), + s( + type, + children + visit_all(node.posts), + smap_collection_bare( + srange(node.constant.end_char + 1, node.end_char - 1) + ) + ) + ], + smap_collection( + srange_length(node.constant.end_char, 1), srange_length(node.end_char, -1), srange_node(node) ) ) + else + s( + type, + children + visit_all(node.posts), + if buffer.source[node.start_char] == "[" + smap_collection( + srange_length(node.start_char, 1), + srange_length(node.end_char, -1), + srange_node(node) + ) + else + smap_collection_bare(srange_node(node)) + end + ) end - else - if node.index.nil? + end + + # Visit an Assign node. + def visit_assign(node) + target = visit(node.target) + location = + target + .location + .with_operator(srange_find_between(node.target, node.value, "=")) + .with_expression(srange_node(node)) + + s(target.type, target.children + [visit(node.value)], location) + end + + # Visit an Assoc node. + def visit_assoc(node) + if node.value.nil? 
+ expression = srange(node.start_char, node.end_char - 1) + + type, location = + if node.key.value.start_with?(/[A-Z]/) + [:const, smap_constant(nil, expression, expression)] + else + [:send, smap_send_bare(expression, expression)] + end + s( - :send, - [visit(node.collection), :[]=], - smap_send_bare( - srange_find(node.collection.end_char, node.end_char, "[]"), + :pair, + [ + visit(node.key), + s(type, [nil, node.key.value.chomp(":").to_sym], location) + ], + smap_operator( + srange_length(node.key.end_char, -1), srange_node(node) ) ) else s( - :send, - [visit(node.collection), :[]=].concat( - visit_all(node.index.parts) - ), - smap_send_bare( - srange( - srange_find_between( - node.collection, - node.index, - "[" - ).begin_pos, - node.end_char - ), + :pair, + [visit(node.key), visit(node.value)], + smap_operator( + srange_search_between(node.key, node.value, "=>") || + srange_length(node.key.end_char, -1), srange_node(node) ) ) end end - end - # Visit an ArgBlock node. - def visit_arg_block(node) - s( - :block_pass, - [visit(node.value)], - smap_operator(srange_length(node.start_char, 1), srange_node(node)) - ) - end + # Visit an AssocSplat node. + def visit_assoc_splat(node) + s( + :kwsplat, + [visit(node.value)], + smap_operator(srange_length(node.start_char, 2), srange_node(node)) + ) + end - # Visit an ArgStar node. - def visit_arg_star(node) - if stack[-3].is_a?(MLHSParen) && stack[-3].contents.is_a?(MLHS) - if node.value.nil? - s(:restarg, [], smap_variable(nil, srange_node(node))) + # Visit a Backref node. + def visit_backref(node) + location = smap(srange_node(node)) + + if node.value.match?(/^\$\d+$/) + s(:nth_ref, [node.value[1..].to_i], location) else - s( - :restarg, - [node.value.value.to_sym], - smap_variable(srange_node(node.value), srange_node(node)) - ) + s(:back_ref, [node.value.to_sym], location) end - else + end + + # Visit a BareAssocHash node. + def visit_bare_assoc_hash(node) s( - :splat, - node.value.nil? ? 
[] : [visit(node.value)], - smap_operator(srange_length(node.start_char, 1), srange_node(node)) + if ::Parser::Builders::Default.emit_kwargs && + !stack[-2].is_a?(ArrayLiteral) + :kwargs + else + :hash + end, + visit_all(node.assocs), + smap_collection_bare(srange_node(node)) ) end - end - # Visit an ArgsForward node. - def visit_args_forward(node) - s(:forwarded_args, [], smap(srange_node(node))) - end + # Visit a BEGINBlock node. + def visit_BEGIN(node) + s( + :preexe, + [visit(node.statements)], + smap_keyword( + srange_length(node.start_char, 5), + srange_find(node.start_char + 5, node.statements.start_char, "{"), + srange_length(node.end_char, -1), + srange_node(node) + ) + ) + end - # Visit an ArrayLiteral node. - def visit_array(node) - s( - :array, - node.contents ? visit_all(node.contents.parts) : [], - if node.lbracket.nil? - smap_collection_bare(srange_node(node)) - else + # Visit a Begin node. + def visit_begin(node) + location = smap_collection( - srange_node(node.lbracket), - srange_length(node.end_char, -1), + srange_length(node.start_char, 5), + srange_length(node.end_char, -3), srange_node(node) ) - end - ) - end - # Visit an AryPtn node. - def visit_aryptn(node) - type = :array_pattern - children = visit_all(node.requireds) - - if node.rest.is_a?(VarField) - if !node.rest.value.nil? - children << s(:match_rest, [visit(node.rest)], nil) - elsif node.posts.empty? && node.rest.start_char == node.rest.end_char - # Here we have an implicit rest, as in [foo,]. parser has a specific - # type for these patterns. - type = :array_pattern_with_tail + if node.bodystmt.empty? + s(:kwbegin, [], location) + elsif node.bodystmt.rescue_clause.nil? && + node.bodystmt.ensure_clause.nil? && + node.bodystmt.else_clause.nil? + child = visit(node.bodystmt.statements) + + s( + :kwbegin, + child.type == :begin ? 
child.children : [child], + location + ) else - children << s(:match_rest, [], nil) + s(:kwbegin, [visit(node.bodystmt)], location) end end - if node.constant - s( - :const_pattern, - [ - visit(node.constant), - s( - type, - children + visit_all(node.posts), - smap_collection_bare( - srange(node.constant.end_char + 1, node.end_char - 1) - ) + # Visit a Binary node. + def visit_binary(node) + case node.operator + when :| + current = -2 + while stack[current].is_a?(Binary) && stack[current].operator == :| + current -= 1 + end + + if stack[current].is_a?(In) + s(:match_alt, [visit(node.left), visit(node.right)], nil) + else + visit(canonical_binary(node)) + end + when :"=>", :"&&", :and, :"||", :or + s( + { "=>": :match_as, "&&": :and, "||": :or }.fetch( + node.operator, + node.operator + ), + [visit(node.left), visit(node.right)], + smap_operator( + srange_find_between(node.left, node.right, node.operator.to_s), + srange_node(node) ) - ], - smap_collection( - srange_length(node.constant.end_char, 1), - srange_length(node.end_char, -1), - srange_node(node) ) - ) - else - s( - type, - children + visit_all(node.posts), - if buffer.source[node.start_char] == "[" - smap_collection( - srange_length(node.start_char, 1), - srange_length(node.end_char, -1), - srange_node(node) + when :=~ + # When you use a regular expression on the left hand side of a =~ + # operator and it doesn't have interpolatoin, then its named capture + # groups introduce local variables into the scope. In this case the + # parser gem has a different node (match_with_lvasgn) instead of the + # regular send. 
+ if node.left.is_a?(RegexpLiteral) && node.left.parts.length == 1 && + node.left.parts.first.is_a?(TStringContent) + s( + :match_with_lvasgn, + [visit(node.left), visit(node.right)], + smap_operator( + srange_find_between( + node.left, + node.right, + node.operator.to_s + ), + srange_node(node) + ) ) else - smap_collection_bare(srange_node(node)) + visit(canonical_binary(node)) end - ) + else + visit(canonical_binary(node)) + end end - end - # Visit an Assign node. - def visit_assign(node) - target = visit(node.target) - location = - target - .location - .with_operator(srange_find_between(node.target, node.value, "=")) - .with_expression(srange_node(node)) + # Visit a BlockArg node. + def visit_blockarg(node) + if node.name.nil? + s(:blockarg, [nil], smap_variable(nil, srange_node(node))) + else + s( + :blockarg, + [node.name.value.to_sym], + smap_variable(srange_node(node.name), srange_node(node)) + ) + end + end - s(target.type, target.children + [visit(node.value)], location) - end + # Visit a BlockVar node. + def visit_block_var(node) + shadowargs = + node.locals.map do |local| + s( + :shadowarg, + [local.value.to_sym], + smap_variable(srange_node(local), srange_node(local)) + ) + end - # Visit an Assoc node. - def visit_assoc(node) - if node.value.nil? - expression = srange(node.start_char, node.end_char - 1) + params = node.params + children = + if ::Parser::Builders::Default.emit_procarg0 && node.arg0? + # There is a special node type in the parser gem for when a single + # required parameter to a block would potentially be expanded + # automatically. We handle that case here. 
+ required = params.requireds.first + procarg0 = + if ::Parser::Builders::Default.emit_arg_inside_procarg0 && + required.is_a?(Ident) + s( + :procarg0, + [ + s( + :arg, + [required.value.to_sym], + smap_variable( + srange_node(required), + srange_node(required) + ) + ) + ], + smap_collection_bare(srange_node(required)) + ) + else + child = visit(required) + s(:procarg0, child, child.location) + end - type, location = - if node.key.value.start_with?(/[A-Z]/) - [:const, smap_constant(nil, expression, expression)] + [procarg0] else - [:send, smap_send_bare(expression, expression)] + visit(params).children end s( - :pair, - [ - visit(node.key), - s(type, [nil, node.key.value.chomp(":").to_sym], location) - ], - smap_operator( - srange_length(node.key.end_char, -1), - srange_node(node) - ) - ) - else - s( - :pair, - [visit(node.key), visit(node.value)], - smap_operator( - srange_search_between(node.key, node.value, "=>") || - srange_length(node.key.end_char, -1), + :args, + children + shadowargs, + smap_collection( + srange_length(node.start_char, 1), + srange_length(node.end_char, -1), srange_node(node) ) ) end - end - - # Visit an AssocSplat node. - def visit_assoc_splat(node) - s( - :kwsplat, - [visit(node.value)], - smap_operator(srange_length(node.start_char, 2), srange_node(node)) - ) - end - # Visit a Backref node. - def visit_backref(node) - location = smap(srange_node(node)) - - if node.value.match?(/^\$\d+$/) - s(:nth_ref, [node.value[1..].to_i], location) - else - s(:back_ref, [node.value.to_sym], location) - end - end + # Visit a BodyStmt node. 
+ def visit_bodystmt(node) + result = visit(node.statements) + + if node.rescue_clause + rescue_node = visit(node.rescue_clause) + + children = [result] + rescue_node.children + location = rescue_node.location + + if node.else_clause + children.pop + children << visit(node.else_clause) + + location = + smap_condition( + nil, + nil, + srange_length(node.else_clause.start_char - 3, -4), + nil, + srange( + location.expression.begin_pos, + node.else_clause.end_char + ) + ) + end - # Visit a BareAssocHash node. - def visit_bare_assoc_hash(node) - s( - if ::Parser::Builders::Default.emit_kwargs && - !stack[-2].is_a?(ArrayLiteral) - :kwargs - else - :hash - end, - visit_all(node.assocs), - smap_collection_bare(srange_node(node)) - ) - end + result = s(rescue_node.type, children, location) + end - # Visit a BEGINBlock node. - def visit_BEGIN(node) - s( - :preexe, - [visit(node.statements)], - smap_keyword( - srange_length(node.start_char, 5), - srange_find(node.start_char + 5, node.statements.start_char, "{"), - srange_length(node.end_char, -1), - srange_node(node) - ) - ) - end + if node.ensure_clause + ensure_node = visit(node.ensure_clause) - # Visit a Begin node. - def visit_begin(node) - location = - smap_collection( - srange_length(node.start_char, 5), - srange_length(node.end_char, -3), - srange_node(node) - ) + expression = + ( + if result + result.location.expression.join( + ensure_node.location.expression + ) + else + ensure_node.location.expression + end + ) + location = ensure_node.location.with_expression(expression) - if node.bodystmt.empty? - s(:kwbegin, [], location) - elsif node.bodystmt.rescue_clause.nil? && - node.bodystmt.ensure_clause.nil? && node.bodystmt.else_clause.nil? - child = visit(node.bodystmt.statements) + result = + s(ensure_node.type, [result] + ensure_node.children, location) + end - s(:kwbegin, child.type == :begin ? 
child.children : [child], location) - else - s(:kwbegin, [visit(node.bodystmt)], location) + result end - end - # Visit a Binary node. - def visit_binary(node) - case node.operator - when :| - current = -2 - while stack[current].is_a?(Binary) && stack[current].operator == :| - current -= 1 - end + # Visit a Break node. + def visit_break(node) + s( + :break, + visit_all(node.arguments.parts), + smap_keyword_bare( + srange_length(node.start_char, 5), + srange_node(node) + ) + ) + end - if stack[current].is_a?(In) - s(:match_alt, [visit(node.left), visit(node.right)], nil) - else - visit(canonical_binary(node)) + # Visit a CallNode node. + def visit_call(node) + visit_command_call( + CommandCall.new( + receiver: node.receiver, + operator: node.operator, + message: node.message, + arguments: node.arguments, + block: nil, + location: node.location + ) + ) + end + + # Visit a Case node. + def visit_case(node) + clauses = [node.consequent] + while clauses.last && !clauses.last.is_a?(Else) + clauses << clauses.last.consequent end - when :"=>", :"&&", :and, :"||", :or + + else_token = + if clauses.last.is_a?(Else) + srange_length(clauses.last.start_char, 4) + end + s( - { "=>": :match_as, "&&": :and, "||": :or }.fetch( - node.operator, - node.operator - ), - [visit(node.left), visit(node.right)], - smap_operator( - srange_find_between(node.left, node.right, node.operator.to_s), + node.consequent.is_a?(In) ? :case_match : :case, + [visit(node.value)] + clauses.map { |clause| visit(clause) }, + smap_condition( + srange_length(node.start_char, 4), + nil, + else_token, + srange_length(node.end_char, -3), srange_node(node) ) ) - when :=~ - # When you use a regular expression on the left hand side of a =~ - # operator and it doesn't have interpolatoin, then its named capture - # groups introduce local variables into the scope. In this case the - # parser gem has a different node (match_with_lvasgn) instead of the - # regular send. 
- if node.left.is_a?(RegexpLiteral) && node.left.parts.length == 1 && - node.left.parts.first.is_a?(TStringContent) - s( - :match_with_lvasgn, - [visit(node.left), visit(node.right)], - smap_operator( - srange_find_between(node.left, node.right, node.operator.to_s), - srange_node(node) - ) + end + + # Visit a CHAR node. + def visit_CHAR(node) + s( + :str, + [node.value[1..]], + smap_collection( + srange_length(node.start_char, 1), + nil, + srange_node(node) ) - else - visit(canonical_binary(node)) - end - else - visit(canonical_binary(node)) + ) end - end - # Visit a BlockArg node. - def visit_blockarg(node) - if node.name.nil? - s(:blockarg, [nil], smap_variable(nil, srange_node(node))) - else + # Visit a ClassDeclaration node. + def visit_class(node) + operator = + if node.superclass + srange_find_between(node.constant, node.superclass, "<") + end + s( - :blockarg, - [node.name.value.to_sym], - smap_variable(srange_node(node.name), srange_node(node)) + :class, + [ + visit(node.constant), + visit(node.superclass), + visit(node.bodystmt) + ], + smap_definition( + srange_length(node.start_char, 5), + operator, + srange_node(node.constant), + srange_length(node.end_char, -3) + ).with_expression(srange_node(node)) ) end - end - # Visit a BlockVar node. - def visit_block_var(node) - shadowargs = - node.locals.map do |local| - s( - :shadowarg, - [local.value.to_sym], - smap_variable(srange_node(local), srange_node(local)) + # Visit a Command node. + def visit_command(node) + visit_command_call( + CommandCall.new( + receiver: nil, + operator: nil, + message: node.message, + arguments: node.arguments, + block: node.block, + location: node.location ) - end + ) + end - params = node.params - children = - if ::Parser::Builders::Default.emit_procarg0 && node.arg0? - # There is a special node type in the parser gem for when a single - # required parameter to a block would potentially be expanded - # automatically. We handle that case here. 
- required = params.requireds.first - procarg0 = - if ::Parser::Builders::Default.emit_arg_inside_procarg0 && - required.is_a?(Ident) - s( - :procarg0, - [ - s( - :arg, - [required.value.to_sym], - smap_variable( - srange_node(required), - srange_node(required) - ) - ) - ], - smap_collection_bare(srange_node(required)) - ) - else - child = visit(required) - s(:procarg0, child, child.location) - end + # Visit a CommandCall node. + def visit_command_call(node) + children = [ + visit(node.receiver), + node.message == :call ? :call : node.message.value.to_sym + ] + + begin_token = nil + end_token = nil + + case node.arguments + when Args + children += visit_all(node.arguments.parts) + when ArgParen + case node.arguments.arguments + when nil + # skip + when ArgsForward + children << visit(node.arguments.arguments) + else + children += visit_all(node.arguments.arguments.parts) + end - [procarg0] - else - visit(params).children + begin_token = srange_length(node.arguments.start_char, 1) + end_token = srange_length(node.arguments.end_char, -1) end - s( - :args, - children + shadowargs, - smap_collection( - srange_length(node.start_char, 1), - srange_length(node.end_char, -1), - srange_node(node) - ) - ) - end + dot_bound = + if node.arguments + node.arguments.start_char + elsif node.block + node.block.start_char + else + node.end_char + end - # Visit a BodyStmt node. - def visit_bodystmt(node) - result = visit(node.statements) + expression = + if node.arguments.is_a?(ArgParen) + srange(node.start_char, node.arguments.end_char) + elsif node.arguments.is_a?(Args) && node.arguments.parts.any? 
+ last_part = node.arguments.parts.last + end_char = + if last_part.is_a?(Heredoc) + last_part.beginning.end_char + else + last_part.end_char + end - if node.rescue_clause - rescue_node = visit(node.rescue_clause) + srange(node.start_char, end_char) + elsif node.block + srange_node(node.message) + else + srange_node(node) + end - children = [result] + rescue_node.children - location = rescue_node.location + call = + s( + if node.operator.is_a?(Op) && node.operator.value == "&." + :csend + else + :send + end, + children, + smap_send( + if node.operator == :"::" + srange_find( + node.receiver.end_char, + if node.message == :call + dot_bound + else + node.message.start_char + end, + "::" + ) + elsif node.operator + srange_node(node.operator) + end, + node.message == :call ? nil : srange_node(node.message), + begin_token, + end_token, + expression + ) + ) - if node.else_clause - children.pop - children << visit(node.else_clause) + if node.block + type, arguments = block_children(node.block) - location = - smap_condition( - nil, - nil, - srange_length(node.else_clause.start_char - 3, -4), - nil, - srange(location.expression.begin_pos, node.else_clause.end_char) + s( + type, + [call, arguments, visit(node.block.bodystmt)], + smap_collection( + srange_node(node.block.opening), + srange_length( + node.end_char, + node.block.opening.is_a?(Kw) ? -3 : -1 + ), + srange_node(node) ) + ) + else + call end - - result = s(rescue_node.type, children, location) end - if node.ensure_clause - ensure_node = visit(node.ensure_clause) + # Visit a Const node. + def visit_const(node) + s( + :const, + [nil, node.value.to_sym], + smap_constant(nil, srange_node(node), srange_node(node)) + ) + end - expression = - ( - if result - result.location.expression.join(ensure_node.location.expression) - else - ensure_node.location.expression - end + # Visit a ConstPathField node. 
+ def visit_const_path_field(node) + if node.parent.is_a?(VarRef) && node.parent.value.is_a?(Kw) && + node.parent.value.value == "self" && node.constant.is_a?(Ident) + s(:send, [visit(node.parent), :"#{node.constant.value}="], nil) + else + s( + :casgn, + [visit(node.parent), node.constant.value.to_sym], + smap_constant( + srange_find_between(node.parent, node.constant, "::"), + srange_node(node.constant), + srange_node(node) + ) ) - location = ensure_node.location.with_expression(expression) - - result = - s(ensure_node.type, [result] + ensure_node.children, location) + end end - result - end - - # Visit a Break node. - def visit_break(node) - s( - :break, - visit_all(node.arguments.parts), - smap_keyword_bare( - srange_length(node.start_char, 5), - srange_node(node) + # Visit a ConstPathRef node. + def visit_const_path_ref(node) + s( + :const, + [visit(node.parent), node.constant.value.to_sym], + smap_constant( + srange_find_between(node.parent, node.constant, "::"), + srange_node(node.constant), + srange_node(node) + ) ) - ) - end + end - # Visit a CallNode node. - def visit_call(node) - visit_command_call( - CommandCall.new( - receiver: node.receiver, - operator: node.operator, - message: node.message, - arguments: node.arguments, - block: nil, - location: node.location + # Visit a ConstRef node. + def visit_const_ref(node) + s( + :const, + [nil, node.constant.value.to_sym], + smap_constant(nil, srange_node(node.constant), srange_node(node)) ) - ) - end + end - # Visit a Case node. - def visit_case(node) - clauses = [node.consequent] - while clauses.last && !clauses.last.is_a?(Else) - clauses << clauses.last.consequent + # Visit a CVar node. + def visit_cvar(node) + s( + :cvar, + [node.value.to_sym], + smap_variable(srange_node(node), srange_node(node)) + ) end - else_token = - if clauses.last.is_a?(Else) - srange_length(clauses.last.start_char, 4) - end + # Visit a DefNode node. 
+ def visit_def(node) + name = node.name.value.to_sym + args = + case node.params + when Params + child = visit(node.params) - s( - node.consequent.is_a?(In) ? :case_match : :case, - [visit(node.value)] + clauses.map { |clause| visit(clause) }, - smap_condition( - srange_length(node.start_char, 4), - nil, - else_token, - srange_length(node.end_char, -3), - srange_node(node) - ) - ) - end + s( + child.type, + child.children, + smap_collection_bare(child.location&.expression) + ) + when Paren + child = visit(node.params.contents) - # Visit a CHAR node. - def visit_CHAR(node) - s( - :str, - [node.value[1..]], - smap_collection( - srange_length(node.start_char, 1), - nil, - srange_node(node) - ) - ) - end + s( + child.type, + child.children, + smap_collection( + srange_length(node.params.start_char, 1), + srange_length(node.params.end_char, -1), + srange_node(node.params) + ) + ) + else + s(:args, [], smap_collection_bare(nil)) + end - # Visit a ClassDeclaration node. - def visit_class(node) - operator = - if node.superclass - srange_find_between(node.constant, node.superclass, "<") + location = + if node.endless? + smap_method_definition( + srange_length(node.start_char, 3), + nil, + srange_node(node.name), + nil, + srange_find_between( + (node.params || node.name), + node.bodystmt, + "=" + ), + srange_node(node) + ) + else + smap_method_definition( + srange_length(node.start_char, 3), + nil, + srange_node(node.name), + srange_length(node.end_char, -3), + nil, + srange_node(node) + ) + end + + if node.target + target = + node.target.is_a?(Paren) ? 
node.target.contents : node.target + + s( + :defs, + [visit(target), name, args, visit(node.bodystmt)], + smap_method_definition( + location.keyword, + srange_node(node.operator), + location.name, + location.end, + location.assignment, + location.expression + ) + ) + else + s(:def, [name, args, visit(node.bodystmt)], location) end + end - s( - :class, - [visit(node.constant), visit(node.superclass), visit(node.bodystmt)], - smap_definition( - srange_length(node.start_char, 5), - operator, - srange_node(node.constant), - srange_length(node.end_char, -3) - ).with_expression(srange_node(node)) - ) - end + # Visit a Defined node. + def visit_defined(node) + paren_range = (node.start_char + 8)...node.end_char + begin_token, end_token = + if buffer.source[paren_range].include?("(") + [ + srange_find(paren_range.begin, paren_range.end, "("), + srange_length(node.end_char, -1) + ] + end - # Visit a Command node. - def visit_command(node) - visit_command_call( - CommandCall.new( - receiver: nil, - operator: nil, - message: node.message, - arguments: node.arguments, - block: node.block, - location: node.location + s( + :defined?, + [visit(node.value)], + smap_keyword( + srange_length(node.start_char, 8), + begin_token, + end_token, + srange_node(node) + ) ) - ) - end + end - # Visit a CommandCall node. - def visit_command_call(node) - children = [ - visit(node.receiver), - node.message == :call ? :call : node.message.value.to_sym - ] - - begin_token = nil - end_token = nil - - case node.arguments - when Args - children += visit_all(node.arguments.parts) - when ArgParen - case node.arguments.arguments - when nil - # skip - when ArgsForward - children << visit(node.arguments.arguments) + # Visit a DynaSymbol node. 
+ def visit_dyna_symbol(node) + location = + if node.quote + smap_collection( + srange_length(node.start_char, node.quote.length), + srange_length(node.end_char, -1), + srange_node(node) + ) + else + smap_collection_bare(srange_node(node)) + end + + if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) + s(:sym, ["\"#{node.parts.first.value}\"".undump.to_sym], location) else - children += visit_all(node.arguments.arguments.parts) + s(:dsym, visit_all(node.parts), location) end - - begin_token = srange_length(node.arguments.start_char, 1) - end_token = srange_length(node.arguments.end_char, -1) end - dot_bound = - if node.arguments - node.arguments.start_char - elsif node.block - node.block.start_char + # Visit an Else node. + def visit_else(node) + if node.statements.empty? && stack[-2].is_a?(Case) + s(:empty_else, [], nil) else - node.end_char + visit(node.statements) end + end - expression = - if node.arguments.is_a?(ArgParen) - srange(node.start_char, node.arguments.end_char) - elsif node.arguments.is_a?(Args) && node.arguments.parts.any? - last_part = node.arguments.parts.last - end_char = - if last_part.is_a?(Heredoc) - last_part.beginning.end_char - else - last_part.end_char - end + # Visit an Elsif node. + def visit_elsif(node) + else_token = + case node.consequent + when Elsif + srange_length(node.consequent.start_char, 5) + when Else + srange_length(node.consequent.start_char, 4) + end - srange(node.start_char, end_char) - elsif node.block - srange_node(node.message) - else - srange_node(node) - end + expression = srange(node.start_char, node.statements.end_char - 1) - call = s( - if node.operator.is_a?(Op) && node.operator.value == "&." - :csend - else - :send - end, - children, - smap_send( - if node.operator == :"::" - srange_find( - node.receiver.end_char, - if node.message == :call - dot_bound - else - node.message.start_char - end, - "::" - ) - elsif node.operator - srange_node(node.operator) - end, - node.message == :call ? 
nil : srange_node(node.message), - begin_token, - end_token, + :if, + [ + visit(node.predicate), + visit(node.statements), + visit(node.consequent) + ], + smap_condition( + srange_length(node.start_char, 5), + nil, + else_token, + nil, expression ) ) + end - if node.block - type, arguments = block_children(node.block) - + # Visit an ENDBlock node. + def visit_END(node) s( - type, - [call, arguments, visit(node.block.bodystmt)], - smap_collection( - srange_node(node.block.opening), - srange_length( - node.end_char, - node.block.opening.is_a?(Kw) ? -3 : -1 - ), + :postexe, + [visit(node.statements)], + smap_keyword( + srange_length(node.start_char, 3), + srange_find(node.start_char + 3, node.statements.start_char, "{"), + srange_length(node.end_char, -1), srange_node(node) ) ) - else - call end - end - # Visit a Const node. - def visit_const(node) - s( - :const, - [nil, node.value.to_sym], - smap_constant(nil, srange_node(node), srange_node(node)) - ) - end + # Visit an Ensure node. + def visit_ensure(node) + start_char = node.start_char + end_char = + if node.statements.empty? + start_char + 6 + else + node.statements.body.last.end_char + end - # Visit a ConstPathField node. - def visit_const_path_field(node) - if node.parent.is_a?(VarRef) && node.parent.value.is_a?(Kw) && - node.parent.value.value == "self" && node.constant.is_a?(Ident) - s(:send, [visit(node.parent), :"#{node.constant.value}="], nil) - else s( - :casgn, - [visit(node.parent), node.constant.value.to_sym], - smap_constant( - srange_find_between(node.parent, node.constant, "::"), - srange_node(node.constant), - srange_node(node) + :ensure, + [visit(node.statements)], + smap_condition( + srange_length(start_char, 6), + nil, + nil, + nil, + srange(start_char, end_char) ) ) end - end - # Visit a ConstPathRef node. 
- def visit_const_path_ref(node) - s( - :const, - [visit(node.parent), node.constant.value.to_sym], - smap_constant( - srange_find_between(node.parent, node.constant, "::"), - srange_node(node.constant), - srange_node(node) + # Visit a Field node. + def visit_field(node) + message = + case stack[-2] + when Assign, MLHS + Ident.new( + value: "#{node.name.value}=", + location: node.name.location + ) + else + node.name + end + + visit_command_call( + CommandCall.new( + receiver: node.parent, + operator: node.operator, + message: message, + arguments: nil, + block: nil, + location: node.location + ) ) - ) - end + end - # Visit a ConstRef node. - def visit_const_ref(node) - s( - :const, - [nil, node.constant.value.to_sym], - smap_constant(nil, srange_node(node.constant), srange_node(node)) - ) - end + # Visit a FloatLiteral node. + def visit_float(node) + operator = + if %w[+ -].include?(buffer.source[node.start_char]) + srange_length(node.start_char, 1) + end - # Visit a CVar node. - def visit_cvar(node) - s( - :cvar, - [node.value.to_sym], - smap_variable(srange_node(node), srange_node(node)) - ) - end + s( + :float, + [node.value.to_f], + smap_operator(operator, srange_node(node)) + ) + end - # Visit a DefNode node. - def visit_def(node) - name = node.name.value.to_sym - args = - case node.params - when Params - child = visit(node.params) + # Visit a FndPtn node. + def visit_fndptn(node) + left, right = + [node.left, node.right].map do |child| + location = + smap_operator( + srange_length(child.start_char, 1), + srange_node(child) + ) - s( - child.type, - child.children, - smap_collection_bare(child.location&.expression) - ) - when Paren - child = visit(node.params.contents) + if child.is_a?(VarField) && child.value.nil? 
+ s(:match_rest, [], location) + else + s(:match_rest, [visit(child)], location) + end + end + inner = s( - child.type, - child.children, + :find_pattern, + [left, *visit_all(node.values), right], smap_collection( - srange_length(node.params.start_char, 1), - srange_length(node.params.end_char, -1), - srange_node(node.params) + srange_length(node.start_char, 1), + srange_length(node.end_char, -1), + srange_node(node) ) ) + + if node.constant + s(:const_pattern, [visit(node.constant), inner], nil) else - s(:args, [], smap_collection_bare(nil)) + inner end + end - location = - if node.endless? - smap_method_definition( - srange_length(node.start_char, 3), - nil, - srange_node(node.name), - nil, - srange_find_between( - (node.params || node.name), - node.bodystmt, - "=" - ), - srange_node(node) - ) - else - smap_method_definition( + # Visit a For node. + def visit_for(node) + s( + :for, + [visit(node.index), visit(node.collection), visit(node.statements)], + smap_for( srange_length(node.start_char, 3), - nil, - srange_node(node.name), + srange_find_between(node.index, node.collection, "in"), + srange_search_between(node.collection, node.statements, "do") || + srange_search_between(node.collection, node.statements, ";"), srange_length(node.end_char, -3), - nil, srange_node(node) ) - end - - if node.target - target = node.target.is_a?(Paren) ? node.target.contents : node.target - - s( - :defs, - [visit(target), name, args, visit(node.bodystmt)], - smap_method_definition( - location.keyword, - srange_node(node.operator), - location.name, - location.end, - location.assignment, - location.expression - ) ) - else - s(:def, [name, args, visit(node.bodystmt)], location) end - end - # Visit a Defined node. 
- def visit_defined(node) - paren_range = (node.start_char + 8)...node.end_char - begin_token, end_token = - if buffer.source[paren_range].include?("(") - [ - srange_find(paren_range.begin, paren_range.end, "("), - srange_length(node.end_char, -1) - ] - end - - s( - :defined?, - [visit(node.value)], - smap_keyword( - srange_length(node.start_char, 8), - begin_token, - end_token, - srange_node(node) + # Visit a GVar node. + def visit_gvar(node) + s( + :gvar, + [node.value.to_sym], + smap_variable(srange_node(node), srange_node(node)) ) - ) - end + end - # Visit a DynaSymbol node. - def visit_dyna_symbol(node) - location = - if node.quote + # Visit a HashLiteral node. + def visit_hash(node) + s( + :hash, + visit_all(node.assocs), smap_collection( - srange_length(node.start_char, node.quote.length), + srange_length(node.start_char, 1), srange_length(node.end_char, -1), srange_node(node) ) - else - smap_collection_bare(srange_node(node)) - end - - if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - s(:sym, ["\"#{node.parts.first.value}\"".undump.to_sym], location) - else - s(:dsym, visit_all(node.parts), location) + ) end - end - # Visit an Else node. - def visit_else(node) - if node.statements.empty? && stack[-2].is_a?(Case) - s(:empty_else, [], nil) - else - visit(node.statements) - end - end + # Visit a Heredoc node. + def visit_heredoc(node) + heredoc = HeredocBuilder.new(node) - # Visit an Elsif node. - def visit_elsif(node) - else_token = - case node.consequent - when Elsif - srange_length(node.consequent.start_char, 5) - when Else - srange_length(node.consequent.start_char, 4) - end + # For each part of the heredoc, if it's a string content node, split + # it into multiple string content nodes, one for each line. Otherwise, + # visit the node as normal. 
+ node.parts.each do |part| + if part.is_a?(TStringContent) && part.value.count("\n") > 1 + index = part.start_char + lines = part.value.split("\n") - expression = srange(node.start_char, node.statements.end_char - 1) - - s( - :if, - [ - visit(node.predicate), - visit(node.statements), - visit(node.consequent) - ], - smap_condition( - srange_length(node.start_char, 5), - nil, - else_token, - nil, - expression - ) - ) - end + lines.each do |line| + length = line.length + 1 + location = smap_collection_bare(srange_length(index, length)) - # Visit an ENDBlock node. - def visit_END(node) - s( - :postexe, - [visit(node.statements)], - smap_keyword( - srange_length(node.start_char, 3), - srange_find(node.start_char + 3, node.statements.start_char, "{"), - srange_length(node.end_char, -1), - srange_node(node) - ) - ) - end + heredoc << s(:str, ["#{line}\n"], location) + index += length + end + else + heredoc << visit(part) + end + end - # Visit an Ensure node. - def visit_ensure(node) - start_char = node.start_char - end_char = - if node.statements.empty? - start_char + 6 + # Now that we have all of the pieces on the heredoc, we can trim it if + # it is a heredoc that supports trimming (i.e., it has a ~ on the + # declaration). + heredoc.trim! + + # Generate the location for the heredoc, which goes from the + # declaration to the ending delimiter. + location = + smap_heredoc( + srange_node(node.beginning), + srange( + if node.parts.empty? + node.beginning.end_char + 1 + else + node.parts.first.start_char + end, + node.ending.start_char + ), + srange(node.ending.start_char, node.ending.end_char - 1) + ) + + # Finally, decide which kind of heredoc node to generate based on its + # declaration and contents. 
+ if node.beginning.value.match?(/`\w+`\z/) + s(:xstr, heredoc.segments, location) + elsif heredoc.segments.length == 1 + segment = heredoc.segments.first + s(segment.type, segment.children, location) else - node.statements.body.last.end_char + s(:dstr, heredoc.segments, location) end + end - s( - :ensure, - [visit(node.statements)], - smap_condition( - srange_length(start_char, 6), - nil, - nil, - nil, - srange(start_char, end_char) - ) - ) - end + # Visit a HshPtn node. + def visit_hshptn(node) + children = + node.keywords.map do |(keyword, value)| + next s(:pair, [visit(keyword), visit(value)], nil) if value + + case keyword + when DynaSymbol + raise if keyword.parts.length > 1 + s(:match_var, [keyword.parts.first.value.to_sym], nil) + when Label + s(:match_var, [keyword.value.chomp(":").to_sym], nil) + end + end - # Visit a Field node. - def visit_field(node) - message = - case stack[-2] - when Assign, MLHS - Ident.new( - value: "#{node.name.value}=", - location: node.name.location - ) + if node.keyword_rest.is_a?(VarField) + children << if node.keyword_rest.value.nil? + s(:match_rest, [], nil) + elsif node.keyword_rest.value == :nil + s(:match_nil_pattern, [], nil) + else + s(:match_rest, [visit(node.keyword_rest)], nil) + end + end + + inner = s(:hash_pattern, children, nil) + if node.constant + s(:const_pattern, [visit(node.constant), inner], nil) else - node.name + inner end + end - visit_command_call( - CommandCall.new( - receiver: node.parent, - operator: node.operator, - message: message, - arguments: nil, - block: nil, - location: node.location + # Visit an Ident node. + def visit_ident(node) + s( + :lvar, + [node.value.to_sym], + smap_variable(srange_node(node), srange_node(node)) ) - ) - end - - # Visit a FloatLiteral node. 
- def visit_float(node) - operator = - if %w[+ -].include?(buffer.source[node.start_char]) - srange_length(node.start_char, 1) - end + end - s(:float, [node.value.to_f], smap_operator(operator, srange_node(node))) - end + # Visit an IfNode node. + def visit_if(node) + predicate = + case node.predicate + when RangeNode + type = + node.predicate.operator.value == ".." ? :iflipflop : :eflipflop + s(type, visit(node.predicate).children, nil) + when RegexpLiteral + s(:match_current_line, [visit(node.predicate)], nil) + when Unary + if node.predicate.operator.value == "!" && + node.predicate.statement.is_a?(RegexpLiteral) + s( + :send, + [ + s(:match_current_line, [visit(node.predicate.statement)]), + :! + ], + nil + ) + else + visit(node.predicate) + end + else + visit(node.predicate) + end - # Visit a FndPtn node. - def visit_fndptn(node) - left, right = - [node.left, node.right].map do |child| - location = - smap_operator( - srange_length(child.start_char, 1), - srange_node(child) + s( + :if, + [predicate, visit(node.statements), visit(node.consequent)], + if node.modifier? + smap_keyword_bare( + srange_find_between(node.statements, node.predicate, "if"), + srange_node(node) ) - - if child.is_a?(VarField) && child.value.nil? - s(:match_rest, [], location) else - s(:match_rest, [visit(child)], location) + begin_start = node.predicate.end_char + begin_end = + if node.statements.empty? 
+ node.statements.end_char + else + node.statements.body.first.start_char + end + + begin_token = + if buffer.source[begin_start...begin_end].include?("then") + srange_find(begin_start, begin_end, "then") + elsif buffer.source[begin_start...begin_end].include?(";") + srange_find(begin_start, begin_end, ";") + end + + else_token = + case node.consequent + when Elsif + srange_length(node.consequent.start_char, 5) + when Else + srange_length(node.consequent.start_char, 4) + end + + smap_condition( + srange_length(node.start_char, 2), + begin_token, + else_token, + srange_length(node.end_char, -3), + srange_node(node) + ) end - end + ) + end - inner = + # Visit an IfOp node. + def visit_if_op(node) s( - :find_pattern, - [left, *visit_all(node.values), right], - smap_collection( - srange_length(node.start_char, 1), - srange_length(node.end_char, -1), + :if, + [visit(node.predicate), visit(node.truthy), visit(node.falsy)], + smap_ternary( + srange_find_between(node.predicate, node.truthy, "?"), + srange_find_between(node.truthy, node.falsy, ":"), srange_node(node) ) ) - - if node.constant - s(:const_pattern, [visit(node.constant), inner], nil) - else - inner end - end - - # Visit a For node. - def visit_for(node) - s( - :for, - [visit(node.index), visit(node.collection), visit(node.statements)], - smap_for( - srange_length(node.start_char, 3), - srange_find_between(node.index, node.collection, "in"), - srange_search_between(node.collection, node.statements, "do") || - srange_search_between(node.collection, node.statements, ";"), - srange_length(node.end_char, -3), - srange_node(node) - ) - ) - end - - # Visit a GVar node. - def visit_gvar(node) - s( - :gvar, - [node.value.to_sym], - smap_variable(srange_node(node), srange_node(node)) - ) - end - # Visit a HashLiteral node. 
- def visit_hash(node) - s( - :hash, - visit_all(node.assocs), - smap_collection( - srange_length(node.start_char, 1), - srange_length(node.end_char, -1), - srange_node(node) + # Visit an Imaginary node. + def visit_imaginary(node) + s( + :complex, + [ + # We have to do an eval here in order to get the value in case + # it's something like 42ri. to_c will not give the right value in + # that case. Maybe there's an API for this but I can't find it. + eval(node.value) + ], + smap_operator(nil, srange_node(node)) ) - ) - end - - # Visit a Heredoc node. - def visit_heredoc(node) - heredoc = HeredocBuilder.new(node) - - # For each part of the heredoc, if it's a string content node, split it - # into multiple string content nodes, one for each line. Otherwise, - # visit the node as normal. - node.parts.each do |part| - if part.is_a?(TStringContent) && part.value.count("\n") > 1 - index = part.start_char - lines = part.value.split("\n") - - lines.each do |line| - length = line.length + 1 - location = smap_collection_bare(srange_length(index, length)) - - heredoc << s(:str, ["#{line}\n"], location) - index += length - end - else - heredoc << visit(part) - end end - # Now that we have all of the pieces on the heredoc, we can trim it if - # it is a heredoc that supports trimming (i.e., it has a ~ on the - # declaration). - heredoc.trim! + # Visit an In node. + def visit_in(node) + case node.pattern + when IfNode + s( + :in_pattern, + [ + visit(node.pattern.statements), + s(:if_guard, [visit(node.pattern.predicate)], nil), + visit(node.statements) + ], + nil + ) + when UnlessNode + s( + :in_pattern, + [ + visit(node.pattern.statements), + s(:unless_guard, [visit(node.pattern.predicate)], nil), + visit(node.statements) + ], + nil + ) + else + begin_token = + srange_search_between(node.pattern, node.statements, "then") - # Generate the location for the heredoc, which goes from the declaration - # to the ending delimiter. 
- location = - smap_heredoc( - srange_node(node.beginning), - srange( - if node.parts.empty? - node.beginning.end_char + 1 + end_char = + if begin_token || node.statements.empty? + node.statements.end_char - 1 else - node.parts.first.start_char - end, - node.ending.start_char - ), - srange(node.ending.start_char, node.ending.end_char - 1) - ) + node.statements.body.last.start_char + end - # Finally, decide which kind of heredoc node to generate based on its - # declaration and contents. - if node.beginning.value.match?(/`\w+`\z/) - s(:xstr, heredoc.segments, location) - elsif heredoc.segments.length == 1 - segment = heredoc.segments.first - s(segment.type, segment.children, location) - else - s(:dstr, heredoc.segments, location) + s( + :in_pattern, + [visit(node.pattern), nil, visit(node.statements)], + smap_keyword( + srange_length(node.start_char, 2), + begin_token, + nil, + srange(node.start_char, end_char) + ) + ) + end end - end - # Visit a HshPtn node. - def visit_hshptn(node) - children = - node.keywords.map do |(keyword, value)| - next s(:pair, [visit(keyword), visit(value)], nil) if value - - case keyword - when DynaSymbol - raise if keyword.parts.length > 1 - s(:match_var, [keyword.parts.first.value.to_sym], nil) - when Label - s(:match_var, [keyword.value.chomp(":").to_sym], nil) + # Visit an Int node. + def visit_int(node) + operator = + if %w[+ -].include?(buffer.source[node.start_char]) + srange_length(node.start_char, 1) end - end - if node.keyword_rest.is_a?(VarField) - children << if node.keyword_rest.value.nil? - s(:match_rest, [], nil) - elsif node.keyword_rest.value == :nil - s(:match_nil_pattern, [], nil) - else - s(:match_rest, [visit(node.keyword_rest)], nil) - end + s(:int, [node.value.to_i], smap_operator(operator, srange_node(node))) end - inner = s(:hash_pattern, children, nil) - if node.constant - s(:const_pattern, [visit(node.constant), inner], nil) - else - inner + # Visit an IVar node. 
+ def visit_ivar(node) + s( + :ivar, + [node.value.to_sym], + smap_variable(srange_node(node), srange_node(node)) + ) end - end - # Visit an Ident node. - def visit_ident(node) - s( - :lvar, - [node.value.to_sym], - smap_variable(srange_node(node), srange_node(node)) - ) - end + # Visit a Kw node. + def visit_kw(node) + location = smap(srange_node(node)) - # Visit an IfNode node. - def visit_if(node) - predicate = - case node.predicate - when RangeNode - type = - node.predicate.operator.value == ".." ? :iflipflop : :eflipflop - s(type, visit(node.predicate).children, nil) - when RegexpLiteral - s(:match_current_line, [visit(node.predicate)], nil) - when Unary - if node.predicate.operator.value == "!" && - node.predicate.statement.is_a?(RegexpLiteral) - s( - :send, - [s(:match_current_line, [visit(node.predicate.statement)]), :!], - nil - ) + case node.value + when "__FILE__" + s(:str, [buffer.name], location) + when "__LINE__" + s( + :int, + [node.location.start_line + buffer.first_line - 1], + location + ) + when "__ENCODING__" + if ::Parser::Builders::Default.emit_encoding + s(:__ENCODING__, [], location) else - visit(node.predicate) + s(:const, [s(:const, [nil, :Encoding], nil), :UTF_8], location) end else - visit(node.predicate) + s(node.value.to_sym, [], location) end + end - s( - :if, - [predicate, visit(node.statements), visit(node.consequent)], - if node.modifier? - smap_keyword_bare( - srange_find_between(node.statements, node.predicate, "if"), - srange_node(node) - ) + # Visit a KwRestParam node. + def visit_kwrest_param(node) + if node.name.nil? + s(:kwrestarg, [], smap_variable(nil, srange_node(node))) else - begin_start = node.predicate.end_char - begin_end = - if node.statements.empty? 
- node.statements.end_char - else - node.statements.body.first.start_char - end + s( + :kwrestarg, + [node.name.value.to_sym], + smap_variable(srange_node(node.name), srange_node(node)) + ) + end + end - begin_token = - if buffer.source[begin_start...begin_end].include?("then") - srange_find(begin_start, begin_end, "then") - elsif buffer.source[begin_start...begin_end].include?(";") - srange_find(begin_start, begin_end, ";") - end + # Visit a Label node. + def visit_label(node) + s( + :sym, + [node.value.chomp(":").to_sym], + smap_collection_bare(srange(node.start_char, node.end_char - 1)) + ) + end - else_token = - case node.consequent - when Elsif - srange_length(node.consequent.start_char, 5) - when Else - srange_length(node.consequent.start_char, 4) - end + # Visit a Lambda node. + def visit_lambda(node) + args = + node.params.is_a?(LambdaVar) ? node.params : node.params.contents + args_node = visit(args) - smap_condition( - srange_length(node.start_char, 2), - begin_token, - else_token, - srange_length(node.end_char, -3), - srange_node(node) - ) + type = :block + if args.empty? && (maximum = num_block_type(node.statements)) + type = :numblock + args_node = maximum end - ) - end - # Visit an IfOp node. - def visit_if_op(node) - s( - :if, - [visit(node.predicate), visit(node.truthy), visit(node.falsy)], - smap_ternary( - srange_find_between(node.predicate, node.truthy, "?"), - srange_find_between(node.truthy, node.falsy, ":"), - srange_node(node) - ) - ) - end + begin_token, end_token = + if ( + srange = + srange_search_between(node.params, node.statements, "{") + ) + [srange, srange_length(node.end_char, -1)] + else + [ + srange_find_between(node.params, node.statements, "do"), + srange_length(node.end_char, -3) + ] + end - # Visit an Imaginary node. - def visit_imaginary(node) - s( - :complex, - [ - # We have to do an eval here in order to get the value in case it's - # something like 42ri. to_c will not give the right value in that - # case. 
Maybe there's an API for this but I can't find it. - eval(node.value) - ], - smap_operator(nil, srange_node(node)) - ) - end + selector = srange_length(node.start_char, 2) - # Visit an In node. - def visit_in(node) - case node.pattern - when IfNode - s( - :in_pattern, - [ - visit(node.pattern.statements), - s(:if_guard, [visit(node.pattern.predicate)], nil), - visit(node.statements) - ], - nil - ) - when UnlessNode s( - :in_pattern, + type, [ - visit(node.pattern.statements), - s(:unless_guard, [visit(node.pattern.predicate)], nil), + if ::Parser::Builders::Default.emit_lambda + s(:lambda, [], smap(selector)) + else + s(:send, [nil, :lambda], smap_send_bare(selector, selector)) + end, + args_node, visit(node.statements) ], - nil + smap_collection(begin_token, end_token, srange_node(node)) ) - else - begin_token = - srange_search_between(node.pattern, node.statements, "then") + end - end_char = - if begin_token || node.statements.empty? - node.statements.end_char - 1 + # Visit a LambdaVar node. + def visit_lambda_var(node) + shadowargs = + node.locals.map do |local| + s( + :shadowarg, + [local.value.to_sym], + smap_variable(srange_node(local), srange_node(local)) + ) + end + + location = + if node.start_char == node.end_char + smap_collection_bare(nil) else - node.statements.body.last.start_char + smap_collection( + srange_length(node.start_char, 1), + srange_length(node.end_char, -1), + srange_node(node) + ) end + s(:args, visit(node.params).children + shadowargs, location) + end + + # Visit an MAssign node. + def visit_massign(node) s( - :in_pattern, - [visit(node.pattern), nil, visit(node.statements)], - smap_keyword( - srange_length(node.start_char, 2), - begin_token, - nil, - srange(node.start_char, end_char) + :masgn, + [visit(node.target), visit(node.value)], + smap_operator( + srange_find_between(node.target, node.value, "="), + srange_node(node) ) ) end - end - - # Visit an Int node. 
- def visit_int(node) - operator = - if %w[+ -].include?(buffer.source[node.start_char]) - srange_length(node.start_char, 1) - end - s(:int, [node.value.to_i], smap_operator(operator, srange_node(node))) - end + # Visit a MethodAddBlock node. + def visit_method_add_block(node) + case node.call + when Break, Next, ReturnNode + type, arguments = block_children(node.block) + call = visit(node.call) - # Visit an IVar node. - def visit_ivar(node) - s( - :ivar, - [node.value.to_sym], - smap_variable(srange_node(node), srange_node(node)) - ) - end + s( + call.type, + [ + s( + type, + [*call.children, arguments, visit(node.block.bodystmt)], + nil + ) + ], + nil + ) + when ARef, Super, ZSuper + type, arguments = block_children(node.block) - # Visit a Kw node. - def visit_kw(node) - location = smap(srange_node(node)) - - case node.value - when "__FILE__" - s(:str, [buffer.name], location) - when "__LINE__" - s(:int, [node.location.start_line + buffer.first_line - 1], location) - when "__ENCODING__" - if ::Parser::Builders::Default.emit_encoding - s(:__ENCODING__, [], location) + s( + type, + [visit(node.call), arguments, visit(node.block.bodystmt)], + nil + ) else - s(:const, [s(:const, [nil, :Encoding], nil), :UTF_8], location) + visit_command_call( + CommandCall.new( + receiver: node.call.receiver, + operator: node.call.operator, + message: node.call.message, + arguments: node.call.arguments, + block: node.block, + location: node.location + ) + ) end - else - s(node.value.to_sym, [], location) end - end - # Visit a KwRestParam node. - def visit_kwrest_param(node) - if node.name.nil? - s(:kwrestarg, [], smap_variable(nil, srange_node(node))) - else + # Visit an MLHS node. 
+ def visit_mlhs(node) s( - :kwrestarg, - [node.name.value.to_sym], - smap_variable(srange_node(node.name), srange_node(node)) + :mlhs, + node.parts.map do |part| + if part.is_a?(Ident) + s( + :arg, + [part.value.to_sym], + smap_variable(srange_node(part), srange_node(part)) + ) + else + visit(part) + end + end, + smap_collection_bare(srange_node(node)) ) end - end - - # Visit a Label node. - def visit_label(node) - s( - :sym, - [node.value.chomp(":").to_sym], - smap_collection_bare(srange(node.start_char, node.end_char - 1)) - ) - end - - # Visit a Lambda node. - def visit_lambda(node) - args = node.params.is_a?(LambdaVar) ? node.params : node.params.contents - args_node = visit(args) - - type = :block - if args.empty? && (maximum = num_block_type(node.statements)) - type = :numblock - args_node = maximum - end - - begin_token, end_token = - if (srange = srange_search_between(node.params, node.statements, "{")) - [srange, srange_length(node.end_char, -1)] - else - [ - srange_find_between(node.params, node.statements, "do"), - srange_length(node.end_char, -3) - ] - end - - selector = srange_length(node.start_char, 2) - - s( - type, - [ - if ::Parser::Builders::Default.emit_lambda - s(:lambda, [], smap(selector)) - else - s(:send, [nil, :lambda], smap_send_bare(selector, selector)) - end, - args_node, - visit(node.statements) - ], - smap_collection(begin_token, end_token, srange_node(node)) - ) - end - # Visit a LambdaVar node. - def visit_lambda_var(node) - shadowargs = - node.locals.map do |local| - s( - :shadowarg, - [local.value.to_sym], - smap_variable(srange_node(local), srange_node(local)) - ) - end + # Visit an MLHSParen node. 
+ def visit_mlhs_paren(node) + child = visit(node.contents) - location = - if node.start_char == node.end_char - smap_collection_bare(nil) - else + s( + child.type, + child.children, smap_collection( srange_length(node.start_char, 1), srange_length(node.end_char, -1), srange_node(node) ) - end - - s(:args, visit(node.params).children + shadowargs, location) - end - - # Visit an MAssign node. - def visit_massign(node) - s( - :masgn, - [visit(node.target), visit(node.value)], - smap_operator( - srange_find_between(node.target, node.value, "="), - srange_node(node) ) - ) - end - - # Visit a MethodAddBlock node. - def visit_method_add_block(node) - case node.call - when Break, Next, ReturnNode - type, arguments = block_children(node.block) - call = visit(node.call) + end + # Visit a ModuleDeclaration node. + def visit_module(node) s( - call.type, - [ - s( - type, - [*call.children, arguments, visit(node.block.bodystmt)], - nil - ) - ], - nil + :module, + [visit(node.constant), visit(node.bodystmt)], + smap_definition( + srange_length(node.start_char, 6), + nil, + srange_node(node.constant), + srange_length(node.end_char, -3) + ).with_expression(srange_node(node)) ) - when ARef, Super, ZSuper - type, arguments = block_children(node.block) + end - s( - type, - [visit(node.call), arguments, visit(node.block.bodystmt)], - nil - ) - else - visit_command_call( - CommandCall.new( - receiver: node.call.receiver, - operator: node.call.operator, - message: node.call.message, - arguments: node.call.arguments, - block: node.block, + # Visit an MRHS node. + def visit_mrhs(node) + visit_array( + ArrayLiteral.new( + lbracket: nil, + contents: Args.new(parts: node.parts, location: node.location), location: node.location ) ) end - end - - # Visit an MLHS node. 
- def visit_mlhs(node) - s( - :mlhs, - node.parts.map do |part| - if part.is_a?(Ident) - s( - :arg, - [part.value.to_sym], - smap_variable(srange_node(part), srange_node(part)) - ) - else - visit(part) - end - end, - smap_collection_bare(srange_node(node)) - ) - end - - # Visit an MLHSParen node. - def visit_mlhs_paren(node) - child = visit(node.contents) - - s( - child.type, - child.children, - smap_collection( - srange_length(node.start_char, 1), - srange_length(node.end_char, -1), - srange_node(node) - ) - ) - end - - # Visit a ModuleDeclaration node. - def visit_module(node) - s( - :module, - [visit(node.constant), visit(node.bodystmt)], - smap_definition( - srange_length(node.start_char, 6), - nil, - srange_node(node.constant), - srange_length(node.end_char, -3) - ).with_expression(srange_node(node)) - ) - end - - # Visit an MRHS node. - def visit_mrhs(node) - visit_array( - ArrayLiteral.new( - lbracket: nil, - contents: Args.new(parts: node.parts, location: node.location), - location: node.location - ) - ) - end - - # Visit a Next node. - def visit_next(node) - s( - :next, - visit_all(node.arguments.parts), - smap_keyword_bare( - srange_length(node.start_char, 4), - srange_node(node) - ) - ) - end - - # Visit a Not node. - def visit_not(node) - if node.statement.nil? - begin_token = srange_find(node.start_char, nil, "(") - end_token = srange_find(node.start_char, nil, ")") - - s( - :send, - [ - s( - :begin, - [], - smap_collection( - begin_token, - end_token, - begin_token.join(end_token) - ) - ), - :! - ], - smap_send_bare(srange_length(node.start_char, 3), srange_node(node)) - ) - else - begin_token, end_token = - if node.parentheses? - [ - srange_find( - node.start_char + 3, - node.statement.start_char, - "(" - ), - srange_length(node.end_char, -1) - ] - end + # Visit a Next node. 
+ def visit_next(node) s( - :send, - [visit(node.statement), :!], - smap_send( - nil, - srange_length(node.start_char, 3), - begin_token, - end_token, + :next, + visit_all(node.arguments.parts), + smap_keyword_bare( + srange_length(node.start_char, 4), srange_node(node) ) ) end - end - - # Visit an OpAssign node. - def visit_opassign(node) - target = visit(node.target) - location = - target - .location - .with_expression(srange_node(node)) - .with_operator(srange_node(node.operator)) - - case node.operator.value - when "||=" - s(:or_asgn, [target, visit(node.value)], location) - when "&&=" - s(:and_asgn, [target, visit(node.value)], location) - else - s( - :op_asgn, - [target, node.operator.value.chomp("=").to_sym, visit(node.value)], - location - ) - end - end - # Visit a Params node. - def visit_params(node) - children = [] + # Visit a Not node. + def visit_not(node) + if node.statement.nil? + begin_token = srange_find(node.start_char, nil, "(") + end_token = srange_find(node.start_char, nil, ")") - children += - node.requireds.map do |required| - case required - when MLHSParen - visit(required) - else - s( - :arg, - [required.value.to_sym], - smap_variable(srange_node(required), srange_node(required)) + s( + :send, + [ + s( + :begin, + [], + smap_collection( + begin_token, + end_token, + begin_token.join(end_token) + ) + ), + :! + ], + smap_send_bare( + srange_length(node.start_char, 3), + srange_node(node) ) - end - end + ) + else + begin_token, end_token = + if node.parentheses? 
+ [ + srange_find( + node.start_char + 3, + node.statement.start_char, + "(" + ), + srange_length(node.end_char, -1) + ] + end - children += - node.optionals.map do |(name, value)| s( - :optarg, - [name.value.to_sym, visit(value)], - smap_variable( - srange_node(name), - srange_node(name).join(srange_node(value)) - ).with_operator(srange_find_between(name, value, "=")) + :send, + [visit(node.statement), :!], + smap_send( + nil, + srange_length(node.start_char, 3), + begin_token, + end_token, + srange_node(node) + ) ) end - - if node.rest && !node.rest.is_a?(ExcessedComma) - children << visit(node.rest) end - children += - node.posts.map do |post| + # Visit an OpAssign node. + def visit_opassign(node) + target = visit(node.target) + location = + target + .location + .with_expression(srange_node(node)) + .with_operator(srange_node(node.operator)) + + case node.operator.value + when "||=" + s(:or_asgn, [target, visit(node.value)], location) + when "&&=" + s(:and_asgn, [target, visit(node.value)], location) + else s( - :arg, - [post.value.to_sym], - smap_variable(srange_node(post), srange_node(post)) + :op_asgn, + [ + target, + node.operator.value.chomp("=").to_sym, + visit(node.value) + ], + location ) end + end - children += - node.keywords.map do |(name, value)| - key = name.value.chomp(":").to_sym + # Visit a Params node. 
+ def visit_params(node) + children = [] - if value - s( - :kwoptarg, - [key, visit(value)], - smap_variable( - srange(name.start_char, name.end_char - 1), - srange_node(name).join(srange_node(value)) + children += + node.requireds.map do |required| + case required + when MLHSParen + visit(required) + else + s( + :arg, + [required.value.to_sym], + smap_variable(srange_node(required), srange_node(required)) ) - ) - else + end + end + + children += + node.optionals.map do |(name, value)| s( - :kwarg, - [key], + :optarg, + [name.value.to_sym, visit(value)], smap_variable( - srange(name.start_char, name.end_char - 1), - srange_node(name) - ) + srange_node(name), + srange_node(name).join(srange_node(value)) + ).with_operator(srange_find_between(name, value, "=")) ) end + + if node.rest && !node.rest.is_a?(ExcessedComma) + children << visit(node.rest) end - case node.keyword_rest - when nil, ArgsForward - # do nothing - when :nil - children << s( - :kwnilarg, - [], - smap_variable(srange_length(node.end_char, -3), srange_node(node)) - ) - else - children << visit(node.keyword_rest) - end + children += + node.posts.map do |post| + s( + :arg, + [post.value.to_sym], + smap_variable(srange_node(post), srange_node(post)) + ) + end - children << visit(node.block) if node.block + children += + node.keywords.map do |(name, value)| + key = name.value.chomp(":").to_sym - if node.keyword_rest.is_a?(ArgsForward) - location = smap(srange_node(node.keyword_rest)) + if value + s( + :kwoptarg, + [key, visit(value)], + smap_variable( + srange(name.start_char, name.end_char - 1), + srange_node(name).join(srange_node(value)) + ) + ) + else + s( + :kwarg, + [key], + smap_variable( + srange(name.start_char, name.end_char - 1), + srange_node(name) + ) + ) + end + end - # If there are no other arguments and we have the emit_forward_arg - # option enabled, then the entire argument list is represented by a - # single forward_args node. - if children.empty? 
&& !::Parser::Builders::Default.emit_forward_arg - return s(:forward_args, [], location) + case node.keyword_rest + when nil, ArgsForward + # do nothing + when :nil + children << s( + :kwnilarg, + [], + smap_variable(srange_length(node.end_char, -3), srange_node(node)) + ) + else + children << visit(node.keyword_rest) end - # Otherwise, we need to insert a forward_arg node into the list of - # parameters before any keyword rest or block parameters. - index = - node.requireds.length + node.optionals.length + node.keywords.length - children.insert(index, s(:forward_arg, [], location)) - end + children << visit(node.block) if node.block - location = - unless children.empty? - first = children.first.location.expression - last = children.last.location.expression - smap_collection_bare(first.join(last)) + if node.keyword_rest.is_a?(ArgsForward) + location = smap(srange_node(node.keyword_rest)) + + # If there are no other arguments and we have the emit_forward_arg + # option enabled, then the entire argument list is represented by a + # single forward_args node. + if children.empty? && !::Parser::Builders::Default.emit_forward_arg + return s(:forward_args, [], location) + end + + # Otherwise, we need to insert a forward_arg node into the list of + # parameters before any keyword rest or block parameters. + index = + node.requireds.length + node.optionals.length + + node.keywords.length + children.insert(index, s(:forward_arg, [], location)) end - s(:args, children, location) - end + location = + unless children.empty? + first = children.first.location.expression + last = children.last.location.expression + smap_collection_bare(first.join(last)) + end - # Visit a Paren node. - def visit_paren(node) - location = - smap_collection( - srange_length(node.start_char, 1), - srange_length(node.end_char, -1), - srange_node(node) - ) + s(:args, children, location) + end - if node.contents.nil? || - (node.contents.is_a?(Statements) && node.contents.empty?) 
- s(:begin, [], location) - else - child = visit(node.contents) - child.type == :begin ? child : s(:begin, [child], location) + # Visit a Paren node. + def visit_paren(node) + location = + smap_collection( + srange_length(node.start_char, 1), + srange_length(node.end_char, -1), + srange_node(node) + ) + + if node.contents.nil? || + (node.contents.is_a?(Statements) && node.contents.empty?) + s(:begin, [], location) + else + child = visit(node.contents) + child.type == :begin ? child : s(:begin, [child], location) + end end - end - # Visit a PinnedBegin node. - def visit_pinned_begin(node) - s( - :pin, - [ - s( - :begin, - [visit(node.statement)], - smap_collection( - srange_length(node.start_char + 1, 1), - srange_length(node.end_char, -1), - srange(node.start_char + 1, node.end_char) + # Visit a PinnedBegin node. + def visit_pinned_begin(node) + s( + :pin, + [ + s( + :begin, + [visit(node.statement)], + smap_collection( + srange_length(node.start_char + 1, 1), + srange_length(node.end_char, -1), + srange(node.start_char + 1, node.end_char) + ) ) - ) - ], - smap_send_bare(srange_length(node.start_char, 1), srange_node(node)) - ) - end + ], + smap_send_bare(srange_length(node.start_char, 1), srange_node(node)) + ) + end - # Visit a PinnedVarRef node. - def visit_pinned_var_ref(node) - s( - :pin, - [visit(node.value)], - smap_send_bare(srange_length(node.start_char, 1), srange_node(node)) - ) - end + # Visit a PinnedVarRef node. + def visit_pinned_var_ref(node) + s( + :pin, + [visit(node.value)], + smap_send_bare(srange_length(node.start_char, 1), srange_node(node)) + ) + end - # Visit a Program node. - def visit_program(node) - visit(node.statements) - end + # Visit a Program node. + def visit_program(node) + visit(node.statements) + end - # Visit a QSymbols node. - def visit_qsymbols(node) - parts = - node.elements.map do |element| - SymbolLiteral.new(value: element, location: element.location) - end + # Visit a QSymbols node. 
+ def visit_qsymbols(node) + parts = + node.elements.map do |element| + SymbolLiteral.new(value: element, location: element.location) + end - visit_array( - ArrayLiteral.new( - lbracket: node.beginning, - contents: Args.new(parts: parts, location: node.location), - location: node.location + visit_array( + ArrayLiteral.new( + lbracket: node.beginning, + contents: Args.new(parts: parts, location: node.location), + location: node.location + ) ) - ) - end + end - # Visit a QWords node. - def visit_qwords(node) - visit_array( - ArrayLiteral.new( - lbracket: node.beginning, - contents: Args.new(parts: node.elements, location: node.location), - location: node.location + # Visit a QWords node. + def visit_qwords(node) + visit_array( + ArrayLiteral.new( + lbracket: node.beginning, + contents: Args.new(parts: node.elements, location: node.location), + location: node.location + ) ) - ) - end + end - # Visit a RangeNode node. - def visit_range(node) - s( - node.operator.value == ".." ? :irange : :erange, - [visit(node.left), visit(node.right)], - smap_operator(srange_node(node.operator), srange_node(node)) - ) - end + # Visit a RangeNode node. + def visit_range(node) + s( + node.operator.value == ".." ? :irange : :erange, + [visit(node.left), visit(node.right)], + smap_operator(srange_node(node.operator), srange_node(node)) + ) + end - # Visit an RAssign node. - def visit_rassign(node) - s( - node.operator.value == "=>" ? :match_pattern : :match_pattern_p, - [visit(node.value), visit(node.pattern)], - smap_operator(srange_node(node.operator), srange_node(node)) - ) - end + # Visit an RAssign node. + def visit_rassign(node) + s( + node.operator.value == "=>" ? :match_pattern : :match_pattern_p, + [visit(node.value), visit(node.pattern)], + smap_operator(srange_node(node.operator), srange_node(node)) + ) + end - # Visit a Rational node. - def visit_rational(node) - s(:rational, [node.value.to_r], smap_operator(nil, srange_node(node))) - end + # Visit a Rational node. 
+ def visit_rational(node) + s(:rational, [node.value.to_r], smap_operator(nil, srange_node(node))) + end - # Visit a Redo node. - def visit_redo(node) - s(:redo, [], smap_keyword_bare(srange_node(node), srange_node(node))) - end + # Visit a Redo node. + def visit_redo(node) + s(:redo, [], smap_keyword_bare(srange_node(node), srange_node(node))) + end - # Visit a RegexpLiteral node. - def visit_regexp_literal(node) - s( - :regexp, - visit_all(node.parts).push( - s( - :regopt, - node.ending.scan(/[a-z]/).sort.map(&:to_sym), - smap(srange_length(node.end_char, -(node.ending.length - 1))) + # Visit a RegexpLiteral node. + def visit_regexp_literal(node) + s( + :regexp, + visit_all(node.parts).push( + s( + :regopt, + node.ending.scan(/[a-z]/).sort.map(&:to_sym), + smap(srange_length(node.end_char, -(node.ending.length - 1))) + ) + ), + smap_collection( + srange_length(node.start_char, node.beginning.length), + srange_length(node.end_char - node.ending.length, 1), + srange_node(node) ) - ), - smap_collection( - srange_length(node.start_char, node.beginning.length), - srange_length(node.end_char - node.ending.length, 1), - srange_node(node) ) - ) - end + end - # Visit a Rescue node. - def visit_rescue(node) - # In the parser gem, there is a separation between the rescue node and - # the rescue body. They have different bounds, so we have to calculate - # those here. - start_char = node.start_char + # Visit a Rescue node. + def visit_rescue(node) + # In the parser gem, there is a separation between the rescue node and + # the rescue body. They have different bounds, so we have to calculate + # those here. + start_char = node.start_char - body_end_char = - if node.statements.empty? - start_char + 6 - else - node.statements.body.last.end_char - end + body_end_char = + if node.statements.empty? 
+ start_char + 6 + else + node.statements.body.last.end_char + end - end_char = - if node.consequent - end_node = node.consequent - end_node = end_node.consequent while end_node.consequent + end_char = + if node.consequent + end_node = node.consequent + end_node = end_node.consequent while end_node.consequent - if end_node.statements.empty? - start_char + 6 + if end_node.statements.empty? + start_char + 6 + else + end_node.statements.body.last.end_char + end else - end_node.statements.body.last.end_char + body_end_char end - else - body_end_char - end - # These locations are reused for multiple children. - keyword = srange_length(start_char, 6) - body_expression = srange(start_char, body_end_char) - expression = srange(start_char, end_char) + # These locations are reused for multiple children. + keyword = srange_length(start_char, 6) + body_expression = srange(start_char, body_end_char) + expression = srange(start_char, end_char) - exceptions = - case node.exception&.exceptions - when nil - nil - when MRHS - visit_array( - ArrayLiteral.new( - lbracket: nil, - contents: - Args.new( - parts: node.exception.exceptions.parts, - location: node.exception.exceptions.location - ), - location: node.exception.exceptions.location + exceptions = + case node.exception&.exceptions + when nil + nil + when MRHS + visit_array( + ArrayLiteral.new( + lbracket: nil, + contents: + Args.new( + parts: node.exception.exceptions.parts, + location: node.exception.exceptions.location + ), + location: node.exception.exceptions.location + ) ) - ) - else - visit_array( - ArrayLiteral.new( - lbracket: nil, - contents: - Args.new( - parts: [node.exception.exceptions], - location: node.exception.exceptions.location + else + visit_array( + ArrayLiteral.new( + lbracket: nil, + contents: + Args.new( + parts: [node.exception.exceptions], + location: node.exception.exceptions.location + ), + location: node.exception.exceptions.location + ) + ) + end + + resbody = + if node.exception.nil? 
+ s( + :resbody, + [nil, nil, visit(node.statements)], + smap_rescue_body(keyword, nil, nil, body_expression) + ) + elsif node.exception.variable.nil? + s( + :resbody, + [exceptions, nil, visit(node.statements)], + smap_rescue_body(keyword, nil, nil, body_expression) + ) + else + s( + :resbody, + [ + exceptions, + visit(node.exception.variable), + visit(node.statements) + ], + smap_rescue_body( + keyword, + srange_find( + node.start_char + 6, + node.exception.variable.start_char, + "=>" ), - location: node.exception.exceptions.location + nil, + body_expression + ) ) - ) - end + end - resbody = - if node.exception.nil? - s( - :resbody, - [nil, nil, visit(node.statements)], - smap_rescue_body(keyword, nil, nil, body_expression) - ) - elsif node.exception.variable.nil? - s( - :resbody, - [exceptions, nil, visit(node.statements)], - smap_rescue_body(keyword, nil, nil, body_expression) - ) + children = [resbody] + if node.consequent + children += visit(node.consequent).children else - s( - :resbody, - [ - exceptions, - visit(node.exception.variable), - visit(node.statements) - ], - smap_rescue_body( - keyword, - srange_find( - node.start_char + 6, - node.exception.variable.start_char, - "=>" - ), - nil, - body_expression - ) - ) + children << nil end - children = [resbody] - if node.consequent - children += visit(node.consequent).children - else - children << nil + s(:rescue, children, smap_condition_bare(expression)) end - s(:rescue, children, smap_condition_bare(expression)) - end - - # Visit a RescueMod node. - def visit_rescue_mod(node) - keyword = srange_find_between(node.statement, node.value, "rescue") - - s( - :rescue, - [ - visit(node.statement), - s( - :resbody, - [nil, nil, visit(node.value)], - smap_rescue_body( - keyword, - nil, - nil, - keyword.join(srange_node(node.value)) - ) - ), - nil - ], - smap_condition_bare(srange_node(node)) - ) - end + # Visit a RescueMod node. 
+ def visit_rescue_mod(node) + keyword = srange_find_between(node.statement, node.value, "rescue") - # Visit a RestParam node. - def visit_rest_param(node) - if node.name s( - :restarg, - [node.name.value.to_sym], - smap_variable(srange_node(node.name), srange_node(node)) + :rescue, + [ + visit(node.statement), + s( + :resbody, + [nil, nil, visit(node.value)], + smap_rescue_body( + keyword, + nil, + nil, + keyword.join(srange_node(node.value)) + ) + ), + nil + ], + smap_condition_bare(srange_node(node)) ) - else - s(:restarg, [], smap_variable(nil, srange_node(node))) end - end - - # Visit a Retry node. - def visit_retry(node) - s(:retry, [], smap_keyword_bare(srange_node(node), srange_node(node))) - end - - # Visit a ReturnNode node. - def visit_return(node) - s( - :return, - node.arguments ? visit_all(node.arguments.parts) : [], - smap_keyword_bare( - srange_length(node.start_char, 6), - srange_node(node) - ) - ) - end - # Visit an SClass node. - def visit_sclass(node) - s( - :sclass, - [visit(node.target), visit(node.bodystmt)], - smap_definition( - srange_length(node.start_char, 5), - srange_find(node.start_char + 5, node.target.start_char, "<<"), - nil, - srange_length(node.end_char, -3) - ).with_expression(srange_node(node)) - ) - end - - # Visit a Statements node. - def visit_statements(node) - children = - node.body.reject do |child| - child.is_a?(Comment) || child.is_a?(EmbDoc) || - child.is_a?(EndContent) || child.is_a?(VoidStmt) + # Visit a RestParam node. + def visit_rest_param(node) + if node.name + s( + :restarg, + [node.name.value.to_sym], + smap_variable(srange_node(node.name), srange_node(node)) + ) + else + s(:restarg, [], smap_variable(nil, srange_node(node))) end + end - case children.length - when 0 - nil - when 1 - visit(children.first) - else + # Visit a Retry node. + def visit_retry(node) + s(:retry, [], smap_keyword_bare(srange_node(node), srange_node(node))) + end + + # Visit a ReturnNode node. 
+ def visit_return(node) s( - :begin, - visit_all(children), - smap_collection_bare( - srange(children.first.start_char, children.last.end_char) + :return, + node.arguments ? visit_all(node.arguments.parts) : [], + smap_keyword_bare( + srange_length(node.start_char, 6), + srange_node(node) ) ) end - end - - # Visit a StringConcat node. - def visit_string_concat(node) - s( - :dstr, - [visit(node.left), visit(node.right)], - smap_collection_bare(srange_node(node)) - ) - end - # Visit a StringDVar node. - def visit_string_dvar(node) - visit(node.variable) - end - - # Visit a StringEmbExpr node. - def visit_string_embexpr(node) - s( - :begin, - visit(node.statements).then { |child| child ? [child] : [] }, - smap_collection( - srange_length(node.start_char, 2), - srange_length(node.end_char, -1), - srange_node(node) + # Visit an SClass node. + def visit_sclass(node) + s( + :sclass, + [visit(node.target), visit(node.bodystmt)], + smap_definition( + srange_length(node.start_char, 5), + srange_find(node.start_char + 5, node.target.start_char, "<<"), + nil, + srange_length(node.end_char, -3) + ).with_expression(srange_node(node)) ) - ) - end + end - # Visit a StringLiteral node. - def visit_string_literal(node) - location = - if node.quote - smap_collection( - srange_length(node.start_char, node.quote.length), - srange_length(node.end_char, -1), - srange_node(node) - ) + # Visit a Statements node. + def visit_statements(node) + children = + node.body.reject do |child| + child.is_a?(Comment) || child.is_a?(EmbDoc) || + child.is_a?(EndContent) || child.is_a?(VoidStmt) + end + + case children.length + when 0 + nil + when 1 + visit(children.first) else - smap_collection_bare(srange_node(node)) + s( + :begin, + visit_all(children), + smap_collection_bare( + srange(children.first.start_char, children.last.end_char) + ) + ) end + end - if node.parts.empty? 
- s(:str, [""], location) - elsif node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - child = visit(node.parts.first) - s(child.type, child.children, location) - else - s(:dstr, visit_all(node.parts), location) + # Visit a StringConcat node. + def visit_string_concat(node) + s( + :dstr, + [visit(node.left), visit(node.right)], + smap_collection_bare(srange_node(node)) + ) end - end - # Visit a Super node. - def visit_super(node) - if node.arguments.is_a?(Args) + # Visit a StringDVar node. + def visit_string_dvar(node) + visit(node.variable) + end + + # Visit a StringEmbExpr node. + def visit_string_embexpr(node) s( - :super, - visit_all(node.arguments.parts), - smap_keyword_bare( - srange_length(node.start_char, 5), + :begin, + visit(node.statements).then { |child| child ? [child] : [] }, + smap_collection( + srange_length(node.start_char, 2), + srange_length(node.end_char, -1), srange_node(node) ) ) - else - case node.arguments.arguments - when nil - s( - :super, - [], - smap_keyword( - srange_length(node.start_char, 5), - srange_find(node.start_char + 5, node.end_char, "("), + end + + # Visit a StringLiteral node. + def visit_string_literal(node) + location = + if node.quote + smap_collection( + srange_length(node.start_char, node.quote.length), srange_length(node.end_char, -1), srange_node(node) ) - ) - when ArgsForward - s(:super, [visit(node.arguments.arguments)], nil) + else + smap_collection_bare(srange_node(node)) + end + + if node.parts.empty? + s(:str, [""], location) + elsif node.parts.length == 1 && node.parts.first.is_a?(TStringContent) + child = visit(node.parts.first) + s(child.type, child.children, location) else + s(:dstr, visit_all(node.parts), location) + end + end + + # Visit a Super node. 
+ def visit_super(node) + if node.arguments.is_a?(Args) s( :super, - visit_all(node.arguments.arguments.parts), - smap_keyword( + visit_all(node.arguments.parts), + smap_keyword_bare( srange_length(node.start_char, 5), - srange_find(node.start_char + 5, node.end_char, "("), - srange_length(node.end_char, -1), srange_node(node) ) ) + else + case node.arguments.arguments + when nil + s( + :super, + [], + smap_keyword( + srange_length(node.start_char, 5), + srange_find(node.start_char + 5, node.end_char, "("), + srange_length(node.end_char, -1), + srange_node(node) + ) + ) + when ArgsForward + s(:super, [visit(node.arguments.arguments)], nil) + else + s( + :super, + visit_all(node.arguments.arguments.parts), + smap_keyword( + srange_length(node.start_char, 5), + srange_find(node.start_char + 5, node.end_char, "("), + srange_length(node.end_char, -1), + srange_node(node) + ) + ) + end end end - end - # Visit a SymbolLiteral node. - def visit_symbol_literal(node) - begin_token = - if buffer.source[node.start_char] == ":" - srange_length(node.start_char, 1) - end + # Visit a SymbolLiteral node. + def visit_symbol_literal(node) + begin_token = + if buffer.source[node.start_char] == ":" + srange_length(node.start_char, 1) + end - s( - :sym, - [node.value.value.to_sym], - smap_collection(begin_token, nil, srange_node(node)) - ) - end + s( + :sym, + [node.value.value.to_sym], + smap_collection(begin_token, nil, srange_node(node)) + ) + end - # Visit a Symbols node. - def visit_symbols(node) - parts = - node.elements.map do |element| - part = element.parts.first + # Visit a Symbols node. 
+ def visit_symbols(node) + parts = + node.elements.map do |element| + part = element.parts.first - if element.parts.length == 1 && part.is_a?(TStringContent) - SymbolLiteral.new(value: part, location: part.location) - else - DynaSymbol.new( - parts: element.parts, - quote: nil, - location: element.location - ) + if element.parts.length == 1 && part.is_a?(TStringContent) + SymbolLiteral.new(value: part, location: part.location) + else + DynaSymbol.new( + parts: element.parts, + quote: nil, + location: element.location + ) + end end - end - - visit_array( - ArrayLiteral.new( - lbracket: node.beginning, - contents: Args.new(parts: parts, location: node.location), - location: node.location - ) - ) - end - # Visit a TopConstField node. - def visit_top_const_field(node) - s( - :casgn, - [ - s(:cbase, [], smap(srange_length(node.start_char, 2))), - node.constant.value.to_sym - ], - smap_constant( - srange_length(node.start_char, 2), - srange_node(node.constant), - srange_node(node) + visit_array( + ArrayLiteral.new( + lbracket: node.beginning, + contents: Args.new(parts: parts, location: node.location), + location: node.location + ) ) - ) - end + end - # Visit a TopConstRef node. - def visit_top_const_ref(node) - s( - :const, - [ - s(:cbase, [], smap(srange_length(node.start_char, 2))), - node.constant.value.to_sym - ], - smap_constant( - srange_length(node.start_char, 2), - srange_node(node.constant), - srange_node(node) + # Visit a TopConstField node. + def visit_top_const_field(node) + s( + :casgn, + [ + s(:cbase, [], smap(srange_length(node.start_char, 2))), + node.constant.value.to_sym + ], + smap_constant( + srange_length(node.start_char, 2), + srange_node(node.constant), + srange_node(node) + ) ) - ) - end - - # Visit a TStringContent node. - def visit_tstring_content(node) - dumped = node.value.gsub(/([^[:ascii:]])/) { $1.dump[1...-1] } - - s( - :str, - ["\"#{dumped}\"".undump], - smap_collection_bare(srange_node(node)) - ) - end + end - # Visit a Unary node. 
- def visit_unary(node) - # Special handling here for flipflops - if node.statement.is_a?(Paren) && - node.statement.contents.is_a?(Statements) && - node.statement.contents.body.length == 1 && - (range = node.statement.contents.body.first).is_a?(RangeNode) && - node.operator == "!" - type = range.operator.value == ".." ? :iflipflop : :eflipflop - return( - s( - :send, - [s(:begin, [s(type, visit(range).children, nil)], nil), :!], - nil + # Visit a TopConstRef node. + def visit_top_const_ref(node) + s( + :const, + [ + s(:cbase, [], smap(srange_length(node.start_char, 2))), + node.constant.value.to_sym + ], + smap_constant( + srange_length(node.start_char, 2), + srange_node(node.constant), + srange_node(node) ) ) end - visit(canonical_unary(node)) - end + # Visit a TStringContent node. + def visit_tstring_content(node) + dumped = node.value.gsub(/([^[:ascii:]])/) { $1.dump[1...-1] } - # Visit an Undef node. - def visit_undef(node) - s( - :undef, - visit_all(node.symbols), - smap_keyword_bare( - srange_length(node.start_char, 5), - srange_node(node) + s( + :str, + ["\"#{dumped}\"".undump], + smap_collection_bare(srange_node(node)) ) - ) - end + end - # Visit an UnlessNode node. - def visit_unless(node) - predicate = - case node.predicate - when RegexpLiteral - s(:match_current_line, [visit(node.predicate)], nil) - when Unary - if node.predicate.operator.value == "!" && - node.predicate.statement.is_a?(RegexpLiteral) + # Visit a Unary node. + def visit_unary(node) + # Special handling here for flipflops + if node.statement.is_a?(Paren) && + node.statement.contents.is_a?(Statements) && + node.statement.contents.body.length == 1 && + (range = node.statement.contents.body.first).is_a?(RangeNode) && + node.operator == "!" + type = range.operator.value == ".." ? 
:iflipflop : :eflipflop + return( s( :send, - [s(:match_current_line, [visit(node.predicate.statement)]), :!], + [s(:begin, [s(type, visit(range).children, nil)], nil), :!], nil ) - else - visit(node.predicate) - end - else - visit(node.predicate) - end - - s( - :if, - [predicate, visit(node.consequent), visit(node.statements)], - if node.modifier? - smap_keyword_bare( - srange_find_between(node.statements, node.predicate, "unless"), - srange_node(node) - ) - else - smap_condition( - srange_length(node.start_char, 6), - srange_search_between(node.predicate, node.statements, "then"), - nil, - srange_length(node.end_char, -3), - srange_node(node) ) end - ) - end - # Visit an UntilNode node. - def visit_until(node) - s( - loop_post?(node) ? :until_post : :until, - [visit(node.predicate), visit(node.statements)], - if node.modifier? + visit(canonical_unary(node)) + end + + # Visit an Undef node. + def visit_undef(node) + s( + :undef, + visit_all(node.symbols), smap_keyword_bare( - srange_find_between(node.statements, node.predicate, "until"), - srange_node(node) - ) - else - smap_keyword( srange_length(node.start_char, 5), - srange_search_between(node.predicate, node.statements, "do") || - srange_search_between(node.predicate, node.statements, ";"), - srange_length(node.end_char, -3), srange_node(node) ) - end - ) - end + ) + end - # Visit a VarField node. - def visit_var_field(node) - name = node.value.value.to_sym - match_var = - [stack[-3], stack[-2]].any? do |parent| - case parent - when AryPtn, FndPtn, HshPtn, In, RAssign - true - when Binary - parent.operator == :"=>" + # Visit an UnlessNode node. + def visit_unless(node) + predicate = + case node.predicate + when RegexpLiteral + s(:match_current_line, [visit(node.predicate)], nil) + when Unary + if node.predicate.operator.value == "!" && + node.predicate.statement.is_a?(RegexpLiteral) + s( + :send, + [ + s(:match_current_line, [visit(node.predicate.statement)]), + :! 
+ ], + nil + ) + else + visit(node.predicate) + end else - false + visit(node.predicate) end - end - if match_var s( - :match_var, - [name], - smap_variable(srange_node(node.value), srange_node(node.value)) + :if, + [predicate, visit(node.consequent), visit(node.statements)], + if node.modifier? + smap_keyword_bare( + srange_find_between(node.statements, node.predicate, "unless"), + srange_node(node) + ) + else + smap_condition( + srange_length(node.start_char, 6), + srange_search_between(node.predicate, node.statements, "then"), + nil, + srange_length(node.end_char, -3), + srange_node(node) + ) + end ) - elsif node.value.is_a?(Const) + end + + # Visit an UntilNode node. + def visit_until(node) s( - :casgn, - [nil, name], - smap_constant(nil, srange_node(node.value), srange_node(node)) + loop_post?(node) ? :until_post : :until, + [visit(node.predicate), visit(node.statements)], + if node.modifier? + smap_keyword_bare( + srange_find_between(node.statements, node.predicate, "until"), + srange_node(node) + ) + else + smap_keyword( + srange_length(node.start_char, 5), + srange_search_between(node.predicate, node.statements, "do") || + srange_search_between(node.predicate, node.statements, ";"), + srange_length(node.end_char, -3), + srange_node(node) + ) + end ) - else - location = smap_variable(srange_node(node), srange_node(node)) + end - case node.value - when CVar - s(:cvasgn, [name], location) - when GVar - s(:gvasgn, [name], location) - when Ident - s(:lvasgn, [name], location) - when IVar - s(:ivasgn, [name], location) - when VarRef - s(:lvasgn, [name], location) + # Visit a VarField node. + def visit_var_field(node) + name = node.value.value.to_sym + match_var = + [stack[-3], stack[-2]].any? 
do |parent| + case parent + when AryPtn, FndPtn, HshPtn, In, RAssign + true + when Binary + parent.operator == :"=>" + else + false + end + end + + if match_var + s( + :match_var, + [name], + smap_variable(srange_node(node.value), srange_node(node.value)) + ) + elsif node.value.is_a?(Const) + s( + :casgn, + [nil, name], + smap_constant(nil, srange_node(node.value), srange_node(node)) + ) else - s(:match_rest, [], nil) + location = smap_variable(srange_node(node), srange_node(node)) + + case node.value + when CVar + s(:cvasgn, [name], location) + when GVar + s(:gvasgn, [name], location) + when Ident + s(:lvasgn, [name], location) + when IVar + s(:ivasgn, [name], location) + when VarRef + s(:lvasgn, [name], location) + else + s(:match_rest, [], nil) + end end end - end - # Visit a VarRef node. - def visit_var_ref(node) - visit(node.value) - end + # Visit a VarRef node. + def visit_var_ref(node) + visit(node.value) + end - # Visit a VCall node. - def visit_vcall(node) - visit_command_call( - CommandCall.new( - receiver: nil, - operator: nil, - message: node.value, - arguments: nil, - block: nil, - location: node.location + # Visit a VCall node. + def visit_vcall(node) + visit_command_call( + CommandCall.new( + receiver: nil, + operator: nil, + message: node.value, + arguments: nil, + block: nil, + location: node.location + ) ) - ) - end - - # Visit a When node. - def visit_when(node) - keyword = srange_length(node.start_char, 4) - begin_token = - if buffer.source[node.statements.start_char] == ";" - srange_length(node.statements.start_char, 1) - end + end - end_char = - if node.statements.body.empty? - node.statements.end_char - else - node.statements.body.last.end_char - end + # Visit a When node. 
+ def visit_when(node) + keyword = srange_length(node.start_char, 4) + begin_token = + if buffer.source[node.statements.start_char] == ";" + srange_length(node.statements.start_char, 1) + end - s( - :when, - visit_all(node.arguments.parts) + [visit(node.statements)], - smap_keyword( - keyword, - begin_token, - nil, - srange(keyword.begin_pos, end_char) - ) - ) - end + end_char = + if node.statements.body.empty? + node.statements.end_char + else + node.statements.body.last.end_char + end - # Visit a WhileNode node. - def visit_while(node) - s( - loop_post?(node) ? :while_post : :while, - [visit(node.predicate), visit(node.statements)], - if node.modifier? - smap_keyword_bare( - srange_find_between(node.statements, node.predicate, "while"), - srange_node(node) - ) - else + s( + :when, + visit_all(node.arguments.parts) + [visit(node.statements)], smap_keyword( - srange_length(node.start_char, 5), - srange_search_between(node.predicate, node.statements, "do") || - srange_search_between(node.predicate, node.statements, ";"), - srange_length(node.end_char, -3), - srange_node(node) + keyword, + begin_token, + nil, + srange(keyword.begin_pos, end_char) ) - end - ) - end - - # Visit a Word node. - def visit_word(node) - visit_string_literal( - StringLiteral.new( - parts: node.parts, - quote: nil, - location: node.location ) - ) - end + end - # Visit a Words node. - def visit_words(node) - visit_array( - ArrayLiteral.new( - lbracket: node.beginning, - contents: Args.new(parts: node.elements, location: node.location), - location: node.location + # Visit a WhileNode node. + def visit_while(node) + s( + loop_post?(node) ? :while_post : :while, + [visit(node.predicate), visit(node.statements)], + if node.modifier? 
+ smap_keyword_bare( + srange_find_between(node.statements, node.predicate, "while"), + srange_node(node) + ) + else + smap_keyword( + srange_length(node.start_char, 5), + srange_search_between(node.predicate, node.statements, "do") || + srange_search_between(node.predicate, node.statements, ";"), + srange_length(node.end_char, -3), + srange_node(node) + ) + end ) - ) - end + end - # Visit an XStringLiteral node. - def visit_xstring_literal(node) - s( - :xstr, - visit_all(node.parts), - smap_collection( - srange_length( - node.start_char, - buffer.source[node.start_char] == "%" ? 3 : 1 - ), - srange_length(node.end_char, -1), - srange_node(node) + # Visit a Word node. + def visit_word(node) + visit_string_literal( + StringLiteral.new( + parts: node.parts, + quote: nil, + location: node.location + ) ) - ) - end + end - def visit_yield(node) - case node.arguments - when nil - s( - :yield, - [], - smap_keyword_bare( - srange_length(node.start_char, 5), - srange_node(node) + # Visit a Words node. + def visit_words(node) + visit_array( + ArrayLiteral.new( + lbracket: node.beginning, + contents: Args.new(parts: node.elements, location: node.location), + location: node.location ) ) - when Args + end + + # Visit an XStringLiteral node. + def visit_xstring_literal(node) s( - :yield, - visit_all(node.arguments.parts), - smap_keyword_bare( - srange_length(node.start_char, 5), + :xstr, + visit_all(node.parts), + smap_collection( + srange_length( + node.start_char, + buffer.source[node.start_char] == "%" ? 
3 : 1 + ), + srange_length(node.end_char, -1), srange_node(node) ) ) - else + end + + def visit_yield(node) + case node.arguments + when nil + s( + :yield, + [], + smap_keyword_bare( + srange_length(node.start_char, 5), + srange_node(node) + ) + ) + when Args + s( + :yield, + visit_all(node.arguments.parts), + smap_keyword_bare( + srange_length(node.start_char, 5), + srange_node(node) + ) + ) + else + s( + :yield, + visit_all(node.arguments.contents.parts), + smap_keyword( + srange_length(node.start_char, 5), + srange_length(node.arguments.start_char, 1), + srange_length(node.end_char, -1), + srange_node(node) + ) + ) + end + end + + # Visit a ZSuper node. + def visit_zsuper(node) s( - :yield, - visit_all(node.arguments.contents.parts), - smap_keyword( + :zsuper, + [], + smap_keyword_bare( srange_length(node.start_char, 5), - srange_length(node.arguments.start_char, 1), - srange_length(node.end_char, -1), srange_node(node) ) ) end end - # Visit a ZSuper node. - def visit_zsuper(node) - s( - :zsuper, - [], - smap_keyword_bare( - srange_length(node.start_char, 5), - srange_node(node) - ) - ) - end - private def block_children(node) diff --git a/lib/syntax_tree/with_environment.rb b/lib/syntax_tree/with_environment.rb index 13f5e080..da300dc0 100644 --- a/lib/syntax_tree/with_environment.rb +++ b/lib/syntax_tree/with_environment.rb @@ -121,9 +121,9 @@ def visit_module(node) with_new_environment { super } end - # When we find a method invocation with a block, only the code that happens - # inside of the block needs a fresh environment. The method invocation - # itself happens in the same environment. + # When we find a method invocation with a block, only the code that + # happens inside of the block needs a fresh environment. The method + # invocation itself happens in the same environment. 
def visit_method_add_block(node) visit(node.call) with_new_environment { visit(node.block) } diff --git a/lib/syntax_tree/yarv/compiler.rb b/lib/syntax_tree/yarv/compiler.rb index a8044faf..bd20bc19 100644 --- a/lib/syntax_tree/yarv/compiler.rb +++ b/lib/syntax_tree/yarv/compiler.rb @@ -124,76 +124,122 @@ def self.compile(node) rescue CompilationError end - def visit_array(node) - node.contents ? visit_all(node.contents.parts) : [] - end + visit_methods do + def visit_array(node) + node.contents ? visit_all(node.contents.parts) : [] + end - def visit_bare_assoc_hash(node) - node.assocs.to_h do |assoc| - # We can only convert regular key-value pairs. A double splat ** - # operator means it has to be converted at run-time. - raise CompilationError unless assoc.is_a?(Assoc) - [visit(assoc.key), visit(assoc.value)] + def visit_bare_assoc_hash(node) + node.assocs.to_h do |assoc| + # We can only convert regular key-value pairs. A double splat ** + # operator means it has to be converted at run-time. 
+ raise CompilationError unless assoc.is_a?(Assoc) + [visit(assoc.key), visit(assoc.value)] + end end - end - def visit_float(node) - node.value.to_f - end + def visit_float(node) + node.value.to_f + end - alias visit_hash visit_bare_assoc_hash + alias visit_hash visit_bare_assoc_hash - def visit_imaginary(node) - node.value.to_c - end + def visit_imaginary(node) + node.value.to_c + end - def visit_int(node) - case (value = node.value) - when /^0b/ - value[2..].to_i(2) - when /^0o/ - value[2..].to_i(8) - when /^0d/ - value[2..].to_i - when /^0x/ - value[2..].to_i(16) - else - value.to_i + def visit_int(node) + case (value = node.value) + when /^0b/ + value[2..].to_i(2) + when /^0o/ + value[2..].to_i(8) + when /^0d/ + value[2..].to_i + when /^0x/ + value[2..].to_i(16) + else + value.to_i + end end - end - def visit_label(node) - node.value.chomp(":").to_sym - end + def visit_label(node) + node.value.chomp(":").to_sym + end - def visit_mrhs(node) - visit_all(node.parts) - end + def visit_mrhs(node) + visit_all(node.parts) + end - def visit_qsymbols(node) - node.elements.map { |element| visit(element).to_sym } - end + def visit_qsymbols(node) + node.elements.map { |element| visit(element).to_sym } + end - def visit_qwords(node) - visit_all(node.elements) - end + def visit_qwords(node) + visit_all(node.elements) + end - def visit_range(node) - left, right = [visit(node.left), visit(node.right)] - node.operator.value === ".." ? left..right : left...right - end + def visit_range(node) + left, right = [visit(node.left), visit(node.right)] + node.operator.value === ".." ? 
left..right : left...right + end - def visit_rational(node) - node.value.to_r - end + def visit_rational(node) + node.value.to_r + end - def visit_regexp_literal(node) - if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - Regexp.new(node.parts.first.value, visit_regexp_literal_flags(node)) - else - # Any interpolation of expressions or variables will result in the - # regular expression being constructed at run-time. - raise CompilationError + def visit_regexp_literal(node) + if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) + Regexp.new( + node.parts.first.value, + visit_regexp_literal_flags(node) + ) + else + # Any interpolation of expressions or variables will result in the + # regular expression being constructed at run-time. + raise CompilationError + end + end + + def visit_symbol_literal(node) + node.value.value.to_sym + end + + def visit_symbols(node) + node.elements.map { |element| visit(element).to_sym } + end + + def visit_tstring_content(node) + node.value + end + + def visit_var_ref(node) + raise CompilationError unless node.value.is_a?(Kw) + + case node.value.value + when "nil" + nil + when "true" + true + when "false" + false + else + raise CompilationError + end + end + + def visit_word(node) + if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) + node.parts.first.value + else + # Any interpolation of expressions or variables will result in the + # string being constructed at run-time. 
+ raise CompilationError + end + end + + def visit_words(node) + visit_all(node.elements) end end @@ -219,47 +265,6 @@ def visit_regexp_literal_flags(node) end end - def visit_symbol_literal(node) - node.value.value.to_sym - end - - def visit_symbols(node) - node.elements.map { |element| visit(element).to_sym } - end - - def visit_tstring_content(node) - node.value - end - - def visit_var_ref(node) - raise CompilationError unless node.value.is_a?(Kw) - - case node.value.value - when "nil" - nil - when "true" - true - when "false" - false - else - raise CompilationError - end - end - - def visit_word(node) - if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - node.parts.first.value - else - # Any interpolation of expressions or variables will result in the - # string being constructed at run-time. - raise CompilationError - end - end - - def visit_words(node) - visit_all(node.elements) - end - def visit_unsupported(_node) raise CompilationError end diff --git a/test/visitor_test.rb b/test/visitor_test.rb index 86ff1b01..d9637df0 100644 --- a/test/visitor_test.rb +++ b/test/visitor_test.rb @@ -30,13 +30,15 @@ def initialize @visited_nodes = [] end - visit_method def visit_class(node) - @visited_nodes << node.constant.constant.value - super - end + visit_methods do + def visit_class(node) + @visited_nodes << node.constant.constant.value + super + end - visit_method def visit_def(node) - @visited_nodes << node.name.value + def visit_def(node) + @visited_nodes << node.name.value + end end end diff --git a/test/visitor_with_environment_test.rb b/test/visitor_with_environment_test.rb index cc4007fe..278ae361 100644 --- a/test/visitor_with_environment_test.rb +++ b/test/visitor_with_environment_test.rb @@ -14,26 +14,28 @@ def initialize @arguments = {} end - def visit_ident(node) - local = current_environment.find_local(node.value) - return unless local - - value = node.value.delete_suffix(":") - - case local.type - when :argument - @arguments[value] = 
local - when :variable - @variables[value] = local + visit_methods do + def visit_ident(node) + local = current_environment.find_local(node.value) + return unless local + + value = node.value.delete_suffix(":") + + case local.type + when :argument + @arguments[value] = local + when :variable + @variables[value] = local + end end - end - def visit_label(node) - value = node.value.delete_suffix(":") - local = current_environment.find_local(value) - return unless local + def visit_label(node) + value = node.value.delete_suffix(":") + local = current_environment.find_local(value) + return unless local - @arguments[value] = node if local.type == :argument + @arguments[value] = node if local.type == :argument + end end end @@ -625,13 +627,15 @@ def initialize @locals = [] end - def visit_assign(node) - level = 0 - environment = current_environment - level += 1 until (environment = environment.parent).nil? + visit_methods do + def visit_assign(node) + level = 0 + environment = current_environment + level += 1 until (environment = environment.parent).nil? 
- locals << [node.target.value.value, level] - super + locals << [node.target.value.value, level] + super + end end end From 174cc6bae01dc6825858906fa46a9f3213608c24 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 10 Feb 2023 12:50:59 -0500 Subject: [PATCH 53/58] Make environment break at boundaries --- lib/syntax_tree/with_environment.rb | 56 ++- test/visitor_with_environment_test.rb | 663 -------------------------- test/with_environment_test.rb | 457 ++++++++++++++++++ 3 files changed, 499 insertions(+), 677 deletions(-) delete mode 100644 test/visitor_with_environment_test.rb create mode 100644 test/with_environment_test.rb diff --git a/lib/syntax_tree/with_environment.rb b/lib/syntax_tree/with_environment.rb index da300dc0..3a6f04b9 100644 --- a/lib/syntax_tree/with_environment.rb +++ b/lib/syntax_tree/with_environment.rb @@ -55,14 +55,18 @@ def add_usage(location) end end - # [Array[Local]] The local variables and arguments defined in this + # [Integer] a unique identifier for this environment + attr_reader :id + + # [Hash[String, Local]] The local variables and arguments defined in this # environment attr_reader :locals # [Environment | nil] The parent environment attr_reader :parent - def initialize(parent = nil) + def initialize(id, parent = nil) + @id = id @locals = {} @parent = parent end @@ -74,8 +78,14 @@ def initialize(parent = nil) def add_local_definition(identifier, type) name = identifier.value.delete_suffix(":") - @locals[name] ||= Local.new(type) - @locals[name].add_definition(identifier.location) + local = + if type == :argument + locals[name] ||= Local.new(type) + else + resolve_local(name, type) + end + + local.add_definition(identifier.location) end # Adding a local usage will either insert a new entry in the locals @@ -84,28 +94,42 @@ def add_local_definition(identifier, type) # registered. 
def add_local_usage(identifier, type) name = identifier.value.delete_suffix(":") - - @locals[name] ||= Local.new(type) - @locals[name].add_usage(identifier.location) + resolve_local(name, type).add_usage(identifier.location) end # Try to find the local given its name in this environment or any of its # parents. def find_local(name) - local = @locals[name] - return local unless local.nil? + locals[name] || parent&.find_local(name) + end - @parent&.find_local(name) + private + + def resolve_local(name, type) + local = find_local(name) + + unless local + local = Local.new(type) + locals[name] = local + end + + local end end + def initialize(*args, **kwargs, &block) + super + @environment_id = 0 + end + def current_environment - @current_environment ||= Environment.new + @current_environment ||= Environment.new(next_environment_id) end - def with_new_environment + def with_new_environment(parent_environment = nil) previous_environment = @current_environment - @current_environment = Environment.new(previous_environment) + @current_environment = + Environment.new(next_environment_id, parent_environment) yield ensure @current_environment = previous_environment @@ -126,7 +150,7 @@ def visit_module(node) # invocation itself happens in the same environment. 
def visit_method_add_block(node) visit(node.call) - with_new_environment { visit(node.block) } + with_new_environment(current_environment) { visit(node.block) } end def visit_def(node) @@ -213,5 +237,9 @@ def add_argument_definitions(list) end end end + + def next_environment_id + @environment_id += 1 + end end end diff --git a/test/visitor_with_environment_test.rb b/test/visitor_with_environment_test.rb deleted file mode 100644 index 278ae361..00000000 --- a/test/visitor_with_environment_test.rb +++ /dev/null @@ -1,663 +0,0 @@ -# frozen_string_literal: true - -require_relative "test_helper" - -module SyntaxTree - class VisitorWithEnvironmentTest < Minitest::Test - class Collector < Visitor - include WithEnvironment - - attr_reader :variables, :arguments - - def initialize - @variables = {} - @arguments = {} - end - - visit_methods do - def visit_ident(node) - local = current_environment.find_local(node.value) - return unless local - - value = node.value.delete_suffix(":") - - case local.type - when :argument - @arguments[value] = local - when :variable - @variables[value] = local - end - end - - def visit_label(node) - value = node.value.delete_suffix(":") - local = current_environment.find_local(value) - return unless local - - @arguments[value] = node if local.type == :argument - end - end - end - - def test_collecting_simple_variables - tree = SyntaxTree.parse(<<~RUBY) - def foo - a = 1 - a - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(1, visitor.variables.length) - - variable = visitor.variables["a"] - assert_equal(1, variable.definitions.length) - assert_equal(1, variable.usages.length) - - assert_equal(2, variable.definitions[0].start_line) - assert_equal(3, variable.usages[0].start_line) - end - - def test_collecting_aref_variables - tree = SyntaxTree.parse(<<~RUBY) - def foo - a = [] - a[1] - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(1, visitor.variables.length) - - variable = 
visitor.variables["a"] - assert_equal(1, variable.definitions.length) - assert_equal(1, variable.usages.length) - - assert_equal(2, variable.definitions[0].start_line) - assert_equal(3, variable.usages[0].start_line) - end - - def test_collecting_multi_assign_variables - tree = SyntaxTree.parse(<<~RUBY) - def foo - a, b = [1, 2] - puts a - puts b - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(2, visitor.variables.length) - - variable_a = visitor.variables["a"] - assert_equal(1, variable_a.definitions.length) - assert_equal(1, variable_a.usages.length) - - assert_equal(2, variable_a.definitions[0].start_line) - assert_equal(3, variable_a.usages[0].start_line) - - variable_b = visitor.variables["b"] - assert_equal(1, variable_b.definitions.length) - assert_equal(1, variable_b.usages.length) - - assert_equal(2, variable_b.definitions[0].start_line) - assert_equal(4, variable_b.usages[0].start_line) - end - - def test_collecting_pattern_matching_variables - tree = SyntaxTree.parse(<<~RUBY) - def foo - case [1, 2] - in Integer => a, Integer - puts a - end - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - # There are two occurrences, one on line 3 for pinning and one on line 4 - # for reference - assert_equal(1, visitor.variables.length) - - variable = visitor.variables["a"] - - # Assignment a - assert_equal(3, variable.definitions[0].start_line) - assert_equal(4, variable.usages[0].start_line) - end - - def test_collecting_pinned_variables - tree = SyntaxTree.parse(<<~RUBY) - def foo - a = 18 - case [1, 2] - in ^a, *rest - puts a - puts rest - end - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(2, visitor.variables.length) - - variable_a = visitor.variables["a"] - assert_equal(2, variable_a.definitions.length) - assert_equal(1, variable_a.usages.length) - - assert_equal(2, variable_a.definitions[0].start_line) - assert_equal(4, variable_a.definitions[1].start_line) - assert_equal(5, 
variable_a.usages[0].start_line) - - variable_rest = visitor.variables["rest"] - assert_equal(1, variable_rest.definitions.length) - assert_equal(4, variable_rest.definitions[0].start_line) - - # Rest is considered a vcall by the parser instead of a var_ref - # assert_equal(1, variable_rest.usages.length) - # assert_equal(6, variable_rest.usages[0].start_line) - end - - if RUBY_VERSION >= "3.1" - def test_collecting_one_line_pattern_matching_variables - tree = SyntaxTree.parse(<<~RUBY) - def foo - [1] => a - puts a - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(1, visitor.variables.length) - - variable = visitor.variables["a"] - assert_equal(1, variable.definitions.length) - assert_equal(1, variable.usages.length) - - assert_equal(2, variable.definitions[0].start_line) - assert_equal(3, variable.usages[0].start_line) - end - - def test_collecting_endless_method_arguments - tree = SyntaxTree.parse(<<~RUBY) - def foo(a) = puts a - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(1, visitor.arguments.length) - - argument = visitor.arguments["a"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - - assert_equal(1, argument.definitions[0].start_line) - assert_equal(1, argument.usages[0].start_line) - end - end - - def test_collecting_method_arguments - tree = SyntaxTree.parse(<<~RUBY) - def foo(a) - puts a - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(1, visitor.arguments.length) - - argument = visitor.arguments["a"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - - assert_equal(1, argument.definitions[0].start_line) - assert_equal(2, argument.usages[0].start_line) - end - - def test_collecting_singleton_method_arguments - tree = SyntaxTree.parse(<<~RUBY) - def self.foo(a) - puts a - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(1, visitor.arguments.length) - - 
argument = visitor.arguments["a"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - - assert_equal(1, argument.definitions[0].start_line) - assert_equal(2, argument.usages[0].start_line) - end - - def test_collecting_method_arguments_all_types - tree = SyntaxTree.parse(<<~RUBY) - def foo(a, b = 1, *c, d, e: 1, **f, &block) - puts a - puts b - puts c - puts d - puts e - puts f - block.call - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(7, visitor.arguments.length) - - argument_a = visitor.arguments["a"] - assert_equal(1, argument_a.definitions.length) - assert_equal(1, argument_a.usages.length) - assert_equal(1, argument_a.definitions[0].start_line) - assert_equal(2, argument_a.usages[0].start_line) - - argument_b = visitor.arguments["b"] - assert_equal(1, argument_b.definitions.length) - assert_equal(1, argument_b.usages.length) - assert_equal(1, argument_b.definitions[0].start_line) - assert_equal(3, argument_b.usages[0].start_line) - - argument_c = visitor.arguments["c"] - assert_equal(1, argument_c.definitions.length) - assert_equal(1, argument_c.usages.length) - assert_equal(1, argument_c.definitions[0].start_line) - assert_equal(4, argument_c.usages[0].start_line) - - argument_d = visitor.arguments["d"] - assert_equal(1, argument_d.definitions.length) - assert_equal(1, argument_d.usages.length) - assert_equal(1, argument_d.definitions[0].start_line) - assert_equal(5, argument_d.usages[0].start_line) - - argument_e = visitor.arguments["e"] - assert_equal(1, argument_e.definitions.length) - assert_equal(1, argument_e.usages.length) - assert_equal(1, argument_e.definitions[0].start_line) - assert_equal(6, argument_e.usages[0].start_line) - - argument_f = visitor.arguments["f"] - assert_equal(1, argument_f.definitions.length) - assert_equal(1, argument_f.usages.length) - assert_equal(1, argument_f.definitions[0].start_line) - assert_equal(7, argument_f.usages[0].start_line) - - 
argument_block = visitor.arguments["block"] - assert_equal(1, argument_block.definitions.length) - assert_equal(1, argument_block.usages.length) - assert_equal(1, argument_block.definitions[0].start_line) - assert_equal(8, argument_block.usages[0].start_line) - end - - def test_collecting_block_arguments - tree = SyntaxTree.parse(<<~RUBY) - def foo - [].each do |i| - puts i - end - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(1, visitor.arguments.length) - - argument = visitor.arguments["i"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - assert_equal(2, argument.definitions[0].start_line) - assert_equal(3, argument.usages[0].start_line) - end - - def test_collecting_one_line_block_arguments - tree = SyntaxTree.parse(<<~RUBY) - def foo - [].each { |i| puts i } - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(1, visitor.arguments.length) - - argument = visitor.arguments["i"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - assert_equal(2, argument.definitions[0].start_line) - assert_equal(2, argument.usages[0].start_line) - end - - def test_collecting_shadowed_block_arguments - tree = SyntaxTree.parse(<<~RUBY) - def foo - i = "something" - - [].each do |i| - puts i - end - - i - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(1, visitor.arguments.length) - assert_equal(1, visitor.variables.length) - - argument = visitor.arguments["i"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - assert_equal(4, argument.definitions[0].start_line) - assert_equal(5, argument.usages[0].start_line) - - variable = visitor.variables["i"] - assert_equal(1, variable.definitions.length) - assert_equal(1, variable.usages.length) - assert_equal(2, variable.definitions[0].start_line) - assert_equal(8, variable.usages[0].start_line) - end - - def 
test_collecting_shadowed_local_variables - tree = SyntaxTree.parse(<<~RUBY) - def foo(a) - puts a - a = 123 - a - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - # All occurrences are considered arguments, despite overriding the - # argument value - assert_equal(1, visitor.arguments.length) - assert_equal(0, visitor.variables.length) - - argument = visitor.arguments["a"] - assert_equal(2, argument.definitions.length) - assert_equal(2, argument.usages.length) - - assert_equal(1, argument.definitions[0].start_line) - assert_equal(3, argument.definitions[1].start_line) - assert_equal(2, argument.usages[0].start_line) - assert_equal(4, argument.usages[1].start_line) - end - - def test_variables_in_the_top_level - tree = SyntaxTree.parse(<<~RUBY) - a = 123 - a - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(0, visitor.arguments.length) - assert_equal(1, visitor.variables.length) - - variable = visitor.variables["a"] - assert_equal(1, variable.definitions.length) - assert_equal(1, variable.usages.length) - - assert_equal(1, variable.definitions[0].start_line) - assert_equal(2, variable.usages[0].start_line) - end - - def test_aref_field - tree = SyntaxTree.parse(<<~RUBY) - object = {} - object["name"] = "something" - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(0, visitor.arguments.length) - assert_equal(1, visitor.variables.length) - - variable = visitor.variables["object"] - assert_equal(1, variable.definitions.length) - assert_equal(1, variable.usages.length) - - assert_equal(1, variable.definitions[0].start_line) - assert_equal(2, variable.usages[0].start_line) - end - - def test_aref_on_a_method_call - tree = SyntaxTree.parse(<<~RUBY) - object = MyObject.new - object.attributes["name"] = "something" - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(0, visitor.arguments.length) - assert_equal(1, visitor.variables.length) - - variable = visitor.variables["object"] - 
assert_equal(1, variable.definitions.length) - assert_equal(1, variable.usages.length) - - assert_equal(1, variable.definitions[0].start_line) - assert_equal(2, variable.usages[0].start_line) - end - - def test_aref_with_two_accesses - tree = SyntaxTree.parse(<<~RUBY) - object = MyObject.new - object["first"]["second"] ||= [] - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(0, visitor.arguments.length) - assert_equal(1, visitor.variables.length) - - variable = visitor.variables["object"] - assert_equal(1, variable.definitions.length) - assert_equal(1, variable.usages.length) - - assert_equal(1, variable.definitions[0].start_line) - assert_equal(2, variable.usages[0].start_line) - end - - def test_aref_on_a_method_call_with_arguments - tree = SyntaxTree.parse(<<~RUBY) - object = MyObject.new - object.instance_variable_get(:@attributes)[:something] = :other_thing - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(0, visitor.arguments.length) - assert_equal(1, visitor.variables.length) - - variable = visitor.variables["object"] - assert_equal(1, variable.definitions.length) - assert_equal(1, variable.usages.length) - - assert_equal(1, variable.definitions[0].start_line) - assert_equal(2, variable.usages[0].start_line) - end - - def test_double_aref_on_method_call - tree = SyntaxTree.parse(<<~RUBY) - object = MyObject.new - object["attributes"].find { |a| a["field"] == "expected" }["value"] = "changed" - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(1, visitor.arguments.length) - assert_equal(1, visitor.variables.length) - - variable = visitor.variables["object"] - assert_equal(1, variable.definitions.length) - assert_equal(1, variable.usages.length) - - assert_equal(1, variable.definitions[0].start_line) - assert_equal(2, variable.usages[0].start_line) - - argument = visitor.arguments["a"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - - 
assert_equal(2, argument.definitions[0].start_line) - assert_equal(2, argument.usages[0].start_line) - end - - def test_nested_arguments - tree = SyntaxTree.parse(<<~RUBY) - [[1, [2, 3]]].each do |one, (two, three)| - one - two - three - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(3, visitor.arguments.length) - assert_equal(0, visitor.variables.length) - - argument = visitor.arguments["one"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - - assert_equal(1, argument.definitions[0].start_line) - assert_equal(2, argument.usages[0].start_line) - - argument = visitor.arguments["two"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - - assert_equal(1, argument.definitions[0].start_line) - assert_equal(3, argument.usages[0].start_line) - - argument = visitor.arguments["three"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - - assert_equal(1, argument.definitions[0].start_line) - assert_equal(4, argument.usages[0].start_line) - end - - def test_double_nested_arguments - tree = SyntaxTree.parse(<<~RUBY) - [[1, [2, 3]]].each do |one, (two, (three, four))| - one - two - three - four - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(4, visitor.arguments.length) - assert_equal(0, visitor.variables.length) - - argument = visitor.arguments["one"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - - assert_equal(1, argument.definitions[0].start_line) - assert_equal(2, argument.usages[0].start_line) - - argument = visitor.arguments["two"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - - assert_equal(1, argument.definitions[0].start_line) - assert_equal(3, argument.usages[0].start_line) - - argument = visitor.arguments["three"] - assert_equal(1, argument.definitions.length) - assert_equal(1, 
argument.usages.length) - - assert_equal(1, argument.definitions[0].start_line) - assert_equal(4, argument.usages[0].start_line) - - argument = visitor.arguments["four"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - - assert_equal(1, argument.definitions[0].start_line) - assert_equal(5, argument.usages[0].start_line) - end - - class Resolver < Visitor - include WithEnvironment - - attr_reader :locals - - def initialize - @locals = [] - end - - visit_methods do - def visit_assign(node) - level = 0 - environment = current_environment - level += 1 until (environment = environment.parent).nil? - - locals << [node.target.value.value, level] - super - end - end - end - - def test_class - source = <<~RUBY - module Level0 - level0 = 0 - - module Level1 - level1 = 1 - - class Level2 - level2 = 2 - end - end - end - RUBY - - visitor = Resolver.new - SyntaxTree.parse(source).accept(visitor) - - assert_equal [["level0", 0], ["level1", 1], ["level2", 2]], visitor.locals - end - end -end diff --git a/test/with_environment_test.rb b/test/with_environment_test.rb new file mode 100644 index 00000000..b6f79c14 --- /dev/null +++ b/test/with_environment_test.rb @@ -0,0 +1,457 @@ +# frozen_string_literal: true + +require_relative "test_helper" + +module SyntaxTree + class WithEnvironmentTest < Minitest::Test + class Collector < Visitor + prepend WithEnvironment + + attr_reader :arguments, :variables + + def initialize + @arguments = {} + @variables = {} + end + + def self.collect(source) + new.tap { SyntaxTree.parse(source).accept(_1) } + end + + visit_methods do + def visit_ident(node) + value = node.value.delete_suffix(":") + local = current_environment.find_local(node.value) + + case local&.type + when :argument + arguments[[current_environment.id, value]] = local + when :variable + variables[[current_environment.id, value]] = local + end + end + + def visit_label(node) + value = node.value.delete_suffix(":") + local = 
current_environment.find_local(value) + + if local&.type == :argument + arguments[[current_environment.id, value]] = node + end + end + end + end + + def test_collecting_simple_variables + collector = Collector.collect(<<~RUBY) + def foo + a = 1 + a + end + RUBY + + assert_equal(1, collector.variables.length) + assert_variable(collector, "a", definitions: [2], usages: [3]) + end + + def test_collecting_aref_variables + collector = Collector.collect(<<~RUBY) + def foo + a = [] + a[1] + end + RUBY + + assert_equal(1, collector.variables.length) + assert_variable(collector, "a", definitions: [2], usages: [3]) + end + + def test_collecting_multi_assign_variables + collector = Collector.collect(<<~RUBY) + def foo + a, b = [1, 2] + puts a + puts b + end + RUBY + + assert_equal(2, collector.variables.length) + assert_variable(collector, "a", definitions: [2], usages: [3]) + assert_variable(collector, "b", definitions: [2], usages: [4]) + end + + def test_collecting_pattern_matching_variables + collector = Collector.collect(<<~RUBY) + def foo + case [1, 2] + in Integer => a, Integer + puts a + end + end + RUBY + + # There are two occurrences, one on line 3 for pinning and one on line 4 + # for reference + assert_equal(1, collector.variables.length) + assert_variable(collector, "a", definitions: [3], usages: [4]) + end + + def test_collecting_pinned_variables + collector = Collector.collect(<<~RUBY) + def foo + a = 18 + case [1, 2] + in ^a, *rest + puts a + puts rest + end + end + RUBY + + assert_equal(2, collector.variables.length) + assert_variable(collector, "a", definitions: [2, 4], usages: [5]) + assert_variable(collector, "rest", definitions: [4]) + + # Rest is considered a vcall by the parser instead of a var_ref + # assert_equal(1, variable_rest.usages.length) + # assert_equal(6, variable_rest.usages[0].start_line) + end + + if RUBY_VERSION >= "3.1" + def test_collecting_one_line_pattern_matching_variables + collector = Collector.collect(<<~RUBY) + def foo + [1] => 
a + puts a + end + RUBY + + assert_equal(1, collector.variables.length) + assert_variable(collector, "a", definitions: [2], usages: [3]) + end + + def test_collecting_endless_method_arguments + collector = Collector.collect(<<~RUBY) + def foo(a) = puts a + RUBY + + assert_equal(1, collector.arguments.length) + assert_argument(collector, "a", definitions: [1], usages: [1]) + end + end + + def test_collecting_method_arguments + collector = Collector.collect(<<~RUBY) + def foo(a) + puts a + end + RUBY + + assert_equal(1, collector.arguments.length) + assert_argument(collector, "a", definitions: [1], usages: [2]) + end + + def test_collecting_singleton_method_arguments + collector = Collector.collect(<<~RUBY) + def self.foo(a) + puts a + end + RUBY + + assert_equal(1, collector.arguments.length) + assert_argument(collector, "a", definitions: [1], usages: [2]) + end + + def test_collecting_method_arguments_all_types + collector = Collector.collect(<<~RUBY) + def foo(a, b = 1, *c, d, e: 1, **f, &block) + puts a + puts b + puts c + puts d + puts e + puts f + block.call + end + RUBY + + assert_equal(7, collector.arguments.length) + assert_argument(collector, "a", definitions: [1], usages: [2]) + assert_argument(collector, "b", definitions: [1], usages: [3]) + assert_argument(collector, "c", definitions: [1], usages: [4]) + assert_argument(collector, "d", definitions: [1], usages: [5]) + assert_argument(collector, "e", definitions: [1], usages: [6]) + assert_argument(collector, "f", definitions: [1], usages: [7]) + assert_argument(collector, "block", definitions: [1], usages: [8]) + end + + def test_collecting_block_arguments + collector = Collector.collect(<<~RUBY) + def foo + [].each do |i| + puts i + end + end + RUBY + + assert_equal(1, collector.arguments.length) + assert_argument(collector, "i", definitions: [2], usages: [3]) + end + + def test_collecting_one_line_block_arguments + collector = Collector.collect(<<~RUBY) + def foo + [].each { |i| puts i } + end + RUBY + 
+ assert_equal(1, collector.arguments.length) + assert_argument(collector, "i", definitions: [2], usages: [2]) + end + + def test_collecting_shadowed_block_arguments + collector = Collector.collect(<<~RUBY) + def foo + i = "something" + + [].each do |i| + puts i + end + + i + end + RUBY + + assert_equal(1, collector.arguments.length) + assert_argument(collector, "i", definitions: [4], usages: [5]) + + assert_equal(1, collector.variables.length) + assert_variable(collector, "i", definitions: [2], usages: [8]) + end + + def test_collecting_shadowed_local_variables + collector = Collector.collect(<<~RUBY) + def foo(a) + puts a + a = 123 + a + end + RUBY + + # All occurrences are considered arguments, despite overriding the + # argument value + assert_equal(1, collector.arguments.length) + assert_equal(0, collector.variables.length) + assert_argument(collector, "a", definitions: [1, 3], usages: [2, 4]) + end + + def test_variables_in_the_top_level + collector = Collector.collect(<<~RUBY) + a = 123 + a + RUBY + + assert_equal(0, collector.arguments.length) + assert_equal(1, collector.variables.length) + assert_variable(collector, "a", definitions: [1], usages: [2]) + end + + def test_aref_field + collector = Collector.collect(<<~RUBY) + object = {} + object["name"] = "something" + RUBY + + assert_equal(0, collector.arguments.length) + assert_equal(1, collector.variables.length) + assert_variable(collector, "object", definitions: [1], usages: [2]) + end + + def test_aref_on_a_method_call + collector = Collector.collect(<<~RUBY) + object = MyObject.new + object.attributes["name"] = "something" + RUBY + + assert_equal(0, collector.arguments.length) + assert_equal(1, collector.variables.length) + assert_variable(collector, "object", definitions: [1], usages: [2]) + end + + def test_aref_with_two_accesses + collector = Collector.collect(<<~RUBY) + object = MyObject.new + object["first"]["second"] ||= [] + RUBY + + assert_equal(0, collector.arguments.length) + assert_equal(1, 
collector.variables.length) + assert_variable(collector, "object", definitions: [1], usages: [2]) + end + + def test_aref_on_a_method_call_with_arguments + collector = Collector.collect(<<~RUBY) + object = MyObject.new + object.instance_variable_get(:@attributes)[:something] = :other_thing + RUBY + + assert_equal(0, collector.arguments.length) + assert_equal(1, collector.variables.length) + assert_variable(collector, "object", definitions: [1], usages: [2]) + end + + def test_double_aref_on_method_call + collector = Collector.collect(<<~RUBY) + object = MyObject.new + object["attributes"].find { |a| a["field"] == "expected" }["value"] = "changed" + RUBY + + assert_equal(1, collector.arguments.length) + assert_argument(collector, "a", definitions: [2], usages: [2]) + + assert_equal(1, collector.variables.length) + assert_variable(collector, "object", definitions: [1], usages: [2]) + end + + def test_nested_arguments + collector = Collector.collect(<<~RUBY) + [[1, [2, 3]]].each do |one, (two, three)| + one + two + three + end + RUBY + + assert_equal(3, collector.arguments.length) + assert_equal(0, collector.variables.length) + + assert_argument(collector, "one", definitions: [1], usages: [2]) + assert_argument(collector, "two", definitions: [1], usages: [3]) + assert_argument(collector, "three", definitions: [1], usages: [4]) + end + + def test_double_nested_arguments + collector = Collector.collect(<<~RUBY) + [[1, [2, 3]]].each do |one, (two, (three, four))| + one + two + three + four + end + RUBY + + assert_equal(4, collector.arguments.length) + assert_equal(0, collector.variables.length) + + assert_argument(collector, "one", definitions: [1], usages: [2]) + assert_argument(collector, "two", definitions: [1], usages: [3]) + assert_argument(collector, "three", definitions: [1], usages: [4]) + assert_argument(collector, "four", definitions: [1], usages: [5]) + end + + class Resolver < Visitor + prepend WithEnvironment + + attr_reader :locals + + def initialize + 
@locals = [] + end + + visit_methods do + def visit_assign(node) + super.tap do + level = 0 + name = node.target.value.value + + environment = current_environment + while !environment.locals.key?(name) && !environment.parent.nil? + level += 1 + environment = environment.parent + end + + locals << [name, level] + end + end + end + end + + def test_resolver + source = <<~RUBY + module Level0 + level0 = 0 + + class Level1 + level1 = 1 + + def level2 + level2 = 2 + + tap do |level3| + level2 = 2 + level3 = 3 + + tap do |level4| + level2 = 2 + level4 = 4 + end + end + end + end + end + RUBY + + resolver = Resolver.new + SyntaxTree.parse(source).accept(resolver) + + expected = [ + ["level0", 0], + ["level1", 0], + ["level2", 0], + ["level2", 1], + ["level3", 0], + ["level2", 2], + ["level4", 0] + ] + + assert_equal expected, resolver.locals + end + + private + + def assert_collected(field, name, definitions: [], usages: []) + keys = field.keys.select { |key| key[1] == name } + assert_equal(1, keys.length) + + variable = field[keys.first] + + assert_equal(definitions.length, variable.definitions.length) + definitions.each_with_index do |definition, index| + assert_equal(definition, variable.definitions[index].start_line) + end + + assert_equal(usages.length, variable.usages.length) + usages.each_with_index do |usage, index| + assert_equal(usage, variable.usages[index].start_line) + end + end + + def assert_argument(collector, name, definitions: [], usages: []) + assert_collected( + collector.arguments, + name, + definitions: definitions, + usages: usages + ) + end + + def assert_variable(collector, name, definitions: [], usages: []) + assert_collected( + collector.variables, + name, + definitions: definitions, + usages: usages + ) + end + end +end From 4a6fc77abd4c696b3d38498250ab37e571f27d9a Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 10 Feb 2023 13:00:40 -0500 Subject: [PATCH 54/58] WithEnvironment -> WithScope --- README.md | 12 +- lib/syntax_tree.rb | 2 +- 
.../{with_environment.rb => with_scope.rb} | 107 ++++++++---------- ...environment_test.rb => with_scope_test.rb} | 22 ++-- 4 files changed, 67 insertions(+), 76 deletions(-) rename lib/syntax_tree/{with_environment.rb => with_scope.rb} (65%) rename test/{with_environment_test.rb => with_scope_test.rb} (95%) diff --git a/README.md b/README.md index 5f447ad8..500d5fad 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,7 @@ It is built with only standard library dependencies. It additionally ships with - [visit_methods](#visit_methods) - [BasicVisitor](#basicvisitor) - [MutationVisitor](#mutationvisitor) - - [WithEnvironment](#withenvironment) + - [WithScope](#withscope) - [Language server](#language-server) - [textDocument/formatting](#textdocumentformatting) - [textDocument/inlayHint](#textdocumentinlayhint) @@ -588,20 +588,18 @@ SyntaxTree::Formatter.format(source, program.accept(visitor)) # => "if (a = 1)\nend\n" ``` -### WithEnvironment +### WithScope -The `WithEnvironment` module can be included in visitors to automatically keep track of local variables and arguments -defined inside each environment. A `current_environment` accessor is made available to the request, allowing it to find -all usages and definitions of a local. +The `WithScope` module can be included in visitors to automatically keep track of local variables and arguments defined inside each scope. A `current_scope` accessor is made available to the visitor, allowing it to find all usages and definitions of a local. 
```ruby class MyVisitor < Visitor - include WithEnvironment + prepend WithScope def visit_ident(node) # find_local will return a Local for any local variables or arguments # present in the current environment or nil if the identifier is not a local - local = current_environment.find_local(node) + local = current_scope.find_local(node) puts local.type # the type of the local (:variable or :argument) puts local.definitions # the array of locations where this local is defined diff --git a/lib/syntax_tree.rb b/lib/syntax_tree.rb index 70126b14..4e183383 100644 --- a/lib/syntax_tree.rb +++ b/lib/syntax_tree.rb @@ -33,7 +33,7 @@ module SyntaxTree autoload :PrettyPrintVisitor, "syntax_tree/pretty_print_visitor" autoload :Search, "syntax_tree/search" autoload :Translation, "syntax_tree/translation" - autoload :WithEnvironment, "syntax_tree/with_environment" + autoload :WithScope, "syntax_tree/with_scope" autoload :YARV, "syntax_tree/yarv" # This holds references to objects that respond to both #parse and #format diff --git a/lib/syntax_tree/with_environment.rb b/lib/syntax_tree/with_scope.rb similarity index 65% rename from lib/syntax_tree/with_environment.rb rename to lib/syntax_tree/with_scope.rb index 3a6f04b9..efa8d075 100644 --- a/lib/syntax_tree/with_environment.rb +++ b/lib/syntax_tree/with_scope.rb @@ -1,18 +1,18 @@ # frozen_string_literal: true module SyntaxTree - # WithEnvironment is a module intended to be included in classes inheriting - # from Visitor. The module overrides a few visit methods to automatically keep - # track of local variables and arguments defined in the current environment. + # WithScope is a module intended to be included in classes inheriting from + # Visitor. The module overrides a few visit methods to automatically keep + # track of local variables and arguments defined in the current scope. 
# Example usage: # # class MyVisitor < Visitor - # include WithEnvironment + # include WithScope # # def visit_ident(node) # # Check if we're visiting an identifier for an argument, a local # # variable or something else - # local = current_environment.find_local(node) + # local = current_scope.find_local(node) # # if local.type == :argument # # handle identifiers for arguments @@ -24,11 +24,11 @@ module SyntaxTree # end # end # - module WithEnvironment - # The environment class is used to keep track of local variables and - # arguments inside a particular scope - class Environment - # This class tracks the occurrences of a local variable or argument + module WithScope + # The scope class is used to keep track of local variables and arguments + # inside a particular scope. + class Scope + # This class tracks the occurrences of a local variable or argument. class Local # [Symbol] The type of the local (e.g. :argument, :variable) attr_reader :type @@ -55,20 +55,20 @@ def add_usage(location) end end - # [Integer] a unique identifier for this environment + # [Integer] a unique identifier for this scope attr_reader :id + # [scope | nil] The parent scope + attr_reader :parent + # [Hash[String, Local]] The local variables and arguments defined in this - # environment + # scope attr_reader :locals - # [Environment | nil] The parent environment - attr_reader :parent - def initialize(id, parent = nil) @id = id - @locals = {} @parent = parent + @locals = {} end # Adding a local definition will either insert a new entry in the locals @@ -97,7 +97,7 @@ def add_local_usage(identifier, type) resolve_local(name, type).add_usage(identifier.location) end - # Try to find the local given its name in this environment or any of its + # Try to find the local given its name in this scope or any of its # parents. 
def find_local(name) locals[name] || parent&.find_local(name) @@ -117,44 +117,35 @@ def resolve_local(name, type) end end + attr_reader :current_scope + def initialize(*args, **kwargs, &block) super - @environment_id = 0 - end - - def current_environment - @current_environment ||= Environment.new(next_environment_id) - end - def with_new_environment(parent_environment = nil) - previous_environment = @current_environment - @current_environment = - Environment.new(next_environment_id, parent_environment) - yield - ensure - @current_environment = previous_environment + @current_scope = Scope.new(0) + @next_scope_id = 0 end - # Visits for nodes that create new environments, such as classes, modules + # Visits for nodes that create new scopes, such as classes, modules # and method definitions. def visit_class(node) - with_new_environment { super } + with_scope { super } end def visit_module(node) - with_new_environment { super } + with_scope { super } end - # When we find a method invocation with a block, only the code that - # happens inside of the block needs a fresh environment. The method - # invocation itself happens in the same environment. + # When we find a method invocation with a block, only the code that happens + # inside of the block needs a fresh scope. The method invocation + # itself happens in the same scope. 
def visit_method_add_block(node) visit(node.call) - with_new_environment(current_environment) { visit(node.block) } + with_scope(current_scope) { visit(node.block) } end def visit_def(node) - with_new_environment { super } + with_scope { super } end # Visit for keeping track of local arguments, such as method and block @@ -163,15 +154,15 @@ def visit_params(node) add_argument_definitions(node.requireds) node.posts.each do |param| - current_environment.add_local_definition(param, :argument) + current_scope.add_local_definition(param, :argument) end node.keywords.each do |param| - current_environment.add_local_definition(param.first, :argument) + current_scope.add_local_definition(param.first, :argument) end node.optionals.each do |param| - current_environment.add_local_definition(param.first, :argument) + current_scope.add_local_definition(param.first, :argument) end super @@ -179,21 +170,21 @@ def visit_params(node) def visit_rest_param(node) name = node.name - current_environment.add_local_definition(name, :argument) if name + current_scope.add_local_definition(name, :argument) if name super end def visit_kwrest_param(node) name = node.name - current_environment.add_local_definition(name, :argument) if name + current_scope.add_local_definition(name, :argument) if name super end def visit_blockarg(node) name = node.name - current_environment.add_local_definition(name, :argument) if name + current_scope.add_local_definition(name, :argument) if name super end @@ -201,10 +192,7 @@ def visit_blockarg(node) # Visit for keeping track of local variable definitions def visit_var_field(node) value = node.value - - if value.is_a?(SyntaxTree::Ident) - current_environment.add_local_definition(value, :variable) - end + current_scope.add_local_definition(value, :variable) if value.is_a?(Ident) super end @@ -215,12 +203,9 @@ def visit_var_field(node) def visit_var_ref(node) value = node.value - if value.is_a?(SyntaxTree::Ident) - definition = 
current_environment.find_local(value.value) - - if definition - current_environment.add_local_usage(value, definition.type) - end + if value.is_a?(Ident) + definition = current_scope.find_local(value.value) + current_scope.add_local_usage(value, definition.type) if definition end super @@ -233,13 +218,21 @@ def add_argument_definitions(list) if param.is_a?(SyntaxTree::MLHSParen) add_argument_definitions(param.contents.parts) else - current_environment.add_local_definition(param, :argument) + current_scope.add_local_definition(param, :argument) end end end - def next_environment_id - @environment_id += 1 + def next_scope_id + @next_scope_id += 1 + end + + def with_scope(parent_scope = nil) + previous_scope = @current_scope + @current_scope = Scope.new(next_scope_id, parent_scope) + yield + ensure + @current_scope = previous_scope end end end diff --git a/test/with_environment_test.rb b/test/with_scope_test.rb similarity index 95% rename from test/with_environment_test.rb rename to test/with_scope_test.rb index b6f79c14..1a4c5468 100644 --- a/test/with_environment_test.rb +++ b/test/with_scope_test.rb @@ -3,9 +3,9 @@ require_relative "test_helper" module SyntaxTree - class WithEnvironmentTest < Minitest::Test + class WithScopeTest < Minitest::Test class Collector < Visitor - prepend WithEnvironment + prepend WithScope attr_reader :arguments, :variables @@ -21,22 +21,22 @@ def self.collect(source) visit_methods do def visit_ident(node) value = node.value.delete_suffix(":") - local = current_environment.find_local(node.value) + local = current_scope.find_local(node.value) case local&.type when :argument - arguments[[current_environment.id, value]] = local + arguments[[current_scope.id, value]] = local when :variable - variables[[current_environment.id, value]] = local + variables[[current_scope.id, value]] = local end end def visit_label(node) value = node.value.delete_suffix(":") - local = current_environment.find_local(value) + local = current_scope.find_local(value) 
if local&.type == :argument - arguments[[current_environment.id, value]] = node + arguments[[current_scope.id, value]] = node end end end @@ -350,7 +350,7 @@ def test_double_nested_arguments end class Resolver < Visitor - prepend WithEnvironment + prepend WithScope attr_reader :locals @@ -364,10 +364,10 @@ def visit_assign(node) level = 0 name = node.target.value.value - environment = current_environment - while !environment.locals.key?(name) && !environment.parent.nil? + scope = current_scope + while !scope.locals.key?(name) && !scope.parent.nil? level += 1 - environment = environment.parent + scope = scope.parent end locals << [name, level] From 0068978479bb18b581aa745a12bb104f52ebe82f Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 10 Feb 2023 14:24:02 -0500 Subject: [PATCH 55/58] Pinned variables should be treated as usages, not definitions --- lib/syntax_tree/with_scope.rb | 8 +++++++- test/with_scope_test.rb | 2 +- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/lib/syntax_tree/with_scope.rb b/lib/syntax_tree/with_scope.rb index efa8d075..7fcef067 100644 --- a/lib/syntax_tree/with_scope.rb +++ b/lib/syntax_tree/with_scope.rb @@ -197,7 +197,13 @@ def visit_var_field(node) super end - alias visit_pinned_var_ref visit_var_field + # Visit for keeping track of local variable definitions + def visit_pinned_var_ref(node) + value = node.value + current_scope.add_local_usage(value, :variable) if value.is_a?(Ident) + + super + end # Visits for keeping track of variable and argument usages def visit_var_ref(node) diff --git a/test/with_scope_test.rb b/test/with_scope_test.rb index 1a4c5468..9675e811 100644 --- a/test/with_scope_test.rb +++ b/test/with_scope_test.rb @@ -109,7 +109,7 @@ def foo RUBY assert_equal(2, collector.variables.length) - assert_variable(collector, "a", definitions: [2, 4], usages: [5]) + assert_variable(collector, "a", definitions: [2], usages: [4, 5]) assert_variable(collector, "rest", definitions: [4]) # Rest is considered a 
vcall by the parser instead of a var_ref From 575ae3ea24a66a74b254090e421c6cd439e63fee Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 10 Feb 2023 14:38:26 -0500 Subject: [PATCH 56/58] No submodules needed --- .github/workflows/main.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 8bca2fc4..3f811317 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -23,8 +23,6 @@ jobs: # TESTOPTS: --verbose steps: - uses: actions/checkout@master - with: - submodules: true - uses: ruby/setup-ruby@v1 with: bundler-cache: true From cfc297925a056201825f76c1aea67ce72a65dcfc Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 10 Feb 2023 14:39:04 -0500 Subject: [PATCH 57/58] Remove unused sections of rubocop config --- .rubocop.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.rubocop.yml b/.rubocop.yml index e5a3fe96..e74cdc1b 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -8,8 +8,6 @@ AllCops: TargetRubyVersion: 2.7 Exclude: - '{.git,.github,bin,coverage,pkg,spec,test/fixtures,vendor,tmp}/**/*' - - test/ruby-syntax-fixtures/**/* - - test/suites/parser/**/* - test.rb Gemspec/DevelopmentDependencies: From 4dac90b53df388f726dce50ce638a1ba71cc59f8 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 10 Feb 2023 15:19:00 -0500 Subject: [PATCH 58/58] Bump to version 6.0.0 --- CHANGELOG.md | 65 +++++++++++++++++++++++++++++++++++++- Gemfile.lock | 2 +- lib/syntax_tree/version.rb | 2 +- 3 files changed, 66 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c39bed36..34c40e40 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,68 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a ## [Unreleased] +## [6.0.0] - 2023-02-10 + +### Added + +- `SyntaxTree::BasicVisitor::visit_methods` has been added to allow you to check multiple visit methods inside of a block. 
There _was_ a method called `visit_methods` previously, but it was undocumented because it was meant as a private API. That method has been renamed to `valid_visit_methods`. +- `rake sorbet:rbi` has been added as a task within the repository to generate an RBI file corresponding to the nodes in the tree. This can be used to help aid consumers of Syntax Tree that are using Sorbet. +- `SyntaxTree::Reflection` has been added to allow you to get information about the nodes in the tree. It is not required by default, since it takes a small amount of time to parse `node.rb` and get all of the information. +- `SyntaxTree::Node#to_mermaid` has been added to allow you to generate a Mermaid diagram of the node and its children. This is useful for debugging and understanding the structure of the tree. +- `SyntaxTree::Translation` has been added as an experimental API to transform the Syntax Tree syntax tree into the syntax trees represented by the whitequark/parser and rubocop/rubocop-ast gems. + - `SyntaxTree::Translation.to_parser(node, buffer)` will return a `Parser::AST::Node` object. + - `SyntaxTree::Translation.to_rubocop_ast(node, buffer)` will return a `RuboCop::AST::Node` object. +- `SyntaxTree::index` and `SyntaxTree::index_file` have been added to allow you to get a list of all of the classes, modules, and methods defined in a given source string or file. 
+- Various convenience methods have been added: + - `SyntaxTree::format_file` - which calls format with the result of reading the file + - `SyntaxTree::format_node` - which formats the node directly + - `SyntaxTree::parse_file` - which calls parse with the result of reading the file + - `SyntaxTree::search_file` - which calls search with the result of reading the file + - `SyntaxTree::Node#start_char` - which is the same as calling `node.location.start_char` + - `SyntaxTree::Node#end_char` - which is the same as calling `node.location.end_char` +- `SyntaxTree::Assoc` nodes can now be formatted on their own without a parent hash node. +- `SyntaxTree::BlockVar#arg0?` has been added to check if a single required block parameter is present and would potentially be expanded. +- More experimental APIs have been added to the `SyntaxTree::YARV` module, including: + - `SyntaxTree::YARV::ControlFlowGraph` + - `SyntaxTree::YARV::DataFlowGraph` + - `SyntaxTree::YARV::SeaOfNodes` + +### Changed + +#### Major changes + +- *BREAKING* Updates to `WithEnvironment`: + - The `WithEnvironment` module has been renamed to `WithScope`. + - The `current_environment` method has been renamed to `current_scope`. + - The `with_new_environment` method has been removed. + - Previously scopes were always able to look up the tree, as in: `a = 1; def foo; a = 2; end` would see only a single `a` variable. That has been corrected. + - Previously accessing variables from inside of blocks that were not shadowed would mark them as being local to the block only. This has been corrected. 
+- *BREAKING* Lots of constants moved out of `SyntaxTree::Visitor` to just `SyntaxTree`: + * `SyntaxTree::Visitor::FieldVisitor` is now `SyntaxTree::FieldVisitor` + * `SyntaxTree::Visitor::JSONVisitor` is now `SyntaxTree::JSONVisitor` + * `SyntaxTree::Visitor::MatchVisitor` is now `SyntaxTree::MatchVisitor` + * `SyntaxTree::Visitor::MutationVisitor` is now `SyntaxTree::MutationVisitor` + * `SyntaxTree::Visitor::PrettyPrintVisitor` is now `SyntaxTree::PrettyPrintVisitor` +- *BREAKING* Lots of constants are now autoloaded instead of required by default. This is only particularly relevant if you are in a forking environment and want to preload constants before forking for better memory usage with copy-on-write. +- *BREAKING* The `SyntaxTree::Statements#initialize` method no longer accepts a parser as the first argument. It now mirrors the other nodes in that it accepts its children and location. As a result, Syntax Tree nodes are now marshalable (and therefore can be sent over DRb). Previously the `Statements` node was not able to be marshaled because it held a reference to the parser. + +#### Minor changes + +- Many places where embedded documents (`=begin` to `=end`) were being treated as real comments have been fixed for formatting. +- Dynamic symbols in keyword pattern matching now have better formatting. +- Endless method definitions used to have a `SyntaxTree::BodyStmt` node that had any kind of node as its `statements` field. That has been corrected to be more consistent such that now going from `def_node.bodystmt.statements` always returns a `SyntaxTree::Statements` node, which is more consistent. +- We no longer assume that `fiddle` is able to be required, and only require it when it is actually needed. + +#### Tiny changes + +- Empty parameter nodes within blocks now have more accurate location information. +- Pinned variables have more correct location information now. 
(Previously the location was just around the variable itself, but it now includes the pin.) +- Array patterns in pattern matching now have more accurate location information when they are using parentheses with a constant present. +- Find patterns in pattern matching now have more correct location information for their `left` and `right` fields. +- Lots of nodes have more correct types in the comments on their attributes. +- The expressions `break foo.bar :baz do |qux| qux end` and `next fun foo do end` now correctly parse as a control-flow statement with a method call that has a block attached, as opposed to a control-flow statement with a block attached. +- The expression `self::a, b = 1, 2` would previously yield a `SyntaxTree::ConstPathField` node for the first element of the left-hand-side of the multiple assignment. Semantically this is incorrect, and we have fixed this to now be a `SyntaxTree::Field` node instead. + ## [5.3.0] - 2023-01-26 ### Added @@ -497,7 +559,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a - 🎉 Initial release! 🎉 -[unreleased]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v5.3.0...HEAD +[unreleased]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v6.0.0...HEAD +[6.0.0]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v5.3.0...v6.0.0 [5.3.0]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v5.2.0...v5.3.0 [5.2.0]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v5.1.0...v5.2.0 [5.1.0]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v5.0.1...v5.1.0 diff --git a/Gemfile.lock b/Gemfile.lock index 46111ea4..325d89b3 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,7 +1,7 @@ PATH remote: . 
specs: - syntax_tree (5.3.0) + syntax_tree (6.0.0) prettier_print (>= 1.2.0) GEM diff --git a/lib/syntax_tree/version.rb b/lib/syntax_tree/version.rb index 6cb1fccf..1f028f89 100644 --- a/lib/syntax_tree/version.rb +++ b/lib/syntax_tree/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module SyntaxTree - VERSION = "5.3.0" + VERSION = "6.0.0" end