From f5f8b6a8dcbf499db95d2c3f8c13ff57a4782bcc Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Sat, 18 Feb 2023 10:14:58 -0500 Subject: [PATCH 01/15] Even more parser gem locations --- lib/syntax_tree/translation/parser.rb | 129 +++++++++++++------------- 1 file changed, 67 insertions(+), 62 deletions(-) diff --git a/lib/syntax_tree/translation/parser.rb b/lib/syntax_tree/translation/parser.rb index ad889478..0ed2c61f 100644 --- a/lib/syntax_tree/translation/parser.rb +++ b/lib/syntax_tree/translation/parser.rb @@ -1287,35 +1287,13 @@ def visit_ident(node) # Visit an IfNode node. def visit_if(node) - predicate = - case node.predicate - when RangeNode - type = - node.predicate.operator.value == ".." ? :iflipflop : :eflipflop - s(type, visit(node.predicate).children, nil) - when RegexpLiteral - s(:match_current_line, [visit(node.predicate)], nil) - when Unary - if node.predicate.operator.value == "!" && - node.predicate.statement.is_a?(RegexpLiteral) - s( - :send, - [ - s(:match_current_line, [visit(node.predicate.statement)]), - :! - ], - nil - ) - else - visit(node.predicate) - end - else - visit(node.predicate) - end - s( :if, - [predicate, visit(node.statements), visit(node.consequent)], + [ + visit_predicate(node.predicate), + visit(node.statements), + visit(node.consequent) + ], if node.modifier? smap_keyword_bare( srange_find_between(node.statements, node.predicate, "if"), @@ -2376,22 +2354,42 @@ def visit_tstring_content(node) # Visit a Unary node. def visit_unary(node) # Special handling here for flipflops - if node.statement.is_a?(Paren) && - node.statement.contents.is_a?(Statements) && - node.statement.contents.body.length == 1 && - (range = node.statement.contents.body.first).is_a?(RangeNode) && + if (paren = node.statement).is_a?(Paren) && + paren.contents.is_a?(Statements) && + paren.contents.body.length == 1 && + (range = paren.contents.body.first).is_a?(RangeNode) && node.operator == "!" - type = range.operator.value == ".." ? :iflipflop : :eflipflop - return( - s( - :send, - [s(:begin, [s(type, visit(range).children, nil)], nil), :!], - nil + s( + :send, + [ + s( + :begin, + [ + s( + range.operator.value == ".." ? :iflipflop : :eflipflop, + visit(range).children, + smap_operator( + srange_node(range.operator), + srange_node(range) + ) + ) + ], + smap_collection( + srange_length(paren.start_char, 1), + srange_length(paren.end_char, -1), + srange_node(paren) + ) + ), + :! + ], + smap_send_bare( + srange_length(node.start_char, 1), + srange_node(node) ) ) + else + visit(canonical_unary(node)) end - - visit(canonical_unary(node)) end # Visit an Undef node. @@ -2408,31 +2406,13 @@ def visit_undef(node) # Visit an UnlessNode node. def visit_unless(node) - predicate = - case node.predicate - when RegexpLiteral - s(:match_current_line, [visit(node.predicate)], nil) - when Unary - if node.predicate.operator.value == "!" && - node.predicate.statement.is_a?(RegexpLiteral) - s( - :send, - [ - s(:match_current_line, [visit(node.predicate.statement)]), - :! - ], - nil - ) - else - visit(node.predicate) - end - else - visit(node.predicate) - end - s( :if, - [predicate, visit(node.consequent), visit(node.statements)], + [ + visit_predicate(node.predicate), + visit(node.consequent), + visit(node.statements) + ], if node.modifier? smap_keyword_bare( srange_find_between(node.statements, node.predicate, "unless"), @@ -3014,6 +2994,31 @@ def srange_node(node) location = node.location srange(location.start_char, location.end_char) end + + def visit_predicate(node) + case node + when RangeNode + s( + node.operator.value == ".." ? :iflipflop : :eflipflop, + visit(node).children, + smap_operator(srange_node(node.operator), srange_node(node)) + ) + when RegexpLiteral + s(:match_current_line, [visit(node)], smap(srange_node(node))) + when Unary + if node.operator.value == "!" && node.statement.is_a?(RegexpLiteral) + s( + :send, + [s(:match_current_line, [visit(node.statement)]), :!], + smap_send_bare(srange_node(node.operator), srange_node(node)) + ) + else + visit(node) + end + else + visit(node) + end + end end end end From 4057dfa17c3fc80ed8b4b11722e97fd53de50cf2 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Sat, 18 Feb 2023 18:19:04 -0500 Subject: [PATCH 02/15] Handle matching current line --- lib/syntax_tree/translation/parser.rb | 16 ++++++++++++++++ test/translation/parser_test.rb | 1 - 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/lib/syntax_tree/translation/parser.rb b/lib/syntax_tree/translation/parser.rb index 0ed2c61f..9c53ad14 100644 --- a/lib/syntax_tree/translation/parser.rb +++ b/lib/syntax_tree/translation/parser.rb @@ -2387,6 +2387,22 @@ def visit_unary(node) srange_node(node) ) ) + elsif node.operator == "!" && node.statement.is_a?(RegexpLiteral) + s( + :send, + [ + s( + :match_current_line, + [visit(node.statement)], + smap(srange_node(node.statement)) + ), + :! + ], + smap_send_bare( + srange_length(node.start_char, 1), + srange_node(node) + ) + ) else visit(canonical_unary(node)) end diff --git a/test/translation/parser_test.rb b/test/translation/parser_test.rb index ad87d8c6..1df98f47 100644 --- a/test/translation/parser_test.rb +++ b/test/translation/parser_test.rb @@ -55,7 +55,6 @@ class ParserTest < Minitest::Test "test_dedenting_heredoc:399", "test_slash_newline_in_heredocs:7194", "test_parser_slash_slash_n_escaping_in_literals:*", - "test_cond_match_current_line:4801", "test_forwarded_restarg:*", "test_forwarded_kwrestarg:*", "test_forwarded_argument_with_restarg:*", From 6f135be2dbcd002afb67da194759190f752c59fc Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Sat, 18 Feb 2023 18:22:50 -0500 Subject: [PATCH 03/15] Block on super location --- lib/syntax_tree/translation/parser.rb | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/syntax_tree/translation/parser.rb b/lib/syntax_tree/translation/parser.rb index 9c53ad14..243b460b 100644 --- a/lib/syntax_tree/translation/parser.rb +++ b/lib/syntax_tree/translation/parser.rb @@ -1576,7 +1576,11 @@ def visit_method_add_block(node) s( type, [visit(node.call), arguments, visit(node.block.bodystmt)], - nil + smap_collection( + srange_node(node.block.opening), + srange_length(node.block.end_char, node.block.opening.is_a?(Kw) ? -3 : -1), + srange_node(node) + ) ) else visit_command_call( From 305ee004c932718ca39af8815a4debc1aa72e745 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Sat, 18 Feb 2023 19:48:15 -0500 Subject: [PATCH 04/15] ; delimiting unless nodes --- lib/syntax_tree/translation/parser.rb | 37 +++++++++++++++------------ 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/lib/syntax_tree/translation/parser.rb b/lib/syntax_tree/translation/parser.rb index 243b460b..4f32c933 100644 --- a/lib/syntax_tree/translation/parser.rb +++ b/lib/syntax_tree/translation/parser.rb @@ -1555,21 +1555,6 @@ def visit_massign(node) # Visit a MethodAddBlock node. def visit_method_add_block(node) case node.call - when Break, Next, ReturnNode - type, arguments = block_children(node.block) - call = visit(node.call) - - s( - call.type, - [ - s( - type, - [*call.children, arguments, visit(node.block.bodystmt)], - nil - ) - ], - nil - ) when ARef, Super, ZSuper type, arguments = block_children(node.block) @@ -1578,7 +1563,10 @@ def visit_method_add_block(node) [visit(node.call), arguments, visit(node.block.bodystmt)], smap_collection( srange_node(node.block.opening), - srange_length(node.block.end_char, node.block.opening.is_a?(Kw) ? -3 : -1), + srange_length( + node.block.end_char, + node.block.opening.is_a?(Kw) ? -3 : -1 + ), srange_node(node) ) ) @@ -2439,9 +2427,24 @@ def visit_unless(node) srange_node(node) ) else + begin_start = node.predicate.end_char + begin_end = + if node.statements.empty? + node.statements.end_char + else + node.statements.body.first.start_char + end + + begin_token = + if buffer.source[begin_start...begin_end].include?("then") + srange_find(begin_start, begin_end, "then") + elsif buffer.source[begin_start...begin_end].include?(";") + srange_find(begin_start, begin_end, ";") + end + smap_condition( srange_length(node.start_char, 6), - srange_search_between(node.predicate, node.statements, "then"), + begin_token, nil, srange_length(node.end_char, -3), srange_node(node) From 1eec9e708387c13766e7fa48d1447b408049df27 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 13 Feb 2023 10:28:09 -0500 Subject: [PATCH 05/15] More whitequark/parser translation --- lib/syntax_tree/parser.rb | 11 +++- lib/syntax_tree/translation/parser.rb | 80 +++++++++++++++++++++++---- 2 files changed, 80 insertions(+), 11 deletions(-) diff --git a/lib/syntax_tree/parser.rb b/lib/syntax_tree/parser.rb index 426bd945..d0a5bf67 100644 --- a/lib/syntax_tree/parser.rb +++ b/lib/syntax_tree/parser.rb @@ -1559,7 +1559,14 @@ def on_elsif(predicate, statements, consequent) beginning = consume_keyword(:elsif) ending = consequent || consume_keyword(:end) - start_char = find_next_statement_start(predicate.location.end_char) + delimiter = + find_keyword_between(:then, predicate, statements) || + find_token_between(Semicolon, predicate, statements) + + tokens.delete(delimiter) if delimiter + start_char = + find_next_statement_start((delimiter || predicate).location.end_char) + statements.bind( self, start_char, @@ -2045,6 +2052,7 @@ def on_if(predicate, statements, consequent) start_char = find_next_statement_start((keyword || predicate).location.end_char) + statements.bind( self, start_char, @@ -3805,6 +3813,7 @@ def on_unless(predicate, statements, consequent) start_char = find_next_statement_start((keyword || predicate).location.end_char) + statements.bind( self, start_char, diff --git a/lib/syntax_tree/translation/parser.rb b/lib/syntax_tree/translation/parser.rb index 4f32c933..8be4fc79 100644 --- a/lib/syntax_tree/translation/parser.rb +++ b/lib/syntax_tree/translation/parser.rb @@ -336,8 +336,8 @@ def visit_assign(node) # Visit an Assoc node. def visit_assoc(node) if node.value.nil? + # { foo: } expression = srange(node.start_char, node.end_char - 1) - type, location = if node.key.value.start_with?(/[A-Z]/) [:const, smap_constant(nil, expression, expression)] @@ -356,13 +356,38 @@ def visit_assoc(node) srange_node(node) ) ) - else + elsif node.key.is_a?(Label) + # { foo: 1 } s( :pair, [visit(node.key), visit(node.value)], smap_operator( - srange_search_between(node.key, node.value, "=>") || - srange_length(node.key.end_char, -1), + srange_length(node.key.end_char, -1), + srange_node(node) + ) + ) + elsif (operator = srange_search_between(node.key, node.value, "=>")) + # { :foo => 1 } + s( + :pair, + [visit(node.key), visit(node.value)], + smap_operator(operator, srange_node(node)) + ) + else + # { "foo": 1 } + key = visit(node.key) + key_location = + smap_collection( + key.location.begin, + srange_length(node.key.end_char - 2, 1), + srange(node.key.start_char, node.key.end_char - 1) + ) + + s( + :pair, + [s(key.type, key.children, key_location), visit(node.value)], + smap_operator( + srange_length(node.key.end_char, -1), srange_node(node) ) ) @@ -769,7 +794,11 @@ def visit_command_call(node) srange(node.start_char, end_char) elsif node.block - srange_node(node.message) + if node.receiver + srange(node.receiver.start_char, node.message.end_char) + else + srange_node(node.message) + end else srange_node(node) end @@ -1010,6 +1039,21 @@ def visit_else(node) # Visit an Elsif node. def visit_elsif(node) + begin_start = node.predicate.end_char + begin_end = + if node.statements.empty? + node.statements.end_char + else + node.statements.body.first.start_char + end + + begin_token = + if buffer.source[begin_start...begin_end].include?("then") + srange_find(begin_start, begin_end, "then") + elsif buffer.source[begin_start...begin_end].include?(";") + srange_find(begin_start, begin_end, ";") + end + else_token = case node.consequent when Elsif @@ -1029,7 +1073,7 @@ def visit_elsif(node) ], smap_condition( srange_length(node.start_char, 5), - nil, + begin_token, else_token, nil, expression @@ -1529,12 +1573,14 @@ def visit_lambda_var(node) location = if node.start_char == node.end_char smap_collection_bare(nil) - else + elsif buffer.source[node.start_char - 1] == "(" smap_collection( srange_length(node.start_char, 1), srange_length(node.end_char, -1), srange_node(node) ) + else + smap_collection_bare(srange_node(node)) end s(:args, visit(node.params).children + shadowargs, location) @@ -1565,7 +1611,7 @@ def visit_method_add_block(node) srange_node(node.block.opening), srange_length( node.block.end_char, - node.block.opening.is_a?(Kw) ? -3 : -1 + node.block.keywords? ? -3 : -1 ), srange_node(node) ) @@ -2244,7 +2290,16 @@ def visit_super(node) ) ) when ArgsForward - s(:super, [visit(node.arguments.arguments)], nil) + s( + :super, + [visit(node.arguments.arguments)], + smap_keyword( + srange_length(node.start_char, 5), + srange_find(node.start_char + 5, node.end_char, "("), + srange_length(node.end_char, -1), + srange_node(node) + ) + ) else s( :super, @@ -2442,10 +2497,15 @@ def visit_unless(node) srange_find(begin_start, begin_end, ";") end + else_token = + if node.consequent + srange_length(node.consequent.start_char, 4) + end + smap_condition( srange_length(node.start_char, 6), begin_token, - nil, + else_token, srange_length(node.end_char, -3), srange_node(node) ) From ce9de3114c537de85cc86f90bf603d56d7eba653 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 21 Feb 2023 10:03:42 -0500 Subject: [PATCH 06/15] Better handle nested constant names --- lib/syntax_tree/index.rb | 75 +++++++++++++++++++++++++++++++++------- test/index_test.rb | 33 ++++++++++++++---- 2 files changed, 90 insertions(+), 18 deletions(-) diff --git a/lib/syntax_tree/index.rb b/lib/syntax_tree/index.rb index ab2460dd..c067606f 100644 --- a/lib/syntax_tree/index.rb +++ b/lib/syntax_tree/index.rb @@ -176,30 +176,64 @@ def location_for(iseq) Location.new(code_location[0], code_location[1]) end + def find_constant_path(insns, index) + insn = insns[index] + + if insn.is_a?(Array) && insn[0] == :opt_getconstant_path + # In this case we're on Ruby 3.2+ and we have an opt_getconstant_path + # instruction, so we already know all of the symbols in the nesting. + insn[1] + elsif insn.is_a?(Symbol) && insn.match?(/\Alabel_\d+/) + # Otherwise, if we have a label then this is very likely the + # destination of an opt_getinlinecache instruction, in which case + # we'll walk backwards to grab up all of the constants. + names = [] + + index -= 1 + until insns[index][0] == :opt_getinlinecache + names.unshift(insns[index][1]) if insns[index][0] == :getconstant + index -= 1 + end + + names + end + end + def index_iseq(iseq, file_comments) results = [] queue = [[iseq, []]] while (current_iseq, current_nesting = queue.shift) - current_iseq[13].each_with_index do |insn, index| + insns = current_iseq[13] + insns.each_with_index do |insn, index| next unless insn.is_a?(Array) case insn[0] when :defineclass _, name, class_iseq, flags = insn + next_nesting = current_nesting.dup + + if (nesting = find_constant_path(insns, index - 2)) + # If there is a constant path in the class name, then we need to + # handle that by updating the nesting. + next_nesting << (nesting << name) + else + # Otherwise we'll add the class name to the nesting. + next_nesting << [name] + end if flags == VM_DEFINECLASS_TYPE_SINGLETON_CLASS # At the moment, we don't support singletons that aren't # defined on self. We could, but it would require more # emulation. - if current_iseq[13][index - 2] != [:putself] + if insns[index - 2] != [:putself] raise NotImplementedError, "singleton class with non-self receiver" end elsif flags & VM_DEFINECLASS_TYPE_MODULE > 0 location = location_for(class_iseq) results << ModuleDefinition.new( - current_nesting, + next_nesting, name, location, EntryComments.new(file_comments, location) @@ -207,14 +241,14 @@ def index_iseq(iseq, file_comments) else location = location_for(class_iseq) results << ClassDefinition.new( - current_nesting, + next_nesting, name, location, EntryComments.new(file_comments, location) ) end - queue << [class_iseq, current_nesting + [name]] + queue << [class_iseq, next_nesting] when :definemethod location = location_for(insn[2]) results << MethodDefinition.new( @@ -259,24 +293,36 @@ def initialize visit_methods do def visit_class(node) - name = visit(node.constant).to_sym + names = visit(node.constant) + nesting << names + location = Location.new(node.location.start_line, node.location.start_column) results << ClassDefinition.new( nesting.dup, - name, + names.last, location, comments_for(node) ) - nesting << name super nesting.pop end def visit_const_ref(node) - node.constant.value + [node.constant.value.to_sym] + end + + def visit_const_path_ref(node) + names = + if node.parent.is_a?(ConstPathRef) + visit(node.parent) + else + [visit(node.parent)] + end + + names << node.constant.value.to_sym end def visit_def(node) @@ -302,18 +348,19 @@ def visit_def(node) end def visit_module(node) - name = visit(node.constant).to_sym + names = visit(node.constant) + nesting << names + location = Location.new(node.location.start_line, node.location.start_column) results << ModuleDefinition.new( nesting.dup, - name, + names.last, location, comments_for(node) ) - nesting << name super nesting.pop end @@ -327,6 +374,10 @@ def visit_statements(node) @statements = node super end + + def visit_var_ref(node) + node.value.value.to_sym + end end private diff --git a/test/index_test.rb b/test/index_test.rb index 6bb83881..b00b4bc6 100644 --- a/test/index_test.rb +++ b/test/index_test.rb @@ -7,14 +7,14 @@ class IndexTest < Minitest::Test def test_module index_each("module Foo; end") do |entry| assert_equal :Foo, entry.name - assert_empty entry.nesting + assert_equal [[:Foo]], entry.nesting end end def test_module_nested index_each("module Foo; module Bar; end; end") do |entry| assert_equal :Bar, entry.name - assert_equal [:Foo], entry.nesting + assert_equal [[:Foo], [:Bar]], entry.nesting end end @@ -28,14 +28,35 @@ def test_module_comments def test_class index_each("class Foo; end") do |entry| assert_equal :Foo, entry.name - assert_empty entry.nesting + assert_equal [[:Foo]], entry.nesting + end + end + + def test_class_paths_2 + index_each("class Foo::Bar; end") do |entry| + assert_equal :Bar, entry.name + assert_equal [[:Foo, :Bar]], entry.nesting + end + end + + def test_class_paths_3 + index_each("class Foo::Bar::Baz; end") do |entry| + assert_equal :Baz, entry.name + assert_equal [[:Foo, :Bar, :Baz]], entry.nesting end end def test_class_nested index_each("class Foo; class Bar; end; end") do |entry| assert_equal :Bar, entry.name - assert_equal [:Foo], entry.nesting + assert_equal [[:Foo], [:Bar]], entry.nesting + end + end + + def test_class_paths_nested + index_each("class Foo; class Bar::Baz::Qux; end; end") do |entry| + assert_equal :Qux, entry.name + assert_equal [[:Foo], [:Bar, :Baz, :Qux]], entry.nesting end end @@ -56,7 +77,7 @@ def test_method def test_method_nested index_each("class Foo; def foo; end; end") do |entry| assert_equal :foo, entry.name - assert_equal [:Foo], entry.nesting + assert_equal [[:Foo]], entry.nesting end end @@ -77,7 +98,7 @@ def test_singleton_method def test_singleton_method_nested index_each("class Foo; def self.foo; end; end") do |entry| assert_equal :foo, entry.name - assert_equal [:Foo], entry.nesting + assert_equal [[:Foo]], entry.nesting end end From 2d5f9fc2d4af804662b470c64fe0479277a4b88c Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 21 Feb 2023 10:16:03 -0500 Subject: [PATCH 07/15] Handle superclasses --- lib/syntax_tree/index.rb | 56 ++++++++++++++++++++++++++++++---------- test/index_test.rb | 30 +++++++++++++++++++++ 2 files changed, 72 insertions(+), 14 deletions(-) diff --git a/lib/syntax_tree/index.rb b/lib/syntax_tree/index.rb index c067606f..c2850f6a 100644 --- a/lib/syntax_tree/index.rb +++ b/lib/syntax_tree/index.rb @@ -20,11 +20,12 @@ def initialize(line, column) # This entry represents a class definition using the class keyword. class ClassDefinition - attr_reader :nesting, :name, :location, :comments + attr_reader :nesting, :name, :superclass, :location, :comments - def initialize(nesting, name, location, comments) + def initialize(nesting, name, superclass, location, comments) @nesting = nesting @name = name + @superclass = superclass @location = location @comments = comments end @@ -182,7 +183,7 @@ def find_constant_path(insns, index) if insn.is_a?(Array) && insn[0] == :opt_getconstant_path # In this case we're on Ruby 3.2+ and we have an opt_getconstant_path # instruction, so we already know all of the symbols in the nesting. - insn[1] + [index - 1, insn[1]] elsif insn.is_a?(Symbol) && insn.match?(/\Alabel_\d+/) # Otherwise, if we have a label then this is very likely the # destination of an opt_getinlinecache instruction, in which case @@ -195,7 +196,9 @@ def find_constant_path(insns, index) index -= 1 end - names + [index - 1, names] + else + [index, []] end end @@ -213,7 +216,24 @@ def index_iseq(iseq, file_comments) _, name, class_iseq, flags = insn next_nesting = current_nesting.dup - if (nesting = find_constant_path(insns, index - 2)) + # This is the index we're going to search for the nested constant + # path within the declaration name. + constant_index = index - 2 + + # This is the superclass of the class being defined. + superclass = [] + + # If there is a superclass, then we're going to find it here and + # then update the constant_index as necessary. + if flags & VM_DEFINECLASS_FLAG_HAS_SUPERCLASS > 0 + constant_index, superclass = find_constant_path(insns, index - 1) + + if superclass.empty? + raise NotImplementedError, "superclass with non constant path" + end + end + + if (_, nesting = find_constant_path(insns, constant_index)) # If there is a constant path in the class name, then we need to # handle that by updating the nesting. next_nesting << (nesting << name) @@ -243,6 +263,7 @@ def index_iseq(iseq, file_comments) results << ClassDefinition.new( next_nesting, name, + superclass, location, EntryComments.new(file_comments, location) ) @@ -299,9 +320,23 @@ def visit_class(node) location = Location.new(node.location.start_line, node.location.start_column) + superclass = + if node.superclass + visited = visit(node.superclass) + + if visited == [[]] + raise NotImplementedError, "superclass with non constant path" + end + + visited + else + [] + end + results << ClassDefinition.new( nesting.dup, names.last, + superclass, location, comments_for(node) ) @@ -315,14 +350,7 @@ def visit_const_ref(node) end def visit_const_path_ref(node) - names = - if node.parent.is_a?(ConstPathRef) - visit(node.parent) - else - [visit(node.parent)] - end - - names << node.constant.value.to_sym + visit(node.parent) << node.constant.value.to_sym end def visit_def(node) @@ -376,7 +404,7 @@ def visit_statements(node) end def visit_var_ref(node) - node.value.value.to_sym + [node.value.value.to_sym] end end diff --git a/test/index_test.rb b/test/index_test.rb index b00b4bc6..9101870b 100644 --- a/test/index_test.rb +++ b/test/index_test.rb @@ -60,6 +60,36 @@ def test_class_paths_nested end end + def test_class_superclass + index_each("class Foo < Bar; end") do |entry| + assert_equal :Foo, entry.name + assert_equal [[:Foo]], entry.nesting + assert_equal [:Bar], entry.superclass + end + end + + def test_class_path_superclass + index_each("class Foo::Bar < Baz::Qux; end") do |entry| + assert_equal :Bar, entry.name + assert_equal [[:Foo, :Bar]], entry.nesting + assert_equal [:Baz, :Qux], entry.superclass + end + end + + def test_class_path_superclass_unknown + source = "class Foo < bar; end" + + assert_raises NotImplementedError do + Index.index(source, backend: Index::ParserBackend.new) + end + + if defined?(RubyVM::InstructionSequence) + assert_raises NotImplementedError do + Index.index(source, backend: Index::ISeqBackend.new) + end + end + end + def test_class_comments index_each("# comment1\n# comment2\nclass Foo; end") do |entry| assert_equal :Foo, entry.name From a886179e15831e22f958c859fec4456a48eddcc8 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 21 Feb 2023 10:43:08 -0500 Subject: [PATCH 08/15] Handle line numbers in constant searching --- lib/syntax_tree/index.rb | 28 +++++++++++++++++++++++----- test/index_test.rb | 10 +++++----- 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/lib/syntax_tree/index.rb b/lib/syntax_tree/index.rb index c2850f6a..c6973847 100644 --- a/lib/syntax_tree/index.rb +++ b/lib/syntax_tree/index.rb @@ -178,6 +178,7 @@ def location_for(iseq) end def find_constant_path(insns, index) + index -= 1 while insns[index].is_a?(Integer) insn = insns[index] if insn.is_a?(Array) && insn[0] == :opt_getconstant_path @@ -191,8 +192,12 @@ def find_constant_path(insns, index) names = [] index -= 1 - until insns[index][0] == :opt_getinlinecache - names.unshift(insns[index][1]) if insns[index][0] == :getconstant + until insns[index].is_a?(Array) && + insns[index][0] == :opt_getinlinecache + if insns[index].is_a?(Array) && insns[index][0] == :getconstant + names.unshift(insns[index][1]) + end + index -= 1 end @@ -207,9 +212,20 @@ def index_iseq(iseq, file_comments) queue = [[iseq, []]] while (current_iseq, current_nesting = queue.shift) + line = current_iseq[8] insns = current_iseq[13] + insns.each_with_index do |insn, index| - next unless insn.is_a?(Array) + case insn + when Integer + line = insn + next + when Array + # continue on + else + # skip everything else + next + end case insn[0] when :defineclass @@ -226,10 +242,12 @@ def index_iseq(iseq, file_comments) # If there is a superclass, then we're going to find it here and # then update the constant_index as necessary. if flags & VM_DEFINECLASS_FLAG_HAS_SUPERCLASS > 0 - constant_index, superclass = find_constant_path(insns, index - 1) + constant_index, superclass = + find_constant_path(insns, index - 1) if superclass.empty? - raise NotImplementedError, "superclass with non constant path" + raise NotImplementedError, + "superclass with non constant path on line #{line}" end end diff --git a/test/index_test.rb b/test/index_test.rb index 9101870b..60c51d9d 100644 --- a/test/index_test.rb +++ b/test/index_test.rb @@ -35,14 +35,14 @@ def test_class def test_class_paths_2 index_each("class Foo::Bar; end") do |entry| assert_equal :Bar, entry.name - assert_equal [[:Foo, :Bar]], entry.nesting + assert_equal [%i[Foo Bar]], entry.nesting end end def test_class_paths_3 index_each("class Foo::Bar::Baz; end") do |entry| assert_equal :Baz, entry.name - assert_equal [[:Foo, :Bar, :Baz]], entry.nesting + assert_equal [%i[Foo Bar Baz]], entry.nesting end end @@ -56,7 +56,7 @@ def test_class_nested def test_class_paths_nested index_each("class Foo; class Bar::Baz::Qux; end; end") do |entry| assert_equal :Qux, entry.name - assert_equal [[:Foo], [:Bar, :Baz, :Qux]], entry.nesting + assert_equal [[:Foo], %i[Bar Baz Qux]], entry.nesting end end @@ -71,8 +71,8 @@ def test_class_superclass def test_class_path_superclass index_each("class Foo::Bar < Baz::Qux; end") do |entry| assert_equal :Bar, entry.name - assert_equal [[:Foo, :Bar]], entry.nesting - assert_equal [:Baz, :Qux], entry.superclass + assert_equal [%i[Foo Bar]], entry.nesting + assert_equal %i[Baz Qux], entry.superclass end end From e68e3f6e34c8ff7cde4ec69bd45d8a5af72b418f Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 21 Feb 2023 10:45:42 -0500 Subject: [PATCH 09/15] Document indexing --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index 500d5fad..03942d46 100644 --- a/README.md +++ b/README.md @@ -29,6 +29,7 @@ It is built with only standard library dependencies. It additionally ships with - [SyntaxTree.format(source)](#syntaxtreeformatsource) - [SyntaxTree.mutation(&block)](#syntaxtreemutationblock) - [SyntaxTree.search(source, query, &block)](#syntaxtreesearchsource-query-block) + - [SyntaxTree.index(source)](#syntaxtreeindexsource) - [Nodes](#nodes) - [child_nodes](#child_nodes) - [copy(**attrs)](#copyattrs) @@ -347,6 +348,10 @@ This function yields a new mutation visitor to the block, and then returns the i This function takes an input string containing Ruby code, an input string containing a valid Ruby `in` clause expression that can be used to match against nodes in the tree (can be generated using `stree expr`, `stree match`, or `Node#construct_keys`), and a block. Each node that matches the given query will be yielded to the block. The block will receive the node as its only argument. +### SyntaxTree.index(source) + +This function takes an input string containing Ruby code and returns a list of all of the class declarations, module declarations, and method definitions within a file. Each of the entries also has access to its associated comments. This is useful for generating documentation or index information for a file to support something like go-to-definition. + ## Nodes There are many different node types in the syntax tree. They are meant to be treated as immutable structs containing links to child nodes with minimal logic contained within their implementation. However, for the most part they all respond to a certain set of APIs, listed below. From 4cb8b9bb6745c6512bc34f12dd13c57d08b8a1d0 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 21 Feb 2023 10:50:24 -0500 Subject: [PATCH 10/15] Changelog for indexing --- CHANGELOG.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 34c40e40..3548fa6e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,14 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a ## [Unreleased] +### Added + +- The class declarations returned as the result of the indexing operation now have their superclass as a field. It is returned as an array of constants. If the superclass is anything other than a constant lookup, then it raises an error. + +### Changed + +- The `nesting` field on the results of the indexing operation is no longer a single flat array. Instead it is an array of arrays, where each array is a single nesting level. This more accurately reflects the nesting of the nodes in the tree. For example, `class Foo::Bar::Baz; end` would result in `[Foo, Bar, Baz]`, but that incorrectly implies that you can see constants at each of those levels. Now this would result in `[[Foo, Bar, Baz]]` to indicate that it can see either the top level or constants within the scope of `Foo::Bar::Baz` only. + ## [6.0.0] - 2023-02-10 ### Added From 3742be00e332b9910c6c0ebcf693c589e5c5da54 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 23 Feb 2023 18:00:40 +0000 Subject: [PATCH 11/15] Bump rubocop from 1.45.1 to 1.46.0 Bumps [rubocop](https://github.com/rubocop/rubocop) from 1.45.1 to 1.46.0. - [Release notes](https://github.com/rubocop/rubocop/releases) - [Changelog](https://github.com/rubocop/rubocop/blob/master/CHANGELOG.md) - [Commits](https://github.com/rubocop/rubocop/compare/v1.45.1...v1.46.0) --- updated-dependencies: - dependency-name: rubocop dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- Gemfile.lock | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Gemfile.lock b/Gemfile.lock index 325d89b3..1995351b 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -12,25 +12,25 @@ GEM json (2.6.3) minitest (5.17.0) parallel (1.22.1) - parser (3.2.0.0) + parser (3.2.1.0) ast (~> 2.4.1) prettier_print (1.2.0) rainbow (3.1.1) rake (13.0.6) - regexp_parser (2.6.2) + regexp_parser (2.7.0) rexml (3.2.5) - rubocop (1.45.1) + rubocop (1.46.0) json (~> 2.3) parallel (~> 1.10) parser (>= 3.2.0.0) rainbow (>= 2.2.2, < 4.0) regexp_parser (>= 1.8, < 3.0) rexml (>= 3.2.5, < 4.0) - rubocop-ast (>= 1.24.1, < 2.0) + rubocop-ast (>= 1.26.0, < 2.0) ruby-progressbar (~> 1.7) unicode-display_width (>= 2.4.0, < 3.0) - rubocop-ast (1.24.1) - parser (>= 3.1.1.0) + rubocop-ast (1.26.0) + parser (>= 3.2.1.0) ruby-progressbar (1.11.0) simplecov (0.22.0) docile (~> 1.1) From 2993e27af7a87e369a7b6df0de6bd2fa646acafb Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Sun, 26 Feb 2023 12:38:22 -0500 Subject: [PATCH 12/15] Handle assoc value omission with mixed delimiters --- CHANGELOG.md | 1 + lib/syntax_tree/node.rb | 37 ++++++++++++++++++++++++++----------- test/fixtures/hash.rb | 2 ++ 3 files changed, 29 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3548fa6e..27b1813f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a ### Changed - The `nesting` field on the results of the indexing operation is no longer a single flat array. Instead it is an array of arrays, where each array is a single nesting level. This more accurately reflects the nesting of the nodes in the tree. For example, `class Foo::Bar::Baz; end` would result in `[Foo, Bar, Baz]`, but that incorrectly implies that you can see constants at each of those levels. Now this would result in `[[Foo, Bar, Baz]]` to indicate that it can see either the top level or constants within the scope of `Foo::Bar::Baz` only. +- When formatting hashes that have omitted values and mixed hash rockets with labels, the formatting now maintains whichever delimiter was used in the source. This is because forcing the use of hash rockets with omitted values results in a syntax error. ## [6.0.0] - 2023-02-10 diff --git a/lib/syntax_tree/node.rb b/lib/syntax_tree/node.rb index 567ec0c8..dd4eb3ab 100644 --- a/lib/syntax_tree/node.rb +++ b/lib/syntax_tree/node.rb @@ -1780,13 +1780,25 @@ def format_key(q, key) end def self.for(container) - labels = - container.assocs.all? do |assoc| - next true if assoc.is_a?(AssocSplat) - + container.assocs.each do |assoc| + if assoc.is_a?(AssocSplat) + # Splat nodes do not impact the formatting choice. + elsif assoc.value.nil? + # If the value is nil, then it has been omitted. In this case we have + # to match the existing formatting because standardizing would + # potentially break the code. For example: + # + # { first:, "second" => "value" } + # + return Identity.new + else + # Otherwise, we need to check the type of the key. If it's a label or + # dynamic symbol, we can use labels. If it's a symbol literal then it + # needs to match a certain pattern to be used as a label. If it's + # anything else, then we need to use hash rockets. case assoc.key - when Label - true + when Label, DynaSymbol + # Here labels can be used. when SymbolLiteral # When attempting to convert a hash rocket into a hash label, # you need to take care because only certain patterns are @@ -1794,15 +1806,18 @@ def self.for(container) # arguments to methods, but don't specify what that is. After # some experimentation, it looks like it's: value = assoc.key.value.value - value.match?(/^[_A-Za-z]/) && !value.end_with?("=") - when DynaSymbol - true + + if !value.match?(/^[_A-Za-z]/) || value.end_with?("=") + return Rockets.new + end else - false + # If the value is anything else, we have to use hash rockets. + return Rockets.new end end + end - (labels ? Labels : Rockets).new + Labels.new end end diff --git a/test/fixtures/hash.rb b/test/fixtures/hash.rb index 9c43a4fe..70e89f69 100644 --- a/test/fixtures/hash.rb +++ b/test/fixtures/hash.rb @@ -29,3 +29,5 @@ { # comment } +% # >= 3.1.0 +{ foo:, "bar" => "baz" } From b0ba92edf5fc371243cb1875c892387515816532 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Sun, 26 Feb 2023 12:57:03 -0500 Subject: [PATCH 13/15] Handle keywords with bare hashes --- CHANGELOG.md | 1 + lib/syntax_tree/node.rb | 10 +++++++++- test/fixtures/break.rb | 2 ++ test/fixtures/next.rb | 2 ++ test/fixtures/return.rb | 2 ++ 5 files changed, 16 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 27b1813f..d3fd9964 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a - The `nesting` field on the results of the indexing operation is no longer a single flat array. Instead it is an array of arrays, where each array is a single nesting level. This more accurately reflects the nesting of the nodes in the tree. For example, `class Foo::Bar::Baz; end` would result in `[Foo, Bar, Baz]`, but that incorrectly implies that you can see constants at each of those levels. Now this would result in `[[Foo, Bar, Baz]]` to indicate that it can see either the top level or constants within the scope of `Foo::Bar::Baz` only. - When formatting hashes that have omitted values and mixed hash rockets with labels, the formatting now maintains whichever delimiter was used in the source. This is because forcing the use of hash rockets with omitted values results in a syntax error. +- Handle the case where a bare hash is used after the `break`, `next`, or `return` keywords. Previously this would result in hash labels which is not valid syntax. Now it maintains the delimiters used in the source. ## [6.0.0] - 2023-02-10 diff --git a/lib/syntax_tree/node.rb b/lib/syntax_tree/node.rb index dd4eb3ab..2dbe3a79 100644 --- a/lib/syntax_tree/node.rb +++ b/lib/syntax_tree/node.rb @@ -1874,7 +1874,15 @@ def ===(other) end def format_key(q, key) - (@key_formatter ||= HashKeyFormatter.for(self)).format_key(q, key) + @key_formatter ||= + case q.parents.take(3).last + when Break, Next, ReturnNode + HashKeyFormatter::Identity.new + else + HashKeyFormatter.for(self) + end + + @key_formatter.format_key(q, key) end end diff --git a/test/fixtures/break.rb b/test/fixtures/break.rb index a608a6b2..519becda 100644 --- a/test/fixtures/break.rb +++ b/test/fixtures/break.rb @@ -33,3 +33,5 @@ qux end ) +% +break :foo => "bar" diff --git a/test/fixtures/next.rb b/test/fixtures/next.rb index 79a8c62e..66e90028 100644 --- a/test/fixtures/next.rb +++ b/test/fixtures/next.rb @@ -72,3 +72,5 @@ fun foo do end ) +% +next :foo => "bar" diff --git a/test/fixtures/return.rb b/test/fixtures/return.rb index 8f7d0aa3..7092464f 100644 --- a/test/fixtures/return.rb +++ b/test/fixtures/return.rb @@ -37,3 +37,5 @@ return [] % return [1] +% +return :foo => "bar" From 7dcc84396bf196b24b37165b9d38e6cde46265be Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Sun, 26 Feb 2023 13:11:52 -0500 Subject: [PATCH 14/15] Split up chained << expressions --- CHANGELOG.md | 1 + lib/syntax_tree/node.rb | 16 ++++++++++++---- test/fixtures/binary.rb | 5 +++++ 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d3fd9964..bb8425bd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a - The `nesting` field on the results of the indexing operation is no longer a single flat array. Instead it is an array of arrays, where each array is a single nesting level. This more accurately reflects the nesting of the nodes in the tree. For example, `class Foo::Bar::Baz; end` would result in `[Foo, Bar, Baz]`, but that incorrectly implies that you can see constants at each of those levels. Now this would result in `[[Foo, Bar, Baz]]` to indicate that it can see either the top level or constants within the scope of `Foo::Bar::Baz` only. - When formatting hashes that have omitted values and mixed hash rockets with labels, the formatting now maintains whichever delimiter was used in the source. This is because forcing the use of hash rockets with omitted values results in a syntax error. - Handle the case where a bare hash is used after the `break`, `next`, or `return` keywords. Previously this would result in hash labels which is not valid syntax. Now it maintains the delimiters used in the source. +- The `<<` operator will now break on chained `<<` expressions. Previously it would always stay flat. ## [6.0.0] - 2023-02-10 diff --git a/lib/syntax_tree/node.rb b/lib/syntax_tree/node.rb index 2dbe3a79..c4bc1495 100644 --- a/lib/syntax_tree/node.rb +++ b/lib/syntax_tree/node.rb @@ -2097,10 +2097,7 @@ def format(q) q.group { q.format(left) } q.text(" ") unless power - if operator == :<< - q.text("<< ") - q.format(right) - else + if operator != :<< q.group do q.text(operator.name) q.indent do @@ -2108,6 +2105,17 @@ def format(q) q.format(right) end end + elsif left.is_a?(Binary) && left.operator == :<< + q.group do + q.text(operator.name) + q.indent do + power ? q.breakable_empty : q.breakable_space + q.format(right) + end + end + else + q.text("<< ") + q.format(right) end end end diff --git a/test/fixtures/binary.rb b/test/fixtures/binary.rb index f8833cdc..4cb56cbf 100644 --- a/test/fixtures/binary.rb +++ b/test/fixtures/binary.rb @@ -3,6 +3,11 @@ % foo << bar % +foo << barrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr << barrrrrrrrrrrrr << barrrrrrrrrrrrrrrrrr +- +foo << barrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr << barrrrrrrrrrrrr << + barrrrrrrrrrrrrrrrrr +% foo**bar % foo * barrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr From ff9094ac1364e78041872b38b642e4e1b5e21a1e Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Sun, 26 Feb 2023 14:15:34 -0500 Subject: [PATCH 15/15] Bump to v6.0.1 --- CHANGELOG.md | 5 ++++- Gemfile.lock | 2 +- lib/syntax_tree/version.rb | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bb8425bd..018d5b25 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a ## [Unreleased] +## [6.0.1] - 2023-02-26 + ### Added - The class declarations returned as the result of the indexing operation now have their superclass as a field. It is returned as an array of constants. If the superclass is anything other than a constant lookup, then it raises an error. @@ -570,7 +572,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a - 🎉 Initial release! 🎉 -[unreleased]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v6.0.0...HEAD +[unreleased]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v6.0.1...HEAD +[6.0.1]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v6.0.0...v6.0.1 [6.0.0]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v5.3.0...v6.0.0 [5.3.0]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v5.2.0...v5.3.0 [5.2.0]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v5.1.0...v5.2.0 diff --git a/Gemfile.lock b/Gemfile.lock index 1995351b..c7ffc7d0 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,7 +1,7 @@ PATH remote: . specs: - syntax_tree (6.0.0) + syntax_tree (6.0.1) prettier_print (>= 1.2.0) GEM diff --git a/lib/syntax_tree/version.rb b/lib/syntax_tree/version.rb index 1f028f89..0b3502d1 100644 --- a/lib/syntax_tree/version.rb +++ b/lib/syntax_tree/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module SyntaxTree - VERSION = "6.0.0" + VERSION = "6.0.1" end