From bc9e665798b68081c0cb14c75cb2fddc7c331d40 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 25 Jan 2023 11:38:38 -0500 Subject: [PATCH] Indexing functionality --- lib/syntax_tree.rb | 15 +++ lib/syntax_tree/index.rb | 223 +++++++++++++++++++++++++++++++++++++++ test/index_test.rb | 59 +++++++++++ 3 files changed, 297 insertions(+) create mode 100644 lib/syntax_tree/index.rb create mode 100644 test/index_test.rb diff --git a/lib/syntax_tree.rb b/lib/syntax_tree.rb index f1217ac3..f5c71aba 100644 --- a/lib/syntax_tree.rb +++ b/lib/syntax_tree.rb @@ -26,6 +26,7 @@ require_relative "syntax_tree/parser" require_relative "syntax_tree/pattern" require_relative "syntax_tree/search" +require_relative "syntax_tree/index" require_relative "syntax_tree/yarv" require_relative "syntax_tree/yarv/bf" @@ -116,4 +117,18 @@ def self.read(filepath) def self.search(source, query, &block) Search.new(Pattern.new(query).compile).scan(parse(source), &block) end + + # Indexes the given source code to return a list of all class, module, and + # method definitions. Used to quickly provide indexing capability for IDEs or + # documentation generation. + def self.index(source) + Index.index(source) + end + + # Indexes the given file to return a list of all class, module, and method + # definitions. Used to quickly provide indexing capability for IDEs or + # documentation generation. + def self.index_file(filepath) + Index.index_file(filepath) + end end diff --git a/lib/syntax_tree/index.rb b/lib/syntax_tree/index.rb new file mode 100644 index 00000000..60158314 --- /dev/null +++ b/lib/syntax_tree/index.rb @@ -0,0 +1,223 @@ +# frozen_string_literal: true + +module SyntaxTree + # This class can be used to build an index of the structure of Ruby files. We + # define an index as the list of constants and methods defined within a file. + # + # This index strives to be as fast as possible to better support tools like + # IDEs. 
Because of that, it has different backends depending on what + # functionality is available. + module Index + # This is a location for an index entry. + class Location + attr_reader :line, :column + + def initialize(line, column) + @line = line + @column = column + end + end + + # This entry represents a class definition using the class keyword. + class ClassDefinition + attr_reader :nesting, :name, :location + + def initialize(nesting, name, location) + @nesting = nesting + @name = name + @location = location + end + end + + # This entry represents a module definition using the module keyword. + class ModuleDefinition + attr_reader :nesting, :name, :location + + def initialize(nesting, name, location) + @nesting = nesting + @name = name + @location = location + end + end + + # This entry represents a method definition using the def keyword. + class MethodDefinition + attr_reader :nesting, :name, :location + + def initialize(nesting, name, location) + @nesting = nesting + @name = name + @location = location + end + end + + # This entry represents a singleton method definition using the def keyword + # with a specified target. + class SingletonMethodDefinition + attr_reader :nesting, :name, :location + + def initialize(nesting, name, location) + @nesting = nesting + @name = name + @location = location + end + end + + # This backend creates the index using RubyVM::InstructionSequence, which is + # faster than using the Syntax Tree parser, but is not available on all + # runtimes. 
# Index backend that walks the array form of a compiled instruction
# sequence (RubyVM::InstructionSequence#to_a) and records every class,
# module, method, and singleton method definition it encounters.
class ISeqBackend
  # Values carried in the flags operand of the defineclass instruction.
  VM_DEFINECLASS_TYPE_CLASS = 0x00
  VM_DEFINECLASS_TYPE_SINGLETON_CLASS = 0x01
  VM_DEFINECLASS_TYPE_MODULE = 0x02
  VM_DEFINECLASS_FLAG_SCOPED = 0x08
  VM_DEFINECLASS_FLAG_HAS_SUPERCLASS = 0x10

  # Compiles the given source string and returns the list of index entries
  # found in it.
  def index(source)
    index_iseq(RubyVM::InstructionSequence.compile(source).to_a)
  end

  # Compiles the file at the given path and returns the list of index
  # entries found in it.
  def index_file(filepath)
    index_iseq(RubyVM::InstructionSequence.compile_file(filepath).to_a)
  end

  private

  # Walks the given serialized instruction sequence breadth-first. Class and
  # module bodies are pushed onto the queue together with their nesting so
  # that definitions inside them are indexed as well; method bodies are not
  # descended into.
  #
  # Returns an array of ClassDefinition, ModuleDefinition, MethodDefinition,
  # and SingletonMethodDefinition entries.
  #
  # Raises NotImplementedError for a singleton class whose receiver is not
  # self.
  def index_iseq(iseq)
    results = []
    queue = [[iseq, []]]

    while (current_iseq, current_nesting = queue.shift)
      # Slot 13 of the serialized form holds the instruction list. Entries
      # that are not arrays (line numbers, labels, events) are skipped.
      insns = current_iseq[13]

      insns.each_with_index do |insn, index|
        next unless insn.is_a?(Array)

        case insn[0]
        when :defineclass
          _, name, class_iseq, flags = insn

          if flags == VM_DEFINECLASS_TYPE_SINGLETON_CLASS
            # At the moment, we don't support singletons that aren't
            # defined on self. We could, but it would require more
            # emulation.
            if insns[index - 2] != [:putself]
              raise NotImplementedError,
                    "singleton class with non-self receiver"
            end
          elsif (flags & VM_DEFINECLASS_TYPE_MODULE).positive?
            results << ModuleDefinition.new(
              current_nesting,
              name,
              location_for(class_iseq)
            )
          else
            results << ClassDefinition.new(
              current_nesting,
              name,
              location_for(class_iseq)
            )
          end

          queue << [class_iseq, current_nesting + [name]]
        when :definemethod
          # definemethod is the instruction for a plain `def name`, so it
          # maps to an instance method entry.
          _, name, method_iseq = insn

          results << MethodDefinition.new(
            current_nesting,
            name,
            location_for(method_iseq)
          )
        when :definesmethod
          # definesmethod is the instruction for `def receiver.name`, so it
          # maps to a singleton method entry.
          _, name, method_iseq = insn

          results << SingletonMethodDefinition.new(
            current_nesting,
            name,
            location_for(method_iseq)
          )
        end
      end
    end

    results
  end

  # Builds a Location from the :code_location recorded in slot 4 of a
  # serialized instruction sequence; its first two elements are used as the
  # line and the column.
  def location_for(iseq)
    code_location = iseq[4][:code_location]
    Location.new(code_location[0], code_location[1])
  end
end
# Index backend built on the Syntax Tree parser: the source is parsed and
# the resulting tree is walked by a visitor that collects the entries. It
# is slower than reading instruction sequences, but works on every runtime.
class ParserBackend
  # Visitor that accumulates index entries while tracking the names of the
  # class and module bodies it is currently inside.
  class IndexVisitor < Visitor
    attr_reader :results, :nesting

    def initialize
      @results = []
      @nesting = []
    end

    # Records the class, then visits its body with the class name pushed
    # onto the nesting stack.
    def visit_class(node)
      constant_name = visit(node.constant).to_sym
      position = node.location
      entry =
        ClassDefinition.new(
          nesting.dup,
          constant_name,
          Location.new(position.start_line, position.start_column)
        )

      results.push(entry)
      nesting.push(constant_name)

      super
      nesting.pop
    end

    # Yields the raw constant name so that visit_class and visit_module can
    # turn it into a symbol.
    def visit_const_ref(node)
      node.constant.value
    end

    # Records the method; a def with an explicit target is recorded as a
    # singleton method. The method body is not visited.
    def visit_def(node)
      method_name = node.name.value.to_sym
      position = node.location
      location = Location.new(position.start_line, position.start_column)

      entry_class =
        node.target.nil? ? MethodDefinition : SingletonMethodDefinition

      results << entry_class.new(nesting.dup, method_name, location)
    end

    # Records the module, then visits its body with the module name pushed
    # onto the nesting stack.
    def visit_module(node)
      constant_name = visit(node.constant).to_sym
      position = node.location
      entry =
        ModuleDefinition.new(
          nesting.dup,
          constant_name,
          Location.new(position.start_line, position.start_column)
        )

      results.push(entry)
      nesting.push(constant_name)

      super
      nesting.pop
    end

    # Visits the whole program and hands back the collected entries.
    def visit_program(node)
      super
      results
    end
  end

  # Parses the given source and returns the entries collected by the
  # visitor.
  def index(source)
    program = SyntaxTree.parse(source)
    program.accept(IndexVisitor.new)
  end

  # Reads the file at the given path and indexes its contents.
  def index_file(filepath)
    source = SyntaxTree.read(filepath)
    index(source)
  end
end

# The backend class used to perform the indexing, chosen according to what
# the current runtime provides.
INDEX_BACKEND =
  if defined?(RubyVM::InstructionSequence)
    ISeqBackend
  else
    ParserBackend
  end

# This method accepts source code and then indexes it.
def self.index(source)
  backend = INDEX_BACKEND.new
  backend.index(source)
end

# This method accepts a filepath and then indexes it.
+ def self.index_file(filepath) + INDEX_BACKEND.new.index_file(filepath) + end + end +end diff --git a/test/index_test.rb b/test/index_test.rb new file mode 100644 index 00000000..3ea02a20 --- /dev/null +++ b/test/index_test.rb @@ -0,0 +1,59 @@ +# frozen_string_literal: true + +require_relative "test_helper" + +module SyntaxTree + class IndexTest < Minitest::Test + def test_module + index_each("module Foo; end") do |entry| + assert_equal :Foo, entry.name + assert_empty entry.nesting + end + end + + def test_module_nested + index_each("module Foo; module Bar; end; end") do |entry| + assert_equal :Bar, entry.name + assert_equal [:Foo], entry.nesting + end + end + + def test_class + index_each("class Foo; end") do |entry| + assert_equal :Foo, entry.name + assert_empty entry.nesting + end + end + + def test_class_nested + index_each("class Foo; class Bar; end; end") do |entry| + assert_equal :Bar, entry.name + assert_equal [:Foo], entry.nesting + end + end + + def test_method + index_each("def foo; end") do |entry| + assert_equal :foo, entry.name + assert_empty entry.nesting + end + end + + def test_method_nested + index_each("class Foo; def foo; end; end") do |entry| + assert_equal :foo, entry.name + assert_equal [:Foo], entry.nesting + end + end + + private + + def index_each(source) + yield SyntaxTree::Index::ParserBackend.new.index(source).last + + if defined?(RubyVM::InstructionSequence) + yield SyntaxTree::Index::ISeqBackend.new.index(source).last + end + end + end +end