diff --git a/.gitignore b/.gitignore index 2838e82b..69755243 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,4 @@ /vendor/ test.rb +query.txt diff --git a/.rubocop.yml b/.rubocop.yml index 4dbeeb33..c0892d8a 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -28,6 +28,9 @@ Lint/InterpolationCheck: Lint/MissingSuper: Enabled: false +Lint/RedundantRequireStatement: + Enabled: false + Lint/UnusedMethodArgument: AllowUnusedKeywordArguments: true diff --git a/CHANGELOG.md b/CHANGELOG.md index 46f47ec9..bbaf044e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,23 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a ## [Unreleased] +## [4.2.0] - 2022-10-25 + +### Added + +- [#182](https://github.com/ruby-syntax-tree/syntax_tree/pull/182) - The new `stree expr` CLI command will function similarly to the `stree match` CLI command except that it only outputs the first expression of the program. +- [#182](https://github.com/ruby-syntax-tree/syntax_tree/pull/182) - Added the `SyntaxTree::Pattern` class for compiling `in` expressions into procs. + +### Changed + +- [#182](https://github.com/ruby-syntax-tree/syntax_tree/pull/182) - Much more syntax is now supported by the search command. + +## [4.1.0] - 2022-10-24 + +### Added + +- [#180](https://github.com/ruby-syntax-tree/syntax_tree/pull/180) - The new `stree search` CLI command and the corresponding `SyntaxTree::Search` class for searching for a pattern against a given syntax tree. + ## [4.0.2] - 2022-10-19 ### Changed @@ -397,7 +414,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a - 🎉 Initial release! 
🎉 -[unreleased]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v4.0.2...HEAD +[unreleased]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v4.2.0...HEAD +[4.2.0]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v4.1.0...v4.2.0 +[4.1.0]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v4.0.2...v4.1.0 [4.0.2]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v4.0.1...v4.0.2 [4.0.1]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v4.0.0...v4.0.1 [4.0.0]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v3.6.3...v4.0.0 diff --git a/Gemfile.lock b/Gemfile.lock index abe983b2..339de160 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,7 +1,7 @@ PATH remote: . specs: - syntax_tree (4.0.2) + syntax_tree (4.2.0) prettier_print (>= 1.0.2) GEM @@ -19,17 +19,17 @@ GEM rake (13.0.6) regexp_parser (2.6.0) rexml (3.2.5) - rubocop (1.36.0) + rubocop (1.37.1) json (~> 2.3) parallel (~> 1.10) parser (>= 3.1.2.1) rainbow (>= 2.2.2, < 4.0) regexp_parser (>= 1.8, < 3.0) rexml (>= 3.2.5, < 4.0) - rubocop-ast (>= 1.20.1, < 2.0) + rubocop-ast (>= 1.23.0, < 2.0) ruby-progressbar (~> 1.7) unicode-display_width (>= 1.4.0, < 3.0) - rubocop-ast (1.22.0) + rubocop-ast (1.23.0) parser (>= 3.1.1.0) ruby-progressbar (1.11.0) simplecov (0.21.2) diff --git a/README.md b/README.md index 30c35ac8..368c9361 100644 --- a/README.md +++ b/README.md @@ -15,9 +15,11 @@ It is built with only standard library dependencies. It additionally ships with - [CLI](#cli) - [ast](#ast) - [check](#check) + - [expr](#expr) - [format](#format) - [json](#json) - [match](#match) + - [search](#search) - [write](#write) - [Configuration](#configuration) - [Globbing](#globbing) @@ -25,6 +27,7 @@ It is built with only standard library dependencies. 
It additionally ships with - [SyntaxTree.read(filepath)](#syntaxtreereadfilepath) - [SyntaxTree.parse(source)](#syntaxtreeparsesource) - [SyntaxTree.format(source)](#syntaxtreeformatsource) + - [SyntaxTree.search(source, query, &block)](#syntaxtreesearchsource-query-block) - [Nodes](#nodes) - [child_nodes](#child_nodes) - [Pattern matching](#pattern-matching) @@ -128,6 +131,24 @@ To change the print width that you are checking against, specify the `--print-wi stree check --print-width=100 path/to/file.rb ``` +### expr + +This command will output a Ruby case-match expression that would match correctly against the first expression of the input. + +```sh +stree expr path/to/file.rb +``` + +For a file that contains `1 + 1`, you will receive: + +```ruby +SyntaxTree::Binary[ + left: SyntaxTree::Int[value: "1"], + operator: :+, + right: SyntaxTree::Int[value: "1"] +] +``` + ### format This command will output the formatted version of each of the listed files. Importantly, it will not write that content back to the source files. It is meant to display the formatted version only. @@ -215,6 +236,29 @@ SyntaxTree::Program[ ] ``` +### search + +This command will search the given filepaths against the specified pattern to find nodes that match. The pattern is a Ruby pattern-matching expression that is matched against each node in the tree. It can optionally be loaded from a file if you specify a filepath as the pattern argument. + +```sh +stree search VarRef path/to/file.rb +``` + +For a file that contains `Foo + Bar` you will receive: + +``` +path/to/file.rb:1:0: Foo + Bar +path/to/file.rb:1:6: Foo + Bar +``` + +If you put `VarRef` into a file instead (for example, `query.txt`), you would instead run: + +```sh +stree search query.txt path/to/file.rb +``` + +Note that the output of the `match` CLI command creates a valid pattern that can be used as the input for this command. 
+ ### write This command will format the listed files and write that formatted version back to the source files. Note that this overwrites the original content, to be sure to be using a version control system. @@ -288,6 +332,10 @@ This function takes an input string containing Ruby code and returns the syntax This function takes an input string containing Ruby code, parses it into its underlying syntax tree, and formats it back out to a string. You can optionally pass a second argument to this method as well that is the maximum width to print. It defaults to `80`. +### SyntaxTree.search(source, query, &block) + +This function takes an input string containing Ruby code, an input string containing a valid Ruby `in` clause expression that can be used to match against nodes in the tree (can be generated using `stree expr`, `stree match`, or `Node#construct_keys`), and a block. Each node that matches the given query will be yielded to the block. The block will receive the node as its only argument. + ## Nodes There are many different node types in the syntax tree. They are meant to be treated as immutable structs containing links to child nodes with minimal logic contained within their implementation. However, for the most part they all respond to a certain set of APIs, listed below. diff --git a/lib/syntax_tree.rb b/lib/syntax_tree.rb index fbd4fcef..df2f43a9 100644 --- a/lib/syntax_tree.rb +++ b/lib/syntax_tree.rb @@ -21,6 +21,8 @@ require_relative "syntax_tree/visitor/with_environment" require_relative "syntax_tree/parser" +require_relative "syntax_tree/pattern" +require_relative "syntax_tree/search" # Syntax Tree is a suite of tools built on top of the internal CRuby parser. 
It # provides the ability to generate a syntax tree from source, as well as the @@ -73,4 +75,10 @@ def self.read(filepath) File.read(filepath, encoding: encoding) end + + # Searches through the given source using the given pattern and yields each + # node in the tree that matches the pattern to the given block. + def self.search(source, query, &block) + Search.new(Pattern.new(query).compile).scan(parse(source), &block) + end end diff --git a/lib/syntax_tree/cli.rb b/lib/syntax_tree/cli.rb index b839d562..b847e059 100644 --- a/lib/syntax_tree/cli.rb +++ b/lib/syntax_tree/cli.rb @@ -188,6 +188,20 @@ def run(item) end end + # An action of the CLI that outputs a pattern-matching Ruby expression that + # would match the first expression of the input given. + class Expr < Action + def run(item) + case item.handler.parse(item.source) + in Program[statements: Statements[body: [expression]]] + puts expression.construct_keys + else + warn("The input to `stree expr` must be a single expression.") + exit(1) + end + end + end + # An action of the CLI that formats the input source and prints it out. class Format < Action def run(item) @@ -212,6 +226,44 @@ def run(item) end end + # An action of the CLI that searches for nodes matching the given pattern + # in the given files. + class Search < Action + attr_reader :search + + def initialize(query) + query = File.read(query) if query && File.readable?(query) + pattern = + begin + Pattern.new(query).compile + rescue Pattern::CompilationError => error + warn(error.message) + exit(1) + end + + @search = SyntaxTree::Search.new(pattern) + end + + def run(item) + search.scan(item.handler.parse(item.source)) do |node| + location = node.location + line = location.start_line + + bold_range = + if line == location.end_line + location.start_column...location.end_column + else + location.start_column.. 
+ end + + source = item.source.lines[line - 1].chomp + source[bold_range] = Color.bold(source[bold_range]).to_s + + puts("#{item.filepath}:#{line}:#{location.start_column}: #{source}") + end + end + end + # An action of the CLI that formats the input source and writes the # formatted output back to the file. class Write < Action @@ -248,6 +300,10 @@ def run(item) #{Color.bold("stree doc [--plugins=...] [-e SCRIPT] FILE")} Print out the doc tree that would be used to format the given files + #{Color.bold("stree expr [-e SCRIPT] FILE")} + Print out a pattern-matching Ruby expression that would match the first + expression of the given files + #{Color.bold("stree format [--plugins=...] [--print-width=NUMBER] [-e SCRIPT] FILE")} Print out the formatted version of the given files @@ -263,6 +319,9 @@ def run(item) #{Color.bold("stree lsp [--plugins=...] [--print-width=NUMBER]")} Run syntax tree in language server mode + #{Color.bold("stree search PATTERN [-e SCRIPT] FILE")} + Search for the given pattern in the given files + #{Color.bold("stree version")} Output the current version of syntax tree @@ -400,6 +459,10 @@ def run(argv) Debug.new(options) when "doc" Doc.new(options) + when "e", "expr" + Expr.new(options) + when "f", "format" + Format.new(options) when "help" puts HELP return 0 @@ -411,8 +474,8 @@ def run(argv) return 0 when "m", "match" Match.new(options) - when "f", "format" - Format.new(options) + when "s", "search" + Search.new(arguments.shift) when "version" puts SyntaxTree::VERSION return 0 @@ -434,7 +497,7 @@ def run(argv) .glob(pattern) .each do |filepath| if File.readable?(filepath) && - options.ignore_files.none? { File.fnmatch?(_1, filepath) } + options.ignore_files.none? 
{ File.fnmatch?(_1, filepath) } queue << FileItem.new(filepath) end end diff --git a/lib/syntax_tree/node.rb b/lib/syntax_tree/node.rb index 5162655e..aa133b7f 100644 --- a/lib/syntax_tree/node.rb +++ b/lib/syntax_tree/node.rb @@ -1657,12 +1657,12 @@ class Binary < Node # for older Ruby versions. unless :+.respond_to?(:name) using Module.new { - refine Symbol do - def name - to_s.freeze - end - end - } + refine Symbol do + def name + to_s.freeze + end + end + } end # [untyped] the left-hand side of the expression diff --git a/lib/syntax_tree/pattern.rb b/lib/syntax_tree/pattern.rb new file mode 100644 index 00000000..aa558361 --- /dev/null +++ b/lib/syntax_tree/pattern.rb @@ -0,0 +1,172 @@ +# frozen_string_literal: true + +module SyntaxTree + # A pattern is an object that wraps a Ruby pattern matching expression. The + # expression would normally be passed to an `in` clause within a `case` + # expression or a rightward assignment expression. For example, in the + # following snippet: + # + # case node + # in Const[value: "SyntaxTree"] + # end + # + # the pattern is the `Const[value: "SyntaxTree"]` expression. Within Syntax + # Tree, every node generates these kinds of expressions using the + # #construct_keys method. + # + # The pattern gets compiled into an object that responds to call by running + # the #compile method. This method itself will run back through Syntax Tree to + # parse the expression into a tree, then walk the tree to generate the + # necessary callable objects. For example, if you wanted to compile the + # expression above into a callable, you would: + # + # callable = SyntaxTree::Pattern.new("Const[value: 'SyntaxTree']").compile + # callable.call(node) + # + # The callable object returned by #compile is guaranteed to respond to #call + # with a single argument, which is the node to match against. 
It also is + # guaranteed to respond to #===, which means it itself can be used in a `case` + # expression, as in: + # + # case node + # when callable + # end + # + # If the query given to the initializer cannot be compiled into a valid + # matcher (either because of a syntax error or because it is using syntax we + # do not yet support) then a SyntaxTree::Pattern::CompilationError will be + # raised. + class Pattern + # Raised when the query given to a pattern is either invalid Ruby syntax or + # is using syntax that we don't yet support. + class CompilationError < StandardError + def initialize(repr) + super(<<~ERROR) + Syntax Tree was unable to compile the pattern you provided to search + into a usable expression. It failed to understand the node + represented by: + + #{repr} + + Note that not all syntax supported by Ruby's pattern matching syntax + is also supported by Syntax Tree's code search. If you're using some + syntax that you believe should be supported, please open an issue on + GitHub at https://github.com/ruby-syntax-tree/syntax_tree/issues/new. + ERROR + end + end + + attr_reader :query + + def initialize(query) + @query = query + end + + def compile + program = + begin + SyntaxTree.parse("case nil\nin #{query}\nend") + rescue Parser::ParseError + raise CompilationError, query + end + + compile_node(program.statements.body.first.consequent.pattern) + end + + private + + def combine_and(left, right) + ->(node) { left.call(node) && right.call(node) } + end + + def combine_or(left, right) + ->(node) { left.call(node) || right.call(node) } + end + + def compile_node(root) + case root + in AryPtn[constant:, requireds:, rest: nil, posts: []] + compiled_constant = compile_node(constant) if constant + + preprocessed = requireds.map { |required| compile_node(required) } + + compiled_requireds = ->(node) do + deconstructed = node.deconstruct + + deconstructed.length == preprocessed.length && + preprocessed + .zip(deconstructed) + .all? 
{ |(matcher, value)| matcher.call(value) } + end + + if compiled_constant + combine_and(compiled_constant, compiled_requireds) + else + compiled_requireds + end + in Binary[left:, operator: :|, right:] + combine_or(compile_node(left), compile_node(right)) + in Const[value:] if SyntaxTree.const_defined?(value) + clazz = SyntaxTree.const_get(value) + + ->(node) { node.is_a?(clazz) } + in Const[value:] if Object.const_defined?(value) + clazz = Object.const_get(value) + + ->(node) { node.is_a?(clazz) } + in ConstPathRef[ + parent: VarRef[value: Const[value: "SyntaxTree"]], constant: + ] + compile_node(constant) + in DynaSymbol[parts: []] + symbol = :"" + + ->(node) { node == symbol } + in DynaSymbol[parts: [TStringContent[value:]]] + symbol = value.to_sym + + ->(attribute) { attribute == symbol } + in HshPtn[constant:, keywords:, keyword_rest: nil] + compiled_constant = compile_node(constant) if constant + + preprocessed = + keywords.to_h do |keyword, value| + raise NoMatchingPatternError unless keyword.is_a?(Label) + [keyword.value.chomp(":").to_sym, compile_node(value)] + end + + compiled_keywords = ->(node) do + deconstructed = node.deconstruct_keys(preprocessed.keys) + + preprocessed.all? do |keyword, matcher| + matcher.call(deconstructed[keyword]) + end + end + + if compiled_constant + combine_and(compiled_constant, compiled_keywords) + else + compiled_keywords + end + in RegexpLiteral[parts: [TStringContent[value:]]] + regexp = /#{value}/ + + ->(attribute) { regexp.match?(attribute) } + in StringLiteral[parts: []] + ->(attribute) { attribute == "" } + in StringLiteral[parts: [TStringContent[value:]]] + ->(attribute) { attribute == value } + in SymbolLiteral[value:] + symbol = value.value.to_sym + + ->(attribute) { attribute == symbol } + in VarRef[value: Const => value] + compile_node(value) + in VarRef[value: Kw[value: "nil"]] + ->(attribute) { attribute.nil? 
} + end + rescue NoMatchingPatternError + raise CompilationError, PP.pp(root, +"").chomp + end + end +end diff --git a/lib/syntax_tree/search.rb b/lib/syntax_tree/search.rb new file mode 100644 index 00000000..9fd52ba1 --- /dev/null +++ b/lib/syntax_tree/search.rb @@ -0,0 +1,26 @@ +# frozen_string_literal: true + +module SyntaxTree + # Provides an interface for searching for a pattern of nodes against a + # subtree of an AST. + class Search + attr_reader :pattern + + def initialize(pattern) + @pattern = pattern + end + + def scan(root) + return to_enum(__method__, root) unless block_given? + queue = [root] + + until queue.empty? + node = queue.shift + next unless node + + yield node if pattern.call(node) + queue.concat(node.child_nodes) + end + end + end +end diff --git a/lib/syntax_tree/version.rb b/lib/syntax_tree/version.rb index 98d461df..0b68a850 100644 --- a/lib/syntax_tree/version.rb +++ b/lib/syntax_tree/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module SyntaxTree - VERSION = "4.0.2" + VERSION = "4.2.0" end diff --git a/test/cli_test.rb b/test/cli_test.rb index 03293333..b4ef0afc 100644 --- a/test/cli_test.rb +++ b/test/cli_test.rb @@ -79,6 +79,11 @@ def test_doc assert_includes(result.stdio, "test") end + def test_expr + result = run_cli("expr") + assert_includes(result.stdio, "SyntaxTree::Ident") + end + def test_format result = run_cli("format") assert_equal("test\n", result.stdio) @@ -94,6 +99,11 @@ def test_match assert_includes(result.stdio, "SyntaxTree::Program") end + def test_search + result = run_cli("search", "VarRef", contents: "Foo + Bar") + assert_equal(2, result.stdio.lines.length) + end + def test_version result = run_cli("version") assert_includes(result.stdio, SyntaxTree::VERSION.to_s) diff --git a/test/search_test.rb b/test/search_test.rb new file mode 100644 index 00000000..314142e3 --- /dev/null +++ b/test/search_test.rb @@ -0,0 +1,62 @@ +# frozen_string_literal: true + +require_relative "test_helper" + +module SyntaxTree + 
class SearchTest < Minitest::Test + def test_search_binary_or + results = search("Foo + Bar + 1", "VarRef | Int") + + assert_equal 3, results.length + assert_equal "1", results.min_by { |node| node.class.name }.value + end + + def test_search_const + results = search("Foo + Bar + Baz", "VarRef") + + assert_equal 3, results.length + assert_equal %w[Bar Baz Foo], results.map { |node| node.value.value }.sort + end + + def test_search_syntax_tree_const + results = search("Foo + Bar + Baz", "SyntaxTree::VarRef") + + assert_equal 3, results.length + end + + def test_search_hash_pattern_string + results = search("Foo + Bar + Baz", "VarRef[value: Const[value: 'Foo']]") + + assert_equal 1, results.length + assert_equal "Foo", results.first.value.value + end + + def test_search_hash_pattern_regexp + results = search("Foo + Bar + Baz", "VarRef[value: Const[value: /^Ba/]]") + + assert_equal 2, results.length + assert_equal %w[Bar Baz], results.map { |node| node.value.value }.sort + end + + def test_search_string_empty + results = search("''", "StringLiteral[parts: []]") + + assert_equal 1, results.length + end + + def test_search_symbol_empty + results = search(":''", "DynaSymbol[parts: []]") + + assert_equal 1, results.length + end + + private + + def search(source, query) + pattern = Pattern.new(query).compile + program = SyntaxTree.parse(source) + + Search.new(pattern).scan(program).to_a + end + end +end