diff --git a/.github/workflows/auto-merge.yml b/.github/workflows/auto-merge.yml index 514ac27a..5468e6d0 100644 --- a/.github/workflows/auto-merge.yml +++ b/.github/workflows/auto-merge.yml @@ -12,7 +12,7 @@ jobs: steps: - name: Dependabot metadata id: metadata - uses: dependabot/fetch-metadata@v1.3.5 + uses: dependabot/fetch-metadata@v2.4.0 with: github-token: "${{ secrets.GITHUB_TOKEN }}" - name: Enable auto-merge for Dependabot PRs diff --git a/.github/workflows/gh-pages.yml b/.github/workflows/gh-pages.yml index fc02f2fe..7ff5f5f1 100644 --- a/.github/workflows/gh-pages.yml +++ b/.github/workflows/gh-pages.yml @@ -25,9 +25,9 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup Pages - uses: actions/configure-pages@v2 + uses: actions/configure-pages@v5 - name: Set up Ruby uses: ruby/setup-ruby@v1 with: @@ -39,7 +39,7 @@ jobs: rdoc --main README.md --op _site --exclude={Gemfile,Rakefile,"coverage/*","vendor/*","bin/*","test/*","tmp/*"} cp -r doc _site/doc - name: Upload artifact - uses: actions/upload-pages-artifact@v1 + uses: actions/upload-pages-artifact@v2 # Deployment job deploy: @@ -51,4 +51,4 @@ jobs: steps: - name: Deploy to GitHub Pages id: deployment - uses: actions/deploy-pages@v1 + uses: actions/deploy-pages@v4 diff --git a/.gitignore b/.gitignore index 69755243..3ce1e327 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ /coverage/ /pkg/ /rdocs/ +/sorbet/ /spec/reports/ /tmp/ /vendor/ diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index f5477ea3..00000000 --- a/.gitmodules +++ /dev/null @@ -1,6 +0,0 @@ -[submodule "mspec"] - path = spec/mspec - url = git@github.com:ruby/mspec.git -[submodule "spec"] - path = spec/ruby - url = git@github.com:ruby/spec.git diff --git a/.rubocop.yml b/.rubocop.yml index 069041bd..2142296f 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -7,9 +7,12 @@ AllCops: SuggestExtensions: false TargetRubyVersion: 2.7 Exclude: - - 
'{.git,.github,bin,coverage,pkg,spec,test/fixtures,vendor,tmp}/**/*' + - '{.git,.github,.ruby-lsp,bin,coverage,doc,pkg,sorbet,spec,test/fixtures,vendor,tmp}/**/*' - test.rb +Gemspec/DevelopmentDependencies: + Enabled: false + Layout/LineLength: Max: 80 @@ -25,6 +28,9 @@ Lint/AmbiguousRange: Lint/BooleanSymbol: Enabled: false +Lint/Debugger: + Enabled: false + Lint/DuplicateBranch: Enabled: false @@ -76,6 +82,9 @@ Security/Eval: Style/AccessorGrouping: Enabled: false +Style/Alias: + Enabled: false + Style/CaseEquality: Enabled: false @@ -85,6 +94,9 @@ Style/CaseLikeIf: Style/ClassVars: Enabled: false +Style/CombinableLoops: + Enabled: false + Style/DocumentDynamicEvalDefinition: Enabled: false @@ -106,6 +118,9 @@ Style/FormatStringToken: Style/GuardClause: Enabled: false +Style/HashLikeCase: + Enabled: false + Style/IdenticalConditionalBranches: Enabled: false @@ -139,6 +154,9 @@ Style/ParallelAssignment: Style/PerlBackrefs: Enabled: false +Style/RedundantArrayConstructor: + Enabled: false + Style/SafeNavigation: Enabled: false diff --git a/.ruby-version b/.ruby-version new file mode 100644 index 00000000..944880fa --- /dev/null +++ b/.ruby-version @@ -0,0 +1 @@ +3.2.0 diff --git a/CHANGELOG.md b/CHANGELOG.md index 4b29fcbb..1beac42f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,138 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a ## [Unreleased] +## [6.2.0] - 2023-09-20 + +### Added + +- Fix `WithScope` for destructured post arguments. + +### Changed + +- Always use `do`/`end` for multi-line lambdas. + +## [6.1.1] - 2023-03-21 + +### Changed + +- Fixed a bug where the call chain formatter was incorrectly looking at call messages. + +## [6.1.0] - 2023-03-20 + +### Added + +- The `stree ctags` command for generating ctags like `universal-ctags` or `ripper-tags` would. +- The `definedivar` YARV instruction has been added to match CRuby's implementation. 
+- We now generate better Sorbet RBI files for the nodes in the tree and the visitors. +- `SyntaxTree::Reflection.nodes` now includes the visitor method. + +### Changed + +- We now explicitly require `pp` in environments that need it. + +## [6.0.2] - 2023-03-03 + +### Added + +- The `WithScope` visitor mixin will now additionally report local variables defined through regular expression named captures. +- The `WithScope` visitor mixin now properly handles destructured splat arguments in required positions. + +### Changed + +- Fixed the AST output by adding blocks to `Command` and `CommandCall` nodes in the `FieldVisitor`. +- Fixed the location of lambda local variables (e.g., `->(; a) {}`). + +## [6.0.1] - 2023-02-26 + +### Added + +- The class declarations returned as the result of the indexing operation now have their superclass as a field. It is returned as an array of constants. If the superclass is anything other than a constant lookup, then it raises an error. + +### Changed + +- The `nesting` field on the results of the indexing operation is no longer a single flat array. Instead it is an array of arrays, where each array is a single nesting level. This more accurately reflects the nesting of the nodes in the tree. For example, `class Foo::Bar::Baz; end` would result in `[Foo, Bar, Baz]`, but that incorrectly implies that you can see constants at each of those levels. Now this would result in `[[Foo, Bar, Baz]]` to indicate that it can see either the top level or constants within the scope of `Foo::Bar::Baz` only. +- When formatting hashes that have omitted values and mixed hash rockets with labels, the formatting now maintains whichever delimiter was used in the source. This is because forcing the use of hash rockets with omitted values results in a syntax error. +- Handle the case where a bare hash is used after the `break`, `next`, or `return` keywords. Previously this would result in hash labels which is not valid syntax. 
Now it maintains the delimiters used in the source. +- The `<<` operator will now break on chained `<<` expressions. Previously it would always stay flat. + +## [6.0.0] - 2023-02-10 + +### Added + +- `SyntaxTree::BasicVisitor::visit_methods` has been added to allow you to check multiple visit methods inside of a block. There _was_ a method called `visit_methods` previously, but it was undocumented because it was meant as a private API. That method has been renamed to `valid_visit_methods`. +- `rake sorbet:rbi` has been added as a task within the repository to generate an RBI file corresponding to the nodes in the tree. This can be used to help aid consumers of Syntax Tree that are using Sorbet. +- `SyntaxTree::Reflection` has been added to allow you to get information about the nodes in the tree. It is not required by default, since it takes a small amount of time to parse `node.rb` and get all of the information. +- `SyntaxTree::Node#to_mermaid` has been added to allow you to generate a Mermaid diagram of the node and its children. This is useful for debugging and understanding the structure of the tree. +- `SyntaxTree::Translation` has been added as an experimental API to transform the Syntax Tree syntax tree into the syntax trees represented by the whitequark/parser and rubocop/rubocop-ast gems. + - `SyntaxTree::Translation.to_parser(node, buffer)` will return a `Parser::AST::Node` object. + - `SyntaxTree::Translation.to_rubocop_ast(node, buffer)` will return a `RuboCop::AST::Node` object. +- `SyntaxTree::index` and `SyntaxTree::index_file` have been added to allow you to get a list of all of the classes, modules, and methods defined in a given source string or file. 
+- Various convenience methods have been added: + - `SyntaxTree::format_file` - which calls format with the result of reading the file + - `SyntaxTree::format_node` - which formats the node directly + - `SyntaxTree::parse_file` - which calls parse with the result of reading the file + - `SyntaxTree::search_file` - which calls search with the result of reading the file + - `SyntaxTree::Node#start_char` - which is the same as calling `node.location.start_char` + - `SyntaxTree::Node#end_char` - which is the same as calling `node.location.end_char` +- `SyntaxTree::Assoc` nodes can now be formatted on their own without a parent hash node. +- `SyntaxTree::BlockVar#arg0?` has been added to check if a single required block parameter is present and would potentially be expanded. +- More experimental APIs have been added to the `SyntaxTree::YARV` module, including: + - `SyntaxTree::YARV::ControlFlowGraph` + - `SyntaxTree::YARV::DataFlowGraph` + - `SyntaxTree::YARV::SeaOfNodes` + +### Changed + +#### Major changes + +- *BREAKING* Updates to `WithEnvironment`: + - The `WithEnvironment` module has been renamed to `WithScope`. + - The `current_environment` method has been renamed to `current_scope`. + - The `with_current_environment` method has been removed. + - Previously scopes were always able to look up the tree, as in: `a = 1; def foo; a = 2; end` would see only a single `a` variable. That has been corrected. + - Previously accessing variables from inside of blocks that were not shadowed would mark them as being local to the block only. This has been corrected. 
+- *BREAKING* Lots of constants moved out of `SyntaxTree::Visitor` to just `SyntaxTree`: + * `SyntaxTree::Visitor::FieldVisitor` is now `SyntaxTree::FieldVisitor` + * `SyntaxTree::Visitor::JSONVisitor` is now `SyntaxTree::JSONVisitor` + * `SyntaxTree::Visitor::MatchVisitor` is now `SyntaxTree::MatchVisitor` + * `SyntaxTree::Visitor::MutationVisitor` is now `SyntaxTree::MutationVisitor` + * `SyntaxTree::Visitor::PrettyPrintVisitor` is now `SyntaxTree::PrettyPrintVisitor` +- *BREAKING* Lots of constants are now autoloaded instead of required by default. This is only particularly relevant if you are in a forking environment and want to preload constants before forking for better memory usage with copy-on-write. +- *BREAKING* The `SyntaxTree::Statements#initialize` method no longer accepts a parser as the first argument. It now mirrors the other nodes in that it accepts its children and location. As a result, Syntax Tree nodes are now marshalable (and therefore can be sent over DRb). Previously the `Statements` node was not able to be marshaled because it held a reference to the parser. + +#### Minor changes + +- Many places where embedded documents (`=begin` to `=end`) were being treated as real comments have been fixed for formatting. +- Dynamic symbols in keyword pattern matching now have better formatting. +- Endless method definitions used to have a `SyntaxTree::BodyStmt` node that had any kind of node as its `statements` field. That has been corrected to be more consistent such that now going from `def_node.bodystmt.statements` always returns a `SyntaxTree::Statements` node, which is more consistent. +- We no longer assume that `fiddle` is able to be required, and only require it when it is actually needed. + +#### Tiny changes + +- Empty parameter nodes within blocks now have more accurate location information. +- Pinned variables have more correct location information now. 
(Previously the location was just around the variable itself, but it now includes the pin.) +- Array patterns in pattern matching now have more accurate location information when they are using parentheses with a constant present. +- Find patterns in pattern matching now have more correct location information for their `left` and `right` fields. +- Lots of nodes have more correct types in the comments on their attributes. +- The expressions `break foo.bar :baz do |qux| qux end` and `next fun foo do end` now correctly parse as a control-flow statement with a method call that has a block attached, as opposed to a control-flow statement with a block attached. +- The expression `self::a, b = 1, 2` would previously yield a `SyntaxTree::ConstPathField` node for the first element of the left-hand-side of the multiple assignment. Semantically this is incorrect, and we have fixed this to now be a `SyntaxTree::Field` node instead. + +## [5.3.0] - 2023-01-26 + +### Added + +- `#arity` has been added to `DefNode`, `BlockNode`, and `Params`. The method returns a range where the lower bound is the minimum and the upper bound is the maximum number of arguments that can be used to invoke that block/method definition. +- `#arity` has been added to `CallNode`, `Command`, `CommandCall`, and `VCall` nodes. The method returns the number of arguments included in the invocation. For splats, double splats, or argument forwards, this method returns `Float::INFINITY`. +- `SyntaxTree::index` and `SyntaxTree::index_file` APIs have been added to collect a list of classes, modules, and methods defined in a given source string or file, respectively. These APIs are experimental and subject to change. +- A `plugin/disable_auto_ternary` plugin has been added that disables the formatter that automatically changes permissible `if/else` clauses into ternaries. + +### Changed + +- Files are now only written from the CLI if the content of them changes, which should make watching files less chaotic. 
+- In the case that `rb_iseq_load` cannot be found, `Fiddle::DLError` is now rescued. +- Previously if there were invalid UTF-8 byte sequences after the `__END__` keyword the parser could potentially have crashed when parsing comments. This has been fixed. +- Previously there was special formatting for array literals that contained only variable references (either locals, method calls, or constants). For consistency, this has been removed and all array literals are now formatted the same way. + ## [5.2.0] - 2023-01-04 ### Added @@ -481,7 +613,14 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a - 🎉 Initial release! 🎉 -[unreleased]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v5.2.0...HEAD +[unreleased]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v6.2.0...HEAD +[6.2.0]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v6.1.1...v6.2.0 +[6.1.1]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v6.1.0...v6.1.1 +[6.1.0]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v6.0.2...v6.1.0 +[6.0.2]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v6.0.1...v6.0.2 +[6.0.1]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v6.0.0...v6.0.1 +[6.0.0]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v5.3.0...v6.0.0 +[5.3.0]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v5.2.0...v5.3.0 [5.2.0]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v5.1.0...v5.2.0 [5.1.0]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v5.0.1...v5.1.0 [5.0.1]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v5.0.0...v5.0.1 diff --git a/Gemfile.lock b/Gemfile.lock index bb5e3663..1bf158a2 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,7 +1,7 @@ PATH remote: . 
specs: - syntax_tree (5.2.0) + syntax_tree (6.2.0) prettier_print (>= 1.2.0) GEM @@ -10,35 +10,39 @@ GEM ast (2.4.2) docile (1.4.0) json (2.6.3) - minitest (5.17.0) - parallel (1.22.1) - parser (3.2.0.0) + language_server-protocol (3.17.0.3) + minitest (5.25.5) + parallel (1.23.0) + parser (3.2.2.4) ast (~> 2.4.1) - prettier_print (1.2.0) + racc + prettier_print (1.2.1) + racc (1.7.1) rainbow (3.1.1) - rake (13.0.6) - regexp_parser (2.6.1) - rexml (3.2.5) - rubocop (1.42.0) + rake (13.3.0) + regexp_parser (2.8.2) + rexml (3.2.6) + rubocop (1.57.2) json (~> 2.3) + language_server-protocol (>= 3.17.0) parallel (~> 1.10) - parser (>= 3.1.2.1) + parser (>= 3.2.2.4) rainbow (>= 2.2.2, < 4.0) regexp_parser (>= 1.8, < 3.0) rexml (>= 3.2.5, < 4.0) - rubocop-ast (>= 1.24.1, < 2.0) + rubocop-ast (>= 1.28.1, < 2.0) ruby-progressbar (~> 1.7) - unicode-display_width (>= 1.4.0, < 3.0) - rubocop-ast (1.24.1) - parser (>= 3.1.1.0) - ruby-progressbar (1.11.0) + unicode-display_width (>= 2.4.0, < 3.0) + rubocop-ast (1.30.0) + parser (>= 3.2.1.0) + ruby-progressbar (1.13.0) simplecov (0.22.0) docile (~> 1.1) simplecov-html (~> 0.11) simplecov_json_formatter (~> 0.1) simplecov-html (0.12.3) simplecov_json_formatter (0.1.4) - unicode-display_width (2.4.1) + unicode-display_width (2.5.0) PLATFORMS arm64-darwin-21 diff --git a/README.md b/README.md index 7a943ca8..6e1119df 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,7 @@ It is built with only standard library dependencies. It additionally ships with - [CLI](#cli) - [ast](#ast) - [check](#check) + - [ctags](#ctags) - [expr](#expr) - [format](#format) - [json](#json) @@ -29,6 +30,7 @@ It is built with only standard library dependencies. 
It additionally ships with - [SyntaxTree.format(source)](#syntaxtreeformatsource) - [SyntaxTree.mutation(&block)](#syntaxtreemutationblock) - [SyntaxTree.search(source, query, &block)](#syntaxtreesearchsource-query-block) + - [SyntaxTree.index(source)](#syntaxtreeindexsource) - [Nodes](#nodes) - [child_nodes](#child_nodes) - [copy(**attrs)](#copyattrs) @@ -40,9 +42,10 @@ It is built with only standard library dependencies. It additionally ships with - [construct_keys](#construct_keys) - [Visitor](#visitor) - [visit_method](#visit_method) + - [visit_methods](#visit_methods) - [BasicVisitor](#basicvisitor) - [MutationVisitor](#mutationvisitor) - - [WithEnvironment](#withenvironment) + - [WithScope](#withscope) - [Language server](#language-server) - [textDocument/formatting](#textdocumentformatting) - [textDocument/inlayHint](#textdocumentinlayhint) @@ -137,6 +140,33 @@ To change the print width that you are checking against, specify the `--print-wi stree check --print-width=100 path/to/file.rb ``` +### ctags + +This command will output to stdout a set of tags suitable for usage with [ctags](https://github.com/universal-ctags/ctags). + +```sh +stree ctags path/to/file.rb +``` + +For a file containing the following Ruby code: + +```ruby +class Foo +end + +class Bar < Foo +end +``` + +you will receive: + +``` +!_TAG_FILE_FORMAT 2 /extended format; --format=1 will not append ;" to lines/ +!_TAG_FILE_SORTED 1 /0=unsorted, 1=sorted, 2=foldcase/ +Bar test.rb /^class Bar < Foo$/;" c inherits:Foo +Foo test.rb /^class Foo$/;" c +``` + ### expr This command will output a Ruby case-match expression that would match correctly against the first expression of the input. @@ -267,7 +297,7 @@ Note that the output of the `match` CLI command creates a valid pattern that can ### write -This command will format the listed files and write that formatted version back to the source files. Note that this overwrites the original content, to be sure to be using a version control system. 
+This command will format the listed files and write that formatted version back to the source files. Note that this overwrites the original content, so be sure to be using a version control system. ```sh stree write path/to/file.rb @@ -340,12 +370,16 @@ This function takes an input string containing Ruby code, parses it into its und ### SyntaxTree.mutation(&block) -This function yields a new mutation visitor to the block, and then returns the initialized visitor. It's effectively a shortcut for creating a `SyntaxTree::Visitor::MutationVisitor` without having to remember the class name. For more information on that visitor, see the definition below. +This function yields a new mutation visitor to the block, and then returns the initialized visitor. It's effectively a shortcut for creating a `SyntaxTree::MutationVisitor` without having to remember the class name. For more information on that visitor, see the definition below. ### SyntaxTree.search(source, query, &block) This function takes an input string containing Ruby code, an input string containing a valid Ruby `in` clause expression that can be used to match against nodes in the tree (can be generated using `stree expr`, `stree match`, or `Node#construct_keys`), and a block. Each node that matches the given query will be yielded to the block. The block will receive the node as its only argument. +### SyntaxTree.index(source) + +This function takes an input string containing Ruby code and returns a list of all of the class declarations, module declarations, and method definitions within a file. Each of the entries also has access to its associated comments. This is useful for generating documentation or index information for a file to support something like go-to-definition. + ## Nodes There are many different node types in the syntax tree. They are meant to be treated as immutable structs containing links to child nodes with minimal logic contained within their implementation. 
However, for the most part they all respond to a certain set of APIs, listed below. @@ -491,7 +525,7 @@ With visitors, you only define handlers for the nodes that you need. You can fin * call `visit(child)` with each child that you want to visit * call nothing if you're sure you don't want to descend further -There are a couple of visitors that ship with Syntax Tree that can be used as examples. They live in the [lib/syntax_tree/visitor](lib/syntax_tree/visitor) directory. +There are a couple of visitors that ship with Syntax Tree that can be used as examples. They live in the [lib/syntax_tree](lib/syntax_tree) directory. ### visit_method @@ -517,6 +551,26 @@ Did you mean? visit_binary from bin/console:8:in `
' ``` +### visit_methods + +Similar to `visit_method`, `visit_methods` also checks that methods defined are valid visit methods. This variation however accepts a block and checks that all methods defined within that block are valid visit methods. It's meant to be used like: + +```ruby +class ArithmeticVisitor < SyntaxTree::Visitor + visit_methods do + def visit_binary(node) + # ... + end + + def visit_int(node) + # ... + end + end +end +``` + +This is only checked when the methods are defined and does not impose any kind of runtime overhead after that. It is very useful for upgrading versions of Syntax Tree in case these method names change. + ### BasicVisitor When you're defining your own visitor, by default it will walk down the tree even if you don't define `visit_*` methods. This is to ensure you can define a subset of the necessary methods in order to only interact with the nodes you're interested in. If you'd like to change this default to instead raise an error if you visit a node you haven't explicitly handled, you can instead inherit from `BasicVisitor`. @@ -537,7 +591,7 @@ The `MutationVisitor` is a visitor that can be used to mutate the tree. It works ```ruby # Create a new visitor -visitor = SyntaxTree::Visitor::MutationVisitor.new +visitor = SyntaxTree::MutationVisitor.new # Specify that it should mutate If nodes with assignments in their predicates visitor.mutate("IfNode[predicate: Assign | OpAssign]") do |node| @@ -567,20 +621,18 @@ SyntaxTree::Formatter.format(source, program.accept(visitor)) # => "if (a = 1)\nend\n" ``` -### WithEnvironment +### WithScope -The `WithEnvironment` module can be included in visitors to automatically keep track of local variables and arguments -defined inside each environment. A `current_environment` accessor is made available to the request, allowing it to find -all usages and definitions of a local. 
+The `WithScope` module can be included in visitors to automatically keep track of local variables and arguments defined inside each scope. A `current_scope` accessor is made available to the request, allowing it to find all usages and definitions of a local. ```ruby class MyVisitor < Visitor - include WithEnvironment + prepend WithScope def visit_ident(node) # find_local will return a Local for any local variables or arguments # present in the current environment or nil if the identifier is not a local - local = current_environment.find_local(node) + local = current_scope.find_local(node) puts local.type # the type of the local (:variable or :argument) puts local.definitions # the array of locations where this local is defined @@ -658,6 +710,7 @@ To register plugins, define a file somewhere in your load path named `syntax_tre * `plugin/single_quotes` - This will change all of your string literals to use single quotes instead of the default double quotes. * `plugin/trailing_comma` - This will put trailing commas into multiline array literals, hash literals, and method calls that can support trailing commas. +* `plugin/disable_auto_ternary` - This will prevent the automatic conversion of `if ... else` to ternary expressions. If you're using Syntax Tree as a library, you can require those files directly or manually pass those options to the formatter initializer through the `SyntaxTree::Formatter::Options` class. @@ -763,6 +816,7 @@ inherit_gem: * [Neovim](https://neovim.io/) - [neovim/nvim-lspconfig](https://github.com/neovim/nvim-lspconfig). * [Vim](https://www.vim.org/) - [dense-analysis/ale](https://github.com/dense-analysis/ale). * [VSCode](https://code.visualstudio.com/) - [ruby-syntax-tree/vscode-syntax-tree](https://github.com/ruby-syntax-tree/vscode-syntax-tree). +* [Emacs](https://www.gnu.org/software/emacs/) - [emacs-format-all-the-code](https://github.com/lassik/emacs-format-all-the-code). 
## Contributing diff --git a/Rakefile b/Rakefile index f06d8cf8..fb4f8847 100644 --- a/Rakefile +++ b/Rakefile @@ -4,6 +4,8 @@ require "bundler/gem_tasks" require "rake/testtask" require "syntax_tree/rake_tasks" +Rake.add_rakelib "tasks" + Rake::TestTask.new(:test) do |t| t.libs << "test" t.libs << "lib" @@ -14,7 +16,16 @@ task default: :test configure = ->(task) do task.source_files = - FileList[%w[Gemfile Rakefile syntax_tree.gemspec lib/**/*.rb test/*.rb]] + FileList[ + %w[ + Gemfile + Rakefile + syntax_tree.gemspec + lib/**/*.rb + tasks/*.rake + test/*.rb + ] + ] # Since Syntax Tree supports back to Ruby 2.7.0, we need to make sure that we # format our code such that it's compatible with that version. This actually @@ -26,10 +37,3 @@ end SyntaxTree::Rake::CheckTask.new(&configure) SyntaxTree::Rake::WriteTask.new(&configure) - -desc "Run mspec tests using YARV emulation" -task :spec do - Dir["./spec/ruby/language/**/*_spec.rb"].each do |filepath| - sh "exe/yarv ./spec/mspec/bin/mspec-tag #{filepath}" - end -end diff --git a/bin/console b/bin/console index 1c18bd62..6f35f1ec 100755 --- a/bin/console +++ b/bin/console @@ -3,6 +3,7 @@ require "bundler/setup" require "syntax_tree" +require "syntax_tree/reflection" require "irb" IRB.start(__FILE__) diff --git a/bin/whitequark b/bin/whitequark new file mode 100755 index 00000000..121bcd53 --- /dev/null +++ b/bin/whitequark @@ -0,0 +1,79 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "bundler/setup" +require "parser/current" + +$:.unshift(File.expand_path("../lib", __dir__)) +require "syntax_tree" + +# First, opt in to every AST feature. +Parser::Builders::Default.modernize + +# Modify the source map == check so that it doesn't check against the node +# itself so we don't get into a recursive loop. 
+Parser::Source::Map.prepend( + Module.new { + def ==(other) + self.class == other.class && + (instance_variables - %i[@node]).map do |ivar| + instance_variable_get(ivar) == other.instance_variable_get(ivar) + end.reduce(:&) + end + } +) + +# Next, ensure that we're comparing the nodes and also comparing the source +# ranges so that we're getting all of the necessary information. +Parser::AST::Node.prepend( + Module.new { + def ==(other) + super && (location == other.location) + end + } +) + +source = ARGF.read + +parser = Parser::CurrentRuby.new +parser.diagnostics.all_errors_are_fatal = true + +buffer = Parser::Source::Buffer.new("(string)", 1) +buffer.source = source.dup.force_encoding(parser.default_encoding) + +stree = SyntaxTree::Translation.to_parser(SyntaxTree.parse(source), buffer) +ptree = parser.parse(buffer) + +if stree == ptree + puts "Syntax trees are equivalent." +elsif stree.inspect == ptree.inspect + warn "Syntax tree locations are different." + + queue = [[stree, ptree]] + while (left, right = queue.shift) + if left.location != right.location + warn "Different node:" + pp left + + warn "Different location:" + + warn "Syntax Tree:" + pp left.location + + warn "whitequark/parser:" + pp right.location + + exit + end + + left.children.zip(right.children).each do |left_child, right_child| + queue << [left_child, right_child] if left_child.is_a?(Parser::AST::Node) + end + end +else + warn "Syntax Tree:" + pp stree + + warn "whitequark/parser:" + pp ptree +end diff --git a/doc/changing_structure.md b/doc/changing_structure.md new file mode 100644 index 00000000..74012f26 --- /dev/null +++ b/doc/changing_structure.md @@ -0,0 +1,16 @@ +# Changing structure + +First and foremost, changing the structure of the tree in any way is a major breaking change. It forces the consumers to update their visitors, pattern matches, and method calls. It should not be taken lightly, and can only happen on a major version change. So keep that in mind. 
+ +That said, if you do want to change the structure of the tree, there are a few steps that you have to take. They are enumerated below. + +1. Change the structure in the required node classes. This could mean adding/removing classes or adding/removing fields. Be sure to also update the `copy` and `===` methods to be sure that they are correct. +2. Update the parser to correctly create the new structure. +3. Update any visitor methods that are affected by the change. For example, if adding a new node make sure to create the new visit method alias in the `Visitor` class. +4. Update the `FieldVisitor` class to be sure that the various serializers, pretty printers, and matchers all get updated accordingly. +5. Update the `DSL` module to be sure that folks can correctly create nodes with the new structure. +6. Ensure the formatting of the code hasn't changed. This can mostly be done by running the tests, but if there's a corner case that we don't cover that is now exposed by your change be sure to add test cases. +7. Update the translation visitors to ensure we're still translating into other ASTs correctly. +8. Update the YARV compiler visitor to ensure we're still compiling correctly. +9. Make sure we aren't referencing the previous structure in any documentation or tests. +10. Be sure to update `CHANGELOG.md` with a description of the change that you made. 
diff --git a/lib/syntax_tree.rb b/lib/syntax_tree.rb index f1217ac3..6c595db5 100644 --- a/lib/syntax_tree.rb +++ b/lib/syntax_tree.rb @@ -1,49 +1,43 @@ # frozen_string_literal: true -require "etc" -require "fiddle" -require "json" -require "pp" require "prettier_print" +require "pp" require "ripper" -require "stringio" -require_relative "syntax_tree/formatter" require_relative "syntax_tree/node" -require_relative "syntax_tree/dsl" -require_relative "syntax_tree/version" - require_relative "syntax_tree/basic_visitor" require_relative "syntax_tree/visitor" -require_relative "syntax_tree/visitor/field_visitor" -require_relative "syntax_tree/visitor/json_visitor" -require_relative "syntax_tree/visitor/match_visitor" -require_relative "syntax_tree/visitor/mutation_visitor" -require_relative "syntax_tree/visitor/pretty_print_visitor" -require_relative "syntax_tree/visitor/environment" -require_relative "syntax_tree/visitor/with_environment" +require_relative "syntax_tree/formatter" require_relative "syntax_tree/parser" -require_relative "syntax_tree/pattern" -require_relative "syntax_tree/search" - -require_relative "syntax_tree/yarv" -require_relative "syntax_tree/yarv/bf" -require_relative "syntax_tree/yarv/compiler" -require_relative "syntax_tree/yarv/decompiler" -require_relative "syntax_tree/yarv/disassembler" -require_relative "syntax_tree/yarv/instruction_sequence" -require_relative "syntax_tree/yarv/instructions" -require_relative "syntax_tree/yarv/legacy" -require_relative "syntax_tree/yarv/local_table" -require_relative "syntax_tree/yarv/assembler" -require_relative "syntax_tree/yarv/vm" +require_relative "syntax_tree/version" # Syntax Tree is a suite of tools built on top of the internal CRuby parser. It # provides the ability to generate a syntax tree from source, as well as the # tools necessary to inspect and manipulate that syntax tree. It can be used to # build formatters, linters, language servers, and more. 
module SyntaxTree + # Syntax Tree the library has many features that aren't always used by the + # CLI. Requiring those features takes time, so we autoload as many constants + # as possible in order to keep the CLI as fast as possible. + + autoload :Database, "syntax_tree/database" + autoload :DSL, "syntax_tree/dsl" + autoload :FieldVisitor, "syntax_tree/field_visitor" + autoload :Index, "syntax_tree/index" + autoload :JSONVisitor, "syntax_tree/json_visitor" + autoload :LanguageServer, "syntax_tree/language_server" + autoload :MatchVisitor, "syntax_tree/match_visitor" + autoload :Mermaid, "syntax_tree/mermaid" + autoload :MermaidVisitor, "syntax_tree/mermaid_visitor" + autoload :MutationVisitor, "syntax_tree/mutation_visitor" + autoload :Pattern, "syntax_tree/pattern" + autoload :PrettyPrintVisitor, "syntax_tree/pretty_print_visitor" + autoload :Search, "syntax_tree/search" + autoload :Translation, "syntax_tree/translation" + autoload :WithScope, "syntax_tree/with_scope" + autoload :YARV, "syntax_tree/yarv" + # This holds references to objects that respond to both #parse and #format # so that we can use them in the CLI. HANDLERS = {} @@ -62,40 +56,80 @@ module SyntaxTree # that Syntax Tree can format arbitrary parts of a document. DEFAULT_INDENTATION = 0 - # This is a hook provided so that plugins can register themselves as the - # handler for a particular file type. - def self.register_handler(extension, handler) - HANDLERS[extension] = handler + # Parses the given source and returns the formatted source. + def self.format( + source, + maxwidth = DEFAULT_PRINT_WIDTH, + base_indentation = DEFAULT_INDENTATION, + options: Formatter::Options.new + ) + format_node( + source, + parse(source), + maxwidth, + base_indentation, + options: options + ) end - # Parses the given source and returns the syntax tree. - def self.parse(source) - parser = Parser.new(source) - response = parser.parse - response unless parser.error? 
+ # Parses the given file and returns the formatted source. + def self.format_file( + filepath, + maxwidth = DEFAULT_PRINT_WIDTH, + base_indentation = DEFAULT_INDENTATION, + options: Formatter::Options.new + ) + format(read(filepath), maxwidth, base_indentation, options: options) end - # Parses the given source and returns the formatted source. - def self.format( + # Accepts a node in the tree and returns the formatted source. + def self.format_node( source, + node, maxwidth = DEFAULT_PRINT_WIDTH, base_indentation = DEFAULT_INDENTATION, options: Formatter::Options.new ) formatter = Formatter.new(source, [], maxwidth, options: options) - parse(source).format(formatter) + node.format(formatter) formatter.flush(base_indentation) formatter.output.join end + # Indexes the given source code to return a list of all class, module, and + # method definitions. Used to quickly provide indexing capability for IDEs or + # documentation generation. + def self.index(source) + Index.index(source) + end + + # Indexes the given file to return a list of all class, module, and method + # definitions. Used to quickly provide indexing capability for IDEs or + # documentation generation. + def self.index_file(filepath) + Index.index_file(filepath) + end + # A convenience method for creating a new mutation visitor. def self.mutation - visitor = Visitor::MutationVisitor.new + visitor = MutationVisitor.new yield visitor visitor end + # Parses the given source and returns the syntax tree. + def self.parse(source) + parser = Parser.new(source) + response = parser.parse + response unless parser.error? + end + + # Parses the given file and returns the syntax tree. + def self.parse_file(filepath) + parse(read(filepath)) + end + # Returns the source from the given filepath taking into account any potential # magic encoding comments. 
def self.read(filepath) @@ -111,9 +145,24 @@ def self.read(filepath) File.read(filepath, encoding: encoding) end + # This is a hook provided so that plugins can register themselves as the + # handler for a particular file type. + def self.register_handler(extension, handler) + HANDLERS[extension] = handler + end + # Searches through the given source using the given pattern and yields each # node in the tree that matches the pattern to the given block. def self.search(source, query, &block) - Search.new(Pattern.new(query).compile).scan(parse(source), &block) + pattern = Pattern.new(query).compile + program = parse(source) + + Search.new(pattern).scan(program, &block) + end + + # Searches through the given file using the given pattern and yields each + # node in the tree that matches the pattern to the given block. + def self.search_file(filepath, query, &block) + search(read(filepath), query, &block) end end diff --git a/lib/syntax_tree/basic_visitor.rb b/lib/syntax_tree/basic_visitor.rb index 34b7876e..bd8ea5f2 100644 --- a/lib/syntax_tree/basic_visitor.rb +++ b/lib/syntax_tree/basic_visitor.rb @@ -29,7 +29,7 @@ def initialize(error) def corrections @corrections ||= DidYouMean::SpellChecker.new( - dictionary: Visitor.visit_methods + dictionary: BasicVisitor.valid_visit_methods ).correct(visit_method) end @@ -40,7 +40,40 @@ def corrections end end + # This module is responsible for checking all of the methods defined within + # a given block to ensure that they are valid visit methods. + class VisitMethodsChecker < Module + Status = Struct.new(:checking) + + # This is the status of the checker. It's used to determine whether or not + # we should be checking the methods that are defined. It is kept as an + # instance variable so that it can be disabled later. + attr_reader :status + + def initialize + # We need the status to be an instance variable so that it can be + # accessed by the disable! 
method, but also a local variable so that it + # can be captured by the define_method block. + status = @status = Status.new(true) + + define_method(:method_added) do |name| + BasicVisitor.visit_method(name) if status.checking + super(name) + end + end + + def disable! + status.checking = false + end + end + class << self + # This is the list of all of the valid visit methods. + def valid_visit_methods + @valid_visit_methods ||= + Visitor.instance_methods.grep(/^visit_(?!child_nodes)/) + end + # This method is here to help folks write visitors. # # It's not always easy to ensure you're writing the correct method name in @@ -51,15 +84,21 @@ class << self # name. It will raise an error if the visit method you're defining isn't # actually a method on the parent visitor. def visit_method(method_name) - return if visit_methods.include?(method_name) + return if valid_visit_methods.include?(method_name) raise VisitMethodError, method_name end - # This is the list of all of the valid visit methods. + # This method is here to help folks write visitors. + # + # Within the given block, every method that is defined will be checked to + # ensure it's a valid visit method using the BasicVisitor::visit_method + # method defined above. def visit_methods - @visit_methods ||= - Visitor.instance_methods.grep(/^visit_(?!child_nodes)/) + checker = VisitMethodsChecker.new + extend(checker) + yield + checker.disable! end end diff --git a/lib/syntax_tree/cli.rb b/lib/syntax_tree/cli.rb index 392dd627..f2616c87 100644 --- a/lib/syntax_tree/cli.rb +++ b/lib/syntax_tree/cli.rb @@ -1,5 +1,6 @@ # frozen_string_literal: true +require "etc" require "optparse" module SyntaxTree @@ -153,6 +154,92 @@ def failure end end + # An action of the CLI that generates ctags for the given source. 
+ class CTags < Action + attr_reader :entries + + def initialize(options) + super(options) + @entries = [] + end + + def run(item) + lines = item.source.lines(chomp: true) + + SyntaxTree + .index(item.source) + .each do |entry| + line = lines[entry.location.line - 1] + pattern = "/^#{line.gsub("\\", "\\\\\\\\").gsub("/", "\\/")}$/;\"" + + entries << case entry + when SyntaxTree::Index::ModuleDefinition + parts = [entry.name, item.filepath, pattern, "m"] + + if entry.nesting != [[entry.name]] + parts << "class:#{entry.nesting.flatten.tap(&:pop).join(".")}" + end + + parts.join("\t") + when SyntaxTree::Index::ClassDefinition + parts = [entry.name, item.filepath, pattern, "c"] + + if entry.nesting != [[entry.name]] + parts << "class:#{entry.nesting.flatten.tap(&:pop).join(".")}" + end + + unless entry.superclass.empty? + inherits = entry.superclass.join(".").delete_prefix(".") + parts << "inherits:#{inherits}" + end + + parts.join("\t") + when SyntaxTree::Index::MethodDefinition + parts = [entry.name, item.filepath, pattern, "f"] + + unless entry.nesting.empty? + parts << "class:#{entry.nesting.flatten.join(".")}" + end + + parts.join("\t") + when SyntaxTree::Index::SingletonMethodDefinition + parts = [entry.name, item.filepath, pattern, "F"] + + unless entry.nesting.empty? + parts << "class:#{entry.nesting.flatten.join(".")}" + end + + parts.join("\t") + when SyntaxTree::Index::AliasMethodDefinition + parts = [entry.name, item.filepath, pattern, "a"] + + unless entry.nesting.empty? + parts << "class:#{entry.nesting.flatten.join(".")}" + end + + parts.join("\t") + when SyntaxTree::Index::ConstantDefinition + parts = [entry.name, item.filepath, pattern, "C"] + + unless entry.nesting.empty? 
+ parts << "class:#{entry.nesting.flatten.join(".")}" + end + + parts.join("\t") + end + end + end + + def success + puts(<<~HEADER) + !_TAG_FILE_FORMAT 2 /extended format; --format=1 will not append ;" to lines/ + !_TAG_FILE_SORTED 1 /0=unsorted, 1=sorted, 2=foldcase/ + HEADER + + entries.sort.each { |entry| puts(entry) } + end + end + # An action of the CLI that formats the source twice to check if the first # format is not idempotent. class Debug < Action @@ -238,7 +325,7 @@ def run(item) # representation. class Json < Action def run(item) - object = Visitor::JSONVisitor.new.visit(item.handler.parse(item.source)) + object = item.handler.parse(item.source).accept(JSONVisitor.new) puts JSON.pretty_generate(object) end end @@ -303,10 +390,11 @@ def run(item) options.print_width, options: options.formatter_options ) + changed = source != formatted - File.write(filepath, formatted) if item.writable? + File.write(filepath, formatted) if item.writable? && changed - color = source == formatted ? Color.gray(filepath) : filepath + color = changed ? filepath : Color.gray(filepath) delta = ((Time.now - start) * 1000).round puts "#{color} #{delta}ms" @@ -325,6 +413,9 @@ def run(item) #{Color.bold("stree check [--plugins=...] [--print-width=NUMBER] [-e SCRIPT] FILE")} Check that the given files are formatted as syntax tree would format them + #{Color.bold("stree ctags [-e SCRIPT] FILE")} + Print out a ctags-compatible index of the given files + #{Color.bold("stree debug [--plugins=...] 
[--print-width=NUMBER] [-e SCRIPT] FILE")} Check that the given files can be formatted idempotently @@ -486,6 +577,8 @@ def run(argv) AST.new(options) when "c", "check" Check.new(options) + when "ctags" + CTags.new(options) when "debug" Debug.new(options) when "doc" @@ -500,7 +593,6 @@ def run(argv) when "j", "json" Json.new(options) when "lsp" - require "syntax_tree/language_server" LanguageServer.new(print_width: options.print_width).run return 0 when "m", "match" diff --git a/lib/syntax_tree/database.rb b/lib/syntax_tree/database.rb new file mode 100644 index 00000000..c9981f35 --- /dev/null +++ b/lib/syntax_tree/database.rb @@ -0,0 +1,331 @@ +# frozen_string_literal: true + +module SyntaxTree + # Provides the ability to index source files into a database, then query for + # the nodes. + module Database + class IndexingVisitor < SyntaxTree::FieldVisitor + attr_reader :database, :filepath, :node_id + + def initialize(database, filepath) + @database = database + @filepath = filepath + @node_id = nil + end + + private + + def comments(node) + end + + def field(name, value) + return unless value.is_a?(SyntaxTree::Node) + + binds = [node_id, visit(value), name] + database.execute(<<~SQL, binds) + INSERT INTO edges (from_id, to_id, name) + VALUES (?, ?, ?) + SQL + end + + def list(name, values) + values.each_with_index do |value, index| + binds = [node_id, visit(value), name, index] + database.execute(<<~SQL, binds) + INSERT INTO edges (from_id, to_id, name, list_index) + VALUES (?, ?, ?, ?) + SQL + end + end + + def node(node, _name) + previous = node_id + binds = [ + node.class.name.delete_prefix("SyntaxTree::"), + filepath, + node.location.start_line, + node.location.start_column + ] + + database.execute(<<~SQL, binds) + INSERT INTO nodes (type, path, line, column) + VALUES (?, ?, ?, ?) 
+ SQL + + begin + @node_id = database.last_insert_row_id + yield + @node_id + ensure + @node_id = previous + end + end + + def text(name, value) + end + + def pairs(name, values) + values.each_with_index do |(key, value), index| + binds = [node_id, visit(key), "#{name}[0]", index] + database.execute(<<~SQL, binds) + INSERT INTO edges (from_id, to_id, name, list_index) + VALUES (?, ?, ?, ?) + SQL + + binds = [node_id, visit(value), "#{name}[1]", index] + database.execute(<<~SQL, binds) + INSERT INTO edges (from_id, to_id, name, list_index) + VALUES (?, ?, ?, ?) + SQL + end + end + end + + # Query for a specific type of node. + class TypeQuery + attr_reader :type + + def initialize(type) + @type = type + end + + def each(database, &block) + sql = "SELECT * FROM nodes WHERE type = ?" + database.execute(sql, type).each(&block) + end + end + + # Query for the attributes of a node, optionally also filtering by type. + class AttrQuery + attr_reader :type, :attrs + + def initialize(type, attrs) + @type = type + @attrs = attrs + end + + def each(database, &block) + joins = [] + binds = [] + + attrs.each do |name, query| + ids = query.each(database).map { |row| row[0] } + joins << <<~SQL + JOIN edges AS #{name} + ON #{name}.from_id = nodes.id + AND #{name}.name = ? + AND #{name}.to_id IN (#{(["?"] * ids.size).join(", ")}) + SQL + + binds.push(name).concat(ids) + end + + sql = +"SELECT nodes.* FROM nodes, edges #{joins.join(" ")}" + + if type + sql << " WHERE nodes.type = ?" + binds << type + end + + sql << " GROUP BY nodes.id" + database.execute(sql, binds).each(&block) + end + end + + # Query for the results of either query. + class OrQuery + attr_reader :left, :right + + def initialize(left, right) + @left = left + @right = right + end + + def each(database, &block) + left.each(database, &block) + right.each(database, &block) + end + end + + # A lazy query result. 
+ class QueryResult + attr_reader :database, :query + + def initialize(database, query) + @database = database + @query = query + end + + def each(&block) + return enum_for(__method__) unless block_given? + query.each(database, &block) + end + end + + # A pattern matching expression that will be compiled into a query. + class Pattern + class CompilationError < StandardError + end + + attr_reader :query + + def initialize(query) + @query = query + end + + def compile + program = + begin + SyntaxTree.parse("case nil\nin #{query}\nend") + rescue Parser::ParseError + raise CompilationError, query + end + + compile_node(program.statements.body.first.consequent.pattern) + end + + private + + def compile_error(node) + raise CompilationError, PP.pp(node, +"").chomp + end + + # Shortcut for combining two queries into one that returns the results of + # if either query matches. + def combine_or(left, right) + OrQuery.new(left, right) + end + + # in foo | bar + def compile_binary(node) + compile_error(node) if node.operator != :| + + combine_or(compile_node(node.left), compile_node(node.right)) + end + + # in Ident + def compile_const(node) + value = node.value + + if SyntaxTree.const_defined?(value, false) + clazz = SyntaxTree.const_get(value) + TypeQuery.new(clazz.name.delete_prefix("SyntaxTree::")) + else + compile_error(node) + end + end + + # in SyntaxTree::Ident + def compile_const_path_ref(node) + parent = node.parent + if !parent.is_a?(SyntaxTree::VarRef) || + !parent.value.is_a?(SyntaxTree::Const) + compile_error(node) + end + + if parent.value.value == "SyntaxTree" + compile_node(node.constant) + else + compile_error(node) + end + end + + # in Ident[value: String] + def compile_hshptn(node) + compile_error(node) unless node.keyword_rest.nil? + + attrs = {} + node.keywords.each do |keyword, value| + compile_error(node) unless keyword.is_a?(SyntaxTree::Label) + attrs[keyword.value.chomp(":")] = compile_node(value) + end + + type = node.constant ? 
compile_node(node.constant).type : nil + AttrQuery.new(type, attrs) + end + + # in Foo + def compile_var_ref(node) + value = node.value + + if value.is_a?(SyntaxTree::Const) + compile_node(value) + else + compile_error(node) + end + end + + def compile_node(node) + case node + when SyntaxTree::Binary + compile_binary(node) + when SyntaxTree::Const + compile_const(node) + when SyntaxTree::ConstPathRef + compile_const_path_ref(node) + when SyntaxTree::HshPtn + compile_hshptn(node) + when SyntaxTree::VarRef + compile_var_ref(node) + else + compile_error(node) + end + end + end + + class Connection + attr_reader :raw_connection + + def initialize(raw_connection) + @raw_connection = raw_connection + end + + def execute(query, binds = []) + raw_connection.execute(query, binds) + end + + def index_file(filepath) + program = SyntaxTree.parse(SyntaxTree.read(filepath)) + program.accept(IndexingVisitor.new(self, filepath)) + end + + def last_insert_row_id + raw_connection.last_insert_row_id + end + + def prepare + raw_connection.execute(<<~SQL) + CREATE TABLE nodes ( + id integer primary key, + type varchar(20), + path varchar(200), + line integer, + column integer + ); + SQL + + raw_connection.execute(<<~SQL) + CREATE INDEX nodes_type ON nodes (type); + SQL + + raw_connection.execute(<<~SQL) + CREATE TABLE edges ( + id integer primary key, + from_id integer, + to_id integer, + name varchar(20), + list_index integer + ); + SQL + + raw_connection.execute(<<~SQL) + CREATE INDEX edges_name ON edges (name); + SQL + end + + def search(query) + QueryResult.new(self, Pattern.new(query).compile) + end + end + end +end diff --git a/lib/syntax_tree/dsl.rb b/lib/syntax_tree/dsl.rb index 860a1fe5..4506aa04 100644 --- a/lib/syntax_tree/dsl.rb +++ b/lib/syntax_tree/dsl.rb @@ -210,12 +210,17 @@ def RAssign(value, operator, pattern) end # Create a new ClassDeclaration node. 
- def ClassDeclaration(constant, superclass, bodystmt) + def ClassDeclaration( + constant, + superclass, + bodystmt, + location = Location.default + ) ClassDeclaration.new( constant: constant, superclass: superclass, bodystmt: bodystmt, - location: Location.default + location: location ) end @@ -225,12 +230,12 @@ def Comma(value) end # Create a new Command node. - def Command(message, arguments, block) + def Command(message, arguments, block, location = Location.default) Command.new( message: message, arguments: arguments, block: block, - location: Location.default + location: location ) end @@ -247,8 +252,8 @@ def CommandCall(receiver, operator, message, arguments, block) end # Create a new Comment node. - def Comment(value, inline) - Comment.new(value: value, inline: inline, location: Location.default) + def Comment(value, inline, location = Location.default) + Comment.new(value: value, inline: inline, location: location) end # Create a new Const node. @@ -285,14 +290,21 @@ def CVar(value) end # Create a new DefNode node. - def DefNode(target, operator, name, params, bodystmt) + def DefNode( + target, + operator, + name, + params, + bodystmt, + location = Location.default + ) DefNode.new( target: target, operator: operator, name: name, params: params, bodystmt: bodystmt, - location: Location.default + location: location ) end @@ -565,8 +577,8 @@ def MAssign(target, value) end # Create a new MethodAddBlock node. - def MethodAddBlock(call, block) - MethodAddBlock.new(call: call, block: block, location: Location.default) + def MethodAddBlock(call, block, location = Location.default) + MethodAddBlock.new(call: call, block: block, location: location) end # Create a new MLHS node. @@ -779,7 +791,7 @@ def SClass(target, bodystmt) # Create a new Statements node. def Statements(body) - Statements.new(nil, body: body, location: Location.default) + Statements.new(body: body, location: Location.default) end # Create a new StringContent node. 
diff --git a/lib/syntax_tree/visitor/field_visitor.rb b/lib/syntax_tree/field_visitor.rb similarity index 91% rename from lib/syntax_tree/visitor/field_visitor.rb rename to lib/syntax_tree/field_visitor.rb index 6e643e09..f5607c67 100644 --- a/lib/syntax_tree/visitor/field_visitor.rb +++ b/lib/syntax_tree/field_visitor.rb @@ -1,55 +1,54 @@ # frozen_string_literal: true module SyntaxTree - class Visitor - # This is the parent class of a lot of built-in visitors for Syntax Tree. It - # reflects visiting each of the fields on every node in turn. It itself does - # not do anything with these fields, it leaves that behavior up to the - # subclass to implement. - # - # In order to properly use this class, you will need to subclass it and - # implement #comments, #field, #list, #node, #pairs, and #text. Those are - # documented here. - # - # == comments(node) - # - # This accepts the node that is being visited and does something depending - # on the comments attached to the node. - # - # == field(name, value) - # - # This accepts the name of the field being visited as a string (like - # "value") and the actual value of that field. The value can be a subclass - # of Node or any other type that can be held within the tree. - # - # == list(name, values) - # - # This accepts the name of the field being visited as well as a list of - # values. This is used, for example, when visiting something like the body - # of a Statements node. - # - # == node(name, node) - # - # This is the parent serialization method for each node. It is called with - # the node itself, as well as the type of the node as a string. The type - # is an internally used value that usually resembles the name of the - # ripper event that generated the node. The method should yield to the - # given block which then calls through to visit each of the fields on the - # node. 
- # - # == text(name, value) - # - # This accepts the name of the field being visited as well as a string - # value representing the value of the field. - # - # == pairs(name, values) - # - # This accepts the name of the field being visited as well as a list of - # pairs that represent the value of the field. It is used only in a couple - # of circumstances, like when visiting the list of optional parameters - # defined on a method. - # - class FieldVisitor < BasicVisitor + # This is the parent class of a lot of built-in visitors for Syntax Tree. It + # reflects visiting each of the fields on every node in turn. It itself does + # not do anything with these fields, it leaves that behavior up to the + # subclass to implement. + # + # In order to properly use this class, you will need to subclass it and + # implement #comments, #field, #list, #node, #pairs, and #text. Those are + # documented here. + # + # == comments(node) + # + # This accepts the node that is being visited and does something depending on + # the comments attached to the node. + # + # == field(name, value) + # + # This accepts the name of the field being visited as a string (like "value") + # and the actual value of that field. The value can be a subclass of Node or + # any other type that can be held within the tree. + # + # == list(name, values) + # + # This accepts the name of the field being visited as well as a list of + # values. This is used, for example, when visiting something like the body of + # a Statements node. + # + # == node(name, node) + # + # This is the parent serialization method for each node. It is called with the + # node itself, as well as the type of the node as a string. The type is an + # internally used value that usually resembles the name of the ripper event + # that generated the node. The method should yield to the given block which + # then calls through to visit each of the fields on the node. 
+ # + # == text(name, value) + # + # This accepts the name of the field being visited as well as a string value + # representing the value of the field. + # + # == pairs(name, values) + # + # This accepts the name of the field being visited as well as a list of pairs + # that represent the value of the field. It is used only in a couple of + # circumstances, like when visiting the list of optional parameters defined on + # a method. + # + class FieldVisitor < BasicVisitor + visit_methods do def visit_aref(node) node(node, "aref") do field("collection", node.collection) @@ -264,6 +263,7 @@ def visit_command(node) node(node, "command") do field("message", node.message) field("arguments", node.arguments) + field("block", node.block) if node.block comments(node) end end @@ -274,6 +274,7 @@ def visit_command_call(node) field("operator", node.operator) field("message", node.message) field("arguments", node.arguments) if node.arguments + field("block", node.block) if node.block comments(node) end end @@ -1017,14 +1018,14 @@ def visit_zsuper(node) def visit___end__(node) visit_token(node, "__end__") end + end - private + private - def visit_token(node, type) - node(node, type) do - field("value", node.value) - comments(node) - end + def visit_token(node, type) + node(node, type) do + field("value", node.value) + comments(node) end end end diff --git a/lib/syntax_tree/formatter.rb b/lib/syntax_tree/formatter.rb index fddc06fe..2b229885 100644 --- a/lib/syntax_tree/formatter.rb +++ b/lib/syntax_tree/formatter.rb @@ -21,11 +21,15 @@ def initialize(version) # that folks have become entrenched in their ways, we decided to provide a # small amount of configurability. 
class Options - attr_reader :quote, :trailing_comma, :target_ruby_version + attr_reader :quote, + :trailing_comma, + :disable_auto_ternary, + :target_ruby_version def initialize( quote: :default, trailing_comma: :default, + disable_auto_ternary: :default, target_ruby_version: :default ) @quote = @@ -50,6 +54,17 @@ def initialize( trailing_comma end + @disable_auto_ternary = + if disable_auto_ternary == :default + # We ship with a disable ternary plugin that will define this + # constant. That constant is responsible for determining the default + # disable ternary value. If it's defined, then we default to true. + # Otherwise we default to false. + defined?(DISABLE_AUTO_TERNARY) + else + disable_auto_ternary + end + @target_ruby_version = if target_ruby_version == :default # The default target Ruby version is the current version of Ruby. @@ -69,8 +84,13 @@ def initialize( # These options are overridden in plugins to we need to make sure they are # available here. - attr_reader :quote, :trailing_comma, :target_ruby_version + attr_reader :quote, + :trailing_comma, + :disable_auto_ternary, + :target_ruby_version + alias trailing_comma? trailing_comma + alias disable_auto_ternary? disable_auto_ternary def initialize(source, *args, options: Options.new) super(*args) @@ -81,6 +101,7 @@ def initialize(source, *args, options: Options.new) # Memoizing these values to make access faster. @quote = options.quote @trailing_comma = options.trailing_comma + @disable_auto_ternary = options.disable_auto_ternary @target_ruby_version = options.target_ruby_version end @@ -117,7 +138,7 @@ def format(node, stackable: true) # going to just print out the node as it was seen in the source. doc = if last_leading&.ignore? 
- range = source[node.location.start_char...node.location.end_char] + range = source[node.start_char...node.end_char] first = true range.each_line(chomp: true) do |line| diff --git a/lib/syntax_tree/index.rb b/lib/syntax_tree/index.rb new file mode 100644 index 00000000..0280749f --- /dev/null +++ b/lib/syntax_tree/index.rb @@ -0,0 +1,683 @@ +# frozen_string_literal: true + +module SyntaxTree + # This class can be used to build an index of the structure of Ruby files. We + # define an index as the list of constants and methods defined within a file. + # + # This index strives to be as fast as possible to better support tools like + # IDEs. Because of that, it has different backends depending on what + # functionality is available. + module Index + # This is a location for an index entry. + class Location + attr_reader :line, :column + + def initialize(line, column) + @line = line + @column = column + end + end + + # This entry represents a class definition using the class keyword. + class ClassDefinition + attr_reader :nesting, :name, :superclass, :location, :comments + + def initialize(nesting, name, superclass, location, comments) + @nesting = nesting + @name = name + @superclass = superclass + @location = location + @comments = comments + end + end + + # This entry represents a constant assignment. + class ConstantDefinition + attr_reader :nesting, :name, :location, :comments + + def initialize(nesting, name, location, comments) + @nesting = nesting + @name = name + @location = location + @comments = comments + end + end + + # This entry represents a module definition using the module keyword. + class ModuleDefinition + attr_reader :nesting, :name, :location, :comments + + def initialize(nesting, name, location, comments) + @nesting = nesting + @name = name + @location = location + @comments = comments + end + end + + # This entry represents a method definition using the def keyword. 
+ class MethodDefinition + attr_reader :nesting, :name, :location, :comments + + def initialize(nesting, name, location, comments) + @nesting = nesting + @name = name + @location = location + @comments = comments + end + end + + # This entry represents a singleton method definition using the def keyword + # with a specified target. + class SingletonMethodDefinition + attr_reader :nesting, :name, :location, :comments + + def initialize(nesting, name, location, comments) + @nesting = nesting + @name = name + @location = location + @comments = comments + end + end + + # This entry represents a method definition that was created using the alias + # keyword. + class AliasMethodDefinition + attr_reader :nesting, :name, :location, :comments + + def initialize(nesting, name, location, comments) + @nesting = nesting + @name = name + @location = location + @comments = comments + end + end + + # When you're using the instruction sequence backend, this class is used to + # lazily parse comments out of the source code. + class FileComments + # We use the ripper library to pull out source comments. + class Parser < Ripper + attr_reader :comments + + def initialize(*) + super + @comments = {} + end + + def on_comment(value) + comments[lineno] = value.chomp + end + end + + # This represents the Ruby source in the form of a file. When it needs to + # be read we'll read the file. + class FileSource + attr_reader :filepath + + def initialize(filepath) + @filepath = filepath + end + + def source + File.read(filepath) + end + end + + # This represents the Ruby source in the form of a string. When it needs + # to be read the string is returned. 
+ class StringSource + attr_reader :source + + def initialize(source) + @source = source + end + end + + attr_reader :source + + def initialize(source) + @source = source + end + + def comments + @comments ||= Parser.new(source.source).tap(&:parse).comments + end + end + + # This class handles parsing comments from Ruby source code in the case that + # we use the instruction sequence backend. Because the instruction sequence + # backend doesn't provide comments (since they are dropped) we provide this + # interface to lazily parse them out. + class EntryComments + include Enumerable + attr_reader :file_comments, :location + + def initialize(file_comments, location) + @file_comments = file_comments + @location = location + end + + def each(&block) + line = location.line - 1 + result = [] + + while line >= 0 && (comment = file_comments.comments[line]) + result.unshift(comment) + line -= 1 + end + + result.each(&block) + end + end + + # This backend creates the index using RubyVM::InstructionSequence, which is + # faster than using the Syntax Tree parser, but is not available on all + # runtimes. 
+ class ISeqBackend + VM_DEFINECLASS_TYPE_CLASS = 0x00 + VM_DEFINECLASS_TYPE_SINGLETON_CLASS = 0x01 + VM_DEFINECLASS_TYPE_MODULE = 0x02 + VM_DEFINECLASS_FLAG_SCOPED = 0x08 + VM_DEFINECLASS_FLAG_HAS_SUPERCLASS = 0x10 + + def index(source) + index_iseq( + RubyVM::InstructionSequence.compile(source).to_a, + FileComments.new(FileComments::StringSource.new(source)) + ) + end + + def index_file(filepath) + index_iseq( + RubyVM::InstructionSequence.compile_file(filepath).to_a, + FileComments.new(FileComments::FileSource.new(filepath)) + ) + end + + private + + def location_for(iseq) + code_location = iseq[4][:code_location] + Location.new(code_location[0], code_location[1]) + end + + def find_constant_path(insns, index) + index -= 1 while index >= 0 && + ( + insns[index].is_a?(Integer) || + ( + insns[index].is_a?(Array) && + %i[swap topn].include?(insns[index][0]) + ) + ) + insn = insns[index] + + if insn.is_a?(Array) && insn[0] == :opt_getconstant_path + # In this case we're on Ruby 3.2+ and we have an opt_getconstant_path + # instruction, so we already know all of the symbols in the nesting. + [index - 1, insn[1]] + elsif insn.is_a?(Symbol) && insn.match?(/\Alabel_\d+/) + # Otherwise, if we have a label then this is very likely the + # destination of an opt_getinlinecache instruction, in which case + # we'll walk backwards to grab up all of the constants. 
+ names = [] + + index -= 1 + until insns[index].is_a?(Array) && + insns[index][0] == :opt_getinlinecache + if insns[index].is_a?(Array) && insns[index][0] == :getconstant + names.unshift(insns[index][1]) + end + + index -= 1 + end + + [index - 1, names] + else + [index, []] + end + end + + def find_attr_arguments(insns, index) + orig_argc = insns[index][1][:orig_argc] + names = [] + + current = index - 1 + while current >= 0 && names.length < orig_argc + if insns[current].is_a?(Array) && insns[current][0] == :putobject + names.unshift(insns[current][1]) + end + + current -= 1 + end + + names if insns[current] == [:putself] && names.length == orig_argc + end + + def method_definition(nesting, name, location, file_comments) + comments = EntryComments.new(file_comments, location) + + if nesting.last == [:singletonclass] + SingletonMethodDefinition.new( + nesting[0...-1], + name, + location, + comments + ) + else + MethodDefinition.new(nesting, name, location, comments) + end + end + + def index_iseq(iseq, file_comments) + results = [] + queue = [[iseq, []]] + + while (current_iseq, current_nesting = queue.shift) + file = current_iseq[5] + line = current_iseq[8] + insns = current_iseq[13] + + insns.each_with_index do |insn, index| + case insn + when Integer + line = insn + next + when Array + # continue on + else + # skip everything else + next + end + + case insn[0] + when :defineclass + _, name, class_iseq, flags = insn + next_nesting = current_nesting.dup + + # This is the index we're going to search for the nested constant + # path within the declaration name. + constant_index = index - 2 + + # This is the superclass of the class being defined. + superclass = [] + + # If there is a superclass, then we're going to find it here and + # then update the constant_index as necessary. + if flags & VM_DEFINECLASS_FLAG_HAS_SUPERCLASS > 0 + constant_index, superclass = + find_constant_path(insns, index - 1) + + if superclass.empty? 
+ warn("#{file}:#{line}: superclass with non constant path") + next + end + end + + if (_, nesting = find_constant_path(insns, constant_index)) + # If there is a constant path in the class name, then we need to + # handle that by updating the nesting. + next_nesting << (nesting << name) + else + # Otherwise we'll add the class name to the nesting. + next_nesting << [name] + end + + if flags == VM_DEFINECLASS_TYPE_SINGLETON_CLASS + # At the moment, we don't support singletons that aren't + # defined on self. We could, but it would require more + # emulation. + if insns[index - 2] != [:putself] + warn( + "#{file}:#{line}: singleton class with non-self receiver" + ) + next + end + elsif flags & VM_DEFINECLASS_TYPE_MODULE > 0 + location = location_for(class_iseq) + results << ModuleDefinition.new( + next_nesting, + name, + location, + EntryComments.new(file_comments, location) + ) + else + location = location_for(class_iseq) + results << ClassDefinition.new( + next_nesting, + name, + superclass, + location, + EntryComments.new(file_comments, location) + ) + end + + queue << [class_iseq, next_nesting] + when :definemethod + location = location_for(insn[2]) + results << method_definition( + current_nesting, + insn[1], + location, + file_comments + ) + when :definesmethod + if insns[index - 1] != [:putself] + warn("#{file}:#{line}: singleton method with non-self receiver") + next + end + + location = location_for(insn[2]) + results << SingletonMethodDefinition.new( + current_nesting, + insn[1], + location, + EntryComments.new(file_comments, location) + ) + when :setconstant + next_nesting = current_nesting.dup + name = insn[1] + + _, nesting = find_constant_path(insns, index - 1) + next_nesting << nesting if nesting.any? 
+ + location = Location.new(line, :unknown) + results << ConstantDefinition.new( + next_nesting, + name, + location, + EntryComments.new(file_comments, location) + ) + when :opt_send_without_block, :send + case insn[1][:mid] + when :attr_reader, :attr_writer, :attr_accessor + attr_names = find_attr_arguments(insns, index) + next unless attr_names + + location = Location.new(line, :unknown) + attr_names.each do |attr_name| + if insn[1][:mid] != :attr_writer + results << method_definition( + current_nesting, + attr_name, + location, + file_comments + ) + end + + if insn[1][:mid] != :attr_reader + results << method_definition( + current_nesting, + :"#{attr_name}=", + location, + file_comments + ) + end + end + when :"core#set_method_alias" + # Now we have to validate that the alias is happening with a + # non-interpolated value. To do this we'll match the specific + # pattern we're expecting. + values = + insns[(index - 4)...index].map do |previous| + previous.is_a?(Array) ? previous[0] : previous + end + if values != + %i[putspecialobject putspecialobject putobject putobject] + next + end + + # Now that we know it's in the structure we want it, we can use + # the values of the putobject to determine the alias. + location = Location.new(line, :unknown) + results << AliasMethodDefinition.new( + current_nesting, + insns[index - 2][1], + location, + EntryComments.new(file_comments, location) + ) + end + end + end + end + + results + end + end + + # This backend creates the index using the Syntax Tree parser and a visitor. + # It is not as fast as using the instruction sequences directly, but is + # supported on all runtimes. 
+ class ParserBackend + class ConstantNameVisitor < Visitor + def visit_const_ref(node) + [node.constant.value.to_sym] + end + + def visit_const_path_ref(node) + visit(node.parent) << node.constant.value.to_sym + end + + def visit_var_ref(node) + [node.value.value.to_sym] + end + end + + class IndexVisitor < Visitor + attr_reader :results, :nesting, :statements + + def initialize + @results = [] + @nesting = [] + @statements = nil + end + + visit_methods do + def visit_alias(node) + if node.left.is_a?(SymbolLiteral) && node.right.is_a?(SymbolLiteral) + location = + Location.new( + node.location.start_line, + node.location.start_column + ) + + results << AliasMethodDefinition.new( + nesting.dup, + node.left.value.value.to_sym, + location, + comments_for(node) + ) + end + + super + end + + def visit_assign(node) + if node.target.is_a?(VarField) && node.target.value.is_a?(Const) + location = + Location.new( + node.location.start_line, + node.location.start_column + ) + + results << ConstantDefinition.new( + nesting.dup, + node.target.value.value.to_sym, + location, + comments_for(node) + ) + end + + super + end + + def visit_class(node) + names = node.constant.accept(ConstantNameVisitor.new) + nesting << names + + location = + Location.new(node.location.start_line, node.location.start_column) + + superclass = + if node.superclass + visited = node.superclass.accept(ConstantNameVisitor.new) + + if visited == [[]] + raise NotImplementedError, "superclass with non constant path" + end + + visited + else + [] + end + + results << ClassDefinition.new( + nesting.dup, + names.last, + superclass, + location, + comments_for(node) + ) + + super + nesting.pop + end + + def visit_command(node) + case node.message.value + when "attr_reader", "attr_writer", "attr_accessor" + comments = comments_for(node) + location = + Location.new( + node.location.start_line, + node.location.start_column + ) + + node.arguments.parts.each do |argument| + next unless argument.is_a?(SymbolLiteral) + 
name = argument.value.value.to_sym + + if node.message.value != "attr_writer" + results << MethodDefinition.new( + nesting.dup, + name, + location, + comments + ) + end + + if node.message.value != "attr_reader" + results << MethodDefinition.new( + nesting.dup, + :"#{name}=", + location, + comments + ) + end + end + end + + super + end + + def visit_def(node) + name = node.name.value.to_sym + location = + Location.new(node.location.start_line, node.location.start_column) + + results << if node.target.nil? + MethodDefinition.new( + nesting.dup, + name, + location, + comments_for(node) + ) + else + SingletonMethodDefinition.new( + nesting.dup, + name, + location, + comments_for(node) + ) + end + + super + end + + def visit_module(node) + names = node.constant.accept(ConstantNameVisitor.new) + nesting << names + + location = + Location.new(node.location.start_line, node.location.start_column) + + results << ModuleDefinition.new( + nesting.dup, + names.last, + location, + comments_for(node) + ) + + super + nesting.pop + end + + def visit_program(node) + super + results + end + + def visit_statements(node) + @statements = node + super + end + end + + private + + def comments_for(node) + comments = [] + + body = statements.body + line = node.location.start_line - 1 + index = body.index(node) + return comments if index.nil? + + index -= 1 + while index >= 0 && body[index].is_a?(Comment) && + (line - body[index].location.start_line < 2) + comments.unshift(body[index].value) + line = body[index].location.start_line + index -= 1 + end + + comments + end + end + + def index(source) + SyntaxTree.parse(source).accept(IndexVisitor.new) + end + + def index_file(filepath) + index(SyntaxTree.read(filepath)) + end + end + + # The class defined here is used to perform the indexing, depending on what + # functionality is available from the runtime. + INDEX_BACKEND = + defined?(RubyVM::InstructionSequence) ? 
ISeqBackend : ParserBackend + + # This method accepts source code and then indexes it. + def self.index(source, backend: INDEX_BACKEND.new) + backend.index(source) + end + + # This method accepts a filepath and then indexes it. + def self.index_file(filepath, backend: INDEX_BACKEND.new) + backend.index_file(filepath) + end + end +end diff --git a/lib/syntax_tree/json_visitor.rb b/lib/syntax_tree/json_visitor.rb new file mode 100644 index 00000000..7ad3fba0 --- /dev/null +++ b/lib/syntax_tree/json_visitor.rb @@ -0,0 +1,55 @@ +# frozen_string_literal: true + +require "json" + +module SyntaxTree + # This visitor transforms the AST into a hash that contains only primitives + # that can be easily serialized into JSON. + class JSONVisitor < FieldVisitor + attr_reader :target + + def initialize + @target = nil + end + + private + + def comments(node) + target[:comments] = visit_all(node.comments) + end + + def field(name, value) + target[name] = value.is_a?(Node) ? visit(value) : value + end + + def list(name, values) + target[name] = visit_all(values) + end + + def node(node, type) + previous = @target + @target = { type: type, location: visit_location(node.location) } + yield + @target + ensure + @target = previous + end + + def pairs(name, values) + target[name] = values.map { |(key, value)| [visit(key), visit(value)] } + end + + def text(name, value) + target[name] = value + end + + def visit_location(location) + [ + location.start_line, + location.start_char, + location.end_line, + location.end_char + ] + end + end +end diff --git a/lib/syntax_tree/language_server.rb b/lib/syntax_tree/language_server.rb index a7b23664..6ec81030 100644 --- a/lib/syntax_tree/language_server.rb +++ b/lib/syntax_tree/language_server.rb @@ -2,10 +2,9 @@ require "cgi" require "json" +require "pp" require "uri" -require_relative "language_server/inlay_hints" - module SyntaxTree # Syntax Tree additionally ships with a language server conforming to the # language server protocol. 
It can be invoked through the CLI by running: @@ -13,6 +12,162 @@ module SyntaxTree # stree lsp # class LanguageServer + # This class provides inlay hints for the language server. For more + # information, see the spec here: + # https://github.com/microsoft/language-server-protocol/issues/956. + class InlayHints < Visitor + # This represents a hint that is going to be displayed in the editor. + class Hint + attr_reader :line, :character, :label + + def initialize(line:, character:, label:) + @line = line + @character = character + @label = label + end + + # This is the shape that the LSP expects. + def to_json(*opts) + { + position: { + line: line, + character: character + }, + label: label + }.to_json(*opts) + end + end + + attr_reader :stack, :hints + + def initialize + @stack = [] + @hints = [] + end + + def visit(node) + stack << node + result = super + stack.pop + result + end + + visit_methods do + # Adds parentheses around assignments contained within the default + # values of parameters. For example, + # + # def foo(a = b = c) + # end + # + # becomes + # + # def foo(a = ₍b = c₎) + # end + # + def visit_assign(node) + parentheses(node.location) if stack[-2].is_a?(Params) + super + end + + # Adds parentheses around binary expressions to make it clear which + # subexpression will be evaluated first. For example, + # + # a + b * c + # + # becomes + # + # a + ₍b * c₎ + # + def visit_binary(node) + case stack[-2] + when Assign, OpAssign + parentheses(node.location) + when Binary + parentheses(node.location) if stack[-2].operator != node.operator + end + + super + end + + # Adds parentheses around ternary operators contained within certain + # expressions where it could be confusing which subexpression will get + # evaluated first. For example, + # + # a ? b : c ? d : e + # + # becomes + # + # a ? b : ₍c ? 
d : e₎ + # + def visit_if_op(node) + case stack[-2] + when Assign, Binary, IfOp, OpAssign + parentheses(node.location) + end + + super + end + + # Adds the implicitly rescued StandardError into a bare rescue clause. + # For example, + # + # begin + # rescue + # end + # + # becomes + # + # begin + # rescue StandardError + # end + # + def visit_rescue(node) + if node.exception.nil? + hints << Hint.new( + line: node.location.start_line - 1, + character: node.location.start_column + "rescue".length, + label: " StandardError" + ) + end + + super + end + + # Adds parentheses around unary statements using the - operator that are + # contained within Binary nodes. For example, + # + # -a + b + # + # becomes + # + # ₍-a₎ + b + # + def visit_unary(node) + if stack[-2].is_a?(Binary) && (node.operator == "-") + parentheses(node.location) + end + + super + end + end + + private + + def parentheses(location) + hints << Hint.new( + line: location.start_line - 1, + character: location.start_column, + label: "₍" + ) + + hints << Hint.new( + line: location.end_line - 1, + character: location.end_column, + label: "₎" + ) + end + end + # This is a small module that effectively mirrors pattern matching. We're # using it so that we can support truffleruby without having to ignore the # language server. diff --git a/lib/syntax_tree/language_server/inlay_hints.rb b/lib/syntax_tree/language_server/inlay_hints.rb deleted file mode 100644 index dfd63b8d..00000000 --- a/lib/syntax_tree/language_server/inlay_hints.rb +++ /dev/null @@ -1,159 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - class LanguageServer - # This class provides inlay hints for the language server. For more - # information, see the spec here: - # https://github.com/microsoft/language-server-protocol/issues/956. - class InlayHints < Visitor - # This represents a hint that is going to be displayed in the editor. 
- class Hint - attr_reader :line, :character, :label - - def initialize(line:, character:, label:) - @line = line - @character = character - @label = label - end - - # This is the shape that the LSP expects. - def to_json(*opts) - { - position: { - line: line, - character: character - }, - label: label - }.to_json(*opts) - end - end - - attr_reader :stack, :hints - - def initialize - @stack = [] - @hints = [] - end - - def visit(node) - stack << node - result = super - stack.pop - result - end - - # Adds parentheses around assignments contained within the default values - # of parameters. For example, - # - # def foo(a = b = c) - # end - # - # becomes - # - # def foo(a = ₍b = c₎) - # end - # - def visit_assign(node) - parentheses(node.location) if stack[-2].is_a?(Params) - super - end - - # Adds parentheses around binary expressions to make it clear which - # subexpression will be evaluated first. For example, - # - # a + b * c - # - # becomes - # - # a + ₍b * c₎ - # - def visit_binary(node) - case stack[-2] - when Assign, OpAssign - parentheses(node.location) - when Binary - parentheses(node.location) if stack[-2].operator != node.operator - end - - super - end - - # Adds parentheses around ternary operators contained within certain - # expressions where it could be confusing which subexpression will get - # evaluated first. For example, - # - # a ? b : c ? d : e - # - # becomes - # - # a ? b : ₍c ? d : e₎ - # - def visit_if_op(node) - case stack[-2] - when Assign, Binary, IfOp, OpAssign - parentheses(node.location) - end - - super - end - - # Adds the implicitly rescued StandardError into a bare rescue clause. For - # example, - # - # begin - # rescue - # end - # - # becomes - # - # begin - # rescue StandardError - # end - # - def visit_rescue(node) - if node.exception.nil? 
- hints << Hint.new( - line: node.location.start_line - 1, - character: node.location.start_column + "rescue".length, - label: " StandardError" - ) - end - - super - end - - # Adds parentheses around unary statements using the - operator that are - # contained within Binary nodes. For example, - # - # -a + b - # - # becomes - # - # ₍-a₎ + b - # - def visit_unary(node) - if stack[-2].is_a?(Binary) && (node.operator == "-") - parentheses(node.location) - end - - super - end - - private - - def parentheses(location) - hints << Hint.new( - line: location.start_line - 1, - character: location.start_column, - label: "₍" - ) - - hints << Hint.new( - line: location.end_line - 1, - character: location.end_column, - label: "₎" - ) - end - end - end -end diff --git a/lib/syntax_tree/match_visitor.rb b/lib/syntax_tree/match_visitor.rb new file mode 100644 index 00000000..ca5bf234 --- /dev/null +++ b/lib/syntax_tree/match_visitor.rb @@ -0,0 +1,120 @@ +# frozen_string_literal: true + +module SyntaxTree + # This visitor transforms the AST into a Ruby pattern matching expression that + # would match correctly against the AST. + class MatchVisitor < FieldVisitor + attr_reader :q + + def initialize(q) + @q = q + end + + def visit(node) + case node + when Node + super + when String + # pp will split up a string on newlines and concat them together using a + # "+" operator. This breaks the pattern matching expression. So instead + # we're going to check here for strings and manually put the entire + # value into the output buffer. + q.text(node.inspect) + else + node.pretty_print(q) + end + end + + private + + def comments(node) + return if node.comments.empty? 
+ + q.nest(0) do + q.text("comments: [") + q.indent do + q.breakable("") + q.seplist(node.comments) { |comment| visit(comment) } + end + q.breakable("") + q.text("]") + end + end + + def field(name, value) + q.nest(0) do + q.text(name) + q.text(": ") + visit(value) + end + end + + def list(name, values) + q.group do + q.text(name) + q.text(": [") + q.indent do + q.breakable("") + q.seplist(values) { |value| visit(value) } + end + q.breakable("") + q.text("]") + end + end + + def node(node, _type) + items = [] + q.with_target(items) { yield } + + if items.empty? + q.text(node.class.name) + return + end + + q.group do + q.text(node.class.name) + q.text("[") + q.indent do + q.breakable("") + q.seplist(items) { |item| q.target << item } + end + q.breakable("") + q.text("]") + end + end + + def pairs(name, values) + q.group do + q.text(name) + q.text(": [") + q.indent do + q.breakable("") + q.seplist(values) do |(key, value)| + q.group do + q.text("[") + q.indent do + q.breakable("") + visit(key) + q.text(",") + q.breakable + visit(value || nil) + end + q.breakable("") + q.text("]") + end + end + end + q.breakable("") + q.text("]") + end + end + + def text(name, value) + q.nest(0) do + q.text(name) + q.text(": ") + value.pretty_print(q) + end + end + end +end diff --git a/lib/syntax_tree/mermaid.rb b/lib/syntax_tree/mermaid.rb new file mode 100644 index 00000000..68ea4734 --- /dev/null +++ b/lib/syntax_tree/mermaid.rb @@ -0,0 +1,177 @@ +# frozen_string_literal: true + +require "cgi" +require "stringio" + +module SyntaxTree + # This module is responsible for rendering mermaid (https://mermaid.js.org/) + # flow charts. + module Mermaid + # This is the main class that handles rendering a flowchart. It keeps track + # of its nodes and links and renders them according to the mermaid syntax. 
+ class FlowChart + attr_reader :output, :prefix, :nodes, :links + + def initialize + @output = StringIO.new + @output.puts("flowchart TD") + @prefix = " " + + @nodes = {} + @links = [] + end + + # Retrieve a node that has already been added to the flowchart by its id. + def fetch(id) + nodes.fetch(id) + end + + # Add a link to the flowchart between two nodes with an optional label. + def link(from, to, label = nil, type: :directed, color: nil) + link = Link.new(from, to, label, type, color) + links << link + + output.puts("#{prefix}#{link.render}") + link + end + + # Add a node to the flowchart with an optional label. + def node(id, label = " ", shape: :rectangle) + node = Node.new(id, label, shape) + nodes[id] = node + + output.puts("#{prefix}#{nodes[id].render}") + node + end + + # Add a subgraph to the flowchart. Within the given block, all of the + # nodes will be rendered within the subgraph. + def subgraph(label) + output.puts("#{prefix}subgraph #{Mermaid.escape(label)}") + + previous = prefix + @prefix = "#{prefix} " + + begin + yield + ensure + @prefix = previous + output.puts("#{prefix}end") + end + end + + # Return the rendered flowchart. + def render + links.each_with_index do |link, index| + if link.color + output.puts("#{prefix}linkStyle #{index} stroke:#{link.color}") + end + end + + output.string + end + end + + # This class represents a link between two nodes in a flowchart. It is not + # meant to be interacted with directly, but rather used as a data structure + # by the FlowChart class. 
+ class Link + TYPES = %i[directed dotted].freeze + COLORS = %i[green red].freeze + + attr_reader :from, :to, :label, :type, :color + + def initialize(from, to, label, type, color) + raise unless TYPES.include?(type) + raise if color && !COLORS.include?(color) + + @from = from + @to = to + @label = label + @type = type + @color = color + end + + def render + left_side, right_side, full_side = sides + + if label + escaped = Mermaid.escape(label) + "#{from.id} #{left_side} #{escaped} #{right_side} #{to.id}" + else + "#{from.id} #{full_side} #{to.id}" + end + end + + private + + def sides + case type + when :directed + %w[-- --> -->] + when :dotted + %w[-. .-> -.->] + end + end + end + + # This class represents a node in a flowchart. Unlike the Link class, it can + # be used directly. It is the return value of the #node method, and is meant + # to be passed around to #link methods to create links between nodes. + class Node + SHAPES = %i[circle rectangle rounded stadium].freeze + + attr_reader :id, :label, :shape + + def initialize(id, label, shape) + raise unless SHAPES.include?(shape) + + @id = id + @label = label + @shape = shape + end + + def render + left_bound, right_bound = bounds + "#{id}#{left_bound}#{Mermaid.escape(label)}#{right_bound}" + end + + private + + def bounds + case shape + when :circle + %w[(( ))] + when :rectangle + ["[", "]"] + when :rounded + %w[( )] + when :stadium + ["([", "])"] + end + end + end + + class << self + # Escape a label to be used in the mermaid syntax. This is used to escape + # HTML entities such that they render properly within the quotes. + def escape(label) + "\"#{CGI.escapeHTML(label)}\"" + end + + # Create a new flowchart. If a block is given, it will be yielded to and + # the flowchart will be rendered. Otherwise, the flowchart will be + # returned. + def flowchart + flowchart = FlowChart.new + + if block_given? 
+ yield flowchart + flowchart.render + else + flowchart + end + end + end + end +end diff --git a/lib/syntax_tree/mermaid_visitor.rb b/lib/syntax_tree/mermaid_visitor.rb new file mode 100644 index 00000000..fc9f6706 --- /dev/null +++ b/lib/syntax_tree/mermaid_visitor.rb @@ -0,0 +1,69 @@ +# frozen_string_literal: true + +module SyntaxTree + # This visitor transforms the AST into a mermaid flow chart. + class MermaidVisitor < FieldVisitor + attr_reader :flowchart, :target + + def initialize + @flowchart = Mermaid.flowchart + @target = nil + end + + def visit_program(node) + super + flowchart.render + end + + private + + def comments(node) + # Ignore + end + + def field(name, value) + case value + when nil + # skip + when Node + flowchart.link(target, visit(value), name) + else + to = + flowchart.node("#{target.id}_#{name}", value.inspect, shape: :stadium) + flowchart.link(target, to, name) + end + end + + def list(name, values) + values.each_with_index do |value, index| + field("#{name}[#{index}]", value) + end + end + + def node(node, type) + previous_target = target + + begin + @target = flowchart.node("node_#{node.object_id}", type) + yield + @target + ensure + @target = previous_target + end + end + + def pairs(name, values) + values.each_with_index do |(key, value), index| + to = flowchart.node("#{target.id}_#{name}_#{index}", shape: :circle) + + flowchart.link(target, to, "#{name}[#{index}]") + flowchart.link(to, visit(key), "[0]") + flowchart.link(to, visit(value), "[1]") if value + end + end + + def text(name, value) + field(name, value) + end + end +end diff --git a/lib/syntax_tree/visitor/mutation_visitor.rb b/lib/syntax_tree/mutation_visitor.rb similarity index 94% rename from lib/syntax_tree/visitor/mutation_visitor.rb rename to lib/syntax_tree/mutation_visitor.rb index 65f8c5ba..0b4b9357 100644 --- a/lib/syntax_tree/visitor/mutation_visitor.rb +++ b/lib/syntax_tree/mutation_visitor.rb @@ -1,39 +1,39 @@ # frozen_string_literal: true module SyntaxTree - 
class Visitor - # This visitor walks through the tree and copies each node as it is being - # visited. This is useful for mutating the tree before it is formatted. - class MutationVisitor < BasicVisitor - attr_reader :mutations + # This visitor walks through the tree and copies each node as it is being + # visited. This is useful for mutating the tree before it is formatted. + class MutationVisitor < BasicVisitor + attr_reader :mutations - def initialize - @mutations = [] - end - - # Create a new mutation based on the given query that will mutate the node - # using the given block. The block should return a new node that will take - # the place of the given node in the tree. These blocks frequently make - # use of the `copy` method on nodes to create a new node with the same - # properties as the original node. - def mutate(query, &block) - mutations << [Pattern.new(query).compile, block] - end + def initialize + @mutations = [] + end - # This is the base visit method for each node in the tree. It first - # creates a copy of the node using the visit_* methods defined below. Then - # it checks each mutation in sequence and calls it if it finds a match. - def visit(node) - return unless node - result = node.accept(self) + # Create a new mutation based on the given query that will mutate the node + # using the given block. The block should return a new node that will take + # the place of the given node in the tree. These blocks frequently make use + # of the `copy` method on nodes to create a new node with the same + # properties as the original node. + def mutate(query, &block) + mutations << [Pattern.new(query).compile, block] + end - mutations.each do |(pattern, mutation)| - result = mutation.call(result) if pattern.call(result) - end + # This is the base visit method for each node in the tree. It first creates + # a copy of the node using the visit_* methods defined below. Then it checks + # each mutation in sequence and calls it if it finds a match. 
+ def visit(node) + return unless node + result = node.accept(self) - result + mutations.each do |(pattern, mutation)| + result = mutation.call(result) if pattern.call(result) end + result + end + + visit_methods do # Visit a BEGINBlock node. def visit_BEGIN(node) node.copy( diff --git a/lib/syntax_tree/node.rb b/lib/syntax_tree/node.rb index f19cfb2c..3b676552 100644 --- a/lib/syntax_tree/node.rb +++ b/lib/syntax_tree/node.rb @@ -126,18 +126,28 @@ def format(q) raise NotImplementedError end + def start_char + location.start_char + end + + def end_char + location.end_char + end + def pretty_print(q) - visitor = Visitor::PrettyPrintVisitor.new(q) - visitor.visit(self) + accept(PrettyPrintVisitor.new(q)) end def to_json(*opts) - visitor = Visitor::JSONVisitor.new - visitor.visit(self).to_json(*opts) + accept(JSONVisitor.new).to_json(*opts) + end + + def to_mermaid + accept(MermaidVisitor.new) end def construct_keys - PrettierPrint.format(+"") { |q| Visitor::MatchVisitor.new(q).visit(self) } + PrettierPrint.format(+"") { |q| accept(MatchVisitor.new(q)) } end end @@ -555,7 +565,7 @@ def var_alias? # collection[] # class ARef < Node - # [untyped] the value being indexed + # [Node] the value being indexed attr_reader :collection # [nil | Args] the value being passed within the brackets @@ -633,7 +643,7 @@ def ===(other) # collection[index] = value # class ARefField < Node - # [untyped] the value being indexed + # [Node] the value being indexed attr_reader :collection # [nil | Args] the value being passed within the brackets @@ -775,12 +785,17 @@ def ===(other) other.is_a?(ArgParen) && arguments === other.arguments end + def arity + arguments&.arity || 0 + end + private def trailing_comma? + arguments = self.arguments return false unless arguments.is_a?(Args) - parts = arguments.parts + parts = arguments.parts if parts.last.is_a?(ArgBlock) # If the last argument is a block, then we can't put a trailing comma # after it without resulting in a syntax error. 
@@ -804,7 +819,7 @@ def trailing_comma? # method(first, second, third) # class Args < Node - # [Array[ untyped ]] the arguments that this node wraps + # [Array[ Node ]] the arguments that this node wraps attr_reader :parts # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -848,6 +863,21 @@ def format(q) def ===(other) other.is_a?(Args) && ArrayMatch.call(parts, other.parts) end + + def arity + parts.sum do |part| + case part + when ArgStar, ArgsForward + Float::INFINITY + when BareAssocHash + part.assocs.sum do |assoc| + assoc.is_a?(AssocSplat) ? Float::INFINITY : 1 + end + else + 1 + end + end + end end # ArgBlock represents using a block operator on an expression. @@ -855,7 +885,7 @@ def ===(other) # method(&expression) # class ArgBlock < Node - # [nil | untyped] the expression being turned into a block + # [nil | Node] the expression being turned into a block attr_reader :value # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -907,7 +937,7 @@ def ===(other) # method(*arguments) # class ArgStar < Node - # [nil | untyped] the expression being splatted + # [nil | Node] the expression being splatted attr_reader :value # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -1008,6 +1038,10 @@ def format(q) def ===(other) other.is_a?(ArgsForward) end + + def arity + Float::INFINITY + end end # ArrayLiteral represents an array literal, which can optionally contain @@ -1080,58 +1114,6 @@ def format(q) end end - # Formats an array that contains only a list of variable references. To make - # things simpler, if there are a bunch, we format them all using the "fill" - # algorithm as opposed to breaking them into a ton of lines. For example, - # - # [foo, bar, baz] - # - # instead of becoming: - # - # [ - # foo, - # bar, - # baz - # ] - # - # would instead become: - # - # [ - # foo, bar, - # baz - # ] - # - # provided the line length was hit between `bar` and `baz`. 
- class VarRefsFormatter - # The separator for the fill algorithm. - class Separator - def call(q) - q.text(",") - q.fill_breakable - end - end - - # [Args] the contents of the array - attr_reader :contents - - def initialize(contents) - @contents = contents - end - - def format(q) - q.text("[") - q.group do - q.indent do - q.breakable_empty - q.seplist(contents.parts, Separator.new) { |part| q.format(part) } - q.if_break { q.text(",") } if q.trailing_comma? - end - q.breakable_empty - end - q.text("]") - end - end - # This is a special formatter used if the array literal contains no values # but _does_ contain comments. In this case we do some special formatting to # make sure the comments gets indented properly. @@ -1158,7 +1140,8 @@ def format(q) end end - # [LBracket] the bracket that opens this array + # [nil | LBracket | QSymbolsBeg | QWordsBeg | SymbolsBeg | WordsBeg] the + # bracket that opens this array attr_reader :lbracket # [nil | Args] the contents of the array @@ -1206,19 +1189,20 @@ def deconstruct_keys(_keys) end def format(q) - if qwords? - QWordsFormatter.new(contents).format(q) - return - end + lbracket = self.lbracket + contents = self.contents - if qsymbols? - QSymbolsFormatter.new(contents).format(q) - return - end + if lbracket.is_a?(LBracket) && lbracket.comments.empty? && contents && + contents.comments.empty? && contents.parts.length > 1 + if qwords? + QWordsFormatter.new(contents).format(q) + return + end - if var_refs?(q) - VarRefsFormatter.new(contents).format(q) - return + if qsymbols? + QSymbolsFormatter.new(contents).format(q) + return + end end if empty_with_comments? @@ -1250,39 +1234,24 @@ def ===(other) private def qwords? - lbracket.comments.empty? && contents && contents.comments.empty? && - contents.parts.length > 1 && - contents.parts.all? do |part| - case part - when StringLiteral - part.comments.empty? 
&& part.parts.length == 1 && - part.parts.first.is_a?(TStringContent) && - !part.parts.first.value.match?(/[\s\[\]\\]/) - when CHAR - !part.value.match?(/[\[\]\\]/) - else - false - end + contents.parts.all? do |part| + case part + when StringLiteral + part.comments.empty? && part.parts.length == 1 && + part.parts.first.is_a?(TStringContent) && + !part.parts.first.value.match?(/[\s\[\]\\]/) + when CHAR + !part.value.match?(/[\[\]\\]/) + else + false end + end end def qsymbols? - lbracket.comments.empty? && contents && contents.comments.empty? && - contents.parts.length > 1 && - contents.parts.all? do |part| - part.is_a?(SymbolLiteral) && part.comments.empty? - end - end - - def var_refs?(q) - lbracket.comments.empty? && contents && contents.comments.empty? && - contents.parts.all? do |part| - part.is_a?(VarRef) && part.comments.empty? - end && - ( - contents.parts.sum { |part| part.value.value.length + 2 } > - q.maxwidth * 2 - ) + contents.parts.all? do |part| + part.is_a?(SymbolLiteral) && part.comments.empty? 
+ end end # If we have an empty array that contains only comments, then we're going @@ -1330,10 +1299,10 @@ def format(q) end end - # [nil | VarRef] the optional constant wrapper + # [nil | VarRef | ConstPathRef] the optional constant wrapper attr_reader :constant - # [Array[ untyped ]] the regular positional arguments that this array + # [Array[ Node ]] the regular positional arguments that this array # pattern is matching against attr_reader :requireds @@ -1341,7 +1310,7 @@ def format(q) # positional arguments attr_reader :rest - # [Array[ untyped ]] the list of positional arguments occurring after the + # [Array[ Node ]] the list of positional arguments occurring after the # optional star if there is one attr_reader :posts @@ -1451,7 +1420,7 @@ class Assign < Node # to assign the result of the expression to attr_reader :target - # [untyped] the expression to be assigned + # [Node] the expression to be assigned attr_reader :value # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -1526,10 +1495,10 @@ def skip_indent? # # In the above example, the would be two Assoc nodes. class Assoc < Node - # [untyped] the key of this pair + # [Node] the key of this pair attr_reader :key - # [untyped] the value of this pair + # [nil | Node] the value of this pair attr_reader :value # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -1583,7 +1552,7 @@ def ===(other) private def format_contents(q) - q.parent.format_key(q, key) + (q.parent || HashKeyFormatter::Identity.new).format_key(q, key) return unless value if key.comments.empty? 
&& AssignFormatting.skip_indent?(value) @@ -1604,7 +1573,7 @@ def format_contents(q) # { **pairs } # class AssocSplat < Node - # [nil | untyped] the expression that is being splatted + # [nil | Node] the expression that is being splatted attr_reader :value # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -1800,14 +1769,40 @@ def format_key(q, key) end end - def self.for(container) - labels = - container.assocs.all? do |assoc| - next true if assoc.is_a?(AssocSplat) + # When formatting a single assoc node without the context of the parent + # hash, this formatter is used. It uses whatever is present in the node, + # because there is nothing to be consistent with. + class Identity + def format_key(q, key) + if key.is_a?(Label) + q.format(key) + else + q.format(key) + q.text(" =>") + end + end + end + def self.for(container) + container.assocs.each do |assoc| + if assoc.is_a?(AssocSplat) + # Splat nodes do not impact the formatting choice. + elsif assoc.value.nil? + # If the value is nil, then it has been omitted. In this case we have + # to match the existing formatting because standardizing would + # potentially break the code. For example: + # + # { first:, "second" => "value" } + # + return Identity.new + else + # Otherwise, we need to check the type of the key. If it's a label or + # dynamic symbol, we can use labels. If it's a symbol literal then it + # needs to match a certain pattern to be used as a label. If it's + # anything else, then we need to use hash rockets. case assoc.key - when Label - true + when Label, DynaSymbol + # Here labels can be used. when SymbolLiteral # When attempting to convert a hash rocket into a hash label, # you need to take care because only certain patterns are @@ -1815,15 +1810,18 @@ def self.for(container) # arguments to methods, but don't specify what that is. 
After # some experimentation, it looks like it's: value = assoc.key.value.value - value.match?(/^[_A-Za-z]/) && !value.end_with?("=") - when DynaSymbol - true + + if !value.match?(/^[_A-Za-z]/) || value.end_with?("=") + return Rockets.new + end else - false + # If the value is anything else, we have to use hash rockets. + return Rockets.new end end + end - (labels ? Labels : Rockets).new + Labels.new end end @@ -1880,7 +1878,15 @@ def ===(other) end def format_key(q, key) - (@key_formatter ||= HashKeyFormatter.for(self)).format_key(q, key) + @key_formatter ||= + case q.parents.take(3).last + when Break, Next, ReturnNode + HashKeyFormatter::Identity.new + else + HashKeyFormatter.for(self) + end + + @key_formatter.format_key(q, key) end end @@ -1954,7 +1960,7 @@ def ===(other) # end # class PinnedBegin < Node - # [untyped] the expression being pinned + # [Node] the expression being pinned attr_reader :statement # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -2035,13 +2041,13 @@ def name } end - # [untyped] the left-hand side of the expression + # [Node] the left-hand side of the expression attr_reader :left # [Symbol] the operator used between the two expressions attr_reader :operator - # [untyped] the right-hand side of the expression + # [Node] the right-hand side of the expression attr_reader :right # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -2089,16 +2095,14 @@ def deconstruct_keys(_keys) end def format(q) + left = self.left power = operator == :** q.group do q.group { q.format(left) } q.text(" ") unless power - if operator == :<< - q.text("<< ") - q.format(right) - else + if operator != :<< q.group do q.text(operator.name) q.indent do @@ -2106,6 +2110,17 @@ def format(q) q.format(right) end end + elsif left.is_a?(Binary) && left.operator == :<< + q.group do + q.text(operator.name) + q.indent do + power ? 
q.breakable_empty : q.breakable_space + q.format(right) + end + end + else + q.text("<< ") + q.format(right) end end end @@ -2195,6 +2210,14 @@ def ===(other) other.is_a?(BlockVar) && params === other.params && ArrayMatch.call(locals, other.locals) end + + # When a single required parameter is declared for a block, it gets + # automatically expanded if the values being yielded into it are an array. + def arg0? + params.requireds.length == 1 && params.optionals.empty? && + params.rest.nil? && params.posts.empty? && params.keywords.empty? && + params.keyword_rest.nil? && params.block.nil? + end end # BlockArg represents declaring a block parameter on a method definition. @@ -2288,7 +2311,9 @@ def initialize( @comments = [] end - def bind(start_char, start_column, end_char, end_column) + def bind(parser, start_char, start_column, end_char, end_column) + rescue_clause = self.rescue_clause + @location = Location.new( start_line: location.start_line, @@ -2302,6 +2327,7 @@ def bind(start_char, start_column, end_char, end_column) # Here we're going to determine the bounds for the statements consequent = rescue_clause || else_clause || ensure_clause statements.bind( + parser, start_char, start_column, consequent ? consequent.location.start_char : end_char, @@ -2311,6 +2337,7 @@ def bind(start_char, start_column, end_char, end_column) # Next we're going to determine the rescue clause if there is one if rescue_clause consequent = else_clause || ensure_clause + rescue_clause.bind_end( consequent ? consequent.location.start_char : end_char, consequent ? consequent.location.start_column : end_column @@ -2692,7 +2719,7 @@ def format(q) # Of course there are a lot of caveats to that, including trailing operators # when necessary, where comments are places, how blocks are aligned, etc. 
class CallChainFormatter - # [Call | MethodAddBlock] the top of the call chain + # [CallNode | MethodAddBlock] the top of the call chain attr_reader :node def initialize(node) @@ -2716,7 +2743,7 @@ def format(q) children << receiver end when MethodAddBlock - if receiver.call.is_a?(CallNode) && !receiver.call.receiver.nil? + if (call = receiver.call).is_a?(CallNode) && !call.receiver.nil? children << receiver else break @@ -2725,8 +2752,8 @@ def format(q) break end when MethodAddBlock - if child.call.is_a?(CallNode) && !child.call.receiver.nil? - children << child.call + if (call = child.call).is_a?(CallNode) && !call.receiver.nil? + children << call else break end @@ -2748,8 +2775,8 @@ def format(q) # of just Statements nodes. parent = parents[3] if parent.is_a?(BlockNode) && parent.keywords? - if parent.is_a?(MethodAddBlock) && parent.call.is_a?(CallNode) && - parent.call.message.value == "sig" + if parent.is_a?(MethodAddBlock) && + (call = parent.call).is_a?(CallNode) && call.message.value == "sig" threshold = 2 end end @@ -2794,10 +2821,10 @@ def format_chain(q, children) while (child = children.pop) if child.is_a?(CallNode) - if child.receiver.is_a?(CallNode) && - (child.receiver.message != :call) && - (child.receiver.message.value == "where") && - (child.message.value == "not") + if (receiver = child.receiver).is_a?(CallNode) && + (receiver.message != :call) && + (receiver.message.value == "where") && + (child.message != :call && child.message.value == "not") # This is very specialized behavior wherein we group # .where.not calls together because it looks better. For more # information, see @@ -2821,8 +2848,11 @@ def format_chain(q, children) # If the parent call node has a comment on the message then we need # to print the operator trailing in order to keep it working. last_child = children.last - if last_child.is_a?(CallNode) && last_child.message.comments.any? 
&& - last_child.operator + if last_child.is_a?(CallNode) && last_child.message != :call && + ( + (last_child.message.comments.any? && last_child.operator) || + (last_child.operator && last_child.operator.comments.any?) + ) q.format(CallOperatorFormatter.new(last_child.operator)) skip_operator = true else @@ -2853,7 +2883,8 @@ def self.chained?(node) when CallNode !node.receiver.nil? when MethodAddBlock - node.call.is_a?(CallNode) && !node.call.receiver.nil? + call = node.call + call.is_a?(CallNode) && !call.receiver.nil? else false end @@ -2913,7 +2944,7 @@ def format_child( # receiver.message # class CallNode < Node - # [nil | untyped] the receiver of the method call + # [nil | Node] the receiver of the method call attr_reader :receiver # [nil | :"::" | Op | Period] the operator being used to send the message @@ -3068,6 +3099,10 @@ def format_contents(q) end end end + + def arity + arguments&.arity || 0 + end end # Case represents the beginning of a case chain. @@ -3085,7 +3120,7 @@ class Case < Node # [Kw] the keyword that opens this expression attr_reader :keyword - # [nil | untyped] optional value being switched on + # [nil | Node] optional value being switched on attr_reader :value # [In | When] the next clause in the chain @@ -3164,14 +3199,14 @@ def ===(other) # value => pattern # class RAssign < Node - # [untyped] the left-hand expression + # [Node] the left-hand expression attr_reader :value # [Kw | Op] the operator being used to match against the pattern, which is # either => or in attr_reader :operator - # [untyped] the pattern on the right-hand side of the expression + # [Node] the pattern on the right-hand side of the expression attr_reader :pattern # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -3282,7 +3317,7 @@ class ClassDeclaration < Node # defined attr_reader :constant - # [nil | untyped] the optional superclass declaration + # [nil | Node] the optional superclass declaration attr_reader :superclass # [BodyStmt] the 
expressions to execute within the context of the class @@ -3420,7 +3455,7 @@ class Command < Node # [Args] the arguments being sent with the message attr_reader :arguments - # [nil | Block] the optional block being passed to the method + # [nil | BlockNode] the optional block being passed to the method attr_reader :block # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -3481,6 +3516,10 @@ def ===(other) arguments === other.arguments && block === other.block end + def arity + arguments.arity + end + private def align(q, node, &block) @@ -3522,19 +3561,19 @@ def align(q, node, &block) # object.method argument # class CommandCall < Node - # [untyped] the receiver of the message + # [nil | Node] the receiver of the message attr_reader :receiver - # [:"::" | Op | Period] the operator used to send the message + # [nil | :"::" | Op | Period] the operator used to send the message attr_reader :operator - # [Const | Ident | Op] the message being send + # [:call | Const | Ident | Op] the message being send attr_reader :message - # [nil | Args] the arguments going along with the message + # [nil | Args | ArgParen] the arguments going along with the message attr_reader :arguments - # [nil | Block] the block associated with this method call + # [nil | BlockNode] the block associated with this method call attr_reader :block # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -3602,6 +3641,10 @@ def deconstruct_keys(_keys) end def format(q) + message = self.message + arguments = self.arguments + block = self.block + q.group do doc = q.nest(0) do @@ -3610,7 +3653,7 @@ def format(q) # If there are leading comments on the message then we know we have # a newline in the source that is forcing these things apart. In # this case we will have to use a trailing operator. - if message.comments.any?(&:leading?) + if message != :call && message.comments.any?(&:leading?) 
q.format(CallOperatorFormatter.new(operator), stackable: false) q.indent do q.breakable_empty @@ -3646,6 +3689,10 @@ def ===(other) arguments === other.arguments && block === other.block end + def arity + arguments&.arity || 0 + end + private def argument_alignment(q, doc) @@ -3816,7 +3863,7 @@ def ===(other) # object::Const = value # class ConstPathField < Node - # [untyped] the source of the constant + # [Node] the source of the constant attr_reader :parent # [Const] the constant itself @@ -3880,7 +3927,7 @@ def ===(other) # object::Const # class ConstPathRef < Node - # [untyped] the source of the constant + # [Node] the source of the constant attr_reader :parent # [Const] the constant itself @@ -4049,7 +4096,7 @@ def ===(other) # def object.method(param) result end # class DefNode < Node - # [nil | untyped] the target where the method is being defined + # [nil | Node] the target where the method is being defined attr_reader :target # [nil | Op | Period] the operator being used to declare the method @@ -4061,7 +4108,7 @@ class DefNode < Node # [nil | Params | Paren] the parameter declaration for the method attr_reader :params - # [BodyStmt | untyped] the expressions to be executed by the method + # [BodyStmt | Node] the expressions to be executed by the method attr_reader :bodystmt # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -4122,9 +4169,13 @@ def deconstruct_keys(_keys) end def format(q) + params = self.params + bodystmt = self.bodystmt + q.group do q.group do - q.text("def ") + q.text("def") + q.text(" ") if target || name.comments.empty? if target q.format(target) @@ -4175,6 +4226,19 @@ def ===(other) def endless? !bodystmt.is_a?(BodyStmt) end + + def arity + params = self.params + + case params + when Params + params.arity + when Paren + params.contents.arity + else + 0..0 + end + end end # Defined represents the use of the +defined?+ operator. It can be used with @@ -4183,7 +4247,7 @@ def endless? 
# defined?(variable) # class Defined < Node - # [untyped] the value being sent to the keyword + # [Node] the value being sent to the keyword attr_reader :value # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -4340,7 +4404,7 @@ def format(q) # are no parentheses around the arguments to that command, so we need to # break the block. case q.parent - when Command, CommandCall + when nil, Command, CommandCall q.break_parent format_break(q, break_opening, break_closing) return @@ -4362,6 +4426,15 @@ def keywords? opening.is_a?(Kw) end + def arity + case block_var + when BlockVar + block_var.params.arity + else + 0..0 + end + end + private # If this is nested anywhere inside certain nodes, then we can't change @@ -4385,7 +4458,7 @@ def unchangeable_bounds?(q) # If we're a sibling of a control-flow keyword, then we're going to have to # use the do..end bounds. def forced_do_end_bounds?(q) - case q.parent.call + case q.parent&.call when Break, Next, ReturnNode, Super true else @@ -4465,13 +4538,13 @@ def format_flat(q, flat_opening, flat_closing) # # One of the sides of the expression may be nil, but not both. 
class RangeNode < Node - # [nil | untyped] the left side of the expression + # [nil | Node] the left side of the expression attr_reader :left # [Op] the operator used for this range attr_reader :operator - # [nil | untyped] the right side of the expression + # [nil | Node] the right side of the expression attr_reader :right # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -4592,7 +4665,7 @@ class DynaSymbol < Node # dynamic symbol attr_reader :parts - # [String] the quote used to delimit the dynamic symbol + # [nil | String] the quote used to delimit the dynamic symbol attr_reader :quote # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -4790,7 +4863,7 @@ def ===(other) # end # class Elsif < Node - # [untyped] the expression to be checked + # [Node] the expression to be checked attr_reader :predicate # [Statements] the expressions to be executed @@ -4886,6 +4959,25 @@ class EmbDoc < Node def initialize(value:, location:) @value = value @location = location + + @leading = false + @trailing = false + end + + def leading! + @leading = true + end + + def leading? + @leading + end + + def trailing! + @trailing = true + end + + def trailing? + @trailing end def inline? @@ -4922,7 +5014,13 @@ def deconstruct_keys(_keys) end def format(q) - q.trim + if (q.parent.is_a?(DefNode) && q.parent.endless?) 
|| + q.parent.is_a?(Statements) + q.trim + else + q.breakable_return + end + q.text(value) end @@ -5191,7 +5289,7 @@ def ===(other) # object.variable = value # class Field < Node - # [untyped] the parent object that owns the field being assigned + # [Node] the parent object that owns the field being assigned attr_reader :parent # [:"::" | Op | Period] the operator being used for the assignment @@ -5216,6 +5314,7 @@ def accept(visitor) end def child_nodes + operator = self.operator [parent, (operator if operator != :"::"), name] end @@ -5317,13 +5416,13 @@ def ===(other) # end # class FndPtn < Node - # [nil | untyped] the optional constant wrapper + # [nil | VarRef | ConstPathRef] the optional constant wrapper attr_reader :constant # [VarField] the splat on the left-hand side attr_reader :left - # [Array[ untyped ]] the list of positional expressions in the pattern that + # [Array[ Node ]] the list of positional expressions in the pattern that # are being matched attr_reader :values @@ -5419,7 +5518,7 @@ class For < Node # pull values out of the object being enumerated attr_reader :index - # [untyped] the object being enumerated in the loop + # [Node] the object being enumerated in the loop attr_reader :collection # [Statements] the statements to be executed @@ -5597,7 +5696,7 @@ def accept(visitor) end def child_nodes - [lbrace] + assocs + [lbrace].concat(assocs) end def copy(lbrace: nil, assocs: nil, location: nil) @@ -5689,7 +5788,7 @@ class Heredoc < Node # [Array[ Comment | EmbDoc ]] the comments attached to this node attr_reader :comments - def initialize(beginning:, ending: nil, dedent: 0, parts: [], location:) + def initialize(beginning:, location:, ending: nil, dedent: 0, parts: []) @beginning = beginning @ending = ending @dedent = dedent @@ -5898,7 +5997,7 @@ class KeywordFormatter # [Label] the keyword being used attr_reader :key - # [untyped] the optional value for the keyword + # [Node] the optional value for the keyword attr_reader :value def 
initialize(key, value) @@ -5911,7 +6010,7 @@ def comments end def format(q) - q.format(key) + HashKeyFormatter::Labels.new.format_key(q, key) if value q.text(" ") @@ -5939,11 +6038,11 @@ def format(q) end end - # [nil | untyped] the optional constant wrapper + # [nil | VarRef | ConstPathRef] the optional constant wrapper attr_reader :constant - # [Array[ [Label, untyped] ]] the set of tuples representing the keywords - # that should be matched against in the pattern + # [Array[ [DynaSymbol | Label, nil | Node] ]] the set of tuples + # representing the keywords that should be matched against in the pattern attr_reader :keywords # [nil | VarField] an optional parameter to gather up all remaining keywords @@ -6057,6 +6156,8 @@ def ===(other) private def format_contents(q, parts, nested) + keyword_rest = self.keyword_rest + q.group { q.seplist(parts) { |part| q.format(part, stackable: false) } } # If there isn't a constant, and there's a blank keyword_rest, then we @@ -6153,7 +6254,7 @@ def self.call(parent) module Ternaryable class << self def call(q, node) - return false if ENV["STREE_FAST_FORMAT"] + return false if ENV["STREE_FAST_FORMAT"] || q.disable_auto_ternary? # If this is a conditional inside of a parentheses as the only content, # then we don't want to transform it into a ternary. Presumably the user @@ -6368,7 +6469,7 @@ def contains_conditional? # end # class IfNode < Node - # [untyped] the expression to be checked + # [Node] the expression to be checked attr_reader :predicate # [Statements] the expressions to be executed @@ -6441,13 +6542,13 @@ def modifier? # predicate ? 
truthy : falsy # class IfOp < Node - # [untyped] the expression to be checked + # [Node] the expression to be checked attr_reader :predicate - # [untyped] the expression to be executed if the predicate is truthy + # [Node] the expression to be executed if the predicate is truthy attr_reader :truthy - # [untyped] the expression to be executed if the predicate is falsy + # [Node] the expression to be executed if the predicate is falsy attr_reader :falsy # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -6496,9 +6597,26 @@ def deconstruct_keys(_keys) def format(q) force_flat = [ - AliasNode, Assign, Break, Command, CommandCall, Heredoc, IfNode, IfOp, - Lambda, MAssign, Next, OpAssign, RescueMod, ReturnNode, Super, Undef, - UnlessNode, VoidStmt, YieldNode, ZSuper + AliasNode, + Assign, + Break, + Command, + CommandCall, + Heredoc, + IfNode, + IfOp, + Lambda, + MAssign, + Next, + OpAssign, + RescueMod, + ReturnNode, + Super, + Undef, + UnlessNode, + VoidStmt, + YieldNode, + ZSuper ] if q.parent.is_a?(Paren) || force_flat.include?(truthy.class) || @@ -6614,7 +6732,7 @@ def ===(other) # end # class In < Node - # [untyped] the pattern to check against + # [Node] the pattern to check against attr_reader :pattern # [Statements] the expressions to execute if the pattern matched @@ -6669,10 +6787,13 @@ def deconstruct_keys(_keys) def format(q) keyword = "in " + pattern = self.pattern + consequent = self.consequent q.group do q.text(keyword) q.nest(keyword.length) { q.format(pattern) } + q.text(" then") if pattern.is_a?(RangeNode) && pattern.right.nil? unless statements.empty? q.indent do @@ -7070,6 +7191,8 @@ def deconstruct_keys(_keys) end def format(q) + params = self.params + q.text("->") q.group do if params.is_a?(Paren) @@ -7087,36 +7210,17 @@ def format(q) q.text(" ") q .if_break do - force_parens = - q.parents.any? 
do |node| - node.is_a?(Command) || node.is_a?(CommandCall) - end - - if force_parens - q.text("{") + q.text("do") - unless statements.empty? - q.indent do - q.breakable_space - q.format(statements) - end + unless statements.empty? + q.indent do q.breakable_space + q.format(statements) end - - q.text("}") - else - q.text("do") - - unless statements.empty? - q.indent do - q.breakable_space - q.format(statements) - end - end - - q.breakable_space - q.text("end") end + + q.breakable_space + q.text("end") end .if_flat do q.text("{") @@ -7397,7 +7501,7 @@ class MAssign < Node # [MLHS | MLHSParen] the target of the multiple assignment attr_reader :target - # [untyped] the value being assigned + # [Node] the value being assigned attr_reader :value # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -7457,10 +7561,10 @@ def ===(other) # method {} # class MethodAddBlock < Node - # [Call | Command | CommandCall] the method call + # [ARef | CallNode | Command | CommandCall | Super | ZSuper] the method call attr_reader :call - # [Block] the block being sent with the method call + # [BlockNode] the block being sent with the method call attr_reader :block # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -7532,8 +7636,12 @@ def format_contents(q) # first, second, third = value # class MLHS < Node - # Array[ARefField | ArgStar | Field | Ident | MLHSParen | VarField] the - # parts of the left-hand side of a multiple assignment + # [ + # Array[ + # ARefField | ArgStar | ConstPathField | Field | Ident | MLHSParen | + # TopConstField | VarField + # ] + # ] the parts of the left-hand side of a multiple assignment attr_reader :parts # [boolean] whether or not there is a trailing comma at the end of this @@ -7544,7 +7652,7 @@ class MLHS < Node # [Array[ Comment | EmbDoc ]] the comments attached to this node attr_reader :comments - def initialize(parts:, comma: false, location:) + def initialize(parts:, location:, comma: false) @parts = parts @comma = 
comma @location = location @@ -7605,7 +7713,7 @@ class MLHSParen < Node # [Array[ Comment | EmbDoc ]] the comments attached to this node attr_reader :comments - def initialize(contents:, comma: false, location:) + def initialize(contents:, location:, comma: false) @contents = contents @comma = comma @location = location @@ -7759,7 +7867,7 @@ def format_declaration(q) # values = first, second, third # class MRHS < Node - # Array[untyped] the parts that are being assigned + # [Array[Node]] the parts that are being assigned attr_reader :parts # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -7935,7 +8043,7 @@ class OpAssign < Node # [Op] the operator being used for the assignment attr_reader :operator - # [untyped] the expression to be assigned + # [Node] the expression to be assigned attr_reader :value # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -8092,7 +8200,7 @@ class OptionalFormatter # [Ident] the name of the parameter attr_reader :name - # [untyped] the value of the parameter + # [Node] the value of the parameter attr_reader :value def initialize(name, value) @@ -8117,7 +8225,7 @@ class KeywordFormatter # [Ident] the name of the parameter attr_reader :name - # [nil | untyped] the value of the parameter + # [nil | Node] the value of the parameter attr_reader :value def initialize(name, value) @@ -8158,10 +8266,10 @@ def format(q) end end - # [Array[ Ident ]] any required parameters + # [Array[ Ident | MLHSParen ]] any required parameters attr_reader :requireds - # [Array[ [ Ident, untyped ] ]] any optional parameters and their default + # [Array[ [ Ident, Node ] ]] any optional parameters and their default # values attr_reader :optionals @@ -8169,15 +8277,16 @@ def format(q) # parameter attr_reader :rest - # [Array[ Ident ]] any positional parameters that exist after a rest - # parameter + # [Array[ Ident | MLHSParen ]] any positional parameters that exist after a + # rest parameter attr_reader :posts - # [Array[ [ 
Ident, nil | untyped ] ]] any keyword parameters and their + # [Array[ [ Label, nil | Node ] ]] any keyword parameters and their # optional default values attr_reader :keywords - # [nil | :nil | KwRestParam] the optional keyword rest parameter + # [nil | :nil | ArgsForward | KwRestParam] the optional keyword rest + # parameter attr_reader :keyword_rest # [nil | BlockArg] the optional block parameter @@ -8187,14 +8296,14 @@ def format(q) attr_reader :comments def initialize( + location:, requireds: [], optionals: [], rest: nil, posts: [], keywords: [], keyword_rest: nil, - block: nil, - location: + block: nil ) @requireds = requireds @optionals = optionals @@ -8221,6 +8330,8 @@ def accept(visitor) end def child_nodes + keyword_rest = self.keyword_rest + [ *requireds, *optionals.flatten(1), @@ -8275,16 +8386,19 @@ def deconstruct_keys(_keys) end def format(q) + rest = self.rest + keyword_rest = self.keyword_rest + parts = [ *requireds, *optionals.map { |(name, value)| OptionalFormatter.new(name, value) } ] parts << rest if rest && !rest.is_a?(ExcessedComma) - parts += [ - *posts, - *keywords.map { |(name, value)| KeywordFormatter.new(name, value) } - ] + parts.concat(posts) + parts.concat( + keywords.map { |(name, value)| KeywordFormatter.new(name, value) } + ) parts << KeywordRestFormatter.new(keyword_rest) if keyword_rest parts << block if block @@ -8325,6 +8439,29 @@ def ===(other) keyword_rest === other.keyword_rest && block === other.block end + # Returns a range representing the possible number of arguments accepted + # by this params node not including the block. For example: + # + # def foo(a, b = 1, c:, d: 2, &block) + # ... + # end + # + # has arity 2..4. + # + def arity + optional_keywords = keywords.count { |_label, value| value } + + lower_bound = + requireds.length + posts.length + keywords.length - optional_keywords + + upper_bound = + if keyword_rest.nil? && rest.nil? 
+ lower_bound + optionals.length + optional_keywords + end + + lower_bound..upper_bound + end + private def format_contents(q, parts) @@ -8343,7 +8480,7 @@ class Paren < Node # [LParen] the left parenthesis that opened this statement attr_reader :lparen - # [nil | untyped] the expression inside the parentheses + # [nil | Node] the expression inside the parentheses attr_reader :contents # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -8388,6 +8525,8 @@ def deconstruct_keys(_keys) end def format(q) + contents = self.contents + q.group do q.format(lparen) @@ -9192,7 +9331,7 @@ def ambiguous?(q) # end # class RescueEx < Node - # [untyped] the list of exceptions being rescued + # [nil | Node] the list of exceptions being rescued attr_reader :exceptions # [nil | Field | VarField] the expression being used to capture the raised @@ -9270,7 +9409,7 @@ class Rescue < Node # [Kw] the rescue keyword attr_reader :keyword - # [RescueEx] the exceptions being rescued + # [nil | RescueEx] the exceptions being rescued attr_reader :exception # [Statements] the expressions to evaluate when an error is rescued @@ -9302,11 +9441,11 @@ def bind_end(end_char, end_column) end_column: end_column ) - if consequent - consequent.bind_end(end_char, end_column) + if (next_node = consequent) + next_node.bind_end(end_char, end_column) statements.bind_end( - consequent.location.start_char, - consequent.location.start_column + next_node.location.start_char, + next_node.location.start_column ) else statements.bind_end(end_char, end_column) @@ -9390,10 +9529,10 @@ def ===(other) # expression rescue value # class RescueMod < Node - # [untyped] the expression to execute + # [Node] the expression to execute attr_reader :statement - # [untyped] the value to use if the executed expression raises an error + # [Node] the value to use if the executed expression raises an error attr_reader :value # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -9652,7 +9791,7 @@ def 
===(other) # end # class SClass < Node - # [untyped] the target of the singleton class to enter + # [Node] the target of the singleton class to enter attr_reader :target # [BodyStmt] the expressions to be executed @@ -9726,23 +9865,19 @@ def ===(other) # propagate that onto void_stmt nodes inside the stmts in order to make sure # all comments get printed appropriately. class Statements < Node - # [SyntaxTree] the parser that is generating this node - attr_reader :parser - - # [Array[ untyped ]] the list of expressions contained within this node + # [Array[ Node ]] the list of expressions contained within this node attr_reader :body # [Array[ Comment | EmbDoc ]] the comments attached to this node attr_reader :comments - def initialize(parser, body:, location:) - @parser = parser + def initialize(body:, location:) @body = body @location = location @comments = [] end - def bind(start_char, start_column, end_char, end_column) + def bind(parser, start_char, start_column, end_char, end_column) @location = Location.new( start_line: location.start_line, @@ -9753,8 +9888,8 @@ def bind(start_char, start_column, end_char, end_column) end_column: end_column ) - if body[0].is_a?(VoidStmt) - location = body[0].location + if (void_stmt = body[0]).is_a?(VoidStmt) + location = void_stmt.location location = Location.new( start_line: location.start_line, @@ -9768,7 +9903,7 @@ def bind(start_char, start_column, end_char, end_column) body[0] = VoidStmt.new(location: location) end - attach_comments(start_char, end_char) + attach_comments(parser, start_char, end_char) end def bind_end(end_char, end_column) @@ -9800,7 +9935,6 @@ def child_nodes def copy(body: nil, location: nil) node = Statements.new( - parser, body: body || self.body, location: location || self.location ) @@ -9812,7 +9946,7 @@ def copy(body: nil, location: nil) alias deconstruct child_nodes def deconstruct_keys(_keys) - { parser: parser, body: body, location: location, comments: comments } + { body: body, location: 
location, comments: comments } end def format(q) @@ -9872,7 +10006,7 @@ def ===(other) # As efficiently as possible, gather up all of the comments that have been # found while this statements list was being parsed and add them into the # body. - def attach_comments(start_char, end_char) + def attach_comments(parser, start_char, end_char) parser_comments = parser.comments comment_index = 0 @@ -9919,9 +10053,13 @@ class StringContent < Node # string attr_reader :parts + # [Array[ Comment | EmbDoc ]] the comments attached to this node + attr_reader :comments + def initialize(parts:, location:) @parts = parts @location = location + @comments = [] end def accept(visitor) @@ -9948,6 +10086,33 @@ def deconstruct_keys(_keys) def ===(other) other.is_a?(StringContent) && ArrayMatch.call(parts, other.parts) end + + def format(q) + q.text(q.quote) + q.group do + parts.each do |part| + if part.is_a?(TStringContent) + value = Quotes.normalize(part.value, q.quote) + first = true + + value.each_line(chomp: true) do |line| + if first + first = false + else + q.breakable_return + end + + q.text(line) + end + + q.breakable_return if value.end_with?("\n") + else + q.format(part) + end + end + end + q.text(q.quote) + end end # StringConcat represents concatenating two strings together using a backward @@ -9957,7 +10122,8 @@ def ===(other) # "second" # class StringConcat < Node - # [StringConcat | StringLiteral] the left side of the concatenation + # [Heredoc | StringConcat | StringLiteral] the left side of the + # concatenation attr_reader :left # [StringLiteral] the right side of the concatenation @@ -10154,7 +10320,7 @@ class StringLiteral < Node # string literal attr_reader :parts - # [String] which quote was used by the string literal + # [nil | String] which quote was used by the string literal attr_reader :quote # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -10202,7 +10368,7 @@ def format(q) opening_quote, closing_quote = if !Quotes.locked?(self, q.quote) 
[q.quote, q.quote] - elsif quote.start_with?("%") + elsif quote&.start_with?("%") [quote, Quotes.matching(quote[/%[qQ]?(.)/, 1])] else [quote, quote] @@ -10399,8 +10565,8 @@ def ===(other) # :symbol # class SymbolLiteral < Node - # [Backtick | Const | CVar | GVar | Ident | IVar | Kw | Op] the value of the - # symbol + # [Backtick | Const | CVar | GVar | Ident | IVar | Kw | Op | TStringContent] + # the value of the symbol attr_reader :value # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -10439,6 +10605,7 @@ def deconstruct_keys(_keys) def format(q) q.text(":") + q.text("\\") if value.comments.any? q.format(value) end @@ -10908,7 +11075,7 @@ def ===(other) # not value # class Not < Node - # [nil | untyped] the statement on which to operate + # [nil | Node] the statement on which to operate attr_reader :statement # [boolean] whether or not parentheses were used @@ -10995,7 +11162,7 @@ class Unary < Node # [String] the operator being used attr_reader :operator - # [untyped] the statement on which to operate + # [Node] the statement on which to operate attr_reader :statement # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -11139,7 +11306,7 @@ def ===(other) # end # class UnlessNode < Node - # [untyped] the expression to be checked + # [Node] the expression to be checked attr_reader :predicate # [Statements] the expressions to be executed @@ -11285,7 +11452,7 @@ def format_break(q) # end # class UntilNode < Node - # [untyped] the expression to be checked + # [Node] the expression to be checked attr_reader :predicate # [Statements] the expressions to be executed @@ -11353,7 +11520,7 @@ def modifier? # # In the example above, the VarField node represents the +variable+ token. 
class VarField < Node - # [nil | Const | CVar | GVar | Ident | IVar] the target of this node + # [nil | :nil | Const | CVar | GVar | Ident | IVar] the target of this node attr_reader :value # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -11370,7 +11537,7 @@ def accept(visitor) end def child_nodes - [value] + value == :nil ? [] : [value] end def copy(value: nil, location: nil) @@ -11464,8 +11631,9 @@ def ===(other) # # To be clear, this method should just not exist. It's not good. It's a # place of shame. But it's necessary for now, so I'm keeping it. - def pin(parent) - replace = PinnedVarRef.new(value: value, location: location) + def pin(parent, pin) + replace = + PinnedVarRef.new(value: value, location: pin.location.to(location)) parent .deconstruct_keys([]) @@ -11491,7 +11659,7 @@ def pin(parent) # This can be a plain local variable like the example above. It can also be a # a class variable, a global variable, or an instance variable. class PinnedVarRef < Node - # [VarRef] the value of this node + # [Const | CVar | GVar | Ident | IVar] the value of this node attr_reader :value # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -11594,6 +11762,10 @@ def ===(other) def access_control? @access_control ||= %w[private protected public].include?(value.value) end + + def arity + 0 + end end # VoidStmt represents an empty lexical block of code. @@ -11601,9 +11773,6 @@ def access_control? 
# ;; # class VoidStmt < Node - # [Location] the location of this node - attr_reader :location - # [Array[ Comment | EmbDoc ]] the comments attached to this node attr_reader :comments @@ -11764,7 +11933,7 @@ def ===(other) # end # class WhileNode < Node - # [untyped] the expression to be checked + # [Node] the expression to be checked attr_reader :predicate # [Statements] the expressions to be executed diff --git a/lib/syntax_tree/parser.rb b/lib/syntax_tree/parser.rb index 602bb98f..825cd90e 100644 --- a/lib/syntax_tree/parser.rb +++ b/lib/syntax_tree/parser.rb @@ -256,11 +256,37 @@ def find_token(type) tokens[index] if index end + def find_token_between(type, left, right) + bounds = left.location.end_char...right.location.start_char + index = + tokens.rindex do |token| + char = token.location.start_char + break if char < bounds.begin + + token.is_a?(type) && bounds.cover?(char) + end + + tokens[index] if index + end + def find_keyword(name) index = tokens.rindex { |token| token.is_a?(Kw) && (token.name == name) } tokens[index] if index end + def find_keyword_between(name, left, right) + bounds = left.end_char...right.start_char + index = + tokens.rindex do |token| + char = token.location.start_char + break if char < bounds.begin + + token.is_a?(Kw) && (token.name == name) && bounds.cover?(char) + end + + tokens[index] if index + end + def find_operator(name) index = tokens.rindex { |token| token.is_a?(Op) && (token.name == name) } tokens[index] if index @@ -348,6 +374,7 @@ def on_BEGIN(statements) start_char = find_next_statement_start(lbrace.location.end_char) statements.bind( + self, start_char, start_char - line_counts[lbrace.location.start_line - 1].start, rbrace.location.start_char, @@ -386,6 +413,7 @@ def on_END(statements) start_char = find_next_statement_start(lbrace.location.end_char) statements.bind( + self, start_char, start_char - line_counts[lbrace.location.start_line - 1].start, rbrace.location.start_char, @@ -640,13 +668,14 @@ def visit(node) 
stack.pop end - def visit_var_ref(node) - pins.shift - node.pin(stack[-2]) + visit_methods do + def visit_var_ref(node) + node.pin(stack[-2], pins.shift) + end end def self.visit(node, tokens) - start_char = node.location.start_char + start_char = node.start_char allocated = [] tokens.reverse_each do |token| @@ -670,18 +699,22 @@ def self.visit(node, tokens) # (nil | Array[untyped]) posts # ) -> AryPtn def on_aryptn(constant, requireds, rest, posts) - parts = [constant, *requireds, rest, *posts].compact + lbracket = find_token(LBracket) + lbracket ||= find_token(LParen) if constant - # If there aren't any parts (no constant, no positional arguments), then - # we're matching an empty array. In this case, we're going to look for the - # left and right brackets explicitly. Otherwise, we'll just use the bounds - # of the various parts. - location = - if parts.empty? - consume_token(LBracket).location.to(consume_token(RBracket).location) - else - parts[0].location.to(parts[-1].location) - end + rbracket = find_token(RBracket) + rbracket ||= find_token(RParen) if constant + + parts = [constant, lbracket, *requireds, rest, *posts, rbracket].compact + + # The location is going to be determined by the first part to the last + # part. This includes potential brackets. + location = parts[0].location.to(parts[-1].location) + + # Now that we have the location calculated, we can remove the brackets + # from the list of tokens. 
+ tokens.delete(lbracket) if lbracket + tokens.delete(rbracket) if rbracket # If there is a plain *, then we're going to fix up the location of it # here because it currently doesn't have anything to use for its precise @@ -820,6 +853,7 @@ def on_begin(bodystmt) end bodystmt.bind( + self, find_next_statement_start(keyword.location.end_char), keyword.location.end_column, end_location.end_char, @@ -871,13 +905,34 @@ def on_binary(left, operator, right) # on_block_var: (Params params, (nil | Array[Ident]) locals) -> BlockVar def on_block_var(params, locals) index = - tokens.rindex do |node| - node.is_a?(Op) && %w[| ||].include?(node.value) && - node.location.start_char < params.location.start_char - end + tokens.rindex { |node| node.is_a?(Op) && %w[| ||].include?(node.value) } + + ending = tokens.delete_at(index) + beginning = ending.value == "||" ? ending : consume_operator(:|) + + # If there are no parameters, then we didn't have anything to base the + # location information of off. Now that we have an opening of the + # block, we can correct this. + if params.empty? + start_line = params.location.start_line + start_char = + ( + if beginning.value == "||" + beginning.location.start_char + else + find_next_statement_start(beginning.location.end_char) + end + ) + + location = + Location.fixed( + line: start_line, + char: start_char, + column: start_char - line_counts[start_line - 1].start + ) - beginning = tokens[index] - ending = tokens[-1] + params = params.copy(location: location) + end BlockVar.new( params: params, @@ -905,6 +960,14 @@ def on_blockarg(name) # (nil | Ensure) ensure_clause # ) -> BodyStmt def on_bodystmt(statements, rescue_clause, else_clause, ensure_clause) + # In certain versions of Ruby, the `statements` argument can be any node + # in the case that we're inside of an endless method definition. In this + # case we'll wrap it in a Statements node to be consistent. 
+ unless statements.is_a?(Statements) + statements = + Statements.new(body: [statements], location: statements.location) + end + parts = [statements, rescue_clause, else_clause, ensure_clause].compact BodyStmt.new( @@ -929,6 +992,7 @@ def on_brace_block(block_var, statements) start_char = find_next_statement_start(location.end_char) statements.bind( + self, start_char, start_char - line_counts[location.start_line - 1].start, rbrace.location.start_char, @@ -1036,6 +1100,7 @@ def on_class(constant, superclass, bodystmt) start_char = find_next_statement_start(location.end_char) bodystmt.bind( + self, start_char, start_char - line_counts[location.start_line - 1].start, ending.location.start_char, @@ -1103,6 +1168,7 @@ def on_command_call(receiver, operator, message, arguments) # :call-seq: # on_comment: (String value) -> Comment def on_comment(value) + # char is the index of the # character in the source. char = char_pos location = Location.token( @@ -1112,8 +1178,24 @@ def on_comment(value) size: value.size - 1 ) - index = source.rindex(/[^\t ]/, char - 1) if char != 0 - inline = index && (source[index] != "\n") + # Loop backward in the source string, starting from the beginning of the + # comment, and find the first character that is not a space or a tab. If + # index is -1, this indicates that we've checked all of the characters + # back to the start of the source, so this comment must be at the + # beginning of the file. + # + # We are purposefully not using rindex or regular expressions here because + # they check if there are invalid characters, which is actually possible + # with the use of __END__ syntax. + index = char - 1 + while index > -1 && (source[index] == "\t" || source[index] == " ") + index -= 1 + end + + # If we found a character that was not a space or a tab before the comment + # and it's a newline, then this comment is inline. Otherwise, it stands on + # its own and can be attached as its own node in the tree. 
+ inline = index != -1 && source[index] != "\n" comment = Comment.new(value: value.chomp, inline: inline, location: location) @@ -1137,13 +1219,23 @@ def on_const(value) end # :call-seq: - # on_const_path_field: (untyped parent, Const constant) -> ConstPathField + # on_const_path_field: (untyped parent, Const constant) -> + # ConstPathField | Field def on_const_path_field(parent, constant) - ConstPathField.new( - parent: parent, - constant: constant, - location: parent.location.to(constant.location) - ) + if constant.is_a?(Const) + ConstPathField.new( + parent: parent, + constant: constant, + location: parent.location.to(constant.location) + ) + else + Field.new( + parent: parent, + operator: consume_operator(:"::"), + name: constant, + location: parent.location.to(constant.location) + ) + end end # :call-seq: @@ -1218,6 +1310,7 @@ def on_def(name, params, bodystmt) start_char = find_next_statement_start(params.location.end_char) bodystmt.bind( + self, start_char, start_char - line_counts[params.location.start_line - 1].start, ending.location.start_char, @@ -1306,6 +1399,7 @@ def on_defs(target, operator, name, params, bodystmt) start_char = find_next_statement_start(params.location.end_char) bodystmt.bind( + self, start_char, start_char - line_counts[params.location.start_line - 1].start, ending.location.start_char, @@ -1345,6 +1439,7 @@ def on_do_block(block_var, bodystmt) start_char = find_next_statement_start(location.end_char) bodystmt.bind( + self, start_char, start_char - line_counts[location.start_line - 1].start, ending.location.start_char, @@ -1440,6 +1535,7 @@ def on_else(statements) start_char = find_next_statement_start(keyword.location.end_char) statements.bind( + self, start_char, start_char - line_counts[keyword.location.start_line - 1].start, ending.location.start_char, @@ -1463,8 +1559,16 @@ def on_elsif(predicate, statements, consequent) beginning = consume_keyword(:elsif) ending = consequent || consume_keyword(:end) - start_char = 
find_next_statement_start(predicate.location.end_char) + delimiter = + find_keyword_between(:then, predicate, statements) || + find_token_between(Semicolon, predicate, statements) + + tokens.delete(delimiter) if delimiter + start_char = + find_next_statement_start((delimiter || predicate).location.end_char) + statements.bind( + self, start_char, start_char - line_counts[predicate.location.start_line - 1].start, ending.location.start_char, @@ -1588,6 +1692,7 @@ def on_ensure(statements) ending = find_keyword(:end) start_char = find_next_statement_start(keyword.location.end_char) statements.bind( + self, start_char, start_char - line_counts[keyword.location.start_line - 1].start, ending.location.start_char, @@ -1662,6 +1767,22 @@ def on_float(value) # VarField right # ) -> FndPtn def on_fndptn(constant, left, values, right) + # The left and right of a find pattern are always going to be splats, so + # we're going to consume the * operators and use their location + # information to extend the location of the splats. + right, left = + [right, left].map do |node| + operator = consume_operator(:*) + location = + if node.value + operator.location.to(node.location) + else + operator.location + end + + node.copy(location: location) + end + # The opening of this find pattern is either going to be a left bracket, a # right left parenthesis, or the left splat. 
We're going to use this to # determine how to find the closing of the pattern, as well as determining @@ -1702,21 +1823,20 @@ def on_for(index, collection, statements) in_keyword = consume_keyword(:in) ending = consume_keyword(:end) - # Consume the do keyword if it exists so that it doesn't get confused for - # some other block - keyword = find_keyword(:do) - if keyword && - keyword.location.start_char > collection.location.end_char && - keyword.location.end_char < ending.location.start_char - tokens.delete(keyword) - end + delimiter = + find_keyword_between(:do, collection, ending) || + find_token_between(Semicolon, collection, ending) + + tokens.delete(delimiter) if delimiter start_char = - find_next_statement_start((keyword || collection).location.end_char) + find_next_statement_start((delimiter || collection).location.end_char) + statements.bind( + self, start_char, start_char - - line_counts[(keyword || collection).location.end_line - 1].start, + line_counts[(delimiter || collection).location.end_line - 1].start, ending.location.start_char, ending.location.start_column ) @@ -1770,7 +1890,7 @@ def on_heredoc_beg(value) line: lineno, char: char_pos, column: current_column, - size: value.size + 1 + size: value.size ) # Here we're going to artificially create an extra node type so that if @@ -1805,7 +1925,7 @@ def on_heredoc_end(value) line: lineno, char: char_pos, column: current_column, - size: value.size + 1 + size: value.size ) heredoc_end = HeredocEnd.new(value: value.chomp, location: location) @@ -1820,9 +1940,9 @@ def on_heredoc_end(value) start_line: heredoc.location.start_line, start_char: heredoc.location.start_char, start_column: heredoc.location.start_column, - end_line: lineno, - end_char: char_pos, - end_column: current_column + end_line: location.end_line, + end_char: location.end_char, + end_column: location.end_column ) ) end @@ -1830,10 +1950,42 @@ def on_heredoc_end(value) # :call-seq: # on_hshptn: ( # (nil | untyped) constant, - # Array[[Label, 
untyped]] keywords, + # Array[[Label | StringContent, untyped]] keywords, # (nil | VarField) keyword_rest # ) -> HshPtn def on_hshptn(constant, keywords, keyword_rest) + keywords = + (keywords || []).map do |(label, value)| + if label.is_a?(Label) + [label, value] + else + tstring_beg_index = + tokens.rindex do |token| + token.is_a?(TStringBeg) && + token.location.start_char < label.location.start_char + end + + tstring_beg = tokens.delete_at(tstring_beg_index) + + label_end_index = + tokens.rindex do |token| + token.is_a?(LabelEnd) && + token.location.start_char == label.location.end_char + end + + label_end = tokens.delete_at(label_end_index) + + [ + DynaSymbol.new( + parts: label.parts, + quote: label_end.value[0], + location: tstring_beg.location.to(label_end.location) + ), + value + ] + end + end + if keyword_rest # We're doing this to delete the token from the list so that it doesn't # confuse future patterns by thinking they have an extra ** on the end. @@ -1846,7 +1998,7 @@ def on_hshptn(constant, keywords, keyword_rest) keyword_rest = VarField.new(value: nil, location: token.location) end - parts = [constant, *keywords&.flatten(1), keyword_rest].compact + parts = [constant, *keywords.flatten(1), keyword_rest].compact # If there's no constant, there may be braces, so we're going to look for # those to get our bounds. 
@@ -1863,7 +2015,7 @@ def on_hshptn(constant, keywords, keyword_rest) HshPtn.new( constant: constant, - keywords: keywords || [], + keywords: keywords, keyword_rest: keyword_rest, location: parts[0].location.to(parts[-1].location) ) @@ -1894,8 +2046,15 @@ def on_if(predicate, statements, consequent) beginning = consume_keyword(:if) ending = consequent || consume_keyword(:end) - start_char = find_next_statement_start(predicate.location.end_char) + if (keyword = find_keyword_between(:then, predicate, ending)) + tokens.delete(keyword) + end + + start_char = + find_next_statement_start((keyword || predicate).location.end_char) + statements.bind( + self, start_char, start_char - line_counts[predicate.location.end_line - 1].start, ending.location.start_char, @@ -1929,7 +2088,7 @@ def on_if_mod(predicate, statement) IfNode.new( predicate: predicate, statements: - Statements.new(self, body: [statement], location: statement.location), + Statements.new(body: [statement], location: statement.location), consequent: nil, location: statement.location.to(predicate.location) ) @@ -1973,13 +2132,22 @@ def on_in(pattern, statements, consequent) ending = consequent || consume_keyword(:end) statements_start = pattern - if (token = find_keyword(:then)) + if (token = find_keyword_between(:then, pattern, statements)) tokens.delete(token) statements_start = token end - start_char = find_next_statement_start(statements_start.location.end_char) + start_char = + find_next_statement_start((token || statements_start).location.end_char) + + # Ripper ignores parentheses on patterns, so we need to do the same in + # order to attach comments correctly to the pattern. 
+ if source[start_char] == ")" + start_char = find_next_statement_start(start_char + 1) + end + statements.bind( + self, start_char, start_char - line_counts[statements_start.location.start_line - 1].start, @@ -2104,12 +2272,19 @@ def on_lambda(params, statements) token.location.start_char > beginning.location.start_char end + if braces + opening = consume_token(TLamBeg) + closing = consume_token(RBrace) + else + opening = consume_keyword(:do) + closing = consume_keyword(:end) + end + # We need to do some special mapping here. Since ripper doesn't support - # capturing lambda var until 3.2, we need to normalize all of that here. + # capturing lambda vars, we need to normalize all of that here. params = - case params - when Paren - # In this case we've gotten to the <3.2 parentheses wrapping a set of + if params.is_a?(Paren) + # In this case we've gotten to the parentheses wrapping a set of # parameters case. Here we need to manually scan for lambda locals. range = (params.location.start_char + 1)...params.location.end_char locals = lambda_locals(source[range]) @@ -2131,27 +2306,31 @@ def on_lambda(params, statements) node.comments.concat(params.comments) node - when Params - # In this case we've gotten to the <3.2 plain set of parameters. In - # this case there cannot be lambda locals, so we will wrap the - # parameters into a lambda var that has no locals. + else + # If there are no parameters, then we didn't have anything to base the + # location information of off. Now that we have an opening of the + # block, we can correct this. + if params.empty? + opening_location = opening.location + location = + Location.fixed( + line: opening_location.start_line, + char: opening_location.start_char, + column: opening_location.start_column + ) + + params = params.copy(location: location) + end + + # In this case we've gotten to the plain set of parameters. 
In this + # case there cannot be lambda locals, so we will wrap the parameters + # into a lambda var that has no locals. LambdaVar.new(params: params, locals: [], location: params.location) - when LambdaVar - # In this case we've gotten to 3.2+ lambda var. In this case we don't - # need to do anything and can just the value as given. - params end - if braces - opening = consume_token(TLamBeg) - closing = consume_token(RBrace) - else - opening = consume_keyword(:do) - closing = consume_keyword(:end) - end - start_char = find_next_statement_start(opening.location.end_char) statements.bind( + self, start_char, start_char - line_counts[opening.location.end_line - 1].start, closing.location.start_char, @@ -2219,8 +2398,14 @@ def lambda_locals(source) } } + parent_line = lineno - 1 + parent_column = + consume_token(Semicolon).location.start_column - tokens[index][0][1] + tokens[(index + 1)..].each_with_object([]) do |token, locals| (lineno, column), type, value, = token + column += parent_column if lineno == 1 + lineno += parent_line # Make the state transition for the parser. 
If there isn't a transition # from the current state to a new state for this type, then we're in a @@ -2336,23 +2521,30 @@ def on_method_add_arg(call, arguments) # :call-seq: # on_method_add_block: ( - # (Call | Command | CommandCall) call, + # (Break | Call | Command | CommandCall, Next) call, # Block block - # ) -> MethodAddBlock + # ) -> Break | MethodAddBlock def on_method_add_block(call, block) location = call.location.to(block.location) case call + when Break, Next, ReturnNode + parts = call.arguments.parts + + node = parts.pop + copied = + node.copy(block: block, location: node.location.to(block.location)) + + copied.comments.concat(call.comments) + parts << copied + + call.copy(location: location) when Command, CommandCall node = call.copy(block: block, location: location) node.comments.concat(call.comments) node else - MethodAddBlock.new( - call: call, - block: block, - location: call.location.to(block.location) - ) + MethodAddBlock.new(call: call, block: block, location: location) end end @@ -2429,6 +2621,7 @@ def on_module(constant, bodystmt) start_char = find_next_statement_start(constant.location.end_char) bodystmt.bind( + self, start_char, start_char - line_counts[constant.location.start_line - 1].start, ending.location.start_char, @@ -2575,19 +2768,40 @@ def on_params( # have a `nil` for the value instead of a `false`. keywords&.map! { |(key, value)| [key, value || nil] } - parts = [ - *requireds, - *optionals&.flatten(1), - rest, - *posts, - *keywords&.flatten(1), - (keyword_rest if keyword_rest != :nil), - (block if block != :&) - ].compact + # Here we're going to build up a list of all of the params so that we can + # determine our location information. 
+ parts = [] + + requireds&.each { |required| parts << required.location } + optionals&.each do |(key, value)| + parts << key.location + parts << value.location if value + end + + parts << rest.location if rest + posts&.each { |post| parts << post.location } + + keywords&.each do |(key, value)| + parts << key.location + parts << value.location if value + end + + if keyword_rest == :nil + # When we get a :nil here, it means that we have **nil syntax, which + # means this set of parameters accepts no more keyword arguments. In + # this case we need to go and find the location of these two tokens. + operator = consume_operator(:**) + parts << operator.location.to(consume_keyword(:nil).location) + elsif keyword_rest + parts << keyword_rest.location + end + + parts << block.location if block && block != :& + parts = parts.compact location = if parts.any? - parts[0].location.to(parts[-1].location) + parts[0].to(parts[-1]) else Location.fixed(line: lineno, char: char_pos, column: current_column) end @@ -2684,7 +2898,7 @@ def on_program(statements) ) statements.body << @__end__ if @__end__ - statements.bind(0, 0, source.length, last_column) + statements.bind(self, 0, 0, source.length, last_column) program = Program.new(statements: statements, location: location) attach_comments(program, @comments) @@ -3016,8 +3230,9 @@ def on_rescue(exceptions, variable, statements, consequent) exceptions = exceptions[0] if exceptions.is_a?(Array) last_node = variable || exceptions || keyword - start_char = find_next_statement_start(last_node.location.end_char) + start_char = find_next_statement_start(last_node.end_char) statements.bind( + self, start_char, start_char - line_counts[last_node.location.start_line - 1].start, char_pos, @@ -3038,7 +3253,7 @@ def on_rescue(exceptions, variable, statements, consequent) start_char: keyword.location.end_char + 1, start_column: keyword.location.end_column + 1, end_line: last_node.location.end_line, - end_char: last_node.location.end_char, + 
end_char: last_node.end_char, end_column: last_node.location.end_column ) ) @@ -3136,6 +3351,7 @@ def on_sclass(target, bodystmt) start_char = find_next_statement_start(target.location.end_char) bodystmt.bind( + self, start_char, start_char - line_counts[target.location.start_line - 1].start, ending.location.start_char, @@ -3149,9 +3365,29 @@ def on_sclass(target, bodystmt) ) end - # def on_semicolon(value) - # value - # end + # Semicolons are tokens that get added to the token list but never get + # attached to the AST. Because of this they only need to track their + # associated location so they can be used for computing bounds. + class Semicolon + attr_reader :location + + def initialize(location) + @location = location + end + end + + # :call-seq: + # on_semicolon: (String value) -> Semicolon + def on_semicolon(value) + tokens << Semicolon.new( + Location.token( + line: lineno, + char: char_pos, + column: current_column, + size: value.size + ) + ) + end # def on_sp(value) # value @@ -3169,18 +3405,13 @@ def on_stmts_add(statements, statement) statements.location.to(statement.location) end - Statements.new( - self, - body: statements.body << statement, - location: location - ) + Statements.new(body: statements.body << statement, location: location) end # :call-seq: # on_stmts_new: () -> Statements def on_stmts_new Statements.new( - self, body: [], location: Location.fixed(line: lineno, char: char_pos, column: current_column) @@ -3245,6 +3476,7 @@ def on_string_embexpr(statements) embexpr_end = consume_token(EmbExprEnd) statements.bind( + self, embexpr_beg.location.end_char, embexpr_beg.location.end_column, embexpr_end.location.start_char, @@ -3588,8 +3820,15 @@ def on_unless(predicate, statements, consequent) beginning = consume_keyword(:unless) ending = consequent || consume_keyword(:end) - start_char = find_next_statement_start(predicate.location.end_char) + if (keyword = find_keyword_between(:then, predicate, ending)) + tokens.delete(keyword) + end + + 
start_char = + find_next_statement_start((keyword || predicate).location.end_char) + statements.bind( + self, start_char, start_char - line_counts[predicate.location.end_line - 1].start, ending.location.start_char, @@ -3612,7 +3851,7 @@ def on_unless_mod(predicate, statement) UnlessNode.new( predicate: predicate, statements: - Statements.new(self, body: [statement], location: statement.location), + Statements.new(body: [statement], location: statement.location), consequent: nil, location: statement.location.to(predicate.location) ) @@ -3624,17 +3863,18 @@ def on_until(predicate, statements) beginning = consume_keyword(:until) ending = consume_keyword(:end) - # Consume the do keyword if it exists so that it doesn't get confused for - # some other block - keyword = find_keyword(:do) - if keyword && keyword.location.start_char > predicate.location.end_char && - keyword.location.end_char < ending.location.start_char - tokens.delete(keyword) - end + delimiter = + find_keyword_between(:do, predicate, statements) || + find_token_between(Semicolon, predicate, statements) + + tokens.delete(delimiter) if delimiter # Update the Statements location information - start_char = find_next_statement_start(predicate.location.end_char) + start_char = + find_next_statement_start((delimiter || predicate).location.end_char) + statements.bind( + self, start_char, start_char - line_counts[predicate.location.end_line - 1].start, ending.location.start_char, @@ -3656,7 +3896,7 @@ def on_until_mod(predicate, statement) UntilNode.new( predicate: predicate, statements: - Statements.new(self, body: [statement], location: statement.location), + Statements.new(body: [statement], location: statement.location), location: statement.location.to(predicate.location) ) end @@ -3727,9 +3967,11 @@ def on_when(arguments, statements, consequent) statements_start = token end - start_char = find_next_statement_start(statements_start.location.end_char) + start_char = + find_next_statement_start((token || 
statements_start).location.end_char) statements.bind( + self, start_char, start_char - line_counts[statements_start.location.start_line - 1].start, @@ -3751,17 +3993,18 @@ def on_while(predicate, statements) beginning = consume_keyword(:while) ending = consume_keyword(:end) - # Consume the do keyword if it exists so that it doesn't get confused for - # some other block - keyword = find_keyword(:do) - if keyword && keyword.location.start_char > predicate.location.end_char && - keyword.location.end_char < ending.location.start_char - tokens.delete(keyword) - end + delimiter = + find_keyword_between(:do, predicate, statements) || + find_token_between(Semicolon, predicate, statements) + + tokens.delete(delimiter) if delimiter # Update the Statements location information - start_char = find_next_statement_start(predicate.location.end_char) + start_char = + find_next_statement_start((delimiter || predicate).location.end_char) + statements.bind( + self, start_char, start_char - line_counts[predicate.location.end_line - 1].start, ending.location.start_char, @@ -3783,7 +4026,7 @@ def on_while_mod(predicate, statement) WhileNode.new( predicate: predicate, statements: - Statements.new(self, body: [statement], location: statement.location), + Statements.new(body: [statement], location: statement.location), location: statement.location.to(predicate.location) ) end diff --git a/lib/syntax_tree/plugin/disable_auto_ternary.rb b/lib/syntax_tree/plugin/disable_auto_ternary.rb new file mode 100644 index 00000000..dd38c783 --- /dev/null +++ b/lib/syntax_tree/plugin/disable_auto_ternary.rb @@ -0,0 +1,7 @@ +# frozen_string_literal: true + +module SyntaxTree + class Formatter + DISABLE_AUTO_TERNARY = true + end +end diff --git a/lib/syntax_tree/pretty_print_visitor.rb b/lib/syntax_tree/pretty_print_visitor.rb new file mode 100644 index 00000000..894e0cf4 --- /dev/null +++ b/lib/syntax_tree/pretty_print_visitor.rb @@ -0,0 +1,83 @@ +# frozen_string_literal: true + +module SyntaxTree + # 
This visitor pretty-prints the AST into an equivalent s-expression. + class PrettyPrintVisitor < FieldVisitor + attr_reader :q + + def initialize(q) + @q = q + end + + # This is here because we need to make sure the operator is cast to a string + # before we print it out. + def visit_binary(node) + node(node, "binary") do + field("left", node.left) + text("operator", node.operator.to_s) + field("right", node.right) + comments(node) + end + end + + # This is here to make it a little nicer to look at labels since they + # typically have their : at the end of the value. + def visit_label(node) + node(node, "label") do + q.breakable + q.text(":") + q.text(node.value[0...-1]) + comments(node) + end + end + + private + + def comments(node) + return if node.comments.empty? + + q.breakable + q.group(2, "(", ")") do + q.seplist(node.comments) { |comment| q.pp(comment) } + end + end + + def field(_name, value) + q.breakable + q.pp(value) + end + + def list(_name, values) + q.breakable + q.group(2, "(", ")") { q.seplist(values) { |value| q.pp(value) } } + end + + def node(_node, type) + q.group(2, "(", ")") do + q.text(type) + yield + end + end + + def pairs(_name, values) + q.group(2, "(", ")") do + q.seplist(values) do |(key, value)| + q.pp(key) + + if value + q.text("=") + q.group(2) do + q.breakable("") + q.pp(value) + end + end + end + end + end + + def text(_name, value) + q.breakable + q.text(value) + end + end +end diff --git a/lib/syntax_tree/reflection.rb b/lib/syntax_tree/reflection.rb new file mode 100644 index 00000000..6955aa21 --- /dev/null +++ b/lib/syntax_tree/reflection.rb @@ -0,0 +1,257 @@ +# frozen_string_literal: true + +module SyntaxTree + # This module is used to provide some reflection on the various types of nodes + # and their attributes. As soon as it is required it collects all of its + # information. + module Reflection + # This module represents the type of the values being passed to attributes + # of nodes. 
It is used as part of the documentation of the attributes. + module Type + CONSTANTS = SyntaxTree.constants.to_h { [_1, SyntaxTree.const_get(_1)] } + + # Represents an array type that holds another type. + class ArrayType + attr_reader :type + + def initialize(type) + @type = type + end + + def ===(value) + value.is_a?(Array) && value.all? { type === _1 } + end + + def inspect + "Array<#{type.inspect}>" + end + end + + # Represents a tuple type that holds a number of types in order. + class TupleType + attr_reader :types + + def initialize(types) + @types = types + end + + def ===(value) + value.is_a?(Array) && value.length == types.length && + value.zip(types).all? { |item, type| type === item } + end + + def inspect + "[#{types.map(&:inspect).join(", ")}]" + end + end + + # Represents a union type that can be one of a number of types. + class UnionType + attr_reader :types + + def initialize(types) + @types = types + end + + def ===(value) + types.any? { _1 === value } + end + + def inspect + types.map(&:inspect).join(" | ") + end + end + + class << self + def parse(comment) + comment = comment.gsub("\n", " ") + + unless comment.start_with?("[") + raise "Comment does not start with a bracket: #{comment.inspect}" + end + + count = 1 + found = + comment.chars[1..] + .find + .with_index(1) do |char, index| + count += { "[" => 1, "]" => -1 }.fetch(char, 0) + break index if count == 0 + end + + # If we weren't able to find the end of the balanced brackets, then + # the comment is malformed. + if found.nil? 
+ raise "Comment does not have balanced brackets: #{comment.inspect}" + end + + parse_type(comment[1...found].strip) + end + + private + + def parse_type(value) + case value + when "Integer" + Integer + when "String" + String + when "Symbol" + Symbol + when "boolean" + UnionType.new([TrueClass, FalseClass]) + when "nil" + NilClass + when ":\"::\"" + :"::" + when ":call" + :call + when ":nil" + :nil + when /\AArray\[(.+)\]\z/ + ArrayType.new(parse_type($1.strip)) + when /\A\[(.+)\]\z/ + TupleType.new($1.strip.split(/\s*,\s*/).map { parse_type(_1) }) + else + if value.include?("|") + UnionType.new(value.split(/\s*\|\s*/).map { parse_type(_1) }) + else + CONSTANTS.fetch(value.to_sym) + end + end + end + end + end + + # This class represents one of the attributes on a node in the tree. + class Attribute + attr_reader :name, :comment, :type + + def initialize(name, comment) + @name = name + @comment = comment + @type = Type.parse(comment) + end + end + + # This class represents one of our nodes in the tree. We're going to use it + # as a placeholder for collecting all of the various places that nodes are + # used. + class Node + attr_reader :name, :comment, :attributes, :visitor_method + + def initialize(name, comment, attributes, visitor_method) + @name = name + @comment = comment + @attributes = attributes + @visitor_method = visitor_method + end + end + + class << self + # This is going to hold a hash of all of the nodes in the tree. The keys + # are the names of the nodes as symbols. + attr_reader :nodes + + # This expects a node name as a symbol and returns the node object for + # that node. + def node(name) + nodes.fetch(name) + end + + private + + def parse_comments(statements, index) + statements[0...index] + .reverse_each + .take_while { _1.is_a?(SyntaxTree::Comment) } + .reverse_each + .map { _1.value[2..] } + end + end + + @nodes = {} + + # For each node, we're going to parse out its attributes and other metadata. 
+ # We'll use this as the basis for our report. + program = + SyntaxTree.parse(SyntaxTree.read(File.expand_path("node.rb", __dir__))) + + program_statements = program.statements + main_statements = program_statements.body.last.bodystmt.statements.body + main_statements.each_with_index do |main_statement, main_statement_index| + # Ensure we are only looking at class declarations. + next unless main_statement.is_a?(SyntaxTree::ClassDeclaration) + + # Ensure we're looking at class declarations with superclasses. + superclass = main_statement.superclass + next unless superclass.is_a?(SyntaxTree::VarRef) + + # Ensure we're looking at class declarations that inherit from Node. + next unless superclass.value.value == "Node" + + # All child nodes inherit the location attr_reader from Node, so we'll add + # that to the list of attributes first. + attributes = { + location: + Attribute.new(:location, "[Location] the location of this node") + } + + # This is the name of the method that gets called on the given visitor when + # the accept method is called on this node. + visitor_method = nil + + statements = main_statement.bodystmt.statements.body + statements.each_with_index do |statement, statement_index| + case statement + when SyntaxTree::Command + # We only use commands in node classes to define attributes. So, we + # can safely assume that we're looking at an attribute definition. + unless %w[attr_reader attr_accessor].include?(statement.message.value) + raise "Unexpected command: #{statement.message.value.inspect}" + end + + # The arguments to the command are the attributes that we're defining. + # We want to ensure that we're only defining one at a time.
+ if statement.arguments.parts.length != 1 + raise "Declaring more than one attribute at a time is not permitted" + end + + attribute = + Attribute.new( + statement.arguments.parts.first.value.value.to_sym, + "#{parse_comments(statements, statement_index).join("\n")}\n" + ) + + # Ensure that we don't already have an attribute named the same as + # this one, and then add it to the list of attributes. + if attributes.key?(attribute.name) + raise "Duplicate attribute: #{attribute.name}" + end + + attributes[attribute.name] = attribute + when SyntaxTree::DefNode + if statement.name.value == "accept" + call_node = statement.bodystmt.statements.body.first + visitor_method = call_node.message.value.to_sym + end + end + end + + # If we never found a visitor method, then we have an error. + raise if visitor_method.nil? + + # Finally, set it up in the hash of nodes so that we can use it later. + comments = parse_comments(main_statements, main_statement_index) + node = + Node.new( + main_statement.constant.constant.value.to_sym, + "#{comments.join("\n")}\n", + attributes, + visitor_method + ) + + @nodes[node.name] = node + end + end +end diff --git a/lib/syntax_tree/translation.rb b/lib/syntax_tree/translation.rb new file mode 100644 index 00000000..6fc96f00 --- /dev/null +++ b/lib/syntax_tree/translation.rb @@ -0,0 +1,28 @@ +# frozen_string_literal: true + +module SyntaxTree + # This module is responsible for translating the Syntax Tree syntax tree into + # other representations. + module Translation + # This method translates the given node into the representation defined by + # the whitequark/parser gem. We don't explicitly list it as a dependency + # because it's not required for the core functionality of Syntax Tree. + def self.to_parser(node, buffer) + require "parser" + require_relative "translation/parser" + + node.accept(Parser.new(buffer)) + end + + # This method translates the given node into the representation defined by + # the rubocop/rubocop-ast gem. 
We don't explicitly list it as a dependency + # because it's not required for the core functionality of Syntax Tree. + def self.to_rubocop_ast(node, buffer) + require "rubocop/ast" + require_relative "translation/parser" + require_relative "translation/rubocop_ast" + + node.accept(RuboCopAST.new(buffer)) + end + end +end diff --git a/lib/syntax_tree/translation/parser.rb b/lib/syntax_tree/translation/parser.rb new file mode 100644 index 00000000..8be4fc79 --- /dev/null +++ b/lib/syntax_tree/translation/parser.rb @@ -0,0 +1,3107 @@ +# frozen_string_literal: true + +module SyntaxTree + module Translation + # This visitor is responsible for converting the syntax tree produced by + # Syntax Tree into the syntax tree produced by the whitequark/parser gem. + class Parser < BasicVisitor + # Heredocs are represented _very_ differently in the parser gem from how + # they are represented in the Syntax Tree AST. This class is responsible + # for handling the translation. + class HeredocBuilder + Line = Struct.new(:value, :segments) + + attr_reader :node, :segments + + def initialize(node) + @node = node + @segments = [] + end + + def <<(segment) + if segment.type == :str && segments.last && + segments.last.type == :str && + !segments.last.children.first.end_with?("\n") + segments.last.children.first << segment.children.first + else + segments << segment + end + end + + def trim! + return unless node.beginning.value[2] == "~" + lines = [Line.new(+"", [])] + + segments.each do |segment| + lines.last.segments << segment + + if segment.type == :str + lines.last.value << segment.children.first + lines << Line.new(+"", []) if lines.last.value.end_with?("\n") + end + end + + lines.pop if lines.last.value.empty? + return if lines.empty? 
+ + segments.clear + lines.each do |line| + remaining = node.dedent + + line.segments.each do |segment| + if segment.type == :str + if remaining > 0 + whitespace = segment.children.first[/^\s{0,#{remaining}}/] + segment.children.first.sub!(/^#{whitespace}/, "") + remaining -= whitespace.length + end + + if node.beginning.value[3] != "'" && segments.any? && + segments.last.type == :str && + segments.last.children.first.end_with?("\\\n") + segments.last.children.first.gsub!(/\\\n\z/, "") + segments.last.children.first.concat(segment.children.first) + elsif !segment.children.first.empty? + segments << segment + end + else + segments << segment + end + end + end + end + end + + attr_reader :buffer, :stack + + def initialize(buffer) + @buffer = buffer + @stack = [] + end + + # For each node that we visit, we keep track of it in a stack as we + # descend into its children. We do this so that child nodes can reflect on + # their parents if they need additional information about their context. + def visit(node) + stack << node + result = super + stack.pop + result + end + + visit_methods do + # Visit an AliasNode node. + def visit_alias(node) + s( + :alias, + [visit(node.left), visit(node.right)], + smap_keyword_bare( + srange_length(node.start_char, 5), + srange_node(node) + ) + ) + end + + # Visit an ARefNode. + def visit_aref(node) + if ::Parser::Builders::Default.emit_index + if node.index.nil? + s( + :index, + [visit(node.collection)], + smap_index( + srange_find(node.collection.end_char, node.end_char, "["), + srange_length(node.end_char, -1), + srange_node(node) + ) + ) + else + s( + :index, + [visit(node.collection)].concat(visit_all(node.index.parts)), + smap_index( + srange_find_between(node.collection, node.index, "["), + srange_length(node.end_char, -1), + srange_node(node) + ) + ) + end + else + if node.index.nil? 
+ s( + :send, + [visit(node.collection), :[]], + smap_send_bare( + srange_find(node.collection.end_char, node.end_char, "[]"), + srange_node(node) + ) + ) + else + s( + :send, + [visit(node.collection), :[], *visit_all(node.index.parts)], + smap_send_bare( + srange( + srange_find_between( + node.collection, + node.index, + "[" + ).begin_pos, + node.end_char + ), + srange_node(node) + ) + ) + end + end + end + + # Visit an ARefField node. + def visit_aref_field(node) + if ::Parser::Builders::Default.emit_index + if node.index.nil? + s( + :indexasgn, + [visit(node.collection)], + smap_index( + srange_find(node.collection.end_char, node.end_char, "["), + srange_length(node.end_char, -1), + srange_node(node) + ) + ) + else + s( + :indexasgn, + [visit(node.collection)].concat(visit_all(node.index.parts)), + smap_index( + srange_find_between(node.collection, node.index, "["), + srange_length(node.end_char, -1), + srange_node(node) + ) + ) + end + else + if node.index.nil? + s( + :send, + [visit(node.collection), :[]=], + smap_send_bare( + srange_find(node.collection.end_char, node.end_char, "[]"), + srange_node(node) + ) + ) + else + s( + :send, + [visit(node.collection), :[]=].concat( + visit_all(node.index.parts) + ), + smap_send_bare( + srange( + srange_find_between( + node.collection, + node.index, + "[" + ).begin_pos, + node.end_char + ), + srange_node(node) + ) + ) + end + end + end + + # Visit an ArgBlock node. + def visit_arg_block(node) + s( + :block_pass, + [visit(node.value)], + smap_operator(srange_length(node.start_char, 1), srange_node(node)) + ) + end + + # Visit an ArgStar node. + def visit_arg_star(node) + if stack[-3].is_a?(MLHSParen) && stack[-3].contents.is_a?(MLHS) + if node.value.nil? + s(:restarg, [], smap_variable(nil, srange_node(node))) + else + s( + :restarg, + [node.value.value.to_sym], + smap_variable(srange_node(node.value), srange_node(node)) + ) + end + else + s( + :splat, + node.value.nil? ? 
[] : [visit(node.value)], + smap_operator( + srange_length(node.start_char, 1), + srange_node(node) + ) + ) + end + end + + # Visit an ArgsForward node. + def visit_args_forward(node) + s(:forwarded_args, [], smap(srange_node(node))) + end + + # Visit an ArrayLiteral node. + def visit_array(node) + s( + :array, + node.contents ? visit_all(node.contents.parts) : [], + if node.lbracket.nil? + smap_collection_bare(srange_node(node)) + else + smap_collection( + srange_node(node.lbracket), + srange_length(node.end_char, -1), + srange_node(node) + ) + end + ) + end + + # Visit an AryPtn node. + def visit_aryptn(node) + type = :array_pattern + children = visit_all(node.requireds) + + if node.rest.is_a?(VarField) + if !node.rest.value.nil? + children << s(:match_rest, [visit(node.rest)], nil) + elsif node.posts.empty? && + node.rest.start_char == node.rest.end_char + # Here we have an implicit rest, as in [foo,]. parser has a + # specific type for these patterns. + type = :array_pattern_with_tail + else + children << s(:match_rest, [], nil) + end + end + + if node.constant + s( + :const_pattern, + [ + visit(node.constant), + s( + type, + children + visit_all(node.posts), + smap_collection_bare( + srange(node.constant.end_char + 1, node.end_char - 1) + ) + ) + ], + smap_collection( + srange_length(node.constant.end_char, 1), + srange_length(node.end_char, -1), + srange_node(node) + ) + ) + else + s( + type, + children + visit_all(node.posts), + if buffer.source[node.start_char] == "[" + smap_collection( + srange_length(node.start_char, 1), + srange_length(node.end_char, -1), + srange_node(node) + ) + else + smap_collection_bare(srange_node(node)) + end + ) + end + end + + # Visit an Assign node. 
+ def visit_assign(node) + target = visit(node.target) + location = + target + .location + .with_operator(srange_find_between(node.target, node.value, "=")) + .with_expression(srange_node(node)) + + s(target.type, target.children + [visit(node.value)], location) + end + + # Visit an Assoc node. + def visit_assoc(node) + if node.value.nil? + # { foo: } + expression = srange(node.start_char, node.end_char - 1) + type, location = + if node.key.value.start_with?(/[A-Z]/) + [:const, smap_constant(nil, expression, expression)] + else + [:send, smap_send_bare(expression, expression)] + end + + s( + :pair, + [ + visit(node.key), + s(type, [nil, node.key.value.chomp(":").to_sym], location) + ], + smap_operator( + srange_length(node.key.end_char, -1), + srange_node(node) + ) + ) + elsif node.key.is_a?(Label) + # { foo: 1 } + s( + :pair, + [visit(node.key), visit(node.value)], + smap_operator( + srange_length(node.key.end_char, -1), + srange_node(node) + ) + ) + elsif (operator = srange_search_between(node.key, node.value, "=>")) + # { :foo => 1 } + s( + :pair, + [visit(node.key), visit(node.value)], + smap_operator(operator, srange_node(node)) + ) + else + # { "foo": 1 } + key = visit(node.key) + key_location = + smap_collection( + key.location.begin, + srange_length(node.key.end_char - 2, 1), + srange(node.key.start_char, node.key.end_char - 1) + ) + + s( + :pair, + [s(key.type, key.children, key_location), visit(node.value)], + smap_operator( + srange_length(node.key.end_char, -1), + srange_node(node) + ) + ) + end + end + + # Visit an AssocSplat node. + def visit_assoc_splat(node) + s( + :kwsplat, + [visit(node.value)], + smap_operator(srange_length(node.start_char, 2), srange_node(node)) + ) + end + + # Visit a Backref node. + def visit_backref(node) + location = smap(srange_node(node)) + + if node.value.match?(/^\$\d+$/) + s(:nth_ref, [node.value[1..].to_i], location) + else + s(:back_ref, [node.value.to_sym], location) + end + end + + # Visit a BareAssocHash node. 
+ def visit_bare_assoc_hash(node) + s( + if ::Parser::Builders::Default.emit_kwargs && + !stack[-2].is_a?(ArrayLiteral) + :kwargs + else + :hash + end, + visit_all(node.assocs), + smap_collection_bare(srange_node(node)) + ) + end + + # Visit a BEGINBlock node. + def visit_BEGIN(node) + s( + :preexe, + [visit(node.statements)], + smap_keyword( + srange_length(node.start_char, 5), + srange_find(node.start_char + 5, node.statements.start_char, "{"), + srange_length(node.end_char, -1), + srange_node(node) + ) + ) + end + + # Visit a Begin node. + def visit_begin(node) + location = + smap_collection( + srange_length(node.start_char, 5), + srange_length(node.end_char, -3), + srange_node(node) + ) + + if node.bodystmt.empty? + s(:kwbegin, [], location) + elsif node.bodystmt.rescue_clause.nil? && + node.bodystmt.ensure_clause.nil? && + node.bodystmt.else_clause.nil? + child = visit(node.bodystmt.statements) + + s( + :kwbegin, + child.type == :begin ? child.children : [child], + location + ) + else + s(:kwbegin, [visit(node.bodystmt)], location) + end + end + + # Visit a Binary node. + def visit_binary(node) + case node.operator + when :| + current = -2 + while stack[current].is_a?(Binary) && stack[current].operator == :| + current -= 1 + end + + if stack[current].is_a?(In) + s(:match_alt, [visit(node.left), visit(node.right)], nil) + else + visit(canonical_binary(node)) + end + when :"=>", :"&&", :and, :"||", :or + s( + { "=>": :match_as, "&&": :and, "||": :or }.fetch( + node.operator, + node.operator + ), + [visit(node.left), visit(node.right)], + smap_operator( + srange_find_between(node.left, node.right, node.operator.to_s), + srange_node(node) + ) + ) + when :=~ + # When you use a regular expression on the left hand side of a =~ + # operator and it doesn't have interpolation, then its named capture + # groups introduce local variables into the scope. In this case the + # parser gem has a different node (match_with_lvasgn) instead of the + # regular send.
+ if node.left.is_a?(RegexpLiteral) && node.left.parts.length == 1 && + node.left.parts.first.is_a?(TStringContent) + s( + :match_with_lvasgn, + [visit(node.left), visit(node.right)], + smap_operator( + srange_find_between( + node.left, + node.right, + node.operator.to_s + ), + srange_node(node) + ) + ) + else + visit(canonical_binary(node)) + end + else + visit(canonical_binary(node)) + end + end + + # Visit a BlockArg node. + def visit_blockarg(node) + if node.name.nil? + s(:blockarg, [nil], smap_variable(nil, srange_node(node))) + else + s( + :blockarg, + [node.name.value.to_sym], + smap_variable(srange_node(node.name), srange_node(node)) + ) + end + end + + # Visit a BlockVar node. + def visit_block_var(node) + shadowargs = + node.locals.map do |local| + s( + :shadowarg, + [local.value.to_sym], + smap_variable(srange_node(local), srange_node(local)) + ) + end + + params = node.params + children = + if ::Parser::Builders::Default.emit_procarg0 && node.arg0? + # There is a special node type in the parser gem for when a single + # required parameter to a block would potentially be expanded + # automatically. We handle that case here. + required = params.requireds.first + procarg0 = + if ::Parser::Builders::Default.emit_arg_inside_procarg0 && + required.is_a?(Ident) + s( + :procarg0, + [ + s( + :arg, + [required.value.to_sym], + smap_variable( + srange_node(required), + srange_node(required) + ) + ) + ], + smap_collection_bare(srange_node(required)) + ) + else + child = visit(required) + s(:procarg0, child, child.location) + end + + [procarg0] + else + visit(params).children + end + + s( + :args, + children + shadowargs, + smap_collection( + srange_length(node.start_char, 1), + srange_length(node.end_char, -1), + srange_node(node) + ) + ) + end + + # Visit a BodyStmt node. 
+ def visit_bodystmt(node) + result = visit(node.statements) + + if node.rescue_clause + rescue_node = visit(node.rescue_clause) + + children = [result] + rescue_node.children + location = rescue_node.location + + if node.else_clause + children.pop + children << visit(node.else_clause) + + location = + smap_condition( + nil, + nil, + srange_length(node.else_clause.start_char - 3, -4), + nil, + srange( + location.expression.begin_pos, + node.else_clause.end_char + ) + ) + end + + result = s(rescue_node.type, children, location) + end + + if node.ensure_clause + ensure_node = visit(node.ensure_clause) + + expression = + ( + if result + result.location.expression.join( + ensure_node.location.expression + ) + else + ensure_node.location.expression + end + ) + location = ensure_node.location.with_expression(expression) + + result = + s(ensure_node.type, [result] + ensure_node.children, location) + end + + result + end + + # Visit a Break node. + def visit_break(node) + s( + :break, + visit_all(node.arguments.parts), + smap_keyword_bare( + srange_length(node.start_char, 5), + srange_node(node) + ) + ) + end + + # Visit a CallNode node. + def visit_call(node) + visit_command_call( + CommandCall.new( + receiver: node.receiver, + operator: node.operator, + message: node.message, + arguments: node.arguments, + block: nil, + location: node.location + ) + ) + end + + # Visit a Case node. + def visit_case(node) + clauses = [node.consequent] + while clauses.last && !clauses.last.is_a?(Else) + clauses << clauses.last.consequent + end + + else_token = + if clauses.last.is_a?(Else) + srange_length(clauses.last.start_char, 4) + end + + s( + node.consequent.is_a?(In) ? :case_match : :case, + [visit(node.value)] + clauses.map { |clause| visit(clause) }, + smap_condition( + srange_length(node.start_char, 4), + nil, + else_token, + srange_length(node.end_char, -3), + srange_node(node) + ) + ) + end + + # Visit a CHAR node. 
+ def visit_CHAR(node) + s( + :str, + [node.value[1..]], + smap_collection( + srange_length(node.start_char, 1), + nil, + srange_node(node) + ) + ) + end + + # Visit a ClassDeclaration node. + def visit_class(node) + operator = + if node.superclass + srange_find_between(node.constant, node.superclass, "<") + end + + s( + :class, + [ + visit(node.constant), + visit(node.superclass), + visit(node.bodystmt) + ], + smap_definition( + srange_length(node.start_char, 5), + operator, + srange_node(node.constant), + srange_length(node.end_char, -3) + ).with_expression(srange_node(node)) + ) + end + + # Visit a Command node. + def visit_command(node) + visit_command_call( + CommandCall.new( + receiver: nil, + operator: nil, + message: node.message, + arguments: node.arguments, + block: node.block, + location: node.location + ) + ) + end + + # Visit a CommandCall node. + def visit_command_call(node) + children = [ + visit(node.receiver), + node.message == :call ? :call : node.message.value.to_sym + ] + + begin_token = nil + end_token = nil + + case node.arguments + when Args + children += visit_all(node.arguments.parts) + when ArgParen + case node.arguments.arguments + when nil + # skip + when ArgsForward + children << visit(node.arguments.arguments) + else + children += visit_all(node.arguments.arguments.parts) + end + + begin_token = srange_length(node.arguments.start_char, 1) + end_token = srange_length(node.arguments.end_char, -1) + end + + dot_bound = + if node.arguments + node.arguments.start_char + elsif node.block + node.block.start_char + else + node.end_char + end + + expression = + if node.arguments.is_a?(ArgParen) + srange(node.start_char, node.arguments.end_char) + elsif node.arguments.is_a?(Args) && node.arguments.parts.any? 
+ last_part = node.arguments.parts.last + end_char = + if last_part.is_a?(Heredoc) + last_part.beginning.end_char + else + last_part.end_char + end + + srange(node.start_char, end_char) + elsif node.block + if node.receiver + srange(node.receiver.start_char, node.message.end_char) + else + srange_node(node.message) + end + else + srange_node(node) + end + + call = + s( + if node.operator.is_a?(Op) && node.operator.value == "&." + :csend + else + :send + end, + children, + smap_send( + if node.operator == :"::" + srange_find( + node.receiver.end_char, + if node.message == :call + dot_bound + else + node.message.start_char + end, + "::" + ) + elsif node.operator + srange_node(node.operator) + end, + node.message == :call ? nil : srange_node(node.message), + begin_token, + end_token, + expression + ) + ) + + if node.block + type, arguments = block_children(node.block) + + s( + type, + [call, arguments, visit(node.block.bodystmt)], + smap_collection( + srange_node(node.block.opening), + srange_length( + node.end_char, + node.block.opening.is_a?(Kw) ? -3 : -1 + ), + srange_node(node) + ) + ) + else + call + end + end + + # Visit a Const node. + def visit_const(node) + s( + :const, + [nil, node.value.to_sym], + smap_constant(nil, srange_node(node), srange_node(node)) + ) + end + + # Visit a ConstPathField node. + def visit_const_path_field(node) + if node.parent.is_a?(VarRef) && node.parent.value.is_a?(Kw) && + node.parent.value.value == "self" && node.constant.is_a?(Ident) + s(:send, [visit(node.parent), :"#{node.constant.value}="], nil) + else + s( + :casgn, + [visit(node.parent), node.constant.value.to_sym], + smap_constant( + srange_find_between(node.parent, node.constant, "::"), + srange_node(node.constant), + srange_node(node) + ) + ) + end + end + + # Visit a ConstPathRef node. 
+ def visit_const_path_ref(node) + s( + :const, + [visit(node.parent), node.constant.value.to_sym], + smap_constant( + srange_find_between(node.parent, node.constant, "::"), + srange_node(node.constant), + srange_node(node) + ) + ) + end + + # Visit a ConstRef node. + def visit_const_ref(node) + s( + :const, + [nil, node.constant.value.to_sym], + smap_constant(nil, srange_node(node.constant), srange_node(node)) + ) + end + + # Visit a CVar node. + def visit_cvar(node) + s( + :cvar, + [node.value.to_sym], + smap_variable(srange_node(node), srange_node(node)) + ) + end + + # Visit a DefNode node. + def visit_def(node) + name = node.name.value.to_sym + args = + case node.params + when Params + child = visit(node.params) + + s( + child.type, + child.children, + smap_collection_bare(child.location&.expression) + ) + when Paren + child = visit(node.params.contents) + + s( + child.type, + child.children, + smap_collection( + srange_length(node.params.start_char, 1), + srange_length(node.params.end_char, -1), + srange_node(node.params) + ) + ) + else + s(:args, [], smap_collection_bare(nil)) + end + + location = + if node.endless? + smap_method_definition( + srange_length(node.start_char, 3), + nil, + srange_node(node.name), + nil, + srange_find_between( + (node.params || node.name), + node.bodystmt, + "=" + ), + srange_node(node) + ) + else + smap_method_definition( + srange_length(node.start_char, 3), + nil, + srange_node(node.name), + srange_length(node.end_char, -3), + nil, + srange_node(node) + ) + end + + if node.target + target = + node.target.is_a?(Paren) ? node.target.contents : node.target + + s( + :defs, + [visit(target), name, args, visit(node.bodystmt)], + smap_method_definition( + location.keyword, + srange_node(node.operator), + location.name, + location.end, + location.assignment, + location.expression + ) + ) + else + s(:def, [name, args, visit(node.bodystmt)], location) + end + end + + # Visit a Defined node. 
+ def visit_defined(node) + paren_range = (node.start_char + 8)...node.end_char + begin_token, end_token = + if buffer.source[paren_range].include?("(") + [ + srange_find(paren_range.begin, paren_range.end, "("), + srange_length(node.end_char, -1) + ] + end + + s( + :defined?, + [visit(node.value)], + smap_keyword( + srange_length(node.start_char, 8), + begin_token, + end_token, + srange_node(node) + ) + ) + end + + # Visit a DynaSymbol node. + def visit_dyna_symbol(node) + location = + if node.quote + smap_collection( + srange_length(node.start_char, node.quote.length), + srange_length(node.end_char, -1), + srange_node(node) + ) + else + smap_collection_bare(srange_node(node)) + end + + if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) + s(:sym, ["\"#{node.parts.first.value}\"".undump.to_sym], location) + else + s(:dsym, visit_all(node.parts), location) + end + end + + # Visit an Else node. + def visit_else(node) + if node.statements.empty? && stack[-2].is_a?(Case) + s(:empty_else, [], nil) + else + visit(node.statements) + end + end + + # Visit an Elsif node. + def visit_elsif(node) + begin_start = node.predicate.end_char + begin_end = + if node.statements.empty? + node.statements.end_char + else + node.statements.body.first.start_char + end + + begin_token = + if buffer.source[begin_start...begin_end].include?("then") + srange_find(begin_start, begin_end, "then") + elsif buffer.source[begin_start...begin_end].include?(";") + srange_find(begin_start, begin_end, ";") + end + + else_token = + case node.consequent + when Elsif + srange_length(node.consequent.start_char, 5) + when Else + srange_length(node.consequent.start_char, 4) + end + + expression = srange(node.start_char, node.statements.end_char - 1) + + s( + :if, + [ + visit(node.predicate), + visit(node.statements), + visit(node.consequent) + ], + smap_condition( + srange_length(node.start_char, 5), + begin_token, + else_token, + nil, + expression + ) + ) + end + + # Visit an ENDBlock node. 
+ def visit_END(node) + s( + :postexe, + [visit(node.statements)], + smap_keyword( + srange_length(node.start_char, 3), + srange_find(node.start_char + 3, node.statements.start_char, "{"), + srange_length(node.end_char, -1), + srange_node(node) + ) + ) + end + + # Visit an Ensure node. + def visit_ensure(node) + start_char = node.start_char + end_char = + if node.statements.empty? + start_char + 6 + else + node.statements.body.last.end_char + end + + s( + :ensure, + [visit(node.statements)], + smap_condition( + srange_length(start_char, 6), + nil, + nil, + nil, + srange(start_char, end_char) + ) + ) + end + + # Visit a Field node. + def visit_field(node) + message = + case stack[-2] + when Assign, MLHS + Ident.new( + value: "#{node.name.value}=", + location: node.name.location + ) + else + node.name + end + + visit_command_call( + CommandCall.new( + receiver: node.parent, + operator: node.operator, + message: message, + arguments: nil, + block: nil, + location: node.location + ) + ) + end + + # Visit a FloatLiteral node. + def visit_float(node) + operator = + if %w[+ -].include?(buffer.source[node.start_char]) + srange_length(node.start_char, 1) + end + + s( + :float, + [node.value.to_f], + smap_operator(operator, srange_node(node)) + ) + end + + # Visit a FndPtn node. + def visit_fndptn(node) + left, right = + [node.left, node.right].map do |child| + location = + smap_operator( + srange_length(child.start_char, 1), + srange_node(child) + ) + + if child.is_a?(VarField) && child.value.nil? + s(:match_rest, [], location) + else + s(:match_rest, [visit(child)], location) + end + end + + inner = + s( + :find_pattern, + [left, *visit_all(node.values), right], + smap_collection( + srange_length(node.start_char, 1), + srange_length(node.end_char, -1), + srange_node(node) + ) + ) + + if node.constant + s(:const_pattern, [visit(node.constant), inner], nil) + else + inner + end + end + + # Visit a For node. 
+ def visit_for(node) + s( + :for, + [visit(node.index), visit(node.collection), visit(node.statements)], + smap_for( + srange_length(node.start_char, 3), + srange_find_between(node.index, node.collection, "in"), + srange_search_between(node.collection, node.statements, "do") || + srange_search_between(node.collection, node.statements, ";"), + srange_length(node.end_char, -3), + srange_node(node) + ) + ) + end + + # Visit a GVar node. + def visit_gvar(node) + s( + :gvar, + [node.value.to_sym], + smap_variable(srange_node(node), srange_node(node)) + ) + end + + # Visit a HashLiteral node. + def visit_hash(node) + s( + :hash, + visit_all(node.assocs), + smap_collection( + srange_length(node.start_char, 1), + srange_length(node.end_char, -1), + srange_node(node) + ) + ) + end + + # Visit a Heredoc node. + def visit_heredoc(node) + heredoc = HeredocBuilder.new(node) + + # For each part of the heredoc, if it's a string content node, split + # it into multiple string content nodes, one for each line. Otherwise, + # visit the node as normal. + node.parts.each do |part| + if part.is_a?(TStringContent) && part.value.count("\n") > 1 + index = part.start_char + lines = part.value.split("\n") + + lines.each do |line| + length = line.length + 1 + location = smap_collection_bare(srange_length(index, length)) + + heredoc << s(:str, ["#{line}\n"], location) + index += length + end + else + heredoc << visit(part) + end + end + + # Now that we have all of the pieces on the heredoc, we can trim it if + # it is a heredoc that supports trimming (i.e., it has a ~ on the + # declaration). + heredoc.trim! + + # Generate the location for the heredoc, which goes from the + # declaration to the ending delimiter. + location = + smap_heredoc( + srange_node(node.beginning), + srange( + if node.parts.empty? 
+ node.beginning.end_char + 1 + else + node.parts.first.start_char + end, + node.ending.start_char + ), + srange(node.ending.start_char, node.ending.end_char - 1) + ) + + # Finally, decide which kind of heredoc node to generate based on its + # declaration and contents. + if node.beginning.value.match?(/`\w+`\z/) + s(:xstr, heredoc.segments, location) + elsif heredoc.segments.length == 1 + segment = heredoc.segments.first + s(segment.type, segment.children, location) + else + s(:dstr, heredoc.segments, location) + end + end + + # Visit a HshPtn node. + def visit_hshptn(node) + children = + node.keywords.map do |(keyword, value)| + next s(:pair, [visit(keyword), visit(value)], nil) if value + + case keyword + when DynaSymbol + raise if keyword.parts.length > 1 + s(:match_var, [keyword.parts.first.value.to_sym], nil) + when Label + s(:match_var, [keyword.value.chomp(":").to_sym], nil) + end + end + + if node.keyword_rest.is_a?(VarField) + children << if node.keyword_rest.value.nil? + s(:match_rest, [], nil) + elsif node.keyword_rest.value == :nil + s(:match_nil_pattern, [], nil) + else + s(:match_rest, [visit(node.keyword_rest)], nil) + end + end + + inner = s(:hash_pattern, children, nil) + if node.constant + s(:const_pattern, [visit(node.constant), inner], nil) + else + inner + end + end + + # Visit an Ident node. + def visit_ident(node) + s( + :lvar, + [node.value.to_sym], + smap_variable(srange_node(node), srange_node(node)) + ) + end + + # Visit an IfNode node. + def visit_if(node) + s( + :if, + [ + visit_predicate(node.predicate), + visit(node.statements), + visit(node.consequent) + ], + if node.modifier? + smap_keyword_bare( + srange_find_between(node.statements, node.predicate, "if"), + srange_node(node) + ) + else + begin_start = node.predicate.end_char + begin_end = + if node.statements.empty? 
+ node.statements.end_char + else + node.statements.body.first.start_char + end + + begin_token = + if buffer.source[begin_start...begin_end].include?("then") + srange_find(begin_start, begin_end, "then") + elsif buffer.source[begin_start...begin_end].include?(";") + srange_find(begin_start, begin_end, ";") + end + + else_token = + case node.consequent + when Elsif + srange_length(node.consequent.start_char, 5) + when Else + srange_length(node.consequent.start_char, 4) + end + + smap_condition( + srange_length(node.start_char, 2), + begin_token, + else_token, + srange_length(node.end_char, -3), + srange_node(node) + ) + end + ) + end + + # Visit an IfOp node. + def visit_if_op(node) + s( + :if, + [visit(node.predicate), visit(node.truthy), visit(node.falsy)], + smap_ternary( + srange_find_between(node.predicate, node.truthy, "?"), + srange_find_between(node.truthy, node.falsy, ":"), + srange_node(node) + ) + ) + end + + # Visit an Imaginary node. + def visit_imaginary(node) + s( + :complex, + [ + # We have to do an eval here in order to get the value in case + # it's something like 42ri. to_c will not give the right value in + # that case. Maybe there's an API for this but I can't find it. + eval(node.value) + ], + smap_operator(nil, srange_node(node)) + ) + end + + # Visit an In node. + def visit_in(node) + case node.pattern + when IfNode + s( + :in_pattern, + [ + visit(node.pattern.statements), + s(:if_guard, [visit(node.pattern.predicate)], nil), + visit(node.statements) + ], + nil + ) + when UnlessNode + s( + :in_pattern, + [ + visit(node.pattern.statements), + s(:unless_guard, [visit(node.pattern.predicate)], nil), + visit(node.statements) + ], + nil + ) + else + begin_token = + srange_search_between(node.pattern, node.statements, "then") + + end_char = + if begin_token || node.statements.empty? 
+ node.statements.end_char - 1 + else + node.statements.body.last.start_char + end + + s( + :in_pattern, + [visit(node.pattern), nil, visit(node.statements)], + smap_keyword( + srange_length(node.start_char, 2), + begin_token, + nil, + srange(node.start_char, end_char) + ) + ) + end + end + + # Visit an Int node. + def visit_int(node) + operator = + if %w[+ -].include?(buffer.source[node.start_char]) + srange_length(node.start_char, 1) + end + + s(:int, [node.value.to_i], smap_operator(operator, srange_node(node))) + end + + # Visit an IVar node. + def visit_ivar(node) + s( + :ivar, + [node.value.to_sym], + smap_variable(srange_node(node), srange_node(node)) + ) + end + + # Visit a Kw node. + def visit_kw(node) + location = smap(srange_node(node)) + + case node.value + when "__FILE__" + s(:str, [buffer.name], location) + when "__LINE__" + s( + :int, + [node.location.start_line + buffer.first_line - 1], + location + ) + when "__ENCODING__" + if ::Parser::Builders::Default.emit_encoding + s(:__ENCODING__, [], location) + else + s(:const, [s(:const, [nil, :Encoding], nil), :UTF_8], location) + end + else + s(node.value.to_sym, [], location) + end + end + + # Visit a KwRestParam node. + def visit_kwrest_param(node) + if node.name.nil? + s(:kwrestarg, [], smap_variable(nil, srange_node(node))) + else + s( + :kwrestarg, + [node.name.value.to_sym], + smap_variable(srange_node(node.name), srange_node(node)) + ) + end + end + + # Visit a Label node. + def visit_label(node) + s( + :sym, + [node.value.chomp(":").to_sym], + smap_collection_bare(srange(node.start_char, node.end_char - 1)) + ) + end + + # Visit a Lambda node. + def visit_lambda(node) + args = + node.params.is_a?(LambdaVar) ? node.params : node.params.contents + args_node = visit(args) + + type = :block + if args.empty? 
&& (maximum = num_block_type(node.statements)) + type = :numblock + args_node = maximum + end + + begin_token, end_token = + if ( + srange = + srange_search_between(node.params, node.statements, "{") + ) + [srange, srange_length(node.end_char, -1)] + else + [ + srange_find_between(node.params, node.statements, "do"), + srange_length(node.end_char, -3) + ] + end + + selector = srange_length(node.start_char, 2) + + s( + type, + [ + if ::Parser::Builders::Default.emit_lambda + s(:lambda, [], smap(selector)) + else + s(:send, [nil, :lambda], smap_send_bare(selector, selector)) + end, + args_node, + visit(node.statements) + ], + smap_collection(begin_token, end_token, srange_node(node)) + ) + end + + # Visit a LambdaVar node. + def visit_lambda_var(node) + shadowargs = + node.locals.map do |local| + s( + :shadowarg, + [local.value.to_sym], + smap_variable(srange_node(local), srange_node(local)) + ) + end + + location = + if node.start_char == node.end_char + smap_collection_bare(nil) + elsif buffer.source[node.start_char - 1] == "(" + smap_collection( + srange_length(node.start_char, 1), + srange_length(node.end_char, -1), + srange_node(node) + ) + else + smap_collection_bare(srange_node(node)) + end + + s(:args, visit(node.params).children + shadowargs, location) + end + + # Visit an MAssign node. + def visit_massign(node) + s( + :masgn, + [visit(node.target), visit(node.value)], + smap_operator( + srange_find_between(node.target, node.value, "="), + srange_node(node) + ) + ) + end + + # Visit a MethodAddBlock node. + def visit_method_add_block(node) + case node.call + when ARef, Super, ZSuper + type, arguments = block_children(node.block) + + s( + type, + [visit(node.call), arguments, visit(node.block.bodystmt)], + smap_collection( + srange_node(node.block.opening), + srange_length( + node.block.end_char, + node.block.keywords? ? 
-3 : -1 + ), + srange_node(node) + ) + ) + else + visit_command_call( + CommandCall.new( + receiver: node.call.receiver, + operator: node.call.operator, + message: node.call.message, + arguments: node.call.arguments, + block: node.block, + location: node.location + ) + ) + end + end + + # Visit an MLHS node. + def visit_mlhs(node) + s( + :mlhs, + node.parts.map do |part| + if part.is_a?(Ident) + s( + :arg, + [part.value.to_sym], + smap_variable(srange_node(part), srange_node(part)) + ) + else + visit(part) + end + end, + smap_collection_bare(srange_node(node)) + ) + end + + # Visit an MLHSParen node. + def visit_mlhs_paren(node) + child = visit(node.contents) + + s( + child.type, + child.children, + smap_collection( + srange_length(node.start_char, 1), + srange_length(node.end_char, -1), + srange_node(node) + ) + ) + end + + # Visit a ModuleDeclaration node. + def visit_module(node) + s( + :module, + [visit(node.constant), visit(node.bodystmt)], + smap_definition( + srange_length(node.start_char, 6), + nil, + srange_node(node.constant), + srange_length(node.end_char, -3) + ).with_expression(srange_node(node)) + ) + end + + # Visit an MRHS node. + def visit_mrhs(node) + visit_array( + ArrayLiteral.new( + lbracket: nil, + contents: Args.new(parts: node.parts, location: node.location), + location: node.location + ) + ) + end + + # Visit a Next node. + def visit_next(node) + s( + :next, + visit_all(node.arguments.parts), + smap_keyword_bare( + srange_length(node.start_char, 4), + srange_node(node) + ) + ) + end + + # Visit a Not node. + def visit_not(node) + if node.statement.nil? + begin_token = srange_find(node.start_char, nil, "(") + end_token = srange_find(node.start_char, nil, ")") + + s( + :send, + [ + s( + :begin, + [], + smap_collection( + begin_token, + end_token, + begin_token.join(end_token) + ) + ), + :! + ], + smap_send_bare( + srange_length(node.start_char, 3), + srange_node(node) + ) + ) + else + begin_token, end_token = + if node.parentheses? 
+ [ + srange_find( + node.start_char + 3, + node.statement.start_char, + "(" + ), + srange_length(node.end_char, -1) + ] + end + + s( + :send, + [visit(node.statement), :!], + smap_send( + nil, + srange_length(node.start_char, 3), + begin_token, + end_token, + srange_node(node) + ) + ) + end + end + + # Visit an OpAssign node. + def visit_opassign(node) + target = visit(node.target) + location = + target + .location + .with_expression(srange_node(node)) + .with_operator(srange_node(node.operator)) + + case node.operator.value + when "||=" + s(:or_asgn, [target, visit(node.value)], location) + when "&&=" + s(:and_asgn, [target, visit(node.value)], location) + else + s( + :op_asgn, + [ + target, + node.operator.value.chomp("=").to_sym, + visit(node.value) + ], + location + ) + end + end + + # Visit a Params node. + def visit_params(node) + children = [] + + children += + node.requireds.map do |required| + case required + when MLHSParen + visit(required) + else + s( + :arg, + [required.value.to_sym], + smap_variable(srange_node(required), srange_node(required)) + ) + end + end + + children += + node.optionals.map do |(name, value)| + s( + :optarg, + [name.value.to_sym, visit(value)], + smap_variable( + srange_node(name), + srange_node(name).join(srange_node(value)) + ).with_operator(srange_find_between(name, value, "=")) + ) + end + + if node.rest && !node.rest.is_a?(ExcessedComma) + children << visit(node.rest) + end + + children += + node.posts.map do |post| + s( + :arg, + [post.value.to_sym], + smap_variable(srange_node(post), srange_node(post)) + ) + end + + children += + node.keywords.map do |(name, value)| + key = name.value.chomp(":").to_sym + + if value + s( + :kwoptarg, + [key, visit(value)], + smap_variable( + srange(name.start_char, name.end_char - 1), + srange_node(name).join(srange_node(value)) + ) + ) + else + s( + :kwarg, + [key], + smap_variable( + srange(name.start_char, name.end_char - 1), + srange_node(name) + ) + ) + end + end + + case 
node.keyword_rest + when nil, ArgsForward + # do nothing + when :nil + children << s( + :kwnilarg, + [], + smap_variable(srange_length(node.end_char, -3), srange_node(node)) + ) + else + children << visit(node.keyword_rest) + end + + children << visit(node.block) if node.block + + if node.keyword_rest.is_a?(ArgsForward) + location = smap(srange_node(node.keyword_rest)) + + # If there are no other arguments and we have the emit_forward_arg + # option enabled, then the entire argument list is represented by a + # single forward_args node. + if children.empty? && !::Parser::Builders::Default.emit_forward_arg + return s(:forward_args, [], location) + end + + # Otherwise, we need to insert a forward_arg node into the list of + # parameters before any keyword rest or block parameters. + index = + node.requireds.length + node.optionals.length + + node.keywords.length + children.insert(index, s(:forward_arg, [], location)) + end + + location = + unless children.empty? + first = children.first.location.expression + last = children.last.location.expression + smap_collection_bare(first.join(last)) + end + + s(:args, children, location) + end + + # Visit a Paren node. + def visit_paren(node) + location = + smap_collection( + srange_length(node.start_char, 1), + srange_length(node.end_char, -1), + srange_node(node) + ) + + if node.contents.nil? || + (node.contents.is_a?(Statements) && node.contents.empty?) + s(:begin, [], location) + else + child = visit(node.contents) + child.type == :begin ? child : s(:begin, [child], location) + end + end + + # Visit a PinnedBegin node. + def visit_pinned_begin(node) + s( + :pin, + [ + s( + :begin, + [visit(node.statement)], + smap_collection( + srange_length(node.start_char + 1, 1), + srange_length(node.end_char, -1), + srange(node.start_char + 1, node.end_char) + ) + ) + ], + smap_send_bare(srange_length(node.start_char, 1), srange_node(node)) + ) + end + + # Visit a PinnedVarRef node. 
+ def visit_pinned_var_ref(node) + s( + :pin, + [visit(node.value)], + smap_send_bare(srange_length(node.start_char, 1), srange_node(node)) + ) + end + + # Visit a Program node. + def visit_program(node) + visit(node.statements) + end + + # Visit a QSymbols node. + def visit_qsymbols(node) + parts = + node.elements.map do |element| + SymbolLiteral.new(value: element, location: element.location) + end + + visit_array( + ArrayLiteral.new( + lbracket: node.beginning, + contents: Args.new(parts: parts, location: node.location), + location: node.location + ) + ) + end + + # Visit a QWords node. + def visit_qwords(node) + visit_array( + ArrayLiteral.new( + lbracket: node.beginning, + contents: Args.new(parts: node.elements, location: node.location), + location: node.location + ) + ) + end + + # Visit a RangeNode node. + def visit_range(node) + s( + node.operator.value == ".." ? :irange : :erange, + [visit(node.left), visit(node.right)], + smap_operator(srange_node(node.operator), srange_node(node)) + ) + end + + # Visit an RAssign node. + def visit_rassign(node) + s( + node.operator.value == "=>" ? :match_pattern : :match_pattern_p, + [visit(node.value), visit(node.pattern)], + smap_operator(srange_node(node.operator), srange_node(node)) + ) + end + + # Visit a Rational node. + def visit_rational(node) + s(:rational, [node.value.to_r], smap_operator(nil, srange_node(node))) + end + + # Visit a Redo node. + def visit_redo(node) + s(:redo, [], smap_keyword_bare(srange_node(node), srange_node(node))) + end + + # Visit a RegexpLiteral node. + def visit_regexp_literal(node) + s( + :regexp, + visit_all(node.parts).push( + s( + :regopt, + node.ending.scan(/[a-z]/).sort.map(&:to_sym), + smap(srange_length(node.end_char, -(node.ending.length - 1))) + ) + ), + smap_collection( + srange_length(node.start_char, node.beginning.length), + srange_length(node.end_char - node.ending.length, 1), + srange_node(node) + ) + ) + end + + # Visit a Rescue node. 
+        #
+        # Builds a :rescue node whose children are this clause's :resbody,
+        # then the children of any following clause, or nil when there is no
+        # following clause.
+        def visit_rescue(node)
+          # In the parser gem, there is a separation between the rescue node and
+          # the rescue body. They have different bounds, so we have to calculate
+          # those here.
+          start_char = node.start_char
+
+          body_end_char =
+            if node.statements.empty?
+              start_char + 6
+            else
+              node.statements.body.last.end_char
+            end
+
+          end_char =
+            if node.consequent
+              # Walk to the last clause in the chain.
+              end_node = node.consequent
+              end_node = end_node.consequent while end_node.consequent
+
+              if end_node.statements.empty?
+                # NOTE(review): this offset is measured from the first
+                # clause (start_char), not from end_node - confirm that is
+                # the intended end of the expression.
+                start_char + 6
+              else
+                end_node.statements.body.last.end_char
+              end
+            else
+              body_end_char
+            end
+
+          # These locations are reused for multiple children.
+          keyword = srange_length(start_char, 6)
+          body_expression = srange(start_char, body_end_char)
+          expression = srange(start_char, end_char)
+
+          # The exception list always goes through visit_array; a single
+          # exception is first wrapped in a one-element list.
+          exceptions =
+            case node.exception&.exceptions
+            when nil
+              nil
+            when MRHS
+              visit_array(
+                ArrayLiteral.new(
+                  lbracket: nil,
+                  contents:
+                    Args.new(
+                      parts: node.exception.exceptions.parts,
+                      location: node.exception.exceptions.location
+                    ),
+                  location: node.exception.exceptions.location
+                )
+              )
+            else
+              visit_array(
+                ArrayLiteral.new(
+                  lbracket: nil,
+                  contents:
+                    Args.new(
+                      parts: [node.exception.exceptions],
+                      location: node.exception.exceptions.location
+                    ),
+                  location: node.exception.exceptions.location
+                )
+              )
+            end
+
+          resbody =
+            if node.exception.nil?
+              s(
+                :resbody,
+                [nil, nil, visit(node.statements)],
+                smap_rescue_body(keyword, nil, nil, body_expression)
+              )
+            elsif node.exception.variable.nil?
+              s(
+                :resbody,
+                [exceptions, nil, visit(node.statements)],
+                smap_rescue_body(keyword, nil, nil, body_expression)
+              )
+            else
+              s(
+                :resbody,
+                [
+                  exceptions,
+                  visit(node.exception.variable),
+                  visit(node.statements)
+                ],
+                smap_rescue_body(
+                  keyword,
+                  srange_find(
+                    node.start_char + 6,
+                    node.exception.variable.start_char,
+                    "=>"
+                  ),
+                  nil,
+                  body_expression
+                )
+              )
+            end
+
+          children = [resbody]
+          if node.consequent
+            children += visit(node.consequent).children
+          else
+            children << nil
+          end
+
+          s(:rescue, children, smap_condition_bare(expression))
+        end
+
+        # Visit a RescueMod node.
+        #
+        # Builds a :rescue node for the modifier form: the guarded statement,
+        # a :resbody with no exception list or variable, and a trailing nil.
+        def visit_rescue_mod(node)
+          keyword = srange_find_between(node.statement, node.value, "rescue")
+
+          s(
+            :rescue,
+            [
+              visit(node.statement),
+              s(
+                :resbody,
+                [nil, nil, visit(node.value)],
+                smap_rescue_body(
+                  keyword,
+                  nil,
+                  nil,
+                  keyword.join(srange_node(node.value))
+                )
+              ),
+              nil
+            ],
+            smap_condition_bare(srange_node(node))
+          )
+        end
+
+        # Visit a RestParam node.
+        #
+        # Builds a :restarg node, with no children when the parameter is
+        # anonymous.
+        def visit_rest_param(node)
+          if node.name
+            s(
+              :restarg,
+              [node.name.value.to_sym],
+              smap_variable(srange_node(node.name), srange_node(node))
+            )
+          else
+            s(:restarg, [], smap_variable(nil, srange_node(node)))
+          end
+        end
+
+        # Visit a Retry node.
+        def visit_retry(node)
+          s(:retry, [], smap_keyword_bare(srange_node(node), srange_node(node)))
+        end
+
+        # Visit a ReturnNode node.
+        #
+        # Builds a :return node; a return without arguments has no children.
+        def visit_return(node)
+          s(
+            :return,
+            node.arguments ? visit_all(node.arguments.parts) : [],
+            smap_keyword_bare(
+              srange_length(node.start_char, 6),
+              srange_node(node)
+            )
+          )
+        end
+
+        # Visit an SClass node.
+        #
+        # Builds an :sclass node. The operator range is the "<<" found
+        # between the five-character keyword and the target.
+        def visit_sclass(node)
+          s(
+            :sclass,
+            [visit(node.target), visit(node.bodystmt)],
+            smap_definition(
+              srange_length(node.start_char, 5),
+              srange_find(node.start_char + 5, node.target.start_char, "<<"),
+              nil,
+              srange_length(node.end_char, -3)
+            ).with_expression(srange_node(node))
+          )
+        end
+
+        # Visit a Statements node.
+ def visit_statements(node) + children = + node.body.reject do |child| + child.is_a?(Comment) || child.is_a?(EmbDoc) || + child.is_a?(EndContent) || child.is_a?(VoidStmt) + end + + case children.length + when 0 + nil + when 1 + visit(children.first) + else + s( + :begin, + visit_all(children), + smap_collection_bare( + srange(children.first.start_char, children.last.end_char) + ) + ) + end + end + + # Visit a StringConcat node. + def visit_string_concat(node) + s( + :dstr, + [visit(node.left), visit(node.right)], + smap_collection_bare(srange_node(node)) + ) + end + + # Visit a StringDVar node. + def visit_string_dvar(node) + visit(node.variable) + end + + # Visit a StringEmbExpr node. + def visit_string_embexpr(node) + s( + :begin, + visit(node.statements).then { |child| child ? [child] : [] }, + smap_collection( + srange_length(node.start_char, 2), + srange_length(node.end_char, -1), + srange_node(node) + ) + ) + end + + # Visit a StringLiteral node. + def visit_string_literal(node) + location = + if node.quote + smap_collection( + srange_length(node.start_char, node.quote.length), + srange_length(node.end_char, -1), + srange_node(node) + ) + else + smap_collection_bare(srange_node(node)) + end + + if node.parts.empty? + s(:str, [""], location) + elsif node.parts.length == 1 && node.parts.first.is_a?(TStringContent) + child = visit(node.parts.first) + s(child.type, child.children, location) + else + s(:dstr, visit_all(node.parts), location) + end + end + + # Visit a Super node. 
+ def visit_super(node) + if node.arguments.is_a?(Args) + s( + :super, + visit_all(node.arguments.parts), + smap_keyword_bare( + srange_length(node.start_char, 5), + srange_node(node) + ) + ) + else + case node.arguments.arguments + when nil + s( + :super, + [], + smap_keyword( + srange_length(node.start_char, 5), + srange_find(node.start_char + 5, node.end_char, "("), + srange_length(node.end_char, -1), + srange_node(node) + ) + ) + when ArgsForward + s( + :super, + [visit(node.arguments.arguments)], + smap_keyword( + srange_length(node.start_char, 5), + srange_find(node.start_char + 5, node.end_char, "("), + srange_length(node.end_char, -1), + srange_node(node) + ) + ) + else + s( + :super, + visit_all(node.arguments.arguments.parts), + smap_keyword( + srange_length(node.start_char, 5), + srange_find(node.start_char + 5, node.end_char, "("), + srange_length(node.end_char, -1), + srange_node(node) + ) + ) + end + end + end + + # Visit a SymbolLiteral node. + def visit_symbol_literal(node) + begin_token = + if buffer.source[node.start_char] == ":" + srange_length(node.start_char, 1) + end + + s( + :sym, + [node.value.value.to_sym], + smap_collection(begin_token, nil, srange_node(node)) + ) + end + + # Visit a Symbols node. + def visit_symbols(node) + parts = + node.elements.map do |element| + part = element.parts.first + + if element.parts.length == 1 && part.is_a?(TStringContent) + SymbolLiteral.new(value: part, location: part.location) + else + DynaSymbol.new( + parts: element.parts, + quote: nil, + location: element.location + ) + end + end + + visit_array( + ArrayLiteral.new( + lbracket: node.beginning, + contents: Args.new(parts: parts, location: node.location), + location: node.location + ) + ) + end + + # Visit a TopConstField node. 
+ def visit_top_const_field(node) + s( + :casgn, + [ + s(:cbase, [], smap(srange_length(node.start_char, 2))), + node.constant.value.to_sym + ], + smap_constant( + srange_length(node.start_char, 2), + srange_node(node.constant), + srange_node(node) + ) + ) + end + + # Visit a TopConstRef node. + def visit_top_const_ref(node) + s( + :const, + [ + s(:cbase, [], smap(srange_length(node.start_char, 2))), + node.constant.value.to_sym + ], + smap_constant( + srange_length(node.start_char, 2), + srange_node(node.constant), + srange_node(node) + ) + ) + end + + # Visit a TStringContent node. + def visit_tstring_content(node) + dumped = node.value.gsub(/([^[:ascii:]])/) { $1.dump[1...-1] } + + s( + :str, + ["\"#{dumped}\"".undump], + smap_collection_bare(srange_node(node)) + ) + end + + # Visit a Unary node. + def visit_unary(node) + # Special handling here for flipflops + if (paren = node.statement).is_a?(Paren) && + paren.contents.is_a?(Statements) && + paren.contents.body.length == 1 && + (range = paren.contents.body.first).is_a?(RangeNode) && + node.operator == "!" + s( + :send, + [ + s( + :begin, + [ + s( + range.operator.value == ".." ? :iflipflop : :eflipflop, + visit(range).children, + smap_operator( + srange_node(range.operator), + srange_node(range) + ) + ) + ], + smap_collection( + srange_length(paren.start_char, 1), + srange_length(paren.end_char, -1), + srange_node(paren) + ) + ), + :! + ], + smap_send_bare( + srange_length(node.start_char, 1), + srange_node(node) + ) + ) + elsif node.operator == "!" && node.statement.is_a?(RegexpLiteral) + s( + :send, + [ + s( + :match_current_line, + [visit(node.statement)], + smap(srange_node(node.statement)) + ), + :! + ], + smap_send_bare( + srange_length(node.start_char, 1), + srange_node(node) + ) + ) + else + visit(canonical_unary(node)) + end + end + + # Visit an Undef node. 
+        #
+        # Builds an :undef node with one child per listed symbol.
+        def visit_undef(node)
+          s(
+            :undef,
+            visit_all(node.symbols),
+            smap_keyword_bare(
+              srange_length(node.start_char, 5),
+              srange_node(node)
+            )
+          )
+        end
+
+        # Visit an UnlessNode node.
+        #
+        # Emitted as an :if node with the branches swapped: the consequent
+        # comes first and the statements second.
+        def visit_unless(node)
+          s(
+            :if,
+            [
+              visit_predicate(node.predicate),
+              visit(node.consequent),
+              visit(node.statements)
+            ],
+            if node.modifier?
+              smap_keyword_bare(
+                srange_find_between(node.statements, node.predicate, "unless"),
+                srange_node(node)
+              )
+            else
+              begin_start = node.predicate.end_char
+              begin_end =
+                if node.statements.empty?
+                  node.statements.end_char
+                else
+                  node.statements.body.first.start_char
+                end
+
+              # The begin token is the first of "then" or ";" found between
+              # the predicate and the body.
+              begin_token =
+                if buffer.source[begin_start...begin_end].include?("then")
+                  srange_find(begin_start, begin_end, "then")
+                elsif buffer.source[begin_start...begin_end].include?(";")
+                  srange_find(begin_start, begin_end, ";")
+                end
+
+              else_token =
+                if node.consequent
+                  srange_length(node.consequent.start_char, 4)
+                end
+
+              smap_condition(
+                srange_length(node.start_char, 6),
+                begin_token,
+                else_token,
+                srange_length(node.end_char, -3),
+                srange_node(node)
+              )
+            end
+          )
+        end
+
+        # Visit an UntilNode node.
+        #
+        # Builds an :until node, or :until_post when loop_post? says so. The
+        # block form records the "do" (or ";") found before the body.
+        def visit_until(node)
+          s(
+            loop_post?(node) ? :until_post : :until,
+            [visit(node.predicate), visit(node.statements)],
+            if node.modifier?
+              smap_keyword_bare(
+                srange_find_between(node.statements, node.predicate, "until"),
+                srange_node(node)
+              )
+            else
+              smap_keyword(
+                srange_length(node.start_char, 5),
+                srange_search_between(node.predicate, node.statements, "do") ||
+                  srange_search_between(node.predicate, node.statements, ";"),
+                srange_length(node.end_char, -3),
+                srange_node(node)
+              )
+            end
+          )
+        end
+
+        # Visit a VarField node.
+        #
+        # The result depends on context. Inside a pattern (judged from the
+        # two nearest ancestors on the stack) it is a :match_var. A Const
+        # value gives a :casgn. Otherwise the value's class selects the
+        # assignment node type.
+        def visit_var_field(node)
+          name = node.value.value.to_sym
+          match_var =
+            [stack[-3], stack[-2]].any? do |parent|
+              case parent
+              when AryPtn, FndPtn, HshPtn, In, RAssign
+                true
+              when Binary
+                parent.operator == :"=>"
+              else
+                false
+              end
+            end
+
+          if match_var
+            s(
+              :match_var,
+              [name],
+              smap_variable(srange_node(node.value), srange_node(node.value))
+            )
+          elsif node.value.is_a?(Const)
+            s(
+              :casgn,
+              [nil, name],
+              smap_constant(nil, srange_node(node.value), srange_node(node))
+            )
+          else
+            location = smap_variable(srange_node(node), srange_node(node))
+
+            case node.value
+            when CVar
+              s(:cvasgn, [name], location)
+            when GVar
+              s(:gvasgn, [name], location)
+            when Ident
+              s(:lvasgn, [name], location)
+            when IVar
+              s(:ivasgn, [name], location)
+            when VarRef
+              s(:lvasgn, [name], location)
+            else
+              s(:match_rest, [], nil)
+            end
+          end
+        end
+
+        # Visit a VarRef node.
+        #
+        # Translates to whatever the wrapped value translates to.
+        def visit_var_ref(node)
+          visit(node.value)
+        end
+
+        # Visit a VCall node.
+        #
+        # Translated as a command call with no receiver, operator, arguments
+        # or block.
+        def visit_vcall(node)
+          visit_command_call(
+            CommandCall.new(
+              receiver: nil,
+              operator: nil,
+              message: node.value,
+              arguments: nil,
+              block: nil,
+              location: node.location
+            )
+          )
+        end
+
+        # Visit a When node.
+        #
+        # Builds a :when node whose children are the translated arguments
+        # followed by the translated statements. A ";" at the very start of
+        # the statements is recorded as the begin token.
+        def visit_when(node)
+          keyword = srange_length(node.start_char, 4)
+          begin_token =
+            if buffer.source[node.statements.start_char] == ";"
+              srange_length(node.statements.start_char, 1)
+            end
+
+          end_char =
+            if node.statements.body.empty?
+              node.statements.end_char
+            else
+              node.statements.body.last.end_char
+            end
+
+          s(
+            :when,
+            visit_all(node.arguments.parts) + [visit(node.statements)],
+            smap_keyword(
+              keyword,
+              begin_token,
+              nil,
+              srange(keyword.begin_pos, end_char)
+            )
+          )
+        end
+
+        # Visit a WhileNode node.
+        #
+        # Builds a :while node, or :while_post when loop_post? says so. The
+        # block form records the "do" (or ";") found before the body.
+        def visit_while(node)
+          s(
+            loop_post?(node) ? :while_post : :while,
+            [visit(node.predicate), visit(node.statements)],
+            if node.modifier?
+              smap_keyword_bare(
+                srange_find_between(node.statements, node.predicate, "while"),
+                srange_node(node)
+              )
+            else
+              smap_keyword(
+                srange_length(node.start_char, 5),
+                srange_search_between(node.predicate, node.statements, "do") ||
+                  srange_search_between(node.predicate, node.statements, ";"),
+                srange_length(node.end_char, -3),
+                srange_node(node)
+              )
+            end
+          )
+        end
+
+        # Visit a Word node.
+        #
+        # Translated through visit_string_literal as an unquoted string.
+        def visit_word(node)
+          visit_string_literal(
+            StringLiteral.new(
+              parts: node.parts,
+              quote: nil,
+              location: node.location
+            )
+          )
+        end
+
+        # Visit a Words node.
+        #
+        # The elements are translated through visit_array unchanged.
+        def visit_words(node)
+          visit_array(
+            ArrayLiteral.new(
+              lbracket: node.beginning,
+              contents: Args.new(parts: node.elements, location: node.location),
+              location: node.location
+            )
+          )
+        end
+
+        # Visit an XStringLiteral node.
+        #
+        # Builds an :xstr node. The begin token is three characters long when
+        # the literal starts with "%", otherwise one.
+        def visit_xstring_literal(node)
+          s(
+            :xstr,
+            visit_all(node.parts),
+            smap_collection(
+              srange_length(
+                node.start_char,
+                buffer.source[node.start_char] == "%" ? 3 : 1
+              ),
+              srange_length(node.end_char, -1),
+              srange_node(node)
+            )
+          )
+        end
+
+        # Visit a yield node.
+        #
+        # Builds a :yield node. Missing arguments and bare Args get a
+        # keyword-only location. Any other argument wrapper also records its
+        # first character and the node's last character as begin and end
+        # tokens.
+        def visit_yield(node)
+          case node.arguments
+          when nil
+            s(
+              :yield,
+              [],
+              smap_keyword_bare(
+                srange_length(node.start_char, 5),
+                srange_node(node)
+              )
+            )
+          when Args
+            s(
+              :yield,
+              visit_all(node.arguments.parts),
+              smap_keyword_bare(
+                srange_length(node.start_char, 5),
+                srange_node(node)
+              )
+            )
+          else
+            s(
+              :yield,
+              visit_all(node.arguments.contents.parts),
+              smap_keyword(
+                srange_length(node.start_char, 5),
+                srange_length(node.arguments.start_char, 1),
+                srange_length(node.end_char, -1),
+                srange_node(node)
+              )
+            )
+          end
+        end
+
+        # Visit a ZSuper node.
+        def visit_zsuper(node)
+          s(
+            :zsuper,
+            [],
+            smap_keyword_bare(
+              srange_length(node.start_char, 5),
+              srange_node(node)
+            )
+          )
+        end
+      end
+
+      private
+
+      # Determine the type and the arguments child to use for a block node.
+      # Blocks that declare parameters use the visited block_var. Blocks
+      # without declared parameters get an empty :args node, unless
+      # num_block_type finds numbered parameters in the body, in which case
+      # the type becomes :numblock and the arguments child is the value that
+      # num_block_type returned. Returns a [type, arguments] pair.
+      def block_children(node)
+        arguments =
+          if node.block_var
+            visit(node.block_var)
+          else
+            s(:args, [], smap_collection_bare(nil))
+          end
+
+        type = :block
+        if !node.block_var && (maximum = num_block_type(node.bodystmt))
+          type = :numblock
+          arguments = maximum
+        end
+
+        [type, arguments]
+      end
+
+      # Convert a Unary node into a canonical CommandCall node.
+      def canonical_unary(node)
+        # For integers and floats with a leading + or -, parser represents them
+        # as just their values with the signs attached.
+        if %w[+ -].include?(node.operator) &&
+             (node.statement.is_a?(Int) || node.statement.is_a?(FloatLiteral))
+          return(
+            node.statement.class.new(
+              value: "#{node.operator}#{node.statement.value}",
+              location: node.location
+            )
+          )
+        end
+
+        # Unary + and - are sent as the +@ and -@ methods; every other operator
+        # keeps its own name as the message.
+        value = { "+" => "+@", "-" => "-@" }.fetch(node.operator, node.operator)
+        length = node.operator.length
+
+        # The synthesized operator location starts where the unary node starts
+        # and spans exactly the length of the operator text.
+        CommandCall.new(
+          receiver: node.statement,
+          operator: nil,
+          message:
+            Op.new(
+              value: value,
+              location:
+                Location.new(
+                  start_line: node.location.start_line,
+                  start_char: node.start_char,
+                  start_column: node.location.start_column,
+                  end_line: node.location.start_line,
+                  end_char: node.start_char + length,
+                  end_column: node.location.start_column + length
+                )
+            ),
+          arguments: nil,
+          block: nil,
+          location: node.location
+        )
+      end
+
+      # Convert a Binary node into a canonical CommandCall node.
+ def canonical_binary(node) + operator = node.operator.to_s + + start_char = node.left.end_char + end_char = node.right.start_char + + index = buffer.source[start_char...end_char].index(operator) + start_line = + node.location.start_line + + buffer.source[start_char...index].count("\n") + start_column = + index - (buffer.source[start_char...index].rindex("\n") || 0) + + op_location = + Location.new( + start_line: start_line, + start_column: start_column, + start_char: start_char + index, + end_line: start_line, + end_column: start_column + operator.length, + end_char: start_char + index + operator.length + ) + + CommandCall.new( + receiver: node.left, + operator: nil, + message: Op.new(value: operator, location: op_location), + arguments: + Args.new(parts: [node.right], location: node.right.location), + block: nil, + location: node.location + ) + end + + # When you have a begin..end while or begin..end until, it's a special + # kind of syntax that executes the block in a loop. In this case the + # parser gem has a special node type for it. + def loop_post?(node) + node.modifier? && node.statements.is_a?(Statements) && + node.statements.body.length == 1 && + node.statements.body.first.is_a?(Begin) + end + + # We need to find if we should transform this block into a numblock + # since there could be new numbered variables like _1. + def num_block_type(statements) + variables = [] + queue = [statements] + + while (child_node = queue.shift) + if child_node.is_a?(VarRef) && child_node.value.is_a?(Ident) && + child_node.value.value =~ /^_(\d+)$/ + variables << $1.to_i + end + + queue += child_node.child_nodes.compact + end + + variables.max + end + + # This method comes almost directly from the parser gem and creates a new + # parser gem node from the given s-expression. type is expected to be a + # symbol, children is expected to be an array, and location is expected to + # be a source map. 
+ def s(type, children, location) + ::Parser::AST::Node.new(type, children, location: location) + end + + # Constructs a plain source map just for an expression. + def smap(expression) + ::Parser::Source::Map.new(expression) + end + + # Constructs a new source map for a collection. + def smap_collection(begin_token, end_token, expression) + ::Parser::Source::Map::Collection.new( + begin_token, + end_token, + expression + ) + end + + # Constructs a new source map for a collection without a begin or end. + def smap_collection_bare(expression) + smap_collection(nil, nil, expression) + end + + # Constructs a new source map for a conditional expression. + def smap_condition( + keyword, + begin_token, + else_token, + end_token, + expression + ) + ::Parser::Source::Map::Condition.new( + keyword, + begin_token, + else_token, + end_token, + expression + ) + end + + # Constructs a new source map for a conditional expression with no begin + # or end. + def smap_condition_bare(expression) + smap_condition(nil, nil, nil, nil, expression) + end + + # Constructs a new source map for a constant reference. + def smap_constant(double_colon, name, expression) + ::Parser::Source::Map::Constant.new(double_colon, name, expression) + end + + # Constructs a new source map for a class definition. + def smap_definition(keyword, operator, name, end_token) + ::Parser::Source::Map::Definition.new( + keyword, + operator, + name, + end_token + ) + end + + # Constructs a new source map for a for loop. + def smap_for(keyword, in_token, begin_token, end_token, expression) + ::Parser::Source::Map::For.new( + keyword, + in_token, + begin_token, + end_token, + expression + ) + end + + # Constructs a new source map for a heredoc. + def smap_heredoc(expression, heredoc_body, heredoc_end) + ::Parser::Source::Map::Heredoc.new( + expression, + heredoc_body, + heredoc_end + ) + end + + # Construct a source map for an index operation. 
+ def smap_index(begin_token, end_token, expression) + ::Parser::Source::Map::Index.new(begin_token, end_token, expression) + end + + # Constructs a new source map for the use of a keyword. + def smap_keyword(keyword, begin_token, end_token, expression) + ::Parser::Source::Map::Keyword.new( + keyword, + begin_token, + end_token, + expression + ) + end + + # Constructs a new source map for the use of a keyword without a begin or + # end token. + def smap_keyword_bare(keyword, expression) + smap_keyword(keyword, nil, nil, expression) + end + + # Constructs a new source map for a method definition. + def smap_method_definition( + keyword, + operator, + name, + end_token, + assignment, + expression + ) + ::Parser::Source::Map::MethodDefinition.new( + keyword, + operator, + name, + end_token, + assignment, + expression + ) + end + + # Constructs a new source map for an operator. + def smap_operator(operator, expression) + ::Parser::Source::Map::Operator.new(operator, expression) + end + + # Constructs a source map for the body of a rescue clause. + def smap_rescue_body(keyword, assoc, begin_token, expression) + ::Parser::Source::Map::RescueBody.new( + keyword, + assoc, + begin_token, + expression + ) + end + + # Constructs a new source map for a method call. + def smap_send(dot, selector, begin_token, end_token, expression) + ::Parser::Source::Map::Send.new( + dot, + selector, + begin_token, + end_token, + expression + ) + end + + # Constructs a new source map for a method call without a begin or end. + def smap_send_bare(selector, expression) + smap_send(nil, selector, nil, nil, expression) + end + + # Constructs a new source map for a ternary expression. + def smap_ternary(question, colon, expression) + ::Parser::Source::Map::Ternary.new(question, colon, expression) + end + + # Constructs a new source map for a variable. 
+      def smap_variable(name, expression)
+        ::Parser::Source::Map::Variable.new(name, expression)
+      end
+
+      # Constructs a new source range from the given start and end offsets.
+      def srange(start_char, end_char)
+        ::Parser::Source::Range.new(buffer, start_char, end_char)
+      end
+
+      # Constructs a new source range by finding the given needle in the given
+      # range of the source. If the needle is not found, returns nil.
+      def srange_search(start_char, end_char, needle)
+        index = buffer.source[start_char...end_char].index(needle)
+        return unless index
+
+        # index is relative to the searched slice, so shift it back into
+        # buffer coordinates.
+        offset = start_char + index
+        srange(offset, offset + needle.length)
+      end
+
+      # Constructs a new source range by searching for the given needle between
+      # the end location of the start node and the start location of the end
+      # node. If the needle is not found, returns nil.
+      def srange_search_between(start_node, end_node, needle)
+        srange_search(start_node.end_char, end_node.start_char, needle)
+      end
+
+      # Constructs a new source range by finding the given needle in the given
+      # range of the source. If the needle is not found, raises an error.
+      def srange_find(start_char, end_char, needle)
+        srange = srange_search(start_char, end_char, needle)
+
+        unless srange
+          slice = buffer.source[start_char...end_char].inspect
+          raise "Could not find #{needle.inspect} in #{slice}"
+        end
+
+        srange
+      end
+
+      # Constructs a new source range by finding the given needle between the
+      # end location of the start node and the start location of the end node.
+      # If the needle is not found, raises an error.
+      def srange_find_between(start_node, end_node, needle)
+        srange_find(start_node.end_char, end_node.start_char, needle)
+      end
+
+      # Constructs a new source range from the given start offset and length.
+      # A positive length extends forward from start_char. Any other length
+      # produces the range that ends at start_char instead, which is how
+      # callers select a trailing token by passing an end offset and a
+      # negative length.
+      def srange_length(start_char, length)
+        if length > 0
+          srange(start_char, start_char + length)
+        else
+          srange(start_char + length, start_char)
+        end
+      end
+
+      # Constructs a new source range using the given node's location.
+ def srange_node(node) + location = node.location + srange(location.start_char, location.end_char) + end + + def visit_predicate(node) + case node + when RangeNode + s( + node.operator.value == ".." ? :iflipflop : :eflipflop, + visit(node).children, + smap_operator(srange_node(node.operator), srange_node(node)) + ) + when RegexpLiteral + s(:match_current_line, [visit(node)], smap(srange_node(node))) + when Unary + if node.operator.value == "!" && node.statement.is_a?(RegexpLiteral) + s( + :send, + [s(:match_current_line, [visit(node.statement)]), :!], + smap_send_bare(srange_node(node.operator), srange_node(node)) + ) + else + visit(node) + end + else + visit(node) + end + end + end + end +end diff --git a/lib/syntax_tree/translation/rubocop_ast.rb b/lib/syntax_tree/translation/rubocop_ast.rb new file mode 100644 index 00000000..53c6737b --- /dev/null +++ b/lib/syntax_tree/translation/rubocop_ast.rb @@ -0,0 +1,21 @@ +# frozen_string_literal: true + +module SyntaxTree + module Translation + # This visitor is responsible for converting the syntax tree produced by + # Syntax Tree into the syntax tree produced by the rubocop/rubocop-ast gem. + class RuboCopAST < Parser + private + + # This method is effectively the same thing as the parser gem except that + # it uses the rubocop-ast specializations of the nodes. 
+ def s(type, children, location) + ::RuboCop::AST::Builder::NODE_MAP.fetch(type, ::RuboCop::AST::Node).new( + type, + children, + location: location + ) + end + end + end +end diff --git a/lib/syntax_tree/version.rb b/lib/syntax_tree/version.rb index a97f5e43..51599f77 100644 --- a/lib/syntax_tree/version.rb +++ b/lib/syntax_tree/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module SyntaxTree - VERSION = "5.2.0" + VERSION = "6.2.0" end diff --git a/lib/syntax_tree/visitor/environment.rb b/lib/syntax_tree/visitor/environment.rb deleted file mode 100644 index b07a5203..00000000 --- a/lib/syntax_tree/visitor/environment.rb +++ /dev/null @@ -1,84 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - # The environment class is used to keep track of local variables and arguments - # inside a particular scope - class Environment - # This class tracks the occurrences of a local variable or argument - class Local - # [Symbol] The type of the local (e.g. :argument, :variable) - attr_reader :type - - # [Array[Location]] The locations of all definitions and assignments of - # this local - attr_reader :definitions - - # [Array[Location]] The locations of all usages of this local - attr_reader :usages - - # initialize: (Symbol type) -> void - def initialize(type) - @type = type - @definitions = [] - @usages = [] - end - - # add_definition: (Location location) -> void - def add_definition(location) - @definitions << location - end - - # add_usage: (Location location) -> void - def add_usage(location) - @usages << location - end - end - - # [Array[Local]] The local variables and arguments defined in this - # environment - attr_reader :locals - - # [Environment | nil] The parent environment - attr_reader :parent - - # initialize: (Environment | nil parent) -> void - def initialize(parent = nil) - @locals = {} - @parent = parent - end - - # Adding a local definition will either insert a new entry in the locals - # hash or append a new definition location to an 
existing local. Notice that - # it's not possible to change the type of a local after it has been - # registered - # add_local_definition: (Ident | Label identifier, Symbol type) -> void - def add_local_definition(identifier, type) - name = identifier.value.delete_suffix(":") - - @locals[name] ||= Local.new(type) - @locals[name].add_definition(identifier.location) - end - - # Adding a local usage will either insert a new entry in the locals - # hash or append a new usage location to an existing local. Notice that - # it's not possible to change the type of a local after it has been - # registered - # add_local_usage: (Ident | Label identifier, Symbol type) -> void - def add_local_usage(identifier, type) - name = identifier.value.delete_suffix(":") - - @locals[name] ||= Local.new(type) - @locals[name].add_usage(identifier.location) - end - - # Try to find the local given its name in this environment or any of its - # parents - # find_local: (String name) -> Local | nil - def find_local(name) - local = @locals[name] - return local unless local.nil? - - @parent&.find_local(name) - end - end -end diff --git a/lib/syntax_tree/visitor/json_visitor.rb b/lib/syntax_tree/visitor/json_visitor.rb deleted file mode 100644 index b516980c..00000000 --- a/lib/syntax_tree/visitor/json_visitor.rb +++ /dev/null @@ -1,55 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - class Visitor - # This visitor transforms the AST into a hash that contains only primitives - # that can be easily serialized into JSON. - class JSONVisitor < FieldVisitor - attr_reader :target - - def initialize - @target = nil - end - - private - - def comments(node) - target[:comments] = visit_all(node.comments) - end - - def field(name, value) - target[name] = value.is_a?(Node) ? 
visit(value) : value - end - - def list(name, values) - target[name] = visit_all(values) - end - - def node(node, type) - previous = @target - @target = { type: type, location: visit_location(node.location) } - yield - @target - ensure - @target = previous - end - - def pairs(name, values) - target[name] = values.map { |(key, value)| [visit(key), visit(value)] } - end - - def text(name, value) - target[name] = value - end - - def visit_location(location) - [ - location.start_line, - location.start_char, - location.end_line, - location.end_char - ] - end - end - end -end diff --git a/lib/syntax_tree/visitor/match_visitor.rb b/lib/syntax_tree/visitor/match_visitor.rb deleted file mode 100644 index e0bdaf08..00000000 --- a/lib/syntax_tree/visitor/match_visitor.rb +++ /dev/null @@ -1,122 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - class Visitor - # This visitor transforms the AST into a Ruby pattern matching expression - # that would match correctly against the AST. - class MatchVisitor < FieldVisitor - attr_reader :q - - def initialize(q) - @q = q - end - - def visit(node) - case node - when Node - super - when String - # pp will split up a string on newlines and concat them together using - # a "+" operator. This breaks the pattern matching expression. So - # instead we're going to check here for strings and manually put the - # entire value into the output buffer. - q.text(node.inspect) - else - node.pretty_print(q) - end - end - - private - - def comments(node) - return if node.comments.empty? 
- - q.nest(0) do - q.text("comments: [") - q.indent do - q.breakable("") - q.seplist(node.comments) { |comment| visit(comment) } - end - q.breakable("") - q.text("]") - end - end - - def field(name, value) - q.nest(0) do - q.text(name) - q.text(": ") - visit(value) - end - end - - def list(name, values) - q.group do - q.text(name) - q.text(": [") - q.indent do - q.breakable("") - q.seplist(values) { |value| visit(value) } - end - q.breakable("") - q.text("]") - end - end - - def node(node, _type) - items = [] - q.with_target(items) { yield } - - if items.empty? - q.text(node.class.name) - return - end - - q.group do - q.text(node.class.name) - q.text("[") - q.indent do - q.breakable("") - q.seplist(items) { |item| q.target << item } - end - q.breakable("") - q.text("]") - end - end - - def pairs(name, values) - q.group do - q.text(name) - q.text(": [") - q.indent do - q.breakable("") - q.seplist(values) do |(key, value)| - q.group do - q.text("[") - q.indent do - q.breakable("") - visit(key) - q.text(",") - q.breakable - visit(value || nil) - end - q.breakable("") - q.text("]") - end - end - end - q.breakable("") - q.text("]") - end - end - - def text(name, value) - q.nest(0) do - q.text(name) - q.text(": ") - value.pretty_print(q) - end - end - end - end -end diff --git a/lib/syntax_tree/visitor/pretty_print_visitor.rb b/lib/syntax_tree/visitor/pretty_print_visitor.rb deleted file mode 100644 index 674e3aac..00000000 --- a/lib/syntax_tree/visitor/pretty_print_visitor.rb +++ /dev/null @@ -1,85 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - class Visitor - # This visitor pretty-prints the AST into an equivalent s-expression. - class PrettyPrintVisitor < FieldVisitor - attr_reader :q - - def initialize(q) - @q = q - end - - # This is here because we need to make sure the operator is cast to a - # string before we print it out. 
- def visit_binary(node) - node(node, "binary") do - field("left", node.left) - text("operator", node.operator.to_s) - field("right", node.right) - comments(node) - end - end - - # This is here to make it a little nicer to look at labels since they - # typically have their : at the end of the value. - def visit_label(node) - node(node, "label") do - q.breakable - q.text(":") - q.text(node.value[0...-1]) - comments(node) - end - end - - private - - def comments(node) - return if node.comments.empty? - - q.breakable - q.group(2, "(", ")") do - q.seplist(node.comments) { |comment| q.pp(comment) } - end - end - - def field(_name, value) - q.breakable - q.pp(value) - end - - def list(_name, values) - q.breakable - q.group(2, "(", ")") { q.seplist(values) { |value| q.pp(value) } } - end - - def node(_node, type) - q.group(2, "(", ")") do - q.text(type) - yield - end - end - - def pairs(_name, values) - q.group(2, "(", ")") do - q.seplist(values) do |(key, value)| - q.pp(key) - - if value - q.text("=") - q.group(2) do - q.breakable("") - q.pp(value) - end - end - end - end - end - - def text(_name, value) - q.breakable - q.text(value) - end - end - end -end diff --git a/lib/syntax_tree/visitor/with_environment.rb b/lib/syntax_tree/visitor/with_environment.rb deleted file mode 100644 index 59033d50..00000000 --- a/lib/syntax_tree/visitor/with_environment.rb +++ /dev/null @@ -1,140 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - # WithEnvironment is a module intended to be included in classes inheriting - # from Visitor. The module overrides a few visit methods to automatically keep - # track of local variables and arguments defined in the current environment. 
- # Example usage: - # class MyVisitor < Visitor - # include WithEnvironment - # - # def visit_ident(node) - # # Check if we're visiting an identifier for an argument, a local - # variable or something else - # local = current_environment.find_local(node) - # - # if local.type == :argument - # # handle identifiers for arguments - # elsif local.type == :variable - # # handle identifiers for variables - # else - # # handle other identifiers, such as method names - # end - # end - module WithEnvironment - def current_environment - @current_environment ||= Environment.new - end - - def with_new_environment - previous_environment = @current_environment - @current_environment = Environment.new(previous_environment) - yield - ensure - @current_environment = previous_environment - end - - # Visits for nodes that create new environments, such as classes, modules - # and method definitions - def visit_class(node) - with_new_environment { super } - end - - def visit_module(node) - with_new_environment { super } - end - - # When we find a method invocation with a block, only the code that happens - # inside of the block needs a fresh environment. 
The method invocation - # itself happens in the same environment - def visit_method_add_block(node) - visit(node.call) - with_new_environment { visit(node.block) } - end - - def visit_def(node) - with_new_environment { super } - end - - # Visit for keeping track of local arguments, such as method and block - # arguments - def visit_params(node) - add_argument_definitions(node.requireds) - - node.posts.each do |param| - current_environment.add_local_definition(param, :argument) - end - - node.keywords.each do |param| - current_environment.add_local_definition(param.first, :argument) - end - - node.optionals.each do |param| - current_environment.add_local_definition(param.first, :argument) - end - - super - end - - def visit_rest_param(node) - name = node.name - current_environment.add_local_definition(name, :argument) if name - - super - end - - def visit_kwrest_param(node) - name = node.name - current_environment.add_local_definition(name, :argument) if name - - super - end - - def visit_blockarg(node) - name = node.name - current_environment.add_local_definition(name, :argument) if name - - super - end - - # Visit for keeping track of local variable definitions - def visit_var_field(node) - value = node.value - - if value.is_a?(SyntaxTree::Ident) - current_environment.add_local_definition(value, :variable) - end - - super - end - - alias visit_pinned_var_ref visit_var_field - - # Visits for keeping track of variable and argument usages - def visit_var_ref(node) - value = node.value - - if value.is_a?(SyntaxTree::Ident) - definition = current_environment.find_local(value.value) - - if definition - current_environment.add_local_usage(value, definition.type) - end - end - - super - end - - private - - def add_argument_definitions(list) - list.each do |param| - if param.is_a?(SyntaxTree::MLHSParen) - add_argument_definitions(param.contents.parts) - else - current_environment.add_local_definition(param, :argument) - end - end - end - end -end diff --git 
a/lib/syntax_tree/with_scope.rb b/lib/syntax_tree/with_scope.rb new file mode 100644 index 00000000..8c4908f3 --- /dev/null +++ b/lib/syntax_tree/with_scope.rb @@ -0,0 +1,311 @@ +# frozen_string_literal: true + +module SyntaxTree + # WithScope is a module intended to be included in classes inheriting from + # Visitor. The module overrides a few visit methods to automatically keep + # track of local variables and arguments defined in the current scope. + # Example usage: + # + # class MyVisitor < Visitor + # include WithScope + # + # def visit_ident(node) + # # Check if we're visiting an identifier for an argument, a local + # # variable or something else + # local = current_scope.find_local(node) + # + # if local.type == :argument + # # handle identifiers for arguments + # elsif local.type == :variable + # # handle identifiers for variables + # else + # # handle other identifiers, such as method names + # end + # end + # end + # + module WithScope + # The scope class is used to keep track of local variables and arguments + # inside a particular scope. + class Scope + # This class tracks the occurrences of a local variable or argument. + class Local + # [Symbol] The type of the local (e.g. 
:argument, :variable) + attr_reader :type + + # [Array[Location]] The locations of all definitions and assignments of + # this local + attr_reader :definitions + + # [Array[Location]] The locations of all usages of this local + attr_reader :usages + + def initialize(type) + @type = type + @definitions = [] + @usages = [] + end + + def add_definition(location) + @definitions << location + end + + def add_usage(location) + @usages << location + end + end + + # [Integer] a unique identifier for this scope + attr_reader :id + + # [scope | nil] The parent scope + attr_reader :parent + + # [Hash[String, Local]] The local variables and arguments defined in this + # scope + attr_reader :locals + + def initialize(id, parent = nil) + @id = id + @parent = parent + @locals = {} + end + + # Adding a local definition will either insert a new entry in the locals + # hash or append a new definition location to an existing local. Notice + # that it's not possible to change the type of a local after it has been + # registered. + def add_local_definition(identifier, type) + name = identifier.value.delete_suffix(":") + + local = + if type == :argument + locals[name] ||= Local.new(type) + else + resolve_local(name, type) + end + + local.add_definition(identifier.location) + end + + # Adding a local usage will either insert a new entry in the locals + # hash or append a new usage location to an existing local. Notice that + # it's not possible to change the type of a local after it has been + # registered. + def add_local_usage(identifier, type) + name = identifier.value.delete_suffix(":") + resolve_local(name, type).add_usage(identifier.location) + end + + # Try to find the local given its name in this scope or any of its + # parents. 
+ def find_local(name) + locals[name] || parent&.find_local(name) + end + + private + + def resolve_local(name, type) + local = find_local(name) + + unless local + local = Local.new(type) + locals[name] = local + end + + local + end + end + + attr_reader :current_scope + + def initialize(*args, **kwargs, &block) + super + + @current_scope = Scope.new(0) + @next_scope_id = 0 + end + + # Visits for nodes that create new scopes, such as classes, modules + # and method definitions. + def visit_class(node) + with_scope { super } + end + + def visit_module(node) + with_scope { super } + end + + # When we find a method invocation with a block, only the code that happens + # inside of the block needs a fresh scope. The method invocation + # itself happens in the same scope. + def visit_method_add_block(node) + visit(node.call) + with_scope(current_scope) { visit(node.block) } + end + + def visit_def(node) + with_scope { super } + end + + # Visit for keeping track of local arguments, such as method and block + # arguments. 
+ def visit_params(node) + add_argument_definitions(node.requireds) + add_argument_definitions(node.posts) + + node.keywords.each do |param| + current_scope.add_local_definition(param.first, :argument) + end + + node.optionals.each do |param| + current_scope.add_local_definition(param.first, :argument) + end + + super + end + + def visit_rest_param(node) + name = node.name + current_scope.add_local_definition(name, :argument) if name + + super + end + + def visit_kwrest_param(node) + name = node.name + current_scope.add_local_definition(name, :argument) if name + + super + end + + def visit_blockarg(node) + name = node.name + current_scope.add_local_definition(name, :argument) if name + + super + end + + def visit_block_var(node) + node.locals.each do |local| + current_scope.add_local_definition(local, :variable) + end + + super + end + alias visit_lambda_var visit_block_var + + # Visit for keeping track of local variable definitions + def visit_var_field(node) + value = node.value + current_scope.add_local_definition(value, :variable) if value.is_a?(Ident) + + super + end + + # Visit for keeping track of local variable definitions + def visit_pinned_var_ref(node) + value = node.value + current_scope.add_local_usage(value, :variable) if value.is_a?(Ident) + + super + end + + # Visits for keeping track of variable and argument usages + def visit_var_ref(node) + value = node.value + + if value.is_a?(Ident) + definition = current_scope.find_local(value.value) + current_scope.add_local_usage(value, definition.type) if definition + end + + super + end + + # When using regex named capture groups, vcalls might actually be a variable + def visit_vcall(node) + value = node.value + definition = current_scope.find_local(value.value) + current_scope.add_local_usage(value, definition.type) if definition + + super + end + + # Visit for capturing local variables defined in regex named capture groups + def visit_binary(node) + if node.operator == :=~ + left = node.left + + if 
left.is_a?(RegexpLiteral) && left.parts.length == 1 && + left.parts.first.is_a?(TStringContent) + content = left.parts.first + + value = content.value + location = content.location + start_line = location.start_line + + Regexp + .new(value, Regexp::FIXEDENCODING) + .names + .each do |name| + offset = value.index(/\(\?<#{Regexp.escape(name)}>/) + line = start_line + value[0...offset].count("\n") + + # We need to add 3 to account for these three characters + # prefixing a named capture (?< + column = location.start_column + offset + 3 + if value[0...offset].include?("\n") + column = + value[0...offset].length - value[0...offset].rindex("\n") + + 3 - 1 + end + + ident_location = + Location.new( + start_line: line, + start_char: location.start_char + offset, + start_column: column, + end_line: line, + end_char: location.start_char + offset + name.length, + end_column: column + name.length + ) + + identifier = Ident.new(value: name, location: ident_location) + current_scope.add_local_definition(identifier, :variable) + end + end + end + + super + end + + private + + def add_argument_definitions(list) + list.each do |param| + case param + when ArgStar + value = param.value + current_scope.add_local_definition(value, :argument) if value + when MLHSParen + add_argument_definitions(param.contents.parts) + else + current_scope.add_local_definition(param, :argument) + end + end + end + + def next_scope_id + @next_scope_id += 1 + end + + def with_scope(parent_scope = nil) + previous_scope = @current_scope + @current_scope = Scope.new(next_scope_id, parent_scope) + yield + ensure + @current_scope = previous_scope + end + end +end diff --git a/lib/syntax_tree/yarv.rb b/lib/syntax_tree/yarv.rb index 7e4da7bb..bd5c54b9 100644 --- a/lib/syntax_tree/yarv.rb +++ b/lib/syntax_tree/yarv.rb @@ -1,5 +1,23 @@ # frozen_string_literal: true +require "stringio" + +require_relative "yarv/basic_block" +require_relative "yarv/bf" +require_relative "yarv/calldata" +require_relative 
"yarv/compiler" +require_relative "yarv/control_flow_graph" +require_relative "yarv/data_flow_graph" +require_relative "yarv/decompiler" +require_relative "yarv/disassembler" +require_relative "yarv/instruction_sequence" +require_relative "yarv/instructions" +require_relative "yarv/legacy" +require_relative "yarv/local_table" +require_relative "yarv/sea_of_nodes" +require_relative "yarv/assembler" +require_relative "yarv/vm" + module SyntaxTree # This module provides an object representation of the YARV bytecode. module YARV diff --git a/lib/syntax_tree/yarv/assembler.rb b/lib/syntax_tree/yarv/assembler.rb index ec467b58..b29c252a 100644 --- a/lib/syntax_tree/yarv/assembler.rb +++ b/lib/syntax_tree/yarv/assembler.rb @@ -31,7 +31,6 @@ def visit_string_literal(node) "FCALL" => CallData::CALL_FCALL, "VCALL" => CallData::CALL_VCALL, "ARGS_SIMPLE" => CallData::CALL_ARGS_SIMPLE, - "BLOCKISEQ" => CallData::CALL_BLOCKISEQ, "KWARG" => CallData::CALL_KWARG, "KW_SPLAT" => CallData::CALL_KW_SPLAT, "TAILCALL" => CallData::CALL_TAILCALL, @@ -62,22 +61,26 @@ def visit_string_literal(node) "constant-from" ].freeze - attr_reader :filepath + attr_reader :lines - def initialize(filepath) - @filepath = filepath + def initialize(lines) + @lines = lines end def assemble iseq = InstructionSequence.new("
", "", 1, :top) - assemble_iseq(iseq, File.readlines(filepath, chomp: true)) + assemble_iseq(iseq, lines) iseq.compile! iseq end - def self.assemble(filepath) - new(filepath).assemble + def self.assemble(source) + new(source.lines(chomp: true)).assemble + end + + def self.assemble_file(filepath) + new(File.readlines(filepath, chomp: true)).assemble end private diff --git a/lib/syntax_tree/yarv/basic_block.rb b/lib/syntax_tree/yarv/basic_block.rb new file mode 100644 index 00000000..6798a092 --- /dev/null +++ b/lib/syntax_tree/yarv/basic_block.rb @@ -0,0 +1,53 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + # This object represents a single basic block, wherein all contained + # instructions do not branch except for the last one. + class BasicBlock + # This is the unique identifier for this basic block. + attr_reader :id + + # This is the index into the list of instructions where this block starts. + attr_reader :block_start + + # This is the set of instructions that this block contains. + attr_reader :insns + + # This is an array of basic blocks that lead into this block. + attr_reader :incoming_blocks + + # This is an array of basic blocks that this block leads into. + attr_reader :outgoing_blocks + + def initialize(block_start, insns) + @id = "block_#{block_start}" + + @block_start = block_start + @insns = insns + + @incoming_blocks = [] + @outgoing_blocks = [] + end + + # Yield each instruction in this basic block along with its index from the + # original instruction sequence. + def each_with_length + return enum_for(:each_with_length) unless block_given? + + length = block_start + insns.each do |insn| + yield insn, length + length += insn.length + end + end + + # This method is used to verify that the basic block is well formed. It + # checks that the only instruction in this basic block that branches is + # the last instruction. + def verify + insns[0...-1].each { |insn| raise unless insn.branch_targets.empty? 
} + end + end + end +end diff --git a/lib/syntax_tree/yarv/calldata.rb b/lib/syntax_tree/yarv/calldata.rb new file mode 100644 index 00000000..e35992f5 --- /dev/null +++ b/lib/syntax_tree/yarv/calldata.rb @@ -0,0 +1,97 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + # This is an operand to various YARV instructions that represents the + # information about a specific call site. + class CallData + flags = %i[ + CALL_ARGS_SPLAT + CALL_ARGS_BLOCKARG + CALL_FCALL + CALL_VCALL + CALL_ARGS_SIMPLE + CALL_KWARG + CALL_KW_SPLAT + CALL_TAILCALL + CALL_SUPER + CALL_ZSUPER + CALL_OPT_SEND + CALL_KW_SPLAT_MUT + ] + + # Insert the legacy CALL_BLOCKISEQ flag for Ruby 3.2 and earlier. + flags.insert(5, :CALL_BLOCKISEQ) if RUBY_VERSION < "3.3" + + # Set the flags as constants on the class. + flags.each_with_index { |name, index| const_set(name, 1 << index) } + + attr_reader :method, :argc, :flags, :kw_arg + + def initialize( + method, + argc = 0, + flags = CallData::CALL_ARGS_SIMPLE, + kw_arg = nil + ) + @method = method + @argc = argc + @flags = flags + @kw_arg = kw_arg + end + + def flag?(mask) + (flags & mask) > 0 + end + + def to_h + result = { mid: method, flag: flags, orig_argc: argc } + result[:kw_arg] = kw_arg if kw_arg + result + end + + def inspect + names = [] + names << :ARGS_SPLAT if flag?(CALL_ARGS_SPLAT) + names << :ARGS_BLOCKARG if flag?(CALL_ARGS_BLOCKARG) + names << :FCALL if flag?(CALL_FCALL) + names << :VCALL if flag?(CALL_VCALL) + names << :ARGS_SIMPLE if flag?(CALL_ARGS_SIMPLE) + names << :KWARG if flag?(CALL_KWARG) + names << :KW_SPLAT if flag?(CALL_KW_SPLAT) + names << :TAILCALL if flag?(CALL_TAILCALL) + names << :SUPER if flag?(CALL_SUPER) + names << :ZSUPER if flag?(CALL_ZSUPER) + names << :OPT_SEND if flag?(CALL_OPT_SEND) + names << :KW_SPLAT_MUT if flag?(CALL_KW_SPLAT_MUT) + + parts = [] + parts << "mid:#{method}" if method + parts << "argc:#{argc}" + parts << "kw:[#{kw_arg.join(", ")}]" if kw_arg + parts << names.join("|") if 
names.any? + + "" + end + + def self.from(serialized) + new( + serialized[:mid], + serialized[:orig_argc], + serialized[:flag], + serialized[:kw_arg] + ) + end + end + + # A convenience method for creating a CallData object. + def self.calldata( + method, + argc = 0, + flags = CallData::CALL_ARGS_SIMPLE, + kw_arg = nil + ) + CallData.new(method, argc, flags, kw_arg) + end + end +end diff --git a/lib/syntax_tree/yarv/compiler.rb b/lib/syntax_tree/yarv/compiler.rb index 4c9a4d50..0f7e7372 100644 --- a/lib/syntax_tree/yarv/compiler.rb +++ b/lib/syntax_tree/yarv/compiler.rb @@ -8,7 +8,7 @@ module YARV # # You use this as with any other visitor. First you parse code into a tree, # then you visit it with this compiler. Visiting the root node of the tree - # will return a SyntaxTree::Visitor::Compiler::InstructionSequence object. + # will return a SyntaxTree::YARV::Compiler::InstructionSequence object. # With that object you can call #to_a on it, which will return a serialized # form of the instruction sequence as an array. This array _should_ mirror # the array given by RubyVM::InstructionSequence#to_a. @@ -124,76 +124,122 @@ def self.compile(node) rescue CompilationError end - def visit_array(node) - node.contents ? visit_all(node.contents.parts) : [] - end + visit_methods do + def visit_array(node) + node.contents ? visit_all(node.contents.parts) : [] + end - def visit_bare_assoc_hash(node) - node.assocs.to_h do |assoc| - # We can only convert regular key-value pairs. A double splat ** - # operator means it has to be converted at run-time. - raise CompilationError unless assoc.is_a?(Assoc) - [visit(assoc.key), visit(assoc.value)] + def visit_bare_assoc_hash(node) + node.assocs.to_h do |assoc| + # We can only convert regular key-value pairs. A double splat ** + # operator means it has to be converted at run-time. 
+ raise CompilationError unless assoc.is_a?(Assoc) + [visit(assoc.key), visit(assoc.value)] + end end - end - def visit_float(node) - node.value.to_f - end + def visit_float(node) + node.value.to_f + end - alias visit_hash visit_bare_assoc_hash + alias visit_hash visit_bare_assoc_hash - def visit_imaginary(node) - node.value.to_c - end + def visit_imaginary(node) + node.value.to_c + end - def visit_int(node) - case (value = node.value) - when /^0b/ - value[2..].to_i(2) - when /^0o/ - value[2..].to_i(8) - when /^0d/ - value[2..].to_i - when /^0x/ - value[2..].to_i(16) - else - value.to_i + def visit_int(node) + case (value = node.value) + when /^0b/ + value[2..].to_i(2) + when /^0o/ + value[2..].to_i(8) + when /^0d/ + value[2..].to_i + when /^0x/ + value[2..].to_i(16) + else + value.to_i + end end - end - def visit_label(node) - node.value.chomp(":").to_sym - end + def visit_label(node) + node.value.chomp(":").to_sym + end - def visit_mrhs(node) - visit_all(node.parts) - end + def visit_mrhs(node) + visit_all(node.parts) + end - def visit_qsymbols(node) - node.elements.map { |element| visit(element).to_sym } - end + def visit_qsymbols(node) + node.elements.map { |element| visit(element).to_sym } + end - def visit_qwords(node) - visit_all(node.elements) - end + def visit_qwords(node) + visit_all(node.elements) + end - def visit_range(node) - left, right = [visit(node.left), visit(node.right)] - node.operator.value === ".." ? left..right : left...right - end + def visit_range(node) + left, right = [visit(node.left), visit(node.right)] + node.operator.value === ".." ? 
left..right : left...right + end - def visit_rational(node) - node.value.to_r - end + def visit_rational(node) + node.value.to_r + end - def visit_regexp_literal(node) - if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - Regexp.new(node.parts.first.value, visit_regexp_literal_flags(node)) - else - # Any interpolation of expressions or variables will result in the - # regular expression being constructed at run-time. - raise CompilationError + def visit_regexp_literal(node) + if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) + Regexp.new( + node.parts.first.value, + visit_regexp_literal_flags(node) + ) + else + # Any interpolation of expressions or variables will result in the + # regular expression being constructed at run-time. + raise CompilationError + end + end + + def visit_symbol_literal(node) + node.value.value.to_sym + end + + def visit_symbols(node) + node.elements.map { |element| visit(element).to_sym } + end + + def visit_tstring_content(node) + node.value + end + + def visit_var_ref(node) + raise CompilationError unless node.value.is_a?(Kw) + + case node.value.value + when "nil" + nil + when "true" + true + when "false" + false + else + raise CompilationError + end + end + + def visit_word(node) + if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) + node.parts.first.value + else + # Any interpolation of expressions or variables will result in the + # string being constructed at run-time. 
+ raise CompilationError + end + end + + def visit_words(node) + visit_all(node.elements) end end @@ -219,47 +265,6 @@ def visit_regexp_literal_flags(node) end end - def visit_symbol_literal(node) - node.value.value.to_sym - end - - def visit_symbols(node) - node.elements.map { |element| visit(element).to_sym } - end - - def visit_tstring_content(node) - node.value - end - - def visit_var_ref(node) - raise CompilationError unless node.value.is_a?(Kw) - - case node.value.value - when "nil" - nil - when "true" - true - when "false" - false - else - raise CompilationError - end - end - - def visit_word(node) - if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - node.parts.first.value - else - # Any interpolation of expressions or variables will result in the - # string being constructed at run-time. - raise CompilationError - end - end - - def visit_words(node) - visit_all(node.elements) - end - def visit_unsupported(_node) raise CompilationError end @@ -285,7 +290,7 @@ def visit_unsupported(_node) # if we need to return the value of the last statement. 
attr_reader :last_statement - def initialize(options) + def initialize(options = Options.new) @options = options @iseq = nil @last_statement = false @@ -870,8 +875,7 @@ def visit_defined(node) when Ident iseq.putobject("local-variable") when IVar - iseq.putnil - iseq.defined(Defined::TYPE_IVAR, name, "instance-variable") + iseq.definedivar(name, iseq.inline_storage, "instance-variable") when Kw case name when :false @@ -1050,11 +1054,16 @@ def visit_if_op(node) visit_if( IfNode.new( predicate: node.predicate, - statements: node.truthy, + statements: + Statements.new(body: [node.truthy], location: Location.default), consequent: Else.new( keyword: Kw.new(value: "else", location: Location.default), - statements: node.falsy, + statements: + Statements.new( + body: [node.falsy], + location: Location.default + ), location: Location.default ), location: Location.default diff --git a/lib/syntax_tree/yarv/control_flow_graph.rb b/lib/syntax_tree/yarv/control_flow_graph.rb new file mode 100644 index 00000000..2829bb21 --- /dev/null +++ b/lib/syntax_tree/yarv/control_flow_graph.rb @@ -0,0 +1,257 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + # This class represents a control flow graph of a YARV instruction sequence. + # It constructs a graph of basic blocks that hold subsets of the list of + # instructions from the instruction sequence. + # + # You can use this class by calling the ::compile method and passing it a + # YARV instruction sequence. It will return a control flow graph object. + # + # iseq = RubyVM::InstructionSequence.compile("1 + 2") + # iseq = SyntaxTree::YARV::InstructionSequence.from(iseq.to_a) + # cfg = SyntaxTree::YARV::ControlFlowGraph.compile(iseq) + # + class ControlFlowGraph + # This class is responsible for creating a control flow graph from the + # given instruction sequence. + class Compiler + # This is the instruction sequence that is being compiled. 
+ attr_reader :iseq + + # This is a hash of indices in the YARV instruction sequence that point + # to their corresponding instruction. + attr_reader :insns + + # This is a hash of labels that point to their corresponding index into + # the YARV instruction sequence. Note that this is not the same as the + # index into the list of instructions on the instruction sequence + # object. Instead, this is the index into the C array, so it includes + # operands. + attr_reader :labels + + def initialize(iseq) + @iseq = iseq + + @insns = {} + @labels = {} + + length = 0 + iseq.insns.each do |insn| + case insn + when Instruction + @insns[length] = insn + length += insn.length + when InstructionSequence::Label + @labels[insn] = length + end + end + end + + # This method is used to compile the instruction sequence into a control + # flow graph. It returns an instance of ControlFlowGraph. + def compile + blocks = build_basic_blocks + + connect_basic_blocks(blocks) + prune_basic_blocks(blocks) + + ControlFlowGraph.new(iseq, insns, blocks.values).tap(&:verify) + end + + private + + # Finds the indices of the instructions that start a basic block because + # they're either: + # + # * the start of an instruction sequence + # * the target of a branch + # * fallen through to from a branch + # + def find_basic_block_starts + block_starts = Set.new([0]) + + insns.each do |index, insn| + branch_targets = insn.branch_targets + + if branch_targets.any? + branch_targets.each do |branch_target| + block_starts.add(labels[branch_target]) + end + + block_starts.add(index + insn.length) if insn.falls_through? + end + end + + block_starts.to_a.sort + end + + # Builds up a set of basic blocks by iterating over the starts of each + # block. They are keyed by the index of their first instruction. 
+ def build_basic_blocks + block_starts = find_basic_block_starts + + length = 0 + blocks = + iseq + .insns + .grep(Instruction) + .slice_after do |insn| + length += insn.length + block_starts.include?(length) + end + + block_starts + .zip(blocks) + .to_h do |block_start, insns| + # It's possible that we have not detected a block start but still + # have branching instructions inside of a basic block. This can + # happen if you have an unconditional jump which is followed by + # instructions that are unreachable. As of Ruby 3.2, this is + # possible with something as simple as "1 => a". In this case we + # can discard all instructions that follow branching instructions. + block_insns = + insns.slice_after { |insn| insn.branch_targets.any? }.first + + [block_start, BasicBlock.new(block_start, block_insns)] + end + end + + # Connect the blocks by letting them know which blocks are incoming and + # outgoing from each block. + def connect_basic_blocks(blocks) + blocks.each do |block_start, block| + insn = block.insns.last + + insn.branch_targets.each do |branch_target| + block.outgoing_blocks << blocks.fetch(labels[branch_target]) + end + + if (insn.branch_targets.empty? && !insn.leaves?) || + insn.falls_through? + fall_through_start = block_start + block.insns.sum(&:length) + block.outgoing_blocks << blocks.fetch(fall_through_start) + end + + block.outgoing_blocks.each do |outgoing_block| + outgoing_block.incoming_blocks << block + end + end + end + + # If there are blocks that are unreachable, we can remove them from the + # graph entirely at this point. + def prune_basic_blocks(blocks) + visited = Set.new + queue = [blocks.fetch(0)] + + until queue.empty? + current_block = queue.shift + next if visited.include?(current_block) + + visited << current_block + queue.concat(current_block.outgoing_blocks) + end + + blocks.select! { |_, block| visited.include?(block) } + end + end + + # This is the instruction sequence that this control flow graph + # corresponds to. 
+ attr_reader :iseq + + # This is the list of instructions that this control flow graph contains. + # It is effectively the same as the list of instructions in the + # instruction sequence but with line numbers and events filtered out. + attr_reader :insns + + # This is the set of basic blocks that this control-flow graph contains. + attr_reader :blocks + + def initialize(iseq, insns, blocks) + @iseq = iseq + @insns = insns + @blocks = blocks + end + + def disasm + fmt = Disassembler.new(iseq) + fmt.puts("== cfg: #{iseq.inspect}") + + blocks.each do |block| + fmt.puts(block.id) + fmt.with_prefix(" ") do |prefix| + unless block.incoming_blocks.empty? + from = block.incoming_blocks.map(&:id) + fmt.puts("#{prefix}== from: #{from.join(", ")}") + end + + fmt.format_insns!(block.insns, block.block_start) + + to = block.outgoing_blocks.map(&:id) + to << "leaves" if block.insns.last.leaves? + fmt.puts("#{prefix}== to: #{to.join(", ")}") + end + end + + fmt.string + end + + def to_dfg + DataFlowGraph.compile(self) + end + + def to_son + to_dfg.to_son + end + + def to_mermaid + Mermaid.flowchart do |flowchart| + disasm = Disassembler::Squished.new + + blocks.each do |block| + flowchart.subgraph(block.id) do + previous = nil + + block.each_with_length do |insn, length| + node = + flowchart.node( + "node_#{length}", + "%04d %s" % [length, insn.disasm(disasm)] + ) + + flowchart.link(previous, node) if previous + previous = node + end + end + end + + blocks.each do |block| + block.outgoing_blocks.each do |outgoing| + offset = + block.block_start + block.insns.sum(&:length) - + block.insns.last.length + + from = flowchart.fetch("node_#{offset}") + to = flowchart.fetch("node_#{outgoing.block_start}") + flowchart.link(from, to) + end + end + end + end + + # This method is used to verify that the control flow graph is well + # formed. It does this by checking that each basic block is itself well + # formed. 
+ def verify + blocks.each(&:verify) + end + + def self.compile(iseq) + Compiler.new(iseq).compile + end + end + end +end diff --git a/lib/syntax_tree/yarv/data_flow_graph.rb b/lib/syntax_tree/yarv/data_flow_graph.rb new file mode 100644 index 00000000..aedee9ba --- /dev/null +++ b/lib/syntax_tree/yarv/data_flow_graph.rb @@ -0,0 +1,338 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + # Constructs a data-flow-graph of a YARV instruction sequence, via a + # control-flow-graph. Data flow is discovered locally and then globally. The + # graph only considers data flow through the stack - local variables and + # objects are considered fully escaped in this analysis. + # + # You can use this class by calling the ::compile method and passing it a + # control flow graph. It will return a data flow graph object. + # + # iseq = RubyVM::InstructionSequence.compile("1 + 2") + # iseq = SyntaxTree::YARV::InstructionSequence.from(iseq.to_a) + # cfg = SyntaxTree::YARV::ControlFlowGraph.compile(iseq) + # dfg = SyntaxTree::YARV::DataFlowGraph.compile(cfg) + # + class DataFlowGraph + # This object represents the flow of data between instructions. + class DataFlow + attr_reader :in + attr_reader :out + + def initialize + @in = [] + @out = [] + end + end + + # This represents an object that goes on the stack that is passed between + # basic blocks. + class BlockArgument + attr_reader :name + + def initialize(name) + @name = name + end + + def local? + false + end + + def to_str + name.to_s + end + end + + # This represents an object that goes on the stack that is passed between + # instructions within a basic block. + class LocalArgument + attr_reader :name, :length + + def initialize(length) + @length = length + end + + def local? 
+ true + end + + def to_str + length.to_s + end + end + + attr_reader :cfg, :insn_flows, :block_flows + + def initialize(cfg, insn_flows, block_flows) + @cfg = cfg + @insn_flows = insn_flows + @block_flows = block_flows + end + + def blocks + cfg.blocks + end + + def disasm + fmt = Disassembler.new(cfg.iseq) + fmt.puts("== dfg: #{cfg.iseq.inspect}") + + blocks.each do |block| + fmt.puts(block.id) + fmt.with_prefix(" ") do |prefix| + unless block.incoming_blocks.empty? + from = block.incoming_blocks.map(&:id) + fmt.puts("#{prefix}== from: #{from.join(", ")}") + end + + block_flow = block_flows.fetch(block.id) + unless block_flow.in.empty? + fmt.puts("#{prefix}== in: #{block_flow.in.join(", ")}") + end + + fmt.format_insns!(block.insns, block.block_start) do |_, length| + insn_flow = insn_flows[length] + next if insn_flow.in.empty? && insn_flow.out.empty? + + fmt.print(" # ") + unless insn_flow.in.empty? + fmt.print("in: #{insn_flow.in.join(", ")}") + fmt.print("; ") unless insn_flow.out.empty? + end + + unless insn_flow.out.empty? + fmt.print("out: #{insn_flow.out.join(", ")}") + end + end + + to = block.outgoing_blocks.map(&:id) + to << "leaves" if block.insns.last.leaves? + fmt.puts("#{prefix}== to: #{to.join(", ")}") + + unless block_flow.out.empty? + fmt.puts("#{prefix}== out: #{block_flow.out.join(", ")}") + end + end + end + + fmt.string + end + + def to_son + SeaOfNodes.compile(self) + end + + def to_mermaid + Mermaid.flowchart do |flowchart| + disasm = Disassembler::Squished.new + + blocks.each do |block| + block_flow = block_flows.fetch(block.id) + graph_name = + if block_flow.in.any? 
+ "#{block.id} #{block_flows[block.id].in.join(", ")}" + else + block.id + end + + flowchart.subgraph(graph_name) do + previous = nil + + block.each_with_length do |insn, length| + node = + flowchart.node( + "node_#{length}", + "%04d %s" % [length, insn.disasm(disasm)], + shape: :rounded + ) + + flowchart.link(previous, node, color: :red) if previous + insn_flows[length].in.each do |input| + if input.is_a?(LocalArgument) + from = flowchart.fetch("node_#{input.length}") + flowchart.link(from, node, color: :green) + end + end + + previous = node + end + end + end + + blocks.each do |block| + block.outgoing_blocks.each do |outgoing| + offset = + block.block_start + block.insns.sum(&:length) - + block.insns.last.length + + from = flowchart.fetch("node_#{offset}") + to = flowchart.fetch("node_#{outgoing.block_start}") + flowchart.link(from, to, color: :red) + end + end + end + end + + # Verify that we constructed the data flow graph correctly. + def verify + # Check that the first block has no arguments. + raise unless block_flows.fetch(blocks.first.id).in.empty? + + # Check all control flow edges between blocks pass the right number of + # arguments. + blocks.each do |block| + block_flow = block_flows.fetch(block.id) + + if block.outgoing_blocks.empty? + # With no outgoing blocks, there should be no output arguments. + raise unless block_flow.out.empty? + else + # Check with outgoing blocks... + block.outgoing_blocks.each do |outgoing_block| + outgoing_flow = block_flows.fetch(outgoing_block.id) + + # The block should have as many output arguments as the + # outgoing block has input arguments. + raise unless block_flow.out.size == outgoing_flow.in.size + end + end + end + end + + def self.compile(cfg) + Compiler.new(cfg).compile + end + + # This class is responsible for creating a data flow graph from the given + # control flow graph. + class Compiler + # This is the control flow graph that is being compiled. 
+ attr_reader :cfg + + # This data structure will hold the data flow between instructions + # within individual basic blocks. + attr_reader :insn_flows + + # This data structure will hold the data flow between basic blocks. + attr_reader :block_flows + + def initialize(cfg) + @cfg = cfg + @insn_flows = cfg.insns.to_h { |length, _| [length, DataFlow.new] } + @block_flows = cfg.blocks.to_h { |block| [block.id, DataFlow.new] } + end + + def compile + find_internal_flow + find_external_flow + DataFlowGraph.new(cfg, insn_flows, block_flows).tap(&:verify) + end + + private + + # Find the data flow within each basic block. Using an abstract stack, + # connect from consumers of data to the producers of that data. + def find_internal_flow + cfg.blocks.each do |block| + block_flow = block_flows.fetch(block.id) + stack = [] + + # Go through each instruction in the block. + block.each_with_length do |insn, length| + insn_flow = insn_flows[length] + + # How many values will be missing from the local stack to run this + # instruction? This will be used to determine if the values that + # are being used by this instruction are coming from previous + # instructions or from previous basic blocks. + missing = insn.pops - stack.size + + # For every value the instruction pops off the stack. + insn.pops.times do + # Was the value it pops off from another basic block? + if stack.empty? + # If the stack is empty, then there aren't enough values being + # pushed from previous instructions to fulfill the needs of + # this instruction. In that case the values must be coming + # from previous basic blocks. + missing -= 1 + argument = BlockArgument.new(:"in_#{missing}") + + insn_flow.in.unshift(argument) + block_flow.in.unshift(argument) + else + # Since there are values in the stack, we can connect this + # consumer to the producer of the value. + insn_flow.in.unshift(stack.pop) + end + end + + # Record on our abstract stack that this instruction pushed + # this value onto the stack. 
+ insn.pushes.times { stack << LocalArgument.new(length) } + end + + # Values that are left on the stack after going through all + # instructions are arguments to the basic block that we jump to. + stack.reverse_each.with_index do |producer, index| + block_flow.out << producer + + argument = BlockArgument.new(:"out_#{index}") + insn_flows[producer.length].out << argument + end + end + + # Go backwards and connect from producers to consumers. + cfg.insns.each_key do |length| + # For every instruction that produced a value used in this + # instruction... + insn_flows[length].in.each do |producer| + # If it's actually another instruction and not a basic block + # argument... + if producer.is_a?(LocalArgument) + # Record in the producing instruction that it produces a value + # used by this construction. + insn_flows[producer.length].out << LocalArgument.new(length) + end + end + end + end + + # Find the data that flows between basic blocks. + def find_external_flow + stack = [*cfg.blocks] + + until stack.empty? + block = stack.pop + block_flow = block_flows.fetch(block.id) + + block.incoming_blocks.each do |incoming_block| + incoming_flow = block_flows.fetch(incoming_block.id) + + # Does a predecessor block have fewer outputs than the successor + # has inputs? + if incoming_flow.out.size < block_flow.in.size + # If so then add arguments to pass data through from the + # incoming block's incoming blocks. + (block_flow.in.size - incoming_flow.out.size).times do |index| + name = BlockArgument.new(:"pass_#{index}") + + incoming_flow.in.unshift(name) + incoming_flow.out.unshift(name) + end + + # Since we modified the incoming block, add it back to the stack + # so it'll be considered as an outgoing block again, and + # propogate the external data flow back up the control flow + # graph. 
+ stack << incoming_block + end + end + end + end + end + end + end +end diff --git a/lib/syntax_tree/yarv/decompiler.rb b/lib/syntax_tree/yarv/decompiler.rb index 47d2a2df..4ea99e3a 100644 --- a/lib/syntax_tree/yarv/decompiler.rb +++ b/lib/syntax_tree/yarv/decompiler.rb @@ -97,7 +97,7 @@ def decompile(iseq) clause << Next(Args([])) when Leave value = Args([clause.pop]) - clause << (iseq.type == :top ? Break(value) : ReturnNode(value)) + clause << (iseq.type != :top ? Break(value) : ReturnNode(value)) when OptAnd, OptDiv, OptEq, OptGE, OptGT, OptLE, OptLT, OptLTLT, OptMinus, OptMod, OptMult, OptOr, OptPlus left, right = clause.pop(2) @@ -151,7 +151,7 @@ def decompile(iseq) elsif argc == 1 && method.end_with?("=") receiver, argument = clause.pop(2) clause << Assign( - CallNode(receiver, Period("."), Ident(method[0..-2]), nil), + Field(receiver, Period("."), Ident(method[0..-2])), argument ) else diff --git a/lib/syntax_tree/yarv/disassembler.rb b/lib/syntax_tree/yarv/disassembler.rb index d303bcb7..dac220fd 100644 --- a/lib/syntax_tree/yarv/disassembler.rb +++ b/lib/syntax_tree/yarv/disassembler.rb @@ -3,16 +3,52 @@ module SyntaxTree module YARV class Disassembler + # This class is another object that handles disassembling a YARV + # instruction sequence but it renders it without any of the extra spacing + # or alignment. + class Squished + def calldata(value) + value.inspect + end + + def enqueue(iseq) + end + + def event(name) + end + + def inline_storage(cache) + "" + end + + def instruction(name, operands = []) + operands.empty? ? name : "#{name} #{operands.join(", ")}" + end + + def label(value) + "%04d" % value.name["label_".length..] 
+ end + + def local(index, **) + index.inspect + end + + def object(value) + value.inspect + end + end + attr_reader :output, :queue + attr_reader :current_prefix attr_accessor :current_iseq - def initialize + def initialize(current_iseq = nil) @output = StringIO.new @queue = [] @current_prefix = "" - @current_iseq = nil + @current_iseq = current_iseq end ######################################################################## @@ -20,30 +56,7 @@ def initialize ######################################################################## def calldata(value) - flag_names = [] - flag_names << :ARGS_SPLAT if value.flag?(CallData::CALL_ARGS_SPLAT) - if value.flag?(CallData::CALL_ARGS_BLOCKARG) - flag_names << :ARGS_BLOCKARG - end - flag_names << :FCALL if value.flag?(CallData::CALL_FCALL) - flag_names << :VCALL if value.flag?(CallData::CALL_VCALL) - flag_names << :ARGS_SIMPLE if value.flag?(CallData::CALL_ARGS_SIMPLE) - flag_names << :BLOCKISEQ if value.flag?(CallData::CALL_BLOCKISEQ) - flag_names << :KWARG if value.flag?(CallData::CALL_KWARG) - flag_names << :KW_SPLAT if value.flag?(CallData::CALL_KW_SPLAT) - flag_names << :TAILCALL if value.flag?(CallData::CALL_TAILCALL) - flag_names << :SUPER if value.flag?(CallData::CALL_SUPER) - flag_names << :ZSUPER if value.flag?(CallData::CALL_ZSUPER) - flag_names << :OPT_SEND if value.flag?(CallData::CALL_OPT_SEND) - flag_names << :KW_SPLAT_MUT if value.flag?(CallData::CALL_KW_SPLAT_MUT) - - parts = [] - parts << "mid:#{value.method}" if value.method - parts << "argc:#{value.argc}" - parts << "kw:[#{value.kw_arg.join(", ")}]" if value.kw_arg - parts << flag_names.join("|") if flag_names.any? - - "" + value.inspect end def enqueue(iseq) @@ -97,7 +110,7 @@ def object(value) end ######################################################################## - # Main entrypoint + # Entrypoints ######################################################################## def format! @@ -105,63 +118,13 @@ def format! 
output << "\n" if output.pos > 0 format_iseq(@current_iseq) end - - output.string end - private - - def format_iseq(iseq) - output << "#{current_prefix}== disasm: " - output << "#:1 " - - location = Location.fixed(line: iseq.line, char: 0, column: 0) - output << "(#{location.start_line},#{location.start_column})-" - output << "(#{location.end_line},#{location.end_column})" - output << "> " - - if iseq.catch_table.any? - output << "(catch: TRUE)\n" - output << "#{current_prefix}== catch table\n" - - with_prefix("#{current_prefix}| ") do - iseq.catch_table.each do |entry| - case entry - when InstructionSequence::CatchBreak - output << "#{current_prefix}catch type: break\n" - format_iseq(entry.iseq) - when InstructionSequence::CatchNext - output << "#{current_prefix}catch type: next\n" - when InstructionSequence::CatchRedo - output << "#{current_prefix}catch type: redo\n" - when InstructionSequence::CatchRescue - output << "#{current_prefix}catch type: rescue\n" - format_iseq(entry.iseq) - end - end - end - - output << "#{current_prefix}|#{"-" * 72}\n" - else - output << "(catch: FALSE)\n" - end - - if (local_table = iseq.local_table) && !local_table.empty? - output << "#{current_prefix}local table (size: #{local_table.size})\n" - - locals = - local_table.locals.each_with_index.map do |local, index| - "[%2d] %s@%d" % [local_table.offset(index), local.name, index] - end - - output << "#{current_prefix}#{locals.join(" ")}\n" - end - - length = 0 + def format_insns!(insns, length = 0) events = [] lines = [] - iseq.insns.each do |insn| + insns.each do |insn| case insn when Integer lines << insn @@ -191,22 +154,83 @@ def format_iseq(iseq) events.clear end + # A hook here to allow for custom formatting of instructions after + # the main body has been processed. + yield insn, length if block_given? 
+ output << "\n" length += insn.length end end end + def print(string) + output.print(string) + end + + def puts(string) + output.puts(string) + end + + def string + output.string + end + def with_prefix(value) previous = @current_prefix begin @current_prefix = value - yield + yield value ensure @current_prefix = previous end end + + private + + def format_iseq(iseq) + output << "#{current_prefix}== disasm: #{iseq.inspect} " + + if iseq.catch_table.any? + output << "(catch: TRUE)\n" + output << "#{current_prefix}== catch table\n" + + with_prefix("#{current_prefix}| ") do + iseq.catch_table.each do |entry| + case entry + when InstructionSequence::CatchBreak + output << "#{current_prefix}catch type: break\n" + format_iseq(entry.iseq) + when InstructionSequence::CatchNext + output << "#{current_prefix}catch type: next\n" + when InstructionSequence::CatchRedo + output << "#{current_prefix}catch type: redo\n" + when InstructionSequence::CatchRescue + output << "#{current_prefix}catch type: rescue\n" + format_iseq(entry.iseq) + end + end + end + + output << "#{current_prefix}|#{"-" * 72}\n" + else + output << "(catch: FALSE)\n" + end + + if (local_table = iseq.local_table) && !local_table.empty? + output << "#{current_prefix}local table (size: #{local_table.size})\n" + + locals = + local_table.locals.each_with_index.map do |local, index| + "[%2d] %s@%d" % [local_table.offset(index), local.name, index] + end + + output << "#{current_prefix}#{locals.join(" ")}\n" + end + + format_insns!(iseq.insns) + end end end end diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb index c284221b..df92799b 100644 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -7,6 +7,28 @@ module YARV # list of instructions along with the metadata pertaining to them. It also # functions as a builder for the instruction sequence. 
class InstructionSequence + # This provides a handle to the rb_iseq_load function, which allows you + # to pass a serialized iseq to Ruby and have it return a + # RubyVM::InstructionSequence object. + def self.iseq_load(iseq) + require "fiddle" + + @iseq_load_function ||= + Fiddle::Function.new( + Fiddle::Handle::DEFAULT["rb_iseq_load"], + [Fiddle::TYPE_VOIDP] * 3, + Fiddle::TYPE_VOIDP + ) + + Fiddle.dlunwrap(@iseq_load_function.call(Fiddle.dlwrap(iseq), 0, nil)) + rescue LoadError + raise "Could not load the Fiddle library" + rescue NameError + raise "Unable to find rb_iseq_load" + rescue Fiddle::DLError + raise "Unable to perform a dynamic load" + end + # When the list of instructions is first being created, it's stored as a # linked list. This is to make it easier to perform peephole optimizations # and other transformations like instruction specialization. @@ -28,7 +50,7 @@ def initialize @tail_node = nil end - def each + def each(&_blk) return to_enum(__method__) unless block_given? each_node { |node| yield node.value } end @@ -60,19 +82,6 @@ def push(instruction) MAGIC = "YARVInstructionSequence/SimpleDataFormat" - # This provides a handle to the rb_iseq_load function, which allows you to - # pass a serialized iseq to Ruby and have it return a - # RubyVM::InstructionSequence object. - ISEQ_LOAD = - begin - Fiddle::Function.new( - Fiddle::Handle::DEFAULT["rb_iseq_load"], - [Fiddle::TYPE_VOIDP] * 3, - Fiddle::TYPE_VOIDP - ) - rescue NameError - end - # This object is used to track the size of the stack at any given time. It # is effectively a mini symbolic interpreter. It's necessary because when # instruction sequences get serialized they include a :stack_max field on @@ -221,8 +230,7 @@ def length end def eval - raise "Unsupported platform" if ISEQ_LOAD.nil? 
- Fiddle.dlunwrap(ISEQ_LOAD.call(Fiddle.dlwrap(to_a), 0, nil)).eval + InstructionSequence.iseq_load(to_a).eval end def to_a @@ -269,10 +277,27 @@ def to_a ] end + def to_cfg + ControlFlowGraph.compile(self) + end + + def to_dfg + to_cfg.to_dfg + end + + def to_son + to_dfg.to_son + end + def disasm - disassembler = Disassembler.new - disassembler.enqueue(self) - disassembler.format! + fmt = Disassembler.new + fmt.enqueue(self) + fmt.format! + fmt.string + end + + def inspect + "#:1 (#{line},0)-(#{line},0)>" end # This method converts our linked list of instructions into a final array @@ -328,11 +353,27 @@ def specialize_instructions! next unless calldata.argc == 0 case calldata.method + when :min + node.value = + if RUBY_VERSION < "3.3" + Legacy::OptNewArrayMin.new(value.number) + else + OptNewArraySend.new(value.number, :min) + end + + node.next_node = next_node.next_node when :max - node.value = OptNewArrayMax.new(value.number) + node.value = + if RUBY_VERSION < "3.3" + Legacy::OptNewArrayMax.new(value.number) + else + OptNewArraySend.new(value.number, :max) + end + node.next_node = next_node.next_node - when :min - node.value = OptNewArrayMin.new(value.number) + when :hash + next if RUBY_VERSION < "3.3" + node.value = OptNewArraySend.new(value.number, :hash) node.next_node = next_node.next_node end when PutObject, PutString @@ -648,12 +689,21 @@ def concatstrings(number) push(ConcatStrings.new(number)) end + def defineclass(name, class_iseq, flags) + push(DefineClass.new(name, class_iseq, flags)) + end + def defined(type, name, message) push(Defined.new(type, name, message)) end - def defineclass(name, class_iseq, flags) - push(DefineClass.new(name, class_iseq, flags)) + def definedivar(name, cache, message) + if RUBY_VERSION < "3.3" + push(PutNil.new) + push(Defined.new(Defined::TYPE_IVAR, name, message)) + else + push(DefinedIVar.new(name, cache, message)) + end end def definemethod(name, method_iseq) @@ -1033,6 +1083,8 @@ def self.from(source, options = 
Compiler::Options.new, parent_iseq = nil) iseq.defineclass(opnds[0], from(opnds[1], options, iseq), opnds[2]) when :defined iseq.defined(opnds[0], opnds[1], opnds[2]) + when :definedivar + iseq.definedivar(opnds[0], opnds[1], opnds[2]) when :definemethod iseq.definemethod(opnds[0], from(opnds[1], options, iseq)) when :definesmethod @@ -1138,6 +1190,9 @@ def self.from(source, options = Compiler::Options.new, parent_iseq = nil) when :opt_newarray_min iseq.newarray(opnds[0]) iseq.send(YARV.calldata(:min)) + when :opt_newarray_send + iseq.newarray(opnds[0]) + iseq.send(CallData.new(opnds[1])) when :opt_neq iseq.push( OptNEq.new(CallData.from(opnds[0]), CallData.from(opnds[1])) diff --git a/lib/syntax_tree/yarv/instructions.rb b/lib/syntax_tree/yarv/instructions.rb index 5e1d116b..ffeebe65 100644 --- a/lib/syntax_tree/yarv/instructions.rb +++ b/lib/syntax_tree/yarv/instructions.rb @@ -2,65 +2,55 @@ module SyntaxTree module YARV - # This is an operand to various YARV instructions that represents the - # information about a specific call site. - class CallData - CALL_ARGS_SPLAT = 1 << 0 - CALL_ARGS_BLOCKARG = 1 << 1 - CALL_FCALL = 1 << 2 - CALL_VCALL = 1 << 3 - CALL_ARGS_SIMPLE = 1 << 4 - CALL_BLOCKISEQ = 1 << 5 - CALL_KWARG = 1 << 6 - CALL_KW_SPLAT = 1 << 7 - CALL_TAILCALL = 1 << 8 - CALL_SUPER = 1 << 9 - CALL_ZSUPER = 1 << 10 - CALL_OPT_SEND = 1 << 11 - CALL_KW_SPLAT_MUT = 1 << 12 - - attr_reader :method, :argc, :flags, :kw_arg - - def initialize( - method, - argc = 0, - flags = CallData::CALL_ARGS_SIMPLE, - kw_arg = nil - ) - @method = method - @argc = argc - @flags = flags - @kw_arg = kw_arg + # This is a base class for all YARV instructions. It provides a few + # convenience methods for working with instructions. + class Instruction + # This method creates an instruction that represents the canonical + # (non-specialized) form of this instruction. If this instruction is not + # a specialized instruction, then this method returns `self`. 
+ def canonical + self + end + + # This returns the size of the instruction in terms of the number of slots + # it occupies in the instruction sequence. Effectively this is 1 plus the + # number of operands. + def length + 1 end - def flag?(mask) - (flags & mask) > 0 + # This returns the number of values that are pushed onto the stack. + def pushes + 0 + end + + # This returns the number of values that are popped off the stack. + def pops + 0 end - def to_h - result = { mid: method, flag: flags, orig_argc: argc } - result[:kw_arg] = kw_arg if kw_arg - result + # This returns an array of labels. + def branch_targets + [] end - def self.from(serialized) - new( - serialized[:mid], - serialized[:orig_argc], - serialized[:flag], - serialized[:kw_arg] - ) + # Whether or not this instruction leaves the current frame. + def leaves? + false end - end - # A convenience method for creating a CallData object. - def self.calldata( - method, - argc = 0, - flags = CallData::CALL_ARGS_SIMPLE, - kw_arg = nil - ) - CallData.new(method, argc, flags, kw_arg) + # Whether or not this instruction falls through to the next instruction if + # its branching fails. + def falls_through? + false + end + + # Does the instruction have side effects? Control-flow counts as a + # side-effect, as do some special-case instructions like Leave. By default + # every instruction is marked as having side effects. + def side_effects? 
+ true + end end # ### Summary @@ -76,7 +66,7 @@ def self.calldata( # x[0] # ~~~ # - class AdjustStack + class AdjustStack < Instruction attr_reader :number def initialize(number) @@ -91,20 +81,20 @@ def to_a(_iseq) [:adjuststack, number] end - def length - 2 + def deconstruct_keys(_keys) + { number: number } end - def pops - number + def ==(other) + other.is_a?(AdjustStack) && other.number == number end - def pushes - 0 + def length + 2 end - def canonical - self + def pops + number end def call(vm) @@ -130,7 +120,7 @@ def call(vm) # "#{5}" # ~~~ # - class AnyToString + class AnyToString < Instruction def disasm(fmt) fmt.instruction("anytostring") end @@ -139,8 +129,12 @@ def to_a(_iseq) [:anytostring] end - def length - 1 + def deconstruct_keys(_keys) + {} + end + + def ==(other) + other.is_a?(AnyToString) end def pops @@ -151,10 +145,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) original, value = vm.pop(2) @@ -182,7 +172,7 @@ def call(vm) # puts x # ~~~ # - class BranchIf + class BranchIf < Instruction attr_reader :label def initialize(label) @@ -197,6 +187,14 @@ def to_a(_iseq) [:branchif, label.name] end + def deconstruct_keys(_keys) + { label: label } + end + + def ==(other) + other.is_a?(BranchIf) && other.label == label + end + def length 2 end @@ -205,16 +203,16 @@ def pops 1 end - def pushes - 0 + def call(vm) + vm.jump(label) if vm.pop end - def canonical - self + def branch_targets + [label] end - def call(vm) - vm.jump(label) if vm.pop + def falls_through? + true end end @@ -235,7 +233,7 @@ def call(vm) # end # ~~~ # - class BranchNil + class BranchNil < Instruction attr_reader :label def initialize(label) @@ -250,6 +248,14 @@ def to_a(_iseq) [:branchnil, label.name] end + def deconstruct_keys(_keys) + { label: label } + end + + def ==(other) + other.is_a?(BranchNil) && other.label == label + end + def length 2 end @@ -258,16 +264,16 @@ def pops 1 end - def pushes - 0 + def call(vm) + vm.jump(label) if vm.pop.nil? 
end - def canonical - self + def branch_targets + [label] end - def call(vm) - vm.jump(label) if vm.pop.nil? + def falls_through? + true end end @@ -287,7 +293,7 @@ def call(vm) # end # ~~~ # - class BranchUnless + class BranchUnless < Instruction attr_reader :label def initialize(label) @@ -302,6 +308,14 @@ def to_a(_iseq) [:branchunless, label.name] end + def deconstruct_keys(_keys) + { label: label } + end + + def ==(other) + other.is_a?(BranchUnless) && other.label == label + end + def length 2 end @@ -310,16 +324,16 @@ def pops 1 end - def pushes - 0 + def call(vm) + vm.jump(label) unless vm.pop end - def canonical - self + def branch_targets + [label] end - def call(vm) - vm.jump(label) unless vm.pop + def falls_through? + true end end @@ -342,7 +356,7 @@ def call(vm) # evaluate(value: 3) # ~~~ # - class CheckKeyword + class CheckKeyword < Instruction attr_reader :keyword_bits_index, :keyword_index def initialize(keyword_bits_index, keyword_index) @@ -365,20 +379,22 @@ def to_a(iseq) ] end - def length - 3 + def deconstruct_keys(_keys) + { keyword_bits_index: keyword_bits_index, keyword_index: keyword_index } end - def pops - 0 + def ==(other) + other.is_a?(CheckKeyword) && + other.keyword_bits_index == keyword_bits_index && + other.keyword_index == keyword_index end - def pushes - 1 + def length + 3 end - def canonical - self + def pushes + 1 end def call(vm) @@ -398,7 +414,7 @@ def call(vm) # foo in Foo # ~~~ # - class CheckMatch + class CheckMatch < Instruction VM_CHECKMATCH_TYPE_WHEN = 1 VM_CHECKMATCH_TYPE_CASE = 2 VM_CHECKMATCH_TYPE_RESCUE = 3 @@ -419,6 +435,14 @@ def to_a(_iseq) [:checkmatch, type] end + def deconstruct_keys(_keys) + { type: type } + end + + def ==(other) + other.is_a?(CheckMatch) && other.type == type + end + def length 2 end @@ -431,10 +455,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) target, pattern = vm.pop(2) @@ -478,7 +498,7 @@ def check?(pattern, target) # foo in [bar] # ~~~ # - class CheckType + class 
CheckType < Instruction TYPE_OBJECT = 0x01 TYPE_CLASS = 0x02 TYPE_MODULE = 0x03 @@ -561,6 +581,14 @@ def to_a(_iseq) [:checktype, type] end + def deconstruct_keys(_keys) + { type: type } + end + + def ==(other) + other.is_a?(CheckType) && other.type == type + end + def length 2 end @@ -577,10 +605,6 @@ def pushes 2 end - def canonical - self - end - def call(vm) object = vm.pop result = @@ -647,7 +671,7 @@ def call(vm) # [1, *2] # ~~~ # - class ConcatArray + class ConcatArray < Instruction def disasm(fmt) fmt.instruction("concatarray") end @@ -656,8 +680,12 @@ def to_a(_iseq) [:concatarray] end - def length - 1 + def deconstruct_keys(_keys) + {} + end + + def ==(other) + other.is_a?(ConcatArray) end def pops @@ -668,10 +696,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) left, right = vm.pop(2) vm.push([*left, *right]) @@ -693,7 +717,7 @@ def call(vm) # "#{5}" # ~~~ # - class ConcatStrings + class ConcatStrings < Instruction attr_reader :number def initialize(number) @@ -708,6 +732,14 @@ def to_a(_iseq) [:concatstrings, number] end + def deconstruct_keys(_keys) + { number: number } + end + + def ==(other) + other.is_a?(ConcatStrings) && other.number == number + end + def length 2 end @@ -720,10 +752,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.pop(number).join) end @@ -744,7 +772,7 @@ def call(vm) # end # ~~~ # - class DefineClass + class DefineClass < Instruction TYPE_CLASS = 0 TYPE_SINGLETON_CLASS = 1 TYPE_MODULE = 2 @@ -771,6 +799,15 @@ def to_a(_iseq) [:defineclass, name, class_iseq.to_a, flags] end + def deconstruct_keys(_keys) + { name: name, class_iseq: class_iseq, flags: flags } + end + + def ==(other) + other.is_a?(DefineClass) && other.name == name && + other.class_iseq == class_iseq && other.flags == flags + end + def length 4 end @@ -783,10 +820,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) object, superclass = vm.pop(2) @@ -823,7 +856,7 @@ def call(vm) # defined?(x) # ~~~ # - class 
Defined + class Defined < Instruction TYPE_NIL = 1 TYPE_IVAR = 2 TYPE_LVAR = 3 @@ -899,6 +932,15 @@ def to_a(_iseq) [:defined, type, name, message] end + def deconstruct_keys(_keys) + { type: type, name: name, message: message } + end + + def ==(other) + other.is_a?(Defined) && other.type == type && other.name == name && + other.message == message + end + def length 4 end @@ -911,10 +953,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) object = vm.pop @@ -956,6 +994,64 @@ def call(vm) end end + # ### Summary + # + # `definedivar` checks if an instance variable is defined. It is a + # specialization of the `defined` instruction. It accepts three arguments: + # the name of the instance variable, an inline cache, and the string that + # should be pushed onto the stack in the event that the instance variable + # is defined. + # + # ### Usage + # + # ~~~ruby + # defined?(@value) + # ~~~ + # + class DefinedIVar < Instruction + attr_reader :name, :cache, :message + + def initialize(name, cache, message) + @name = name + @cache = cache + @message = message + end + + def disasm(fmt) + fmt.instruction( + "definedivar", + [fmt.object(name), fmt.inline_storage(cache), fmt.object(message)] + ) + end + + def to_a(_iseq) + [:definedivar, name, cache, message] + end + + def deconstruct_keys(_keys) + { name: name, cache: cache, message: message } + end + + def ==(other) + other.is_a?(DefinedIVar) && other.name == name && + other.cache == cache && other.message == message + end + + def length + 4 + end + + def pushes + 1 + end + + def call(vm) + result = (message if vm.frame._self.instance_variable_defined?(name)) + + vm.push(result) + end + end + # ### Summary # # `definemethod` defines a method on the class of the current value of @@ -969,7 +1065,7 @@ def call(vm) # def value = "value" # ~~~ # - class DefineMethod + class DefineMethod < Instruction attr_reader :method_name, :method_iseq def initialize(method_name, method_iseq) @@ -989,20 +1085,17 @@ def 
to_a(_iseq) [:definemethod, method_name, method_iseq.to_a] end - def length - 3 - end - - def pops - 0 + def deconstruct_keys(_keys) + { method_name: method_name, method_iseq: method_iseq } end - def pushes - 0 + def ==(other) + other.is_a?(DefineMethod) && other.method_name == method_name && + other.method_iseq == method_iseq end - def canonical - self + def length + 3 end def call(vm) @@ -1041,7 +1134,7 @@ def call(vm) # def self.value = "value" # ~~~ # - class DefineSMethod + class DefineSMethod < Instruction attr_reader :method_name, :method_iseq def initialize(method_name, method_iseq) @@ -1061,20 +1154,21 @@ def to_a(_iseq) [:definesmethod, method_name, method_iseq.to_a] end - def length - 3 + def deconstruct_keys(_keys) + { method_name: method_name, method_iseq: method_iseq } end - def pops - 1 + def ==(other) + other.is_a?(DefineSMethod) && other.method_name == method_name && + other.method_iseq == method_iseq end - def pushes - 0 + def length + 3 end - def canonical - self + def pops + 1 end def call(vm) @@ -1109,7 +1203,7 @@ def call(vm) # $global = 5 # ~~~ # - class Dup + class Dup < Instruction def disasm(fmt) fmt.instruction("dup") end @@ -1118,8 +1212,12 @@ def to_a(_iseq) [:dup] end - def length - 1 + def deconstruct_keys(_keys) + {} + end + + def ==(other) + other.is_a?(Dup) end def pops @@ -1130,13 +1228,13 @@ def pushes 2 end - def canonical - self - end - def call(vm) vm.push(vm.stack.last.dup) end + + def side_effects? 
+ false + end end # ### Summary @@ -1149,7 +1247,7 @@ def call(vm) # [true] # ~~~ # - class DupArray + class DupArray < Instruction attr_reader :object def initialize(object) @@ -1164,20 +1262,20 @@ def to_a(_iseq) [:duparray, object] end - def length - 2 + def deconstruct_keys(_keys) + { object: object } end - def pops - 0 + def ==(other) + other.is_a?(DupArray) && other.object == object end - def pushes - 1 + def length + 2 end - def canonical - self + def pushes + 1 end def call(vm) @@ -1195,7 +1293,7 @@ def call(vm) # { a: 1 } # ~~~ # - class DupHash + class DupHash < Instruction attr_reader :object def initialize(object) @@ -1210,20 +1308,20 @@ def to_a(_iseq) [:duphash, object] end - def length - 2 + def deconstruct_keys(_keys) + { object: object } end - def pops - 0 + def ==(other) + other.is_a?(DupHash) && other.object == object end - def pushes - 1 + def length + 2 end - def canonical - self + def pushes + 1 end def call(vm) @@ -1241,7 +1339,7 @@ def call(vm) # Object::X ||= true # ~~~ # - class DupN + class DupN < Instruction attr_reader :number def initialize(number) @@ -1256,20 +1354,20 @@ def to_a(_iseq) [:dupn, number] end - def length - 2 + def deconstruct_keys(_keys) + { number: number } end - def pops - 0 + def ==(other) + other.is_a?(DupN) && other.number == number end - def pushes - number + def length + 2 end - def canonical - self + def pushes + number end def call(vm) @@ -1291,7 +1389,7 @@ def call(vm) # x, = [true, false, nil] # ~~~ # - class ExpandArray + class ExpandArray < Instruction attr_reader :number, :flags def initialize(number, flags) @@ -1307,6 +1405,15 @@ def to_a(_iseq) [:expandarray, number, flags] end + def deconstruct_keys(_keys) + { number: number, flags: flags } + end + + def ==(other) + other.is_a?(ExpandArray) && other.number == number && + other.flags == flags + end + def length 3 end @@ -1319,10 +1426,6 @@ def pushes number end - def canonical - self - end - def call(vm) object = vm.pop object = @@ -1380,7 +1483,7 @@ def 
call(vm) # end # ~~~ # - class GetBlockParam + class GetBlockParam < Instruction attr_reader :index, :level def initialize(index, level) @@ -1398,20 +1501,21 @@ def to_a(iseq) [:getblockparam, current.local_table.offset(index), level] end - def length - 3 + def deconstruct_keys(_keys) + { index: index, level: level } end - def pops - 0 + def ==(other) + other.is_a?(GetBlockParam) && other.index == index && + other.level == level end - def pushes - 1 + def length + 3 end - def canonical - self + def pushes + 1 end def call(vm) @@ -1434,7 +1538,7 @@ def call(vm) # end # ~~~ # - class GetBlockParamProxy + class GetBlockParamProxy < Instruction attr_reader :index, :level def initialize(index, level) @@ -1455,20 +1559,21 @@ def to_a(iseq) [:getblockparamproxy, current.local_table.offset(index), level] end - def length - 3 + def deconstruct_keys(_keys) + { index: index, level: level } end - def pops - 0 + def ==(other) + other.is_a?(GetBlockParamProxy) && other.index == index && + other.level == level end - def pushes - 1 + def length + 3 end - def canonical - self + def pushes + 1 end def call(vm) @@ -1488,7 +1593,7 @@ def call(vm) # @@class_variable # ~~~ # - class GetClassVariable + class GetClassVariable < Instruction attr_reader :name, :cache def initialize(name, cache) @@ -1507,20 +1612,21 @@ def to_a(_iseq) [:getclassvariable, name, cache] end - def length - 3 + def deconstruct_keys(_keys) + { name: name, cache: cache } end - def pops - 0 + def ==(other) + other.is_a?(GetClassVariable) && other.name == name && + other.cache == cache end - def pushes - 1 + def length + 3 end - def canonical - self + def pushes + 1 end def call(vm) @@ -1542,7 +1648,7 @@ def call(vm) # Constant # ~~~ # - class GetConstant + class GetConstant < Instruction attr_reader :name def initialize(name) @@ -1557,6 +1663,14 @@ def to_a(_iseq) [:getconstant, name] end + def deconstruct_keys(_keys) + { name: name } + end + + def ==(other) + other.is_a?(GetConstant) && other.name == name + end + 
def length 2 end @@ -1569,10 +1683,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) const_base, allow_nil = vm.pop(2) @@ -1604,7 +1714,7 @@ def call(vm) # $$ # ~~~ # - class GetGlobal + class GetGlobal < Instruction attr_reader :name def initialize(name) @@ -1619,20 +1729,20 @@ def to_a(_iseq) [:getglobal, name] end - def length - 2 + def deconstruct_keys(_keys) + { name: name } end - def pops - 0 + def ==(other) + other.is_a?(GetGlobal) && other.name == name end - def pushes - 1 + def length + 2 end - def canonical - self + def pushes + 1 end def call(vm) @@ -1659,7 +1769,7 @@ def call(vm) # @instance_variable # ~~~ # - class GetInstanceVariable + class GetInstanceVariable < Instruction attr_reader :name, :cache def initialize(name, cache) @@ -1678,20 +1788,21 @@ def to_a(_iseq) [:getinstancevariable, name, cache] end - def length - 3 + def deconstruct_keys(_keys) + { name: name, cache: cache } end - def pops - 0 + def ==(other) + other.is_a?(GetInstanceVariable) && other.name == name && + other.cache == cache end - def pushes - 1 + def length + 3 end - def canonical - self + def pushes + 1 end def call(vm) @@ -1714,7 +1825,7 @@ def call(vm) # tap { tap { value } } # ~~~ # - class GetLocal + class GetLocal < Instruction attr_reader :index, :level def initialize(index, level) @@ -1732,20 +1843,20 @@ def to_a(iseq) [:getlocal, current.local_table.offset(index), level] end - def length - 3 + def deconstruct_keys(_keys) + { index: index, level: level } end - def pops - 0 + def ==(other) + other.is_a?(GetLocal) && other.index == index && other.level == level end - def pushes - 1 + def length + 3 end - def canonical - self + def pushes + 1 end def call(vm) @@ -1766,7 +1877,7 @@ def call(vm) # value # ~~~ # - class GetLocalWC0 + class GetLocalWC0 < Instruction attr_reader :index def initialize(index) @@ -1781,12 +1892,16 @@ def to_a(iseq) [:getlocal_WC_0, iseq.local_table.offset(index)] end - def length - 2 + def deconstruct_keys(_keys) + { index: index 
} end - def pops - 0 + def ==(other) + other.is_a?(GetLocalWC0) && other.index == index + end + + def length + 2 end def pushes @@ -1815,7 +1930,7 @@ def call(vm) # self.then { value } # ~~~ # - class GetLocalWC1 + class GetLocalWC1 < Instruction attr_reader :index def initialize(index) @@ -1830,12 +1945,16 @@ def to_a(iseq) [:getlocal_WC_1, iseq.parent_iseq.local_table.offset(index)] end - def length - 2 + def deconstruct_keys(_keys) + { index: index } end - def pops - 0 + def ==(other) + other.is_a?(GetLocalWC1) && other.index == index + end + + def length + 2 end def pushes @@ -1861,7 +1980,7 @@ def call(vm) # 1 if (a == 1) .. (b == 2) # ~~~ # - class GetSpecial + class GetSpecial < Instruction SVAR_LASTLINE = 0 # $_ SVAR_BACKREF = 1 # $~ SVAR_FLIPFLOP_START = 2 # flipflop @@ -1881,20 +2000,20 @@ def to_a(_iseq) [:getspecial, key, type] end - def length - 3 + def deconstruct_keys(_keys) + { key: key, type: type } end - def pops - 0 + def ==(other) + other.is_a?(GetSpecial) && other.key == key && other.type == type end - def pushes - 1 + def length + 3 end - def canonical - self + def pushes + 1 end def call(vm) @@ -1920,7 +2039,7 @@ def call(vm) # :"#{"foo"}" # ~~~ # - class Intern + class Intern < Instruction def disasm(fmt) fmt.instruction("intern") end @@ -1929,8 +2048,12 @@ def to_a(_iseq) [:intern] end - def length - 1 + def deconstruct_keys(_keys) + {} + end + + def ==(other) + other.is_a?(Intern) end def pops @@ -1941,10 +2064,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.pop.to_sym) end @@ -1964,7 +2083,7 @@ def call(vm) # end # ~~~ # - class InvokeBlock + class InvokeBlock < Instruction attr_reader :calldata def initialize(calldata) @@ -1979,6 +2098,14 @@ def to_a(_iseq) [:invokeblock, calldata.to_h] end + def deconstruct_keys(_keys) + { calldata: calldata } + end + + def ==(other) + other.is_a?(InvokeBlock) && other.calldata == calldata + end + def length 2 end @@ -1991,10 +2118,6 @@ def pushes 1 end - def canonical - 
self - end - def call(vm) vm.push(vm.frame_yield.block.call(*vm.pop(calldata.argc))) end @@ -2014,7 +2137,7 @@ def call(vm) # end # ~~~ # - class InvokeSuper + class InvokeSuper < Instruction attr_reader :calldata, :block_iseq def initialize(calldata, block_iseq) @@ -2034,8 +2157,13 @@ def to_a(_iseq) [:invokesuper, calldata.to_h, block_iseq&.to_a] end - def length - 1 + def deconstruct_keys(_keys) + { calldata: calldata, block_iseq: block_iseq } + end + + def ==(other) + other.is_a?(InvokeSuper) && other.calldata == calldata && + other.block_iseq == block_iseq end def pops @@ -2047,10 +2175,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) block = if (iseq = block_iseq) @@ -2090,7 +2214,7 @@ def call(vm) # end # ~~~ # - class Jump + class Jump < Instruction attr_reader :label def initialize(label) @@ -2105,25 +2229,25 @@ def to_a(_iseq) [:jump, label.name] end - def length - 2 - end - - def pops - 0 + def deconstruct_keys(_keys) + { label: label } end - def pushes - 0 + def ==(other) + other.is_a?(Jump) && other.label == label end - def canonical - self + def length + 2 end def call(vm) vm.jump(label) end + + def branch_targets + [label] + end end # ### Summary @@ -2136,7 +2260,7 @@ def call(vm) # ;; # ~~~ # - class Leave + class Leave < Instruction def disasm(fmt) fmt.instruction("leave") end @@ -2145,8 +2269,12 @@ def to_a(_iseq) [:leave] end - def length - 1 + def deconstruct_keys(_keys) + {} + end + + def ==(other) + other.is_a?(Leave) end def pops @@ -2159,13 +2287,13 @@ def pushes 0 end - def canonical - self - end - def call(vm) vm.leave end + + def leaves? 
+ true + end end # ### Summary @@ -2180,7 +2308,7 @@ def call(vm) # ["string"] # ~~~ # - class NewArray + class NewArray < Instruction attr_reader :number def initialize(number) @@ -2195,6 +2323,14 @@ def to_a(_iseq) [:newarray, number] end + def deconstruct_keys(_keys) + { number: number } + end + + def ==(other) + other.is_a?(NewArray) && other.number == number + end + def length 2 end @@ -2207,10 +2343,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.pop(number)) end @@ -2228,7 +2360,7 @@ def call(vm) # ["string", **{ foo: "bar" }] # ~~~ # - class NewArrayKwSplat + class NewArrayKwSplat < Instruction attr_reader :number def initialize(number) @@ -2243,6 +2375,14 @@ def to_a(_iseq) [:newarraykwsplat, number] end + def deconstruct_keys(_keys) + { number: number } + end + + def ==(other) + other.is_a?(NewArrayKwSplat) && other.number == number + end + def length 2 end @@ -2255,10 +2395,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.pop(number)) end @@ -2278,7 +2414,7 @@ def call(vm) # end # ~~~ # - class NewHash + class NewHash < Instruction attr_reader :number def initialize(number) @@ -2293,6 +2429,14 @@ def to_a(_iseq) [:newhash, number] end + def deconstruct_keys(_keys) + { number: number } + end + + def ==(other) + other.is_a?(NewHash) && other.number == number + end + def length 2 end @@ -2305,10 +2449,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.pop(number).each_slice(2).to_h) end @@ -2329,7 +2469,7 @@ def call(vm) # p (x..y), (x...y) # ~~~ # - class NewRange + class NewRange < Instruction attr_reader :exclude_end def initialize(exclude_end) @@ -2344,6 +2484,14 @@ def to_a(_iseq) [:newrange, exclude_end] end + def deconstruct_keys(_keys) + { exclude_end: exclude_end } + end + + def ==(other) + other.is_a?(NewRange) && other.exclude_end == exclude_end + end + def length 2 end @@ -2356,10 +2504,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) 
vm.push(Range.new(*vm.pop(2), exclude_end == 1)) end @@ -2376,7 +2520,7 @@ def call(vm) # raise rescue true # ~~~ # - class Nop + class Nop < Instruction def disasm(fmt) fmt.instruction("nop") end @@ -2385,23 +2529,19 @@ def to_a(_iseq) [:nop] end - def length - 1 - end - - def pops - 0 + def deconstruct_keys(_keys) + {} end - def pushes - 0 + def ==(other) + other.is_a?(Nop) end - def canonical - self + def call(vm) end - def call(vm) + def side_effects? + false end end @@ -2419,7 +2559,7 @@ def call(vm) # "#{5}" # ~~~ # - class ObjToString + class ObjToString < Instruction attr_reader :calldata def initialize(calldata) @@ -2434,6 +2574,14 @@ def to_a(_iseq) [:objtostring, calldata.to_h] end + def deconstruct_keys(_keys) + { calldata: calldata } + end + + def ==(other) + other.is_a?(ObjToString) && other.calldata == calldata + end + def length 2 end @@ -2446,10 +2594,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.pop.to_s) end @@ -2468,7 +2612,7 @@ def call(vm) # END { puts "END" } # ~~~ # - class Once + class Once < Instruction attr_reader :iseq, :cache def initialize(iseq, cache) @@ -2485,20 +2629,20 @@ def to_a(_iseq) [:once, iseq.to_a, cache] end - def length - 3 + def deconstruct_keys(_keys) + { iseq: iseq, cache: cache } end - def pops - 0 + def ==(other) + other.is_a?(Once) && other.iseq == iseq && other.cache == cache end - def pushes - 1 + def length + 3 end - def canonical - self + def pushes + 1 end def call(vm) @@ -2521,7 +2665,7 @@ def call(vm) # 2 & 3 # ~~~ # - class OptAnd + class OptAnd < Instruction attr_reader :calldata def initialize(calldata) @@ -2536,6 +2680,14 @@ def to_a(_iseq) [:opt_and, calldata.to_h] end + def deconstruct_keys(_keys) + { calldata: calldata } + end + + def ==(other) + other.is_a?(OptAnd) && other.calldata == calldata + end + def length 2 end @@ -2569,7 +2721,7 @@ def call(vm) # 7[2] # ~~~ # - class OptAref + class OptAref < Instruction attr_reader :calldata def initialize(calldata) @@ -2584,6 
+2736,14 @@ def to_a(_iseq) [:opt_aref, calldata.to_h] end + def deconstruct_keys(_keys) + { calldata: calldata } + end + + def ==(other) + other.is_a?(OptAref) && other.calldata == calldata + end + def length 2 end @@ -2618,7 +2778,7 @@ def call(vm) # { 'test' => true }['test'] # ~~~ # - class OptArefWith + class OptArefWith < Instruction attr_reader :object, :calldata def initialize(object, calldata) @@ -2637,6 +2797,15 @@ def to_a(_iseq) [:opt_aref_with, object, calldata.to_h] end + def deconstruct_keys(_keys) + { object: object, calldata: calldata } + end + + def ==(other) + other.is_a?(OptArefWith) && other.object == object && + other.calldata == calldata + end + def length 3 end @@ -2649,10 +2818,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.pop[object]) end @@ -2671,7 +2836,7 @@ def call(vm) # {}[:key] = value # ~~~ # - class OptAset + class OptAset < Instruction attr_reader :calldata def initialize(calldata) @@ -2686,6 +2851,14 @@ def to_a(_iseq) [:opt_aset, calldata.to_h] end + def deconstruct_keys(_keys) + { calldata: calldata } + end + + def ==(other) + other.is_a?(OptAset) && other.calldata == calldata + end + def length 2 end @@ -2719,7 +2892,7 @@ def call(vm) # {}["key"] = value # ~~~ # - class OptAsetWith + class OptAsetWith < Instruction attr_reader :object, :calldata def initialize(object, calldata) @@ -2738,6 +2911,15 @@ def to_a(_iseq) [:opt_aset_with, object, calldata.to_h] end + def deconstruct_keys(_keys) + { object: object, calldata: calldata } + end + + def ==(other) + other.is_a?(OptAsetWith) && other.object == object && + other.calldata == calldata + end + def length 3 end @@ -2750,10 +2932,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) hash, value = vm.pop(2) vm.push(hash[object] = value) @@ -2783,7 +2961,7 @@ def call(vm) # end # ~~~ # - class OptCaseDispatch + class OptCaseDispatch < Instruction attr_reader :case_dispatch_hash, :else_label def initialize(case_dispatch_hash, else_label) 
@@ -2806,6 +2984,16 @@ def to_a(_iseq) ] end + def deconstruct_keys(_keys) + { case_dispatch_hash: case_dispatch_hash, else_label: else_label } + end + + def ==(other) + other.is_a?(OptCaseDispatch) && + other.case_dispatch_hash == case_dispatch_hash && + other.else_label == else_label + end + def length 3 end @@ -2814,16 +3002,16 @@ def pops 1 end - def pushes - 0 + def call(vm) + vm.jump(case_dispatch_hash.fetch(vm.pop, else_label)) end - def canonical - self + def branch_targets + case_dispatch_hash.values.push(else_label) end - def call(vm) - vm.jump(case_dispatch_hash.fetch(vm.pop, else_label)) + def falls_through? + true end end @@ -2840,7 +3028,7 @@ def call(vm) # 2 / 3 # ~~~ # - class OptDiv + class OptDiv < Instruction attr_reader :calldata def initialize(calldata) @@ -2855,6 +3043,14 @@ def to_a(_iseq) [:opt_div, calldata.to_h] end + def deconstruct_keys(_keys) + { calldata: calldata } + end + + def ==(other) + other.is_a?(OptDiv) && other.calldata == calldata + end + def length 2 end @@ -2888,7 +3084,7 @@ def call(vm) # "".empty? 
# ~~~ # - class OptEmptyP + class OptEmptyP < Instruction attr_reader :calldata def initialize(calldata) @@ -2903,6 +3099,14 @@ def to_a(_iseq) [:opt_empty_p, calldata.to_h] end + def deconstruct_keys(_keys) + { calldata: calldata } + end + + def ==(other) + other.is_a?(OptEmptyP) && other.calldata == calldata + end + def length 2 end @@ -2937,7 +3141,7 @@ def call(vm) # 2 == 2 # ~~~ # - class OptEq + class OptEq < Instruction attr_reader :calldata def initialize(calldata) @@ -2952,6 +3156,14 @@ def to_a(_iseq) [:opt_eq, calldata.to_h] end + def deconstruct_keys(_keys) + { calldata: calldata } + end + + def ==(other) + other.is_a?(OptEq) && other.calldata == calldata + end + def length 2 end @@ -2986,7 +3198,7 @@ def call(vm) # 4 >= 3 # ~~~ # - class OptGE + class OptGE < Instruction attr_reader :calldata def initialize(calldata) @@ -3001,6 +3213,14 @@ def to_a(_iseq) [:opt_ge, calldata.to_h] end + def deconstruct_keys(_keys) + { calldata: calldata } + end + + def ==(other) + other.is_a?(OptGE) && other.calldata == calldata + end + def length 2 end @@ -3034,7 +3254,7 @@ def call(vm) # ::Object # ~~~ # - class OptGetConstantPath + class OptGetConstantPath < Instruction attr_reader :names def initialize(names) @@ -3050,20 +3270,20 @@ def to_a(_iseq) [:opt_getconstant_path, names] end - def length - 2 + def deconstruct_keys(_keys) + { names: names } end - def pops - 0 + def ==(other) + other.is_a?(OptGetConstantPath) && other.names == names end - def pushes - 1 + def length + 2 end - def canonical - self + def pushes + 1 end def call(vm) @@ -3091,7 +3311,7 @@ def call(vm) # 4 > 3 # ~~~ # - class OptGT + class OptGT < Instruction attr_reader :calldata def initialize(calldata) @@ -3106,6 +3326,14 @@ def to_a(_iseq) [:opt_gt, calldata.to_h] end + def deconstruct_keys(_keys) + { calldata: calldata } + end + + def ==(other) + other.is_a?(OptGT) && other.calldata == calldata + end + def length 2 end @@ -3140,7 +3368,7 @@ def call(vm) # 3 <= 4 # ~~~ # - class OptLE + class 
OptLE < Instruction attr_reader :calldata def initialize(calldata) @@ -3155,6 +3383,14 @@ def to_a(_iseq) [:opt_le, calldata.to_h] end + def deconstruct_keys(_keys) + { calldata: calldata } + end + + def ==(other) + other.is_a?(OptLE) && other.calldata == calldata + end + def length 2 end @@ -3189,7 +3425,7 @@ def call(vm) # "".length # ~~~ # - class OptLength + class OptLength < Instruction attr_reader :calldata def initialize(calldata) @@ -3204,6 +3440,14 @@ def to_a(_iseq) [:opt_length, calldata.to_h] end + def deconstruct_keys(_keys) + { calldata: calldata } + end + + def ==(other) + other.is_a?(OptLength) && other.calldata == calldata + end + def length 2 end @@ -3238,7 +3482,7 @@ def call(vm) # 3 < 4 # ~~~ # - class OptLT + class OptLT < Instruction attr_reader :calldata def initialize(calldata) @@ -3253,6 +3497,14 @@ def to_a(_iseq) [:opt_lt, calldata.to_h] end + def deconstruct_keys(_keys) + { calldata: calldata } + end + + def ==(other) + other.is_a?(OptLT) && other.calldata == calldata + end + def length 2 end @@ -3287,7 +3539,7 @@ def call(vm) # "" << 2 # ~~~ # - class OptLTLT + class OptLTLT < Instruction attr_reader :calldata def initialize(calldata) @@ -3302,6 +3554,14 @@ def to_a(_iseq) [:opt_ltlt, calldata.to_h] end + def deconstruct_keys(_keys) + { calldata: calldata } + end + + def ==(other) + other.is_a?(OptLTLT) && other.calldata == calldata + end + def length 2 end @@ -3337,7 +3597,7 @@ def call(vm) # 3 - 2 # ~~~ # - class OptMinus + class OptMinus < Instruction attr_reader :calldata def initialize(calldata) @@ -3352,6 +3612,14 @@ def to_a(_iseq) [:opt_minus, calldata.to_h] end + def deconstruct_keys(_keys) + { calldata: calldata } + end + + def ==(other) + other.is_a?(OptMinus) && other.calldata == calldata + end + def length 2 end @@ -3386,7 +3654,7 @@ def call(vm) # 4 % 2 # ~~~ # - class OptMod + class OptMod < Instruction attr_reader :calldata def initialize(calldata) @@ -3401,6 +3669,14 @@ def to_a(_iseq) [:opt_mod, calldata.to_h] end + 
def deconstruct_keys(_keys) + { calldata: calldata } + end + + def ==(other) + other.is_a?(OptMod) && other.calldata == calldata + end + def length 2 end @@ -3435,7 +3711,7 @@ def call(vm) # 3 * 2 # ~~~ # - class OptMult + class OptMult < Instruction attr_reader :calldata def initialize(calldata) @@ -3450,6 +3726,14 @@ def to_a(_iseq) [:opt_mult, calldata.to_h] end + def deconstruct_keys(_keys) + { calldata: calldata } + end + + def ==(other) + other.is_a?(OptMult) && other.calldata == calldata + end + def length 2 end @@ -3486,7 +3770,7 @@ def call(vm) # 2 != 2 # ~~~ # - class OptNEq + class OptNEq < Instruction attr_reader :eq_calldata, :neq_calldata def initialize(eq_calldata, neq_calldata) @@ -3505,6 +3789,15 @@ def to_a(_iseq) [:opt_neq, eq_calldata.to_h, neq_calldata.to_h] end + def deconstruct_keys(_keys) + { eq_calldata: eq_calldata, neq_calldata: neq_calldata } + end + + def ==(other) + other.is_a?(OptNEq) && other.eq_calldata == eq_calldata && + other.neq_calldata == neq_calldata + end + def length 3 end @@ -3517,10 +3810,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) receiver, argument = vm.pop(2) vm.push(receiver != argument) @@ -3529,9 +3818,10 @@ def call(vm) # ### Summary # - # `opt_newarray_max` is a specialization that occurs when the `max` method - # is called on an array literal. It pops the values of the array off the - # stack and pushes on the result. + # `opt_newarray_send` is a specialization that occurs when a dynamic array + # literal is created and immediately sent the `min`, `max`, or `hash` + # methods. It pops the values of the array off the stack and pushes on the + # result of the method call. 
# # ### Usage # @@ -3539,71 +3829,36 @@ def call(vm) # [a, b, c].max # ~~~ # - class OptNewArrayMax - attr_reader :number + class OptNewArraySend < Instruction + attr_reader :number, :method - def initialize(number) + def initialize(number, method) @number = number + @method = method end def disasm(fmt) - fmt.instruction("opt_newarray_max", [fmt.object(number)]) + fmt.instruction( + "opt_newarray_send", + [fmt.object(number), fmt.object(method)] + ) end def to_a(_iseq) - [:opt_newarray_max, number] - end - - def length - 2 - end - - def pops - number - end - - def pushes - 1 - end - - def canonical - self - end - - def call(vm) - vm.push(vm.pop(number).max) - end - end - - # ### Summary - # - # `opt_newarray_min` is a specialization that occurs when the `min` method - # is called on an array literal. It pops the values of the array off the - # stack and pushes on the result. - # - # ### Usage - # - # ~~~ruby - # [a, b, c].min - # ~~~ - # - class OptNewArrayMin - attr_reader :number - - def initialize(number) - @number = number + [:opt_newarray_send, number, method] end - def disasm(fmt) - fmt.instruction("opt_newarray_min", [fmt.object(number)]) + def deconstruct_keys(_keys) + { number: number, method: method } end - def to_a(_iseq) - [:opt_newarray_min, number] + def ==(other) + other.is_a?(OptNewArraySend) && other.number == number && + other.method == method end def length - 2 + 3 end def pops @@ -3614,12 +3869,8 @@ def pushes 1 end - def canonical - self - end - def call(vm) - vm.push(vm.pop(number).min) + vm.push(vm.pop(number).__send__(method)) end end @@ -3636,7 +3887,7 @@ def call(vm) # "".nil? 
# ~~~ # - class OptNilP + class OptNilP < Instruction attr_reader :calldata def initialize(calldata) @@ -3651,6 +3902,14 @@ def to_a(_iseq) [:opt_nil_p, calldata.to_h] end + def deconstruct_keys(_keys) + { calldata: calldata } + end + + def ==(other) + other.is_a?(OptNilP) && other.calldata == calldata + end + def length 2 end @@ -3683,7 +3942,7 @@ def call(vm) # !true # ~~~ # - class OptNot + class OptNot < Instruction attr_reader :calldata def initialize(calldata) @@ -3698,6 +3957,14 @@ def to_a(_iseq) [:opt_not, calldata.to_h] end + def deconstruct_keys(_keys) + { calldata: calldata } + end + + def ==(other) + other.is_a?(OptNot) && other.calldata == calldata + end + def length 2 end @@ -3732,7 +3999,7 @@ def call(vm) # 2 | 3 # ~~~ # - class OptOr + class OptOr < Instruction attr_reader :calldata def initialize(calldata) @@ -3747,6 +4014,14 @@ def to_a(_iseq) [:opt_or, calldata.to_h] end + def deconstruct_keys(_keys) + { calldata: calldata } + end + + def ==(other) + other.is_a?(OptOr) && other.calldata == calldata + end + def length 2 end @@ -3781,7 +4056,7 @@ def call(vm) # 2 + 3 # ~~~ # - class OptPlus + class OptPlus < Instruction attr_reader :calldata def initialize(calldata) @@ -3796,6 +4071,14 @@ def to_a(_iseq) [:opt_plus, calldata.to_h] end + def deconstruct_keys(_keys) + { calldata: calldata } + end + + def ==(other) + other.is_a?(OptPlus) && other.calldata == calldata + end + def length 2 end @@ -3829,7 +4112,7 @@ def call(vm) # /a/ =~ "a" # ~~~ # - class OptRegExpMatch2 + class OptRegExpMatch2 < Instruction attr_reader :calldata def initialize(calldata) @@ -3844,6 +4127,14 @@ def to_a(_iseq) [:opt_regexpmatch2, calldata.to_h] end + def deconstruct_keys(_keys) + { calldata: calldata } + end + + def ==(other) + other.is_a?(OptRegExpMatch2) && other.calldata == calldata + end + def length 2 end @@ -3877,7 +4168,7 @@ def call(vm) # puts "Hello, world!" 
# ~~~ # - class OptSendWithoutBlock + class OptSendWithoutBlock < Instruction attr_reader :calldata def initialize(calldata) @@ -3892,6 +4183,14 @@ def to_a(_iseq) [:opt_send_without_block, calldata.to_h] end + def deconstruct_keys(_keys) + { calldata: calldata } + end + + def ==(other) + other.is_a?(OptSendWithoutBlock) && other.calldata == calldata + end + def length 2 end @@ -3926,7 +4225,7 @@ def call(vm) # "".size # ~~~ # - class OptSize + class OptSize < Instruction attr_reader :calldata def initialize(calldata) @@ -3941,6 +4240,14 @@ def to_a(_iseq) [:opt_size, calldata.to_h] end + def deconstruct_keys(_keys) + { calldata: calldata } + end + + def ==(other) + other.is_a?(OptSize) && other.calldata == calldata + end + def length 2 end @@ -3974,7 +4281,7 @@ def call(vm) # "hello".freeze # ~~~ # - class OptStrFreeze + class OptStrFreeze < Instruction attr_reader :object, :calldata def initialize(object, calldata) @@ -3993,20 +4300,21 @@ def to_a(_iseq) [:opt_str_freeze, object, calldata.to_h] end - def length - 3 + def deconstruct_keys(_keys) + { object: object, calldata: calldata } end - def pops - 0 + def ==(other) + other.is_a?(OptStrFreeze) && other.object == object && + other.calldata == calldata end - def pushes - 1 + def length + 3 end - def canonical - self + def pushes + 1 end def call(vm) @@ -4026,7 +4334,7 @@ def call(vm) # -"string" # ~~~ # - class OptStrUMinus + class OptStrUMinus < Instruction attr_reader :object, :calldata def initialize(object, calldata) @@ -4045,20 +4353,21 @@ def to_a(_iseq) [:opt_str_uminus, object, calldata.to_h] end - def length - 3 + def deconstruct_keys(_keys) + { object: object, calldata: calldata } end - def pops - 0 + def ==(other) + other.is_a?(OptStrUMinus) && other.object == object && + other.calldata == calldata end - def pushes - 1 + def length + 3 end - def canonical - self + def pushes + 1 end def call(vm) @@ -4079,7 +4388,7 @@ def call(vm) # "".succ # ~~~ # - class OptSucc + class OptSucc < Instruction 
attr_reader :calldata def initialize(calldata) @@ -4094,6 +4403,14 @@ def to_a(_iseq) [:opt_succ, calldata.to_h] end + def deconstruct_keys(_keys) + { calldata: calldata } + end + + def ==(other) + other.is_a?(OptSucc) && other.calldata == calldata + end + def length 2 end @@ -4125,7 +4442,7 @@ def call(vm) # a ||= 2 # ~~~ # - class Pop + class Pop < Instruction def disasm(fmt) fmt.instruction("pop") end @@ -4134,25 +4451,25 @@ def to_a(_iseq) [:pop] end - def length - 1 + def deconstruct_keys(_keys) + {} end - def pops - 1 - end - - def pushes - 0 + def ==(other) + other.is_a?(Pop) end - def canonical - self + def pops + 1 end def call(vm) vm.pop end + + def side_effects? + false + end end # ### Summary @@ -4165,7 +4482,7 @@ def call(vm) # nil # ~~~ # - class PutNil + class PutNil < Instruction def disasm(fmt) fmt.instruction("putnil") end @@ -4174,12 +4491,12 @@ def to_a(_iseq) [:putnil] end - def length - 1 + def deconstruct_keys(_keys) + {} end - def pops - 0 + def ==(other) + other.is_a?(PutNil) end def pushes @@ -4193,6 +4510,10 @@ def canonical def call(vm) canonical.call(vm) end + + def side_effects? + false + end end # ### Summary @@ -4205,7 +4526,7 @@ def call(vm) # 5 # ~~~ # - class PutObject + class PutObject < Instruction attr_reader :object def initialize(object) @@ -4220,25 +4541,29 @@ def to_a(_iseq) [:putobject, object] end - def length - 2 + def deconstruct_keys(_keys) + { object: object } end - def pops - 0 + def ==(other) + other.is_a?(PutObject) && other.object == object end - def pushes - 1 + def length + 2 end - def canonical - self + def pushes + 1 end def call(vm) vm.push(object) end + + def side_effects? 
+ false + end end # ### Summary @@ -4253,7 +4578,7 @@ def call(vm) # 0 # ~~~ # - class PutObjectInt2Fix0 + class PutObjectInt2Fix0 < Instruction def disasm(fmt) fmt.instruction("putobject_INT2FIX_0_") end @@ -4262,12 +4587,12 @@ def to_a(_iseq) [:putobject_INT2FIX_0_] end - def length - 1 + def deconstruct_keys(_keys) + {} end - def pops - 0 + def ==(other) + other.is_a?(PutObjectInt2Fix0) end def pushes @@ -4281,6 +4606,10 @@ def canonical def call(vm) canonical.call(vm) end + + def side_effects? + false + end end # ### Summary @@ -4295,7 +4624,7 @@ def call(vm) # 1 # ~~~ # - class PutObjectInt2Fix1 + class PutObjectInt2Fix1 < Instruction def disasm(fmt) fmt.instruction("putobject_INT2FIX_1_") end @@ -4304,12 +4633,12 @@ def to_a(_iseq) [:putobject_INT2FIX_1_] end - def length - 1 + def deconstruct_keys(_keys) + {} end - def pops - 0 + def ==(other) + other.is_a?(PutObjectInt2Fix1) end def pushes @@ -4323,6 +4652,10 @@ def canonical def call(vm) canonical.call(vm) end + + def side_effects? + false + end end # ### Summary @@ -4335,7 +4668,7 @@ def call(vm) # puts "Hello, world!" # ~~~ # - class PutSelf + class PutSelf < Instruction def disasm(fmt) fmt.instruction("putself") end @@ -4344,25 +4677,25 @@ def to_a(_iseq) [:putself] end - def length - 1 + def deconstruct_keys(_keys) + {} end - def pops - 0 + def ==(other) + other.is_a?(PutSelf) end def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.frame._self) end + + def side_effects? 
+ false + end end # ### Summary @@ -4377,7 +4710,7 @@ def call(vm) # alias foo bar # ~~~ # - class PutSpecialObject + class PutSpecialObject < Instruction OBJECT_VMCORE = 1 OBJECT_CBASE = 2 OBJECT_CONST_BASE = 3 @@ -4396,20 +4729,20 @@ def to_a(_iseq) [:putspecialobject, object] end - def length - 2 + def deconstruct_keys(_keys) + { object: object } end - def pops - 0 + def ==(other) + other.is_a?(PutSpecialObject) && other.object == object end - def pushes - 1 + def length + 2 end - def canonical - self + def pushes + 1 end def call(vm) @@ -4436,7 +4769,7 @@ def call(vm) # "foo" # ~~~ # - class PutString + class PutString < Instruction attr_reader :object def initialize(object) @@ -4451,20 +4784,20 @@ def to_a(_iseq) [:putstring, object] end - def length - 2 + def deconstruct_keys(_keys) + { object: object } end - def pops - 0 + def ==(other) + other.is_a?(PutString) && other.object == object end - def pushes - 1 + def length + 2 end - def canonical - self + def pushes + 1 end def call(vm) @@ -4485,7 +4818,7 @@ def call(vm) # "hello".tap { |i| p i } # ~~~ # - class Send + class Send < Instruction attr_reader :calldata, :block_iseq def initialize(calldata, block_iseq) @@ -4505,6 +4838,15 @@ def to_a(_iseq) [:send, calldata.to_h, block_iseq&.to_a] end + def deconstruct_keys(_keys) + { calldata: calldata, block_iseq: block_iseq } + end + + def ==(other) + other.is_a?(Send) && other.calldata == calldata && + other.block_iseq == block_iseq + end + def length 3 end @@ -4518,10 +4860,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) block = if (iseq = block_iseq) @@ -4564,7 +4902,7 @@ def call(vm) # end # ~~~ # - class SetBlockParam + class SetBlockParam < Instruction attr_reader :index, :level def initialize(index, level) @@ -4582,20 +4920,21 @@ def to_a(iseq) [:setblockparam, current.local_table.offset(index), level] end - def length - 3 + def deconstruct_keys(_keys) + { index: index, level: level } end - def pops - 1 + def ==(other) + 
other.is_a?(SetBlockParam) && other.index == index && + other.level == level end - def pushes - 0 + def length + 3 end - def canonical - self + def pops + 1 end def call(vm) @@ -4616,7 +4955,7 @@ def call(vm) # @@class_variable = 1 # ~~~ # - class SetClassVariable + class SetClassVariable < Instruction attr_reader :name, :cache def initialize(name, cache) @@ -4635,20 +4974,21 @@ def to_a(_iseq) [:setclassvariable, name, cache] end - def length - 3 + def deconstruct_keys(_keys) + { name: name, cache: cache } end - def pops - 1 + def ==(other) + other.is_a?(SetClassVariable) && other.name == name && + other.cache == cache end - def pushes - 0 + def length + 3 end - def canonical - self + def pops + 1 end def call(vm) @@ -4669,7 +5009,7 @@ def call(vm) # Constant = 1 # ~~~ # - class SetConstant + class SetConstant < Instruction attr_reader :name def initialize(name) @@ -4684,20 +5024,20 @@ def to_a(_iseq) [:setconstant, name] end - def length - 2 + def deconstruct_keys(_keys) + { name: name } end - def pops - 2 + def ==(other) + other.is_a?(SetConstant) && other.name == name end - def pushes - 0 + def length + 2 end - def canonical - self + def pops + 2 end def call(vm) @@ -4717,7 +5057,7 @@ def call(vm) # $global = 5 # ~~~ # - class SetGlobal + class SetGlobal < Instruction attr_reader :name def initialize(name) @@ -4732,20 +5072,20 @@ def to_a(_iseq) [:setglobal, name] end - def length - 2 + def deconstruct_keys(_keys) + { name: name } end - def pops - 1 + def ==(other) + other.is_a?(SetGlobal) && other.name == name end - def pushes - 0 + def length + 2 end - def canonical - self + def pops + 1 end def call(vm) @@ -4771,7 +5111,7 @@ def call(vm) # @instance_variable = 1 # ~~~ # - class SetInstanceVariable + class SetInstanceVariable < Instruction attr_reader :name, :cache def initialize(name, cache) @@ -4790,20 +5130,21 @@ def to_a(_iseq) [:setinstancevariable, name, cache] end - def length - 3 + def deconstruct_keys(_keys) + { name: name, cache: cache } end - def 
pops - 1 + def ==(other) + other.is_a?(SetInstanceVariable) && other.name == name && + other.cache == cache end - def pushes - 0 + def length + 3 end - def canonical - self + def pops + 1 end def call(vm) @@ -4826,7 +5167,7 @@ def call(vm) # tap { tap { value = 10 } } # ~~~ # - class SetLocal + class SetLocal < Instruction attr_reader :index, :level def initialize(index, level) @@ -4844,20 +5185,20 @@ def to_a(iseq) [:setlocal, current.local_table.offset(index), level] end - def length - 3 + def deconstruct_keys(_keys) + { index: index, level: level } end - def pops - 1 + def ==(other) + other.is_a?(SetLocal) && other.index == index && other.level == level end - def pushes - 0 + def length + 3 end - def canonical - self + def pops + 1 end def call(vm) @@ -4878,7 +5219,7 @@ def call(vm) # value = 5 # ~~~ # - class SetLocalWC0 + class SetLocalWC0 < Instruction attr_reader :index def initialize(index) @@ -4893,6 +5234,14 @@ def to_a(iseq) [:setlocal_WC_0, iseq.local_table.offset(index)] end + def deconstruct_keys(_keys) + { index: index } + end + + def ==(other) + other.is_a?(SetLocalWC0) && other.index == index + end + def length 2 end @@ -4901,10 +5250,6 @@ def pops 1 end - def pushes - 0 - end - def canonical SetLocal.new(index, 0) end @@ -4927,7 +5272,7 @@ def call(vm) # self.then { value = 10 } # ~~~ # - class SetLocalWC1 + class SetLocalWC1 < Instruction attr_reader :index def initialize(index) @@ -4942,6 +5287,14 @@ def to_a(iseq) [:setlocal_WC_1, iseq.parent_iseq.local_table.offset(index)] end + def deconstruct_keys(_keys) + { index: index } + end + + def ==(other) + other.is_a?(SetLocalWC1) && other.index == index + end + def length 2 end @@ -4950,10 +5303,6 @@ def pops 1 end - def pushes - 0 - end - def canonical SetLocal.new(index, 1) end @@ -4974,7 +5323,7 @@ def call(vm) # {}[:key] = 'val' # ~~~ # - class SetN + class SetN < Instruction attr_reader :number def initialize(number) @@ -4989,6 +5338,14 @@ def to_a(_iseq) [:setn, number] end + def 
deconstruct_keys(_keys) + { number: number } + end + + def ==(other) + other.is_a?(SetN) && other.number == number + end + def length 2 end @@ -5001,10 +5358,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.stack[-number - 1] = vm.stack.last end @@ -5022,7 +5375,7 @@ def call(vm) # baz if (foo == 1) .. (bar == 1) # ~~~ # - class SetSpecial + class SetSpecial < Instruction attr_reader :key def initialize(key) @@ -5037,20 +5390,20 @@ def to_a(_iseq) [:setspecial, key] end - def length - 2 + def deconstruct_keys(_keys) + { key: key } end - def pops - 1 + def ==(other) + other.is_a?(SetSpecial) && other.key == key end - def pushes - 0 + def length + 2 end - def canonical - self + def pops + 1 end def call(vm) @@ -5077,7 +5430,7 @@ def call(vm) # x = *(5) # ~~~ # - class SplatArray + class SplatArray < Instruction attr_reader :flag def initialize(flag) @@ -5092,6 +5445,14 @@ def to_a(_iseq) [:splatarray, flag] end + def deconstruct_keys(_keys) + { flag: flag } + end + + def ==(other) + other.is_a?(SplatArray) && other.flag == flag + end + def length 2 end @@ -5104,10 +5465,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) value = vm.pop @@ -5147,7 +5504,7 @@ def call(vm) # !!defined?([[]]) # ~~~ # - class Swap + class Swap < Instruction def disasm(fmt) fmt.instruction("swap") end @@ -5156,8 +5513,12 @@ def to_a(_iseq) [:swap] end - def length - 1 + def deconstruct_keys(_keys) + {} + end + + def ==(other) + other.is_a?(Swap) end def pops @@ -5168,10 +5529,6 @@ def pushes 2 end - def canonical - self - end - def call(vm) left, right = vm.pop(2) vm.push(right, left) @@ -5190,7 +5547,7 @@ def call(vm) # [1, 2, 3].map { break 2 } # ~~~ # - class Throw + class Throw < Instruction RUBY_TAG_NONE = 0x0 RUBY_TAG_RETURN = 0x1 RUBY_TAG_BREAK = 0x2 @@ -5218,6 +5575,14 @@ def to_a(_iseq) [:throw, type] end + def deconstruct_keys(_keys) + { type: type } + end + + def ==(other) + other.is_a?(Throw) && other.type == type + end + def length 2 end @@ 
-5230,10 +5595,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) state = type & VM_THROW_STATE_MASK value = vm.pop @@ -5289,7 +5650,7 @@ def error_backtrace(vm) # end # ~~~ # - class TopN + class TopN < Instruction attr_reader :number def initialize(number) @@ -5304,20 +5665,20 @@ def to_a(_iseq) [:topn, number] end - def length - 2 + def deconstruct_keys(_keys) + { number: number } end - def pops - 0 + def ==(other) + other.is_a?(TopN) && other.number == number end - def pushes - 1 + def length + 2 end - def canonical - self + def pushes + 1 end def call(vm) @@ -5336,7 +5697,7 @@ def call(vm) # /foo #{bar}/ # ~~~ # - class ToRegExp + class ToRegExp < Instruction attr_reader :options, :length def initialize(options, length) @@ -5352,6 +5713,15 @@ def to_a(_iseq) [:toregexp, options, length] end + def deconstruct_keys(_keys) + { options: options, length: length } + end + + def ==(other) + other.is_a?(ToRegExp) && other.options == options && + other.length == length + end + def pops length end @@ -5360,10 +5730,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(Regexp.new(vm.pop(length).join, options)) end diff --git a/lib/syntax_tree/yarv/legacy.rb b/lib/syntax_tree/yarv/legacy.rb index b2e33290..8715993a 100644 --- a/lib/syntax_tree/yarv/legacy.rb +++ b/lib/syntax_tree/yarv/legacy.rb @@ -19,7 +19,7 @@ module Legacy # @@class_variable # ~~~ # - class GetClassVariable + class GetClassVariable < Instruction attr_reader :name def initialize(name) @@ -34,12 +34,16 @@ def to_a(_iseq) [:getclassvariable, name] end - def length - 2 + def deconstruct_keys(_keys) + { name: name } end - def pops - 0 + def ==(other) + other.is_a?(GetClassVariable) && other.name == name + end + + def length + 2 end def pushes @@ -71,7 +75,7 @@ def call(vm) # Constant # ~~~ # - class OptGetInlineCache + class OptGetInlineCache < Instruction attr_reader :label, :cache def initialize(label, cache) @@ -90,24 +94,137 @@ def to_a(_iseq) [:opt_getinlinecache, 
label.name, cache] end + def deconstruct_keys(_keys) + { label: label, cache: cache } + end + + def ==(other) + other.is_a?(OptGetInlineCache) && other.label == label && + other.cache == cache + end + def length 3 end + def pushes + 1 + end + + def call(vm) + vm.push(nil) + end + + def branch_targets + [label] + end + + def falls_through? + true + end + end + + # ### Summary + # + # `opt_newarray_max` is a specialization that occurs when the `max` method + # is called on an array literal. It pops the values of the array off the + # stack and pushes on the result. + # + # ### Usage + # + # ~~~ruby + # [a, b, c].max + # ~~~ + # + class OptNewArrayMax < Instruction + attr_reader :number + + def initialize(number) + @number = number + end + + def disasm(fmt) + fmt.instruction("opt_newarray_max", [fmt.object(number)]) + end + + def to_a(_iseq) + [:opt_newarray_max, number] + end + + def deconstruct_keys(_keys) + { number: number } + end + + def ==(other) + other.is_a?(OptNewArrayMax) && other.number == number + end + + def length + 2 + end + def pops - 0 + number end def pushes 1 end - def canonical - self + def call(vm) + vm.push(vm.pop(number).max) + end + end + + # ### Summary + # + # `opt_newarray_min` is a specialization that occurs when the `min` method + # is called on an array literal. It pops the values of the array off the + # stack and pushes on the result. 
+ # + # ### Usage + # + # ~~~ruby + # [a, b, c].min + # ~~~ + # + class OptNewArrayMin < Instruction + attr_reader :number + + def initialize(number) + @number = number + end + + def disasm(fmt) + fmt.instruction("opt_newarray_min", [fmt.object(number)]) + end + + def to_a(_iseq) + [:opt_newarray_min, number] + end + + def deconstruct_keys(_keys) + { number: number } + end + + def ==(other) + other.is_a?(OptNewArrayMin) && other.number == number + end + + def length + 2 + end + + def pops + number + end + + def pushes + 1 end def call(vm) - vm.push(nil) + vm.push(vm.pop(number).min) end end @@ -126,7 +243,7 @@ def call(vm) # Constant # ~~~ # - class OptSetInlineCache + class OptSetInlineCache < Instruction attr_reader :cache def initialize(cache) @@ -141,6 +258,14 @@ def to_a(_iseq) [:opt_setinlinecache, cache] end + def deconstruct_keys(_keys) + { cache: cache } + end + + def ==(other) + other.is_a?(OptSetInlineCache) && other.cache == cache + end + def length 2 end @@ -153,10 +278,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) end end @@ -175,7 +296,7 @@ def call(vm) # @@class_variable = 1 # ~~~ # - class SetClassVariable + class SetClassVariable < Instruction attr_reader :name def initialize(name) @@ -190,6 +311,14 @@ def to_a(_iseq) [:setclassvariable, name] end + def deconstruct_keys(_keys) + { name: name } + end + + def ==(other) + other.is_a?(SetClassVariable) && other.name == name + end + def length 2 end @@ -198,10 +327,6 @@ def pops 1 end - def pushes - 0 - end - def canonical YARV::SetClassVariable.new(name, nil) end diff --git a/lib/syntax_tree/yarv/sea_of_nodes.rb b/lib/syntax_tree/yarv/sea_of_nodes.rb new file mode 100644 index 00000000..33ef14f7 --- /dev/null +++ b/lib/syntax_tree/yarv/sea_of_nodes.rb @@ -0,0 +1,534 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + # A sea of nodes is an intermediate representation used by a compiler to + # represent both control and data flow in the same graph. 
The way we use it + # allows us to have the vertices of the graph represent either an + # instruction in the instruction sequence or a synthesized node that we add + # to the graph. The edges of the graph represent either control flow or data + # flow. + class SeaOfNodes + # This object represents a node in the graph that holds a YARV + # instruction. + class InsnNode + attr_reader :inputs, :outputs, :insn, :offset + + def initialize(insn, offset) + @inputs = [] + @outputs = [] + + @insn = insn + @offset = offset + end + + def id + offset + end + + def label + "%04d %s" % [offset, insn.disasm(Disassembler::Squished.new)] + end + end + + # Phi nodes are used to represent the merging of data flow from multiple + # incoming blocks. + class PhiNode + attr_reader :inputs, :outputs, :id + + def initialize(id) + @inputs = [] + @outputs = [] + @id = id + end + + def label + "#{id} φ" + end + end + + # Merge nodes are present in any block that has multiple incoming blocks. + # It provides a place for Phi nodes to attach their results. + class MergeNode + attr_reader :inputs, :outputs, :id + + def initialize(id) + @inputs = [] + @outputs = [] + @id = id + end + + def label + "#{id} ψ" + end + end + + # The edge of a graph represents either control flow or data flow. + class Edge + TYPES = %i[data control info].freeze + + attr_reader :from + attr_reader :to + attr_reader :type + attr_reader :label + + def initialize(from, to, type, label) + raise unless TYPES.include?(type) + + @from = from + @to = to + @type = type + @label = label + end + end + + # A subgraph represents the local data and control flow of a single basic + # block. + class SubGraph + attr_reader :first_fixed, :last_fixed, :inputs, :outputs + + def initialize(first_fixed, last_fixed, inputs, outputs) + @first_fixed = first_fixed + @last_fixed = last_fixed + @inputs = inputs + @outputs = outputs + end + end + + # The compiler is responsible for taking a data flow graph and turning it + # into a sea of nodes. 
+ class Compiler + attr_reader :dfg, :nodes + + def initialize(dfg) + @dfg = dfg + @nodes = [] + + # We need to put a unique ID on the synthetic nodes in the graph, so + # we keep a counter that we increment any time we create a new + # synthetic node. + @id_counter = 999 + end + + def compile + local_graphs = {} + dfg.blocks.each do |block| + local_graphs[block.id] = create_local_graph(block) + end + + connect_local_graphs_control(local_graphs) + connect_local_graphs_data(local_graphs) + cleanup_phi_nodes + cleanup_insn_nodes + + SeaOfNodes.new(dfg, nodes, local_graphs).tap(&:verify) + end + + private + + # Counter for synthetic nodes. + def id_counter + @id_counter += 1 + end + + # Create a sub-graph for a single basic block - block block argument + # inputs and outputs will be left dangling, to be connected later. + def create_local_graph(block) + block_flow = dfg.block_flows.fetch(block.id) + + # A map of instructions to nodes. + insn_nodes = {} + + # Create a node for each instruction in the block. + block.each_with_length do |insn, offset| + node = InsnNode.new(insn, offset) + insn_nodes[offset] = node + nodes << node + end + + # The first and last node in the sub-graph, and the last fixed node. + previous_fixed = nil + first_fixed = nil + last_fixed = nil + + # The merge node for the phi nodes to attach to. + merge_node = nil + + # If there is more than one predecessor and we have basic block + # arguments coming in, then we need a merge node for the phi nodes to + # attach to. + if block.incoming_blocks.size > 1 && !block_flow.in.empty? + merge_node = MergeNode.new(id_counter) + nodes << merge_node + + previous_fixed = merge_node + first_fixed = merge_node + last_fixed = merge_node + end + + # Connect local control flow (only nodes with side effects.) + block.each_with_length do |insn, length| + if insn.side_effects? 
+ insn_node = insn_nodes[length] + connect previous_fixed, insn_node, :control if previous_fixed + previous_fixed = insn_node + first_fixed ||= insn_node + last_fixed = insn_node + end + end + + # Connect basic block arguments. + inputs = {} + outputs = {} + block_flow.in.each do |arg| + # Each basic block argument gets a phi node. Even if there's only + # one predecessor! We'll tidy this up later. + phi = PhiNode.new(id_counter) + connect(phi, merge_node, :info) if merge_node + nodes << phi + inputs[arg] = phi + + block.each_with_length do |_, consumer_offset| + consumer_flow = dfg.insn_flows[consumer_offset] + consumer_flow.in.each_with_index do |producer, input_index| + if producer == arg + connect(phi, insn_nodes[consumer_offset], :data, input_index) + end + end + end + + block_flow.out.each { |out| outputs[out] = phi if out == arg } + end + + # Connect local dataflow from consumers back to producers. + block.each_with_length do |_, consumer_offset| + consumer_flow = dfg.insn_flows.fetch(consumer_offset) + consumer_flow.in.each_with_index do |producer, input_index| + if producer.local? + connect( + insn_nodes[producer.length], + insn_nodes[consumer_offset], + :data, + input_index + ) + end + end + end + + # Connect dataflow from producers that leaves the block. + block.each_with_length do |_, producer_pc| + dfg + .insn_flows + .fetch(producer_pc) + .out + .each do |consumer| + unless consumer.local? + # This is an argument to the successor block - not to an + # instruction here. + outputs[consumer.name] = insn_nodes[producer_pc] + end + end + end + + # A graph with only side-effect free instructions will currently have + # no fixed nodes! In that case just use the first instruction's node + # for both first and last. But it's a bug that it'll appear in the + # control flow path! 
+ SubGraph.new( + first_fixed || insn_nodes[block.block_start], + last_fixed || insn_nodes[block.block_start], + inputs, + outputs + ) + end + + # Connect control flow that flows between basic blocks. + def connect_local_graphs_control(local_graphs) + dfg.blocks.each do |predecessor| + predecessor_last = local_graphs[predecessor.id].last_fixed + predecessor.outgoing_blocks.each_with_index do |successor, index| + label = + if index > 0 && + index == (predecessor.outgoing_blocks.length - 1) + # If there are multiple outgoing blocks from this block, then + # the last one is a fallthrough. Otherwise it's a branch. + :fallthrough + else + :"branch#{index}" + end + + connect( + predecessor_last, + local_graphs[successor.id].first_fixed, + :control, + label + ) + end + end + end + + # Connect data flow that flows between basic blocks. + def connect_local_graphs_data(local_graphs) + dfg.blocks.each do |predecessor| + arg_outs = local_graphs[predecessor.id].outputs.values + arg_outs.each_with_index do |arg_out, arg_n| + predecessor.outgoing_blocks.each do |successor| + successor_graph = local_graphs[successor.id] + arg_in = successor_graph.inputs.values[arg_n] + + # We're connecting to a phi node, so we may need a special + # label. + raise unless arg_in.is_a?(PhiNode) + + label = + case arg_out + when InsnNode + # Instructions that go into a phi node are labelled by the + # offset of last instruction in the block that executed + # them. This way you know which value to use for the phi, + # based on the last instruction you executed. + dfg.blocks.find do |block| + block_start = block.block_start + block_end = + block_start + block.insns.sum(&:length) - + block.insns.last.length + + if (block_start..block_end).cover?(arg_out.offset) + break block_end + end + end + when PhiNode + # Phi nodes to phi nodes are not labelled. + else + raise + end + + connect(arg_out, arg_in, :data, label) + end + end + end + end + + # We don't always build things in an optimal way. 
Go back and fix up + # some mess we left. Ideally we wouldn't create these problems in the + # first place. + def cleanup_phi_nodes + nodes.dup.each do |node| # dup because we're mutating + next unless node.is_a?(PhiNode) + + if node.inputs.size == 1 + # Remove phi nodes with a single input. + connect_over(node) + remove(node) + elsif node.inputs.map(&:from).uniq.size == 1 + # Remove phi nodes where all inputs are the same. + producer_edge = node.inputs.first + consumer_edge = node.outputs.find { |e| !e.to.is_a?(MergeNode) } + connect( + producer_edge.from, + consumer_edge.to, + :data, + consumer_edge.label + ) + remove(node) + end + end + end + + # Eliminate as many unnecessary nodes as we can. + def cleanup_insn_nodes + nodes.dup.each do |node| + next unless node.is_a?(InsnNode) + + case node.insn + when AdjustStack + # If there are any inputs to the adjust stack that are immediately + # discarded, we can remove them from the input list. + number = node.insn.number + + node.inputs.dup.each do |input_edge| + next if input_edge.type != :data + + from = input_edge.from + next unless from.is_a?(InsnNode) + + if from.inputs.empty? && from.outputs.size == 1 + number -= 1 + remove(input_edge.from) + elsif from.insn.is_a?(Dup) + number -= 1 + connect_over(from) + remove(from) + + new_edge = node.inputs.last + new_edge.from.outputs.delete(new_edge) + node.inputs.delete(new_edge) + end + end + + if number == 0 + connect_over(node) + remove(node) + else + next_node = + if number == 1 + InsnNode.new(Pop.new, node.offset) + else + InsnNode.new(AdjustStack.new(number), node.offset) + end + + next_node.inputs.concat(node.inputs) + next_node.outputs.concat(node.outputs) + + # Dynamically finding the index of the node in the nodes array + # because we're mutating the array as we go. + nodes[nodes.index(node)] = next_node + end + when Jump + # When you have a jump instruction that only has one input and one + # output, you can just connect over top of it and remove it. 
+ if node.inputs.size == 1 && node.outputs.size == 1 + connect_over(node) + remove(node) + end + when Pop + from = node.inputs.find { |edge| edge.type == :data }.from + next unless from.is_a?(InsnNode) + + removed = + if from.inputs.empty? && from.outputs.size == 1 + remove(from) + true + elsif from.insn.is_a?(Dup) + connect_over(from) + remove(from) + + new_edge = node.inputs.last + new_edge.from.outputs.delete(new_edge) + node.inputs.delete(new_edge) + true + else + false + end + + if removed + connect_over(node) + remove(node) + end + end + end + end + + # Connect one node to another. + def connect(from, to, type, label = nil) + raise if from == to + raise if !to.is_a?(PhiNode) && type == :data && label.nil? + + edge = Edge.new(from, to, type, label) + from.outputs << edge + to.inputs << edge + end + + # Connect all of the inputs to all of the outputs of a node. + def connect_over(node) + node.inputs.each do |producer_edge| + node.outputs.each do |consumer_edge| + connect( + producer_edge.from, + consumer_edge.to, + producer_edge.type, + producer_edge.label + ) + end + end + end + + # Remove a node from the graph. + def remove(node) + node.inputs.each do |producer_edge| + producer_edge.from.outputs.reject! { |edge| edge.to == node } + end + + node.outputs.each do |consumer_edge| + consumer_edge.to.inputs.reject! { |edge| edge.from == node } + end + + nodes.delete(node) + end + end + + attr_reader :dfg, :nodes, :local_graphs + + def initialize(dfg, nodes, local_graphs) + @dfg = dfg + @nodes = nodes + @local_graphs = local_graphs + end + + def to_mermaid + Mermaid.flowchart do |flowchart| + nodes.each do |node| + flowchart.node("node_#{node.id}", node.label, shape: :rounded) + end + + nodes.each do |producer| + producer.outputs.each do |consumer_edge| + label = + if !consumer_edge.label + # No label. + elsif consumer_edge.to.is_a?(PhiNode) + # Edges into phi nodes are labelled by the offset of the + # instruction going into the merge. 
+ "%04d" % consumer_edge.label + else + consumer_edge.label.to_s + end + + flowchart.link( + flowchart.fetch("node_#{producer.id}"), + flowchart.fetch("node_#{consumer_edge.to.id}"), + label, + type: consumer_edge.type == :info ? :dotted : :directed, + color: { data: :green, control: :red }[consumer_edge.type] + ) + end + end + end + end + + def verify + # Verify edge labels. + nodes.each do |node| + # Not talking about phi nodes right now. + next if node.is_a?(PhiNode) + + if node.is_a?(InsnNode) && node.insn.branch_targets.any? && + !node.insn.is_a?(Leave) + # A branching node must have at least one branch edge and + # potentially a fallthrough edge coming out. + + labels = node.outputs.map(&:label).sort + raise if labels[0] != :branch0 + raise if labels[1] != :fallthrough && labels.size > 2 + else + labels = node.inputs.filter { |e| e.type == :data }.map(&:label) + next if labels.empty? + + # No nil labels + raise if labels.any?(&:nil?) + + # Labels should start at zero. + raise unless labels.min.zero? + + # Labels should be contiguous. 
+ raise unless labels.sort == (labels.min..labels.max).to_a + end + end + end + + def self.compile(dfg) + Compiler.new(dfg).compile + end + end + end +end diff --git a/lib/syntax_tree/yarv/vm.rb b/lib/syntax_tree/yarv/vm.rb index 1bbb82ed..b303944d 100644 --- a/lib/syntax_tree/yarv/vm.rb +++ b/lib/syntax_tree/yarv/vm.rb @@ -219,6 +219,10 @@ def initialize(events = NullEvents.new) @frame = nil end + def self.run(iseq) + new.run_top_frame(iseq) + end + ########################################################################## # Helper methods for frames ########################################################################## diff --git a/spec/mspec b/spec/mspec deleted file mode 160000 index 4877d58d..00000000 --- a/spec/mspec +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 4877d58dff577641bc1ecd1bf3d3c3daa93b423f diff --git a/spec/ruby b/spec/ruby deleted file mode 160000 index 71873ae4..00000000 --- a/spec/ruby +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 71873ae4421f5b551a5af0f3427e901414736835 diff --git a/tasks/sorbet.rake b/tasks/sorbet.rake new file mode 100644 index 00000000..05f48874 --- /dev/null +++ b/tasks/sorbet.rake @@ -0,0 +1,373 @@ +# frozen_string_literal: true + +module SyntaxTree + class RBI + include DSL + + attr_reader :body, :line + + def initialize + @body = [] + @line = 1 + end + + def generate + require "syntax_tree/reflection" + + body << Comment("# typed: strict", false, location) + @line += 2 + + generate_parent + Reflection.nodes.sort.each { |(_, node)| generate_node(node) } + + body << ClassDeclaration( + ConstPathRef(VarRef(Const("SyntaxTree")), Const("BasicVisitor")), + nil, + BodyStmt( + Statements(generate_visitor("overridable")), + nil, + nil, + nil, + nil + ), + location + ) + + body << ClassDeclaration( + ConstPathRef(VarRef(Const("SyntaxTree")), Const("Visitor")), + ConstPathRef(VarRef(Const("SyntaxTree")), Const("BasicVisitor")), + BodyStmt(Statements(generate_visitor("override")), nil, nil, nil, nil), + location + ) + + 
Formatter.format(nil, Program(Statements(body))) + end + + private + + def generate_comments(comment) + comment + .lines(chomp: true) + .map { |line| Comment("# #{line}", false, location).tap { @line += 1 } } + end + + def generate_parent + attribute = Reflection.nodes[:Program].attributes[:location] + class_location = location + + node_body = generate_comments(attribute.comment) + node_body << sig_block { sig_returns { sig_type_for(attribute.type) } } + @line += 1 + + node_body << Command( + Ident("attr_reader"), + Args([SymbolLiteral(Ident("location"))]), + nil, + location + ) + @line += 1 + + body << ClassDeclaration( + ConstPathRef(VarRef(Const("SyntaxTree")), Const("Node")), + nil, + BodyStmt(Statements(node_body), nil, nil, nil, nil), + class_location + ) + @line += 2 + end + + def generate_node(node) + body.concat(generate_comments(node.comment)) + class_location = location + @line += 2 + + body << ClassDeclaration( + ConstPathRef(VarRef(Const("SyntaxTree")), Const(node.name.to_s)), + ConstPathRef(VarRef(Const("SyntaxTree")), Const("Node")), + BodyStmt(Statements(generate_node_body(node)), nil, nil, nil, nil), + class_location + ) + + @line += 2 + end + + def generate_node_body(node) + node_body = [] + node.attributes.sort.each do |(name, attribute)| + next if name == :location + + node_body.concat(generate_comments(attribute.comment)) + node_body << sig_block { sig_returns { sig_type_for(attribute.type) } } + @line += 1 + + node_body << Command( + Ident("attr_reader"), + Args([SymbolLiteral(Ident(attribute.name.to_s))]), + nil, + location + ) + @line += 2 + end + + node_body.concat(generate_initialize(node)) + + node_body << sig_block do + CallNode( + sig_params do + BareAssocHash( + [Assoc(Label("visitor:"), sig_type_for(BasicVisitor))] + ) + end, + Period("."), + Ident("returns"), + ArgParen( + Args( + [CallNode(VarRef(Const("T")), Period("."), Ident("untyped"), nil)] + ) + ) + ) + end + @line += 1 + + node_body << generate_def_node( + "accept", + Paren( 
+ LParen("("), + Params.new(requireds: [Ident("visitor")], location: location) + ) + ) + @line += 2 + + node_body << generate_child_nodes + @line += 1 + + node_body << generate_def_node("child_nodes", nil) + @line += 2 + + node_body << sig_block do + CallNode( + sig_params do + BareAssocHash( + [ + Assoc( + Label("other:"), + CallNode( + VarRef(Const("T")), + Period("."), + Ident("untyped"), + nil + ) + ) + ] + ) + end, + Period("."), + sig_returns { ConstPathRef(VarRef(Const("T")), Const("Boolean")) }, + nil + ) + end + @line += 1 + + node_body << generate_def_node( + "==", + Paren( + LParen("("), + Params.new(location: location, requireds: [Ident("other")]) + ) + ) + @line += 2 + + node_body + end + + def generate_initialize(node) + parameters = + SyntaxTree.const_get(node.name).instance_method(:initialize).parameters + + assocs = + parameters.map do |(_, name)| + Assoc(Label("#{name}:"), sig_type_for(node.attributes[name].type)) + end + + node_body = [] + node_body << sig_block do + CallNode( + sig_params { BareAssocHash(assocs) }, + Period("."), + Ident("void"), + nil + ) + end + @line += 1 + + params = Params.new(location: location) + parameters.each do |(type, name)| + case type + when :req + params.requireds << Ident(name.to_s) + when :keyreq + params.keywords << [Label("#{name}:"), nil] + when :key + params.keywords << [ + Label("#{name}:"), + CallNode( + VarRef(Const("T")), + Period("."), + Ident("unsafe"), + ArgParen(Args([VarRef(Kw("nil"))])) + ) + ] + else + raise + end + end + + node_body << generate_def_node("initialize", Paren(LParen("("), params)) + @line += 2 + + node_body + end + + def generate_child_nodes + type = + Reflection::Type::ArrayType.new( + Reflection::Type::UnionType.new([NilClass, Node]) + ) + + sig_block { sig_returns { sig_type_for(type) } } + end + + def generate_def_node(name, params) + DefNode( + nil, + nil, + Ident(name), + params, + BodyStmt(Statements([VoidStmt()]), nil, nil, nil, nil), + location + ) + end + + def 
generate_visitor(override) + body = [] + + Reflection.nodes.each do |name, node| + body << sig_block do + CallNode( + CallNode( + Ident(override), + Period("."), + sig_params do + BareAssocHash( + [ + Assoc( + Label("node:"), + sig_type_for(SyntaxTree.const_get(name)) + ) + ] + ) + end, + nil + ), + Period("."), + sig_returns do + CallNode(VarRef(Const("T")), Period("."), Ident("untyped"), nil) + end, + nil + ) + end + + body << generate_def_node( + node.visitor_method, + Paren( + LParen("("), + Params.new(requireds: [Ident("node")], location: location) + ) + ) + + @line += 2 + end + + body + end + + def sig_block + MethodAddBlock( + CallNode(nil, nil, Ident("sig"), nil), + BlockNode( + LBrace("{"), + nil, + BodyStmt(Statements([yield]), nil, nil, nil, nil) + ), + location + ) + end + + def sig_params + CallNode(nil, nil, Ident("params"), ArgParen(Args([yield]))) + end + + def sig_returns + CallNode(nil, nil, Ident("returns"), ArgParen(Args([yield]))) + end + + def sig_type_for(type) + case type + when Reflection::Type::ArrayType + ARef( + ConstPathRef(VarRef(Const("T")), Const("Array")), + sig_type_for(type.type) + ) + when Reflection::Type::TupleType + ArrayLiteral(LBracket("["), Args(type.types.map { sig_type_for(_1) })) + when Reflection::Type::UnionType + if type.types.include?(NilClass) + selected = type.types.reject { _1 == NilClass } + subtype = + if selected.size == 1 + selected.first + else + Reflection::Type::UnionType.new(selected) + end + + CallNode( + VarRef(Const("T")), + Period("."), + Ident("nilable"), + ArgParen(Args([sig_type_for(subtype)])) + ) + else + CallNode( + VarRef(Const("T")), + Period("."), + Ident("any"), + ArgParen(Args(type.types.map { sig_type_for(_1) })) + ) + end + when Symbol + ConstRef(Const("Symbol")) + else + *parents, constant = type.name.split("::").map { Const(_1) } + + if parents.empty? 
+ ConstRef(constant) + else + [*parents[1..], constant].inject( + VarRef(parents.first) + ) { |accum, const| ConstPathRef(accum, const) } + end + end + end + + def location + Location.fixed(line: line, char: 0, column: 0) + end + end +end + +namespace :sorbet do + desc "Generate RBI files for Sorbet" + task :rbi do + puts SyntaxTree::RBI.new.generate + end +end diff --git a/tasks/whitequark.rake b/tasks/whitequark.rake new file mode 100644 index 00000000..4f7ee650 --- /dev/null +++ b/tasks/whitequark.rake @@ -0,0 +1,87 @@ +# frozen_string_literal: true + +# This file's purpose is to extract the examples from the whitequark/parser +# gem and generate a test file that we can use to ensure that our parser +# generates equivalent syntax trees when translating. To do this, it runs the +# parser's test suite but overrides the `assert_parses` method to collect the +# examples into a hash. Then, it writes out the hash to a file that we can use +# to generate our own tests. +# +# To run the test suite, it's important to note that we have to mirror both any +# APIs provided to the test suite (for example the ParseHelper module below). +# This is obviously relatively brittle, but it's effective for now. + +require "ast" + +module ParseHelper + # This object is going to collect all of the examples from the parser gem into + # a hash that we can use to generate our own tests. + COLLECTED = Hash.new { |hash, key| hash[key] = [] } + + include AST::Sexp + ALL_VERSIONS = %w[3.1 3.2] + + private + + def assert_context(*) + end + + def assert_diagnoses(*) + end + + def assert_diagnoses_many(*) + end + + def refute_diagnoses(*) + end + + def with_versions(*) + end + + def assert_parses(_ast, code, _source_maps = "", versions = ALL_VERSIONS) + # We're going to skip any examples that are for older Ruby versions + # that we do not support. + return if (versions & %w[3.1 3.2]).empty? 
+ + entry = caller.find { _1.include?("test_parser.rb") } + _, lineno, name = *entry.match(/(\d+):in `(.+)'/) + + COLLECTED["#{name}:#{lineno}"] << code + end +end + +namespace :extract do + desc "Extract the whitequark/parser tests" + task :whitequark do + directory = File.expand_path("../tmp/parser", __dir__) + unless File.directory?(directory) + sh "git clone --depth 1 https://github.com/whitequark/parser #{directory}" + end + + mkdir_p "#{directory}/extract" + touch "#{directory}/extract/helper.rb" + touch "#{directory}/extract/parse_helper.rb" + touch "#{directory}/extract/extracted.txt" + $:.unshift "#{directory}/extract" + + require "parser/current" + require "minitest/autorun" + require_relative "#{directory}/test/test_parser" + + Minitest.after_run do + filepath = File.expand_path("../test/translation/parser.txt", __dir__) + + File.open(filepath, "w") do |file| + ParseHelper::COLLECTED.sort.each do |(key, codes)| + if codes.length == 1 + file.puts("!!! #{key}\n#{codes.first}") + else + codes.each_with_index do |code, index| + file.puts("!!! 
#{key}:#{index}\n#{code}") + end + end + end + end + end + end +end diff --git a/test/compiler_test.rb b/test/compiler_test.rb index 1922f8c6..ca3e8dde 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -311,6 +311,12 @@ class CompilerTest < Minitest::Test "[1, 2, 3].min", "[foo, bar, baz].min", "[foo, bar, baz].min(1)", + "[1, 2, 3].hash", + "[foo, bar, baz].hash", + "[foo, bar, baz].hash(1)", + "[1, 2, 3].foo", + "[foo, bar, baz].foo", + "[foo, bar, baz].foo(1)", "[**{ x: true }][0][:x]", # Core method calls "alias foo bar", diff --git a/test/fixtures/array_literal.rb b/test/fixtures/array_literal.rb index df807728..391d2eae 100644 --- a/test/fixtures/array_literal.rb +++ b/test/fixtures/array_literal.rb @@ -24,9 +24,16 @@ - fooooooooooooooooo = 1 [ - fooooooooooooooooo, fooooooooooooooooo, fooooooooooooooooo, - fooooooooooooooooo, fooooooooooooooooo, fooooooooooooooooo, - fooooooooooooooooo, fooooooooooooooooo, fooooooooooooooooo, fooooooooooooooooo + fooooooooooooooooo, + fooooooooooooooooo, + fooooooooooooooooo, + fooooooooooooooooo, + fooooooooooooooooo, + fooooooooooooooooo, + fooooooooooooooooo, + fooooooooooooooooo, + fooooooooooooooooo, + fooooooooooooooooo ] % [ diff --git a/test/fixtures/binary.rb b/test/fixtures/binary.rb index f8833cdc..4cb56cbf 100644 --- a/test/fixtures/binary.rb +++ b/test/fixtures/binary.rb @@ -3,6 +3,11 @@ % foo << bar % +foo << barrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr << barrrrrrrrrrrrr << barrrrrrrrrrrrrrrrrr +- +foo << barrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr << barrrrrrrrrrrrr << + barrrrrrrrrrrrrrrrrr +% foo**bar % foo * barrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr diff --git a/test/fixtures/break.rb b/test/fixtures/break.rb index a77c6b35..519becda 100644 --- a/test/fixtures/break.rb +++ b/test/fixtures/break.rb @@ -27,3 +27,11 @@ ) % break foo.bar :baz do |qux| qux end +- +break( + foo.bar :baz do |qux| + qux + end +) +% +break :foo => "bar" diff --git a/test/fixtures/call.rb 
b/test/fixtures/call.rb index c41ee4ac..eec717f0 100644 --- a/test/fixtures/call.rb +++ b/test/fixtures/call.rb @@ -60,3 +60,15 @@ % a b do end.c d +% +self. +=begin +=end + to_s +% +fooooooooooooooooooooooooooooooooooo.barrrrrrrrrrrrrrrrrrrrrrrrrrrrrr.where.not(:id).order(:id) +- +fooooooooooooooooooooooooooooooooooo + .barrrrrrrrrrrrrrrrrrrrrrrrrrrrrr + .where.not(:id) + .order(:id) diff --git a/test/fixtures/def.rb b/test/fixtures/def.rb index a827adfe..0cc49e0a 100644 --- a/test/fixtures/def.rb +++ b/test/fixtures/def.rb @@ -23,3 +23,9 @@ def foo() # comment def foo( # comment ) end +% +def +=begin +=end +a +end diff --git a/test/fixtures/def_endless.rb b/test/fixtures/def_endless.rb index 4595fba9..8d1f9d33 100644 --- a/test/fixtures/def_endless.rb +++ b/test/fixtures/def_endless.rb @@ -22,3 +22,13 @@ def self.foo = bar baz end def foo? = true +% +def a() +=begin +=end +=1 +- +def a() = +=begin +=end + 1 diff --git a/test/fixtures/hash.rb b/test/fixtures/hash.rb index 9c43a4fe..70e89f69 100644 --- a/test/fixtures/hash.rb +++ b/test/fixtures/hash.rb @@ -29,3 +29,5 @@ { # comment } +% # >= 3.1.0 +{ foo:, "bar" => "baz" } diff --git a/test/fixtures/lambda.rb b/test/fixtures/lambda.rb index 5dba3be3..8b922ef0 100644 --- a/test/fixtures/lambda.rb +++ b/test/fixtures/lambda.rb @@ -80,3 +80,31 @@ -> do # comment1 # comment2 end +% # multiline lambda in a command +command "arg" do + -> { + multi + line + } +end +- +command "arg" do + -> do + multi + line + end +end +% # multiline lambda in a command call +command.call "arg" do + -> { + multi + line + } +end +- +command.call "arg" do + -> do + multi + line + end +end diff --git a/test/fixtures/next.rb b/test/fixtures/next.rb index be667951..66e90028 100644 --- a/test/fixtures/next.rb +++ b/test/fixtures/next.rb @@ -65,3 +65,12 @@ next([1, 2]) - next 1, 2 +% +next fun foo do end +- +next( + fun foo do + end +) +% +next :foo => "bar" diff --git a/test/fixtures/return.rb b/test/fixtures/return.rb index 8f7d0aa3..7092464f 
100644 --- a/test/fixtures/return.rb +++ b/test/fixtures/return.rb @@ -37,3 +37,5 @@ return [] % return [1] +% +return :foo => "bar" diff --git a/test/fixtures/symbols.rb b/test/fixtures/symbols.rb index 5e2673f3..12f0a22f 100644 --- a/test/fixtures/symbols.rb +++ b/test/fixtures/symbols.rb @@ -19,3 +19,8 @@ %I[foo] # comment % %I{foo[]} +% +:\ +=begin +=end +symbol diff --git a/test/formatting_test.rb b/test/formatting_test.rb index 37ca29e1..5e5f9e9f 100644 --- a/test/formatting_test.rb +++ b/test/formatting_test.rb @@ -7,6 +7,7 @@ class FormattingTest < Minitest::Test Fixtures.each_fixture do |fixture| define_method(:"test_formatted_#{fixture.name}") do assert_equal(fixture.formatted, SyntaxTree.format(fixture.source)) + assert_syntax_tree(SyntaxTree.parse(fixture.source)) end end diff --git a/test/index_test.rb b/test/index_test.rb new file mode 100644 index 00000000..1e2a7fc7 --- /dev/null +++ b/test/index_test.rb @@ -0,0 +1,183 @@ +# frozen_string_literal: true + +require_relative "test_helper" + +module SyntaxTree + class IndexTest < Minitest::Test + def test_module + index_each("module Foo; end") do |entry| + assert_equal :Foo, entry.name + assert_equal [[:Foo]], entry.nesting + end + end + + def test_module_nested + index_each("module Foo; module Bar; end; end") do |entry| + assert_equal :Bar, entry.name + assert_equal [[:Foo], [:Bar]], entry.nesting + end + end + + def test_module_comments + index_each("# comment1\n# comment2\nmodule Foo; end") do |entry| + assert_equal :Foo, entry.name + assert_equal ["# comment1", "# comment2"], entry.comments.to_a + end + end + + def test_class + index_each("class Foo; end") do |entry| + assert_equal :Foo, entry.name + assert_equal [[:Foo]], entry.nesting + end + end + + def test_class_paths_2 + index_each("class Foo::Bar; end") do |entry| + assert_equal :Bar, entry.name + assert_equal [%i[Foo Bar]], entry.nesting + end + end + + def test_class_paths_3 + index_each("class Foo::Bar::Baz; end") do |entry| + assert_equal 
:Baz, entry.name + assert_equal [%i[Foo Bar Baz]], entry.nesting + end + end + + def test_class_nested + index_each("class Foo; class Bar; end; end") do |entry| + assert_equal :Bar, entry.name + assert_equal [[:Foo], [:Bar]], entry.nesting + end + end + + def test_class_paths_nested + index_each("class Foo; class Bar::Baz::Qux; end; end") do |entry| + assert_equal :Qux, entry.name + assert_equal [[:Foo], %i[Bar Baz Qux]], entry.nesting + end + end + + def test_class_superclass + index_each("class Foo < Bar; end") do |entry| + assert_equal :Foo, entry.name + assert_equal [[:Foo]], entry.nesting + assert_equal [:Bar], entry.superclass + end + end + + def test_class_path_superclass + index_each("class Foo::Bar < Baz::Qux; end") do |entry| + assert_equal :Bar, entry.name + assert_equal [%i[Foo Bar]], entry.nesting + assert_equal %i[Baz Qux], entry.superclass + end + end + + def test_class_comments + index_each("# comment1\n# comment2\nclass Foo; end") do |entry| + assert_equal :Foo, entry.name + assert_equal ["# comment1", "# comment2"], entry.comments.to_a + end + end + + def test_method + index_each("def foo; end") do |entry| + assert_equal :foo, entry.name + assert_empty entry.nesting + end + end + + def test_method_nested + index_each("class Foo; def foo; end; end") do |entry| + assert_equal :foo, entry.name + assert_equal [[:Foo]], entry.nesting + end + end + + def test_method_comments + index_each("# comment1\n# comment2\ndef foo; end") do |entry| + assert_equal :foo, entry.name + assert_equal ["# comment1", "# comment2"], entry.comments.to_a + end + end + + def test_singleton_method + index_each("def self.foo; end") do |entry| + assert_equal :foo, entry.name + assert_empty entry.nesting + end + end + + def test_singleton_method_nested + index_each("class Foo; def self.foo; end; end") do |entry| + assert_equal :foo, entry.name + assert_equal [[:Foo]], entry.nesting + end + end + + def test_singleton_method_comments + index_each("# comment1\n# comment2\ndef 
self.foo; end") do |entry| + assert_equal :foo, entry.name + assert_equal ["# comment1", "# comment2"], entry.comments.to_a + end + end + + def test_alias_method + index_each("alias foo bar") do |entry| + assert_equal :foo, entry.name + assert_empty entry.nesting + end + end + + def test_attr_reader + index_each("attr_reader :foo") do |entry| + assert_equal :foo, entry.name + assert_empty entry.nesting + end + end + + def test_attr_writer + index_each("attr_writer :foo") do |entry| + assert_equal :foo=, entry.name + assert_empty entry.nesting + end + end + + def test_attr_accessor + index_each("attr_accessor :foo") do |entry| + assert_equal :foo=, entry.name + assert_empty entry.nesting + end + end + + def test_constant + index_each("FOO = 1") do |entry| + assert_equal :FOO, entry.name + assert_empty entry.nesting + end + end + + def test_this_file + entries = Index.index_file(__FILE__, backend: Index::ParserBackend.new) + + if defined?(RubyVM::InstructionSequence) + entries += Index.index_file(__FILE__, backend: Index::ISeqBackend.new) + end + + entries.map { |entry| entry.comments.to_a } + end + + private + + def index_each(source) + yield Index.index(source, backend: Index::ParserBackend.new).last + + if defined?(RubyVM::InstructionSequence) + yield Index.index(source, backend: Index::ISeqBackend.new).last + end + end + end +end diff --git a/test/interface_test.rb b/test/interface_test.rb deleted file mode 100644 index 5086680e..00000000 --- a/test/interface_test.rb +++ /dev/null @@ -1,72 +0,0 @@ -# frozen_string_literal: true - -require_relative "test_helper" - -module SyntaxTree - class InterfaceTest < Minitest::Test - ObjectSpace.each_object(Node.singleton_class) do |klass| - next if klass == Node - - define_method(:"test_instantiate_#{klass.name}") do - assert_syntax_tree(instantiate(klass)) - end - end - - Fixtures.each_fixture do |fixture| - define_method(:"test_#{fixture.name}") do - assert_syntax_tree(SyntaxTree.parse(fixture.source)) - end - end - - 
private - - # This method is supposed to instantiate a new instance of the given class. - # The class is always a descendant from SyntaxTree::Node, so we can make - # certain assumptions about the way the initialize method is set up. If it - # needs to be special-cased, it's done so at the end of this method. - def instantiate(klass) - params = {} - - # Set up all of the keyword parameters for the class. - klass - .instance_method(:initialize) - .parameters - .each { |(type, name)| params[name] = nil if type.start_with?("key") } - - # Set up any default values that have to be arrays. - %i[ - assocs - comments - elements - keywords - locals - optionals - parts - posts - requireds - symbols - values - ].each { |key| params[key] = [] if params.key?(key) } - - # Set up a default location for the node. - params[:location] = Location.fixed(line: 0, char: 0, column: 0) - - case klass.name - when "SyntaxTree::Binary" - klass.new(**params, operator: :+) - when "SyntaxTree::Kw" - klass.new(**params, value: "kw") - when "SyntaxTree::Label" - klass.new(**params, value: "label:") - when "SyntaxTree::Op" - klass.new(**params, value: "+") - when "SyntaxTree::RegexpLiteral" - klass.new(**params, ending: "/") - when "SyntaxTree::Statements" - klass.new(nil, **params, body: []) - else - klass.new(**params) - end - end - end -end diff --git a/test/language_server_test.rb b/test/language_server_test.rb index 2fe4e60a..f5a6ca57 100644 --- a/test/language_server_test.rb +++ b/test/language_server_test.rb @@ -6,19 +6,38 @@ module SyntaxTree # stree-ignore class LanguageServerTest < Minitest::Test - class Initialize < Struct.new(:id) + class Initialize + attr_reader :id + + def initialize(id) + @id = id + end + def to_hash { method: "initialize", id: id } end end - class Shutdown < Struct.new(:id) + class Shutdown + attr_reader :id + + def initialize(id) + @id = id + end + def to_hash { method: "shutdown", id: id } end end - class TextDocumentDidOpen < Struct.new(:uri, :text) + class 
TextDocumentDidOpen + attr_reader :uri, :text + + def initialize(uri, text) + @uri = uri + @text = text + end + def to_hash { method: "textDocument/didOpen", @@ -27,7 +46,14 @@ def to_hash end end - class TextDocumentDidChange < Struct.new(:uri, :text) + class TextDocumentDidChange + attr_reader :uri, :text + + def initialize(uri, text) + @uri = uri + @text = text + end + def to_hash { method: "textDocument/didChange", @@ -39,7 +65,13 @@ def to_hash end end - class TextDocumentDidClose < Struct.new(:uri) + class TextDocumentDidClose + attr_reader :uri + + def initialize(uri) + @uri = uri + end + def to_hash { method: "textDocument/didClose", @@ -48,7 +80,14 @@ def to_hash end end - class TextDocumentFormatting < Struct.new(:id, :uri) + class TextDocumentFormatting + attr_reader :id, :uri + + def initialize(id, uri) + @id = id + @uri = uri + end + def to_hash { method: "textDocument/formatting", @@ -58,7 +97,14 @@ def to_hash end end - class TextDocumentInlayHint < Struct.new(:id, :uri) + class TextDocumentInlayHint + attr_reader :id, :uri + + def initialize(id, uri) + @id = id + @uri = uri + end + def to_hash { method: "textDocument/inlayHint", @@ -68,7 +114,14 @@ def to_hash end end - class SyntaxTreeVisualizing < Struct.new(:id, :uri) + class SyntaxTreeVisualizing + attr_reader :id, :uri + + def initialize(id, uri) + @id = id + @uri = uri + end + def to_hash { method: "syntaxTree/visualizing", diff --git a/test/node_test.rb b/test/node_test.rb index 3d700e73..19fbeed2 100644 --- a/test/node_test.rb +++ b/test/node_test.rb @@ -60,7 +60,7 @@ def test_arg_paren_heredoc ARGUMENT SOURCE - at = location(lines: 1..3, chars: 6..28) + at = location(lines: 1..3, chars: 6..37) assert_node(ArgParen, source, at: at, &:arguments) end @@ -131,7 +131,7 @@ def test_aryptn end SOURCE - at = location(lines: 2..2, chars: 18..47) + at = location(lines: 2..2, chars: 18..48) assert_node(AryPtn, source, at: at) { |node| node.consequent.pattern } end @@ -533,7 +533,7 @@ def test_heredoc 
HEREDOC SOURCE - at = location(lines: 1..3, chars: 0..22) + at = location(lines: 1..3, chars: 0..30) assert_node(Heredoc, source, at: at) end @@ -544,7 +544,7 @@ def test_heredoc_beg HEREDOC SOURCE - at = location(chars: 0..11) + at = location(chars: 0..10) assert_node(HeredocBeg, source, at: at, &:beginning) end @@ -555,7 +555,7 @@ def test_heredoc_end HEREDOC SOURCE - at = location(lines: 3..3, chars: 22..31, columns: 0..9) + at = location(lines: 3..3, chars: 22..30, columns: 0..8) assert_node(HeredocEnd, source, at: at, &:ending) end @@ -950,7 +950,7 @@ def test_var_field guard_version("3.1.0") do def test_pinned_var_ref source = "foo in ^bar" - at = location(chars: 8..11) + at = location(chars: 7..11) assert_node(PinnedVarRef, source, at: at, &:pattern) end @@ -1008,7 +1008,7 @@ def test_xstring_heredoc HEREDOC SOURCE - at = location(lines: 1..3, chars: 0..18) + at = location(lines: 1..3, chars: 0..26) assert_node(Heredoc, source, at: at) end @@ -1058,6 +1058,342 @@ def test_root_class_raises_not_implemented_errors end end + def test_arity_no_args + source = <<~SOURCE + def foo + end + SOURCE + + at = location(chars: 0..11, columns: 0..3, lines: 1..2) + assert_node(DefNode, source, at: at) do |node| + assert_equal(0..0, node.arity) + node + end + end + + def test_arity_positionals + source = <<~SOURCE + def foo(a, b = 1) + end + SOURCE + + at = location(chars: 0..21, columns: 0..3, lines: 1..2) + assert_node(DefNode, source, at: at) do |node| + assert_equal(1..2, node.arity) + node + end + end + + def test_arity_rest + source = <<~SOURCE + def foo(a, *b) + end + SOURCE + + at = location(chars: 0..18, columns: 0..3, lines: 1..2) + assert_node(DefNode, source, at: at) do |node| + assert_equal(1.., node.arity) + node + end + end + + def test_arity_keyword_rest + source = <<~SOURCE + def foo(a, **b) + end + SOURCE + + at = location(chars: 0..19, columns: 0..3, lines: 1..2) + assert_node(DefNode, source, at: at) do |node| + assert_equal(1.., node.arity) + node + end 
+ end + + def test_arity_keywords + source = <<~SOURCE + def foo(a:, b: 1) + end + SOURCE + + at = location(chars: 0..21, columns: 0..3, lines: 1..2) + assert_node(DefNode, source, at: at) do |node| + assert_equal(1..2, node.arity) + node + end + end + + def test_arity_mixed + source = <<~SOURCE + def foo(a, b = 1, c:, d: 2) + end + SOURCE + + at = location(chars: 0..31, columns: 0..3, lines: 1..2) + assert_node(DefNode, source, at: at) do |node| + assert_equal(2..4, node.arity) + node + end + end + + guard_version("2.7.3") do + def test_arity_arg_forward + source = <<~SOURCE + def foo(...) + end + SOURCE + + at = location(chars: 0..16, columns: 0..3, lines: 1..2) + assert_node(DefNode, source, at: at) do |node| + assert_equal(0.., node.arity) + node + end + end + end + + guard_version("3.0.0") do + def test_arity_positional_and_arg_forward + source = <<~SOURCE + def foo(a, ...) + end + SOURCE + + at = location(chars: 0..19, columns: 0..3, lines: 1..2) + assert_node(DefNode, source, at: at) do |node| + assert_equal(1.., node.arity) + node + end + end + end + + def test_arity_no_parenthesis + source = <<~SOURCE + def foo a, b = 1 + end + SOURCE + + at = location(chars: 0..20, columns: 0..3, lines: 1..2) + assert_node(DefNode, source, at: at) do |node| + assert_equal(1..2, node.arity) + node + end + end + + def test_block_arity_positionals + source = <<~SOURCE + [].each do |a, b, c| + end + SOURCE + + at = location(chars: 8..24, columns: 8..3, lines: 1..2) + assert_node(BlockNode, source, at: at) do |node| + block = node.block + assert_equal(3..3, block.arity) + block + end + end + + def test_block_arity_with_optional + source = <<~SOURCE + [].each do |a, b = 1| + end + SOURCE + + at = location(chars: 8..25, columns: 8..3, lines: 1..2) + assert_node(BlockNode, source, at: at) do |node| + block = node.block + assert_equal(1..2, block.arity) + block + end + end + + def test_block_arity_with_optional_keyword + source = <<~SOURCE + [].each do |a, b: 2| + end + SOURCE + + 
at = location(chars: 8..24, columns: 8..3, lines: 1..2) + assert_node(BlockNode, source, at: at) do |node| + block = node.block + assert_equal(1..2, block.arity) + block + end + end + + def test_call_node_arity_positional_arguments + source = <<~SOURCE + foo(1, 2, 3) + SOURCE + + at = location(chars: 0..12, columns: 0..3, lines: 1..1) + assert_node(CallNode, source, at: at) do |node| + assert_equal(3, node.arity) + node + end + end + + def test_call_node_arity_keyword_arguments + source = <<~SOURCE + foo(bar, something: 123) + SOURCE + + at = location(chars: 0..24, columns: 0..24, lines: 1..1) + assert_node(CallNode, source, at: at) do |node| + assert_equal(2, node.arity) + node + end + end + + def test_call_node_arity_splat_arguments + source = <<~SOURCE + foo(*bar) + SOURCE + + at = location(chars: 0..9, columns: 0..9, lines: 1..1) + assert_node(CallNode, source, at: at) do |node| + assert_equal(Float::INFINITY, node.arity) + node + end + end + + def test_call_node_arity_keyword_rest_arguments + source = <<~SOURCE + foo(**bar) + SOURCE + + at = location(chars: 0..10, columns: 0..10, lines: 1..1) + assert_node(CallNode, source, at: at) do |node| + assert_equal(Float::INFINITY, node.arity) + node + end + end + + guard_version("2.7.3") do + def test_call_node_arity_arg_forward_arguments + source = <<~SOURCE + def foo(...) + bar(...) 
+ end + SOURCE + + at = location(chars: 15..23, columns: 2..10, lines: 2..2) + assert_node(CallNode, source, at: at) do |node| + call = node.bodystmt.statements.body.first + assert_equal(Float::INFINITY, call.arity) + call + end + end + end + + def test_command_arity_positional_arguments + source = <<~SOURCE + foo 1, 2, 3 + SOURCE + + at = location(chars: 0..11, columns: 0..3, lines: 1..1) + assert_node(Command, source, at: at) do |node| + assert_equal(3, node.arity) + node + end + end + + def test_command_arity_keyword_arguments + source = <<~SOURCE + foo bar, something: 123 + SOURCE + + at = location(chars: 0..23, columns: 0..23, lines: 1..1) + assert_node(Command, source, at: at) do |node| + assert_equal(2, node.arity) + node + end + end + + def test_command_arity_splat_arguments + source = <<~SOURCE + foo *bar + SOURCE + + at = location(chars: 0..8, columns: 0..8, lines: 1..1) + assert_node(Command, source, at: at) do |node| + assert_equal(Float::INFINITY, node.arity) + node + end + end + + def test_command_arity_keyword_rest_arguments + source = <<~SOURCE + foo **bar + SOURCE + + at = location(chars: 0..9, columns: 0..9, lines: 1..1) + assert_node(Command, source, at: at) do |node| + assert_equal(Float::INFINITY, node.arity) + node + end + end + + def test_command_call_arity_positional_arguments + source = <<~SOURCE + object.foo 1, 2, 3 + SOURCE + + at = location(chars: 0..18, columns: 0..3, lines: 1..1) + assert_node(CommandCall, source, at: at) do |node| + assert_equal(3, node.arity) + node + end + end + + def test_command_call_arity_keyword_arguments + source = <<~SOURCE + object.foo bar, something: 123 + SOURCE + + at = location(chars: 0..30, columns: 0..30, lines: 1..1) + assert_node(CommandCall, source, at: at) do |node| + assert_equal(2, node.arity) + node + end + end + + def test_command_call_arity_splat_arguments + source = <<~SOURCE + object.foo *bar + SOURCE + + at = location(chars: 0..15, columns: 0..15, lines: 1..1) + assert_node(CommandCall, 
source, at: at) do |node| + assert_equal(Float::INFINITY, node.arity) + node + end + end + + def test_command_call_arity_keyword_rest_arguments + source = <<~SOURCE + object.foo **bar + SOURCE + + at = location(chars: 0..16, columns: 0..16, lines: 1..1) + assert_node(CommandCall, source, at: at) do |node| + assert_equal(Float::INFINITY, node.arity) + node + end + end + + def test_vcall_arity + source = <<~SOURCE + foo + SOURCE + + at = location(chars: 0..3, columns: 0..3, lines: 1..1) + assert_node(VCall, source, at: at) do |node| + assert_equal(0, node.arity) + node + end + end + private def location(lines: 1..1, chars: 0..0, columns: 0..0) diff --git a/test/parser_test.rb b/test/parser_test.rb index 6048cf11..7ac07381 100644 --- a/test/parser_test.rb +++ b/test/parser_test.rb @@ -65,5 +65,62 @@ def foo end RUBY end + + def test_does_not_choke_on_invalid_characters_in_source_string + SyntaxTree.parse(<<~RUBY) + # comment + # comment + __END__ + \xC5 + RUBY + end + + def test_lambda_vars_with_parameters_location + tree = SyntaxTree.parse(<<~RUBY) + # comment + # comment + ->(_i; a) { a } + RUBY + + local_location = + tree.statements.body.last.params.contents.locals.first.location + + assert_equal(3, local_location.start_line) + assert_equal(3, local_location.end_line) + assert_equal(7, local_location.start_column) + assert_equal(8, local_location.end_column) + end + + def test_lambda_vars_location + tree = SyntaxTree.parse(<<~RUBY) + # comment + # comment + ->(; a) { a } + RUBY + + local_location = + tree.statements.body.last.params.contents.locals.first.location + + assert_equal(3, local_location.start_line) + assert_equal(3, local_location.end_line) + assert_equal(5, local_location.start_column) + assert_equal(6, local_location.end_column) + end + + def test_multiple_lambda_vars_location + tree = SyntaxTree.parse(<<~RUBY) + # comment + # comment + ->(; a, b, c) { a } + RUBY + + local_location = + tree.statements.body.last.params.contents.locals.last.location + + 
assert_equal(3, local_location.start_line) + assert_equal(3, local_location.end_line) + assert_equal(11, local_location.start_column) + assert_equal(12, local_location.end_column) + end end end diff --git a/test/plugin/disable_auto_ternary_test.rb b/test/plugin/disable_auto_ternary_test.rb new file mode 100644 index 00000000..b2af9d35 --- /dev/null +++ b/test/plugin/disable_auto_ternary_test.rb @@ -0,0 +1,32 @@ +# frozen_string_literal: true + +require_relative "../test_helper" + +module SyntaxTree + class DisableTernaryTest < Minitest::Test + def test_short_if_else_unchanged + assert_format(<<~RUBY) + if true + 1 + else + 2 + end + RUBY + end + + def test_short_ternary_unchanged + assert_format("true ? 1 : 2\n") + end + + private + + def assert_format(expected, source = expected) + options = Formatter::Options.new(disable_auto_ternary: true) + formatter = Formatter.new(source, [], options: options) + SyntaxTree.parse(source).format(formatter) + + formatter.flush + assert_equal(expected, formatter.output.join) + end + end +end diff --git a/test/ractor_test.rb b/test/ractor_test.rb index bcdb2a51..7e0201ca 100644 --- a/test/ractor_test.rb +++ b/test/ractor_test.rb @@ -33,7 +33,7 @@ def test_formatting private def filepaths - Dir.glob(File.expand_path("../lib/syntax_tree/{node,parser}.rb", __dir__)) + Dir.glob(File.expand_path("../lib/syntax_tree/plugin/*.rb", __dir__)) end # Ractors still warn about usage, so I'm disabling that warning here just to diff --git a/test/syntax_tree_test.rb b/test/syntax_tree_test.rb index 05242d94..27aa6851 100644 --- a/test/syntax_tree_test.rb +++ b/test/syntax_tree_test.rb @@ -22,13 +22,18 @@ def method # comment SOURCE bodystmt = SyntaxTree.parse(source).statements.body.first.bodystmt - assert_equal(20, bodystmt.location.start_char) + assert_equal(20, bodystmt.start_char) end def test_parse_error assert_raises(Parser::ParseError) { SyntaxTree.parse("<>") } end + def test_marshalable + node = SyntaxTree.parse("1 + 2") + 
assert_operator(node, :===, Marshal.load(Marshal.dump(node))) + end + def test_maxwidth_format assert_equal("foo +\n bar\n", SyntaxTree.format("foo + bar", 5)) end diff --git a/test/test_helper.rb b/test/test_helper.rb index 77627e26..8015be14 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -1,16 +1,53 @@ # frozen_string_literal: true -require "simplecov" -SimpleCov.start do - add_filter("idempotency_test.rb") unless ENV["CI"] - add_group("lib", "lib") - add_group("test", "test") +unless RUBY_ENGINE == "truffleruby" + require "simplecov" + SimpleCov.start do + add_filter("idempotency_test.rb") unless ENV["CI"] + add_group("lib", "lib") + add_group("test", "test") + end end $LOAD_PATH.unshift(File.expand_path("../lib", __dir__)) require "syntax_tree" require "syntax_tree/cli" +unless RUBY_ENGINE == "truffleruby" + # Here we are going to establish type verification whenever a new node is + # created. We do this through the reflection module, which in turn parses the + # source code of the node classes. + require "syntax_tree/reflection" + SyntaxTree::Reflection.nodes.each do |name, node| + next if name == :Statements + + clazz = SyntaxTree.const_get(name) + parameters = clazz.instance_method(:initialize).parameters + + # First, verify that all of the parameters listed in the list of attributes. + # If there are any parameters that aren't listed in the attributes, then + # something went wrong with the parsing in the reflection module. + raise unless (parameters.map(&:last) - node.attributes.keys).empty? + + # Now we're going to use an alias chain to redefine the initialize method to + # include type checking. 
+ clazz.alias_method(:initialize_without_verify, :initialize) + clazz.define_method(:initialize) do |**kwargs| + kwargs.each do |kwarg, value| + attribute = node.attributes.fetch(kwarg) + + unless attribute.type === value + raise TypeError, + "invalid type for #{name}##{kwarg}, expected " \ + "#{attribute.type.inspect}, got #{value.inspect}" + end + end + + initialize_without_verify(**kwargs) + end + end +end + require "json" require "tempfile" require "pp" @@ -61,7 +98,7 @@ def assert_syntax_tree(node) assert_includes(pretty, type) # Assert that we can get back a new tree by using the mutation visitor. - assert_operator node, :===, node.accept(Visitor::MutationVisitor.new) + assert_operator node, :===, node.accept(MutationVisitor.new) # Serialize the node to JSON, parse it back out, and assert that we have # found the expected type. diff --git a/test/translation/parser.txt b/test/translation/parser.txt new file mode 100644 index 00000000..5e9e8d31 --- /dev/null +++ b/test/translation/parser.txt @@ -0,0 +1,1824 @@ +!!! assert_parses_args:2249:0 +def f (foo: 1, bar: 2, **baz, &b); end +!!! assert_parses_args:2249:1 +def f (foo: 1, &b); end +!!! assert_parses_args:2249:2 +def f **baz, &b; end +!!! assert_parses_args:2249:3 +def f *, **; end +!!! assert_parses_args:2249:4 +def f a, o=1, *r, &b; end +!!! assert_parses_args:2249:5 +def f a, o=1, *r, p, &b; end +!!! assert_parses_args:2249:6 +def f a, o=1, &b; end +!!! assert_parses_args:2249:7 +def f a, o=1, p, &b; end +!!! assert_parses_args:2249:8 +def f a, *r, &b; end +!!! assert_parses_args:2249:9 +def f a, *r, p, &b; end +!!! assert_parses_args:2249:10 +def f a, &b; end +!!! assert_parses_args:2249:11 +def f o=1, *r, &b; end +!!! assert_parses_args:2249:12 +def f o=1, *r, p, &b; end +!!! assert_parses_args:2249:13 +def f o=1, &b; end +!!! assert_parses_args:2249:14 +def f o=1, p, &b; end +!!! assert_parses_args:2249:15 +def f *r, &b; end +!!! assert_parses_args:2249:16 +def f *r, p, &b; end +!!! 
assert_parses_args:2249:17 +def f &b; end +!!! assert_parses_args:2249:18 +def f ; end +!!! assert_parses_args:2249:19 +def f (((a))); end +!!! assert_parses_args:2249:20 +def f ((a, a1)); end +!!! assert_parses_args:2249:21 +def f ((a, *r)); end +!!! assert_parses_args:2249:22 +def f ((a, *r, p)); end +!!! assert_parses_args:2249:23 +def f ((a, *)); end +!!! assert_parses_args:2249:24 +def f ((a, *, p)); end +!!! assert_parses_args:2249:25 +def f ((*r)); end +!!! assert_parses_args:2249:26 +def f ((*r, p)); end +!!! assert_parses_args:2249:27 +def f ((*)); end +!!! assert_parses_args:2249:28 +def f ((*, p)); end +!!! assert_parses_args:2249:29 +def f foo: +; end +!!! assert_parses_args:2249:30 +def f foo: -1 +; end +!!! assert_parses_blockargs:2506:0 +f{ |a| } +!!! assert_parses_blockargs:2506:1 +f{ |a, b,| } +!!! assert_parses_blockargs:2506:2 +f{ |a| } +!!! assert_parses_blockargs:2506:3 +f{ |foo:| } +!!! assert_parses_blockargs:2506:4 +f{ } +!!! assert_parses_blockargs:2506:5 +f{ | | } +!!! assert_parses_blockargs:2506:6 +f{ |;a| } +!!! assert_parses_blockargs:2506:7 +f{ |; +a +| } +!!! assert_parses_blockargs:2506:8 +f{ || } +!!! assert_parses_blockargs:2506:9 +f{ |a| } +!!! assert_parses_blockargs:2506:10 +f{ |a, c| } +!!! assert_parses_blockargs:2506:11 +f{ |a,| } +!!! assert_parses_blockargs:2506:12 +f{ |a, &b| } +!!! assert_parses_blockargs:2506:13 +f{ |a, *s, &b| } +!!! assert_parses_blockargs:2506:14 +f{ |a, *, &b| } +!!! assert_parses_blockargs:2506:15 +f{ |a, *s| } +!!! assert_parses_blockargs:2506:16 +f{ |a, *| } +!!! assert_parses_blockargs:2506:17 +f{ |*s, &b| } +!!! assert_parses_blockargs:2506:18 +f{ |*, &b| } +!!! assert_parses_blockargs:2506:19 +f{ |*s| } +!!! assert_parses_blockargs:2506:20 +f{ |*| } +!!! assert_parses_blockargs:2506:21 +f{ |&b| } +!!! assert_parses_blockargs:2506:22 +f{ |a, o=1, o1=2, *r, &b| } +!!! assert_parses_blockargs:2506:23 +f{ |a, o=1, *r, p, &b| } +!!! assert_parses_blockargs:2506:24 +f{ |a, o=1, &b| } +!!! 
assert_parses_blockargs:2506:25 +f{ |a, o=1, p, &b| } +!!! assert_parses_blockargs:2506:26 +f{ |a, *r, p, &b| } +!!! assert_parses_blockargs:2506:27 +f{ |o=1, *r, &b| } +!!! assert_parses_blockargs:2506:28 +f{ |o=1, *r, p, &b| } +!!! assert_parses_blockargs:2506:29 +f{ |o=1, &b| } +!!! assert_parses_blockargs:2506:30 +f{ |o=1, p, &b| } +!!! assert_parses_blockargs:2506:31 +f{ |*r, p, &b| } +!!! assert_parses_blockargs:2506:32 +f{ |foo: 1, bar: 2, **baz, &b| } +!!! assert_parses_blockargs:2506:33 +f{ |foo: 1, &b| } +!!! assert_parses_blockargs:2506:34 +f{ |**baz, &b| } +!!! assert_parses_pattern_match:8503:0 +case foo; in self then true; end +!!! assert_parses_pattern_match:8503:1 +case foo; in 1..2 then true; end +!!! assert_parses_pattern_match:8503:2 +case foo; in 1.. then true; end +!!! assert_parses_pattern_match:8503:3 +case foo; in ..2 then true; end +!!! assert_parses_pattern_match:8503:4 +case foo; in 1...2 then true; end +!!! assert_parses_pattern_match:8503:5 +case foo; in 1... then true; end +!!! assert_parses_pattern_match:8503:6 +case foo; in ...2 then true; end +!!! assert_parses_pattern_match:8503:7 +case foo; in [*x, 1 => a, *y] then true; end +!!! assert_parses_pattern_match:8503:8 +case foo; in String(*, 1, *) then true; end +!!! assert_parses_pattern_match:8503:9 +case foo; in Array[*, 1, *] then true; end +!!! assert_parses_pattern_match:8503:10 +case foo; in *, 42, * then true; end +!!! assert_parses_pattern_match:8503:11 +case foo; in x, then nil; end +!!! assert_parses_pattern_match:8503:12 +case foo; in *x then nil; end +!!! assert_parses_pattern_match:8503:13 +case foo; in * then nil; end +!!! assert_parses_pattern_match:8503:14 +case foo; in x, y then nil; end +!!! assert_parses_pattern_match:8503:15 +case foo; in x, y, then nil; end +!!! assert_parses_pattern_match:8503:16 +case foo; in x, *y, z then nil; end +!!! assert_parses_pattern_match:8503:17 +case foo; in *x, y, z then nil; end +!!! 
assert_parses_pattern_match:8503:18 +case foo; in 1, "a", [], {} then nil; end +!!! assert_parses_pattern_match:8503:19 +case foo; in ->{ 42 } then true; end +!!! assert_parses_pattern_match:8503:20 +case foo; in A(1, 2) then true; end +!!! assert_parses_pattern_match:8503:21 +case foo; in A(x:) then true; end +!!! assert_parses_pattern_match:8503:22 +case foo; in A() then true; end +!!! assert_parses_pattern_match:8503:23 +case foo; in A[1, 2] then true; end +!!! assert_parses_pattern_match:8503:24 +case foo; in A[x:] then true; end +!!! assert_parses_pattern_match:8503:25 +case foo; in A[] then true; end +!!! assert_parses_pattern_match:8503:26 +case foo; in x then x; end +!!! assert_parses_pattern_match:8503:27 +case foo; in {} then true; end +!!! assert_parses_pattern_match:8503:28 +case foo; in a: 1 then true; end +!!! assert_parses_pattern_match:8503:29 +case foo; in { a: 1 } then true; end +!!! assert_parses_pattern_match:8503:30 +case foo; in { a: 1, } then true; end +!!! assert_parses_pattern_match:8503:31 +case foo; in a: then true; end +!!! assert_parses_pattern_match:8503:32 +case foo; in **a then true; end +!!! assert_parses_pattern_match:8503:33 +case foo; in ** then true; end +!!! assert_parses_pattern_match:8503:34 +case foo; in a: 1, b: 2 then true; end +!!! assert_parses_pattern_match:8503:35 +case foo; in a:, b: then true; end +!!! assert_parses_pattern_match:8503:36 +case foo; in a: 1, _a:, ** then true; end +!!! assert_parses_pattern_match:8503:37 +case foo; + in {a: 1 + } + false + ; end +!!! assert_parses_pattern_match:8503:38 +case foo; + in {a: + 2} + false + ; end +!!! assert_parses_pattern_match:8503:39 +case foo; + in {Foo: 42 + } + false + ; end +!!! assert_parses_pattern_match:8503:40 +case foo; + in a: {b:}, c: + p c + ; end +!!! assert_parses_pattern_match:8503:41 +case foo; + in {a: + } + true + ; end +!!! assert_parses_pattern_match:8503:42 +case foo; in A then true; end +!!! 
assert_parses_pattern_match:8503:43 +case foo; in A::B then true; end +!!! assert_parses_pattern_match:8503:44 +case foo; in ::A then true; end +!!! assert_parses_pattern_match:8503:45 +case foo; in [x] then nil; end +!!! assert_parses_pattern_match:8503:46 +case foo; in [x,] then nil; end +!!! assert_parses_pattern_match:8503:47 +case foo; in [x, y] then true; end +!!! assert_parses_pattern_match:8503:48 +case foo; in [x, y,] then true; end +!!! assert_parses_pattern_match:8503:49 +case foo; in [x, y, *] then true; end +!!! assert_parses_pattern_match:8503:50 +case foo; in [x, y, *z] then true; end +!!! assert_parses_pattern_match:8503:51 +case foo; in [x, *y, z] then true; end +!!! assert_parses_pattern_match:8503:52 +case foo; in [x, *, y] then true; end +!!! assert_parses_pattern_match:8503:53 +case foo; in [*x, y] then true; end +!!! assert_parses_pattern_match:8503:54 +case foo; in [*, x] then true; end +!!! assert_parses_pattern_match:8503:55 +case foo; in (1) then true; end +!!! assert_parses_pattern_match:8503:56 +case foo; in x if true; nil; end +!!! assert_parses_pattern_match:8503:57 +case foo; in x unless true; nil; end +!!! assert_parses_pattern_match:8503:58 +case foo; in 1; end +!!! assert_parses_pattern_match:8503:59 +case foo; in ^foo then nil; end +!!! assert_parses_pattern_match:8503:60 +case foo; in "a": then true; end +!!! assert_parses_pattern_match:8503:61 +case foo; in "#{ 'a' }": then true; end +!!! assert_parses_pattern_match:8503:62 +case foo; in "#{ %q{a} }": then true; end +!!! assert_parses_pattern_match:8503:63 +case foo; in "#{ %Q{a} }": then true; end +!!! assert_parses_pattern_match:8503:64 +case foo; in "a": 1 then true; end +!!! assert_parses_pattern_match:8503:65 +case foo; in "#{ 'a' }": 1 then true; end +!!! assert_parses_pattern_match:8503:66 +case foo; in "#{ %q{a} }": 1 then true; end +!!! assert_parses_pattern_match:8503:67 +case foo; in "#{ %Q{a} }": 1 then true; end +!!! 
assert_parses_pattern_match:8503:68 +case foo; in ^(42) then nil; end +!!! assert_parses_pattern_match:8503:69 +case foo; in { foo: ^(42) } then nil; end +!!! assert_parses_pattern_match:8503:70 +case foo; in ^(0+0) then nil; end +!!! assert_parses_pattern_match:8503:71 +case foo; in ^@a; end +!!! assert_parses_pattern_match:8503:72 +case foo; in ^@@TestPatternMatching; end +!!! assert_parses_pattern_match:8503:73 +case foo; in ^$TestPatternMatching; end +!!! assert_parses_pattern_match:8503:74 +case foo; in ^(1 +); end +!!! assert_parses_pattern_match:8503:75 +case foo; in 1 | 2 then true; end +!!! assert_parses_pattern_match:8503:76 +case foo; in 1 => a then true; end +!!! assert_parses_pattern_match:8503:77 +case foo; in **nil then true; end +!!! block in test_endless_comparison_method:10392:0 +def ===(other) = do_something +!!! block in test_endless_comparison_method:10392:1 +def ==(other) = do_something +!!! block in test_endless_comparison_method:10392:2 +def !=(other) = do_something +!!! block in test_endless_comparison_method:10392:3 +def <=(other) = do_something +!!! block in test_endless_comparison_method:10392:4 +def >=(other) = do_something +!!! block in test_endless_comparison_method:10392:5 +def !=(other) = do_something +!!! block in test_parser_slash_slash_n_escaping_in_literals:7327:0 +'a\ +b' +!!! block in test_parser_slash_slash_n_escaping_in_literals:7327:1 +<<-'HERE' +a\ +b +HERE +!!! block in test_parser_slash_slash_n_escaping_in_literals:7327:2 +%q{a\ +b} +!!! block in test_parser_slash_slash_n_escaping_in_literals:7327:3 +"a\ +b" +!!! block in test_parser_slash_slash_n_escaping_in_literals:7327:4 +<<-"HERE" +a\ +b +HERE +!!! block in test_parser_slash_slash_n_escaping_in_literals:7327:5 +%{a\ +b} +!!! block in test_parser_slash_slash_n_escaping_in_literals:7327:6 +%Q{a\ +b} +!!! block in test_parser_slash_slash_n_escaping_in_literals:7327:7 +%w{a\ +b} +!!! block in test_parser_slash_slash_n_escaping_in_literals:7327:8 +%W{a\ +b} +!!! 
block in test_parser_slash_slash_n_escaping_in_literals:7327:9 +%i{a\ +b} +!!! block in test_parser_slash_slash_n_escaping_in_literals:7327:10 +%I{a\ +b} +!!! block in test_parser_slash_slash_n_escaping_in_literals:7327:11 +:'a\ +b' +!!! block in test_parser_slash_slash_n_escaping_in_literals:7327:12 +%s{a\ +b} +!!! block in test_parser_slash_slash_n_escaping_in_literals:7327:13 +:"a\ +b" +!!! block in test_parser_slash_slash_n_escaping_in_literals:7327:14 +/a\ +b/ +!!! block in test_parser_slash_slash_n_escaping_in_literals:7327:15 +%r{a\ +b} +!!! block in test_parser_slash_slash_n_escaping_in_literals:7327:16 +%x{a\ +b} +!!! block in test_parser_slash_slash_n_escaping_in_literals:7327:17 +`a\ +b` +!!! block in test_parser_slash_slash_n_escaping_in_literals:7327:18 +<<-`HERE` +a\ +b +HERE +!!! block in test_ruby_bug_11873_a:6017:0 +a b{c d}, :e do end +!!! block in test_ruby_bug_11873_a:6017:1 +a b{c d}, 1 do end +!!! block in test_ruby_bug_11873_a:6017:2 +a b{c d}, 1.0 do end +!!! block in test_ruby_bug_11873_a:6017:3 +a b{c d}, 1.0r do end +!!! block in test_ruby_bug_11873_a:6017:4 +a b{c d}, 1.0i do end +!!! block in test_ruby_bug_11873_a:6022:0 +a b{c(d)}, :e do end +!!! block in test_ruby_bug_11873_a:6022:1 +a b{c(d)}, 1 do end +!!! block in test_ruby_bug_11873_a:6022:2 +a b{c(d)}, 1.0 do end +!!! block in test_ruby_bug_11873_a:6022:3 +a b{c(d)}, 1.0r do end +!!! block in test_ruby_bug_11873_a:6022:4 +a b{c(d)}, 1.0i do end +!!! block in test_ruby_bug_11873_a:6036:0 +a b(c d), :e do end +!!! block in test_ruby_bug_11873_a:6036:1 +a b(c d), 1 do end +!!! block in test_ruby_bug_11873_a:6036:2 +a b(c d), 1.0 do end +!!! block in test_ruby_bug_11873_a:6036:3 +a b(c d), 1.0r do end +!!! block in test_ruby_bug_11873_a:6036:4 +a b(c d), 1.0i do end +!!! block in test_ruby_bug_11873_a:6041:0 +a b(c(d)), :e do end +!!! block in test_ruby_bug_11873_a:6041:1 +a b(c(d)), 1 do end +!!! block in test_ruby_bug_11873_a:6041:2 +a b(c(d)), 1.0 do end +!!! 
block in test_ruby_bug_11873_a:6041:3 +a b(c(d)), 1.0r do end +!!! block in test_ruby_bug_11873_a:6041:4 +a b(c(d)), 1.0i do end +!!! test___ENCODING__:1037 +__ENCODING__ +!!! test___ENCODING___legacy_:1046 +__ENCODING__ +!!! test_alias:2020 +alias :foo bar +!!! test_alias_gvar:2032 +alias $a $b +!!! test_alias_gvar:2037 +alias $a $+ +!!! test_ambiuous_quoted_label_in_ternary_operator:7204 +a ? b & '': nil +!!! test_and:4447 +foo and bar +!!! test_and:4453 +foo && bar +!!! test_and_asgn:1748 +foo.a &&= 1 +!!! test_and_asgn:1758 +foo[0, 1] &&= 2 +!!! test_and_or_masgn:4475 +foo && (a, b = bar) +!!! test_and_or_masgn:4484 +foo || (a, b = bar) +!!! test_anonymous_blockarg:10861 +def foo(&); bar(&); end +!!! test_arg:2055 +def f(foo); end +!!! test_arg:2066 +def f(foo, bar); end +!!! test_arg_duplicate_ignored:2958 +def foo(_, _); end +!!! test_arg_duplicate_ignored:2972 +def foo(_a, _a); end +!!! test_arg_label:3012 +def foo() a:b end +!!! test_arg_label:3019 +def foo + a:b end +!!! test_arg_label:3026 +f { || a:b } +!!! test_arg_scope:2238 +lambda{|;a|a} +!!! test_args_args_assocs:4077 +fun(foo, :foo => 1) +!!! test_args_args_assocs:4083 +fun(foo, :foo => 1, &baz) +!!! test_args_args_assocs_comma:4092 +foo[bar, :baz => 1,] +!!! test_args_args_comma:3941 +foo[bar,] +!!! test_args_args_star:3908 +fun(foo, *bar) +!!! test_args_args_star:3913 +fun(foo, *bar, &baz) +!!! test_args_assocs:4001 +fun(:foo => 1) +!!! test_args_assocs:4006 +fun(:foo => 1, &baz) +!!! test_args_assocs:4012 +self[:bar => 1] +!!! test_args_assocs:4021 +self.[]= foo, :a => 1 +!!! test_args_assocs:4031 +yield(:foo => 42) +!!! test_args_assocs:4039 +super(:foo => 42) +!!! test_args_assocs_comma:4068 +foo[:baz => 1,] +!!! test_args_assocs_legacy:3951 +fun(:foo => 1) +!!! test_args_assocs_legacy:3956 +fun(:foo => 1, &baz) +!!! test_args_assocs_legacy:3962 +self[:bar => 1] +!!! test_args_assocs_legacy:3971 +self.[]= foo, :a => 1 +!!! test_args_assocs_legacy:3981 +yield(:foo => 42) +!!! 
test_args_assocs_legacy:3989 +super(:foo => 42) +!!! test_args_block_pass:3934 +fun(&bar) +!!! test_args_cmd:3901 +fun(f bar) +!!! test_args_star:3921 +fun(*bar) +!!! test_args_star:3926 +fun(*bar, &baz) +!!! test_array_assocs:629 +[ 1 => 2 ] +!!! test_array_assocs:637 +[ 1, 2 => 3 ] +!!! test_array_plain:589 +[1, 2] +!!! test_array_splat:598 +[1, *foo, 2] +!!! test_array_splat:611 +[1, *foo] +!!! test_array_splat:622 +[*foo] +!!! test_array_symbols:695 +%i[foo bar] +!!! test_array_symbols_empty:732 +%i[] +!!! test_array_symbols_empty:740 +%I() +!!! test_array_symbols_interp:706 +%I[foo #{bar}] +!!! test_array_symbols_interp:721 +%I[foo#{bar}] +!!! test_array_words:647 +%w[foo bar] +!!! test_array_words_empty:682 +%w[] +!!! test_array_words_empty:689 +%W() +!!! test_array_words_interp:657 +%W[foo #{bar}] +!!! test_array_words_interp:671 +%W[foo #{bar}foo#@baz] +!!! test_asgn_cmd:1126 +foo = m foo +!!! test_asgn_cmd:1130 +foo = bar = m foo +!!! test_asgn_mrhs:1449 +foo = bar, 1 +!!! test_asgn_mrhs:1456 +foo = *bar +!!! test_asgn_mrhs:1461 +foo = baz, *bar +!!! test_back_ref:995 +$+ +!!! test_bang:3434 +!foo +!!! test_bang_cmd:3448 +!m foo +!!! test_begin_cmdarg:5526 +p begin 1.times do 1 end end +!!! test_beginless_erange_after_newline:935 +foo +...100 +!!! test_beginless_irange_after_newline:923 +foo +..100 +!!! test_beginless_range:903 +..100 +!!! test_beginless_range:912 +...100 +!!! test_blockarg:2187 +def f(&block); end +!!! test_break:5037 +break(foo) +!!! test_break:5051 +break foo +!!! test_break:5057 +break() +!!! test_break:5064 +break +!!! test_break_block:5072 +break fun foo do end +!!! test_bug_435:7067 +"#{-> foo {}}" +!!! test_bug_447:7046 +m [] do end +!!! test_bug_447:7055 +m [], 1 do end +!!! test_bug_452:7080 +td (1_500).toString(); td.num do; end +!!! test_bug_466:7096 +foo "#{(1+1).to_i}" do; end +!!! test_bug_473:7113 +m "#{[]}" +!!! test_bug_480:7124 +m "#{}#{()}" +!!! test_bug_481:7136 +m def x(); end; 1.tap do end +!!! 
test_bug_ascii_8bit_in_literal:5880 +# coding:utf-8 + "\xD0\xBF\xD1\x80\xD0\xBE\xD0\xB2\xD0\xB5\xD1\x80\xD0\xBA\xD0\xB0" +!!! test_bug_cmd_string_lookahead:5752 +desc "foo" do end +!!! test_bug_cmdarg:5549 +assert dogs +!!! test_bug_cmdarg:5554 +assert do: true +!!! test_bug_cmdarg:5562 +f x: -> do meth do end end +!!! test_bug_def_no_paren_eql_begin:5799 +def foo +=begin +=end +end +!!! test_bug_do_block_in_call_args:5762 +bar def foo; self.each do end end +!!! test_bug_do_block_in_cmdarg:5777 +tap (proc do end) +!!! test_bug_do_block_in_hash_brace:6569 +p :foo, {a: proc do end, b: proc do end} +!!! test_bug_do_block_in_hash_brace:6587 +p :foo, {:a => proc do end, b: proc do end} +!!! test_bug_do_block_in_hash_brace:6605 +p :foo, {"a": proc do end, b: proc do end} +!!! test_bug_do_block_in_hash_brace:6623 +p :foo, {proc do end => proc do end, b: proc do end} +!!! test_bug_do_block_in_hash_brace:6643 +p :foo, {** proc do end, b: proc do end} +!!! test_bug_heredoc_do:5835 +f <<-TABLE do +TABLE +end +!!! test_bug_interp_single:5789 +"#{1}" +!!! test_bug_interp_single:5793 +%W"#{1}" +!!! test_bug_lambda_leakage:6550 +->(scope) {}; scope +!!! test_bug_regex_verification:6563 +/#)/x +!!! test_bug_rescue_empty_else:5813 +begin; rescue LoadError; else; end +!!! test_bug_while_not_parens_do:5805 +while not (true) do end +!!! test_case_cond:4844 +case; when foo; 'foo'; end +!!! test_case_cond_else:4857 +case; when foo; 'foo'; else 'bar'; end +!!! test_case_expr:4816 +case foo; when 'bar'; bar; end +!!! test_case_expr_else:4830 +case foo; when 'bar'; bar; else baz; end +!!! test_casgn_scoped:1192 +Bar::Foo = 10 +!!! test_casgn_toplevel:1181 +::Foo = 10 +!!! test_casgn_unscoped:1203 +Foo = 10 +!!! test_character:248 +?a +!!! test_class:1827 +class Foo; end +!!! test_class:1837 +class Foo end +!!! test_class_definition_in_while_cond:6870 +while class Foo; tap do end; end; break; end +!!! 
test_class_definition_in_while_cond:6882 +while class Foo a = tap do end; end; break; end +!!! test_class_definition_in_while_cond:6895 +while class << self; tap do end; end; break; end +!!! test_class_definition_in_while_cond:6907 +while class << self; a = tap do end; end; break; end +!!! test_class_super:1848 +class Foo < Bar; end +!!! test_class_super_label:1860 +class Foo < a:b; end +!!! test_comments_before_leading_dot__27:7750 +a # +# +.foo +!!! test_comments_before_leading_dot__27:7757 +a # + # +.foo +!!! test_comments_before_leading_dot__27:7764 +a # +# +&.foo +!!! test_comments_before_leading_dot__27:7771 +a # + # +&.foo +!!! test_complex:156 +42i +!!! test_complex:162 +42ri +!!! test_complex:168 +42.1i +!!! test_complex:174 +42.1ri +!!! test_cond_begin:4686 +if (bar); foo; end +!!! test_cond_begin_masgn:4695 +if (bar; a, b = foo); end +!!! test_cond_eflipflop:4758 +if foo...bar; end +!!! test_cond_eflipflop:4772 +!(foo...bar) +!!! test_cond_iflipflop:4735 +if foo..bar; end +!!! test_cond_iflipflop:4749 +!(foo..bar) +!!! test_cond_match_current_line:4781 +if /wat/; end +!!! test_cond_match_current_line:4801 +!/wat/ +!!! test_const_op_asgn:1536 +A += 1 +!!! test_const_op_asgn:1542 +::A += 1 +!!! test_const_op_asgn:1550 +B::A += 1 +!!! test_const_op_asgn:1558 +def x; self::A ||= 1; end +!!! test_const_op_asgn:1567 +def x; ::A ||= 1; end +!!! test_const_scoped:1020 +Bar::Foo +!!! test_const_toplevel:1011 +::Foo +!!! test_const_unscoped:1029 +Foo +!!! test_control_meta_escape_chars_in_regexp__since_31:10686 +/\c\xFF/ +!!! test_control_meta_escape_chars_in_regexp__since_31:10692 +/\c\M-\xFF/ +!!! test_control_meta_escape_chars_in_regexp__since_31:10698 +/\C-\xFF/ +!!! test_control_meta_escape_chars_in_regexp__since_31:10704 +/\C-\M-\xFF/ +!!! test_control_meta_escape_chars_in_regexp__since_31:10710 +/\M-\xFF/ +!!! test_control_meta_escape_chars_in_regexp__since_31:10716 +/\M-\C-\xFF/ +!!! 
test_control_meta_escape_chars_in_regexp__since_31:10722 +/\M-\c\xFF/ +!!! test_cpath:1807 +module ::Foo; end +!!! test_cpath:1813 +module Bar::Foo; end +!!! test_cvar:973 +@@foo +!!! test_cvasgn:1106 +@@var = 10 +!!! test_dedenting_heredoc:297 +p <<~E +E +!!! test_dedenting_heredoc:304 +p <<~E + E +!!! test_dedenting_heredoc:311 +p <<~E + x +E +!!! test_dedenting_heredoc:318 +p <<~E + ð +E +!!! test_dedenting_heredoc:325 +p <<~E + x + y +E +!!! test_dedenting_heredoc:334 +p <<~E + x + y +E +!!! test_dedenting_heredoc:343 +p <<~E + x + y +E +!!! test_dedenting_heredoc:352 +p <<~E + x + y +E +!!! test_dedenting_heredoc:361 +p <<~E + x + y +E +!!! test_dedenting_heredoc:370 +p <<~E + x + +y +E +!!! test_dedenting_heredoc:380 +p <<~E + x + + y +E +!!! test_dedenting_heredoc:390 +p <<~E + x + \ y +E +!!! test_dedenting_heredoc:399 +p <<~E + x + \ y +E +!!! test_dedenting_heredoc:408 +p <<~"E" + x + #{foo} +E +!!! test_dedenting_heredoc:419 +p <<~`E` + x + #{foo} +E +!!! test_dedenting_heredoc:430 +p <<~"E" + x + #{" y"} +E +!!! test_dedenting_interpolating_heredoc_fake_line_continuation:459 +<<~'FOO' + baz\\ + qux +FOO +!!! test_dedenting_non_interpolating_heredoc_line_continuation:451 +<<~'FOO' + baz\ + qux +FOO +!!! test_def:1899 +def foo; end +!!! test_def:1907 +def String; end +!!! test_def:1911 +def String=; end +!!! test_def:1915 +def until; end +!!! test_def:1919 +def BEGIN; end +!!! test_def:1923 +def END; end +!!! test_defined:1058 +defined? foo +!!! test_defined:1064 +defined?(foo) +!!! test_defined:1072 +defined? @foo +!!! test_defs:1929 +def self.foo; end +!!! test_defs:1937 +def self::foo; end +!!! test_defs:1945 +def (foo).foo; end +!!! test_defs:1949 +def String.foo; end +!!! test_defs:1954 +def String::foo; end +!!! test_empty_stmt:60 +!!! test_endless_method:9786 +def foo() = 42 +!!! test_endless_method:9798 +def inc(x) = x + 1 +!!! test_endless_method:9811 +def obj.foo() = 42 +!!! test_endless_method:9823 +def obj.inc(x) = x + 1 +!!! 
test_endless_method_command_syntax:9880 +def foo = puts "Hello" +!!! test_endless_method_command_syntax:9892 +def foo() = puts "Hello" +!!! test_endless_method_command_syntax:9904 +def foo(x) = puts x +!!! test_endless_method_command_syntax:9917 +def obj.foo = puts "Hello" +!!! test_endless_method_command_syntax:9931 +def obj.foo() = puts "Hello" +!!! test_endless_method_command_syntax:9945 +def rescued(x) = raise "to be caught" rescue "instance #{x}" +!!! test_endless_method_command_syntax:9964 +def self.rescued(x) = raise "to be caught" rescue "class #{x}" +!!! test_endless_method_command_syntax:9985 +def obj.foo(x) = puts x +!!! test_endless_method_forwarded_args_legacy:9840 +def foo(...) = bar(...) +!!! test_endless_method_with_rescue_mod:9855 +def m() = 1 rescue 2 +!!! test_endless_method_with_rescue_mod:9866 +def self.m() = 1 rescue 2 +!!! test_endless_method_without_args:10404 +def foo = 42 +!!! test_endless_method_without_args:10412 +def foo = 42 rescue nil +!!! test_endless_method_without_args:10423 +def self.foo = 42 +!!! test_endless_method_without_args:10432 +def self.foo = 42 rescue nil +!!! test_ensure:5261 +begin; meth; ensure; bar; end +!!! test_ensure_empty:5274 +begin ensure end +!!! test_false:96 +false +!!! test_float:129 +1.33 +!!! test_float:134 +-1.33 +!!! test_for:5002 +for a in foo do p a; end +!!! test_for:5014 +for a in foo; p a; end +!!! test_for_mlhs:5023 +for a, b in foo; p a, b; end +!!! test_forward_arg:7899 +def foo(...); bar(...); end +!!! test_forward_arg_with_open_args:10745 +def foo ... +end +!!! test_forward_arg_with_open_args:10752 +def foo a, b = 1, ... +end +!!! test_forward_arg_with_open_args:10770 +def foo(a, ...) bar(...) end +!!! test_forward_arg_with_open_args:10781 +def foo a, ... + bar(...) +end +!!! test_forward_arg_with_open_args:10792 +def foo b = 1, ... + bar(...) +end +!!! test_forward_arg_with_open_args:10804 +def foo ...; bar(...); end +!!! 
test_forward_arg_with_open_args:10814 +def foo a, ...; bar(...); end +!!! test_forward_arg_with_open_args:10825 +def foo b = 1, ...; bar(...); end +!!! test_forward_arg_with_open_args:10837 +(def foo ... + bar(...) +end) +!!! test_forward_arg_with_open_args:10848 +(def foo ...; bar(...); end) +!!! test_forward_args_legacy:7863 +def foo(...); bar(...); end +!!! test_forward_args_legacy:7875 +def foo(...); super(...); end +!!! test_forward_args_legacy:7887 +def foo(...); end +!!! test_forwarded_argument_with_kwrestarg:10962 +def foo(argument, **); bar(argument, **); end +!!! test_forwarded_argument_with_restarg:10923 +def foo(argument, *); bar(argument, *); end +!!! test_forwarded_kwrestarg:10943 +def foo(**); bar(**); end +!!! test_forwarded_restarg:10905 +def foo(*); bar(*); end +!!! test_gvar:980 +$foo +!!! test_gvasgn:1116 +$var = 10 +!!! test_hash_empty:750 +{ } +!!! test_hash_hashrocket:759 +{ 1 => 2 } +!!! test_hash_hashrocket:768 +{ 1 => 2, :foo => "bar" } +!!! test_hash_kwsplat:821 +{ foo: 2, **bar } +!!! test_hash_label:776 +{ foo: 2 } +!!! test_hash_label_end:789 +{ 'foo': 2 } +!!! test_hash_label_end:802 +{ 'foo': 2, 'bar': {}} +!!! test_hash_label_end:810 +f(a ? "a":1) +!!! test_hash_pair_value_omission:10040 +{a:, b:} +!!! test_hash_pair_value_omission:10054 +{puts:} +!!! test_hash_pair_value_omission:10065 +{BAR:} +!!! test_heredoc:263 +<(**nil) {} +!!! test_kwoptarg:2124 +def f(foo: 1); end +!!! test_kwrestarg_named:2135 +def f(**foo); end +!!! test_kwrestarg_unnamed:2146 +def f(**); end +!!! test_lbrace_arg_after_command_args:7235 +let (:a) { m do; end } +!!! test_lparenarg_after_lvar__since_25:6679 +meth (-1.3).abs +!!! test_lparenarg_after_lvar__since_25:6688 +foo (-1.3).abs +!!! test_lvar:959 +foo +!!! test_lvar_injecting_match:3778 +/(?bar)/ =~ 'bar'; match +!!! test_lvasgn:1084 +var = 10; var +!!! test_masgn:1247 +foo, bar = 1, 2 +!!! test_masgn:1258 +(foo, bar) = 1, 2 +!!! test_masgn:1268 +foo, bar, baz = 1, 2 +!!! 
test_masgn_attr:1390 +self.a, self[1, 2] = foo +!!! test_masgn_attr:1403 +self::a, foo = foo +!!! test_masgn_attr:1411 +self.A, foo = foo +!!! test_masgn_cmd:1439 +foo, bar = m foo +!!! test_masgn_const:1421 +self::A, foo = foo +!!! test_masgn_const:1429 +::A, foo = foo +!!! test_masgn_nested:1365 +a, (b, c) = foo +!!! test_masgn_nested:1379 +((b, )) = foo +!!! test_masgn_splat:1279 +@foo, @@bar = *foo +!!! test_masgn_splat:1288 +a, b = *foo, bar +!!! test_masgn_splat:1296 +a, *b = bar +!!! test_masgn_splat:1302 +a, *b, c = bar +!!! test_masgn_splat:1313 +a, * = bar +!!! test_masgn_splat:1319 +a, *, c = bar +!!! test_masgn_splat:1330 +*b = bar +!!! test_masgn_splat:1336 +*b, c = bar +!!! test_masgn_splat:1346 +* = bar +!!! test_masgn_splat:1352 +*, c, d = bar +!!! test_method_definition_in_while_cond:6816 +while def foo; tap do end; end; break; end +!!! test_method_definition_in_while_cond:6828 +while def self.foo; tap do end; end; break; end +!!! test_method_definition_in_while_cond:6841 +while def foo a = tap do end; end; break; end +!!! test_method_definition_in_while_cond:6854 +while def self.foo a = tap do end; end; break; end +!!! test_module:1789 +module Foo; end +!!! test_multiple_pattern_matches:11086 +{a: 0} => a: +{a: 0} => a: +!!! test_multiple_pattern_matches:11102 +{a: 0} in a: +{a: 0} in a: +!!! test_newline_in_hash_argument:11035 +obj.set foo: +1 +!!! test_newline_in_hash_argument:11046 +obj.set "foo": +1 +!!! test_newline_in_hash_argument:11057 +case foo +in a: +0 +true +in "b": +0 +true +end +!!! test_next:5131 +next(foo) +!!! test_next:5145 +next foo +!!! test_next:5151 +next() +!!! test_next:5158 +next +!!! test_next_block:5166 +next fun foo do end +!!! test_nil:66 +nil +!!! test_nil_expression:73 +() +!!! test_nil_expression:80 +begin end +!!! test_non_lvar_injecting_match:3793 +/#{1}(?bar)/ =~ 'bar' +!!! test_not:3462 +not foo +!!! test_not:3468 +not(foo) +!!! test_not:3474 +not() +!!! test_not_cmd:3488 +not m foo +!!! 
test_not_masgn__24:4672 +!(a, b = foo) +!!! test_nth_ref:1002 +$10 +!!! test_numbered_args_after_27:7358 +m { _1 + _9 } +!!! test_numbered_args_after_27:7373 +m do _1 + _9 end +!!! test_numbered_args_after_27:7390 +-> { _1 + _9} +!!! test_numbered_args_after_27:7405 +-> do _1 + _9 end +!!! test_numparam_outside_block:7512 +class A; _1; end +!!! test_numparam_outside_block:7520 +module A; _1; end +!!! test_numparam_outside_block:7528 +class << foo; _1; end +!!! test_numparam_outside_block:7536 +def self.m; _1; end +!!! test_numparam_outside_block:7545 +_1 +!!! test_op_asgn:1606 +foo.a += 1 +!!! test_op_asgn:1616 +foo::a += 1 +!!! test_op_asgn:1622 +foo.A += 1 +!!! test_op_asgn_cmd:1630 +foo.a += m foo +!!! test_op_asgn_cmd:1636 +foo::a += m foo +!!! test_op_asgn_cmd:1642 +foo.A += m foo +!!! test_op_asgn_cmd:1654 +foo::A += m foo +!!! test_op_asgn_index:1664 +foo[0, 1] += 2 +!!! test_op_asgn_index_cmd:1678 +foo[0, 1] += m foo +!!! test_optarg:2074 +def f foo = 1; end +!!! test_optarg:2084 +def f(foo=1, bar=2); end +!!! test_or:4461 +foo or bar +!!! test_or:4467 +foo || bar +!!! test_or_asgn:1724 +foo.a ||= 1 +!!! test_or_asgn:1734 +foo[0, 1] ||= 2 +!!! test_parser_bug_272:6528 +a @b do |c|;end +!!! test_parser_bug_490:7151 +def m; class << self; class C; end; end; end +!!! test_parser_bug_490:7162 +def m; class << self; module M; end; end; end +!!! test_parser_bug_490:7173 +def m; class << self; A = nil; end; end +!!! test_parser_bug_507:7265 +m = -> *args do end +!!! test_parser_bug_518:7277 +class A < B +end +!!! test_parser_bug_525:7287 +m1 :k => m2 do; m3() do end; end +!!! test_parser_bug_604:7737 +m a + b do end +!!! test_parser_bug_640:443 +<<~FOO + baz\ + qux +FOO +!!! test_parser_bug_645:9774 +-> (arg={}) {} +!!! test_parser_bug_830:10630 +/\(/ +!!! test_parser_drops_truncated_parts_of_squiggly_heredoc:10446 +<<~HERE + #{} +HERE +!!! 
test_pattern_matching__FILE__LINE_literals:9473 + case [__FILE__, __LINE__ + 1, __ENCODING__] + in [__FILE__, __LINE__, __ENCODING__] + end +!!! test_pattern_matching_blank_else:9390 +case 1; in 2; 3; else; end +!!! test_pattern_matching_else:9376 +case 1; in 2; 3; else; 4; end +!!! test_pattern_matching_single_line:9540 +1 => [a]; a +!!! test_pattern_matching_single_line:9552 +1 in [a]; a +!!! test_pattern_matching_single_line_allowed_omission_of_parentheses:9566 +[1, 2] => a, b; a +!!! test_pattern_matching_single_line_allowed_omission_of_parentheses:9581 +{a: 1} => a:; a +!!! test_pattern_matching_single_line_allowed_omission_of_parentheses:9596 +[1, 2] in a, b; a +!!! test_pattern_matching_single_line_allowed_omission_of_parentheses:9611 +{a: 1} in a:; a +!!! test_pattern_matching_single_line_allowed_omission_of_parentheses:9626 +{key: :value} in key: value; value +!!! test_pattern_matching_single_line_allowed_omission_of_parentheses:9643 +{key: :value} => key: value; value +!!! test_postexe:5486 +END { 1 } +!!! test_preexe:5467 +BEGIN { 1 } +!!! test_procarg0:2803 +m { |foo| } +!!! test_procarg0:2812 +m { |(foo, bar)| } +!!! test_range_endless:869 +1.. +!!! test_range_endless:877 +1... +!!! test_range_exclusive:861 +1...2 +!!! test_range_inclusive:853 +1..2 +!!! test_rational:142 +42r +!!! test_rational:148 +42.1r +!!! test_redo:5178 +redo +!!! test_regex_interp:551 +/foo#{bar}baz/ +!!! test_regex_plain:541 +/source/im +!!! test_resbody_list:5398 +begin; meth; rescue Exception; bar; end +!!! test_resbody_list_mrhs:5411 +begin; meth; rescue Exception, foo; bar; end +!!! test_resbody_list_var:5444 +begin; meth; rescue foo => ex; bar; end +!!! test_resbody_var:5426 +begin; meth; rescue => ex; bar; end +!!! test_resbody_var:5434 +begin; meth; rescue => @ex; bar; end +!!! test_rescue:5188 +begin; meth; rescue; foo; end +!!! test_rescue_else:5203 +begin; meth; rescue; foo; else; bar; end +!!! 
test_rescue_else_ensure:5302 +begin; meth; rescue; baz; else foo; ensure; bar end +!!! test_rescue_ensure:5286 +begin; meth; rescue; baz; ensure; bar; end +!!! test_rescue_in_lambda_block:6928 +-> do rescue; end +!!! test_rescue_mod:5319 +meth rescue bar +!!! test_rescue_mod_asgn:5331 +foo = meth rescue bar +!!! test_rescue_mod_masgn:5345 +foo, bar = meth rescue [1, 2] +!!! test_rescue_mod_op_assign:5365 +foo += meth rescue bar +!!! test_rescue_without_begin_end:5381 +meth do; foo; rescue; bar; end +!!! test_restarg_named:2094 +def f(*foo); end +!!! test_restarg_unnamed:2104 +def f(*); end +!!! test_retry:5457 +retry +!!! test_return:5084 +return(foo) +!!! test_return:5098 +return foo +!!! test_return:5104 +return() +!!! test_return:5111 +return +!!! test_return_block:5119 +return fun foo do end +!!! test_ruby_bug_10279:5905 +{a: if true then 42 end} +!!! test_ruby_bug_10653:5915 +true ? 1.tap do |n| p n end : 0 +!!! test_ruby_bug_10653:5945 +false ? raise {} : tap {} +!!! test_ruby_bug_10653:5958 +false ? raise do end : tap do end +!!! test_ruby_bug_11107:5973 +p ->() do a() do end end +!!! test_ruby_bug_11380:5985 +p -> { :hello }, a: 1 do end +!!! test_ruby_bug_11873:6353 +a b{c d}, "x" do end +!!! test_ruby_bug_11873:6367 +a b(c d), "x" do end +!!! test_ruby_bug_11873:6380 +a b{c(d)}, "x" do end +!!! test_ruby_bug_11873:6394 +a b(c(d)), "x" do end +!!! test_ruby_bug_11873:6407 +a b{c d}, /x/ do end +!!! test_ruby_bug_11873:6421 +a b(c d), /x/ do end +!!! test_ruby_bug_11873:6434 +a b{c(d)}, /x/ do end +!!! test_ruby_bug_11873:6448 +a b(c(d)), /x/ do end +!!! test_ruby_bug_11873:6461 +a b{c d}, /x/m do end +!!! test_ruby_bug_11873:6475 +a b(c d), /x/m do end +!!! test_ruby_bug_11873:6488 +a b{c(d)}, /x/m do end +!!! test_ruby_bug_11873:6502 +a b(c(d)), /x/m do end +!!! test_ruby_bug_11873_b:6050 +p p{p(p);p p}, tap do end +!!! test_ruby_bug_11989:6069 +p <<~"E" + x\n y +E +!!! test_ruby_bug_11990:6078 +p <<~E " y" + x +E +!!! 
test_ruby_bug_12073:6089 +a = 1; a b: 1 +!!! test_ruby_bug_12073:6102 +def foo raise; raise A::B, ''; end +!!! test_ruby_bug_12402:6116 +foo = raise(bar) rescue nil +!!! test_ruby_bug_12402:6127 +foo += raise(bar) rescue nil +!!! test_ruby_bug_12402:6139 +foo[0] += raise(bar) rescue nil +!!! test_ruby_bug_12402:6153 +foo.m += raise(bar) rescue nil +!!! test_ruby_bug_12402:6166 +foo::m += raise(bar) rescue nil +!!! test_ruby_bug_12402:6179 +foo.C += raise(bar) rescue nil +!!! test_ruby_bug_12402:6192 +foo::C ||= raise(bar) rescue nil +!!! test_ruby_bug_12402:6205 +foo = raise bar rescue nil +!!! test_ruby_bug_12402:6216 +foo += raise bar rescue nil +!!! test_ruby_bug_12402:6228 +foo[0] += raise bar rescue nil +!!! test_ruby_bug_12402:6242 +foo.m += raise bar rescue nil +!!! test_ruby_bug_12402:6255 +foo::m += raise bar rescue nil +!!! test_ruby_bug_12402:6268 +foo.C += raise bar rescue nil +!!! test_ruby_bug_12402:6281 +foo::C ||= raise bar rescue nil +!!! test_ruby_bug_12669:6296 +a = b = raise :x +!!! test_ruby_bug_12669:6305 +a += b = raise :x +!!! test_ruby_bug_12669:6314 +a = b += raise :x +!!! test_ruby_bug_12669:6323 +a += b += raise :x +!!! test_ruby_bug_12686:6334 +f (g rescue nil) +!!! test_ruby_bug_13547:7018 +meth[] {} +!!! test_ruby_bug_14690:7250 +let () { m(a) do; end } +!!! test_ruby_bug_15789:7622 +m ->(a = ->{_1}) {a} +!!! test_ruby_bug_15789:7636 +m ->(a: ->{_1}) {a} +!!! test_ruby_bug_9669:5889 +def a b: +return +end +!!! test_ruby_bug_9669:5895 +o = { +a: +1 +} +!!! test_sclass:1884 +class << foo; nil; end +!!! test_self:952 +self +!!! test_send_attr_asgn:3528 +foo.a = 1 +!!! test_send_attr_asgn:3536 +foo::a = 1 +!!! test_send_attr_asgn:3544 +foo.A = 1 +!!! test_send_attr_asgn:3552 +foo::A = 1 +!!! test_send_attr_asgn_conditional:3751 +a&.b = 1 +!!! test_send_binary_op:3308 +foo + 1 +!!! test_send_binary_op:3314 +foo - 1 +!!! test_send_binary_op:3318 +foo * 1 +!!! test_send_binary_op:3322 +foo / 1 +!!! test_send_binary_op:3326 +foo % 1 +!!! 
test_send_binary_op:3330 +foo ** 1 +!!! test_send_binary_op:3334 +foo | 1 +!!! test_send_binary_op:3338 +foo ^ 1 +!!! test_send_binary_op:3342 +foo & 1 +!!! test_send_binary_op:3346 +foo <=> 1 +!!! test_send_binary_op:3350 +foo < 1 +!!! test_send_binary_op:3354 +foo <= 1 +!!! test_send_binary_op:3358 +foo > 1 +!!! test_send_binary_op:3362 +foo >= 1 +!!! test_send_binary_op:3366 +foo == 1 +!!! test_send_binary_op:3376 +foo != 1 +!!! test_send_binary_op:3382 +foo === 1 +!!! test_send_binary_op:3386 +foo =~ 1 +!!! test_send_binary_op:3396 +foo !~ 1 +!!! test_send_binary_op:3402 +foo << 1 +!!! test_send_binary_op:3406 +foo >> 1 +!!! test_send_block_chain_cmd:3201 +meth 1 do end.fun bar +!!! test_send_block_chain_cmd:3212 +meth 1 do end.fun(bar) +!!! test_send_block_chain_cmd:3225 +meth 1 do end::fun bar +!!! test_send_block_chain_cmd:3236 +meth 1 do end::fun(bar) +!!! test_send_block_chain_cmd:3249 +meth 1 do end.fun bar do end +!!! test_send_block_chain_cmd:3261 +meth 1 do end.fun(bar) {} +!!! test_send_block_chain_cmd:3273 +meth 1 do end.fun {} +!!! test_send_block_conditional:3759 +foo&.bar {} +!!! test_send_call:3721 +foo.(1) +!!! test_send_call:3731 +foo::(1) +!!! test_send_conditional:3743 +a&.b +!!! test_send_index:3562 +foo[1, 2] +!!! test_send_index_asgn:3591 +foo[1, 2] = 3 +!!! test_send_index_asgn_legacy:3603 +foo[1, 2] = 3 +!!! test_send_index_cmd:3584 +foo[m bar] +!!! test_send_index_legacy:3573 +foo[1, 2] +!!! test_send_lambda:3615 +->{ } +!!! test_send_lambda:3625 +-> * { } +!!! test_send_lambda:3636 +-> do end +!!! test_send_lambda_args:3648 +->(a) { } +!!! test_send_lambda_args:3662 +-> (a) { } +!!! test_send_lambda_args_noparen:3686 +-> a: 1 { } +!!! test_send_lambda_args_noparen:3695 +-> a: { } +!!! test_send_lambda_args_shadow:3673 +->(a; foo, bar) { } +!!! test_send_lambda_legacy:3707 +->{ } +!!! test_send_op_asgn_conditional:3770 +a&.b &&= 1 +!!! test_send_plain:3105 +foo.fun +!!! test_send_plain:3112 +foo::fun +!!! 
test_send_plain:3119 +foo::Fun() +!!! test_send_plain_cmd:3128 +foo.fun bar +!!! test_send_plain_cmd:3135 +foo::fun bar +!!! test_send_plain_cmd:3142 +foo::Fun bar +!!! test_send_self:3044 +fun +!!! test_send_self:3050 +fun! +!!! test_send_self:3056 +fun(1) +!!! test_send_self_block:3066 +fun { } +!!! test_send_self_block:3070 +fun() { } +!!! test_send_self_block:3074 +fun(1) { } +!!! test_send_self_block:3078 +fun do end +!!! test_send_unary_op:3412 +-foo +!!! test_send_unary_op:3418 ++foo +!!! test_send_unary_op:3422 +~foo +!!! test_slash_newline_in_heredocs:7186 +<<~E + 1 \ + 2 + 3 +E +!!! test_slash_newline_in_heredocs:7194 +<<-E + 1 \ + 2 + 3 +E +!!! test_space_args_arg:4132 +fun (1) +!!! test_space_args_arg_block:4146 +fun (1) {} +!!! test_space_args_arg_block:4160 +foo.fun (1) {} +!!! test_space_args_arg_block:4176 +foo::fun (1) {} +!!! test_space_args_arg_call:4198 +fun (1).to_i +!!! test_space_args_arg_newline:4138 +fun (1 +) +!!! test_space_args_block:4430 +fun () {} +!!! test_space_args_cmd:4125 +fun (f bar) +!!! test_string___FILE__:241 +__FILE__ +!!! test_string_concat:226 +"foo#@a" "bar" +!!! test_string_dvar:215 +"#@a #@@a #$a" +!!! test_string_interp:200 +"foo#{bar}baz" +!!! test_string_plain:184 +'foobar' +!!! test_string_plain:191 +%q(foobar) +!!! test_super:3807 +super(foo) +!!! test_super:3815 +super foo +!!! test_super:3821 +super() +!!! test_super_block:3839 +super foo, bar do end +!!! test_super_block:3845 +super do end +!!! test_symbol_interp:484 +:"foo#{bar}baz" +!!! test_symbol_plain:469 +:foo +!!! test_symbol_plain:475 +:'foo' +!!! test_ternary:4605 +foo ? 1 : 2 +!!! test_ternary_ambiguous_symbol:4614 +t=1;(foo)?t:T +!!! test_trailing_forward_arg:8022 +def foo(a, b, ...); bar(a, 42, ...); end +!!! test_true:89 +true +!!! test_unary_num_pow_precedence:3505 ++2.0 ** 10 +!!! test_unary_num_pow_precedence:3512 +-2 ** 10 +!!! test_unary_num_pow_precedence:3519 +-2.0 ** 10 +!!! test_undef:2003 +undef foo, :bar, :"foo#{1}" +!!! 
test_unless:4529 +unless foo then bar; end +!!! test_unless:4537 +unless foo; bar; end +!!! test_unless_else:4573 +unless foo then bar; else baz; end +!!! test_unless_else:4582 +unless foo; bar; else baz; end +!!! test_unless_mod:4546 +bar unless foo +!!! test_until:4948 +until foo do meth end +!!! test_until:4955 +until foo; meth end +!!! test_until_mod:4963 +meth until foo +!!! test_until_post:4978 +begin meth end until foo +!!! test_var_and_asgn:1714 +a &&= 1 +!!! test_var_op_asgn:1498 +a += 1 +!!! test_var_op_asgn:1504 +@a |= 1 +!!! test_var_op_asgn:1510 +@@var |= 10 +!!! test_var_op_asgn:1514 +def a; @@var |= 10; end +!!! test_var_op_asgn_cmd:1521 +foo += m foo +!!! test_var_or_asgn:1706 +a ||= 1 +!!! test_when_multi:4895 +case foo; when 'bar', 'baz'; bar; end +!!! test_when_splat:4904 +case foo; when 1, *baz; bar; when *foo; end +!!! test_when_then:4883 +case foo; when 'bar' then bar; end +!!! test_while:4924 +while foo do meth end +!!! test_while:4932 +while foo; meth end +!!! test_while_mod:4941 +meth while foo +!!! test_while_post:4970 +begin meth end while foo +!!! test_xstring_interp:524 +`foo#{bar}baz` +!!! test_xstring_plain:515 +`foobar` +!!! test_yield:3855 +yield(foo) +!!! test_yield:3863 +yield foo +!!! test_yield:3869 +yield() +!!! test_yield:3877 +yield +!!! test_zsuper:3831 +super diff --git a/test/translation/parser_test.rb b/test/translation/parser_test.rb new file mode 100644 index 00000000..1df98f47 --- /dev/null +++ b/test/translation/parser_test.rb @@ -0,0 +1,167 @@ +# frozen_string_literal: true + +require_relative "../test_helper" +require "parser/current" + +Parser::Builders::Default.modernize + +module SyntaxTree + module Translation + class ParserTest < Minitest::Test + known_failures = [ + # I think this may be a bug in the parser gem's precedence calculation. + # Unary plus appears to be parsed as part of the number literal in + # CRuby, but parser is parsing it as a separate operator. 
+ "test_unary_num_pow_precedence:3505", + + # Not much to be done about this. Basically, regular expressions with + # named capture groups that use the =~ operator inject local variables + # into the current scope. In the parser gem, it detects this and changes + # future references to that name to be a local variable instead of a + # potential method call. CRuby does not do this. + "test_lvar_injecting_match:3778", + + # This is failing because CRuby is not marking values captured in hash + # patterns as local variables, while the parser gem is. + "test_pattern_matching_hash:8971", + + # This is not actually allowed in the CRuby parser but the parser gem + # thinks it is allowed. + "test_pattern_matching_hash_with_string_keys:9016", + "test_pattern_matching_hash_with_string_keys:9027", + "test_pattern_matching_hash_with_string_keys:9038", + "test_pattern_matching_hash_with_string_keys:9060", + "test_pattern_matching_hash_with_string_keys:9071", + "test_pattern_matching_hash_with_string_keys:9082", + + # This happens with pattern matching where you're matching a literal + # value inside parentheses, which doesn't really do anything. Ripper + # doesn't capture that this value is inside a parentheses, so it's hard + # to translate properly. + "test_pattern_matching_expr_in_paren:9206", + + # These are also failing because of CRuby not marking values captured in + # hash patterns as local variables. + "test_pattern_matching_single_line_allowed_omission_of_parentheses:*", + + # I'm not even sure what this is testing, because the code is invalid in + # CRuby. 
+ "test_control_meta_escape_chars_in_regexp__since_31:*", + ] + + todo_failures = [ + "test_dedenting_heredoc:334", + "test_dedenting_heredoc:390", + "test_dedenting_heredoc:399", + "test_slash_newline_in_heredocs:7194", + "test_parser_slash_slash_n_escaping_in_literals:*", + "test_forwarded_restarg:*", + "test_forwarded_kwrestarg:*", + "test_forwarded_argument_with_restarg:*", + "test_forwarded_argument_with_kwrestarg:*" + ] + + current_version = RUBY_VERSION.split(".")[0..1].join(".") + + if current_version <= "2.7" + # I'm not sure why this is failing on 2.7.0, but we'll turn it off for + # now until we have more time to investigate. + todo_failures.push( + "test_pattern_matching_hash:*", + "test_pattern_matching_single_line:9552" + ) + end + + if current_version <= "3.0" + # In < 3.0, there are some changes to the way the parser gem handles + # forwarded args. We should eventually support this, but for now we're + # going to mark them as todo. + todo_failures.push( + "test_forward_arg:*", + "test_forward_args_legacy:*", + "test_endless_method_forwarded_args_legacy:*", + "test_trailing_forward_arg:*", + "test_forward_arg_with_open_args:10770", + ) + end + + if current_version == "3.1" + # This test actually fails on 3.1.0, even though it's marked as being + # since 3.1. So we're going to skip this test on 3.1, but leave it in + # for other versions. + known_failures.push( + "test_multiple_pattern_matches:11086", + "test_multiple_pattern_matches:11102" + ) + end + + if current_version < "3.2" || RUBY_ENGINE == "truffleruby" + known_failures.push( + "test_if_while_after_class__since_32:11004", + "test_if_while_after_class__since_32:11014", + "test_newline_in_hash_argument:11057" + ) + end + + all_failures = known_failures + todo_failures + + File + .foreach(File.expand_path("parser.txt", __dir__), chomp: true) + .slice_before { |line| line.start_with?("!!!") } + .each do |(prefix, *lines)| + name = prefix[4..] + next if all_failures.any? 
{ |pattern| File.fnmatch?(pattern, name) } + + define_method(name) { assert_parses("#{lines.join("\n")}\n") } + end + + private + + def assert_parses(source) + parser = ::Parser::CurrentRuby.default_parser + parser.diagnostics.consumer = ->(*) {} + + buffer = ::Parser::Source::Buffer.new("(string)", 1) + buffer.source = source + + expected = + begin + parser.parse(buffer) + rescue ::Parser::SyntaxError + # We can get a syntax error if we're parsing a fixture that was + # designed for a later Ruby version but we're running an earlier + # Ruby version. In this case we can just return early from the test. + end + + return if expected.nil? + node = SyntaxTree.parse(source) + assert_equal expected, SyntaxTree::Translation.to_parser(node, buffer) + end + end + end +end + +if ENV["PARSER_LOCATION"] + # Modify the source map == check so that it doesn't check against the node + # itself so we don't get into a recursive loop. + Parser::Source::Map.prepend( + Module.new do + def ==(other) + self.class == other.class && + (instance_variables - %i[@node]).map do |ivar| + instance_variable_get(ivar) == other.instance_variable_get(ivar) + end.reduce(:&) + end + end + ) + + # Next, ensure that we're comparing the nodes and also comparing the source + # ranges so that we're getting all of the necessary information. 
+ Parser::AST::Node.prepend( + Module.new do + def ==(other) + super && (location == other.location) + end + end + ) +end diff --git a/test/visitor_test.rb b/test/visitor_test.rb index 74f3df75..d9637df0 100644 --- a/test/visitor_test.rb +++ b/test/visitor_test.rb @@ -30,13 +30,15 @@ def initialize @visited_nodes = [] end - visit_method def visit_class(node) - @visited_nodes << node.constant.constant.value - super - end + visit_methods do + def visit_class(node) + @visited_nodes << node.constant.constant.value + super + end - visit_method def visit_def(node) - @visited_nodes << node.name.value + def visit_def(node) + @visited_nodes << node.name.value + end end end @@ -53,5 +55,19 @@ def test_visit_method_correction assert_match(/visit_binary/, message) end end + + class VisitMethodsTestVisitor < BasicVisitor + end + + def test_visit_methods + VisitMethodsTestVisitor.visit_methods do + assert_raises(BasicVisitor::VisitMethodError) do + # In reality, this would be a method defined using the def keyword, + # but we're using method_added here to trigger the checker so that we + # aren't defining methods dynamically in the test suite. 
+ VisitMethodsTestVisitor.method_added(:visit_foo) + end + end + end end end diff --git a/test/visitor_with_environment_test.rb b/test/visitor_with_environment_test.rb deleted file mode 100644 index cc4007fe..00000000 --- a/test/visitor_with_environment_test.rb +++ /dev/null @@ -1,659 +0,0 @@ -# frozen_string_literal: true - -require_relative "test_helper" - -module SyntaxTree - class VisitorWithEnvironmentTest < Minitest::Test - class Collector < Visitor - include WithEnvironment - - attr_reader :variables, :arguments - - def initialize - @variables = {} - @arguments = {} - end - - def visit_ident(node) - local = current_environment.find_local(node.value) - return unless local - - value = node.value.delete_suffix(":") - - case local.type - when :argument - @arguments[value] = local - when :variable - @variables[value] = local - end - end - - def visit_label(node) - value = node.value.delete_suffix(":") - local = current_environment.find_local(value) - return unless local - - @arguments[value] = node if local.type == :argument - end - end - - def test_collecting_simple_variables - tree = SyntaxTree.parse(<<~RUBY) - def foo - a = 1 - a - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(1, visitor.variables.length) - - variable = visitor.variables["a"] - assert_equal(1, variable.definitions.length) - assert_equal(1, variable.usages.length) - - assert_equal(2, variable.definitions[0].start_line) - assert_equal(3, variable.usages[0].start_line) - end - - def test_collecting_aref_variables - tree = SyntaxTree.parse(<<~RUBY) - def foo - a = [] - a[1] - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(1, visitor.variables.length) - - variable = visitor.variables["a"] - assert_equal(1, variable.definitions.length) - assert_equal(1, variable.usages.length) - - assert_equal(2, variable.definitions[0].start_line) - assert_equal(3, variable.usages[0].start_line) - end - - def test_collecting_multi_assign_variables - 
tree = SyntaxTree.parse(<<~RUBY) - def foo - a, b = [1, 2] - puts a - puts b - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(2, visitor.variables.length) - - variable_a = visitor.variables["a"] - assert_equal(1, variable_a.definitions.length) - assert_equal(1, variable_a.usages.length) - - assert_equal(2, variable_a.definitions[0].start_line) - assert_equal(3, variable_a.usages[0].start_line) - - variable_b = visitor.variables["b"] - assert_equal(1, variable_b.definitions.length) - assert_equal(1, variable_b.usages.length) - - assert_equal(2, variable_b.definitions[0].start_line) - assert_equal(4, variable_b.usages[0].start_line) - end - - def test_collecting_pattern_matching_variables - tree = SyntaxTree.parse(<<~RUBY) - def foo - case [1, 2] - in Integer => a, Integer - puts a - end - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - # There are two occurrences, one on line 3 for pinning and one on line 4 - # for reference - assert_equal(1, visitor.variables.length) - - variable = visitor.variables["a"] - - # Assignment a - assert_equal(3, variable.definitions[0].start_line) - assert_equal(4, variable.usages[0].start_line) - end - - def test_collecting_pinned_variables - tree = SyntaxTree.parse(<<~RUBY) - def foo - a = 18 - case [1, 2] - in ^a, *rest - puts a - puts rest - end - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(2, visitor.variables.length) - - variable_a = visitor.variables["a"] - assert_equal(2, variable_a.definitions.length) - assert_equal(1, variable_a.usages.length) - - assert_equal(2, variable_a.definitions[0].start_line) - assert_equal(4, variable_a.definitions[1].start_line) - assert_equal(5, variable_a.usages[0].start_line) - - variable_rest = visitor.variables["rest"] - assert_equal(1, variable_rest.definitions.length) - assert_equal(4, variable_rest.definitions[0].start_line) - - # Rest is considered a vcall by the parser instead of a var_ref - # assert_equal(1, 
variable_rest.usages.length) - # assert_equal(6, variable_rest.usages[0].start_line) - end - - if RUBY_VERSION >= "3.1" - def test_collecting_one_line_pattern_matching_variables - tree = SyntaxTree.parse(<<~RUBY) - def foo - [1] => a - puts a - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(1, visitor.variables.length) - - variable = visitor.variables["a"] - assert_equal(1, variable.definitions.length) - assert_equal(1, variable.usages.length) - - assert_equal(2, variable.definitions[0].start_line) - assert_equal(3, variable.usages[0].start_line) - end - - def test_collecting_endless_method_arguments - tree = SyntaxTree.parse(<<~RUBY) - def foo(a) = puts a - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(1, visitor.arguments.length) - - argument = visitor.arguments["a"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - - assert_equal(1, argument.definitions[0].start_line) - assert_equal(1, argument.usages[0].start_line) - end - end - - def test_collecting_method_arguments - tree = SyntaxTree.parse(<<~RUBY) - def foo(a) - puts a - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(1, visitor.arguments.length) - - argument = visitor.arguments["a"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - - assert_equal(1, argument.definitions[0].start_line) - assert_equal(2, argument.usages[0].start_line) - end - - def test_collecting_singleton_method_arguments - tree = SyntaxTree.parse(<<~RUBY) - def self.foo(a) - puts a - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(1, visitor.arguments.length) - - argument = visitor.arguments["a"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - - assert_equal(1, argument.definitions[0].start_line) - assert_equal(2, argument.usages[0].start_line) - end - - def test_collecting_method_arguments_all_types 
- tree = SyntaxTree.parse(<<~RUBY) - def foo(a, b = 1, *c, d, e: 1, **f, &block) - puts a - puts b - puts c - puts d - puts e - puts f - block.call - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(7, visitor.arguments.length) - - argument_a = visitor.arguments["a"] - assert_equal(1, argument_a.definitions.length) - assert_equal(1, argument_a.usages.length) - assert_equal(1, argument_a.definitions[0].start_line) - assert_equal(2, argument_a.usages[0].start_line) - - argument_b = visitor.arguments["b"] - assert_equal(1, argument_b.definitions.length) - assert_equal(1, argument_b.usages.length) - assert_equal(1, argument_b.definitions[0].start_line) - assert_equal(3, argument_b.usages[0].start_line) - - argument_c = visitor.arguments["c"] - assert_equal(1, argument_c.definitions.length) - assert_equal(1, argument_c.usages.length) - assert_equal(1, argument_c.definitions[0].start_line) - assert_equal(4, argument_c.usages[0].start_line) - - argument_d = visitor.arguments["d"] - assert_equal(1, argument_d.definitions.length) - assert_equal(1, argument_d.usages.length) - assert_equal(1, argument_d.definitions[0].start_line) - assert_equal(5, argument_d.usages[0].start_line) - - argument_e = visitor.arguments["e"] - assert_equal(1, argument_e.definitions.length) - assert_equal(1, argument_e.usages.length) - assert_equal(1, argument_e.definitions[0].start_line) - assert_equal(6, argument_e.usages[0].start_line) - - argument_f = visitor.arguments["f"] - assert_equal(1, argument_f.definitions.length) - assert_equal(1, argument_f.usages.length) - assert_equal(1, argument_f.definitions[0].start_line) - assert_equal(7, argument_f.usages[0].start_line) - - argument_block = visitor.arguments["block"] - assert_equal(1, argument_block.definitions.length) - assert_equal(1, argument_block.usages.length) - assert_equal(1, argument_block.definitions[0].start_line) - assert_equal(8, argument_block.usages[0].start_line) - end - - def 
test_collecting_block_arguments - tree = SyntaxTree.parse(<<~RUBY) - def foo - [].each do |i| - puts i - end - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(1, visitor.arguments.length) - - argument = visitor.arguments["i"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - assert_equal(2, argument.definitions[0].start_line) - assert_equal(3, argument.usages[0].start_line) - end - - def test_collecting_one_line_block_arguments - tree = SyntaxTree.parse(<<~RUBY) - def foo - [].each { |i| puts i } - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(1, visitor.arguments.length) - - argument = visitor.arguments["i"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - assert_equal(2, argument.definitions[0].start_line) - assert_equal(2, argument.usages[0].start_line) - end - - def test_collecting_shadowed_block_arguments - tree = SyntaxTree.parse(<<~RUBY) - def foo - i = "something" - - [].each do |i| - puts i - end - - i - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(1, visitor.arguments.length) - assert_equal(1, visitor.variables.length) - - argument = visitor.arguments["i"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - assert_equal(4, argument.definitions[0].start_line) - assert_equal(5, argument.usages[0].start_line) - - variable = visitor.variables["i"] - assert_equal(1, variable.definitions.length) - assert_equal(1, variable.usages.length) - assert_equal(2, variable.definitions[0].start_line) - assert_equal(8, variable.usages[0].start_line) - end - - def test_collecting_shadowed_local_variables - tree = SyntaxTree.parse(<<~RUBY) - def foo(a) - puts a - a = 123 - a - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - # All occurrences are considered arguments, despite overriding the - # argument value - assert_equal(1, 
visitor.arguments.length) - assert_equal(0, visitor.variables.length) - - argument = visitor.arguments["a"] - assert_equal(2, argument.definitions.length) - assert_equal(2, argument.usages.length) - - assert_equal(1, argument.definitions[0].start_line) - assert_equal(3, argument.definitions[1].start_line) - assert_equal(2, argument.usages[0].start_line) - assert_equal(4, argument.usages[1].start_line) - end - - def test_variables_in_the_top_level - tree = SyntaxTree.parse(<<~RUBY) - a = 123 - a - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(0, visitor.arguments.length) - assert_equal(1, visitor.variables.length) - - variable = visitor.variables["a"] - assert_equal(1, variable.definitions.length) - assert_equal(1, variable.usages.length) - - assert_equal(1, variable.definitions[0].start_line) - assert_equal(2, variable.usages[0].start_line) - end - - def test_aref_field - tree = SyntaxTree.parse(<<~RUBY) - object = {} - object["name"] = "something" - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(0, visitor.arguments.length) - assert_equal(1, visitor.variables.length) - - variable = visitor.variables["object"] - assert_equal(1, variable.definitions.length) - assert_equal(1, variable.usages.length) - - assert_equal(1, variable.definitions[0].start_line) - assert_equal(2, variable.usages[0].start_line) - end - - def test_aref_on_a_method_call - tree = SyntaxTree.parse(<<~RUBY) - object = MyObject.new - object.attributes["name"] = "something" - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(0, visitor.arguments.length) - assert_equal(1, visitor.variables.length) - - variable = visitor.variables["object"] - assert_equal(1, variable.definitions.length) - assert_equal(1, variable.usages.length) - - assert_equal(1, variable.definitions[0].start_line) - assert_equal(2, variable.usages[0].start_line) - end - - def test_aref_with_two_accesses - tree = SyntaxTree.parse(<<~RUBY) - object = MyObject.new 
- object["first"]["second"] ||= [] - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(0, visitor.arguments.length) - assert_equal(1, visitor.variables.length) - - variable = visitor.variables["object"] - assert_equal(1, variable.definitions.length) - assert_equal(1, variable.usages.length) - - assert_equal(1, variable.definitions[0].start_line) - assert_equal(2, variable.usages[0].start_line) - end - - def test_aref_on_a_method_call_with_arguments - tree = SyntaxTree.parse(<<~RUBY) - object = MyObject.new - object.instance_variable_get(:@attributes)[:something] = :other_thing - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(0, visitor.arguments.length) - assert_equal(1, visitor.variables.length) - - variable = visitor.variables["object"] - assert_equal(1, variable.definitions.length) - assert_equal(1, variable.usages.length) - - assert_equal(1, variable.definitions[0].start_line) - assert_equal(2, variable.usages[0].start_line) - end - - def test_double_aref_on_method_call - tree = SyntaxTree.parse(<<~RUBY) - object = MyObject.new - object["attributes"].find { |a| a["field"] == "expected" }["value"] = "changed" - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(1, visitor.arguments.length) - assert_equal(1, visitor.variables.length) - - variable = visitor.variables["object"] - assert_equal(1, variable.definitions.length) - assert_equal(1, variable.usages.length) - - assert_equal(1, variable.definitions[0].start_line) - assert_equal(2, variable.usages[0].start_line) - - argument = visitor.arguments["a"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - - assert_equal(2, argument.definitions[0].start_line) - assert_equal(2, argument.usages[0].start_line) - end - - def test_nested_arguments - tree = SyntaxTree.parse(<<~RUBY) - [[1, [2, 3]]].each do |one, (two, three)| - one - two - three - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - 
assert_equal(3, visitor.arguments.length) - assert_equal(0, visitor.variables.length) - - argument = visitor.arguments["one"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - - assert_equal(1, argument.definitions[0].start_line) - assert_equal(2, argument.usages[0].start_line) - - argument = visitor.arguments["two"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - - assert_equal(1, argument.definitions[0].start_line) - assert_equal(3, argument.usages[0].start_line) - - argument = visitor.arguments["three"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - - assert_equal(1, argument.definitions[0].start_line) - assert_equal(4, argument.usages[0].start_line) - end - - def test_double_nested_arguments - tree = SyntaxTree.parse(<<~RUBY) - [[1, [2, 3]]].each do |one, (two, (three, four))| - one - two - three - four - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(4, visitor.arguments.length) - assert_equal(0, visitor.variables.length) - - argument = visitor.arguments["one"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - - assert_equal(1, argument.definitions[0].start_line) - assert_equal(2, argument.usages[0].start_line) - - argument = visitor.arguments["two"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - - assert_equal(1, argument.definitions[0].start_line) - assert_equal(3, argument.usages[0].start_line) - - argument = visitor.arguments["three"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - - assert_equal(1, argument.definitions[0].start_line) - assert_equal(4, argument.usages[0].start_line) - - argument = visitor.arguments["four"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - - assert_equal(1, argument.definitions[0].start_line) - 
assert_equal(5, argument.usages[0].start_line) - end - - class Resolver < Visitor - include WithEnvironment - - attr_reader :locals - - def initialize - @locals = [] - end - - def visit_assign(node) - level = 0 - environment = current_environment - level += 1 until (environment = environment.parent).nil? - - locals << [node.target.value.value, level] - super - end - end - - def test_class - source = <<~RUBY - module Level0 - level0 = 0 - - module Level1 - level1 = 1 - - class Level2 - level2 = 2 - end - end - end - RUBY - - visitor = Resolver.new - SyntaxTree.parse(source).accept(visitor) - - assert_equal [["level0", 0], ["level1", 1], ["level2", 2]], visitor.locals - end - end -end diff --git a/test/with_scope_test.rb b/test/with_scope_test.rb new file mode 100644 index 00000000..6b48d17d --- /dev/null +++ b/test/with_scope_test.rb @@ -0,0 +1,567 @@ +# frozen_string_literal: true + +require_relative "test_helper" + +module SyntaxTree + class WithScopeTest < Minitest::Test + class Collector < Visitor + prepend WithScope + + attr_reader :arguments, :variables + + def initialize + @arguments = {} + @variables = {} + end + + def self.collect(source) + new.tap { SyntaxTree.parse(source).accept(_1) } + end + + visit_methods do + def visit_ident(node) + value = node.value.delete_suffix(":") + local = current_scope.find_local(node.value) + + case local&.type + when :argument + arguments[[current_scope.id, value]] = local + when :variable + variables[[current_scope.id, value]] = local + end + end + + def visit_label(node) + value = node.value.delete_suffix(":") + local = current_scope.find_local(value) + + if local&.type == :argument + arguments[[current_scope.id, value]] = node + end + end + + def visit_vcall(node) + local = current_scope.find_local(node.value) + variables[[current_scope.id, value]] = local if local + + super + end + end + end + + def test_collecting_simple_variables + collector = Collector.collect(<<~RUBY) + def foo + a = 1 + a + end + RUBY + + 
assert_equal(1, collector.variables.length) + assert_variable(collector, "a", definitions: [2], usages: [3]) + end + + def test_collecting_aref_variables + collector = Collector.collect(<<~RUBY) + def foo + a = [] + a[1] + end + RUBY + + assert_equal(1, collector.variables.length) + assert_variable(collector, "a", definitions: [2], usages: [3]) + end + + def test_collecting_multi_assign_variables + collector = Collector.collect(<<~RUBY) + def foo + a, b = [1, 2] + puts a + puts b + end + RUBY + + assert_equal(2, collector.variables.length) + assert_variable(collector, "a", definitions: [2], usages: [3]) + assert_variable(collector, "b", definitions: [2], usages: [4]) + end + + def test_collecting_pattern_matching_variables + collector = Collector.collect(<<~RUBY) + def foo + case [1, 2] + in Integer => a, Integer + puts a + end + end + RUBY + + # There are two occurrences, one on line 3 for pinning and one on line 4 + # for reference + assert_equal(1, collector.variables.length) + assert_variable(collector, "a", definitions: [3], usages: [4]) + end + + def test_collecting_pinned_variables + collector = Collector.collect(<<~RUBY) + def foo + a = 18 + case [1, 2] + in ^a, *rest + puts a + puts rest + end + end + RUBY + + assert_equal(2, collector.variables.length) + assert_variable(collector, "a", definitions: [2], usages: [4, 5]) + assert_variable(collector, "rest", definitions: [4], usages: [6]) + end + + if RUBY_VERSION >= "3.1" + def test_collecting_one_line_pattern_matching_variables + collector = Collector.collect(<<~RUBY) + def foo + [1] => a + puts a + end + RUBY + + assert_equal(1, collector.variables.length) + assert_variable(collector, "a", definitions: [2], usages: [3]) + end + + def test_collecting_endless_method_arguments + collector = Collector.collect(<<~RUBY) + def foo(a) = puts a + RUBY + + assert_equal(1, collector.arguments.length) + assert_argument(collector, "a", definitions: [1], usages: [1]) + end + end + + def test_collecting_method_arguments 
+ collector = Collector.collect(<<~RUBY) + def foo(a) + puts a + end + RUBY + + assert_equal(1, collector.arguments.length) + assert_argument(collector, "a", definitions: [1], usages: [2]) + end + + def test_collecting_methods_with_destructured_post_arguments + collector = Collector.collect(<<~RUBY) + def foo(optional = 1, (bin, bag)) + end + RUBY + + assert_equal(3, collector.arguments.length) + assert_argument(collector, "optional", definitions: [1], usages: []) + assert_argument(collector, "bin", definitions: [1], usages: []) + assert_argument(collector, "bag", definitions: [1], usages: []) + end + + def test_collecting_methods_with_desctructured_post_using_splat + collector = Collector.collect(<<~RUBY) + def foo(optional = 1, (bin, bag, *)) + end + RUBY + + assert_equal(3, collector.arguments.length) + assert_argument(collector, "optional", definitions: [1], usages: []) + assert_argument(collector, "bin", definitions: [1], usages: []) + assert_argument(collector, "bag", definitions: [1], usages: []) + end + + def test_collecting_methods_with_nested_desctructured + collector = Collector.collect(<<~RUBY) + def foo(optional = 1, (bin, (bag))) + end + RUBY + + assert_equal(3, collector.arguments.length) + assert_argument(collector, "optional", definitions: [1], usages: []) + assert_argument(collector, "bin", definitions: [1], usages: []) + assert_argument(collector, "bag", definitions: [1], usages: []) + end + + def test_collecting_singleton_method_arguments + collector = Collector.collect(<<~RUBY) + def self.foo(a) + puts a + end + RUBY + + assert_equal(1, collector.arguments.length) + assert_argument(collector, "a", definitions: [1], usages: [2]) + end + + def test_collecting_method_arguments_all_types + collector = Collector.collect(<<~RUBY) + def foo(a, b = 1, *c, d, e: 1, **f, &block) + puts a + puts b + puts c + puts d + puts e + puts f + block.call + end + RUBY + + assert_equal(7, collector.arguments.length) + assert_argument(collector, "a", definitions: 
[1], usages: [2]) + assert_argument(collector, "b", definitions: [1], usages: [3]) + assert_argument(collector, "c", definitions: [1], usages: [4]) + assert_argument(collector, "d", definitions: [1], usages: [5]) + assert_argument(collector, "e", definitions: [1], usages: [6]) + assert_argument(collector, "f", definitions: [1], usages: [7]) + assert_argument(collector, "block", definitions: [1], usages: [8]) + end + + def test_collecting_block_arguments + collector = Collector.collect(<<~RUBY) + def foo + [].each do |i| + puts i + end + end + RUBY + + assert_equal(1, collector.arguments.length) + assert_argument(collector, "i", definitions: [2], usages: [3]) + end + + def test_collecting_destructured_block_arguments + collector = Collector.collect(<<~RUBY) + [].each do |(a, *b)| + end + RUBY + + assert_equal(2, collector.arguments.length) + assert_argument(collector, "b", definitions: [1]) + end + + def test_collecting_anonymous_destructured_block_arguments + collector = Collector.collect(<<~RUBY) + [].each do |(a, *)| + end + RUBY + + assert_equal(1, collector.arguments.length) + end + + def test_collecting_one_line_block_arguments + collector = Collector.collect(<<~RUBY) + def foo + [].each { |i| puts i } + end + RUBY + + assert_equal(1, collector.arguments.length) + assert_argument(collector, "i", definitions: [2], usages: [2]) + end + + def test_collecting_shadowed_block_arguments + collector = Collector.collect(<<~RUBY) + def foo + i = "something" + + [].each do |i| + puts i + end + + i + end + RUBY + + assert_equal(1, collector.arguments.length) + assert_argument(collector, "i", definitions: [4], usages: [5]) + + assert_equal(1, collector.variables.length) + assert_variable(collector, "i", definitions: [2], usages: [8]) + end + + def test_collecting_shadowed_local_variables + collector = Collector.collect(<<~RUBY) + def foo(a) + puts a + a = 123 + a + end + RUBY + + # All occurrences are considered arguments, despite overriding the + # argument value + 
assert_equal(1, collector.arguments.length) + assert_equal(0, collector.variables.length) + assert_argument(collector, "a", definitions: [1, 3], usages: [2, 4]) + end + + def test_variables_in_the_top_level + collector = Collector.collect(<<~RUBY) + a = 123 + a + RUBY + + assert_equal(0, collector.arguments.length) + assert_equal(1, collector.variables.length) + assert_variable(collector, "a", definitions: [1], usages: [2]) + end + + def test_aref_field + collector = Collector.collect(<<~RUBY) + object = {} + object["name"] = "something" + RUBY + + assert_equal(0, collector.arguments.length) + assert_equal(1, collector.variables.length) + assert_variable(collector, "object", definitions: [1], usages: [2]) + end + + def test_aref_on_a_method_call + collector = Collector.collect(<<~RUBY) + object = MyObject.new + object.attributes["name"] = "something" + RUBY + + assert_equal(0, collector.arguments.length) + assert_equal(1, collector.variables.length) + assert_variable(collector, "object", definitions: [1], usages: [2]) + end + + def test_aref_with_two_accesses + collector = Collector.collect(<<~RUBY) + object = MyObject.new + object["first"]["second"] ||= [] + RUBY + + assert_equal(0, collector.arguments.length) + assert_equal(1, collector.variables.length) + assert_variable(collector, "object", definitions: [1], usages: [2]) + end + + def test_aref_on_a_method_call_with_arguments + collector = Collector.collect(<<~RUBY) + object = MyObject.new + object.instance_variable_get(:@attributes)[:something] = :other_thing + RUBY + + assert_equal(0, collector.arguments.length) + assert_equal(1, collector.variables.length) + assert_variable(collector, "object", definitions: [1], usages: [2]) + end + + def test_double_aref_on_method_call + collector = Collector.collect(<<~RUBY) + object = MyObject.new + object["attributes"].find { |a| a["field"] == "expected" }["value"] = "changed" + RUBY + + assert_equal(1, collector.arguments.length) + assert_argument(collector, "a", 
definitions: [2], usages: [2]) + + assert_equal(1, collector.variables.length) + assert_variable(collector, "object", definitions: [1], usages: [2]) + end + + def test_nested_arguments + collector = Collector.collect(<<~RUBY) + [[1, [2, 3]]].each do |one, (two, three)| + one + two + three + end + RUBY + + assert_equal(3, collector.arguments.length) + assert_equal(0, collector.variables.length) + + assert_argument(collector, "one", definitions: [1], usages: [2]) + assert_argument(collector, "two", definitions: [1], usages: [3]) + assert_argument(collector, "three", definitions: [1], usages: [4]) + end + + def test_double_nested_arguments + collector = Collector.collect(<<~RUBY) + [[1, [2, 3]]].each do |one, (two, (three, four))| + one + two + three + four + end + RUBY + + assert_equal(4, collector.arguments.length) + assert_equal(0, collector.variables.length) + + assert_argument(collector, "one", definitions: [1], usages: [2]) + assert_argument(collector, "two", definitions: [1], usages: [3]) + assert_argument(collector, "three", definitions: [1], usages: [4]) + assert_argument(collector, "four", definitions: [1], usages: [5]) + end + + def test_block_locals + collector = Collector.collect(<<~RUBY) + [].each do |; a| + end + RUBY + + assert_equal(1, collector.variables.length) + + assert_variable(collector, "a", definitions: [1]) + end + + def test_lambda_locals + collector = Collector.collect(<<~RUBY) + ->(;a) { } + RUBY + + assert_equal(1, collector.variables.length) + + assert_variable(collector, "a", definitions: [1]) + end + + def test_regex_named_capture_groups + collector = Collector.collect(<<~RUBY) + if /(?\\w+)-(?\\w+)/ =~ "something-else" + one + two + end + RUBY + + assert_equal(2, collector.variables.length) + + assert_variable(collector, "one", definitions: [1], usages: [2]) + assert_variable(collector, "two", definitions: [1], usages: [3]) + end + + def test_multiline_regex_named_capture_groups + collector = Collector.collect(<<~RUBY) + if %r{ + 
(?\\w+)- + (?\\w+) + } =~ "something-else" + one + two + end + RUBY + + assert_equal(2, collector.variables.length) + + assert_variable(collector, "one", definitions: [2], usages: [5]) + assert_variable(collector, "two", definitions: [3], usages: [6]) + end + + class Resolver < Visitor + prepend WithScope + + attr_reader :locals + + def initialize + @locals = [] + end + + visit_methods do + def visit_assign(node) + super.tap do + level = 0 + name = node.target.value.value + + scope = current_scope + while !scope.locals.key?(name) && !scope.parent.nil? + level += 1 + scope = scope.parent + end + + locals << [name, level] + end + end + end + end + + def test_resolver + source = <<~RUBY + module Level0 + level0 = 0 + + class Level1 + level1 = 1 + + def level2 + level2 = 2 + + tap do |level3| + level2 = 2 + level3 = 3 + + tap do |level4| + level2 = 2 + level4 = 4 + end + end + end + end + end + RUBY + + resolver = Resolver.new + SyntaxTree.parse(source).accept(resolver) + + expected = [ + ["level0", 0], + ["level1", 0], + ["level2", 0], + ["level2", 1], + ["level3", 0], + ["level2", 2], + ["level4", 0] + ] + + assert_equal expected, resolver.locals + end + + private + + def assert_collected(field, name, definitions: [], usages: []) + keys = field.keys.select { |key| key[1] == name } + assert_equal(1, keys.length) + + variable = field[keys.first] + + assert_equal(definitions.length, variable.definitions.length) + definitions.each_with_index do |definition, index| + assert_equal(definition, variable.definitions[index].start_line) + end + + assert_equal(usages.length, variable.usages.length) + usages.each_with_index do |usage, index| + assert_equal(usage, variable.usages[index].start_line) + end + end + + def assert_argument(collector, name, definitions: [], usages: []) + assert_collected( + collector.arguments, + name, + definitions: definitions, + usages: usages + ) + end + + def assert_variable(collector, name, definitions: [], usages: []) + assert_collected( + 
collector.variables, + name, + definitions: definitions, + usages: usages + ) + end + end +end diff --git a/test/yarv_test.rb b/test/yarv_test.rb index 6f60d74e..78622434 100644 --- a/test/yarv_test.rb +++ b/test/yarv_test.rb @@ -6,27 +6,27 @@ module SyntaxTree class YARVTest < Minitest::Test CASES = { - "0" => "break 0\n", - "1" => "break 1\n", - "2" => "break 2\n", - "1.0" => "break 1.0\n", - "1 + 2" => "break 1 + 2\n", - "1 - 2" => "break 1 - 2\n", - "1 * 2" => "break 1 * 2\n", - "1 / 2" => "break 1 / 2\n", - "1 % 2" => "break 1 % 2\n", - "1 < 2" => "break 1 < 2\n", - "1 <= 2" => "break 1 <= 2\n", - "1 > 2" => "break 1 > 2\n", - "1 >= 2" => "break 1 >= 2\n", - "1 == 2" => "break 1 == 2\n", - "1 != 2" => "break 1 != 2\n", - "1 & 2" => "break 1 & 2\n", - "1 | 2" => "break 1 | 2\n", - "1 << 2" => "break 1 << 2\n", - "1 >> 2" => "break 1.>>(2)\n", - "1 ** 2" => "break 1.**(2)\n", - "a = 1; a" => "a = 1\nbreak a\n" + "0" => "return 0\n", + "1" => "return 1\n", + "2" => "return 2\n", + "1.0" => "return 1.0\n", + "1 + 2" => "return 1 + 2\n", + "1 - 2" => "return 1 - 2\n", + "1 * 2" => "return 1 * 2\n", + "1 / 2" => "return 1 / 2\n", + "1 % 2" => "return 1 % 2\n", + "1 < 2" => "return 1 < 2\n", + "1 <= 2" => "return 1 <= 2\n", + "1 > 2" => "return 1 > 2\n", + "1 >= 2" => "return 1 >= 2\n", + "1 == 2" => "return 1 == 2\n", + "1 != 2" => "return 1 != 2\n", + "1 & 2" => "return 1 & 2\n", + "1 | 2" => "return 1 | 2\n", + "1 << 2" => "return 1 << 2\n", + "1 >> 2" => "return 1.>>(2)\n", + "1 ** 2" => "return 1.**(2)\n", + "a = 1; a" => "a = 1\nreturn a\n" }.freeze CASES.each do |source, expected| @@ -288,6 +288,188 @@ def value end end + ObjectSpace.each_object(YARV::Instruction.singleton_class) do |instruction| + next if instruction == YARV::Instruction + + define_method("test_instruction_interface_#{instruction.name}") do + methods = instruction.instance_methods(false) + assert_empty(%i[disasm to_a deconstruct_keys call ==] - methods) + end + end + + def test_cfg + iseq = 
RubyVM::InstructionSequence.compile("100 + (14 < 0 ? -1 : +1)") + iseq = SyntaxTree::YARV::InstructionSequence.from(iseq.to_a) + cfg = SyntaxTree::YARV::ControlFlowGraph.compile(iseq) + + assert_equal(<<~DISASM, cfg.disasm) + == cfg: #@:1 (1,0)-(1,0)> + block_0 + 0000 putobject 100 + 0002 putobject 14 + 0004 putobject_INT2FIX_0_ + 0005 opt_lt + 0007 branchunless 13 + == to: block_13, block_9 + block_9 + == from: block_0 + 0009 putobject -1 + 0011 jump 14 + == to: block_14 + block_13 + == from: block_0 + 0013 putobject_INT2FIX_1_ + == to: block_14 + block_14 + == from: block_9, block_13 + 0014 opt_plus + 0016 leave + == to: leaves + DISASM + end + + def test_dfg + iseq = RubyVM::InstructionSequence.compile("100 + (14 < 0 ? -1 : +1)") + iseq = SyntaxTree::YARV::InstructionSequence.from(iseq.to_a) + cfg = SyntaxTree::YARV::ControlFlowGraph.compile(iseq) + dfg = SyntaxTree::YARV::DataFlowGraph.compile(cfg) + + assert_equal(<<~DISASM, dfg.disasm) + == dfg: #@:1 (1,0)-(1,0)> + block_0 + 0000 putobject 100 # out: out_0 + 0002 putobject 14 # out: 5 + 0004 putobject_INT2FIX_0_ # out: 5 + 0005 opt_lt # in: 2, 4; out: 7 + 0007 branchunless 13 # in: 5 + == to: block_13, block_9 + == out: 0 + block_9 + == from: block_0 + == in: pass_0 + 0009 putobject -1 # out: out_0 + 0011 jump 14 + == to: block_14 + == out: pass_0, 9 + block_13 + == from: block_0 + == in: pass_0 + 0013 putobject_INT2FIX_1_ # out: out_0 + == to: block_14 + == out: pass_0, 13 + block_14 + == from: block_9, block_13 + == in: in_0, in_1 + 0014 opt_plus # in: in_0, in_1; out: 16 + 0016 leave # in: 14 + == to: leaves + DISASM + end + + def test_son + iseq = RubyVM::InstructionSequence.compile("(14 < 0 ? 
-1 : +1) + 100") + iseq = SyntaxTree::YARV::InstructionSequence.from(iseq.to_a) + cfg = SyntaxTree::YARV::ControlFlowGraph.compile(iseq) + dfg = SyntaxTree::YARV::DataFlowGraph.compile(cfg) + son = SyntaxTree::YARV::SeaOfNodes.compile(dfg) + + assert_equal(<<~MERMAID, son.to_mermaid) + flowchart TD + node_0("0000 putobject 14") + node_2("0002 putobject_INT2FIX_0_") + node_3("0003 opt_lt <calldata!mid:<, argc:1, ARGS_SIMPLE>") + node_5("0005 branchunless 0011") + node_7("0007 putobject -1") + node_11("0011 putobject_INT2FIX_1_") + node_12("0012 putobject 100") + node_14("0014 opt_plus <calldata!mid:+, argc:1, ARGS_SIMPLE>") + node_16("0016 leave") + node_1000("1000 ψ") + node_1001("1001 φ") + node_0 -- "0" --> node_3 + node_2 -- "1" --> node_3 + node_3 --> node_5 + node_3 -- "0" --> node_5 + node_5 -- "branch0" --> node_11 + node_5 -- "fallthrough" --> node_1000 + node_7 -- "0009" --> node_1001 + node_11 -- "branch0" --> node_1000 + node_11 -- "0011" --> node_1001 + node_12 -- "1" --> node_14 + node_14 --> node_16 + node_14 -- "0" --> node_16 + node_1000 --> node_14 + node_1001 -.-> node_1000 + node_1001 -- "0" --> node_14 + linkStyle 0 stroke:green + linkStyle 1 stroke:green + linkStyle 2 stroke:red + linkStyle 3 stroke:green + linkStyle 4 stroke:red + linkStyle 5 stroke:red + linkStyle 6 stroke:green + linkStyle 7 stroke:red + linkStyle 8 stroke:green + linkStyle 9 stroke:green + linkStyle 10 stroke:red + linkStyle 11 stroke:green + linkStyle 12 stroke:red + linkStyle 14 stroke:green + MERMAID + end + + def test_son_indirect_basic_block_argument + iseq = RubyVM::InstructionSequence.compile("100 + (14 < 0 ? 
-1 : +1)") + iseq = SyntaxTree::YARV::InstructionSequence.from(iseq.to_a) + cfg = SyntaxTree::YARV::ControlFlowGraph.compile(iseq) + dfg = SyntaxTree::YARV::DataFlowGraph.compile(cfg) + son = SyntaxTree::YARV::SeaOfNodes.compile(dfg) + + assert_equal(<<~MERMAID, son.to_mermaid) + flowchart TD + node_0("0000 putobject 100") + node_2("0002 putobject 14") + node_4("0004 putobject_INT2FIX_0_") + node_5("0005 opt_lt <calldata!mid:<, argc:1, ARGS_SIMPLE>") + node_7("0007 branchunless 0013") + node_9("0009 putobject -1") + node_13("0013 putobject_INT2FIX_1_") + node_14("0014 opt_plus <calldata!mid:+, argc:1, ARGS_SIMPLE>") + node_16("0016 leave") + node_1002("1002 ψ") + node_1004("1004 φ") + node_0 -- "0" --> node_14 + node_2 -- "0" --> node_5 + node_4 -- "1" --> node_5 + node_5 --> node_7 + node_5 -- "0" --> node_7 + node_7 -- "branch0" --> node_13 + node_7 -- "fallthrough" --> node_1002 + node_9 -- "0011" --> node_1004 + node_13 -- "branch0" --> node_1002 + node_13 -- "0013" --> node_1004 + node_14 --> node_16 + node_14 -- "0" --> node_16 + node_1002 --> node_14 + node_1004 -.-> node_1002 + node_1004 -- "1" --> node_14 + linkStyle 0 stroke:green + linkStyle 1 stroke:green + linkStyle 2 stroke:green + linkStyle 3 stroke:red + linkStyle 4 stroke:green + linkStyle 5 stroke:red + linkStyle 6 stroke:red + linkStyle 7 stroke:green + linkStyle 8 stroke:red + linkStyle 9 stroke:green + linkStyle 10 stroke:red + linkStyle 11 stroke:green + linkStyle 12 stroke:red + linkStyle 14 stroke:green + MERMAID + end + private def assert_decompiles(expected, source)