diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 46959146..9f77688a 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -4,3 +4,7 @@ updates: directory: "/" schedule: interval: "daily" + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "daily" diff --git a/.github/workflows/auto-merge.yml b/.github/workflows/auto-merge.yml index 9b28abf4..514ac27a 100644 --- a/.github/workflows/auto-merge.yml +++ b/.github/workflows/auto-merge.yml @@ -12,7 +12,7 @@ jobs: steps: - name: Dependabot metadata id: metadata - uses: dependabot/fetch-metadata@v1.3.3 + uses: dependabot/fetch-metadata@v1.3.5 with: github-token: "${{ secrets.GITHUB_TOKEN }}" - name: Enable auto-merge for Dependabot PRs diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 9f95cc9d..3f811317 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1,7 +1,9 @@ name: Main + on: - push - pull_request + jobs: ci: strategy: @@ -11,13 +13,14 @@ jobs: - '2.7.0' - '3.0' - '3.1' + - '3.2' - head - truffleruby-head name: CI runs-on: ubuntu-latest env: CI: true - TESTOPTS: --verbose + # TESTOPTS: --verbose steps: - uses: actions/checkout@master - uses: ruby/setup-ruby@v1 @@ -37,7 +40,7 @@ jobs: - uses: ruby/setup-ruby@v1 with: bundler-cache: true - ruby-version: '3.1' + ruby-version: '3.2' - name: Check run: | bundle exec rake stree:check diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 00000000..f5477ea3 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,6 @@ +[submodule "mspec"] + path = spec/mspec + url = git@github.com:ruby/mspec.git +[submodule "spec"] + path = spec/ruby + url = git@github.com:ruby/spec.git diff --git a/.rubocop.yml b/.rubocop.yml index daf5a824..069041bd 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -7,7 +7,7 @@ AllCops: SuggestExtensions: false TargetRubyVersion: 2.7 Exclude: - - '{.git,.github,bin,coverage,pkg,test/fixtures,vendor,tmp}/**/*' + - 
'{.git,.github,bin,coverage,pkg,spec,test/fixtures,vendor,tmp}/**/*' - test.rb Layout/LineLength: @@ -43,6 +43,9 @@ Lint/NonLocalExitFromIterator: Lint/RedundantRequireStatement: Enabled: false +Lint/RescueException: + Enabled: false + Lint/SuppressedException: Enabled: false @@ -144,3 +147,6 @@ Style/SpecialGlobalVars: Style/StructInheritance: Enabled: false + +Style/YodaExpression: + Enabled: false diff --git a/CHANGELOG.md b/CHANGELOG.md index 557fdf5c..4b29fcbb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,16 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a ## [Unreleased] +## [5.2.0] - 2023-01-04 + +### Added + +- An experiment in evaluating compiled instruction sequences has been added to Syntax Tree. This is subject to change, so it will not be well documented or tested at the moment. It does not impact other functionality. + +### Changed + +- Empty parentheses on method calls will now be left in place. Previously they were left in place if the method being called looked like a constant. Now they are left in place for all method calls since the method name can mirror the name of a local variable, in which case the parentheses are required. + ## [5.1.0] - 2022-12-28 ### Added @@ -471,7 +481,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a - 🎉 Initial release! 
🎉 -[unreleased]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v5.1.0...HEAD +[unreleased]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v5.2.0...HEAD +[5.2.0]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v5.1.0...v5.2.0 [5.1.0]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v5.0.1...v5.1.0 [5.0.1]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v5.0.0...v5.0.1 [5.0.0]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v4.3.0...v5.0.0 diff --git a/Gemfile.lock b/Gemfile.lock index 47d0c66b..bb5e3663 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,7 +1,7 @@ PATH remote: . specs: - syntax_tree (5.1.0) + syntax_tree (5.2.0) prettier_print (>= 1.2.0) GEM @@ -10,26 +10,26 @@ GEM ast (2.4.2) docile (1.4.0) json (2.6.3) - minitest (5.16.3) + minitest (5.17.0) parallel (1.22.1) - parser (3.1.3.0) + parser (3.2.0.0) ast (~> 2.4.1) prettier_print (1.2.0) rainbow (3.1.1) rake (13.0.6) regexp_parser (2.6.1) rexml (3.2.5) - rubocop (1.41.1) + rubocop (1.42.0) json (~> 2.3) parallel (~> 1.10) parser (>= 3.1.2.1) rainbow (>= 2.2.2, < 4.0) regexp_parser (>= 1.8, < 3.0) rexml (>= 3.2.5, < 4.0) - rubocop-ast (>= 1.23.0, < 2.0) + rubocop-ast (>= 1.24.1, < 2.0) ruby-progressbar (~> 1.7) unicode-display_width (>= 1.4.0, < 3.0) - rubocop-ast (1.24.0) + rubocop-ast (1.24.1) parser (>= 3.1.1.0) ruby-progressbar (1.11.0) simplecov (0.22.0) @@ -38,7 +38,7 @@ GEM simplecov_json_formatter (~> 0.1) simplecov-html (0.12.3) simplecov_json_formatter (0.1.4) - unicode-display_width (2.3.0) + unicode-display_width (2.4.1) PLATFORMS arm64-darwin-21 diff --git a/Rakefile b/Rakefile index 4973d45e..f06d8cf8 100644 --- a/Rakefile +++ b/Rakefile @@ -26,3 +26,10 @@ end SyntaxTree::Rake::CheckTask.new(&configure) SyntaxTree::Rake::WriteTask.new(&configure) + +desc "Run mspec tests using YARV emulation" +task :spec do + Dir["./spec/ruby/language/**/*_spec.rb"].each do |filepath| + sh "exe/yarv ./spec/mspec/bin/mspec-tag #{filepath}" + end +end 
diff --git a/exe/yarv b/exe/yarv new file mode 100755 index 00000000..3efb23ff --- /dev/null +++ b/exe/yarv @@ -0,0 +1,63 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +$:.unshift(File.expand_path("../lib", __dir__)) + +require "syntax_tree" + +# Require these here so that we can run binding.irb without having them require +# anything that we've already patched. +require "irb" +require "irb/completion" +require "irb/color_printer" +require "readline" + +# First, create an instance of our virtual machine. +events = + if ENV["DEBUG"] + SyntaxTree::YARV::VM::STDOUTEvents.new + else + SyntaxTree::YARV::VM::NullEvents.new + end + +vm = SyntaxTree::YARV::VM.new(events) + +# Next, set up a bunch of aliases for methods that we're going to hook into in +# order to set up our virtual machine. +class << Kernel + alias yarv_require require + alias yarv_require_relative require_relative + alias yarv_load load + alias yarv_eval eval + alias yarv_throw throw + alias yarv_catch catch +end + +# Next, patch the methods that we just aliased so that they use our virtual +# machine's versions instead. This allows us to load Ruby files and have them +# execute in our virtual machine instead of the runtime environment. +[Kernel, Kernel.singleton_class].each do |klass| + klass.define_method(:require) { |filepath| vm.require(filepath) } + + klass.define_method(:load) { |filepath| vm.load(filepath) } + + # klass.define_method(:require_relative) do |filepath| + # vm.require_relative(filepath) + # end + + # klass.define_method(:eval) do | + # source, + # binding = TOPLEVEL_BINDING, + # filename = "(eval)", + # lineno = 1 + # | + # vm.eval(source, binding, filename, lineno) + # end + + # klass.define_method(:throw) { |tag, value = nil| vm.throw(tag, value) } + + # klass.define_method(:catch) { |tag, &block| vm.catch(tag, &block) } +end + +# Finally, require the file that we want to execute. 
+vm.require_resolved(ARGV.shift) diff --git a/lib/syntax_tree.rb b/lib/syntax_tree.rb index ab7ad7f9..f1217ac3 100644 --- a/lib/syntax_tree.rb +++ b/lib/syntax_tree.rb @@ -37,6 +37,7 @@ require_relative "syntax_tree/yarv/legacy" require_relative "syntax_tree/yarv/local_table" require_relative "syntax_tree/yarv/assembler" +require_relative "syntax_tree/yarv/vm" # Syntax Tree is a suite of tools built on top of the internal CRuby parser. It # provides the ability to generate a syntax tree from source, as well as the diff --git a/lib/syntax_tree/node.rb b/lib/syntax_tree/node.rb index e5b09044..f19cfb2c 100644 --- a/lib/syntax_tree/node.rb +++ b/lib/syntax_tree/node.rb @@ -3001,16 +3001,25 @@ def format(q) else q.format(message) - if arguments.is_a?(ArgParen) && arguments.arguments.nil? && - !message.is_a?(Const) - # If you're using an explicit set of parentheses on something that - # looks like a constant, then we need to match that in order to - # maintain valid Ruby. For example, you could do something like Foo(), - # on which we would need to keep the parentheses to make it look like - # a method call. - else - q.format(arguments) - end + # Note that this explicitly leaves parentheses in place even if they are + # empty. There are two reasons we would need to do this. The first is if + # we're calling something that looks like a constant, as in: + # + # Foo() + # + # In this case if we remove the parentheses then this becomes a constant + # reference and not a method call. The second is if we're calling a + # method that is the same name as a local variable that is in scope, as + # in: + # + # foo = foo() + # + # In this case we have to keep the parentheses or else it treats this + # like assigning nil to the local variable. Note that we could attempt + # to be smarter about this by tracking the local variables that are in + # scope, but for now it's simpler and more efficient to just leave the + # parentheses in place. 
+ q.format(arguments) if arguments end end diff --git a/lib/syntax_tree/parser.rb b/lib/syntax_tree/parser.rb index fcefed30..602bb98f 100644 --- a/lib/syntax_tree/parser.rb +++ b/lib/syntax_tree/parser.rb @@ -53,7 +53,7 @@ def initialize(start, line) # there's a BOM at the beginning of the file, which is the reason we need # to compare it to 0 here. def [](byteindex) - indices[byteindex < 0 ? 0 : byteindex] + indices[[byteindex, 0].max] end end diff --git a/lib/syntax_tree/version.rb b/lib/syntax_tree/version.rb index d9bbdfa4..a97f5e43 100644 --- a/lib/syntax_tree/version.rb +++ b/lib/syntax_tree/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module SyntaxTree - VERSION = "5.1.0" + VERSION = "5.2.0" end diff --git a/lib/syntax_tree/yarv.rb b/lib/syntax_tree/yarv.rb index 97592d4d..7e4da7bb 100644 --- a/lib/syntax_tree/yarv.rb +++ b/lib/syntax_tree/yarv.rb @@ -1,277 +1,8 @@ # frozen_string_literal: true -require "forwardable" - module SyntaxTree # This module provides an object representation of the YARV bytecode. 
module YARV - class VM - class Jump - attr_reader :name - - def initialize(name) - @name = name - end - end - - class Leave - attr_reader :value - - def initialize(value) - @value = value - end - end - - class Frame - attr_reader :iseq, :parent, :stack_index, :_self, :nesting, :svars - - def initialize(iseq, parent, stack_index, _self, nesting) - @iseq = iseq - @parent = parent - @stack_index = stack_index - @_self = _self - @nesting = nesting - @svars = {} - end - end - - class TopFrame < Frame - def initialize(iseq) - super(iseq, nil, 0, TOPLEVEL_BINDING.eval("self"), [Object]) - end - end - - class BlockFrame < Frame - def initialize(iseq, parent, stack_index) - super(iseq, parent, stack_index, parent._self, parent.nesting) - end - end - - class MethodFrame < Frame - attr_reader :name, :block - - def initialize(iseq, parent, stack_index, _self, name, block) - super(iseq, parent, stack_index, _self, parent.nesting) - @name = name - @block = block - end - end - - class ClassFrame < Frame - def initialize(iseq, parent, stack_index, _self) - super(iseq, parent, stack_index, _self, parent.nesting + [_self]) - end - end - - class FrozenCore - define_method("core#hash_merge_kwd") { |left, right| left.merge(right) } - - define_method("core#hash_merge_ptr") do |hash, *values| - hash.merge(values.each_slice(2).to_h) - end - - define_method("core#set_method_alias") do |clazz, new_name, old_name| - clazz.alias_method(new_name, old_name) - end - - define_method("core#set_variable_alias") do |new_name, old_name| - # Using eval here since there isn't a reflection API to be able to - # alias global variables. 
- eval("alias #{new_name} #{old_name}", binding, __FILE__, __LINE__) - end - - define_method("core#set_postexe") { |&block| END { block.call } } - - define_method("core#undef_method") do |clazz, name| - clazz.undef_method(name) - end - end - - FROZEN_CORE = FrozenCore.new.freeze - - extend Forwardable - - attr_reader :stack - def_delegators :stack, :push, :pop - - attr_reader :frame - def_delegators :frame, :_self - - def initialize - @stack = [] - @frame = nil - end - - ########################################################################## - # Helper methods for frames - ########################################################################## - - def run_frame(frame) - # First, set the current frame to the given value. - @frame = frame - - # Next, set up the local table for the frame. This is actually incorrect - # as it could use the values already on the stack, but for now we're - # just doing this for simplicity. - frame.iseq.local_table.size.times { push(nil) } - - # Yield so that some frame-specific setup can be done. - yield if block_given? - - # This hash is going to hold a mapping of label names to their - # respective indices in our instruction list. - labels = {} - - # This array is going to hold our instructions. - insns = [] - - # Here we're going to preprocess the instruction list from the - # instruction sequence to set up the labels hash and the insns array. - frame.iseq.insns.each do |insn| - case insn - when Integer, Symbol - # skip - when InstructionSequence::Label - labels[insn.name] = insns.length - else - insns << insn - end - end - - # Finally we can execute the instructions one at a time. If they return - # jumps or leaves we will handle those appropriately. 
- pc = 0 - while pc < insns.length - insn = insns[pc] - pc += 1 - - case (result = insn.call(self)) - when Jump - pc = labels[result.name] - when Leave - return result.value - end - end - ensure - @stack = stack[0...frame.stack_index] - @frame = frame.parent - end - - def run_top_frame(iseq) - run_frame(TopFrame.new(iseq)) - end - - def run_block_frame(iseq, *args, &block) - run_frame(BlockFrame.new(iseq, frame, stack.length)) do - locals = [*args, block] - iseq.local_table.size.times do |index| - local_set(index, 0, locals.shift) - end - end - end - - def run_class_frame(iseq, clazz) - run_frame(ClassFrame.new(iseq, frame, stack.length, clazz)) - end - - def run_method_frame(name, iseq, _self, *args, **kwargs, &block) - run_frame( - MethodFrame.new(iseq, frame, stack.length, _self, name, block) - ) do - locals = [*args, block] - - if iseq.argument_options[:keyword] - # First, set up the keyword bits array. - keyword_bits = - iseq.argument_options[:keyword].map do |config| - kwargs.key?(config.is_a?(Array) ? config[0] : config) - end - - iseq.local_table.locals.each_with_index do |local, index| - # If this is the keyword bits local, then set it appropriately. - if local.name == 2 - locals.insert(index, keyword_bits) - next - end - - # First, find the configuration for this local in the keywords - # list if it exists. - name = local.name - config = - iseq.argument_options[:keyword].find do |keyword| - keyword.is_a?(Array) ? keyword[0] == name : keyword == name - end - - # If the configuration doesn't exist, then the local is not a - # keyword local. - next unless config - - if !config.is_a?(Array) - # required keyword - locals.insert(index, kwargs.fetch(name)) - elsif !config[1].nil? 
- # optional keyword with embedded default value - locals.insert(index, kwargs.fetch(name, config[1])) - else - # optional keyword with expression default value - locals.insert(index, nil) - end - end - end - - iseq.local_table.size.times do |index| - local_set(index, 0, locals.shift) - end - end - end - - ########################################################################## - # Helper methods for instructions - ########################################################################## - - def const_base - frame.nesting.last - end - - def frame_at(level) - current = frame - level.times { current = current.parent } - current - end - - def frame_svar - current = frame - current = current.parent while current.is_a?(BlockFrame) - current - end - - def frame_yield - current = frame - current = current.parent until current.is_a?(MethodFrame) - current - end - - def frozen_core - FROZEN_CORE - end - - def jump(label) - Jump.new(label.name) - end - - def leave - Leave.new(pop) - end - - def local_get(index, level) - stack[frame_at(level).stack_index + index] - end - - def local_set(index, level, value) - stack[frame_at(level).stack_index + index] = value - end - end - # Compile the given source into a YARV instruction sequence. def self.compile(source, options = Compiler::Options.new) SyntaxTree.parse(source).accept(Compiler.new(options)) diff --git a/lib/syntax_tree/yarv/assembler.rb b/lib/syntax_tree/yarv/assembler.rb index efb179c1..ec467b58 100644 --- a/lib/syntax_tree/yarv/assembler.rb +++ b/lib/syntax_tree/yarv/assembler.rb @@ -69,7 +69,7 @@ def initialize(filepath) end def assemble - iseq = InstructionSequence.new(:top, "
", nil, Location.default) + iseq = InstructionSequence.new("
", "", 1, :top) assemble_iseq(iseq, File.readlines(filepath, chomp: true)) iseq.compile! @@ -138,7 +138,7 @@ def assemble_iseq(iseq, lines) name = parse_symbol(name_value) flags = parse_number(flags_value) - class_iseq = iseq.class_child_iseq(name.to_s, Location.default) + class_iseq = iseq.class_child_iseq(name.to_s, 1) assemble_iseq(class_iseq, body) iseq.defineclass(name, class_iseq, flags) when "defined" @@ -153,7 +153,7 @@ def assemble_iseq(iseq, lines) line_index += body.length name = parse_symbol(operands) - method_iseq = iseq.method_child_iseq(name.to_s, Location.default) + method_iseq = iseq.method_child_iseq(name.to_s, 1) assemble_iseq(method_iseq, body) iseq.definemethod(name, method_iseq) @@ -162,7 +162,7 @@ def assemble_iseq(iseq, lines) line_index += body.length name = parse_symbol(operands) - method_iseq = iseq.method_child_iseq(name.to_s, Location.default) + method_iseq = iseq.method_child_iseq(name.to_s, 1) assemble_iseq(method_iseq, body) iseq.definesmethod(name, method_iseq) @@ -221,7 +221,7 @@ def assemble_iseq(iseq, lines) body = parse_nested(lines[line_index..]) line_index += body.length - block_iseq = iseq.block_child_iseq(Location.default) + block_iseq = iseq.block_child_iseq(1) assemble_iseq(block_iseq, body) block_iseq end @@ -249,7 +249,7 @@ def assemble_iseq(iseq, lines) body = parse_nested(lines[line_index..]) line_index += body.length - block_iseq = iseq.block_child_iseq(Location.default) + block_iseq = iseq.block_child_iseq(1) assemble_iseq(block_iseq, body) block_iseq end @@ -354,7 +354,7 @@ def assemble_iseq(iseq, lines) body = parse_nested(lines[line_index..]) line_index += body.length - block_iseq = iseq.block_child_iseq(Location.default) + block_iseq = iseq.block_child_iseq(1) assemble_iseq(block_iseq, body) block_iseq end diff --git a/lib/syntax_tree/yarv/bf.rb b/lib/syntax_tree/yarv/bf.rb index f642fb2f..21bc2982 100644 --- a/lib/syntax_tree/yarv/bf.rb +++ b/lib/syntax_tree/yarv/bf.rb @@ -13,7 +13,7 @@ def initialize(source) 
def compile # Set up the top-level instruction sequence that will be returned. - iseq = InstructionSequence.new(:top, "", nil, location) + iseq = InstructionSequence.new("", "", 1, :top) # Set up the $tape global variable that will hold our state. iseq.duphash({ 0 => 0 }) @@ -80,19 +80,6 @@ def compile private - # This is the location of the top instruction sequence, derived from the - # source string. - def location - Location.new( - start_line: 1, - start_char: 0, - start_column: 0, - end_line: source.count("\n") + 1, - end_char: source.size, - end_column: source.size - (source.rindex("\n") || 0) - 1 - ) - end - # $tape[$cursor] += value def change_by(iseq, value) iseq.getglobal(:$tape) @@ -111,6 +98,7 @@ def change_by(iseq, value) end iseq.send(YARV.calldata(:[]=, 2)) + iseq.pop end # $cursor += value @@ -138,6 +126,7 @@ def output_char(iseq) iseq.send(YARV.calldata(:chr)) iseq.send(YARV.calldata(:putc, 1)) + iseq.pop end # $tape[$cursor] = $stdin.getc.ord @@ -150,6 +139,7 @@ def input_char(iseq) iseq.send(YARV.calldata(:ord)) iseq.send(YARV.calldata(:[]=, 2)) + iseq.pop end # unless $tape[$cursor] == 0 @@ -164,14 +154,21 @@ def loop_start(iseq) iseq.putobject(0) iseq.send(YARV.calldata(:==, 1)) - iseq.branchunless(end_label) + iseq.branchif(end_label) [start_label, end_label] end # Jump back to the start of the loop. 
def loop_end(iseq, start_label, end_label) - iseq.jump(start_label) + iseq.getglobal(:$tape) + iseq.getglobal(:$cursor) + iseq.send(YARV.calldata(:[], 1)) + + iseq.putobject(0) + iseq.send(YARV.calldata(:==, 1)) + iseq.branchunless(start_label) + iseq.push(end_label) end end diff --git a/lib/syntax_tree/yarv/compiler.rb b/lib/syntax_tree/yarv/compiler.rb index 4af5d6f0..4c9a4d50 100644 --- a/lib/syntax_tree/yarv/compiler.rb +++ b/lib/syntax_tree/yarv/compiler.rb @@ -304,10 +304,11 @@ def visit_CHAR(node) end def visit_END(node) + start_line = node.location.start_line once_iseq = - with_child_iseq(iseq.block_child_iseq(node.location)) do + with_child_iseq(iseq.block_child_iseq(start_line)) do postexe_iseq = - with_child_iseq(iseq.block_child_iseq(node.location)) do + with_child_iseq(iseq.block_child_iseq(start_line)) do iseq.event(:RUBY_EVENT_B_CALL) *statements, last_statement = node.statements.body @@ -567,7 +568,7 @@ def visit_binary(node) end def visit_block(node) - with_child_iseq(iseq.block_child_iseq(node.location)) do + with_child_iseq(iseq.block_child_iseq(node.location.start_line)) do iseq.event(:RUBY_EVENT_B_CALL) visit(node.block_var) visit(node.bodystmt) @@ -751,7 +752,9 @@ def visit_case(node) def visit_class(node) name = node.constant.constant.value.to_sym class_iseq = - with_child_iseq(iseq.class_child_iseq(name, node.location)) do + with_child_iseq( + iseq.class_child_iseq(name, node.location.start_line) + ) do iseq.event(:RUBY_EVENT_CLASS) visit(node.bodystmt) iseq.event(:RUBY_EVENT_END) @@ -818,7 +821,8 @@ def visit_const_path_ref(node) def visit_def(node) name = node.name.value.to_sym - method_iseq = iseq.method_child_iseq(name.to_s, node.location) + method_iseq = + iseq.method_child_iseq(name.to_s, node.location.start_line) with_child_iseq(method_iseq) do visit(node.params) if node.params @@ -939,7 +943,9 @@ def visit_for(node) iseq.local_table.plain(name) block_iseq = - with_child_iseq(iseq.block_child_iseq(node.statements.location)) do + 
with_child_iseq( + iseq.block_child_iseq(node.statements.location.start_line) + ) do iseq.argument_options[:lead_num] ||= 0 iseq.argument_options[:lead_num] += 1 iseq.argument_options[:ambiguous_param0] = true @@ -1076,7 +1082,7 @@ def visit_label(node) def visit_lambda(node) lambda_iseq = - with_child_iseq(iseq.block_child_iseq(node.location)) do + with_child_iseq(iseq.block_child_iseq(node.location.start_line)) do iseq.event(:RUBY_EVENT_B_CALL) visit(node.params) visit(node.statements) @@ -1127,7 +1133,9 @@ def visit_mlhs(node) def visit_module(node) name = node.constant.constant.value.to_sym module_iseq = - with_child_iseq(iseq.module_child_iseq(name, node.location)) do + with_child_iseq( + iseq.module_child_iseq(name, node.location.start_line) + ) do iseq.event(:RUBY_EVENT_CLASS) visit(node.bodystmt) iseq.event(:RUBY_EVENT_END) @@ -1375,10 +1383,11 @@ def visit_program(node) top_iseq = InstructionSequence.new( - :top, "", + "", + 1, + :top, nil, - node.location, options ) @@ -1543,7 +1552,9 @@ def visit_sclass(node) iseq.putnil singleton_iseq = - with_child_iseq(iseq.singleton_class_child_iseq(node.location)) do + with_child_iseq( + iseq.singleton_class_child_iseq(node.location.start_line) + ) do iseq.event(:RUBY_EVENT_CLASS) visit(node.bodystmt) iseq.event(:RUBY_EVENT_END) @@ -2018,7 +2029,7 @@ def visit_pattern(node, end_label) if node.constant iseq.dup visit(node.constant) - iseq.checkmatch(CheckMatch::TYPE_CASE) + iseq.checkmatch(CheckMatch::VM_CHECKMATCH_TYPE_CASE) iseq.branchunless(match_failure_label) end @@ -2078,7 +2089,7 @@ def visit_pattern(node, end_label) iseq.setlocal(lookup.index, lookup.level) else visit(required) - iseq.checkmatch(CheckMatch::TYPE_CASE) + iseq.checkmatch(CheckMatch::VM_CHECKMATCH_TYPE_CASE) iseq.branchunless(match_failure_label) end diff --git a/lib/syntax_tree/yarv/decompiler.rb b/lib/syntax_tree/yarv/decompiler.rb index a6a567fb..47d2a2df 100644 --- a/lib/syntax_tree/yarv/decompiler.rb +++ b/lib/syntax_tree/yarv/decompiler.rb 
@@ -64,6 +64,13 @@ def decompile(iseq) clauses[label] = clause clause = [] label = insn.name + when BranchIf + body = [ + Assign(block_label.field, node_for(insn.label.name)), + Next(Args([])) + ] + + clause << UnlessNode(clause.pop, Statements(body), nil) when BranchUnless body = [ Assign(block_label.field, node_for(insn.label.name)), @@ -157,6 +164,8 @@ def decompile(iseq) ) end end + when Pop + # skip when PutObject case insn.object when Float diff --git a/lib/syntax_tree/yarv/disassembler.rb b/lib/syntax_tree/yarv/disassembler.rb index 033b6d3d..d303bcb7 100644 --- a/lib/syntax_tree/yarv/disassembler.rb +++ b/lib/syntax_tree/yarv/disassembler.rb @@ -4,7 +4,8 @@ module SyntaxTree module YARV class Disassembler attr_reader :output, :queue - attr_reader :current_prefix, :current_iseq + attr_reader :current_prefix + attr_accessor :current_iseq def initialize @output = StringIO.new @@ -114,7 +115,7 @@ def format_iseq(iseq) output << "#{current_prefix}== disasm: " output << "#:1 " - location = iseq.location + location = Location.fixed(line: iseq.line, char: 0, column: 0) output << "(#{location.start_line},#{location.start_column})-" output << "(#{location.end_line},#{location.end_column})" output << "> " diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb index 48305be6..c284221b 100644 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -116,18 +116,18 @@ def inspect end end - # The type of the instruction sequence. - attr_reader :type - # The name of the instruction sequence. attr_reader :name + # The source location of the instruction sequence. + attr_reader :file, :line + + # The type of the instruction sequence. + attr_reader :type + # The parent instruction sequence, if there is one. attr_reader :parent_iseq - # The location of the root node of this instruction sequence. 
- attr_reader :location - # This is the list of information about the arguments to this # instruction sequence. attr_accessor :argument_size @@ -157,16 +157,18 @@ def inspect attr_reader :options def initialize( - type, name, - parent_iseq, - location, + file, + line, + type, + parent_iseq = nil, options = Compiler::Options.new ) - @type = type @name = name + @file = file + @line = line + @type = type @parent_iseq = parent_iseq - @location = location @argument_size = 0 @argument_options = {} @@ -256,9 +258,9 @@ def to_a node_ids: [-1] * insns.length }, name, + file, "", - "", - location.start_line, + line, type, local_table.names, dumped_options, @@ -278,6 +280,12 @@ def disasm def compile! specialize_instructions! if options.specialized_instruction? + catch_table.each do |catch_entry| + if !catch_entry.is_a?(CatchBreak) && catch_entry.iseq + catch_entry.iseq.compile! + end + end + length = 0 insns.each do |insn| case insn @@ -416,30 +424,30 @@ def specialize_instructions! # Child instruction sequence methods ########################################################################## - def child_iseq(type, name, location) - InstructionSequence.new(type, name, self, location, options) + def child_iseq(name, line, type) + InstructionSequence.new(name, file, line, type, self, options) end - def block_child_iseq(location) + def block_child_iseq(line) current = self current = current.parent_iseq while current.type == :block - child_iseq(:block, "block in #{current.name}", location) + child_iseq("block in #{current.name}", line, :block) end - def class_child_iseq(name, location) - child_iseq(:class, "", location) + def class_child_iseq(name, line) + child_iseq("", line, :class) end - def method_child_iseq(name, location) - child_iseq(:method, name, location) + def method_child_iseq(name, line) + child_iseq(name, line, :method) end - def module_child_iseq(name, location) - child_iseq(:class, "", location) + def module_child_iseq(name, line) + child_iseq("", line, :class) 
end - def singleton_class_child_iseq(location) - child_iseq(:class, "singleton class", location) + def singleton_class_child_iseq(line) + child_iseq("singleton class", line, :class) end ########################################################################## @@ -447,19 +455,39 @@ def singleton_class_child_iseq(location) ########################################################################## class CatchEntry - attr_reader :iseq, :begin_label, :end_label, :exit_label + attr_reader :iseq, :begin_label, :end_label, :exit_label, :restore_sp - def initialize(iseq, begin_label, end_label, exit_label) + def initialize(iseq, begin_label, end_label, exit_label, restore_sp) @iseq = iseq @begin_label = begin_label @end_label = end_label @exit_label = exit_label + @restore_sp = restore_sp end end class CatchBreak < CatchEntry def to_a - [:break, iseq.to_a, begin_label.name, end_label.name, exit_label.name] + [ + :break, + iseq.to_a, + begin_label.name, + end_label.name, + exit_label.name, + restore_sp + ] + end + end + + class CatchEnsure < CatchEntry + def to_a + [ + :ensure, + iseq.to_a, + begin_label.name, + end_label.name, + exit_label.name + ] end end @@ -493,24 +521,64 @@ def to_a end end - def catch_break(iseq, begin_label, end_label, exit_label) - catch_table << CatchBreak.new(iseq, begin_label, end_label, exit_label) - end - - def catch_next(begin_label, end_label, exit_label) - catch_table << CatchNext.new(nil, begin_label, end_label, exit_label) - end - - def catch_redo(begin_label, end_label, exit_label) - catch_table << CatchRedo.new(nil, begin_label, end_label, exit_label) - end - - def catch_rescue(iseq, begin_label, end_label, exit_label) - catch_table << CatchRescue.new(iseq, begin_label, end_label, exit_label) - end - - def catch_retry(begin_label, end_label, exit_label) - catch_table << CatchRetry.new(nil, begin_label, end_label, exit_label) + def catch_break(iseq, begin_label, end_label, exit_label, restore_sp) + catch_table << CatchBreak.new( + iseq, + 
begin_label, + end_label, + exit_label, + restore_sp + ) + end + + def catch_ensure(iseq, begin_label, end_label, exit_label, restore_sp) + catch_table << CatchEnsure.new( + iseq, + begin_label, + end_label, + exit_label, + restore_sp + ) + end + + def catch_next(begin_label, end_label, exit_label, restore_sp) + catch_table << CatchNext.new( + nil, + begin_label, + end_label, + exit_label, + restore_sp + ) + end + + def catch_redo(begin_label, end_label, exit_label, restore_sp) + catch_table << CatchRedo.new( + nil, + begin_label, + end_label, + exit_label, + restore_sp + ) + end + + def catch_rescue(iseq, begin_label, end_label, exit_label, restore_sp) + catch_table << CatchRescue.new( + iseq, + begin_label, + end_label, + exit_label, + restore_sp + ) + end + + def catch_retry(begin_label, end_label, exit_label, restore_sp) + catch_table << CatchRetry.new( + nil, + begin_label, + end_label, + exit_label, + restore_sp + ) end ########################################################################## @@ -895,7 +963,8 @@ def toregexp(options, length) # This method will create a new instruction sequence from a serialized # RubyVM::InstructionSequence object. def self.from(source, options = Compiler::Options.new, parent_iseq = nil) - iseq = new(source[9], source[5], parent_iseq, Location.default, options) + iseq = + new(source[5], source[6], source[8], source[9], parent_iseq, options) # set up the labels object so that the labels are shared between the # location in the instruction sequence and the instructions that @@ -914,45 +983,9 @@ def self.from(source, options = Compiler::Options.new, parent_iseq = nil) iseq.argument_options[:opt].map! 
{ |opt| labels[opt] } end - # set up the catch table - source[12].each do |entry| - case entry[0] - when :break - iseq.catch_break( - from(entry[1]), - labels[entry[2]], - labels[entry[3]], - labels[entry[4]] - ) - when :next - iseq.catch_next( - labels[entry[2]], - labels[entry[3]], - labels[entry[4]] - ) - when :rescue - iseq.catch_rescue( - from(entry[1]), - labels[entry[2]], - labels[entry[3]], - labels[entry[4]] - ) - when :redo - iseq.catch_redo( - labels[entry[2]], - labels[entry[3]], - labels[entry[4]] - ) - when :retry - iseq.catch_retry( - labels[entry[2]], - labels[entry[3]], - labels[entry[4]] - ) - else - raise "unknown catch type: #{entry[0]}" - end - end + # track the child block iseqs so that our catch table can point to the + # correctly created iseqs + block_iseqs = [] # set up all of the instructions source[13].each do |insn| @@ -1135,6 +1168,7 @@ def self.from(source, options = Compiler::Options.new, parent_iseq = nil) iseq.putspecialobject(opnds[0]) when :send block_iseq = opnds[1] ? 
from(opnds[1], options, iseq) : nil + block_iseqs << block_iseq if block_iseq iseq.send(CallData.from(opnds[0]), block_iseq) when :setclassvariable iseq.push(SetClassVariable.new(opnds[0], opnds[1])) @@ -1163,6 +1197,76 @@ def self.from(source, options = Compiler::Options.new, parent_iseq = nil) end end + # set up the catch table + source[12].each do |entry| + case entry[0] + when :break + if entry[1] + break_iseq = + block_iseqs.find do |block_iseq| + block_iseq.name == entry[1][5] && + block_iseq.file == entry[1][6] && + block_iseq.line == entry[1][8] + end + + iseq.catch_break( + break_iseq || from(entry[1], options, iseq), + labels[entry[2]], + labels[entry[3]], + labels[entry[4]], + entry[5] + ) + else + iseq.catch_break( + nil, + labels[entry[2]], + labels[entry[3]], + labels[entry[4]], + entry[5] + ) + end + when :ensure + iseq.catch_ensure( + from(entry[1], options, iseq), + labels[entry[2]], + labels[entry[3]], + labels[entry[4]], + entry[5] + ) + when :next + iseq.catch_next( + labels[entry[2]], + labels[entry[3]], + labels[entry[4]], + entry[5] + ) + when :rescue + iseq.catch_rescue( + from(entry[1], options, iseq), + labels[entry[2]], + labels[entry[3]], + labels[entry[4]], + entry[5] + ) + when :redo + iseq.catch_redo( + labels[entry[2]], + labels[entry[3]], + labels[entry[4]], + entry[5] + ) + when :retry + iseq.catch_retry( + labels[entry[2]], + labels[entry[3]], + labels[entry[4]], + entry[5] + ) + else + raise "unknown catch type: #{entry[0]}" + end + end + iseq.compile! 
if iseq.type == :top iseq end diff --git a/lib/syntax_tree/yarv/instructions.rb b/lib/syntax_tree/yarv/instructions.rb index 288edb16..5e1d116b 100644 --- a/lib/syntax_tree/yarv/instructions.rb +++ b/lib/syntax_tree/yarv/instructions.rb @@ -399,9 +399,11 @@ def call(vm) # ~~~ # class CheckMatch - TYPE_WHEN = 1 - TYPE_CASE = 2 - TYPE_RESCUE = 3 + VM_CHECKMATCH_TYPE_WHEN = 1 + VM_CHECKMATCH_TYPE_CASE = 2 + VM_CHECKMATCH_TYPE_RESCUE = 3 + VM_CHECKMATCH_TYPE_MASK = 0x03 + VM_CHECKMATCH_ARRAY = 0x04 attr_reader :type @@ -434,7 +436,32 @@ def canonical end def call(vm) - raise NotImplementedError, "checkmatch" + target, pattern = vm.pop(2) + + vm.push( + if type & VM_CHECKMATCH_ARRAY > 0 + pattern.any? { |item| check?(item, target) } + else + check?(pattern, target) + end + ) + end + + private + + def check?(pattern, target) + case type & VM_CHECKMATCH_TYPE_MASK + when VM_CHECKMATCH_TYPE_WHEN + pattern + when VM_CHECKMATCH_TYPE_CASE + pattern === target + when VM_CHECKMATCH_TYPE_RESCUE + unless pattern.is_a?(Module) + raise TypeError, "class or module required for rescue clause" + end + + pattern === target + end end end @@ -762,12 +789,26 @@ def canonical def call(vm) object, superclass = vm.pop(2) - iseq = class_iseq - clazz = Class.new(superclass || Object) - vm.push(vm.run_class_frame(iseq, clazz)) + if name == :singletonclass + vm.push(vm.run_class_frame(class_iseq, object.singleton_class)) + elsif object.const_defined?(name) + vm.push(vm.run_class_frame(class_iseq, object.const_get(name))) + elsif flags & TYPE_MODULE > 0 + clazz = Module.new + object.const_set(name, clazz) + vm.push(vm.run_class_frame(class_iseq, clazz)) + else + clazz = + if flags & FLAG_HAS_SUPERCLASS > 0 + Class.new(superclass) + else + Class.new + end - object.const_set(name, clazz) + object.const_set(name, clazz) + vm.push(vm.run_class_frame(class_iseq, clazz)) + end end end @@ -882,17 +923,19 @@ def call(vm) when TYPE_NIL, TYPE_SELF, TYPE_TRUE, TYPE_FALSE, TYPE_ASGN, TYPE_EXPR message when 
TYPE_IVAR - message if vm._self.instance_variable_defined?(name) + message if vm.frame._self.instance_variable_defined?(name) when TYPE_LVAR raise NotImplementedError, "defined TYPE_LVAR" when TYPE_GVAR message if global_variables.include?(name) when TYPE_CVAR - clazz = vm._self + clazz = vm.frame._self clazz = clazz.singleton_class unless clazz.is_a?(Module) message if clazz.class_variable_defined?(name) when TYPE_CONST - raise NotImplementedError, "defined TYPE_CONST" + clazz = vm.frame._self + clazz = clazz.singleton_class unless clazz.is_a?(Module) + message if clazz.const_defined?(name) when TYPE_METHOD raise NotImplementedError, "defined TYPE_METHOD" when TYPE_YIELD @@ -904,7 +947,9 @@ def call(vm) when TYPE_FUNC message if object.respond_to?(name, true) when TYPE_CONST_FROM - raise NotImplementedError, "defined TYPE_CONST_FROM" + defined = + vm.frame.nesting.any? { |scope| scope.const_defined?(name, true) } + message if defined end vm.push(result) @@ -962,12 +1007,22 @@ def canonical def call(vm) name = method_name + nesting = vm.frame.nesting iseq = method_iseq vm + .frame ._self .__send__(:define_method, name) do |*args, **kwargs, &block| - vm.run_method_frame(name, iseq, self, *args, **kwargs, &block) + vm.run_method_frame( + name, + nesting, + iseq, + self, + *args, + **kwargs, + &block + ) end end end @@ -1024,12 +1079,22 @@ def canonical def call(vm) name = method_name + nesting = vm.frame.nesting iseq = method_iseq vm + .frame ._self .__send__(:define_singleton_method, name) do |*args, **kwargs, &block| - vm.run_method_frame(name, iseq, self, *args, **kwargs, &block) + vm.run_method_frame( + name, + nesting, + iseq, + self, + *args, + **kwargs, + &block + ) end end end @@ -1259,7 +1324,42 @@ def canonical end def call(vm) - raise NotImplementedError, "expandarray" + object = vm.pop + object = + if Array === object + object.dup + elsif object.respond_to?(:to_ary, true) + object.to_ary + else + [object] + end + + splat_flag = flags & 0x01 > 0 + 
postarg_flag = flags & 0x02 > 0 + + if number == 0 && splat_flag == 0 + # no space left on stack + elsif postarg_flag + values = [] + + if number > object.size + (number - object.size).times { values.push(nil) } + end + [number, object.size].min.times { values.push(object.pop) } + values.push(object.to_a) if splat_flag + + values.each { |item| vm.push(item) } + else + values = [] + + [number, object.size].min.times { values.push(object.shift) } + if number > values.size + (number - values.size).times { values.push(nil) } + end + values.push(object.to_a) if splat_flag + + values.reverse_each { |item| vm.push(item) } + end end end @@ -1424,7 +1524,7 @@ def canonical end def call(vm) - clazz = vm._self + clazz = vm.frame._self clazz = clazz.class unless clazz.is_a?(Class) vm.push(clazz.class_variable_get(name)) end @@ -1474,14 +1574,20 @@ def canonical end def call(vm) - # const_base, allow_nil = - vm.pop(2) + const_base, allow_nil = vm.pop(2) - vm.frame.nesting.reverse_each do |clazz| - if clazz.const_defined?(name) - vm.push(clazz.const_get(name)) + if const_base + if const_base.const_defined?(name) + vm.push(const_base.const_get(name)) return end + elsif const_base.nil? 
&& allow_nil + vm.frame.nesting.reverse_each do |clazz| + if clazz.const_defined?(name) + vm.push(clazz.const_get(name)) + return + end + end end raise NameError, "uninitialized constant #{name}" @@ -1590,7 +1696,7 @@ def canonical def call(vm) method = Object.instance_method(:instance_variable_get) - vm.push(method.bind(vm._self).call(name)) + vm.push(method.bind(vm.frame._self).call(name)) end end @@ -1948,8 +2054,9 @@ def canonical def call(vm) block = if (iseq = block_iseq) + frame = vm.frame ->(*args, **kwargs, &blk) do - vm.run_block_frame(iseq, *args, **kwargs, &blk) + vm.run_block_frame(iseq, frame, *args, **kwargs, &blk) end end @@ -2396,7 +2503,7 @@ def canonical def call(vm) return if @executed - vm.push(vm.run_block_frame(iseq)) + vm.push(vm.run_block_frame(iseq, vm.frame)) @executed = true end end @@ -2960,7 +3067,7 @@ def canonical end def call(vm) - current = vm._self + current = vm.frame._self current = current.class unless current.is_a?(Class) names.each do |name| @@ -4254,7 +4361,7 @@ def canonical end def call(vm) - vm.push(vm._self) + vm.push(vm.frame._self) end end @@ -4310,7 +4417,7 @@ def call(vm) when OBJECT_VMCORE vm.push(vm.frozen_core) when OBJECT_CBASE - value = vm._self + value = vm.frame._self value = value.singleton_class unless value.is_a?(Class) vm.push(value) when OBJECT_CONST_BASE @@ -4418,9 +4525,12 @@ def canonical def call(vm) block = if (iseq = block_iseq) + frame = vm.frame ->(*args, **kwargs, &blk) do - vm.run_block_frame(iseq, *args, **kwargs, &blk) + vm.run_block_frame(iseq, frame, *args, **kwargs, &blk) end + elsif calldata.flag?(CallData::CALL_ARGS_BLOCKARG) + vm.pop end keywords = @@ -4542,7 +4652,7 @@ def canonical end def call(vm) - clazz = vm._self + clazz = vm.frame._self clazz = clazz.class unless clazz.is_a?(Class) clazz.class_variable_set(name, vm.pop) end @@ -4698,7 +4808,7 @@ def canonical def call(vm) method = Object.instance_method(:instance_variable_set) - method.bind(vm._self).call(name, vm.pop) + 
method.bind(vm.frame._self).call(name, vm.pop) end end @@ -4946,7 +5056,7 @@ def canonical def call(vm) case key when GetSpecial::SVAR_LASTLINE - raise NotImplementedError, "svar SVAR_LASTLINE" + raise NotImplementedError, "setspecial SVAR_LASTLINE" when GetSpecial::SVAR_BACKREF raise NotImplementedError, "setspecial SVAR_BACKREF" when GetSpecial::SVAR_FLIPFLOP_START @@ -4999,7 +5109,27 @@ def canonical end def call(vm) - vm.push(*vm.pop) + value = vm.pop + + vm.push( + if Array === value + value.instance_of?(Array) ? value.dup : Array[*value] + elsif value.nil? + value.to_a + else + if value.respond_to?(:to_a, true) + result = value.to_a + + if result.nil? + [value] + elsif !result.is_a?(Array) + raise TypeError, "expected to_a to return an Array" + end + else + [value] + end + end + ) end end @@ -5061,15 +5191,18 @@ def call(vm) # ~~~ # class Throw - TAG_NONE = 0x0 - TAG_RETURN = 0x1 - TAG_BREAK = 0x2 - TAG_NEXT = 0x3 - TAG_RETRY = 0x4 - TAG_REDO = 0x5 - TAG_RAISE = 0x6 - TAG_THROW = 0x7 - TAG_FATAL = 0x8 + RUBY_TAG_NONE = 0x0 + RUBY_TAG_RETURN = 0x1 + RUBY_TAG_BREAK = 0x2 + RUBY_TAG_NEXT = 0x3 + RUBY_TAG_RETRY = 0x4 + RUBY_TAG_REDO = 0x5 + RUBY_TAG_RAISE = 0x6 + RUBY_TAG_THROW = 0x7 + RUBY_TAG_FATAL = 0x8 + + VM_THROW_NO_ESCAPE_FLAG = 0x8000 + VM_THROW_STATE_MASK = 0xff attr_reader :type @@ -5102,7 +5235,43 @@ def canonical end def call(vm) - raise NotImplementedError, "throw" + state = type & VM_THROW_STATE_MASK + value = vm.pop + + case state + when RUBY_TAG_NONE + case value + when nil + # do nothing + when Exception + raise value + else + raise NotImplementedError + end + when RUBY_TAG_RETURN + raise VM::ReturnError.new(value, error_backtrace(vm)) + when RUBY_TAG_BREAK + raise VM::BreakError.new(value, error_backtrace(vm)) + when RUBY_TAG_NEXT + raise VM::NextError.new(value, error_backtrace(vm)) + else + raise NotImplementedError, "Unknown throw kind #{state}" + end + end + + private + + def error_backtrace(vm) + backtrace = [] + current = vm.frame + + 
while current + backtrace << "#{current.iseq.file}:#{current.line}:in" \ + "`#{current.iseq.name}'" + current = current.parent + end + + [*backtrace, *caller] end end diff --git a/lib/syntax_tree/yarv/legacy.rb b/lib/syntax_tree/yarv/legacy.rb index 30a95437..b2e33290 100644 --- a/lib/syntax_tree/yarv/legacy.rb +++ b/lib/syntax_tree/yarv/legacy.rb @@ -45,6 +45,14 @@ def pops def pushes 1 end + + def canonical + YARV::GetClassVariable.new(name, nil) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -94,6 +102,10 @@ def pushes 1 end + def canonical + self + end + def call(vm) vm.push(nil) end @@ -102,8 +114,8 @@ def call(vm) # ### Summary # # `opt_setinlinecache` sets an inline cache for a constant lookup. It pops - # the value it should set off the top of the stack. It then pushes that - # value back onto the top of the stack. + # the value it should set off the top of the stack. It uses this value to + # set the cache. It then pushes that value back onto the top of the stack. # # This instruction is no longer used since in Ruby 3.2 it was replaced by # the consolidated `opt_getconstant_path` instruction. @@ -141,8 +153,11 @@ def pushes 1 end + def canonical + self + end + def call(vm) - vm.push(vm.pop) end end @@ -186,6 +201,14 @@ def pops def pushes 0 end + + def canonical + YARV::SetClassVariable.new(name, nil) + end + + def call(vm) + canonical.call(vm) + end end end end diff --git a/lib/syntax_tree/yarv/vm.rb b/lib/syntax_tree/yarv/vm.rb new file mode 100644 index 00000000..1bbb82ed --- /dev/null +++ b/lib/syntax_tree/yarv/vm.rb @@ -0,0 +1,624 @@ +# frozen_string_literal: true + +require "forwardable" + +module SyntaxTree + # This module provides an object representation of the YARV bytecode. 
+ module YARV + class VM + class Jump + attr_reader :label + + def initialize(label) + @label = label + end + end + + class Leave + attr_reader :value + + def initialize(value) + @value = value + end + end + + class Frame + attr_reader :iseq, :parent, :stack_index, :_self, :nesting, :svars + attr_accessor :line, :pc + + def initialize(iseq, parent, stack_index, _self, nesting) + @iseq = iseq + @parent = parent + @stack_index = stack_index + @_self = _self + @nesting = nesting + + @svars = {} + @line = iseq.line + @pc = 0 + end + end + + class TopFrame < Frame + def initialize(iseq) + super(iseq, nil, 0, TOPLEVEL_BINDING.eval("self"), [Object]) + end + end + + class BlockFrame < Frame + def initialize(iseq, parent, stack_index) + super(iseq, parent, stack_index, parent._self, parent.nesting) + end + end + + class MethodFrame < Frame + attr_reader :name, :block + + def initialize(iseq, nesting, parent, stack_index, _self, name, block) + super(iseq, parent, stack_index, _self, nesting) + @name = name + @block = block + end + end + + class ClassFrame < Frame + def initialize(iseq, parent, stack_index, _self) + super(iseq, parent, stack_index, _self, parent.nesting + [_self]) + end + end + + class RescueFrame < Frame + def initialize(iseq, parent, stack_index) + super(iseq, parent, stack_index, parent._self, parent.nesting) + end + end + + class ThrownError < StandardError + attr_reader :value + + def initialize(value, backtrace) + super("This error was thrown by the Ruby VM.") + @value = value + set_backtrace(backtrace) + end + end + + class ReturnError < ThrownError + end + + class BreakError < ThrownError + end + + class NextError < ThrownError + end + + class FrozenCore + define_method("core#hash_merge_kwd") { |left, right| left.merge(right) } + + define_method("core#hash_merge_ptr") do |hash, *values| + hash.merge(values.each_slice(2).to_h) + end + + define_method("core#set_method_alias") do |clazz, new_name, old_name| + clazz.alias_method(new_name, old_name) + end 
+ + define_method("core#set_variable_alias") do |new_name, old_name| + # Using eval here since there isn't a reflection API to be able to + # alias global variables. + eval("alias #{new_name} #{old_name}", binding, __FILE__, __LINE__) + end + + define_method("core#set_postexe") { |&block| END { block.call } } + + define_method("core#undef_method") do |clazz, name| + clazz.undef_method(name) + nil + end + end + + # This is the main entrypoint for events firing in the VM, which allows + # us to implement tracing. + class NullEvents + def publish_frame_change(frame) + end + + def publish_instruction(iseq, insn) + end + + def publish_stack_change(stack) + end + + def publish_tracepoint(event) + end + end + + # This is a simple implementation of tracing that prints to STDOUT. + class STDOUTEvents + attr_reader :disassembler + + def initialize + @disassembler = Disassembler.new + end + + def publish_frame_change(frame) + puts "%-16s %s" % ["frame-change", "#{frame.iseq.file}@#{frame.line}"] + end + + def publish_instruction(iseq, insn) + disassembler.current_iseq = iseq + puts "%-16s %s" % ["instruction", insn.disasm(disassembler)] + end + + def publish_stack_change(stack) + puts "%-16s %s" % ["stack-change", stack.values.inspect] + end + + def publish_tracepoint(event) + puts "%-16s %s" % ["tracepoint", event.inspect] + end + end + + # This represents the global VM stack. It effectively is an array, but + # wraps mutating functions with instrumentation. + class Stack + attr_reader :events, :values + + def initialize(events) + @events = events + @values = [] + end + + def concat(...) + values.concat(...).tap { events.publish_stack_change(self) } + end + + def last + values.last + end + + def length + values.length + end + + def push(...) + values.push(...).tap { events.publish_stack_change(self) } + end + + def pop(...) + values.pop(...).tap { events.publish_stack_change(self) } + end + + def slice!(...) 
+ values.slice!(...).tap { events.publish_stack_change(self) } + end + + def [](...) + values.[](...) + end + + def []=(...) + values.[]=(...).tap { events.publish_stack_change(self) } + end + end + + FROZEN_CORE = FrozenCore.new.freeze + + extend Forwardable + + attr_reader :events + + attr_reader :stack + def_delegators :stack, :push, :pop + + attr_reader :frame + + def initialize(events = NullEvents.new) + @events = events + @stack = Stack.new(events) + @frame = nil + end + + ########################################################################## + # Helper methods for frames + ########################################################################## + + def run_frame(frame) + # First, set the current frame to the given value. + previous = @frame + @frame = frame + events.publish_frame_change(@frame) + + # Next, set up the local table for the frame. This is actually incorrect + # as it could use the values already on the stack, but for now we're + # just doing this for simplicity. + stack.concat(Array.new(frame.iseq.local_table.size)) + + # Yield so that some frame-specific setup can be done. + start_label = yield if block_given? + frame.pc = frame.iseq.insns.index(start_label) if start_label + + # Finally we can execute the instructions one at a time. If they return + # jumps or leaves we will handle those appropriately. + loop do + case (insn = frame.iseq.insns[frame.pc]) + when Integer + frame.line = insn + frame.pc += 1 + when Symbol + events.publish_tracepoint(insn) + frame.pc += 1 + when InstructionSequence::Label + # skip labels + frame.pc += 1 + else + begin + events.publish_instruction(frame.iseq, insn) + result = insn.call(self) + rescue ReturnError => error + raise if frame.iseq.type != :method + + stack.slice!(frame.stack_index..) 
+ @frame = frame.parent + events.publish_frame_change(@frame) + + return error.value + rescue BreakError => error + raise if frame.iseq.type != :block + + catch_entry = + find_catch_entry(frame, InstructionSequence::CatchBreak) + raise unless catch_entry + + stack.slice!( + ( + frame.stack_index + frame.iseq.local_table.size + + catch_entry.restore_sp + ).. + ) + @frame = frame + events.publish_frame_change(@frame) + + frame.pc = frame.iseq.insns.index(catch_entry.exit_label) + push(result = error.value) + rescue NextError => error + raise if frame.iseq.type != :block + + catch_entry = + find_catch_entry(frame, InstructionSequence::CatchNext) + raise unless catch_entry + + stack.slice!( + ( + frame.stack_index + frame.iseq.local_table.size + + catch_entry.restore_sp + ).. + ) + @frame = frame + events.publish_frame_change(@frame) + + frame.pc = frame.iseq.insns.index(catch_entry.exit_label) + push(result = error.value) + rescue Exception => error + catch_entry = + find_catch_entry(frame, InstructionSequence::CatchRescue) + raise unless catch_entry + + stack.slice!( + ( + frame.stack_index + frame.iseq.local_table.size + + catch_entry.restore_sp + ).. + ) + @frame = frame + events.publish_frame_change(@frame) + + frame.pc = frame.iseq.insns.index(catch_entry.exit_label) + push(result = run_rescue_frame(catch_entry.iseq, frame, error)) + end + + case result + when Jump + frame.pc = frame.iseq.insns.index(result.label) + 1 + when Leave + # this shouldn't be necessary, but is because we're not handling + # the stack correctly at the moment + stack.slice!(frame.stack_index..) 
+ + # restore the previous frame + @frame = previous || frame.parent + events.publish_frame_change(@frame) if @frame + + return result.value + else + frame.pc += 1 + end + end + end + end + + def find_catch_entry(frame, type) + iseq = frame.iseq + iseq.catch_table.find do |catch_entry| + next unless catch_entry.is_a?(type) + + begin_pc = iseq.insns.index(catch_entry.begin_label) + end_pc = iseq.insns.index(catch_entry.end_label) + + (begin_pc...end_pc).cover?(frame.pc) + end + end + + def run_top_frame(iseq) + run_frame(TopFrame.new(iseq)) + end + + def run_block_frame(iseq, frame, *args, **kwargs, &block) + run_frame(BlockFrame.new(iseq, frame, stack.length)) do + setup_arguments(iseq, args, kwargs, block) + end + end + + def run_class_frame(iseq, clazz) + run_frame(ClassFrame.new(iseq, frame, stack.length, clazz)) + end + + def run_method_frame(name, nesting, iseq, _self, *args, **kwargs, &block) + run_frame( + MethodFrame.new( + iseq, + nesting, + frame, + stack.length, + _self, + name, + block + ) + ) { setup_arguments(iseq, args, kwargs, block) } + end + + def run_rescue_frame(iseq, frame, error) + run_frame(RescueFrame.new(iseq, frame, stack.length)) do + local_set(0, 0, error) + nil + end + end + + def setup_arguments(iseq, args, kwargs, block) + locals = [*args] + local_index = 0 + start_label = nil + + # First, set up all of the leading arguments. These are positional and + # required arguments at the start of the argument list. + if (lead_num = iseq.argument_options[:lead_num]) + lead_num.times do + local_set(local_index, 0, locals.shift) + local_index += 1 + end + end + + # Next, set up all of the optional arguments. The opt array contains + # the labels that the frame should start at if the optional is + # present. The last element of the array is the label that the frame + # should start at if all of the optional arguments are present. + if (opt = iseq.argument_options[:opt]) + opt[0...-1].each do |label| + if locals.empty? 
+ start_label = label + break + else + local_set(local_index, 0, locals.shift) + local_index += 1 + end + + start_label = opt.last if start_label.nil? + end + end + + # If there is a splat argument, then we'll set that up here. It will + # grab up all of the remaining positional arguments. + if (rest_start = iseq.argument_options[:rest_start]) + if (post_start = iseq.argument_options[:post_start]) + length = post_start - rest_start + local_set(local_index, 0, locals[0...length]) + locals = locals[length..] + else + local_set(local_index, 0, locals.dup) + locals.clear + end + local_index += 1 + end + + # Next, set up any post arguments. These are positional arguments that + # come after the splat argument. + if (post_num = iseq.argument_options[:post_num]) + post_num.times do + local_set(local_index, 0, locals.shift) + local_index += 1 + end + end + + if (keyword_option = iseq.argument_options[:keyword]) + # First, set up the keyword bits array. + keyword_bits = + keyword_option.map do |config| + kwargs.key?(config.is_a?(Array) ? config[0] : config) + end + + iseq.local_table.locals.each_with_index do |local, index| + # If this is the keyword bits local, then set it appropriately. + if local.name.is_a?(Integer) + local_set(index, 0, keyword_bits) + next + end + + # First, find the configuration for this local in the keywords + # list if it exists. + name = local.name + config = + keyword_option.find do |keyword| + keyword.is_a?(Array) ? keyword[0] == name : keyword == name + end + + # If the configuration doesn't exist, then the local is not a + # keyword local. + next unless config + + if !config.is_a?(Array) + # required keyword + local_set(index, 0, kwargs.fetch(name)) + elsif !config[1].nil? 
+ # optional keyword with embedded default value + local_set(index, 0, kwargs.fetch(name, config[1])) + else + # optional keyword with expression default value + local_set(index, 0, kwargs[name]) + end + end + end + + local_set(local_index, 0, block) if iseq.argument_options[:block_start] + + start_label + end + + ########################################################################## + # Helper methods for instructions + ########################################################################## + + def const_base + frame.nesting.last + end + + def frame_at(level) + current = frame + level.times { current = current.parent } + current + end + + def frame_svar + current = frame + current = current.parent while current.is_a?(BlockFrame) + current + end + + def frame_yield + current = frame + current = current.parent until current.is_a?(MethodFrame) + current + end + + def frozen_core + FROZEN_CORE + end + + def jump(label) + Jump.new(label) + end + + def leave + Leave.new(pop) + end + + def local_get(index, level) + stack[frame_at(level).stack_index + index] + end + + def local_set(index, level, value) + stack[frame_at(level).stack_index + index] = value + end + + ########################################################################## + # Methods for overriding runtime behavior + ########################################################################## + + DLEXT = ".#{RbConfig::CONFIG["DLEXT"]}" + SOEXT = ".#{RbConfig::CONFIG["SOEXT"]}" + + def require_resolved(filepath) + $LOADED_FEATURES << filepath + iseq = RubyVM::InstructionSequence.compile_file(filepath) + run_top_frame(InstructionSequence.from(iseq.to_a)) + end + + def require_internal(filepath, loading: false) + case (extname = File.extname(filepath)) + when "" + # search for all the extensions + searching = filepath + extensions = ["", ".rb", DLEXT, SOEXT] + when ".rb", DLEXT, SOEXT + # search only for the given extension name + searching = File.basename(filepath, extname) + extensions = [extname] + 
else + # we don't handle these extensions, raise a load error + raise LoadError, "cannot load such file -- #{filepath}" + end + + if filepath.start_with?("/") + # absolute path, search only in the given directory + directories = [File.dirname(searching)] + searching = File.basename(searching) + else + # relative path, search in the load path + directories = $LOAD_PATH + end + + directories.each do |directory| + extensions.each do |extension| + absolute_path = File.join(directory, "#{searching}#{extension}") + next unless File.exist?(absolute_path) + + if !loading && $LOADED_FEATURES.include?(absolute_path) + return false + elsif extension == ".rb" + require_resolved(absolute_path) + return true + elsif loading + return Kernel.send(:yarv_load, filepath) + else + return Kernel.send(:yarv_require, filepath) + end + end + end + + if loading + Kernel.send(:yarv_load, filepath) + else + Kernel.send(:yarv_require, filepath) + end + end + + def require(filepath) + require_internal(filepath, loading: false) + end + + def require_relative(filepath) + Kernel.yarv_require_relative(filepath) + end + + def load(filepath) + require_internal(filepath, loading: true) + end + + def eval( + source, + binding = TOPLEVEL_BINDING, + filename = "(eval)", + lineno = 1 + ) + Kernel.yarv_eval(source, binding, filename, lineno) + end + + def throw(tag, value = nil) + Kernel.throw(tag, value) + end + + def catch(tag, &block) + Kernel.catch(tag, &block) + end + end + end +end diff --git a/spec/mspec b/spec/mspec new file mode 160000 index 00000000..4877d58d --- /dev/null +++ b/spec/mspec @@ -0,0 +1 @@ +Subproject commit 4877d58dff577641bc1ecd1bf3d3c3daa93b423f diff --git a/spec/ruby b/spec/ruby new file mode 160000 index 00000000..71873ae4 --- /dev/null +++ b/spec/ruby @@ -0,0 +1 @@ +Subproject commit 71873ae4421f5b551a5af0f3427e901414736835 diff --git a/test/fixtures/arg_paren.rb b/test/fixtures/arg_paren.rb index 0e01e208..0816af6a 100644 --- a/test/fixtures/arg_paren.rb +++ 
b/test/fixtures/arg_paren.rb @@ -2,8 +2,6 @@ foo(bar) % foo() -- -foo % foo(barrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr) - diff --git a/test/yarv_test.rb b/test/yarv_test.rb index f8e0ffdb..6f60d74e 100644 --- a/test/yarv_test.rb +++ b/test/yarv_test.rb @@ -41,9 +41,253 @@ def test_bf ">>.>---.+++++++..+++.>>.<-.<.+++.------.--------.>>+.>++." iseq = YARV::Bf.new(hello_world).compile + stdout, = capture_io { iseq.eval } + assert_equal "Hello World!\n", stdout + Formatter.format(hello_world, YARV::Decompiler.new(iseq).to_ruby) end + # rubocop:disable Layout/LineLength + EMULATION_CASES = { + # adjuststack + "x = [true]; x[0] ||= nil; x[0]" => true, + # anytostring + "\"\#{5}\"" => "5", + "class A2Str; def to_s; 1; end; end; \"\#{A2Str.new}\"" => + "#", + # branchif + "x = true; x ||= \"foo\"; x" => true, + # branchnil + "x = nil; if x&.to_s; 'hi'; else; 'bye'; end" => "bye", + # branchunless + "if 2 + 3; 'hi'; else; 'bye'; end" => "hi", + # checkkeyword + # "def evaluate(value: rand); value.floor; end; evaluate" => 0, + # checkmatch + "'foo' in String" => true, + "case 1; when *[1, 2, 3]; true; end" => true, + # checktype + "['foo'] in [String]" => true, + # concatarray + "[1, *2]" => [1, 2], + # concatstrings + "\"\#{7}\"" => "7", + # defineclass + "class DefineClass; def bar; end; end" => :bar, + "module DefineModule; def bar; end; end" => :bar, + "class << self; self; end" => + TOPLEVEL_BINDING.eval("self").singleton_class, + # defined + "defined?(1)" => "expression", + "defined?(foo = 1)" => "assignment", + "defined?(Object)" => "constant", + # definemethod + "def definemethod = 5; definemethod" => 5, + # definesmethod + "def self.definesmethod = 5; self.definesmethod" => 5, + # dup + "$global = 5" => 5, + # duparray + "[true]" => [true], + # duphash + "{ a: 1 }" => { + a: 1 + }, + # dupn + "Object::X ||= true" => true, + # expandarray + "x, = [true, false, nil]" => [true, false, nil], + "*, x = [true, false, nil]" => [true, 
false, nil], + # getblockparam + "def getblockparam(&block); block; end; getblockparam { 1 }.call" => 1, + # getblockparamproxy + "def getblockparamproxy(&block); block.call; end; getblockparamproxy { 1 }" => + 1, + # getclassvariable + "class CVar; @@foo = 5; end; class << CVar; @@foo; end" => 5, + # getconstant + "Object" => Object, + # getglobal + "$$" => $$, + # getinstancevariable + "@foo = 5; @foo" => 5, + # getlocal + "value = 5; self.then { self.then { self.then { value } } }" => 5, + # getlocalwc0 + "value = 5; value" => 5, + # getlocalwc1 + "value = 5; self.then { value }" => 5, + # getspecial + "1 if (2 == 2) .. (3 == 3)" => 1, + # intern + ":\"foo\#{1}\"" => :foo1, + # invokeblock + "def invokeblock = yield; invokeblock { 1 }" => 1, + # invokesuper + <<~RUBY => 2, + class Parent + def value + 1 + end + end + + class Child < Parent + def value + super + 1 + end + end + + Child.new.value + RUBY + # jump + "x = 0; if x == 0 then 1 else 2 end" => 1, + # newarray + "[\"value\"]" => ["value"], + # newarraykwsplat + "[\"string\", **{ foo: \"bar\" }]" => ["string", { foo: "bar" }], + # newhash + "def newhash(key, value) = { key => value }; newhash(1, 2)" => { + 1 => 2 + }, + # newrange + "x = 0; y = 1; (x..y).to_a" => [0, 1], + # nop + # objtostring + "\"\#{6}\"" => "6", + # once + "/\#{1}/o" => /1/o, + # opt_and + "0b0110 & 0b1011" => 0b0010, + # opt_aref + "x = [1, 2, 3]; x[1]" => 2, + # opt_aref_with + "x = { \"a\" => 1 }; x[\"a\"]" => 1, + # opt_aset + "x = [1, 2, 3]; x[1] = 4; x" => [1, 4, 3], + # opt_aset_with + "x = { \"a\" => 1 }; x[\"a\"] = 2; x" => { + "a" => 2 + }, + # opt_case_dispatch + <<~RUBY => "foo", + case 1 + when 1 + "foo" + else + "bar" + end + RUBY + # opt_div + "5 / 2" => 2, + # opt_empty_p + "[].empty?" 
=> true, + # opt_eq + "1 == 1" => true, + # opt_ge + "1 >= 1" => true, + # opt_getconstant_path + "::Object" => Object, + # opt_gt + "1 > 1" => false, + # opt_le + "1 <= 1" => true, + # opt_length + "[1, 2, 3].length" => 3, + # opt_lt + "1 < 1" => false, + # opt_ltlt + "\"\" << 2" => "\u0002", + # opt_minus + "1 - 1" => 0, + # opt_mod + "5 % 2" => 1, + # opt_mult + "5 * 2" => 10, + # opt_neq + "1 != 1" => false, + # opt_newarray_max + "def opt_newarray_max(a, b, c) = [a, b, c].max; opt_newarray_max(1, 2, 3)" => + 3, + # opt_newarray_min + "def opt_newarray_min(a, b, c) = [a, b, c].min; opt_newarray_min(1, 2, 3)" => + 1, + # opt_nil_p + "nil.nil?" => true, + # opt_not + "!true" => false, + # opt_or + "0b0110 | 0b1011" => 0b1111, + # opt_plus + "1 + 1" => 2, + # opt_regexpmatch2 + "/foo/ =~ \"~~~foo\"" => 3, + # opt_send_without_block + "5.to_s" => "5", + # opt_size + "[1, 2, 3].size" => 3, + # opt_str_freeze + "\"foo\".freeze" => "foo", + # opt_str_uminus + "-\"foo\"" => -"foo", + # opt_succ + "1.succ" => 2, + # pop + "a ||= 2; a" => 2, + # putnil + "[nil]" => [nil], + # putobject + "2" => 2, + # putobject_INT2FIX_0_ + "0" => 0, + # putobject_INT2FIX_1_ + "1" => 1, + # putself + "self" => TOPLEVEL_BINDING.eval("self"), + # putspecialobject + "[class Undef; def foo = 1; undef foo; end]" => [nil], + # putstring + "\"foo\"" => "foo", + # send + "\"hello\".then { |value| value }" => "hello", + # setblockparam + "def setblockparam(&bar); bar = -> { 1 }; bar.call; end; setblockparam" => + 1, + # setclassvariable + "class CVarSet; @@foo = 1; end; class << CVarSet; @@foo = 10; end" => 10, + # setconstant + "SetConstant = 1" => 1, + # setglobal + "$global = 10" => 10, + # setinstancevariable + "@ivar = 5" => 5, + # setlocal + "x = 5; tap { tap { tap { x = 10 } } }; x" => 10, + # setlocal_WC_0 + "x = 5; x" => 5, + # setlocal_WC_1 + "x = 5; tap { x = 10 }; x" => 10, + # setn + "{}[:key] = 'value'" => "value", + # setspecial + "1 if (1 == 1) .. 
(2 == 2)" => 1, + # splatarray + "x = *(5)" => [5], + # swap + "!!defined?([[]])" => true, + # throw + # topn + "case 3; when 1..5; 'foo'; end" => "foo", + # toregexp + "/abc \#{1 + 2} def/" => /abc 3 def/ + }.freeze + # rubocop:enable Layout/LineLength + + EMULATION_CASES.each do |source, expected| + define_method("test_emulate_#{source}") do + assert_emulates(expected, source) + end + end + private def assert_decompiles(expected, source) @@ -51,5 +295,41 @@ def assert_decompiles(expected, source) actual = Formatter.format(source, ruby) assert_equal expected, actual end + + def assert_emulates(expected, source) + ruby_iseq = RubyVM::InstructionSequence.compile(source) + yarv_iseq = YARV::InstructionSequence.from(ruby_iseq.to_a) + + exercise_iseq(yarv_iseq) + result = SyntaxTree::YARV::VM.new.run_top_frame(yarv_iseq) + assert_equal(expected, result) + end + + def exercise_iseq(iseq) + iseq.disasm + iseq.to_a + + iseq.insns.each do |insn| + case insn + when YARV::InstructionSequence::Label, Integer, Symbol + next + end + + insn.pushes + insn.pops + insn.canonical + + case insn + when YARV::DefineClass + exercise_iseq(insn.class_iseq) + when YARV::DefineMethod, YARV::DefineSMethod + exercise_iseq(insn.method_iseq) + when YARV::InvokeSuper, YARV::Send + exercise_iseq(insn.block_iseq) if insn.block_iseq + when YARV::Once + exercise_iseq(insn.iseq) + end + end + end end end